From eac711997751476e5b61489d2b32066876ab6e04 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sat, 13 Jun 2026 09:26:16 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: amphora/qwen3-4b-think Source: Original Platform --- .gitattributes | 38 + README.md | 61 + all_results.json | 8 + chat_template.jinja | 86 + checkpoint-500/chat_template.jinja | 86 + checkpoint-500/config.json | 71 + checkpoint-500/generation_config.json | 12 + ..._zero_pp_rank_0_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_1_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_2_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_3_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_4_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_5_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_6_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_7_mp_rank_00_optim_states.pt | 3 + .../zero_pp_rank_0_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_1_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_2_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_3_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_4_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_5_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_6_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_7_mp_rank_00_model_states.pt | 3 + checkpoint-500/latest | 1 + checkpoint-500/model.safetensors | 3 + checkpoint-500/rng_state_0.pth | 3 + checkpoint-500/rng_state_1.pth | 3 + checkpoint-500/rng_state_2.pth | 3 + checkpoint-500/rng_state_3.pth | 3 + checkpoint-500/rng_state_4.pth | 3 + checkpoint-500/rng_state_5.pth | 3 + checkpoint-500/rng_state_6.pth | 3 + checkpoint-500/rng_state_7.pth | 3 + checkpoint-500/scheduler.pt | 3 + checkpoint-500/tokenizer.json | 3 + checkpoint-500/tokenizer_config.json | 30 + 
checkpoint-500/trainer_state.json | 3534 +++++++++ checkpoint-500/training_args.bin | 3 + checkpoint-500/zero_to_fp32.py | 760 ++ checkpoint-909/chat_template.jinja | 86 + checkpoint-909/config.json | 71 + checkpoint-909/generation_config.json | 12 + ..._zero_pp_rank_0_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_1_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_2_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_3_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_4_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_5_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_6_mp_rank_00_optim_states.pt | 3 + ..._zero_pp_rank_7_mp_rank_00_optim_states.pt | 3 + .../zero_pp_rank_0_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_1_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_2_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_3_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_4_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_5_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_6_mp_rank_00_model_states.pt | 3 + .../zero_pp_rank_7_mp_rank_00_model_states.pt | 3 + checkpoint-909/latest | 1 + checkpoint-909/model.safetensors | 3 + checkpoint-909/rng_state_0.pth | 3 + checkpoint-909/rng_state_1.pth | 3 + checkpoint-909/rng_state_2.pth | 3 + checkpoint-909/rng_state_3.pth | 3 + checkpoint-909/rng_state_4.pth | 3 + checkpoint-909/rng_state_5.pth | 3 + checkpoint-909/rng_state_6.pth | 3 + checkpoint-909/rng_state_7.pth | 3 + checkpoint-909/scheduler.pt | 3 + checkpoint-909/tokenizer.json | 3 + checkpoint-909/tokenizer_config.json | 30 + checkpoint-909/trainer_state.json | 6397 ++++++++++++++++ checkpoint-909/training_args.bin | 3 + checkpoint-909/zero_to_fp32.py | 760 ++ config.json | 71 + generation_config.json | 12 + model.safetensors | 3 + tokenizer.json | 3 + tokenizer_config.json | 30 + train_results.json | 8 + trainer_log.jsonl | 1119 +++ trainer_state.json | 6406 +++++++++++++++++ training_args.bin | 3 + training_loss.png | Bin 0 -> 40959 bytes 84 files 
changed, 19867 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 chat_template.jinja create mode 100644 checkpoint-500/chat_template.jinja create mode 100644 checkpoint-500/config.json create mode 100644 checkpoint-500/generation_config.json create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/global_step500/zero_pp_rank_7_mp_rank_00_model_states.pt create mode 100644 checkpoint-500/latest create mode 100644 checkpoint-500/model.safetensors create mode 100644 checkpoint-500/rng_state_0.pth create mode 100644 
checkpoint-500/rng_state_1.pth create mode 100644 checkpoint-500/rng_state_2.pth create mode 100644 checkpoint-500/rng_state_3.pth create mode 100644 checkpoint-500/rng_state_4.pth create mode 100644 checkpoint-500/rng_state_5.pth create mode 100644 checkpoint-500/rng_state_6.pth create mode 100644 checkpoint-500/rng_state_7.pth create mode 100644 checkpoint-500/scheduler.pt create mode 100644 checkpoint-500/tokenizer.json create mode 100644 checkpoint-500/tokenizer_config.json create mode 100644 checkpoint-500/trainer_state.json create mode 100644 checkpoint-500/training_args.bin create mode 100644 checkpoint-500/zero_to_fp32.py create mode 100644 checkpoint-909/chat_template.jinja create mode 100644 checkpoint-909/config.json create mode 100644 checkpoint-909/generation_config.json create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_0_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_1_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_2_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_3_mp_rank_00_model_states.pt create mode 100644 
checkpoint-909/global_step909/zero_pp_rank_4_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_5_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_6_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/global_step909/zero_pp_rank_7_mp_rank_00_model_states.pt create mode 100644 checkpoint-909/latest create mode 100644 checkpoint-909/model.safetensors create mode 100644 checkpoint-909/rng_state_0.pth create mode 100644 checkpoint-909/rng_state_1.pth create mode 100644 checkpoint-909/rng_state_2.pth create mode 100644 checkpoint-909/rng_state_3.pth create mode 100644 checkpoint-909/rng_state_4.pth create mode 100644 checkpoint-909/rng_state_5.pth create mode 100644 checkpoint-909/rng_state_6.pth create mode 100644 checkpoint-909/rng_state_7.pth create mode 100644 checkpoint-909/scheduler.pt create mode 100644 checkpoint-909/tokenizer.json create mode 100644 checkpoint-909/tokenizer_config.json create mode 100644 checkpoint-909/trainer_state.json create mode 100644 checkpoint-909/training_args.bin create mode 100644 checkpoint-909/zero_to_fp32.py create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_loss.png diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a78b26b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs 
-text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-909/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..2305038 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: other +base_model: Qwen/Qwen3-4B-Thinking-2507 +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: trainer_output + results: [] +--- + + + +# trainer_output + +This model is a fine-tuned version of [Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507) on the combined_reasoning_sft_lt30k dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 128 +- total_eval_batch_size: 64 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.1 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- Transformers 5.2.0 +- Pytorch 2.11.0+cu130 +- Datasets 4.0.0 +- Tokenizers 0.22.2 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..31ec503 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 1274755977576448.0, + "train_loss": 0.5495063810720958, + "train_runtime": 34913.8374, + "train_samples_per_second": 3.33, + "train_steps_per_second": 0.026 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..2e2f69c --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,86 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + 
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", 
"arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n\n' }} +{%- endif %} \ No newline at end of file diff --git a/checkpoint-500/chat_template.jinja b/checkpoint-500/chat_template.jinja new file mode 100644 index 0000000..2e2f69c --- /dev/null +++ b/checkpoint-500/chat_template.jinja @@ -0,0 +1,86 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} 
+{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} 
+{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n\n' }} +{%- endif %} \ No newline at end of file diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000..6dae724 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,71 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 5000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.2.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/checkpoint-500/generation_config.json b/checkpoint-500/generation_config.json new file mode 100644 index 0000000..1701c94 --- /dev/null +++ 
b/checkpoint-500/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "5.2.0" +} diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..8c7998d --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12457718e1aa5245ffeebc8d6ae9e817a1eebef10b3c9cc7ea7594d22dae860d +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..1ac32c0 --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639acb57300135a74c355bbd8de504121d78052d17099b1d69c4cadda98eb7b6 +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..e4c441d --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994e817512738d5864e1fff17e11495b1bc9e27faefa5ad574480e55f8ada050 +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..8c6370c --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b5ba3463b2654a19eecc4b8d53a9ecb44bef1a55e09ed7af537134b0d6a8ead1 +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..68b48cf --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40bcc7c997b24f56df316de04ce8d89ed3ff58c521a7f7fb5d47b54e8531d23 +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..c853af9 --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2caaf16f7db9452258df0d6cab89f9c7debb2a9714b7711a0e348a6d54996e +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..27495d1 --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e89b4b485e7348f77b351cf202157c2f4ba3bc7c636e0e3f214ccaba04a8173 +size 6033707889 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..0641674 --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feaad9d6abf019b6a325a4f81a923ee31cbc7cb2afd60fdc79323b3809f96b8c +size 6033707889 diff --git 
a/checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000..6a7a224 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86dcaac8ed6dc4a5ebdf13f4d6b3829325b006bdd11599103c9d90ddd3eb67bd +size 200719 diff --git a/checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000..9487a00 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e0d3a3bc6e16551f8ac5aed92edccf2d5cb6fd01db5a0d37bf0a70ee09d7e4 +size 200655 diff --git a/checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000..2eacb81 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384728fdf8521dcea3d3f1a7cb84aee101a7e27c837e4a9e32ff3d6cbda51465 +size 200655 diff --git a/checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000..71b9f82 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:772f6bb659fab7da3e165101161711c04f2c50e6fdd3c8bb694718e81bb19a4d +size 200655 diff --git a/checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000..07f1225 --- /dev/null +++ 
b/checkpoint-500/global_step500/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14904cc1edb394babe1a62ee284baf9650a641493febe723c0097588854499fd +size 200655 diff --git a/checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000..71d2f40 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3bb4f7d593c86077890d3b241e8bb23c9751e5784ed6e627faca2a3a0e30ed7 +size 200655 diff --git a/checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000..f618181 --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:087c4bf8eb33d74388526751a7dc34d72a9c79b5f6c128c704c47d14a6e5e6fe +size 200655 diff --git a/checkpoint-500/global_step500/zero_pp_rank_7_mp_rank_00_model_states.pt b/checkpoint-500/global_step500/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000..d15b39d --- /dev/null +++ b/checkpoint-500/global_step500/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27738f4b59debaa01f51d4af01e3e05f33ccb1f9d7c6c52caf0cacd1077dee1 +size 200655 diff --git a/checkpoint-500/latest b/checkpoint-500/latest new file mode 100644 index 0000000..f0b47ce --- /dev/null +++ b/checkpoint-500/latest @@ -0,0 +1 @@ +global_step500 \ No newline at end of file diff --git a/checkpoint-500/model.safetensors b/checkpoint-500/model.safetensors new file mode 100644 index 0000000..b21a199 --- /dev/null +++ b/checkpoint-500/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:3210c8c22625aa54f941c899cb2fc148432fbc5b0088fee75efea24c0c937de1 +size 8044982080 diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000..059b96b --- /dev/null +++ b/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfe1981024ef92f2da08a90c72c7c793d1cc9de1547abd2556c968be70232eb +size 16389 diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000..69a4018 --- /dev/null +++ b/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35b845d476d830805793c3dcf8ac2daad87fec289bff3f7eda9e72fc374eda1 +size 16389 diff --git a/checkpoint-500/rng_state_2.pth b/checkpoint-500/rng_state_2.pth new file mode 100644 index 0000000..7e33ffe --- /dev/null +++ b/checkpoint-500/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e9880996b01262a807d1ec3ebd91eee540e08130a14a45a4648731fd0d48a9 +size 16389 diff --git a/checkpoint-500/rng_state_3.pth b/checkpoint-500/rng_state_3.pth new file mode 100644 index 0000000..fedd64c --- /dev/null +++ b/checkpoint-500/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee25c237d6fe62ec76adcf7daf899d7ed32eab5d1a5b447b911f4451c9a1b258 +size 16389 diff --git a/checkpoint-500/rng_state_4.pth b/checkpoint-500/rng_state_4.pth new file mode 100644 index 0000000..b03eafe --- /dev/null +++ b/checkpoint-500/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6b31133f29a8fc0cb538aa807d6a403bd51939336bfd425cd3d122d8c5595c +size 16389 diff --git a/checkpoint-500/rng_state_5.pth b/checkpoint-500/rng_state_5.pth new file mode 100644 index 0000000..bbfc582 --- /dev/null +++ b/checkpoint-500/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a26c55b5c7fa0522b1d27b2c00a7ea77ad010f19a1321991165c5c972b8fa97a +size 16389 diff --git a/checkpoint-500/rng_state_6.pth b/checkpoint-500/rng_state_6.pth new file mode 100644 index 0000000..d2bc738 --- /dev/null +++ b/checkpoint-500/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1a3cf85626196804f25a8293e22dc561bba068a70fb123e04afe4896c33972 +size 16389 diff --git a/checkpoint-500/rng_state_7.pth b/checkpoint-500/rng_state_7.pth new file mode 100644 index 0000000..45026a0 --- /dev/null +++ b/checkpoint-500/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f87c1ee5f5db346c7b913137cbccd196eaf8ec5a4cf9f192418a3069269b49 +size 16389 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000..1822113 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cfe667749f7ab3de45a046bca34fd8ea28bbd9fa9b32f20f0c5552400f31c39 +size 1465 diff --git a/checkpoint-500/tokenizer.json b/checkpoint-500/tokenizer.json new file mode 100644 index 0000000..c7afbed --- /dev/null +++ b/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/checkpoint-500/tokenizer_config.json b/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000..79dfc69 --- /dev/null +++ b/checkpoint-500/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + 
"<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000..b5f2d9c --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,3534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6501650165016502, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0033003300330033004, + "grad_norm": 10.81499361768409, + "learning_rate": 0.0, + "loss": 1.2079360485076904, + "step": 1 + }, + { + "epoch": 0.006600660066006601, + "grad_norm": 10.226770877445293, + "learning_rate": 4.395604395604396e-07, + "loss": 1.123347520828247, + "step": 2 + }, + { + "epoch": 0.009900990099009901, + "grad_norm": 11.292644267807786, + "learning_rate": 8.791208791208792e-07, + "loss": 1.261695384979248, + "step": 3 + }, + { + "epoch": 0.013201320132013201, + "grad_norm": 10.504638106263508, + "learning_rate": 1.3186813186813187e-06, + "loss": 1.1276888847351074, + "step": 4 + }, + { + "epoch": 0.0165016501650165, + "grad_norm": 10.822100601159539, + "learning_rate": 1.7582417582417585e-06, + "loss": 1.2254480123519897, + "step": 5 + }, + { + "epoch": 0.019801980198019802, + "grad_norm": 9.905516433474448, + "learning_rate": 2.197802197802198e-06, + "loss": 1.1809396743774414, + "step": 6 + }, + { + "epoch": 0.0231023102310231, + "grad_norm": 9.323364829402967, + "learning_rate": 2.6373626373626375e-06, + "loss": 1.2000095844268799, + "step": 7 + }, + { + "epoch": 0.026402640264026403, + "grad_norm": 6.706098746162178, + "learning_rate": 3.0769230769230774e-06, + "loss": 1.0248074531555176, + "step": 8 + }, + { + 
"epoch": 0.0297029702970297, + "grad_norm": 5.761138380327878, + "learning_rate": 3.516483516483517e-06, + "loss": 1.0840561389923096, + "step": 9 + }, + { + "epoch": 0.033003300330033, + "grad_norm": 2.7364343552329315, + "learning_rate": 3.9560439560439565e-06, + "loss": 0.955639123916626, + "step": 10 + }, + { + "epoch": 0.036303630363036306, + "grad_norm": 2.113810438625661, + "learning_rate": 4.395604395604396e-06, + "loss": 0.9281604290008545, + "step": 11 + }, + { + "epoch": 0.039603960396039604, + "grad_norm": 1.849238684536393, + "learning_rate": 4.8351648351648355e-06, + "loss": 0.9079018831253052, + "step": 12 + }, + { + "epoch": 0.0429042904290429, + "grad_norm": 1.6747171029255208, + "learning_rate": 5.274725274725275e-06, + "loss": 0.9039217233657837, + "step": 13 + }, + { + "epoch": 0.0462046204620462, + "grad_norm": 2.0121666555693416, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.8910936117172241, + "step": 14 + }, + { + "epoch": 0.04950495049504951, + "grad_norm": 2.0600124028897526, + "learning_rate": 6.153846153846155e-06, + "loss": 0.895532488822937, + "step": 15 + }, + { + "epoch": 0.052805280528052806, + "grad_norm": 2.0613449368510044, + "learning_rate": 6.5934065934065935e-06, + "loss": 0.8889240622520447, + "step": 16 + }, + { + "epoch": 0.056105610561056105, + "grad_norm": 1.785450637059245, + "learning_rate": 7.032967032967034e-06, + "loss": 0.8499570488929749, + "step": 17 + }, + { + "epoch": 0.0594059405940594, + "grad_norm": 1.5894161631201256, + "learning_rate": 7.472527472527473e-06, + "loss": 0.839992105960846, + "step": 18 + }, + { + "epoch": 0.0627062706270627, + "grad_norm": 1.1904834264503976, + "learning_rate": 7.912087912087913e-06, + "loss": 0.7718420028686523, + "step": 19 + }, + { + "epoch": 0.066006600660066, + "grad_norm": 1.0397335564670163, + "learning_rate": 8.351648351648353e-06, + "loss": 0.7865867614746094, + "step": 20 + }, + { + "epoch": 0.06930693069306931, + "grad_norm": 0.8314739102256958, + 
"learning_rate": 8.791208791208792e-06, + "loss": 0.7982739806175232, + "step": 21 + }, + { + "epoch": 0.07260726072607261, + "grad_norm": 0.6542597896181986, + "learning_rate": 9.230769230769232e-06, + "loss": 0.7846421599388123, + "step": 22 + }, + { + "epoch": 0.07590759075907591, + "grad_norm": 0.6269389928815381, + "learning_rate": 9.670329670329671e-06, + "loss": 0.7005743980407715, + "step": 23 + }, + { + "epoch": 0.07920792079207921, + "grad_norm": 0.6603922634859757, + "learning_rate": 1.010989010989011e-05, + "loss": 0.7084314227104187, + "step": 24 + }, + { + "epoch": 0.08250825082508251, + "grad_norm": 0.6856248928818359, + "learning_rate": 1.054945054945055e-05, + "loss": 0.7310304641723633, + "step": 25 + }, + { + "epoch": 0.0858085808580858, + "grad_norm": 0.5728331825854258, + "learning_rate": 1.098901098901099e-05, + "loss": 0.7056888341903687, + "step": 26 + }, + { + "epoch": 0.0891089108910891, + "grad_norm": 0.47956485465857923, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.6987950205802917, + "step": 27 + }, + { + "epoch": 0.0924092409240924, + "grad_norm": 0.47407141179043555, + "learning_rate": 1.186813186813187e-05, + "loss": 0.7319807410240173, + "step": 28 + }, + { + "epoch": 0.09570957095709572, + "grad_norm": 0.4856924244101555, + "learning_rate": 1.230769230769231e-05, + "loss": 0.6983063220977783, + "step": 29 + }, + { + "epoch": 0.09900990099009901, + "grad_norm": 0.49122925908544063, + "learning_rate": 1.2747252747252747e-05, + "loss": 0.70492023229599, + "step": 30 + }, + { + "epoch": 0.10231023102310231, + "grad_norm": 0.4556788168903923, + "learning_rate": 1.3186813186813187e-05, + "loss": 0.7376629114151001, + "step": 31 + }, + { + "epoch": 0.10561056105610561, + "grad_norm": 0.4272838300827657, + "learning_rate": 1.3626373626373627e-05, + "loss": 0.6623936295509338, + "step": 32 + }, + { + "epoch": 0.10891089108910891, + "grad_norm": 0.40886227927218277, + "learning_rate": 1.4065934065934068e-05, + "loss": 
0.7136330604553223, + "step": 33 + }, + { + "epoch": 0.11221122112211221, + "grad_norm": 0.37821179606418975, + "learning_rate": 1.4505494505494506e-05, + "loss": 0.7113747596740723, + "step": 34 + }, + { + "epoch": 0.11551155115511551, + "grad_norm": 0.4538557716923258, + "learning_rate": 1.4945054945054947e-05, + "loss": 0.8252867460250854, + "step": 35 + }, + { + "epoch": 0.1188118811881188, + "grad_norm": 0.3875808052898815, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.7406599521636963, + "step": 36 + }, + { + "epoch": 0.12211221122112212, + "grad_norm": 0.3503240143986989, + "learning_rate": 1.5824175824175826e-05, + "loss": 0.6572297811508179, + "step": 37 + }, + { + "epoch": 0.1254125412541254, + "grad_norm": 0.3779655372487014, + "learning_rate": 1.6263736263736265e-05, + "loss": 0.7520949840545654, + "step": 38 + }, + { + "epoch": 0.12871287128712872, + "grad_norm": 0.36968690038350466, + "learning_rate": 1.6703296703296707e-05, + "loss": 0.6861323118209839, + "step": 39 + }, + { + "epoch": 0.132013201320132, + "grad_norm": 0.3724328241107235, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6818518042564392, + "step": 40 + }, + { + "epoch": 0.1353135313531353, + "grad_norm": 0.35542054984937593, + "learning_rate": 1.7582417582417584e-05, + "loss": 0.6663186550140381, + "step": 41 + }, + { + "epoch": 0.13861386138613863, + "grad_norm": 0.3441266617586836, + "learning_rate": 1.8021978021978023e-05, + "loss": 0.6492191553115845, + "step": 42 + }, + { + "epoch": 0.1419141914191419, + "grad_norm": 0.3478448092762331, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.6444741487503052, + "step": 43 + }, + { + "epoch": 0.14521452145214522, + "grad_norm": 0.34951148057960574, + "learning_rate": 1.8901098901098903e-05, + "loss": 0.6476814150810242, + "step": 44 + }, + { + "epoch": 0.1485148514851485, + "grad_norm": 0.3356672452160599, + "learning_rate": 1.9340659340659342e-05, + "loss": 0.6660827994346619, + "step": 45 + }, + { + "epoch": 
0.15181518151815182, + "grad_norm": 0.30809956365723695, + "learning_rate": 1.9780219780219784e-05, + "loss": 0.6924091577529907, + "step": 46 + }, + { + "epoch": 0.1551155115511551, + "grad_norm": 0.9030699054312887, + "learning_rate": 2.021978021978022e-05, + "loss": 0.6899605989456177, + "step": 47 + }, + { + "epoch": 0.15841584158415842, + "grad_norm": 0.35784060194946976, + "learning_rate": 2.0659340659340665e-05, + "loss": 0.7242028713226318, + "step": 48 + }, + { + "epoch": 0.1617161716171617, + "grad_norm": 0.3093966721093651, + "learning_rate": 2.10989010989011e-05, + "loss": 0.6203902959823608, + "step": 49 + }, + { + "epoch": 0.16501650165016502, + "grad_norm": 0.4242705872636108, + "learning_rate": 2.153846153846154e-05, + "loss": 0.6420010328292847, + "step": 50 + }, + { + "epoch": 0.16831683168316833, + "grad_norm": 0.35079960590346965, + "learning_rate": 2.197802197802198e-05, + "loss": 0.7517598867416382, + "step": 51 + }, + { + "epoch": 0.1716171617161716, + "grad_norm": 0.3078803790362521, + "learning_rate": 2.241758241758242e-05, + "loss": 0.6568161249160767, + "step": 52 + }, + { + "epoch": 0.17491749174917492, + "grad_norm": 0.34666662805484005, + "learning_rate": 2.2857142857142858e-05, + "loss": 0.7348504662513733, + "step": 53 + }, + { + "epoch": 0.1782178217821782, + "grad_norm": 0.302791415801781, + "learning_rate": 2.32967032967033e-05, + "loss": 0.6164949536323547, + "step": 54 + }, + { + "epoch": 0.18151815181518152, + "grad_norm": 0.33732756727763136, + "learning_rate": 2.373626373626374e-05, + "loss": 0.6505363583564758, + "step": 55 + }, + { + "epoch": 0.1848184818481848, + "grad_norm": 0.34780152362496847, + "learning_rate": 2.4175824175824177e-05, + "loss": 0.7562520503997803, + "step": 56 + }, + { + "epoch": 0.18811881188118812, + "grad_norm": 0.3310895358869482, + "learning_rate": 2.461538461538462e-05, + "loss": 0.6943148374557495, + "step": 57 + }, + { + "epoch": 0.19141914191419143, + "grad_norm": 0.3367877938063833, + 
"learning_rate": 2.5054945054945058e-05, + "loss": 0.6571655869483948, + "step": 58 + }, + { + "epoch": 0.19471947194719472, + "grad_norm": 0.32103256018771714, + "learning_rate": 2.5494505494505493e-05, + "loss": 0.7229321002960205, + "step": 59 + }, + { + "epoch": 0.19801980198019803, + "grad_norm": 0.30468399230672144, + "learning_rate": 2.593406593406594e-05, + "loss": 0.6307672262191772, + "step": 60 + }, + { + "epoch": 0.20132013201320131, + "grad_norm": 0.3282635121595526, + "learning_rate": 2.6373626373626374e-05, + "loss": 0.6336506009101868, + "step": 61 + }, + { + "epoch": 0.20462046204620463, + "grad_norm": 0.3280360563022675, + "learning_rate": 2.6813186813186813e-05, + "loss": 0.6492213010787964, + "step": 62 + }, + { + "epoch": 0.2079207920792079, + "grad_norm": 0.3292430577817229, + "learning_rate": 2.7252747252747255e-05, + "loss": 0.6763280034065247, + "step": 63 + }, + { + "epoch": 0.21122112211221122, + "grad_norm": 0.47832355846700536, + "learning_rate": 2.7692307692307694e-05, + "loss": 0.7322396039962769, + "step": 64 + }, + { + "epoch": 0.2145214521452145, + "grad_norm": 0.31915340164178446, + "learning_rate": 2.8131868131868136e-05, + "loss": 0.7080870270729065, + "step": 65 + }, + { + "epoch": 0.21782178217821782, + "grad_norm": 0.3227571040968621, + "learning_rate": 2.8571428571428574e-05, + "loss": 0.6054466962814331, + "step": 66 + }, + { + "epoch": 0.22112211221122113, + "grad_norm": 0.33375713186655664, + "learning_rate": 2.9010989010989013e-05, + "loss": 0.6782290935516357, + "step": 67 + }, + { + "epoch": 0.22442244224422442, + "grad_norm": 0.3437770801965916, + "learning_rate": 2.9450549450549455e-05, + "loss": 0.6804753541946411, + "step": 68 + }, + { + "epoch": 0.22772277227722773, + "grad_norm": 0.3228427319313703, + "learning_rate": 2.9890109890109894e-05, + "loss": 0.6493992805480957, + "step": 69 + }, + { + "epoch": 0.23102310231023102, + "grad_norm": 0.3540211756840673, + "learning_rate": 3.0329670329670332e-05, + "loss": 
0.6263789534568787, + "step": 70 + }, + { + "epoch": 0.23432343234323433, + "grad_norm": 0.34989089824503405, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.6960322856903076, + "step": 71 + }, + { + "epoch": 0.2376237623762376, + "grad_norm": 0.33624443163866324, + "learning_rate": 3.120879120879121e-05, + "loss": 0.6146604418754578, + "step": 72 + }, + { + "epoch": 0.24092409240924093, + "grad_norm": 0.39618402867027047, + "learning_rate": 3.164835164835165e-05, + "loss": 0.6361377239227295, + "step": 73 + }, + { + "epoch": 0.24422442244224424, + "grad_norm": 0.361603087273114, + "learning_rate": 3.2087912087912094e-05, + "loss": 0.636134147644043, + "step": 74 + }, + { + "epoch": 0.24752475247524752, + "grad_norm": 0.37985663132790304, + "learning_rate": 3.252747252747253e-05, + "loss": 0.5936564803123474, + "step": 75 + }, + { + "epoch": 0.2508250825082508, + "grad_norm": 0.35883234873646996, + "learning_rate": 3.296703296703297e-05, + "loss": 0.6001103520393372, + "step": 76 + }, + { + "epoch": 0.25412541254125415, + "grad_norm": 0.35227803701073973, + "learning_rate": 3.340659340659341e-05, + "loss": 0.6254594326019287, + "step": 77 + }, + { + "epoch": 0.25742574257425743, + "grad_norm": 0.3563257650896171, + "learning_rate": 3.384615384615385e-05, + "loss": 0.6457959413528442, + "step": 78 + }, + { + "epoch": 0.2607260726072607, + "grad_norm": 0.37234316340556584, + "learning_rate": 3.4285714285714284e-05, + "loss": 0.6186954975128174, + "step": 79 + }, + { + "epoch": 0.264026402640264, + "grad_norm": 0.35352748449766547, + "learning_rate": 3.4725274725274726e-05, + "loss": 0.6175529956817627, + "step": 80 + }, + { + "epoch": 0.26732673267326734, + "grad_norm": 0.35441369709658355, + "learning_rate": 3.516483516483517e-05, + "loss": 0.6694468259811401, + "step": 81 + }, + { + "epoch": 0.2706270627062706, + "grad_norm": 0.39955400784840756, + "learning_rate": 3.56043956043956e-05, + "loss": 0.627490222454071, + "step": 82 + }, + { + "epoch": 
0.2739273927392739, + "grad_norm": 0.38314031523497477, + "learning_rate": 3.6043956043956045e-05, + "loss": 0.6410495638847351, + "step": 83 + }, + { + "epoch": 0.27722772277227725, + "grad_norm": 0.36926215386141575, + "learning_rate": 3.648351648351649e-05, + "loss": 0.6305102109909058, + "step": 84 + }, + { + "epoch": 0.28052805280528054, + "grad_norm": 0.38364118080284076, + "learning_rate": 3.692307692307693e-05, + "loss": 0.6558895111083984, + "step": 85 + }, + { + "epoch": 0.2838283828382838, + "grad_norm": 0.3370292682974053, + "learning_rate": 3.7362637362637365e-05, + "loss": 0.6029388308525085, + "step": 86 + }, + { + "epoch": 0.2871287128712871, + "grad_norm": 0.39541874871701704, + "learning_rate": 3.7802197802197807e-05, + "loss": 0.6551017761230469, + "step": 87 + }, + { + "epoch": 0.29042904290429045, + "grad_norm": 0.3629036550044273, + "learning_rate": 3.824175824175825e-05, + "loss": 0.6588809490203857, + "step": 88 + }, + { + "epoch": 0.29372937293729373, + "grad_norm": 0.37786447228212183, + "learning_rate": 3.8681318681318684e-05, + "loss": 0.614648699760437, + "step": 89 + }, + { + "epoch": 0.297029702970297, + "grad_norm": 0.42911861803278684, + "learning_rate": 3.9120879120879126e-05, + "loss": 0.7034356594085693, + "step": 90 + }, + { + "epoch": 0.30033003300330036, + "grad_norm": 0.3707184094312094, + "learning_rate": 3.956043956043957e-05, + "loss": 0.6908263564109802, + "step": 91 + }, + { + "epoch": 0.30363036303630364, + "grad_norm": 0.38262186656216063, + "learning_rate": 4e-05, + "loss": 0.6882215738296509, + "step": 92 + }, + { + "epoch": 0.3069306930693069, + "grad_norm": 0.3709464296309744, + "learning_rate": 3.999985249980169e-05, + "loss": 0.6377270221710205, + "step": 93 + }, + { + "epoch": 0.3102310231023102, + "grad_norm": 0.3412837406106036, + "learning_rate": 3.999941000138238e-05, + "loss": 0.6735270619392395, + "step": 94 + }, + { + "epoch": 0.31353135313531355, + "grad_norm": 0.40165192879996064, + "learning_rate": 
3.999867251126893e-05, + "loss": 0.6934541463851929, + "step": 95 + }, + { + "epoch": 0.31683168316831684, + "grad_norm": 0.34707128601816045, + "learning_rate": 3.9997640040339335e-05, + "loss": 0.6367039084434509, + "step": 96 + }, + { + "epoch": 0.3201320132013201, + "grad_norm": 0.4268828113970776, + "learning_rate": 3.999631260382257e-05, + "loss": 0.6274522542953491, + "step": 97 + }, + { + "epoch": 0.3234323432343234, + "grad_norm": 0.454428833020686, + "learning_rate": 3.999469022129834e-05, + "loss": 0.5874066352844238, + "step": 98 + }, + { + "epoch": 0.32673267326732675, + "grad_norm": 0.4200675840489775, + "learning_rate": 3.9992772916696824e-05, + "loss": 0.6175942420959473, + "step": 99 + }, + { + "epoch": 0.33003300330033003, + "grad_norm": 0.3796321080056305, + "learning_rate": 3.99905607182983e-05, + "loss": 0.5625832080841064, + "step": 100 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.39108856096759403, + "learning_rate": 3.998805365873274e-05, + "loss": 0.6153020262718201, + "step": 101 + }, + { + "epoch": 0.33663366336633666, + "grad_norm": 0.3873560194436071, + "learning_rate": 3.998525177497932e-05, + "loss": 0.5585426092147827, + "step": 102 + }, + { + "epoch": 0.33993399339933994, + "grad_norm": 0.4084712106325698, + "learning_rate": 3.998215510836589e-05, + "loss": 0.6586359739303589, + "step": 103 + }, + { + "epoch": 0.3432343234323432, + "grad_norm": 0.4383246876899704, + "learning_rate": 3.997876370456833e-05, + "loss": 0.62096107006073, + "step": 104 + }, + { + "epoch": 0.3465346534653465, + "grad_norm": 0.4026893562706946, + "learning_rate": 3.997507761360993e-05, + "loss": 0.6059336066246033, + "step": 105 + }, + { + "epoch": 0.34983498349834985, + "grad_norm": 0.46586240044914223, + "learning_rate": 3.997109688986059e-05, + "loss": 0.617970883846283, + "step": 106 + }, + { + "epoch": 0.35313531353135313, + "grad_norm": 0.44949199032710474, + "learning_rate": 3.9966821592036066e-05, + "loss": 0.6453397274017334, + "step": 
107 + }, + { + "epoch": 0.3564356435643564, + "grad_norm": 0.4794978158156406, + "learning_rate": 3.996225178319709e-05, + "loss": 0.6371763348579407, + "step": 108 + }, + { + "epoch": 0.35973597359735976, + "grad_norm": 0.4463512391721941, + "learning_rate": 3.9957387530748435e-05, + "loss": 0.5971124172210693, + "step": 109 + }, + { + "epoch": 0.36303630363036304, + "grad_norm": 0.368079413354641, + "learning_rate": 3.995222890643792e-05, + "loss": 0.5679532289505005, + "step": 110 + }, + { + "epoch": 0.36633663366336633, + "grad_norm": 0.43733705586285254, + "learning_rate": 3.9946775986355346e-05, + "loss": 0.5988069772720337, + "step": 111 + }, + { + "epoch": 0.3696369636963696, + "grad_norm": 0.38235582844960775, + "learning_rate": 3.994102885093141e-05, + "loss": 0.6352983713150024, + "step": 112 + }, + { + "epoch": 0.37293729372937295, + "grad_norm": 0.389837871286893, + "learning_rate": 3.993498758493646e-05, + "loss": 0.58957839012146, + "step": 113 + }, + { + "epoch": 0.37623762376237624, + "grad_norm": 0.40399856168911097, + "learning_rate": 3.992865227747929e-05, + "loss": 0.6396822929382324, + "step": 114 + }, + { + "epoch": 0.3795379537953795, + "grad_norm": 0.38891668976227123, + "learning_rate": 3.992202302200582e-05, + "loss": 0.6314754486083984, + "step": 115 + }, + { + "epoch": 0.38283828382838286, + "grad_norm": 0.4087528543828922, + "learning_rate": 3.991509991629769e-05, + "loss": 0.673650860786438, + "step": 116 + }, + { + "epoch": 0.38613861386138615, + "grad_norm": 0.36330054292020786, + "learning_rate": 3.990788306247085e-05, + "loss": 0.5813701152801514, + "step": 117 + }, + { + "epoch": 0.38943894389438943, + "grad_norm": 0.4247110332678589, + "learning_rate": 3.990037256697404e-05, + "loss": 0.6419334411621094, + "step": 118 + }, + { + "epoch": 0.3927392739273927, + "grad_norm": 0.4244126002071751, + "learning_rate": 3.989256854058721e-05, + "loss": 0.6319208145141602, + "step": 119 + }, + { + "epoch": 0.39603960396039606, + 
"grad_norm": 0.3651632933942853, + "learning_rate": 3.988447109841991e-05, + "loss": 0.5989845991134644, + "step": 120 + }, + { + "epoch": 0.39933993399339934, + "grad_norm": 0.393158353074077, + "learning_rate": 3.987608035990957e-05, + "loss": 0.5853303670883179, + "step": 121 + }, + { + "epoch": 0.40264026402640263, + "grad_norm": 0.35965233332276103, + "learning_rate": 3.986739644881975e-05, + "loss": 0.6115257143974304, + "step": 122 + }, + { + "epoch": 0.40594059405940597, + "grad_norm": 0.4252711474203845, + "learning_rate": 3.985841949323831e-05, + "loss": 0.6440504789352417, + "step": 123 + }, + { + "epoch": 0.40924092409240925, + "grad_norm": 0.5578797297271848, + "learning_rate": 3.984914962557553e-05, + "loss": 0.5765030384063721, + "step": 124 + }, + { + "epoch": 0.41254125412541254, + "grad_norm": 0.4362455029468141, + "learning_rate": 3.983958698256214e-05, + "loss": 0.6387556791305542, + "step": 125 + }, + { + "epoch": 0.4158415841584158, + "grad_norm": 0.39274811063076087, + "learning_rate": 3.98297317052473e-05, + "loss": 0.6263147592544556, + "step": 126 + }, + { + "epoch": 0.41914191419141916, + "grad_norm": 0.42682589637163704, + "learning_rate": 3.981958393899656e-05, + "loss": 0.6091845035552979, + "step": 127 + }, + { + "epoch": 0.42244224422442245, + "grad_norm": 0.4033131171538041, + "learning_rate": 3.980914383348967e-05, + "loss": 0.6458015441894531, + "step": 128 + }, + { + "epoch": 0.42574257425742573, + "grad_norm": 0.3881606915462862, + "learning_rate": 3.9798411542718395e-05, + "loss": 0.6115552186965942, + "step": 129 + }, + { + "epoch": 0.429042904290429, + "grad_norm": 0.38910317938225847, + "learning_rate": 3.978738722498423e-05, + "loss": 0.6427993774414062, + "step": 130 + }, + { + "epoch": 0.43234323432343236, + "grad_norm": 0.36836380096259913, + "learning_rate": 3.977607104289609e-05, + "loss": 0.6121467351913452, + "step": 131 + }, + { + "epoch": 0.43564356435643564, + "grad_norm": 0.3743062201629088, + "learning_rate": 
3.9764463163367875e-05, + "loss": 0.5951442718505859, + "step": 132 + }, + { + "epoch": 0.4389438943894389, + "grad_norm": 0.3699746655092952, + "learning_rate": 3.9752563757616045e-05, + "loss": 0.6639472842216492, + "step": 133 + }, + { + "epoch": 0.44224422442244227, + "grad_norm": 0.37398919831188604, + "learning_rate": 3.974037300115706e-05, + "loss": 0.6084764003753662, + "step": 134 + }, + { + "epoch": 0.44554455445544555, + "grad_norm": 0.37043195153646374, + "learning_rate": 3.972789107380484e-05, + "loss": 0.6211085915565491, + "step": 135 + }, + { + "epoch": 0.44884488448844884, + "grad_norm": 0.3509837417375981, + "learning_rate": 3.9715118159668046e-05, + "loss": 0.6098147034645081, + "step": 136 + }, + { + "epoch": 0.4521452145214521, + "grad_norm": 0.3350785925775803, + "learning_rate": 3.970205444714742e-05, + "loss": 0.6155884861946106, + "step": 137 + }, + { + "epoch": 0.45544554455445546, + "grad_norm": 0.38529379761335925, + "learning_rate": 3.9688700128932975e-05, + "loss": 0.5984665155410767, + "step": 138 + }, + { + "epoch": 0.45874587458745875, + "grad_norm": 0.45130397769476205, + "learning_rate": 3.967505540200117e-05, + "loss": 0.6656880378723145, + "step": 139 + }, + { + "epoch": 0.46204620462046203, + "grad_norm": 0.3277874952439621, + "learning_rate": 3.966112046761201e-05, + "loss": 0.6607398390769958, + "step": 140 + }, + { + "epoch": 0.46534653465346537, + "grad_norm": 2.6727599644732267, + "learning_rate": 3.9646895531306046e-05, + "loss": 0.6578342914581299, + "step": 141 + }, + { + "epoch": 0.46864686468646866, + "grad_norm": 0.47429126269764676, + "learning_rate": 3.963238080290136e-05, + "loss": 0.6103699803352356, + "step": 142 + }, + { + "epoch": 0.47194719471947194, + "grad_norm": 0.32652590291724093, + "learning_rate": 3.96175764964905e-05, + "loss": 0.5484676957130432, + "step": 143 + }, + { + "epoch": 0.4752475247524752, + "grad_norm": 0.4531372955951849, + "learning_rate": 3.960248283043727e-05, + "loss": 
0.578776478767395, + "step": 144 + }, + { + "epoch": 0.47854785478547857, + "grad_norm": 0.3685580706465372, + "learning_rate": 3.958710002737355e-05, + "loss": 0.6184446811676025, + "step": 145 + }, + { + "epoch": 0.48184818481848185, + "grad_norm": 0.3584005630962511, + "learning_rate": 3.9571428314195984e-05, + "loss": 0.6307916045188904, + "step": 146 + }, + { + "epoch": 0.48514851485148514, + "grad_norm": 0.4049679254542765, + "learning_rate": 3.955546792206265e-05, + "loss": 0.6064697504043579, + "step": 147 + }, + { + "epoch": 0.4884488448844885, + "grad_norm": 0.3846258995775384, + "learning_rate": 3.953921908638966e-05, + "loss": 0.6055655479431152, + "step": 148 + }, + { + "epoch": 0.49174917491749176, + "grad_norm": 0.3643318343315678, + "learning_rate": 3.952268204684765e-05, + "loss": 0.5856431126594543, + "step": 149 + }, + { + "epoch": 0.49504950495049505, + "grad_norm": 0.3854715521866927, + "learning_rate": 3.950585704735829e-05, + "loss": 0.6634635925292969, + "step": 150 + }, + { + "epoch": 0.49834983498349833, + "grad_norm": 0.34338835592304534, + "learning_rate": 3.948874433609065e-05, + "loss": 0.5880753397941589, + "step": 151 + }, + { + "epoch": 0.5016501650165016, + "grad_norm": 0.3481018111538647, + "learning_rate": 3.947134416545757e-05, + "loss": 0.5594221949577332, + "step": 152 + }, + { + "epoch": 0.504950495049505, + "grad_norm": 0.6570220882473125, + "learning_rate": 3.94536567921119e-05, + "loss": 0.664652407169342, + "step": 153 + }, + { + "epoch": 0.5082508250825083, + "grad_norm": 0.340048306266198, + "learning_rate": 3.9435682476942755e-05, + "loss": 0.6002815961837769, + "step": 154 + }, + { + "epoch": 0.5115511551155115, + "grad_norm": 0.3488682381523364, + "learning_rate": 3.941742148507163e-05, + "loss": 0.5905177593231201, + "step": 155 + }, + { + "epoch": 0.5148514851485149, + "grad_norm": 0.33062666453941425, + "learning_rate": 3.939887408584853e-05, + "loss": 0.5636795163154602, + "step": 156 + }, + { + "epoch": 
0.5181518151815182, + "grad_norm": 0.35862086331061066, + "learning_rate": 3.938004055284796e-05, + "loss": 0.5639582276344299, + "step": 157 + }, + { + "epoch": 0.5214521452145214, + "grad_norm": 0.31769111173717246, + "learning_rate": 3.9360921163864895e-05, + "loss": 0.6515591144561768, + "step": 158 + }, + { + "epoch": 0.5247524752475248, + "grad_norm": 0.38401455820073427, + "learning_rate": 3.934151620091071e-05, + "loss": 0.5721683502197266, + "step": 159 + }, + { + "epoch": 0.528052805280528, + "grad_norm": 0.3284331200684813, + "learning_rate": 3.9321825950209e-05, + "loss": 0.5801802277565002, + "step": 160 + }, + { + "epoch": 0.5313531353135313, + "grad_norm": 0.3493998878359796, + "learning_rate": 3.9301850702191344e-05, + "loss": 0.603084921836853, + "step": 161 + }, + { + "epoch": 0.5346534653465347, + "grad_norm": 0.32233519110844616, + "learning_rate": 3.928159075149304e-05, + "loss": 0.6376925110816956, + "step": 162 + }, + { + "epoch": 0.5379537953795379, + "grad_norm": 0.35833134197704153, + "learning_rate": 3.926104639694877e-05, + "loss": 0.5764102935791016, + "step": 163 + }, + { + "epoch": 0.5412541254125413, + "grad_norm": 0.3523567199445224, + "learning_rate": 3.924021794158818e-05, + "loss": 0.6102188229560852, + "step": 164 + }, + { + "epoch": 0.5445544554455446, + "grad_norm": 0.36694222553878597, + "learning_rate": 3.921910569263139e-05, + "loss": 0.5833287835121155, + "step": 165 + }, + { + "epoch": 0.5478547854785478, + "grad_norm": 0.37179813198977807, + "learning_rate": 3.919770996148448e-05, + "loss": 0.5891385078430176, + "step": 166 + }, + { + "epoch": 0.5511551155115512, + "grad_norm": 0.3507301680001106, + "learning_rate": 3.917603106373493e-05, + "loss": 0.5838547348976135, + "step": 167 + }, + { + "epoch": 0.5544554455445545, + "grad_norm": 0.3134001311174479, + "learning_rate": 3.9154069319146904e-05, + "loss": 0.5727800726890564, + "step": 168 + }, + { + "epoch": 0.5577557755775577, + "grad_norm": 0.33531781904204605, + 
"learning_rate": 3.913182505165656e-05, + "loss": 0.6102641224861145, + "step": 169 + }, + { + "epoch": 0.5610561056105611, + "grad_norm": 0.35178976522027133, + "learning_rate": 3.91092985893673e-05, + "loss": 0.5718260407447815, + "step": 170 + }, + { + "epoch": 0.5643564356435643, + "grad_norm": 0.47006108726602863, + "learning_rate": 3.908649026454488e-05, + "loss": 0.6308504939079285, + "step": 171 + }, + { + "epoch": 0.5676567656765676, + "grad_norm": 0.3687514240026255, + "learning_rate": 3.906340041361255e-05, + "loss": 0.6089432835578918, + "step": 172 + }, + { + "epoch": 0.570957095709571, + "grad_norm": 0.3586674884704593, + "learning_rate": 3.904002937714606e-05, + "loss": 0.6583501696586609, + "step": 173 + }, + { + "epoch": 0.5742574257425742, + "grad_norm": 0.3399808047240735, + "learning_rate": 3.9016377499868666e-05, + "loss": 0.6108609437942505, + "step": 174 + }, + { + "epoch": 0.5775577557755776, + "grad_norm": 0.3840880337988826, + "learning_rate": 3.899244513064603e-05, + "loss": 0.63509202003479, + "step": 175 + }, + { + "epoch": 0.5808580858085809, + "grad_norm": 0.3725541644477348, + "learning_rate": 3.896823262248107e-05, + "loss": 0.5759241580963135, + "step": 176 + }, + { + "epoch": 0.5841584158415841, + "grad_norm": 0.30755721985114126, + "learning_rate": 3.8943740332508754e-05, + "loss": 0.6148169040679932, + "step": 177 + }, + { + "epoch": 0.5874587458745875, + "grad_norm": 0.3916756097057637, + "learning_rate": 3.891896862199086e-05, + "loss": 0.5266364216804504, + "step": 178 + }, + { + "epoch": 0.5907590759075908, + "grad_norm": 0.3417854779376455, + "learning_rate": 3.88939178563106e-05, + "loss": 0.5626640319824219, + "step": 179 + }, + { + "epoch": 0.594059405940594, + "grad_norm": 0.33526488525207704, + "learning_rate": 3.886858840496727e-05, + "loss": 0.6063880920410156, + "step": 180 + }, + { + "epoch": 0.5973597359735974, + "grad_norm": 0.37344333250119977, + "learning_rate": 3.884298064157077e-05, + "loss": 
0.5979235768318176, + "step": 181 + }, + { + "epoch": 0.6006600660066007, + "grad_norm": 0.3835133271197793, + "learning_rate": 3.881709494383612e-05, + "loss": 0.6628611087799072, + "step": 182 + }, + { + "epoch": 0.6039603960396039, + "grad_norm": 0.4344526004756121, + "learning_rate": 3.879093169357789e-05, + "loss": 0.6215270757675171, + "step": 183 + }, + { + "epoch": 0.6072607260726073, + "grad_norm": 0.3644174435488244, + "learning_rate": 3.876449127670452e-05, + "loss": 0.6148592233657837, + "step": 184 + }, + { + "epoch": 0.6105610561056105, + "grad_norm": 0.3619226265536735, + "learning_rate": 3.87377740832127e-05, + "loss": 0.6254778504371643, + "step": 185 + }, + { + "epoch": 0.6138613861386139, + "grad_norm": 0.3492162593840536, + "learning_rate": 3.871078050718155e-05, + "loss": 0.6025378704071045, + "step": 186 + }, + { + "epoch": 0.6171617161716172, + "grad_norm": 0.3866924759539626, + "learning_rate": 3.8683510946766866e-05, + "loss": 0.5887518525123596, + "step": 187 + }, + { + "epoch": 0.6204620462046204, + "grad_norm": 0.3357229513721586, + "learning_rate": 3.865596580419519e-05, + "loss": 0.6180317401885986, + "step": 188 + }, + { + "epoch": 0.6237623762376238, + "grad_norm": 0.3594949077768003, + "learning_rate": 3.8628145485757925e-05, + "loss": 0.5970651507377625, + "step": 189 + }, + { + "epoch": 0.6270627062706271, + "grad_norm": 0.3496234009951303, + "learning_rate": 3.860005040180533e-05, + "loss": 0.6027296781539917, + "step": 190 + }, + { + "epoch": 0.6303630363036303, + "grad_norm": 0.3830042583584045, + "learning_rate": 3.857168096674044e-05, + "loss": 0.6326305270195007, + "step": 191 + }, + { + "epoch": 0.6336633663366337, + "grad_norm": 0.333508477943962, + "learning_rate": 3.854303759901299e-05, + "loss": 0.6508482694625854, + "step": 192 + }, + { + "epoch": 0.636963696369637, + "grad_norm": 0.352327105927571, + "learning_rate": 3.851412072111322e-05, + "loss": 0.6088548302650452, + "step": 193 + }, + { + "epoch": 
0.6402640264026402, + "grad_norm": 0.36196379228138037, + "learning_rate": 3.8484930759565645e-05, + "loss": 0.5975607633590698, + "step": 194 + }, + { + "epoch": 0.6435643564356436, + "grad_norm": 0.3231664855297077, + "learning_rate": 3.845546814492279e-05, + "loss": 0.5467930436134338, + "step": 195 + }, + { + "epoch": 0.6468646864686468, + "grad_norm": 0.35556526722817444, + "learning_rate": 3.8425733311758795e-05, + "loss": 0.583969235420227, + "step": 196 + }, + { + "epoch": 0.6501650165016502, + "grad_norm": 0.331073543443887, + "learning_rate": 3.8395726698663045e-05, + "loss": 0.6007376909255981, + "step": 197 + }, + { + "epoch": 0.6534653465346535, + "grad_norm": 0.34786293006180385, + "learning_rate": 3.836544874823368e-05, + "loss": 0.5971908569335938, + "step": 198 + }, + { + "epoch": 0.6567656765676567, + "grad_norm": 0.3128647628132879, + "learning_rate": 3.8334899907071064e-05, + "loss": 0.592069685459137, + "step": 199 + }, + { + "epoch": 0.6600660066006601, + "grad_norm": 0.3308125796746202, + "learning_rate": 3.830408062577121e-05, + "loss": 0.6188071966171265, + "step": 200 + }, + { + "epoch": 0.6633663366336634, + "grad_norm": 0.34889077565364124, + "learning_rate": 3.827299135891913e-05, + "loss": 0.5976923704147339, + "step": 201 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.33443153994631497, + "learning_rate": 3.8241632565082124e-05, + "loss": 0.6120954155921936, + "step": 202 + }, + { + "epoch": 0.66996699669967, + "grad_norm": 0.3573334503206899, + "learning_rate": 3.821000470680303e-05, + "loss": 0.6661979556083679, + "step": 203 + }, + { + "epoch": 0.6732673267326733, + "grad_norm": 0.34662331225184934, + "learning_rate": 3.8178108250593384e-05, + "loss": 0.5853559970855713, + "step": 204 + }, + { + "epoch": 0.6765676567656765, + "grad_norm": 0.33823171869993424, + "learning_rate": 3.814594366692654e-05, + "loss": 0.6648768186569214, + "step": 205 + }, + { + "epoch": 0.6798679867986799, + "grad_norm": 0.4178878629038068, + 
"learning_rate": 3.8113511430230745e-05, + "loss": 0.5893838405609131, + "step": 206 + }, + { + "epoch": 0.6831683168316832, + "grad_norm": 0.36858896529016355, + "learning_rate": 3.808081201888214e-05, + "loss": 0.6177140474319458, + "step": 207 + }, + { + "epoch": 0.6864686468646864, + "grad_norm": 0.38061402245158527, + "learning_rate": 3.8047845915197695e-05, + "loss": 0.5793695449829102, + "step": 208 + }, + { + "epoch": 0.6897689768976898, + "grad_norm": 0.3591315376932048, + "learning_rate": 3.8014613605428084e-05, + "loss": 0.5571605563163757, + "step": 209 + }, + { + "epoch": 0.693069306930693, + "grad_norm": 0.33319862057164595, + "learning_rate": 3.798111557975053e-05, + "loss": 0.5945760011672974, + "step": 210 + }, + { + "epoch": 0.6963696369636964, + "grad_norm": 0.3495679574237745, + "learning_rate": 3.7947352332261586e-05, + "loss": 0.600873589515686, + "step": 211 + }, + { + "epoch": 0.6996699669966997, + "grad_norm": 0.37390147639764304, + "learning_rate": 3.791332436096983e-05, + "loss": 0.6234852075576782, + "step": 212 + }, + { + "epoch": 0.7029702970297029, + "grad_norm": 0.3571653694610809, + "learning_rate": 3.7879032167788494e-05, + "loss": 0.6129578948020935, + "step": 213 + }, + { + "epoch": 0.7062706270627063, + "grad_norm": 0.48971881906384135, + "learning_rate": 3.784447625852812e-05, + "loss": 0.6204475164413452, + "step": 214 + }, + { + "epoch": 0.7095709570957096, + "grad_norm": 0.3610294548812676, + "learning_rate": 3.780965714288905e-05, + "loss": 0.6734122037887573, + "step": 215 + }, + { + "epoch": 0.7128712871287128, + "grad_norm": 0.35396639697907356, + "learning_rate": 3.777457533445393e-05, + "loss": 0.5678560137748718, + "step": 216 + }, + { + "epoch": 0.7161716171617162, + "grad_norm": 0.3232076597831296, + "learning_rate": 3.7739231350680135e-05, + "loss": 0.5784683227539062, + "step": 217 + }, + { + "epoch": 0.7194719471947195, + "grad_norm": 0.3540897506756201, + "learning_rate": 3.7703625712892125e-05, + "loss": 
0.6060354113578796, + "step": 218 + }, + { + "epoch": 0.7227722772277227, + "grad_norm": 0.35008278157890194, + "learning_rate": 3.766775894627376e-05, + "loss": 0.6248741745948792, + "step": 219 + }, + { + "epoch": 0.7260726072607261, + "grad_norm": 0.32018676747331787, + "learning_rate": 3.7631631579860553e-05, + "loss": 0.6014479398727417, + "step": 220 + }, + { + "epoch": 0.7293729372937293, + "grad_norm": 0.32068744744726313, + "learning_rate": 3.759524414653189e-05, + "loss": 0.6283233761787415, + "step": 221 + }, + { + "epoch": 0.7326732673267327, + "grad_norm": 0.3047460979670785, + "learning_rate": 3.755859718300313e-05, + "loss": 0.5710185766220093, + "step": 222 + }, + { + "epoch": 0.735973597359736, + "grad_norm": 0.34698489216212486, + "learning_rate": 3.75216912298177e-05, + "loss": 0.6007407903671265, + "step": 223 + }, + { + "epoch": 0.7392739273927392, + "grad_norm": 0.4952362221345831, + "learning_rate": 3.748452683133916e-05, + "loss": 0.6852575540542603, + "step": 224 + }, + { + "epoch": 0.7425742574257426, + "grad_norm": 0.32106680253004655, + "learning_rate": 3.7447104535743115e-05, + "loss": 0.6270833611488342, + "step": 225 + }, + { + "epoch": 0.7458745874587459, + "grad_norm": 0.30214814189665545, + "learning_rate": 3.740942489500916e-05, + "loss": 0.5925471782684326, + "step": 226 + }, + { + "epoch": 0.7491749174917491, + "grad_norm": 0.3171932777170319, + "learning_rate": 3.737148846491275e-05, + "loss": 0.573570728302002, + "step": 227 + }, + { + "epoch": 0.7524752475247525, + "grad_norm": 0.31480815810804524, + "learning_rate": 3.7333295805016986e-05, + "loss": 0.6088368892669678, + "step": 228 + }, + { + "epoch": 0.7557755775577558, + "grad_norm": 0.3103068539492526, + "learning_rate": 3.729484747866435e-05, + "loss": 0.5496470332145691, + "step": 229 + }, + { + "epoch": 0.759075907590759, + "grad_norm": 0.3007603199811456, + "learning_rate": 3.725614405296843e-05, + "loss": 0.6008220314979553, + "step": 230 + }, + { + "epoch": 
0.7623762376237624, + "grad_norm": 0.3007492168191884, + "learning_rate": 3.721718609880551e-05, + "loss": 0.5982120037078857, + "step": 231 + }, + { + "epoch": 0.7656765676567657, + "grad_norm": 0.3010002181490163, + "learning_rate": 3.717797419080618e-05, + "loss": 0.6404559016227722, + "step": 232 + }, + { + "epoch": 0.768976897689769, + "grad_norm": 0.35604106645956024, + "learning_rate": 3.713850890734689e-05, + "loss": 0.5875239372253418, + "step": 233 + }, + { + "epoch": 0.7722772277227723, + "grad_norm": 0.33191901009333297, + "learning_rate": 3.709879083054133e-05, + "loss": 0.5962772369384766, + "step": 234 + }, + { + "epoch": 0.7755775577557755, + "grad_norm": 0.29418628627284477, + "learning_rate": 3.705882054623192e-05, + "loss": 0.5764110684394836, + "step": 235 + }, + { + "epoch": 0.7788778877887789, + "grad_norm": 0.30409612807603364, + "learning_rate": 3.7018598643981165e-05, + "loss": 0.5635858178138733, + "step": 236 + }, + { + "epoch": 0.7821782178217822, + "grad_norm": 0.3039645238556037, + "learning_rate": 3.69781257170629e-05, + "loss": 0.5880881547927856, + "step": 237 + }, + { + "epoch": 0.7854785478547854, + "grad_norm": 0.30606246597511416, + "learning_rate": 3.6937402362453606e-05, + "loss": 0.5644733905792236, + "step": 238 + }, + { + "epoch": 0.7887788778877888, + "grad_norm": 0.328325214152846, + "learning_rate": 3.689642918082358e-05, + "loss": 0.6431151032447815, + "step": 239 + }, + { + "epoch": 0.7920792079207921, + "grad_norm": 0.2863869456911102, + "learning_rate": 3.6855206776528055e-05, + "loss": 0.5848085880279541, + "step": 240 + }, + { + "epoch": 0.7953795379537953, + "grad_norm": 0.3169795193025283, + "learning_rate": 3.681373575759831e-05, + "loss": 0.590021550655365, + "step": 241 + }, + { + "epoch": 0.7986798679867987, + "grad_norm": 0.3630216059086489, + "learning_rate": 3.67720167357327e-05, + "loss": 0.6217919588088989, + "step": 242 + }, + { + "epoch": 0.801980198019802, + "grad_norm": 0.2999270957223198, + 
"learning_rate": 3.673005032628763e-05, + "loss": 0.6075180172920227, + "step": 243 + }, + { + "epoch": 0.8052805280528053, + "grad_norm": 0.35145967135780704, + "learning_rate": 3.668783714826846e-05, + "loss": 0.6078404188156128, + "step": 244 + }, + { + "epoch": 0.8085808580858086, + "grad_norm": 0.32650805345047657, + "learning_rate": 3.664537782432042e-05, + "loss": 0.6297526955604553, + "step": 245 + }, + { + "epoch": 0.8118811881188119, + "grad_norm": 0.32461322862254094, + "learning_rate": 3.660267298071936e-05, + "loss": 0.5684514045715332, + "step": 246 + }, + { + "epoch": 0.8151815181518152, + "grad_norm": 0.32171296221654416, + "learning_rate": 3.655972324736259e-05, + "loss": 0.6192148327827454, + "step": 247 + }, + { + "epoch": 0.8184818481848185, + "grad_norm": 0.3322336621503604, + "learning_rate": 3.6516529257759506e-05, + "loss": 0.5900243520736694, + "step": 248 + }, + { + "epoch": 0.8217821782178217, + "grad_norm": 0.35183312055445004, + "learning_rate": 3.6473091649022337e-05, + "loss": 0.5941751599311829, + "step": 249 + }, + { + "epoch": 0.8250825082508251, + "grad_norm": 0.31255833045908565, + "learning_rate": 3.6429411061856645e-05, + "loss": 0.5744310021400452, + "step": 250 + }, + { + "epoch": 0.8283828382838284, + "grad_norm": 0.3266269251233177, + "learning_rate": 3.6385488140551985e-05, + "loss": 0.5985124707221985, + "step": 251 + }, + { + "epoch": 0.8316831683168316, + "grad_norm": 0.30426711611593643, + "learning_rate": 3.6341323532972294e-05, + "loss": 0.581912636756897, + "step": 252 + }, + { + "epoch": 0.834983498349835, + "grad_norm": 0.3297819735063718, + "learning_rate": 3.629691789054643e-05, + "loss": 0.586786150932312, + "step": 253 + }, + { + "epoch": 0.8382838283828383, + "grad_norm": 0.3074133078124695, + "learning_rate": 3.625227186825848e-05, + "loss": 0.6312603950500488, + "step": 254 + }, + { + "epoch": 0.8415841584158416, + "grad_norm": 0.33007753969064285, + "learning_rate": 3.620738612463818e-05, + "loss": 
0.5886626243591309, + "step": 255 + }, + { + "epoch": 0.8448844884488449, + "grad_norm": 0.31334340596765187, + "learning_rate": 3.6162261321751114e-05, + "loss": 0.5892266035079956, + "step": 256 + }, + { + "epoch": 0.8481848184818482, + "grad_norm": 0.31784442826893616, + "learning_rate": 3.6116898125189045e-05, + "loss": 0.5472115278244019, + "step": 257 + }, + { + "epoch": 0.8514851485148515, + "grad_norm": 0.3456330158902343, + "learning_rate": 3.6071297204059995e-05, + "loss": 0.5981796383857727, + "step": 258 + }, + { + "epoch": 0.8547854785478548, + "grad_norm": 0.3377124553034101, + "learning_rate": 3.6025459230978475e-05, + "loss": 0.6708342432975769, + "step": 259 + }, + { + "epoch": 0.858085808580858, + "grad_norm": 0.3081391395426973, + "learning_rate": 3.597938488205549e-05, + "loss": 0.6306079626083374, + "step": 260 + }, + { + "epoch": 0.8613861386138614, + "grad_norm": 0.3398583824115319, + "learning_rate": 3.59330748368886e-05, + "loss": 0.6098329424858093, + "step": 261 + }, + { + "epoch": 0.8646864686468647, + "grad_norm": 0.32878067719138626, + "learning_rate": 3.588652977855189e-05, + "loss": 0.5617724061012268, + "step": 262 + }, + { + "epoch": 0.8679867986798679, + "grad_norm": 0.34962664282188816, + "learning_rate": 3.58397503935859e-05, + "loss": 0.5780894756317139, + "step": 263 + }, + { + "epoch": 0.8712871287128713, + "grad_norm": 0.32665214019362204, + "learning_rate": 3.5792737371987477e-05, + "loss": 0.578921377658844, + "step": 264 + }, + { + "epoch": 0.8745874587458746, + "grad_norm": 0.36673188949709323, + "learning_rate": 3.574549140719962e-05, + "loss": 0.614944577217102, + "step": 265 + }, + { + "epoch": 0.8778877887788779, + "grad_norm": 0.3248666143164946, + "learning_rate": 3.569801319610125e-05, + "loss": 0.6269869208335876, + "step": 266 + }, + { + "epoch": 0.8811881188118812, + "grad_norm": 0.3338123662452596, + "learning_rate": 3.565030343899693e-05, + "loss": 0.6045581102371216, + "step": 267 + }, + { + "epoch": 
0.8844884488448845, + "grad_norm": 0.31011600887091817, + "learning_rate": 3.5602362839606514e-05, + "loss": 0.5872907638549805, + "step": 268 + }, + { + "epoch": 0.8877887788778878, + "grad_norm": 0.31857062779594814, + "learning_rate": 3.55541921050548e-05, + "loss": 0.6283375024795532, + "step": 269 + }, + { + "epoch": 0.8910891089108911, + "grad_norm": 0.32445751859048455, + "learning_rate": 3.5505791945861076e-05, + "loss": 0.5747002363204956, + "step": 270 + }, + { + "epoch": 0.8943894389438944, + "grad_norm": 0.2923309334474062, + "learning_rate": 3.545716307592864e-05, + "loss": 0.6205827593803406, + "step": 271 + }, + { + "epoch": 0.8976897689768977, + "grad_norm": 0.43972579907455317, + "learning_rate": 3.54083062125343e-05, + "loss": 0.5987251400947571, + "step": 272 + }, + { + "epoch": 0.900990099009901, + "grad_norm": 0.33194286352506225, + "learning_rate": 3.535922207631776e-05, + "loss": 0.6275356411933899, + "step": 273 + }, + { + "epoch": 0.9042904290429042, + "grad_norm": 0.3408278730793354, + "learning_rate": 3.5309911391270996e-05, + "loss": 0.6097655892372131, + "step": 274 + }, + { + "epoch": 0.9075907590759076, + "grad_norm": 0.3441995699777348, + "learning_rate": 3.52603748847276e-05, + "loss": 0.544170618057251, + "step": 275 + }, + { + "epoch": 0.9108910891089109, + "grad_norm": 0.3034867763949278, + "learning_rate": 3.521061328735202e-05, + "loss": 0.5723366141319275, + "step": 276 + }, + { + "epoch": 0.9141914191419142, + "grad_norm": 0.3091145609625042, + "learning_rate": 3.516062733312879e-05, + "loss": 0.5801889896392822, + "step": 277 + }, + { + "epoch": 0.9174917491749175, + "grad_norm": 0.3532845546992122, + "learning_rate": 3.511041775935175e-05, + "loss": 0.5942766666412354, + "step": 278 + }, + { + "epoch": 0.9207920792079208, + "grad_norm": 0.3192035342587887, + "learning_rate": 3.50599853066131e-05, + "loss": 0.5604017972946167, + "step": 279 + }, + { + "epoch": 0.9240924092409241, + "grad_norm": 0.4475571406552253, + 
"learning_rate": 3.500933071879251e-05, + "loss": 0.6151460409164429, + "step": 280 + }, + { + "epoch": 0.9273927392739274, + "grad_norm": 0.30946498453996385, + "learning_rate": 3.495845474304616e-05, + "loss": 0.5854936838150024, + "step": 281 + }, + { + "epoch": 0.9306930693069307, + "grad_norm": 0.3188531409769719, + "learning_rate": 3.490735812979572e-05, + "loss": 0.5586672425270081, + "step": 282 + }, + { + "epoch": 0.933993399339934, + "grad_norm": 0.3250546549981712, + "learning_rate": 3.485604163271721e-05, + "loss": 0.578475832939148, + "step": 283 + }, + { + "epoch": 0.9372937293729373, + "grad_norm": 0.45030229248281484, + "learning_rate": 3.4804506008730015e-05, + "loss": 0.5236382484436035, + "step": 284 + }, + { + "epoch": 0.9405940594059405, + "grad_norm": 0.31677157675280776, + "learning_rate": 3.475275201798559e-05, + "loss": 0.5964822769165039, + "step": 285 + }, + { + "epoch": 0.9438943894389439, + "grad_norm": 0.3221519247617692, + "learning_rate": 3.4700780423856334e-05, + "loss": 0.5551598072052002, + "step": 286 + }, + { + "epoch": 0.9471947194719472, + "grad_norm": 0.31322506983838, + "learning_rate": 3.464859199292429e-05, + "loss": 0.6095103621482849, + "step": 287 + }, + { + "epoch": 0.9504950495049505, + "grad_norm": 0.33333701342858213, + "learning_rate": 3.4596187494969846e-05, + "loss": 0.5893416404724121, + "step": 288 + }, + { + "epoch": 0.9537953795379538, + "grad_norm": 0.31167002926986764, + "learning_rate": 3.454356770296039e-05, + "loss": 0.5992231965065002, + "step": 289 + }, + { + "epoch": 0.9570957095709571, + "grad_norm": 0.3407826991036566, + "learning_rate": 3.4490733393038895e-05, + "loss": 0.6071972250938416, + "step": 290 + }, + { + "epoch": 0.9603960396039604, + "grad_norm": 0.321397588262469, + "learning_rate": 3.443768534451248e-05, + "loss": 0.5836942195892334, + "step": 291 + }, + { + "epoch": 0.9636963696369637, + "grad_norm": 0.3596023570145339, + "learning_rate": 3.4384424339840916e-05, + "loss": 
0.5707553625106812, + "step": 292 + }, + { + "epoch": 0.966996699669967, + "grad_norm": 0.326365753033755, + "learning_rate": 3.4330951164625075e-05, + "loss": 0.5883970260620117, + "step": 293 + }, + { + "epoch": 0.9702970297029703, + "grad_norm": 0.3276030981345682, + "learning_rate": 3.427726660759535e-05, + "loss": 0.6281589269638062, + "step": 294 + }, + { + "epoch": 0.9735973597359736, + "grad_norm": 0.3559560269123216, + "learning_rate": 3.422337146060003e-05, + "loss": 0.6641702651977539, + "step": 295 + }, + { + "epoch": 0.976897689768977, + "grad_norm": 0.34661891319338206, + "learning_rate": 3.4169266518593596e-05, + "loss": 0.6398966312408447, + "step": 296 + }, + { + "epoch": 0.9801980198019802, + "grad_norm": 0.3392015122860613, + "learning_rate": 3.411495257962501e-05, + "loss": 0.6376276016235352, + "step": 297 + }, + { + "epoch": 0.9834983498349835, + "grad_norm": 0.3454832175281825, + "learning_rate": 3.406043044482596e-05, + "loss": 0.648975133895874, + "step": 298 + }, + { + "epoch": 0.9867986798679867, + "grad_norm": 0.3284679145456545, + "learning_rate": 3.4005700918399016e-05, + "loss": 0.6201390624046326, + "step": 299 + }, + { + "epoch": 0.9900990099009901, + "grad_norm": 0.33000362479964457, + "learning_rate": 3.395076480760576e-05, + "loss": 0.6103875637054443, + "step": 300 + }, + { + "epoch": 0.9933993399339934, + "grad_norm": 0.31707924192462417, + "learning_rate": 3.3895622922754936e-05, + "loss": 0.5486876368522644, + "step": 301 + }, + { + "epoch": 0.9966996699669967, + "grad_norm": 0.3094164003933957, + "learning_rate": 3.384027607719043e-05, + "loss": 0.5980846285820007, + "step": 302 + }, + { + "epoch": 1.0, + "grad_norm": 0.33089398879681, + "learning_rate": 3.378472508727931e-05, + "loss": 0.5986801385879517, + "step": 303 + }, + { + "epoch": 1.0033003300330032, + "grad_norm": 0.4690060258405477, + "learning_rate": 3.372897077239979e-05, + "loss": 0.5586727857589722, + "step": 304 + }, + { + "epoch": 1.0066006600660067, + 
"grad_norm": 0.34686786747213394, + "learning_rate": 3.36730139549291e-05, + "loss": 0.5393255949020386, + "step": 305 + }, + { + "epoch": 1.00990099009901, + "grad_norm": 0.4023568892604613, + "learning_rate": 3.361685546023143e-05, + "loss": 0.5377227067947388, + "step": 306 + }, + { + "epoch": 1.0132013201320131, + "grad_norm": 0.39915820884177944, + "learning_rate": 3.356049611664568e-05, + "loss": 0.5223784446716309, + "step": 307 + }, + { + "epoch": 1.0165016501650166, + "grad_norm": 0.3654265250846575, + "learning_rate": 3.350393675547328e-05, + "loss": 0.5502469539642334, + "step": 308 + }, + { + "epoch": 1.0198019801980198, + "grad_norm": 0.42079557297663883, + "learning_rate": 3.3447178210965936e-05, + "loss": 0.5626603960990906, + "step": 309 + }, + { + "epoch": 1.023102310231023, + "grad_norm": 0.3684084639129366, + "learning_rate": 3.3390221320313303e-05, + "loss": 0.48262274265289307, + "step": 310 + }, + { + "epoch": 1.0264026402640265, + "grad_norm": 0.39908786063309193, + "learning_rate": 3.333306692363065e-05, + "loss": 0.5850967168807983, + "step": 311 + }, + { + "epoch": 1.0297029702970297, + "grad_norm": 0.44262876970078274, + "learning_rate": 3.3275715863946466e-05, + "loss": 0.5444281697273254, + "step": 312 + }, + { + "epoch": 1.033003300330033, + "grad_norm": 0.35239079669120155, + "learning_rate": 3.3218168987190004e-05, + "loss": 0.5329654216766357, + "step": 313 + }, + { + "epoch": 1.0363036303630364, + "grad_norm": 0.38499730860339404, + "learning_rate": 3.316042714217885e-05, + "loss": 0.5276832580566406, + "step": 314 + }, + { + "epoch": 1.0396039603960396, + "grad_norm": 0.3928937531164494, + "learning_rate": 3.310249118060636e-05, + "loss": 0.5344791412353516, + "step": 315 + }, + { + "epoch": 1.0429042904290429, + "grad_norm": 0.3466589226743573, + "learning_rate": 3.304436195702911e-05, + "loss": 0.5479785203933716, + "step": 316 + }, + { + "epoch": 1.046204620462046, + "grad_norm": 0.370325309360066, + "learning_rate": 
3.298604032885431e-05, + "loss": 0.5223082900047302, + "step": 317 + }, + { + "epoch": 1.0495049504950495, + "grad_norm": 0.4271803134046634, + "learning_rate": 3.292752715632713e-05, + "loss": 0.5667799711227417, + "step": 318 + }, + { + "epoch": 1.0528052805280528, + "grad_norm": 0.33752277032768196, + "learning_rate": 3.2868823302518016e-05, + "loss": 0.5194317698478699, + "step": 319 + }, + { + "epoch": 1.056105610561056, + "grad_norm": 0.35801795115870316, + "learning_rate": 3.2809929633309985e-05, + "loss": 0.4911007285118103, + "step": 320 + }, + { + "epoch": 1.0594059405940595, + "grad_norm": 0.33819516112787196, + "learning_rate": 3.2750847017385826e-05, + "loss": 0.5269002914428711, + "step": 321 + }, + { + "epoch": 1.0627062706270627, + "grad_norm": 0.3280280196094967, + "learning_rate": 3.269157632621529e-05, + "loss": 0.5124789476394653, + "step": 322 + }, + { + "epoch": 1.066006600660066, + "grad_norm": 0.3841029677303286, + "learning_rate": 3.263211843404225e-05, + "loss": 0.5483890771865845, + "step": 323 + }, + { + "epoch": 1.0693069306930694, + "grad_norm": 0.348752311292252, + "learning_rate": 3.25724742178718e-05, + "loss": 0.5582579374313354, + "step": 324 + }, + { + "epoch": 1.0726072607260726, + "grad_norm": 0.3672218653955236, + "learning_rate": 3.2512644557457304e-05, + "loss": 0.5662975907325745, + "step": 325 + }, + { + "epoch": 1.0759075907590758, + "grad_norm": 0.339133227284404, + "learning_rate": 3.2452630335287445e-05, + "loss": 0.5502511858940125, + "step": 326 + }, + { + "epoch": 1.0792079207920793, + "grad_norm": 0.3607463939055526, + "learning_rate": 3.239243243657318e-05, + "loss": 0.5614978075027466, + "step": 327 + }, + { + "epoch": 1.0825082508250825, + "grad_norm": 0.3354690532522152, + "learning_rate": 3.233205174923472e-05, + "loss": 0.4828110635280609, + "step": 328 + }, + { + "epoch": 1.0858085808580857, + "grad_norm": 0.3296040603044689, + "learning_rate": 3.22714891638884e-05, + "loss": 0.5437847971916199, + "step": 
329 + }, + { + "epoch": 1.0891089108910892, + "grad_norm": 0.3295415767468974, + "learning_rate": 3.221074557383355e-05, + "loss": 0.6240063309669495, + "step": 330 + }, + { + "epoch": 1.0924092409240924, + "grad_norm": 0.3032628226796708, + "learning_rate": 3.2149821875039325e-05, + "loss": 0.5435442328453064, + "step": 331 + }, + { + "epoch": 1.0957095709570956, + "grad_norm": 0.30875440813945676, + "learning_rate": 3.20887189661315e-05, + "loss": 0.5240401029586792, + "step": 332 + }, + { + "epoch": 1.099009900990099, + "grad_norm": 0.3043121620505056, + "learning_rate": 3.202743774837919e-05, + "loss": 0.5227692127227783, + "step": 333 + }, + { + "epoch": 1.1023102310231023, + "grad_norm": 0.3439754692795775, + "learning_rate": 3.196597912568157e-05, + "loss": 0.5607417821884155, + "step": 334 + }, + { + "epoch": 1.1056105610561056, + "grad_norm": 0.29691798670137787, + "learning_rate": 3.1904344004554536e-05, + "loss": 0.5607600808143616, + "step": 335 + }, + { + "epoch": 1.108910891089109, + "grad_norm": 0.32493088910689055, + "learning_rate": 3.184253329411737e-05, + "loss": 0.47135430574417114, + "step": 336 + }, + { + "epoch": 1.1122112211221122, + "grad_norm": 0.3202945703052858, + "learning_rate": 3.178054790607924e-05, + "loss": 0.5708764791488647, + "step": 337 + }, + { + "epoch": 1.1155115511551155, + "grad_norm": 0.3164605548495645, + "learning_rate": 3.1718388754725883e-05, + "loss": 0.5522497296333313, + "step": 338 + }, + { + "epoch": 1.118811881188119, + "grad_norm": 0.3449586600316318, + "learning_rate": 3.1656056756906e-05, + "loss": 0.5556532144546509, + "step": 339 + }, + { + "epoch": 1.1221122112211221, + "grad_norm": 0.3130025484639745, + "learning_rate": 3.1593552832017795e-05, + "loss": 0.5727676153182983, + "step": 340 + }, + { + "epoch": 1.1254125412541254, + "grad_norm": 0.3195703179740936, + "learning_rate": 3.153087790199541e-05, + "loss": 0.5131651759147644, + "step": 341 + }, + { + "epoch": 1.1287128712871288, + "grad_norm": 
0.3191177264656739, + "learning_rate": 3.146803289129528e-05, + "loss": 0.5143063068389893, + "step": 342 + }, + { + "epoch": 1.132013201320132, + "grad_norm": 0.33398757419035885, + "learning_rate": 3.1405018726882595e-05, + "loss": 0.509161114692688, + "step": 343 + }, + { + "epoch": 1.1353135313531353, + "grad_norm": 0.33058725446313514, + "learning_rate": 3.13418363382175e-05, + "loss": 0.5213526487350464, + "step": 344 + }, + { + "epoch": 1.1386138613861387, + "grad_norm": 0.3226863318187914, + "learning_rate": 3.127848665724149e-05, + "loss": 0.5465434789657593, + "step": 345 + }, + { + "epoch": 1.141914191419142, + "grad_norm": 0.6179658385179007, + "learning_rate": 3.1214970618363626e-05, + "loss": 0.5342190265655518, + "step": 346 + }, + { + "epoch": 1.1452145214521452, + "grad_norm": 0.47777163001134637, + "learning_rate": 3.115128915844672e-05, + "loss": 0.541754424571991, + "step": 347 + }, + { + "epoch": 1.1485148514851484, + "grad_norm": 0.33931974771490697, + "learning_rate": 3.10874432167936e-05, + "loss": 0.5318331122398376, + "step": 348 + }, + { + "epoch": 1.1518151815181519, + "grad_norm": 0.32111740987941506, + "learning_rate": 3.1023433735133134e-05, + "loss": 0.4972509741783142, + "step": 349 + }, + { + "epoch": 1.155115511551155, + "grad_norm": 0.30074948382432587, + "learning_rate": 3.095926165760647e-05, + "loss": 0.5417294502258301, + "step": 350 + }, + { + "epoch": 1.1584158415841583, + "grad_norm": 0.3410522798436207, + "learning_rate": 3.089492793075302e-05, + "loss": 0.554945707321167, + "step": 351 + }, + { + "epoch": 1.1617161716171618, + "grad_norm": 0.3254774061643724, + "learning_rate": 3.083043350349653e-05, + "loss": 0.5204564929008484, + "step": 352 + }, + { + "epoch": 1.165016501650165, + "grad_norm": 0.3088402728006412, + "learning_rate": 3.076577932713108e-05, + "loss": 0.4856947064399719, + "step": 353 + }, + { + "epoch": 1.1683168316831682, + "grad_norm": 0.2896918095760776, + "learning_rate": 3.0700966355307055e-05, + 
"loss": 0.5269368886947632, + "step": 354 + }, + { + "epoch": 1.1716171617161717, + "grad_norm": 0.32747543865706225, + "learning_rate": 3.063599554401708e-05, + "loss": 0.5811939239501953, + "step": 355 + }, + { + "epoch": 1.174917491749175, + "grad_norm": 0.29324577597304957, + "learning_rate": 3.057086785158189e-05, + "loss": 0.5636904239654541, + "step": 356 + }, + { + "epoch": 1.1782178217821782, + "grad_norm": 0.31779620334412045, + "learning_rate": 3.050558423863626e-05, + "loss": 0.546089768409729, + "step": 357 + }, + { + "epoch": 1.1815181518151816, + "grad_norm": 0.3093045991582328, + "learning_rate": 3.0440145668114774e-05, + "loss": 0.5239901542663574, + "step": 358 + }, + { + "epoch": 1.1848184818481848, + "grad_norm": 0.31848934088179354, + "learning_rate": 3.0374553105237637e-05, + "loss": 0.5833466053009033, + "step": 359 + }, + { + "epoch": 1.188118811881188, + "grad_norm": 0.33803859097620154, + "learning_rate": 3.0308807517496456e-05, + "loss": 0.5060774087905884, + "step": 360 + }, + { + "epoch": 1.1914191419141915, + "grad_norm": 0.31145081064149094, + "learning_rate": 3.0242909874639953e-05, + "loss": 0.5164307355880737, + "step": 361 + }, + { + "epoch": 1.1947194719471947, + "grad_norm": 0.29765085452905116, + "learning_rate": 3.0176861148659672e-05, + "loss": 0.49949395656585693, + "step": 362 + }, + { + "epoch": 1.198019801980198, + "grad_norm": 0.3296486034239661, + "learning_rate": 3.0110662313775623e-05, + "loss": 0.5581181049346924, + "step": 363 + }, + { + "epoch": 1.2013201320132012, + "grad_norm": 0.3116631729941006, + "learning_rate": 3.0044314346421938e-05, + "loss": 0.5657376646995544, + "step": 364 + }, + { + "epoch": 1.2046204620462047, + "grad_norm": 0.33012695180790946, + "learning_rate": 2.9977818225232443e-05, + "loss": 0.5269935131072998, + "step": 365 + }, + { + "epoch": 1.2079207920792079, + "grad_norm": 0.31869984664933465, + "learning_rate": 2.991117493102626e-05, + "loss": 0.5385931730270386, + "step": 366 + }, + { + 
"epoch": 1.2112211221122111, + "grad_norm": 0.30491226427581125, + "learning_rate": 2.984438544679329e-05, + "loss": 0.5615143179893494, + "step": 367 + }, + { + "epoch": 1.2145214521452146, + "grad_norm": 0.32195999076013593, + "learning_rate": 2.9777450757679754e-05, + "loss": 0.5175333023071289, + "step": 368 + }, + { + "epoch": 1.2178217821782178, + "grad_norm": 0.30930257180361886, + "learning_rate": 2.971037185097364e-05, + "loss": 0.565494179725647, + "step": 369 + }, + { + "epoch": 1.221122112211221, + "grad_norm": 0.34237830645177886, + "learning_rate": 2.9643149716090146e-05, + "loss": 0.5519120693206787, + "step": 370 + }, + { + "epoch": 1.2244224422442245, + "grad_norm": 0.30959351563618437, + "learning_rate": 2.9575785344557114e-05, + "loss": 0.49374374747276306, + "step": 371 + }, + { + "epoch": 1.2277227722772277, + "grad_norm": 0.31310768619122714, + "learning_rate": 2.950827973000034e-05, + "loss": 0.5608875751495361, + "step": 372 + }, + { + "epoch": 1.231023102310231, + "grad_norm": 0.31986895424613543, + "learning_rate": 2.944063386812899e-05, + "loss": 0.5866271257400513, + "step": 373 + }, + { + "epoch": 1.2343234323432344, + "grad_norm": 0.3359900469491975, + "learning_rate": 2.9372848756720867e-05, + "loss": 0.5342913269996643, + "step": 374 + }, + { + "epoch": 1.2376237623762376, + "grad_norm": 0.2956484140793021, + "learning_rate": 2.9304925395607696e-05, + "loss": 0.5539537668228149, + "step": 375 + }, + { + "epoch": 1.2409240924092408, + "grad_norm": 0.3239136306261367, + "learning_rate": 2.9236864786660423e-05, + "loss": 0.5614147186279297, + "step": 376 + }, + { + "epoch": 1.2442244224422443, + "grad_norm": 0.3311932744032855, + "learning_rate": 2.9168667933774356e-05, + "loss": 0.46689367294311523, + "step": 377 + }, + { + "epoch": 1.2475247524752475, + "grad_norm": 0.3291299090174619, + "learning_rate": 2.910033584285444e-05, + "loss": 0.5383083820343018, + "step": 378 + }, + { + "epoch": 1.2508250825082508, + "grad_norm": 
0.3013900588246958, + "learning_rate": 2.903186952180037e-05, + "loss": 0.5349752902984619, + "step": 379 + }, + { + "epoch": 1.2541254125412542, + "grad_norm": 0.3219145450840317, + "learning_rate": 2.8963269980491743e-05, + "loss": 0.5792303681373596, + "step": 380 + }, + { + "epoch": 1.2574257425742574, + "grad_norm": 0.2840550960191948, + "learning_rate": 2.8894538230773147e-05, + "loss": 0.524924099445343, + "step": 381 + }, + { + "epoch": 1.2607260726072607, + "grad_norm": 0.3172399675943548, + "learning_rate": 2.882567528643925e-05, + "loss": 0.5137406587600708, + "step": 382 + }, + { + "epoch": 1.2640264026402641, + "grad_norm": 0.2893676822687234, + "learning_rate": 2.8756682163219857e-05, + "loss": 0.5196574926376343, + "step": 383 + }, + { + "epoch": 1.2673267326732673, + "grad_norm": 0.31363904787626334, + "learning_rate": 2.8687559878764903e-05, + "loss": 0.585644006729126, + "step": 384 + }, + { + "epoch": 1.2706270627062706, + "grad_norm": 0.3310272877884813, + "learning_rate": 2.8618309452629445e-05, + "loss": 0.5973786115646362, + "step": 385 + }, + { + "epoch": 1.273927392739274, + "grad_norm": 0.3201222210217655, + "learning_rate": 2.854893190625865e-05, + "loss": 0.5909825563430786, + "step": 386 + }, + { + "epoch": 1.2772277227722773, + "grad_norm": 0.3507731714316878, + "learning_rate": 2.84794282629727e-05, + "loss": 0.5903690457344055, + "step": 387 + }, + { + "epoch": 1.2805280528052805, + "grad_norm": 0.31011243056320775, + "learning_rate": 2.840979954795171e-05, + "loss": 0.5316457152366638, + "step": 388 + }, + { + "epoch": 1.283828382838284, + "grad_norm": 0.32950464198309637, + "learning_rate": 2.8340046788220613e-05, + "loss": 0.5080389976501465, + "step": 389 + }, + { + "epoch": 1.2871287128712872, + "grad_norm": 0.37769184930606736, + "learning_rate": 2.8270171012633994e-05, + "loss": 0.6137889623641968, + "step": 390 + }, + { + "epoch": 1.2904290429042904, + "grad_norm": 0.34430823745531935, + "learning_rate": 
2.8200173251860928e-05, + "loss": 0.5433805584907532, + "step": 391 + }, + { + "epoch": 1.2937293729372938, + "grad_norm": 0.356563736773021, + "learning_rate": 2.8130054538369775e-05, + "loss": 0.4965590834617615, + "step": 392 + }, + { + "epoch": 1.297029702970297, + "grad_norm": 0.29380923244218154, + "learning_rate": 2.805981590641295e-05, + "loss": 0.5361340045928955, + "step": 393 + }, + { + "epoch": 1.3003300330033003, + "grad_norm": 0.31403525376793245, + "learning_rate": 2.7989458392011678e-05, + "loss": 0.47011327743530273, + "step": 394 + }, + { + "epoch": 1.3036303630363038, + "grad_norm": 0.30710914438533876, + "learning_rate": 2.7918983032940666e-05, + "loss": 0.5893687605857849, + "step": 395 + }, + { + "epoch": 1.306930693069307, + "grad_norm": 0.3126943781985397, + "learning_rate": 2.7848390868712886e-05, + "loss": 0.5219327211380005, + "step": 396 + }, + { + "epoch": 1.3102310231023102, + "grad_norm": 0.35585146532127665, + "learning_rate": 2.7777682940564142e-05, + "loss": 0.5652155876159668, + "step": 397 + }, + { + "epoch": 1.3135313531353137, + "grad_norm": 0.41906023992763497, + "learning_rate": 2.7706860291437784e-05, + "loss": 0.5361950397491455, + "step": 398 + }, + { + "epoch": 1.316831683168317, + "grad_norm": 0.29071400108766793, + "learning_rate": 2.763592396596929e-05, + "loss": 0.5355206727981567, + "step": 399 + }, + { + "epoch": 1.3201320132013201, + "grad_norm": 0.298123677847084, + "learning_rate": 2.756487501047086e-05, + "loss": 0.5082858800888062, + "step": 400 + }, + { + "epoch": 1.3234323432343233, + "grad_norm": 0.3144050740212562, + "learning_rate": 2.7493714472916013e-05, + "loss": 0.5282934904098511, + "step": 401 + }, + { + "epoch": 1.3267326732673268, + "grad_norm": 0.29396121691648713, + "learning_rate": 2.7422443402924074e-05, + "loss": 0.5502887964248657, + "step": 402 + }, + { + "epoch": 1.33003300330033, + "grad_norm": 0.2854429234726643, + "learning_rate": 2.7351062851744747e-05, + "loss": 0.5374204516410828, + 
"step": 403 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.30308752538818784, + "learning_rate": 2.7279573872242574e-05, + "loss": 0.5602293014526367, + "step": 404 + }, + { + "epoch": 1.3366336633663367, + "grad_norm": 0.30975657746221447, + "learning_rate": 2.7207977518881418e-05, + "loss": 0.5321286916732788, + "step": 405 + }, + { + "epoch": 1.33993399339934, + "grad_norm": 0.28965457921713383, + "learning_rate": 2.713627484770892e-05, + "loss": 0.5523560047149658, + "step": 406 + }, + { + "epoch": 1.3432343234323432, + "grad_norm": 0.30598816879566076, + "learning_rate": 2.706446691634089e-05, + "loss": 0.47019705176353455, + "step": 407 + }, + { + "epoch": 1.3465346534653464, + "grad_norm": 0.2977261513860205, + "learning_rate": 2.6992554783945748e-05, + "loss": 0.540359616279602, + "step": 408 + }, + { + "epoch": 1.3498349834983498, + "grad_norm": 0.2845048826043699, + "learning_rate": 2.6920539511228874e-05, + "loss": 0.561464786529541, + "step": 409 + }, + { + "epoch": 1.353135313531353, + "grad_norm": 0.2939741197740927, + "learning_rate": 2.6848422160416956e-05, + "loss": 0.5429259538650513, + "step": 410 + }, + { + "epoch": 1.3564356435643563, + "grad_norm": 0.2968609589915083, + "learning_rate": 2.677620379524237e-05, + "loss": 0.5452640652656555, + "step": 411 + }, + { + "epoch": 1.3597359735973598, + "grad_norm": 0.28949363661635646, + "learning_rate": 2.670388548092741e-05, + "loss": 0.49627864360809326, + "step": 412 + }, + { + "epoch": 1.363036303630363, + "grad_norm": 0.328169978832012, + "learning_rate": 2.663146828416867e-05, + "loss": 0.5331633687019348, + "step": 413 + }, + { + "epoch": 1.3663366336633662, + "grad_norm": 0.2926434963884909, + "learning_rate": 2.6558953273121216e-05, + "loss": 0.5447151064872742, + "step": 414 + }, + { + "epoch": 1.3696369636963697, + "grad_norm": 0.2863360845432002, + "learning_rate": 2.648634151738292e-05, + "loss": 0.5467007160186768, + "step": 415 + }, + { + "epoch": 1.372937293729373, + 
"grad_norm": 0.33044933855099695, + "learning_rate": 2.6413634087978602e-05, + "loss": 0.5804279446601868, + "step": 416 + }, + { + "epoch": 1.3762376237623761, + "grad_norm": 0.29168904019746145, + "learning_rate": 2.63408320573443e-05, + "loss": 0.5323517322540283, + "step": 417 + }, + { + "epoch": 1.3795379537953796, + "grad_norm": 0.3046417110987717, + "learning_rate": 2.6267936499311402e-05, + "loss": 0.5452409982681274, + "step": 418 + }, + { + "epoch": 1.3828382838283828, + "grad_norm": 0.2878853361033164, + "learning_rate": 2.619494848909084e-05, + "loss": 0.4622665047645569, + "step": 419 + }, + { + "epoch": 1.386138613861386, + "grad_norm": 0.3129938954769346, + "learning_rate": 2.6121869103257206e-05, + "loss": 0.531772255897522, + "step": 420 + }, + { + "epoch": 1.3894389438943895, + "grad_norm": 0.3044320552061303, + "learning_rate": 2.6048699419732897e-05, + "loss": 0.519554853439331, + "step": 421 + }, + { + "epoch": 1.3927392739273927, + "grad_norm": 0.32616258357306027, + "learning_rate": 2.5975440517772187e-05, + "loss": 0.545585572719574, + "step": 422 + }, + { + "epoch": 1.396039603960396, + "grad_norm": 0.297995845019565, + "learning_rate": 2.5902093477945345e-05, + "loss": 0.5641547441482544, + "step": 423 + }, + { + "epoch": 1.3993399339933994, + "grad_norm": 0.28406971495281874, + "learning_rate": 2.5828659382122655e-05, + "loss": 0.5578028559684753, + "step": 424 + }, + { + "epoch": 1.4026402640264026, + "grad_norm": 0.35618435421860006, + "learning_rate": 2.5755139313458484e-05, + "loss": 0.5931404232978821, + "step": 425 + }, + { + "epoch": 1.4059405940594059, + "grad_norm": 0.3227282264542969, + "learning_rate": 2.5681534356375314e-05, + "loss": 0.5486891865730286, + "step": 426 + }, + { + "epoch": 1.4092409240924093, + "grad_norm": 0.31220449886262164, + "learning_rate": 2.5607845596547706e-05, + "loss": 0.5007671117782593, + "step": 427 + }, + { + "epoch": 1.4125412541254125, + "grad_norm": 0.2970377848116104, + "learning_rate": 
2.5534074120886346e-05, + "loss": 0.5044519901275635, + "step": 428 + }, + { + "epoch": 1.4158415841584158, + "grad_norm": 0.30667327850480125, + "learning_rate": 2.5460221017521952e-05, + "loss": 0.5227789878845215, + "step": 429 + }, + { + "epoch": 1.4191419141914192, + "grad_norm": 0.2902458759439887, + "learning_rate": 2.538628737578926e-05, + "loss": 0.5530189871788025, + "step": 430 + }, + { + "epoch": 1.4224422442244224, + "grad_norm": 0.3114416510328153, + "learning_rate": 2.5312274286210966e-05, + "loss": 0.508142352104187, + "step": 431 + }, + { + "epoch": 1.4257425742574257, + "grad_norm": 0.30284970816559353, + "learning_rate": 2.523818284048159e-05, + "loss": 0.5497263669967651, + "step": 432 + }, + { + "epoch": 1.4290429042904291, + "grad_norm": 0.3619418905679721, + "learning_rate": 2.5164014131451443e-05, + "loss": 0.5477034449577332, + "step": 433 + }, + { + "epoch": 1.4323432343234324, + "grad_norm": 0.28668741491270383, + "learning_rate": 2.508976925311045e-05, + "loss": 0.5091728568077087, + "step": 434 + }, + { + "epoch": 1.4356435643564356, + "grad_norm": 0.2922234358135184, + "learning_rate": 2.501544930057203e-05, + "loss": 0.5022713541984558, + "step": 435 + }, + { + "epoch": 1.438943894389439, + "grad_norm": 0.29994035273286174, + "learning_rate": 2.494105537005697e-05, + "loss": 0.5401599407196045, + "step": 436 + }, + { + "epoch": 1.4422442244224423, + "grad_norm": 0.27863085551634303, + "learning_rate": 2.4866588558877208e-05, + "loss": 0.5632063150405884, + "step": 437 + }, + { + "epoch": 1.4455445544554455, + "grad_norm": 0.2968792338733857, + "learning_rate": 2.479204996541969e-05, + "loss": 0.552355170249939, + "step": 438 + }, + { + "epoch": 1.448844884488449, + "grad_norm": 0.3222205976590156, + "learning_rate": 2.4717440689130154e-05, + "loss": 0.5604996681213379, + "step": 439 + }, + { + "epoch": 1.4521452145214522, + "grad_norm": 0.2781451863798608, + "learning_rate": 2.4642761830496893e-05, + "loss": 0.4961245656013489, + 
"step": 440 + }, + { + "epoch": 1.4554455445544554, + "grad_norm": 0.3327533816855903, + "learning_rate": 2.4568014491034565e-05, + "loss": 0.5403590202331543, + "step": 441 + }, + { + "epoch": 1.4587458745874589, + "grad_norm": 0.2944499869326328, + "learning_rate": 2.4493199773267902e-05, + "loss": 0.4753378629684448, + "step": 442 + }, + { + "epoch": 1.462046204620462, + "grad_norm": 0.30936599048377306, + "learning_rate": 2.4418318780715477e-05, + "loss": 0.5125438570976257, + "step": 443 + }, + { + "epoch": 1.4653465346534653, + "grad_norm": 0.3047486735791836, + "learning_rate": 2.434337261787342e-05, + "loss": 0.5670269727706909, + "step": 444 + }, + { + "epoch": 1.4686468646864688, + "grad_norm": 0.3348418102837006, + "learning_rate": 2.426836239019911e-05, + "loss": 0.5538198947906494, + "step": 445 + }, + { + "epoch": 1.471947194719472, + "grad_norm": 0.2790312641462961, + "learning_rate": 2.4193289204094893e-05, + "loss": 0.5012328028678894, + "step": 446 + }, + { + "epoch": 1.4752475247524752, + "grad_norm": 0.30485310749783334, + "learning_rate": 2.4118154166891762e-05, + "loss": 0.538119912147522, + "step": 447 + }, + { + "epoch": 1.4785478547854787, + "grad_norm": 0.32398781026753815, + "learning_rate": 2.4042958386833003e-05, + "loss": 0.5252339839935303, + "step": 448 + }, + { + "epoch": 1.481848184818482, + "grad_norm": 0.326928536480608, + "learning_rate": 2.3967702973057853e-05, + "loss": 0.5367081761360168, + "step": 449 + }, + { + "epoch": 1.4851485148514851, + "grad_norm": 0.3044938562463835, + "learning_rate": 2.3892389035585167e-05, + "loss": 0.5091884136199951, + "step": 450 + }, + { + "epoch": 1.4884488448844886, + "grad_norm": 0.2897824690201277, + "learning_rate": 2.3817017685297016e-05, + "loss": 0.5079891681671143, + "step": 451 + }, + { + "epoch": 1.4917491749174918, + "grad_norm": 0.2966882318097961, + "learning_rate": 2.3741590033922313e-05, + "loss": 0.511939287185669, + "step": 452 + }, + { + "epoch": 1.495049504950495, + 
"grad_norm": 0.28797637565211376, + "learning_rate": 2.3666107194020404e-05, + "loss": 0.5070478916168213, + "step": 453 + }, + { + "epoch": 1.4983498349834983, + "grad_norm": 0.29050652670321586, + "learning_rate": 2.3590570278964682e-05, + "loss": 0.547492504119873, + "step": 454 + }, + { + "epoch": 1.5016501650165015, + "grad_norm": 0.311874965448668, + "learning_rate": 2.3514980402926132e-05, + "loss": 0.5386558771133423, + "step": 455 + }, + { + "epoch": 1.504950495049505, + "grad_norm": 0.26980126113979913, + "learning_rate": 2.3439338680856943e-05, + "loss": 0.48668172955513, + "step": 456 + }, + { + "epoch": 1.5082508250825084, + "grad_norm": 0.31689121328788056, + "learning_rate": 2.3363646228474002e-05, + "loss": 0.5497942566871643, + "step": 457 + }, + { + "epoch": 1.5115511551155114, + "grad_norm": 0.3648919358675907, + "learning_rate": 2.328790416224248e-05, + "loss": 0.5267748832702637, + "step": 458 + }, + { + "epoch": 1.5148514851485149, + "grad_norm": 0.3191029117024018, + "learning_rate": 2.3212113599359368e-05, + "loss": 0.5578982830047607, + "step": 459 + }, + { + "epoch": 1.5181518151815183, + "grad_norm": 0.30610891906133464, + "learning_rate": 2.3136275657736956e-05, + "loss": 0.5136545896530151, + "step": 460 + }, + { + "epoch": 1.5214521452145213, + "grad_norm": 0.28466532575384307, + "learning_rate": 2.3060391455986403e-05, + "loss": 0.5718669891357422, + "step": 461 + }, + { + "epoch": 1.5247524752475248, + "grad_norm": 0.3064265170567389, + "learning_rate": 2.2984462113401184e-05, + "loss": 0.5427108407020569, + "step": 462 + }, + { + "epoch": 1.528052805280528, + "grad_norm": 0.28495826208338726, + "learning_rate": 2.2908488749940596e-05, + "loss": 0.5293564200401306, + "step": 463 + }, + { + "epoch": 1.5313531353135312, + "grad_norm": 0.3073240786964915, + "learning_rate": 2.2832472486213275e-05, + "loss": 0.550743579864502, + "step": 464 + }, + { + "epoch": 1.5346534653465347, + "grad_norm": 0.30789089349395116, + "learning_rate": 
2.2756414443460602e-05, + "loss": 0.5957387685775757, + "step": 465 + }, + { + "epoch": 1.537953795379538, + "grad_norm": 0.2840660845057486, + "learning_rate": 2.2680315743540234e-05, + "loss": 0.4994407892227173, + "step": 466 + }, + { + "epoch": 1.5412541254125411, + "grad_norm": 0.2912314912557071, + "learning_rate": 2.260417750890949e-05, + "loss": 0.5120857954025269, + "step": 467 + }, + { + "epoch": 1.5445544554455446, + "grad_norm": 0.3024618438133355, + "learning_rate": 2.2528000862608845e-05, + "loss": 0.5727359056472778, + "step": 468 + }, + { + "epoch": 1.5478547854785478, + "grad_norm": 0.30379584493476613, + "learning_rate": 2.2451786928245344e-05, + "loss": 0.584964394569397, + "step": 469 + }, + { + "epoch": 1.551155115511551, + "grad_norm": 0.2782374360382863, + "learning_rate": 2.237553682997603e-05, + "loss": 0.5507112741470337, + "step": 470 + }, + { + "epoch": 1.5544554455445545, + "grad_norm": 0.26333814455393634, + "learning_rate": 2.2299251692491364e-05, + "loss": 0.49136701226234436, + "step": 471 + }, + { + "epoch": 1.5577557755775577, + "grad_norm": 0.31673569076077385, + "learning_rate": 2.2222932640998635e-05, + "loss": 0.5374805927276611, + "step": 472 + }, + { + "epoch": 1.561056105610561, + "grad_norm": 0.29370656251116817, + "learning_rate": 2.2146580801205362e-05, + "loss": 0.523996114730835, + "step": 473 + }, + { + "epoch": 1.5643564356435644, + "grad_norm": 0.27277397989040114, + "learning_rate": 2.207019729930271e-05, + "loss": 0.48198428750038147, + "step": 474 + }, + { + "epoch": 1.5676567656765676, + "grad_norm": 0.2861287068823064, + "learning_rate": 2.199378326194883e-05, + "loss": 0.5148699879646301, + "step": 475 + }, + { + "epoch": 1.5709570957095709, + "grad_norm": 0.2981231032466442, + "learning_rate": 2.1917339816252303e-05, + "loss": 0.5297671556472778, + "step": 476 + }, + { + "epoch": 1.5742574257425743, + "grad_norm": 0.2775943923870632, + "learning_rate": 2.1840868089755465e-05, + "loss": 0.5082278847694397, + 
"step": 477 + }, + { + "epoch": 1.5775577557755776, + "grad_norm": 0.2988631140370514, + "learning_rate": 2.176436921041779e-05, + "loss": 0.4755392372608185, + "step": 478 + }, + { + "epoch": 1.5808580858085808, + "grad_norm": 0.28707182004966697, + "learning_rate": 2.1687844306599275e-05, + "loss": 0.5249454975128174, + "step": 479 + }, + { + "epoch": 1.5841584158415842, + "grad_norm": 0.3023499942723386, + "learning_rate": 2.161129450704376e-05, + "loss": 0.5626166462898254, + "step": 480 + }, + { + "epoch": 1.5874587458745875, + "grad_norm": 0.28182475866947054, + "learning_rate": 2.1534720940862318e-05, + "loss": 0.5590533018112183, + "step": 481 + }, + { + "epoch": 1.5907590759075907, + "grad_norm": 0.2724331542693392, + "learning_rate": 2.1458124737516557e-05, + "loss": 0.5146170854568481, + "step": 482 + }, + { + "epoch": 1.5940594059405941, + "grad_norm": 0.28834268248771533, + "learning_rate": 2.1381507026802007e-05, + "loss": 0.5633066296577454, + "step": 483 + }, + { + "epoch": 1.5973597359735974, + "grad_norm": 0.29376551657635425, + "learning_rate": 2.130486893883141e-05, + "loss": 0.5273865461349487, + "step": 484 + }, + { + "epoch": 1.6006600660066006, + "grad_norm": 0.277893471974935, + "learning_rate": 2.1228211604018088e-05, + "loss": 0.5040723085403442, + "step": 485 + }, + { + "epoch": 1.603960396039604, + "grad_norm": 0.2901419412347278, + "learning_rate": 2.1151536153059254e-05, + "loss": 0.5254411697387695, + "step": 486 + }, + { + "epoch": 1.6072607260726073, + "grad_norm": 0.29340041503520936, + "learning_rate": 2.1074843716919323e-05, + "loss": 0.5789728760719299, + "step": 487 + }, + { + "epoch": 1.6105610561056105, + "grad_norm": 0.2858502686555999, + "learning_rate": 2.0998135426813245e-05, + "loss": 0.5521235466003418, + "step": 488 + }, + { + "epoch": 1.613861386138614, + "grad_norm": 0.2770947277408911, + "learning_rate": 2.092141241418984e-05, + "loss": 0.4702959954738617, + "step": 489 + }, + { + "epoch": 1.6171617161716172, + 
"grad_norm": 0.29713285242144816, + "learning_rate": 2.0844675810715046e-05, + "loss": 0.4960707128047943, + "step": 490 + }, + { + "epoch": 1.6204620462046204, + "grad_norm": 0.2800759957297699, + "learning_rate": 2.076792674825529e-05, + "loss": 0.5334826111793518, + "step": 491 + }, + { + "epoch": 1.6237623762376239, + "grad_norm": 0.4465546145157964, + "learning_rate": 2.0691166358860775e-05, + "loss": 0.5604894161224365, + "step": 492 + }, + { + "epoch": 1.627062706270627, + "grad_norm": 0.2895889767199155, + "learning_rate": 2.061439577474875e-05, + "loss": 0.5565654635429382, + "step": 493 + }, + { + "epoch": 1.6303630363036303, + "grad_norm": 0.2663082120203026, + "learning_rate": 2.0537616128286875e-05, + "loss": 0.541640043258667, + "step": 494 + }, + { + "epoch": 1.6336633663366338, + "grad_norm": 0.27975047407467746, + "learning_rate": 2.0460828551976436e-05, + "loss": 0.5247132182121277, + "step": 495 + }, + { + "epoch": 1.636963696369637, + "grad_norm": 0.30554958978585, + "learning_rate": 2.0384034178435727e-05, + "loss": 0.533937394618988, + "step": 496 + }, + { + "epoch": 1.6402640264026402, + "grad_norm": 0.29094539458240765, + "learning_rate": 2.0307234140383264e-05, + "loss": 0.5857927799224854, + "step": 497 + }, + { + "epoch": 1.6435643564356437, + "grad_norm": 0.2718482098386275, + "learning_rate": 2.0230429570621134e-05, + "loss": 0.5191807746887207, + "step": 498 + }, + { + "epoch": 1.6468646864686467, + "grad_norm": 0.28523897670587156, + "learning_rate": 2.0153621602018276e-05, + "loss": 0.5255881547927856, + "step": 499 + }, + { + "epoch": 1.6501650165016502, + "grad_norm": 0.27057309315143646, + "learning_rate": 2.0076811367493736e-05, + "loss": 0.5134017467498779, + "step": 500 + } + ], + "logging_steps": 1, + "max_steps": 909, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": 
false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 699790582349824.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000..f7e3756 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2299c356bc8becdedc07e7f99b268be8ab4a91e2c4e99a06f13c8e908a3188e4 +size 7313 diff --git a/checkpoint-500/zero_to_fp32.py b/checkpoint-500/zero_to_fp32.py new file mode 100644 index 0000000..5995d6e --- /dev/null +++ b/checkpoint-500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-909/chat_template.jinja b/checkpoint-909/chat_template.jinja new file mode 100644 index 0000000..2e2f69c --- /dev/null +++ b/checkpoint-909/chat_template.jinja @@ -0,0 +1,86 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' 
%} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n\n' }} +{%- endif %} \ No newline at end of file diff --git 
a/checkpoint-909/config.json b/checkpoint-909/config.json new file mode 100644 index 0000000..6dae724 --- /dev/null +++ b/checkpoint-909/config.json @@ -0,0 +1,71 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 5000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.2.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/checkpoint-909/generation_config.json b/checkpoint-909/generation_config.json new file mode 100644 index 0000000..1701c94 --- /dev/null +++ b/checkpoint-909/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + 
"temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "5.2.0" +} diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..f70cce6 --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:513ef9be630c710a66df1269f66bac1fdaf05f3b34513b36279e66508d3c81ae +size 6033707889 diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..bff1435 --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d40e777160ed5fdb0a6fe630f392117650ba2a290a66f8c077fc123c1394a0 +size 6033707889 diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..2775901 --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7cd8f2a4a3e912e9e904c6488631a43429f155ff3a0e427d16090c3ee958ba6 +size 6033707889 diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..24cc7a5 --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b5d62b16b569b757b89853c2f58f2cf16e61e70053f8bb42e8a480cece0f0f +size 6033707889 diff --git 
a/checkpoint-909/global_step909/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..891b449 --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f016100b71960a6bc5a3c646066ec3e23132b85b361201d8ad4ec8478b1e5c0 +size 6033707889 diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..09bba17 --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f85794866ee3ecb478e94b587256261b3ca0a0d1e8224b1ea8ac5188ec1b3a6 +size 6033707889 diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..7605f7d --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8faec3f600aff9361621a5fefdf9201bbf15f8c64c695b08703b23318208ee0 +size 6033707889 diff --git a/checkpoint-909/global_step909/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/checkpoint-909/global_step909/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000..56acabe --- /dev/null +++ b/checkpoint-909/global_step909/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e243cbb9c43976420ac4a62b848b8e62a1d693afe35ce33a6cbd77b80f91787 +size 6033707889 diff --git a/checkpoint-909/global_step909/zero_pp_rank_0_mp_rank_00_model_states.pt 
b/checkpoint-909/global_step909/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000..c85d9ed --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ff365ffdacb273de1a7d5dbf25ad918744608980b3beaf474a1a791ce3d9d5 +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000..7c97dcd --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c79917388c5bd25a333318dc69992406cc0683f8a17c1be62208551e98b2836f +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000..c512464 --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7606fca21b09828b03e77f9adc92cccc859d47dd570f9498be22ef069c269e +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000..3b44282 --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a95ab4d839b5404798ac475a00e72e626758fbc64afd1ff1a6eb739f3b14ef0 +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000..1d224cc --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bf2706ddf6d4204b59aabb70a49ff7a9939dd4e80b0026bfa53aa5e339c939 +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000..e7239e2 --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83741dae88423c9ec16e7113c101781f52878c8b208666a45b90c4c48889e27 +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000..1fcd632 --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31c0bc0bb9e810463c9bb1be27460ff1204ea5850985bd3046ab707fbf42d66 +size 200719 diff --git a/checkpoint-909/global_step909/zero_pp_rank_7_mp_rank_00_model_states.pt b/checkpoint-909/global_step909/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000..56eb10b --- /dev/null +++ b/checkpoint-909/global_step909/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ffc8f58e5a32c4036b64075783e63be1ff9e11b73fe5db78398f5f8a4d6548c +size 200719 diff --git a/checkpoint-909/latest b/checkpoint-909/latest new file mode 100644 index 0000000..3c42969 --- /dev/null +++ b/checkpoint-909/latest @@ -0,0 +1 @@ +global_step909 \ No newline at end of file diff --git a/checkpoint-909/model.safetensors b/checkpoint-909/model.safetensors new file mode 100644 index 0000000..51a41cb --- /dev/null +++ b/checkpoint-909/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6efc5a0f14fc39e6749d9f160ed30c10f3001f9d49f5cbd909c63f514073d1 
+size 8044982080 diff --git a/checkpoint-909/rng_state_0.pth b/checkpoint-909/rng_state_0.pth new file mode 100644 index 0000000..af3da00 --- /dev/null +++ b/checkpoint-909/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b093dfe59b41efeb45cc3d628d3360abaa2303bbaa489081411faf431e52941d +size 16389 diff --git a/checkpoint-909/rng_state_1.pth b/checkpoint-909/rng_state_1.pth new file mode 100644 index 0000000..d70d4a2 --- /dev/null +++ b/checkpoint-909/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450a0ac1645503c0b14fe9c37d77060cc76b1c9942dcfdd0e779cd526b2e98d9 +size 16389 diff --git a/checkpoint-909/rng_state_2.pth b/checkpoint-909/rng_state_2.pth new file mode 100644 index 0000000..8ac8ed7 --- /dev/null +++ b/checkpoint-909/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938b37918eac9a4cbef3805f7d2abdcef094a334f848e73ac19fcdc39d38663a +size 16389 diff --git a/checkpoint-909/rng_state_3.pth b/checkpoint-909/rng_state_3.pth new file mode 100644 index 0000000..4fd4b11 --- /dev/null +++ b/checkpoint-909/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b27a54988f134299ab296b95e8c1e63d476dffdba7c6f120f2076e8688f355 +size 16389 diff --git a/checkpoint-909/rng_state_4.pth b/checkpoint-909/rng_state_4.pth new file mode 100644 index 0000000..1976284 --- /dev/null +++ b/checkpoint-909/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95f73d920296d5d9558e47894c5a2c0d649d7cb10a3b07a013d6bfbd3b8cf90 +size 16389 diff --git a/checkpoint-909/rng_state_5.pth b/checkpoint-909/rng_state_5.pth new file mode 100644 index 0000000..a187a68 --- /dev/null +++ b/checkpoint-909/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b945bb634c9daf4a00433296ecc5245b34a2b5f09017993b5f5f03b84dabea +size 16389 diff --git a/checkpoint-909/rng_state_6.pth 
b/checkpoint-909/rng_state_6.pth new file mode 100644 index 0000000..5ff5924 --- /dev/null +++ b/checkpoint-909/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfdd1fca0dace16a59c8592c531a70661218184bb0249c5862bbfb5ab0844fc9 +size 16389 diff --git a/checkpoint-909/rng_state_7.pth b/checkpoint-909/rng_state_7.pth new file mode 100644 index 0000000..4396c8f --- /dev/null +++ b/checkpoint-909/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d106363f9f1b0ff898c86d083a097bf22fd84de35e5670aa299504abcc99752a +size 16389 diff --git a/checkpoint-909/scheduler.pt b/checkpoint-909/scheduler.pt new file mode 100644 index 0000000..3bf4b00 --- /dev/null +++ b/checkpoint-909/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ec221c5c7807ae983b613db073898db92ddc68f3a77cd70659bdd911f120ce +size 1465 diff --git a/checkpoint-909/tokenizer.json b/checkpoint-909/tokenizer.json new file mode 100644 index 0000000..c7afbed --- /dev/null +++ b/checkpoint-909/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/checkpoint-909/tokenizer_config.json b/checkpoint-909/tokenizer_config.json new file mode 100644 index 0000000..79dfc69 --- /dev/null +++ b/checkpoint-909/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + 
"padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-909/trainer_state.json b/checkpoint-909/trainer_state.json new file mode 100644 index 0000000..e687e9e --- /dev/null +++ b/checkpoint-909/trainer_state.json @@ -0,0 +1,6397 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 909, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0033003300330033004, + "grad_norm": 10.81499361768409, + "learning_rate": 0.0, + "loss": 1.2079360485076904, + "step": 1 + }, + { + "epoch": 0.006600660066006601, + "grad_norm": 10.226770877445293, + "learning_rate": 4.395604395604396e-07, + "loss": 1.123347520828247, + "step": 2 + }, + { + "epoch": 0.009900990099009901, + "grad_norm": 11.292644267807786, + "learning_rate": 8.791208791208792e-07, + "loss": 1.261695384979248, + "step": 3 + }, + { + "epoch": 0.013201320132013201, + "grad_norm": 10.504638106263508, + "learning_rate": 1.3186813186813187e-06, + "loss": 1.1276888847351074, + "step": 4 + }, + { + "epoch": 0.0165016501650165, + "grad_norm": 10.822100601159539, + "learning_rate": 1.7582417582417585e-06, + "loss": 1.2254480123519897, + "step": 5 + }, + { + "epoch": 0.019801980198019802, + "grad_norm": 9.905516433474448, + "learning_rate": 2.197802197802198e-06, + "loss": 1.1809396743774414, + "step": 6 + }, + { + "epoch": 0.0231023102310231, + "grad_norm": 9.323364829402967, + "learning_rate": 2.6373626373626375e-06, + "loss": 1.2000095844268799, + "step": 7 + }, + { + "epoch": 0.026402640264026403, + "grad_norm": 6.706098746162178, + "learning_rate": 3.0769230769230774e-06, + "loss": 1.0248074531555176, + "step": 8 + }, + { + "epoch": 0.0297029702970297, + "grad_norm": 5.761138380327878, + "learning_rate": 3.516483516483517e-06, + "loss": 1.0840561389923096, + 
"step": 9 + }, + { + "epoch": 0.033003300330033, + "grad_norm": 2.7364343552329315, + "learning_rate": 3.9560439560439565e-06, + "loss": 0.955639123916626, + "step": 10 + }, + { + "epoch": 0.036303630363036306, + "grad_norm": 2.113810438625661, + "learning_rate": 4.395604395604396e-06, + "loss": 0.9281604290008545, + "step": 11 + }, + { + "epoch": 0.039603960396039604, + "grad_norm": 1.849238684536393, + "learning_rate": 4.8351648351648355e-06, + "loss": 0.9079018831253052, + "step": 12 + }, + { + "epoch": 0.0429042904290429, + "grad_norm": 1.6747171029255208, + "learning_rate": 5.274725274725275e-06, + "loss": 0.9039217233657837, + "step": 13 + }, + { + "epoch": 0.0462046204620462, + "grad_norm": 2.0121666555693416, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.8910936117172241, + "step": 14 + }, + { + "epoch": 0.04950495049504951, + "grad_norm": 2.0600124028897526, + "learning_rate": 6.153846153846155e-06, + "loss": 0.895532488822937, + "step": 15 + }, + { + "epoch": 0.052805280528052806, + "grad_norm": 2.0613449368510044, + "learning_rate": 6.5934065934065935e-06, + "loss": 0.8889240622520447, + "step": 16 + }, + { + "epoch": 0.056105610561056105, + "grad_norm": 1.785450637059245, + "learning_rate": 7.032967032967034e-06, + "loss": 0.8499570488929749, + "step": 17 + }, + { + "epoch": 0.0594059405940594, + "grad_norm": 1.5894161631201256, + "learning_rate": 7.472527472527473e-06, + "loss": 0.839992105960846, + "step": 18 + }, + { + "epoch": 0.0627062706270627, + "grad_norm": 1.1904834264503976, + "learning_rate": 7.912087912087913e-06, + "loss": 0.7718420028686523, + "step": 19 + }, + { + "epoch": 0.066006600660066, + "grad_norm": 1.0397335564670163, + "learning_rate": 8.351648351648353e-06, + "loss": 0.7865867614746094, + "step": 20 + }, + { + "epoch": 0.06930693069306931, + "grad_norm": 0.8314739102256958, + "learning_rate": 8.791208791208792e-06, + "loss": 0.7982739806175232, + "step": 21 + }, + { + "epoch": 0.07260726072607261, + "grad_norm": 
0.6542597896181986, + "learning_rate": 9.230769230769232e-06, + "loss": 0.7846421599388123, + "step": 22 + }, + { + "epoch": 0.07590759075907591, + "grad_norm": 0.6269389928815381, + "learning_rate": 9.670329670329671e-06, + "loss": 0.7005743980407715, + "step": 23 + }, + { + "epoch": 0.07920792079207921, + "grad_norm": 0.6603922634859757, + "learning_rate": 1.010989010989011e-05, + "loss": 0.7084314227104187, + "step": 24 + }, + { + "epoch": 0.08250825082508251, + "grad_norm": 0.6856248928818359, + "learning_rate": 1.054945054945055e-05, + "loss": 0.7310304641723633, + "step": 25 + }, + { + "epoch": 0.0858085808580858, + "grad_norm": 0.5728331825854258, + "learning_rate": 1.098901098901099e-05, + "loss": 0.7056888341903687, + "step": 26 + }, + { + "epoch": 0.0891089108910891, + "grad_norm": 0.47956485465857923, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.6987950205802917, + "step": 27 + }, + { + "epoch": 0.0924092409240924, + "grad_norm": 0.47407141179043555, + "learning_rate": 1.186813186813187e-05, + "loss": 0.7319807410240173, + "step": 28 + }, + { + "epoch": 0.09570957095709572, + "grad_norm": 0.4856924244101555, + "learning_rate": 1.230769230769231e-05, + "loss": 0.6983063220977783, + "step": 29 + }, + { + "epoch": 0.09900990099009901, + "grad_norm": 0.49122925908544063, + "learning_rate": 1.2747252747252747e-05, + "loss": 0.70492023229599, + "step": 30 + }, + { + "epoch": 0.10231023102310231, + "grad_norm": 0.4556788168903923, + "learning_rate": 1.3186813186813187e-05, + "loss": 0.7376629114151001, + "step": 31 + }, + { + "epoch": 0.10561056105610561, + "grad_norm": 0.4272838300827657, + "learning_rate": 1.3626373626373627e-05, + "loss": 0.6623936295509338, + "step": 32 + }, + { + "epoch": 0.10891089108910891, + "grad_norm": 0.40886227927218277, + "learning_rate": 1.4065934065934068e-05, + "loss": 0.7136330604553223, + "step": 33 + }, + { + "epoch": 0.11221122112211221, + "grad_norm": 0.37821179606418975, + "learning_rate": 1.4505494505494506e-05, 
+ "loss": 0.7113747596740723, + "step": 34 + }, + { + "epoch": 0.11551155115511551, + "grad_norm": 0.4538557716923258, + "learning_rate": 1.4945054945054947e-05, + "loss": 0.8252867460250854, + "step": 35 + }, + { + "epoch": 0.1188118811881188, + "grad_norm": 0.3875808052898815, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.7406599521636963, + "step": 36 + }, + { + "epoch": 0.12211221122112212, + "grad_norm": 0.3503240143986989, + "learning_rate": 1.5824175824175826e-05, + "loss": 0.6572297811508179, + "step": 37 + }, + { + "epoch": 0.1254125412541254, + "grad_norm": 0.3779655372487014, + "learning_rate": 1.6263736263736265e-05, + "loss": 0.7520949840545654, + "step": 38 + }, + { + "epoch": 0.12871287128712872, + "grad_norm": 0.36968690038350466, + "learning_rate": 1.6703296703296707e-05, + "loss": 0.6861323118209839, + "step": 39 + }, + { + "epoch": 0.132013201320132, + "grad_norm": 0.3724328241107235, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6818518042564392, + "step": 40 + }, + { + "epoch": 0.1353135313531353, + "grad_norm": 0.35542054984937593, + "learning_rate": 1.7582417582417584e-05, + "loss": 0.6663186550140381, + "step": 41 + }, + { + "epoch": 0.13861386138613863, + "grad_norm": 0.3441266617586836, + "learning_rate": 1.8021978021978023e-05, + "loss": 0.6492191553115845, + "step": 42 + }, + { + "epoch": 0.1419141914191419, + "grad_norm": 0.3478448092762331, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.6444741487503052, + "step": 43 + }, + { + "epoch": 0.14521452145214522, + "grad_norm": 0.34951148057960574, + "learning_rate": 1.8901098901098903e-05, + "loss": 0.6476814150810242, + "step": 44 + }, + { + "epoch": 0.1485148514851485, + "grad_norm": 0.3356672452160599, + "learning_rate": 1.9340659340659342e-05, + "loss": 0.6660827994346619, + "step": 45 + }, + { + "epoch": 0.15181518151815182, + "grad_norm": 0.30809956365723695, + "learning_rate": 1.9780219780219784e-05, + "loss": 0.6924091577529907, + "step": 46 + }, + { + 
"epoch": 0.1551155115511551, + "grad_norm": 0.9030699054312887, + "learning_rate": 2.021978021978022e-05, + "loss": 0.6899605989456177, + "step": 47 + }, + { + "epoch": 0.15841584158415842, + "grad_norm": 0.35784060194946976, + "learning_rate": 2.0659340659340665e-05, + "loss": 0.7242028713226318, + "step": 48 + }, + { + "epoch": 0.1617161716171617, + "grad_norm": 0.3093966721093651, + "learning_rate": 2.10989010989011e-05, + "loss": 0.6203902959823608, + "step": 49 + }, + { + "epoch": 0.16501650165016502, + "grad_norm": 0.4242705872636108, + "learning_rate": 2.153846153846154e-05, + "loss": 0.6420010328292847, + "step": 50 + }, + { + "epoch": 0.16831683168316833, + "grad_norm": 0.35079960590346965, + "learning_rate": 2.197802197802198e-05, + "loss": 0.7517598867416382, + "step": 51 + }, + { + "epoch": 0.1716171617161716, + "grad_norm": 0.3078803790362521, + "learning_rate": 2.241758241758242e-05, + "loss": 0.6568161249160767, + "step": 52 + }, + { + "epoch": 0.17491749174917492, + "grad_norm": 0.34666662805484005, + "learning_rate": 2.2857142857142858e-05, + "loss": 0.7348504662513733, + "step": 53 + }, + { + "epoch": 0.1782178217821782, + "grad_norm": 0.302791415801781, + "learning_rate": 2.32967032967033e-05, + "loss": 0.6164949536323547, + "step": 54 + }, + { + "epoch": 0.18151815181518152, + "grad_norm": 0.33732756727763136, + "learning_rate": 2.373626373626374e-05, + "loss": 0.6505363583564758, + "step": 55 + }, + { + "epoch": 0.1848184818481848, + "grad_norm": 0.34780152362496847, + "learning_rate": 2.4175824175824177e-05, + "loss": 0.7562520503997803, + "step": 56 + }, + { + "epoch": 0.18811881188118812, + "grad_norm": 0.3310895358869482, + "learning_rate": 2.461538461538462e-05, + "loss": 0.6943148374557495, + "step": 57 + }, + { + "epoch": 0.19141914191419143, + "grad_norm": 0.3367877938063833, + "learning_rate": 2.5054945054945058e-05, + "loss": 0.6571655869483948, + "step": 58 + }, + { + "epoch": 0.19471947194719472, + "grad_norm": 0.32103256018771714, 
+ "learning_rate": 2.5494505494505493e-05, + "loss": 0.7229321002960205, + "step": 59 + }, + { + "epoch": 0.19801980198019803, + "grad_norm": 0.30468399230672144, + "learning_rate": 2.593406593406594e-05, + "loss": 0.6307672262191772, + "step": 60 + }, + { + "epoch": 0.20132013201320131, + "grad_norm": 0.3282635121595526, + "learning_rate": 2.6373626373626374e-05, + "loss": 0.6336506009101868, + "step": 61 + }, + { + "epoch": 0.20462046204620463, + "grad_norm": 0.3280360563022675, + "learning_rate": 2.6813186813186813e-05, + "loss": 0.6492213010787964, + "step": 62 + }, + { + "epoch": 0.2079207920792079, + "grad_norm": 0.3292430577817229, + "learning_rate": 2.7252747252747255e-05, + "loss": 0.6763280034065247, + "step": 63 + }, + { + "epoch": 0.21122112211221122, + "grad_norm": 0.47832355846700536, + "learning_rate": 2.7692307692307694e-05, + "loss": 0.7322396039962769, + "step": 64 + }, + { + "epoch": 0.2145214521452145, + "grad_norm": 0.31915340164178446, + "learning_rate": 2.8131868131868136e-05, + "loss": 0.7080870270729065, + "step": 65 + }, + { + "epoch": 0.21782178217821782, + "grad_norm": 0.3227571040968621, + "learning_rate": 2.8571428571428574e-05, + "loss": 0.6054466962814331, + "step": 66 + }, + { + "epoch": 0.22112211221122113, + "grad_norm": 0.33375713186655664, + "learning_rate": 2.9010989010989013e-05, + "loss": 0.6782290935516357, + "step": 67 + }, + { + "epoch": 0.22442244224422442, + "grad_norm": 0.3437770801965916, + "learning_rate": 2.9450549450549455e-05, + "loss": 0.6804753541946411, + "step": 68 + }, + { + "epoch": 0.22772277227722773, + "grad_norm": 0.3228427319313703, + "learning_rate": 2.9890109890109894e-05, + "loss": 0.6493992805480957, + "step": 69 + }, + { + "epoch": 0.23102310231023102, + "grad_norm": 0.3540211756840673, + "learning_rate": 3.0329670329670332e-05, + "loss": 0.6263789534568787, + "step": 70 + }, + { + "epoch": 0.23432343234323433, + "grad_norm": 0.34989089824503405, + "learning_rate": 3.0769230769230774e-05, + "loss": 
0.6960322856903076, + "step": 71 + }, + { + "epoch": 0.2376237623762376, + "grad_norm": 0.33624443163866324, + "learning_rate": 3.120879120879121e-05, + "loss": 0.6146604418754578, + "step": 72 + }, + { + "epoch": 0.24092409240924093, + "grad_norm": 0.39618402867027047, + "learning_rate": 3.164835164835165e-05, + "loss": 0.6361377239227295, + "step": 73 + }, + { + "epoch": 0.24422442244224424, + "grad_norm": 0.361603087273114, + "learning_rate": 3.2087912087912094e-05, + "loss": 0.636134147644043, + "step": 74 + }, + { + "epoch": 0.24752475247524752, + "grad_norm": 0.37985663132790304, + "learning_rate": 3.252747252747253e-05, + "loss": 0.5936564803123474, + "step": 75 + }, + { + "epoch": 0.2508250825082508, + "grad_norm": 0.35883234873646996, + "learning_rate": 3.296703296703297e-05, + "loss": 0.6001103520393372, + "step": 76 + }, + { + "epoch": 0.25412541254125415, + "grad_norm": 0.35227803701073973, + "learning_rate": 3.340659340659341e-05, + "loss": 0.6254594326019287, + "step": 77 + }, + { + "epoch": 0.25742574257425743, + "grad_norm": 0.3563257650896171, + "learning_rate": 3.384615384615385e-05, + "loss": 0.6457959413528442, + "step": 78 + }, + { + "epoch": 0.2607260726072607, + "grad_norm": 0.37234316340556584, + "learning_rate": 3.4285714285714284e-05, + "loss": 0.6186954975128174, + "step": 79 + }, + { + "epoch": 0.264026402640264, + "grad_norm": 0.35352748449766547, + "learning_rate": 3.4725274725274726e-05, + "loss": 0.6175529956817627, + "step": 80 + }, + { + "epoch": 0.26732673267326734, + "grad_norm": 0.35441369709658355, + "learning_rate": 3.516483516483517e-05, + "loss": 0.6694468259811401, + "step": 81 + }, + { + "epoch": 0.2706270627062706, + "grad_norm": 0.39955400784840756, + "learning_rate": 3.56043956043956e-05, + "loss": 0.627490222454071, + "step": 82 + }, + { + "epoch": 0.2739273927392739, + "grad_norm": 0.38314031523497477, + "learning_rate": 3.6043956043956045e-05, + "loss": 0.6410495638847351, + "step": 83 + }, + { + "epoch": 
0.27722772277227725, + "grad_norm": 0.36926215386141575, + "learning_rate": 3.648351648351649e-05, + "loss": 0.6305102109909058, + "step": 84 + }, + { + "epoch": 0.28052805280528054, + "grad_norm": 0.38364118080284076, + "learning_rate": 3.692307692307693e-05, + "loss": 0.6558895111083984, + "step": 85 + }, + { + "epoch": 0.2838283828382838, + "grad_norm": 0.3370292682974053, + "learning_rate": 3.7362637362637365e-05, + "loss": 0.6029388308525085, + "step": 86 + }, + { + "epoch": 0.2871287128712871, + "grad_norm": 0.39541874871701704, + "learning_rate": 3.7802197802197807e-05, + "loss": 0.6551017761230469, + "step": 87 + }, + { + "epoch": 0.29042904290429045, + "grad_norm": 0.3629036550044273, + "learning_rate": 3.824175824175825e-05, + "loss": 0.6588809490203857, + "step": 88 + }, + { + "epoch": 0.29372937293729373, + "grad_norm": 0.37786447228212183, + "learning_rate": 3.8681318681318684e-05, + "loss": 0.614648699760437, + "step": 89 + }, + { + "epoch": 0.297029702970297, + "grad_norm": 0.42911861803278684, + "learning_rate": 3.9120879120879126e-05, + "loss": 0.7034356594085693, + "step": 90 + }, + { + "epoch": 0.30033003300330036, + "grad_norm": 0.3707184094312094, + "learning_rate": 3.956043956043957e-05, + "loss": 0.6908263564109802, + "step": 91 + }, + { + "epoch": 0.30363036303630364, + "grad_norm": 0.38262186656216063, + "learning_rate": 4e-05, + "loss": 0.6882215738296509, + "step": 92 + }, + { + "epoch": 0.3069306930693069, + "grad_norm": 0.3709464296309744, + "learning_rate": 3.999985249980169e-05, + "loss": 0.6377270221710205, + "step": 93 + }, + { + "epoch": 0.3102310231023102, + "grad_norm": 0.3412837406106036, + "learning_rate": 3.999941000138238e-05, + "loss": 0.6735270619392395, + "step": 94 + }, + { + "epoch": 0.31353135313531355, + "grad_norm": 0.40165192879996064, + "learning_rate": 3.999867251126893e-05, + "loss": 0.6934541463851929, + "step": 95 + }, + { + "epoch": 0.31683168316831684, + "grad_norm": 0.34707128601816045, + "learning_rate": 
3.9997640040339335e-05, + "loss": 0.6367039084434509, + "step": 96 + }, + { + "epoch": 0.3201320132013201, + "grad_norm": 0.4268828113970776, + "learning_rate": 3.999631260382257e-05, + "loss": 0.6274522542953491, + "step": 97 + }, + { + "epoch": 0.3234323432343234, + "grad_norm": 0.454428833020686, + "learning_rate": 3.999469022129834e-05, + "loss": 0.5874066352844238, + "step": 98 + }, + { + "epoch": 0.32673267326732675, + "grad_norm": 0.4200675840489775, + "learning_rate": 3.9992772916696824e-05, + "loss": 0.6175942420959473, + "step": 99 + }, + { + "epoch": 0.33003300330033003, + "grad_norm": 0.3796321080056305, + "learning_rate": 3.99905607182983e-05, + "loss": 0.5625832080841064, + "step": 100 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.39108856096759403, + "learning_rate": 3.998805365873274e-05, + "loss": 0.6153020262718201, + "step": 101 + }, + { + "epoch": 0.33663366336633666, + "grad_norm": 0.3873560194436071, + "learning_rate": 3.998525177497932e-05, + "loss": 0.5585426092147827, + "step": 102 + }, + { + "epoch": 0.33993399339933994, + "grad_norm": 0.4084712106325698, + "learning_rate": 3.998215510836589e-05, + "loss": 0.6586359739303589, + "step": 103 + }, + { + "epoch": 0.3432343234323432, + "grad_norm": 0.4383246876899704, + "learning_rate": 3.997876370456833e-05, + "loss": 0.62096107006073, + "step": 104 + }, + { + "epoch": 0.3465346534653465, + "grad_norm": 0.4026893562706946, + "learning_rate": 3.997507761360993e-05, + "loss": 0.6059336066246033, + "step": 105 + }, + { + "epoch": 0.34983498349834985, + "grad_norm": 0.46586240044914223, + "learning_rate": 3.997109688986059e-05, + "loss": 0.617970883846283, + "step": 106 + }, + { + "epoch": 0.35313531353135313, + "grad_norm": 0.44949199032710474, + "learning_rate": 3.9966821592036066e-05, + "loss": 0.6453397274017334, + "step": 107 + }, + { + "epoch": 0.3564356435643564, + "grad_norm": 0.4794978158156406, + "learning_rate": 3.996225178319709e-05, + "loss": 0.6371763348579407, + "step": 
108 + }, + { + "epoch": 0.35973597359735976, + "grad_norm": 0.4463512391721941, + "learning_rate": 3.9957387530748435e-05, + "loss": 0.5971124172210693, + "step": 109 + }, + { + "epoch": 0.36303630363036304, + "grad_norm": 0.368079413354641, + "learning_rate": 3.995222890643792e-05, + "loss": 0.5679532289505005, + "step": 110 + }, + { + "epoch": 0.36633663366336633, + "grad_norm": 0.43733705586285254, + "learning_rate": 3.9946775986355346e-05, + "loss": 0.5988069772720337, + "step": 111 + }, + { + "epoch": 0.3696369636963696, + "grad_norm": 0.38235582844960775, + "learning_rate": 3.994102885093141e-05, + "loss": 0.6352983713150024, + "step": 112 + }, + { + "epoch": 0.37293729372937295, + "grad_norm": 0.389837871286893, + "learning_rate": 3.993498758493646e-05, + "loss": 0.58957839012146, + "step": 113 + }, + { + "epoch": 0.37623762376237624, + "grad_norm": 0.40399856168911097, + "learning_rate": 3.992865227747929e-05, + "loss": 0.6396822929382324, + "step": 114 + }, + { + "epoch": 0.3795379537953795, + "grad_norm": 0.38891668976227123, + "learning_rate": 3.992202302200582e-05, + "loss": 0.6314754486083984, + "step": 115 + }, + { + "epoch": 0.38283828382838286, + "grad_norm": 0.4087528543828922, + "learning_rate": 3.991509991629769e-05, + "loss": 0.673650860786438, + "step": 116 + }, + { + "epoch": 0.38613861386138615, + "grad_norm": 0.36330054292020786, + "learning_rate": 3.990788306247085e-05, + "loss": 0.5813701152801514, + "step": 117 + }, + { + "epoch": 0.38943894389438943, + "grad_norm": 0.4247110332678589, + "learning_rate": 3.990037256697404e-05, + "loss": 0.6419334411621094, + "step": 118 + }, + { + "epoch": 0.3927392739273927, + "grad_norm": 0.4244126002071751, + "learning_rate": 3.989256854058721e-05, + "loss": 0.6319208145141602, + "step": 119 + }, + { + "epoch": 0.39603960396039606, + "grad_norm": 0.3651632933942853, + "learning_rate": 3.988447109841991e-05, + "loss": 0.5989845991134644, + "step": 120 + }, + { + "epoch": 0.39933993399339934, + 
"grad_norm": 0.393158353074077, + "learning_rate": 3.987608035990957e-05, + "loss": 0.5853303670883179, + "step": 121 + }, + { + "epoch": 0.40264026402640263, + "grad_norm": 0.35965233332276103, + "learning_rate": 3.986739644881975e-05, + "loss": 0.6115257143974304, + "step": 122 + }, + { + "epoch": 0.40594059405940597, + "grad_norm": 0.4252711474203845, + "learning_rate": 3.985841949323831e-05, + "loss": 0.6440504789352417, + "step": 123 + }, + { + "epoch": 0.40924092409240925, + "grad_norm": 0.5578797297271848, + "learning_rate": 3.984914962557553e-05, + "loss": 0.5765030384063721, + "step": 124 + }, + { + "epoch": 0.41254125412541254, + "grad_norm": 0.4362455029468141, + "learning_rate": 3.983958698256214e-05, + "loss": 0.6387556791305542, + "step": 125 + }, + { + "epoch": 0.4158415841584158, + "grad_norm": 0.39274811063076087, + "learning_rate": 3.98297317052473e-05, + "loss": 0.6263147592544556, + "step": 126 + }, + { + "epoch": 0.41914191419141916, + "grad_norm": 0.42682589637163704, + "learning_rate": 3.981958393899656e-05, + "loss": 0.6091845035552979, + "step": 127 + }, + { + "epoch": 0.42244224422442245, + "grad_norm": 0.4033131171538041, + "learning_rate": 3.980914383348967e-05, + "loss": 0.6458015441894531, + "step": 128 + }, + { + "epoch": 0.42574257425742573, + "grad_norm": 0.3881606915462862, + "learning_rate": 3.9798411542718395e-05, + "loss": 0.6115552186965942, + "step": 129 + }, + { + "epoch": 0.429042904290429, + "grad_norm": 0.38910317938225847, + "learning_rate": 3.978738722498423e-05, + "loss": 0.6427993774414062, + "step": 130 + }, + { + "epoch": 0.43234323432343236, + "grad_norm": 0.36836380096259913, + "learning_rate": 3.977607104289609e-05, + "loss": 0.6121467351913452, + "step": 131 + }, + { + "epoch": 0.43564356435643564, + "grad_norm": 0.3743062201629088, + "learning_rate": 3.9764463163367875e-05, + "loss": 0.5951442718505859, + "step": 132 + }, + { + "epoch": 0.4389438943894389, + "grad_norm": 0.3699746655092952, + "learning_rate": 
3.9752563757616045e-05, + "loss": 0.6639472842216492, + "step": 133 + }, + { + "epoch": 0.44224422442244227, + "grad_norm": 0.37398919831188604, + "learning_rate": 3.974037300115706e-05, + "loss": 0.6084764003753662, + "step": 134 + }, + { + "epoch": 0.44554455445544555, + "grad_norm": 0.37043195153646374, + "learning_rate": 3.972789107380484e-05, + "loss": 0.6211085915565491, + "step": 135 + }, + { + "epoch": 0.44884488448844884, + "grad_norm": 0.3509837417375981, + "learning_rate": 3.9715118159668046e-05, + "loss": 0.6098147034645081, + "step": 136 + }, + { + "epoch": 0.4521452145214521, + "grad_norm": 0.3350785925775803, + "learning_rate": 3.970205444714742e-05, + "loss": 0.6155884861946106, + "step": 137 + }, + { + "epoch": 0.45544554455445546, + "grad_norm": 0.38529379761335925, + "learning_rate": 3.9688700128932975e-05, + "loss": 0.5984665155410767, + "step": 138 + }, + { + "epoch": 0.45874587458745875, + "grad_norm": 0.45130397769476205, + "learning_rate": 3.967505540200117e-05, + "loss": 0.6656880378723145, + "step": 139 + }, + { + "epoch": 0.46204620462046203, + "grad_norm": 0.3277874952439621, + "learning_rate": 3.966112046761201e-05, + "loss": 0.6607398390769958, + "step": 140 + }, + { + "epoch": 0.46534653465346537, + "grad_norm": 2.6727599644732267, + "learning_rate": 3.9646895531306046e-05, + "loss": 0.6578342914581299, + "step": 141 + }, + { + "epoch": 0.46864686468646866, + "grad_norm": 0.47429126269764676, + "learning_rate": 3.963238080290136e-05, + "loss": 0.6103699803352356, + "step": 142 + }, + { + "epoch": 0.47194719471947194, + "grad_norm": 0.32652590291724093, + "learning_rate": 3.96175764964905e-05, + "loss": 0.5484676957130432, + "step": 143 + }, + { + "epoch": 0.4752475247524752, + "grad_norm": 0.4531372955951849, + "learning_rate": 3.960248283043727e-05, + "loss": 0.578776478767395, + "step": 144 + }, + { + "epoch": 0.47854785478547857, + "grad_norm": 0.3685580706465372, + "learning_rate": 3.958710002737355e-05, + "loss": 
0.6184446811676025, + "step": 145 + }, + { + "epoch": 0.48184818481848185, + "grad_norm": 0.3584005630962511, + "learning_rate": 3.9571428314195984e-05, + "loss": 0.6307916045188904, + "step": 146 + }, + { + "epoch": 0.48514851485148514, + "grad_norm": 0.4049679254542765, + "learning_rate": 3.955546792206265e-05, + "loss": 0.6064697504043579, + "step": 147 + }, + { + "epoch": 0.4884488448844885, + "grad_norm": 0.3846258995775384, + "learning_rate": 3.953921908638966e-05, + "loss": 0.6055655479431152, + "step": 148 + }, + { + "epoch": 0.49174917491749176, + "grad_norm": 0.3643318343315678, + "learning_rate": 3.952268204684765e-05, + "loss": 0.5856431126594543, + "step": 149 + }, + { + "epoch": 0.49504950495049505, + "grad_norm": 0.3854715521866927, + "learning_rate": 3.950585704735829e-05, + "loss": 0.6634635925292969, + "step": 150 + }, + { + "epoch": 0.49834983498349833, + "grad_norm": 0.34338835592304534, + "learning_rate": 3.948874433609065e-05, + "loss": 0.5880753397941589, + "step": 151 + }, + { + "epoch": 0.5016501650165016, + "grad_norm": 0.3481018111538647, + "learning_rate": 3.947134416545757e-05, + "loss": 0.5594221949577332, + "step": 152 + }, + { + "epoch": 0.504950495049505, + "grad_norm": 0.6570220882473125, + "learning_rate": 3.94536567921119e-05, + "loss": 0.664652407169342, + "step": 153 + }, + { + "epoch": 0.5082508250825083, + "grad_norm": 0.340048306266198, + "learning_rate": 3.9435682476942755e-05, + "loss": 0.6002815961837769, + "step": 154 + }, + { + "epoch": 0.5115511551155115, + "grad_norm": 0.3488682381523364, + "learning_rate": 3.941742148507163e-05, + "loss": 0.5905177593231201, + "step": 155 + }, + { + "epoch": 0.5148514851485149, + "grad_norm": 0.33062666453941425, + "learning_rate": 3.939887408584853e-05, + "loss": 0.5636795163154602, + "step": 156 + }, + { + "epoch": 0.5181518151815182, + "grad_norm": 0.35862086331061066, + "learning_rate": 3.938004055284796e-05, + "loss": 0.5639582276344299, + "step": 157 + }, + { + "epoch": 
0.5214521452145214, + "grad_norm": 0.31769111173717246, + "learning_rate": 3.9360921163864895e-05, + "loss": 0.6515591144561768, + "step": 158 + }, + { + "epoch": 0.5247524752475248, + "grad_norm": 0.38401455820073427, + "learning_rate": 3.934151620091071e-05, + "loss": 0.5721683502197266, + "step": 159 + }, + { + "epoch": 0.528052805280528, + "grad_norm": 0.3284331200684813, + "learning_rate": 3.9321825950209e-05, + "loss": 0.5801802277565002, + "step": 160 + }, + { + "epoch": 0.5313531353135313, + "grad_norm": 0.3493998878359796, + "learning_rate": 3.9301850702191344e-05, + "loss": 0.603084921836853, + "step": 161 + }, + { + "epoch": 0.5346534653465347, + "grad_norm": 0.32233519110844616, + "learning_rate": 3.928159075149304e-05, + "loss": 0.6376925110816956, + "step": 162 + }, + { + "epoch": 0.5379537953795379, + "grad_norm": 0.35833134197704153, + "learning_rate": 3.926104639694877e-05, + "loss": 0.5764102935791016, + "step": 163 + }, + { + "epoch": 0.5412541254125413, + "grad_norm": 0.3523567199445224, + "learning_rate": 3.924021794158818e-05, + "loss": 0.6102188229560852, + "step": 164 + }, + { + "epoch": 0.5445544554455446, + "grad_norm": 0.36694222553878597, + "learning_rate": 3.921910569263139e-05, + "loss": 0.5833287835121155, + "step": 165 + }, + { + "epoch": 0.5478547854785478, + "grad_norm": 0.37179813198977807, + "learning_rate": 3.919770996148448e-05, + "loss": 0.5891385078430176, + "step": 166 + }, + { + "epoch": 0.5511551155115512, + "grad_norm": 0.3507301680001106, + "learning_rate": 3.917603106373493e-05, + "loss": 0.5838547348976135, + "step": 167 + }, + { + "epoch": 0.5544554455445545, + "grad_norm": 0.3134001311174479, + "learning_rate": 3.9154069319146904e-05, + "loss": 0.5727800726890564, + "step": 168 + }, + { + "epoch": 0.5577557755775577, + "grad_norm": 0.33531781904204605, + "learning_rate": 3.913182505165656e-05, + "loss": 0.6102641224861145, + "step": 169 + }, + { + "epoch": 0.5610561056105611, + "grad_norm": 0.35178976522027133, + 
"learning_rate": 3.91092985893673e-05, + "loss": 0.5718260407447815, + "step": 170 + }, + { + "epoch": 0.5643564356435643, + "grad_norm": 0.47006108726602863, + "learning_rate": 3.908649026454488e-05, + "loss": 0.6308504939079285, + "step": 171 + }, + { + "epoch": 0.5676567656765676, + "grad_norm": 0.3687514240026255, + "learning_rate": 3.906340041361255e-05, + "loss": 0.6089432835578918, + "step": 172 + }, + { + "epoch": 0.570957095709571, + "grad_norm": 0.3586674884704593, + "learning_rate": 3.904002937714606e-05, + "loss": 0.6583501696586609, + "step": 173 + }, + { + "epoch": 0.5742574257425742, + "grad_norm": 0.3399808047240735, + "learning_rate": 3.9016377499868666e-05, + "loss": 0.6108609437942505, + "step": 174 + }, + { + "epoch": 0.5775577557755776, + "grad_norm": 0.3840880337988826, + "learning_rate": 3.899244513064603e-05, + "loss": 0.63509202003479, + "step": 175 + }, + { + "epoch": 0.5808580858085809, + "grad_norm": 0.3725541644477348, + "learning_rate": 3.896823262248107e-05, + "loss": 0.5759241580963135, + "step": 176 + }, + { + "epoch": 0.5841584158415841, + "grad_norm": 0.30755721985114126, + "learning_rate": 3.8943740332508754e-05, + "loss": 0.6148169040679932, + "step": 177 + }, + { + "epoch": 0.5874587458745875, + "grad_norm": 0.3916756097057637, + "learning_rate": 3.891896862199086e-05, + "loss": 0.5266364216804504, + "step": 178 + }, + { + "epoch": 0.5907590759075908, + "grad_norm": 0.3417854779376455, + "learning_rate": 3.88939178563106e-05, + "loss": 0.5626640319824219, + "step": 179 + }, + { + "epoch": 0.594059405940594, + "grad_norm": 0.33526488525207704, + "learning_rate": 3.886858840496727e-05, + "loss": 0.6063880920410156, + "step": 180 + }, + { + "epoch": 0.5973597359735974, + "grad_norm": 0.37344333250119977, + "learning_rate": 3.884298064157077e-05, + "loss": 0.5979235768318176, + "step": 181 + }, + { + "epoch": 0.6006600660066007, + "grad_norm": 0.3835133271197793, + "learning_rate": 3.881709494383612e-05, + "loss": 
0.6628611087799072, + "step": 182 + }, + { + "epoch": 0.6039603960396039, + "grad_norm": 0.4344526004756121, + "learning_rate": 3.879093169357789e-05, + "loss": 0.6215270757675171, + "step": 183 + }, + { + "epoch": 0.6072607260726073, + "grad_norm": 0.3644174435488244, + "learning_rate": 3.876449127670452e-05, + "loss": 0.6148592233657837, + "step": 184 + }, + { + "epoch": 0.6105610561056105, + "grad_norm": 0.3619226265536735, + "learning_rate": 3.87377740832127e-05, + "loss": 0.6254778504371643, + "step": 185 + }, + { + "epoch": 0.6138613861386139, + "grad_norm": 0.3492162593840536, + "learning_rate": 3.871078050718155e-05, + "loss": 0.6025378704071045, + "step": 186 + }, + { + "epoch": 0.6171617161716172, + "grad_norm": 0.3866924759539626, + "learning_rate": 3.8683510946766866e-05, + "loss": 0.5887518525123596, + "step": 187 + }, + { + "epoch": 0.6204620462046204, + "grad_norm": 0.3357229513721586, + "learning_rate": 3.865596580419519e-05, + "loss": 0.6180317401885986, + "step": 188 + }, + { + "epoch": 0.6237623762376238, + "grad_norm": 0.3594949077768003, + "learning_rate": 3.8628145485757925e-05, + "loss": 0.5970651507377625, + "step": 189 + }, + { + "epoch": 0.6270627062706271, + "grad_norm": 0.3496234009951303, + "learning_rate": 3.860005040180533e-05, + "loss": 0.6027296781539917, + "step": 190 + }, + { + "epoch": 0.6303630363036303, + "grad_norm": 0.3830042583584045, + "learning_rate": 3.857168096674044e-05, + "loss": 0.6326305270195007, + "step": 191 + }, + { + "epoch": 0.6336633663366337, + "grad_norm": 0.333508477943962, + "learning_rate": 3.854303759901299e-05, + "loss": 0.6508482694625854, + "step": 192 + }, + { + "epoch": 0.636963696369637, + "grad_norm": 0.352327105927571, + "learning_rate": 3.851412072111322e-05, + "loss": 0.6088548302650452, + "step": 193 + }, + { + "epoch": 0.6402640264026402, + "grad_norm": 0.36196379228138037, + "learning_rate": 3.8484930759565645e-05, + "loss": 0.5975607633590698, + "step": 194 + }, + { + "epoch": 
0.6435643564356436, + "grad_norm": 0.3231664855297077, + "learning_rate": 3.845546814492279e-05, + "loss": 0.5467930436134338, + "step": 195 + }, + { + "epoch": 0.6468646864686468, + "grad_norm": 0.35556526722817444, + "learning_rate": 3.8425733311758795e-05, + "loss": 0.583969235420227, + "step": 196 + }, + { + "epoch": 0.6501650165016502, + "grad_norm": 0.331073543443887, + "learning_rate": 3.8395726698663045e-05, + "loss": 0.6007376909255981, + "step": 197 + }, + { + "epoch": 0.6534653465346535, + "grad_norm": 0.34786293006180385, + "learning_rate": 3.836544874823368e-05, + "loss": 0.5971908569335938, + "step": 198 + }, + { + "epoch": 0.6567656765676567, + "grad_norm": 0.3128647628132879, + "learning_rate": 3.8334899907071064e-05, + "loss": 0.592069685459137, + "step": 199 + }, + { + "epoch": 0.6600660066006601, + "grad_norm": 0.3308125796746202, + "learning_rate": 3.830408062577121e-05, + "loss": 0.6188071966171265, + "step": 200 + }, + { + "epoch": 0.6633663366336634, + "grad_norm": 0.34889077565364124, + "learning_rate": 3.827299135891913e-05, + "loss": 0.5976923704147339, + "step": 201 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.33443153994631497, + "learning_rate": 3.8241632565082124e-05, + "loss": 0.6120954155921936, + "step": 202 + }, + { + "epoch": 0.66996699669967, + "grad_norm": 0.3573334503206899, + "learning_rate": 3.821000470680303e-05, + "loss": 0.6661979556083679, + "step": 203 + }, + { + "epoch": 0.6732673267326733, + "grad_norm": 0.34662331225184934, + "learning_rate": 3.8178108250593384e-05, + "loss": 0.5853559970855713, + "step": 204 + }, + { + "epoch": 0.6765676567656765, + "grad_norm": 0.33823171869993424, + "learning_rate": 3.814594366692654e-05, + "loss": 0.6648768186569214, + "step": 205 + }, + { + "epoch": 0.6798679867986799, + "grad_norm": 0.4178878629038068, + "learning_rate": 3.8113511430230745e-05, + "loss": 0.5893838405609131, + "step": 206 + }, + { + "epoch": 0.6831683168316832, + "grad_norm": 0.36858896529016355, + 
"learning_rate": 3.808081201888214e-05, + "loss": 0.6177140474319458, + "step": 207 + }, + { + "epoch": 0.6864686468646864, + "grad_norm": 0.38061402245158527, + "learning_rate": 3.8047845915197695e-05, + "loss": 0.5793695449829102, + "step": 208 + }, + { + "epoch": 0.6897689768976898, + "grad_norm": 0.3591315376932048, + "learning_rate": 3.8014613605428084e-05, + "loss": 0.5571605563163757, + "step": 209 + }, + { + "epoch": 0.693069306930693, + "grad_norm": 0.33319862057164595, + "learning_rate": 3.798111557975053e-05, + "loss": 0.5945760011672974, + "step": 210 + }, + { + "epoch": 0.6963696369636964, + "grad_norm": 0.3495679574237745, + "learning_rate": 3.7947352332261586e-05, + "loss": 0.600873589515686, + "step": 211 + }, + { + "epoch": 0.6996699669966997, + "grad_norm": 0.37390147639764304, + "learning_rate": 3.791332436096983e-05, + "loss": 0.6234852075576782, + "step": 212 + }, + { + "epoch": 0.7029702970297029, + "grad_norm": 0.3571653694610809, + "learning_rate": 3.7879032167788494e-05, + "loss": 0.6129578948020935, + "step": 213 + }, + { + "epoch": 0.7062706270627063, + "grad_norm": 0.48971881906384135, + "learning_rate": 3.784447625852812e-05, + "loss": 0.6204475164413452, + "step": 214 + }, + { + "epoch": 0.7095709570957096, + "grad_norm": 0.3610294548812676, + "learning_rate": 3.780965714288905e-05, + "loss": 0.6734122037887573, + "step": 215 + }, + { + "epoch": 0.7128712871287128, + "grad_norm": 0.35396639697907356, + "learning_rate": 3.777457533445393e-05, + "loss": 0.5678560137748718, + "step": 216 + }, + { + "epoch": 0.7161716171617162, + "grad_norm": 0.3232076597831296, + "learning_rate": 3.7739231350680135e-05, + "loss": 0.5784683227539062, + "step": 217 + }, + { + "epoch": 0.7194719471947195, + "grad_norm": 0.3540897506756201, + "learning_rate": 3.7703625712892125e-05, + "loss": 0.6060354113578796, + "step": 218 + }, + { + "epoch": 0.7227722772277227, + "grad_norm": 0.35008278157890194, + "learning_rate": 3.766775894627376e-05, + "loss": 
0.6248741745948792, + "step": 219 + }, + { + "epoch": 0.7260726072607261, + "grad_norm": 0.32018676747331787, + "learning_rate": 3.7631631579860553e-05, + "loss": 0.6014479398727417, + "step": 220 + }, + { + "epoch": 0.7293729372937293, + "grad_norm": 0.32068744744726313, + "learning_rate": 3.759524414653189e-05, + "loss": 0.6283233761787415, + "step": 221 + }, + { + "epoch": 0.7326732673267327, + "grad_norm": 0.3047460979670785, + "learning_rate": 3.755859718300313e-05, + "loss": 0.5710185766220093, + "step": 222 + }, + { + "epoch": 0.735973597359736, + "grad_norm": 0.34698489216212486, + "learning_rate": 3.75216912298177e-05, + "loss": 0.6007407903671265, + "step": 223 + }, + { + "epoch": 0.7392739273927392, + "grad_norm": 0.4952362221345831, + "learning_rate": 3.748452683133916e-05, + "loss": 0.6852575540542603, + "step": 224 + }, + { + "epoch": 0.7425742574257426, + "grad_norm": 0.32106680253004655, + "learning_rate": 3.7447104535743115e-05, + "loss": 0.6270833611488342, + "step": 225 + }, + { + "epoch": 0.7458745874587459, + "grad_norm": 0.30214814189665545, + "learning_rate": 3.740942489500916e-05, + "loss": 0.5925471782684326, + "step": 226 + }, + { + "epoch": 0.7491749174917491, + "grad_norm": 0.3171932777170319, + "learning_rate": 3.737148846491275e-05, + "loss": 0.573570728302002, + "step": 227 + }, + { + "epoch": 0.7524752475247525, + "grad_norm": 0.31480815810804524, + "learning_rate": 3.7333295805016986e-05, + "loss": 0.6088368892669678, + "step": 228 + }, + { + "epoch": 0.7557755775577558, + "grad_norm": 0.3103068539492526, + "learning_rate": 3.729484747866435e-05, + "loss": 0.5496470332145691, + "step": 229 + }, + { + "epoch": 0.759075907590759, + "grad_norm": 0.3007603199811456, + "learning_rate": 3.725614405296843e-05, + "loss": 0.6008220314979553, + "step": 230 + }, + { + "epoch": 0.7623762376237624, + "grad_norm": 0.3007492168191884, + "learning_rate": 3.721718609880551e-05, + "loss": 0.5982120037078857, + "step": 231 + }, + { + "epoch": 
0.7656765676567657, + "grad_norm": 0.3010002181490163, + "learning_rate": 3.717797419080618e-05, + "loss": 0.6404559016227722, + "step": 232 + }, + { + "epoch": 0.768976897689769, + "grad_norm": 0.35604106645956024, + "learning_rate": 3.713850890734689e-05, + "loss": 0.5875239372253418, + "step": 233 + }, + { + "epoch": 0.7722772277227723, + "grad_norm": 0.33191901009333297, + "learning_rate": 3.709879083054133e-05, + "loss": 0.5962772369384766, + "step": 234 + }, + { + "epoch": 0.7755775577557755, + "grad_norm": 0.29418628627284477, + "learning_rate": 3.705882054623192e-05, + "loss": 0.5764110684394836, + "step": 235 + }, + { + "epoch": 0.7788778877887789, + "grad_norm": 0.30409612807603364, + "learning_rate": 3.7018598643981165e-05, + "loss": 0.5635858178138733, + "step": 236 + }, + { + "epoch": 0.7821782178217822, + "grad_norm": 0.3039645238556037, + "learning_rate": 3.69781257170629e-05, + "loss": 0.5880881547927856, + "step": 237 + }, + { + "epoch": 0.7854785478547854, + "grad_norm": 0.30606246597511416, + "learning_rate": 3.6937402362453606e-05, + "loss": 0.5644733905792236, + "step": 238 + }, + { + "epoch": 0.7887788778877888, + "grad_norm": 0.328325214152846, + "learning_rate": 3.689642918082358e-05, + "loss": 0.6431151032447815, + "step": 239 + }, + { + "epoch": 0.7920792079207921, + "grad_norm": 0.2863869456911102, + "learning_rate": 3.6855206776528055e-05, + "loss": 0.5848085880279541, + "step": 240 + }, + { + "epoch": 0.7953795379537953, + "grad_norm": 0.3169795193025283, + "learning_rate": 3.681373575759831e-05, + "loss": 0.590021550655365, + "step": 241 + }, + { + "epoch": 0.7986798679867987, + "grad_norm": 0.3630216059086489, + "learning_rate": 3.67720167357327e-05, + "loss": 0.6217919588088989, + "step": 242 + }, + { + "epoch": 0.801980198019802, + "grad_norm": 0.2999270957223198, + "learning_rate": 3.673005032628763e-05, + "loss": 0.6075180172920227, + "step": 243 + }, + { + "epoch": 0.8052805280528053, + "grad_norm": 0.35145967135780704, + 
"learning_rate": 3.668783714826846e-05, + "loss": 0.6078404188156128, + "step": 244 + }, + { + "epoch": 0.8085808580858086, + "grad_norm": 0.32650805345047657, + "learning_rate": 3.664537782432042e-05, + "loss": 0.6297526955604553, + "step": 245 + }, + { + "epoch": 0.8118811881188119, + "grad_norm": 0.32461322862254094, + "learning_rate": 3.660267298071936e-05, + "loss": 0.5684514045715332, + "step": 246 + }, + { + "epoch": 0.8151815181518152, + "grad_norm": 0.32171296221654416, + "learning_rate": 3.655972324736259e-05, + "loss": 0.6192148327827454, + "step": 247 + }, + { + "epoch": 0.8184818481848185, + "grad_norm": 0.3322336621503604, + "learning_rate": 3.6516529257759506e-05, + "loss": 0.5900243520736694, + "step": 248 + }, + { + "epoch": 0.8217821782178217, + "grad_norm": 0.35183312055445004, + "learning_rate": 3.6473091649022337e-05, + "loss": 0.5941751599311829, + "step": 249 + }, + { + "epoch": 0.8250825082508251, + "grad_norm": 0.31255833045908565, + "learning_rate": 3.6429411061856645e-05, + "loss": 0.5744310021400452, + "step": 250 + }, + { + "epoch": 0.8283828382838284, + "grad_norm": 0.3266269251233177, + "learning_rate": 3.6385488140551985e-05, + "loss": 0.5985124707221985, + "step": 251 + }, + { + "epoch": 0.8316831683168316, + "grad_norm": 0.30426711611593643, + "learning_rate": 3.6341323532972294e-05, + "loss": 0.581912636756897, + "step": 252 + }, + { + "epoch": 0.834983498349835, + "grad_norm": 0.3297819735063718, + "learning_rate": 3.629691789054643e-05, + "loss": 0.586786150932312, + "step": 253 + }, + { + "epoch": 0.8382838283828383, + "grad_norm": 0.3074133078124695, + "learning_rate": 3.625227186825848e-05, + "loss": 0.6312603950500488, + "step": 254 + }, + { + "epoch": 0.8415841584158416, + "grad_norm": 0.33007753969064285, + "learning_rate": 3.620738612463818e-05, + "loss": 0.5886626243591309, + "step": 255 + }, + { + "epoch": 0.8448844884488449, + "grad_norm": 0.31334340596765187, + "learning_rate": 3.6162261321751114e-05, + "loss": 
0.5892266035079956, + "step": 256 + }, + { + "epoch": 0.8481848184818482, + "grad_norm": 0.31784442826893616, + "learning_rate": 3.6116898125189045e-05, + "loss": 0.5472115278244019, + "step": 257 + }, + { + "epoch": 0.8514851485148515, + "grad_norm": 0.3456330158902343, + "learning_rate": 3.6071297204059995e-05, + "loss": 0.5981796383857727, + "step": 258 + }, + { + "epoch": 0.8547854785478548, + "grad_norm": 0.3377124553034101, + "learning_rate": 3.6025459230978475e-05, + "loss": 0.6708342432975769, + "step": 259 + }, + { + "epoch": 0.858085808580858, + "grad_norm": 0.3081391395426973, + "learning_rate": 3.597938488205549e-05, + "loss": 0.6306079626083374, + "step": 260 + }, + { + "epoch": 0.8613861386138614, + "grad_norm": 0.3398583824115319, + "learning_rate": 3.59330748368886e-05, + "loss": 0.6098329424858093, + "step": 261 + }, + { + "epoch": 0.8646864686468647, + "grad_norm": 0.32878067719138626, + "learning_rate": 3.588652977855189e-05, + "loss": 0.5617724061012268, + "step": 262 + }, + { + "epoch": 0.8679867986798679, + "grad_norm": 0.34962664282188816, + "learning_rate": 3.58397503935859e-05, + "loss": 0.5780894756317139, + "step": 263 + }, + { + "epoch": 0.8712871287128713, + "grad_norm": 0.32665214019362204, + "learning_rate": 3.5792737371987477e-05, + "loss": 0.578921377658844, + "step": 264 + }, + { + "epoch": 0.8745874587458746, + "grad_norm": 0.36673188949709323, + "learning_rate": 3.574549140719962e-05, + "loss": 0.614944577217102, + "step": 265 + }, + { + "epoch": 0.8778877887788779, + "grad_norm": 0.3248666143164946, + "learning_rate": 3.569801319610125e-05, + "loss": 0.6269869208335876, + "step": 266 + }, + { + "epoch": 0.8811881188118812, + "grad_norm": 0.3338123662452596, + "learning_rate": 3.565030343899693e-05, + "loss": 0.6045581102371216, + "step": 267 + }, + { + "epoch": 0.8844884488448845, + "grad_norm": 0.31011600887091817, + "learning_rate": 3.5602362839606514e-05, + "loss": 0.5872907638549805, + "step": 268 + }, + { + "epoch": 
0.8877887788778878, + "grad_norm": 0.31857062779594814, + "learning_rate": 3.55541921050548e-05, + "loss": 0.6283375024795532, + "step": 269 + }, + { + "epoch": 0.8910891089108911, + "grad_norm": 0.32445751859048455, + "learning_rate": 3.5505791945861076e-05, + "loss": 0.5747002363204956, + "step": 270 + }, + { + "epoch": 0.8943894389438944, + "grad_norm": 0.2923309334474062, + "learning_rate": 3.545716307592864e-05, + "loss": 0.6205827593803406, + "step": 271 + }, + { + "epoch": 0.8976897689768977, + "grad_norm": 0.43972579907455317, + "learning_rate": 3.54083062125343e-05, + "loss": 0.5987251400947571, + "step": 272 + }, + { + "epoch": 0.900990099009901, + "grad_norm": 0.33194286352506225, + "learning_rate": 3.535922207631776e-05, + "loss": 0.6275356411933899, + "step": 273 + }, + { + "epoch": 0.9042904290429042, + "grad_norm": 0.3408278730793354, + "learning_rate": 3.5309911391270996e-05, + "loss": 0.6097655892372131, + "step": 274 + }, + { + "epoch": 0.9075907590759076, + "grad_norm": 0.3441995699777348, + "learning_rate": 3.52603748847276e-05, + "loss": 0.544170618057251, + "step": 275 + }, + { + "epoch": 0.9108910891089109, + "grad_norm": 0.3034867763949278, + "learning_rate": 3.521061328735202e-05, + "loss": 0.5723366141319275, + "step": 276 + }, + { + "epoch": 0.9141914191419142, + "grad_norm": 0.3091145609625042, + "learning_rate": 3.516062733312879e-05, + "loss": 0.5801889896392822, + "step": 277 + }, + { + "epoch": 0.9174917491749175, + "grad_norm": 0.3532845546992122, + "learning_rate": 3.511041775935175e-05, + "loss": 0.5942766666412354, + "step": 278 + }, + { + "epoch": 0.9207920792079208, + "grad_norm": 0.3192035342587887, + "learning_rate": 3.50599853066131e-05, + "loss": 0.5604017972946167, + "step": 279 + }, + { + "epoch": 0.9240924092409241, + "grad_norm": 0.4475571406552253, + "learning_rate": 3.500933071879251e-05, + "loss": 0.6151460409164429, + "step": 280 + }, + { + "epoch": 0.9273927392739274, + "grad_norm": 0.30946498453996385, + 
"learning_rate": 3.495845474304616e-05, + "loss": 0.5854936838150024, + "step": 281 + }, + { + "epoch": 0.9306930693069307, + "grad_norm": 0.3188531409769719, + "learning_rate": 3.490735812979572e-05, + "loss": 0.5586672425270081, + "step": 282 + }, + { + "epoch": 0.933993399339934, + "grad_norm": 0.3250546549981712, + "learning_rate": 3.485604163271721e-05, + "loss": 0.578475832939148, + "step": 283 + }, + { + "epoch": 0.9372937293729373, + "grad_norm": 0.45030229248281484, + "learning_rate": 3.4804506008730015e-05, + "loss": 0.5236382484436035, + "step": 284 + }, + { + "epoch": 0.9405940594059405, + "grad_norm": 0.31677157675280776, + "learning_rate": 3.475275201798559e-05, + "loss": 0.5964822769165039, + "step": 285 + }, + { + "epoch": 0.9438943894389439, + "grad_norm": 0.3221519247617692, + "learning_rate": 3.4700780423856334e-05, + "loss": 0.5551598072052002, + "step": 286 + }, + { + "epoch": 0.9471947194719472, + "grad_norm": 0.31322506983838, + "learning_rate": 3.464859199292429e-05, + "loss": 0.6095103621482849, + "step": 287 + }, + { + "epoch": 0.9504950495049505, + "grad_norm": 0.33333701342858213, + "learning_rate": 3.4596187494969846e-05, + "loss": 0.5893416404724121, + "step": 288 + }, + { + "epoch": 0.9537953795379538, + "grad_norm": 0.31167002926986764, + "learning_rate": 3.454356770296039e-05, + "loss": 0.5992231965065002, + "step": 289 + }, + { + "epoch": 0.9570957095709571, + "grad_norm": 0.3407826991036566, + "learning_rate": 3.4490733393038895e-05, + "loss": 0.6071972250938416, + "step": 290 + }, + { + "epoch": 0.9603960396039604, + "grad_norm": 0.321397588262469, + "learning_rate": 3.443768534451248e-05, + "loss": 0.5836942195892334, + "step": 291 + }, + { + "epoch": 0.9636963696369637, + "grad_norm": 0.3596023570145339, + "learning_rate": 3.4384424339840916e-05, + "loss": 0.5707553625106812, + "step": 292 + }, + { + "epoch": 0.966996699669967, + "grad_norm": 0.326365753033755, + "learning_rate": 3.4330951164625075e-05, + "loss": 
0.5883970260620117, + "step": 293 + }, + { + "epoch": 0.9702970297029703, + "grad_norm": 0.3276030981345682, + "learning_rate": 3.427726660759535e-05, + "loss": 0.6281589269638062, + "step": 294 + }, + { + "epoch": 0.9735973597359736, + "grad_norm": 0.3559560269123216, + "learning_rate": 3.422337146060003e-05, + "loss": 0.6641702651977539, + "step": 295 + }, + { + "epoch": 0.976897689768977, + "grad_norm": 0.34661891319338206, + "learning_rate": 3.4169266518593596e-05, + "loss": 0.6398966312408447, + "step": 296 + }, + { + "epoch": 0.9801980198019802, + "grad_norm": 0.3392015122860613, + "learning_rate": 3.411495257962501e-05, + "loss": 0.6376276016235352, + "step": 297 + }, + { + "epoch": 0.9834983498349835, + "grad_norm": 0.3454832175281825, + "learning_rate": 3.406043044482596e-05, + "loss": 0.648975133895874, + "step": 298 + }, + { + "epoch": 0.9867986798679867, + "grad_norm": 0.3284679145456545, + "learning_rate": 3.4005700918399016e-05, + "loss": 0.6201390624046326, + "step": 299 + }, + { + "epoch": 0.9900990099009901, + "grad_norm": 0.33000362479964457, + "learning_rate": 3.395076480760576e-05, + "loss": 0.6103875637054443, + "step": 300 + }, + { + "epoch": 0.9933993399339934, + "grad_norm": 0.31707924192462417, + "learning_rate": 3.3895622922754936e-05, + "loss": 0.5486876368522644, + "step": 301 + }, + { + "epoch": 0.9966996699669967, + "grad_norm": 0.3094164003933957, + "learning_rate": 3.384027607719043e-05, + "loss": 0.5980846285820007, + "step": 302 + }, + { + "epoch": 1.0, + "grad_norm": 0.33089398879681, + "learning_rate": 3.378472508727931e-05, + "loss": 0.5986801385879517, + "step": 303 + }, + { + "epoch": 1.0033003300330032, + "grad_norm": 0.4690060258405477, + "learning_rate": 3.372897077239979e-05, + "loss": 0.5586727857589722, + "step": 304 + }, + { + "epoch": 1.0066006600660067, + "grad_norm": 0.34686786747213394, + "learning_rate": 3.36730139549291e-05, + "loss": 0.5393255949020386, + "step": 305 + }, + { + "epoch": 1.00990099009901, + 
"grad_norm": 0.4023568892604613, + "learning_rate": 3.361685546023143e-05, + "loss": 0.5377227067947388, + "step": 306 + }, + { + "epoch": 1.0132013201320131, + "grad_norm": 0.39915820884177944, + "learning_rate": 3.356049611664568e-05, + "loss": 0.5223784446716309, + "step": 307 + }, + { + "epoch": 1.0165016501650166, + "grad_norm": 0.3654265250846575, + "learning_rate": 3.350393675547328e-05, + "loss": 0.5502469539642334, + "step": 308 + }, + { + "epoch": 1.0198019801980198, + "grad_norm": 0.42079557297663883, + "learning_rate": 3.3447178210965936e-05, + "loss": 0.5626603960990906, + "step": 309 + }, + { + "epoch": 1.023102310231023, + "grad_norm": 0.3684084639129366, + "learning_rate": 3.3390221320313303e-05, + "loss": 0.48262274265289307, + "step": 310 + }, + { + "epoch": 1.0264026402640265, + "grad_norm": 0.39908786063309193, + "learning_rate": 3.333306692363065e-05, + "loss": 0.5850967168807983, + "step": 311 + }, + { + "epoch": 1.0297029702970297, + "grad_norm": 0.44262876970078274, + "learning_rate": 3.3275715863946466e-05, + "loss": 0.5444281697273254, + "step": 312 + }, + { + "epoch": 1.033003300330033, + "grad_norm": 0.35239079669120155, + "learning_rate": 3.3218168987190004e-05, + "loss": 0.5329654216766357, + "step": 313 + }, + { + "epoch": 1.0363036303630364, + "grad_norm": 0.38499730860339404, + "learning_rate": 3.316042714217885e-05, + "loss": 0.5276832580566406, + "step": 314 + }, + { + "epoch": 1.0396039603960396, + "grad_norm": 0.3928937531164494, + "learning_rate": 3.310249118060636e-05, + "loss": 0.5344791412353516, + "step": 315 + }, + { + "epoch": 1.0429042904290429, + "grad_norm": 0.3466589226743573, + "learning_rate": 3.304436195702911e-05, + "loss": 0.5479785203933716, + "step": 316 + }, + { + "epoch": 1.046204620462046, + "grad_norm": 0.370325309360066, + "learning_rate": 3.298604032885431e-05, + "loss": 0.5223082900047302, + "step": 317 + }, + { + "epoch": 1.0495049504950495, + "grad_norm": 0.4271803134046634, + "learning_rate": 
3.292752715632713e-05, + "loss": 0.5667799711227417, + "step": 318 + }, + { + "epoch": 1.0528052805280528, + "grad_norm": 0.33752277032768196, + "learning_rate": 3.2868823302518016e-05, + "loss": 0.5194317698478699, + "step": 319 + }, + { + "epoch": 1.056105610561056, + "grad_norm": 0.35801795115870316, + "learning_rate": 3.2809929633309985e-05, + "loss": 0.4911007285118103, + "step": 320 + }, + { + "epoch": 1.0594059405940595, + "grad_norm": 0.33819516112787196, + "learning_rate": 3.2750847017385826e-05, + "loss": 0.5269002914428711, + "step": 321 + }, + { + "epoch": 1.0627062706270627, + "grad_norm": 0.3280280196094967, + "learning_rate": 3.269157632621529e-05, + "loss": 0.5124789476394653, + "step": 322 + }, + { + "epoch": 1.066006600660066, + "grad_norm": 0.3841029677303286, + "learning_rate": 3.263211843404225e-05, + "loss": 0.5483890771865845, + "step": 323 + }, + { + "epoch": 1.0693069306930694, + "grad_norm": 0.348752311292252, + "learning_rate": 3.25724742178718e-05, + "loss": 0.5582579374313354, + "step": 324 + }, + { + "epoch": 1.0726072607260726, + "grad_norm": 0.3672218653955236, + "learning_rate": 3.2512644557457304e-05, + "loss": 0.5662975907325745, + "step": 325 + }, + { + "epoch": 1.0759075907590758, + "grad_norm": 0.339133227284404, + "learning_rate": 3.2452630335287445e-05, + "loss": 0.5502511858940125, + "step": 326 + }, + { + "epoch": 1.0792079207920793, + "grad_norm": 0.3607463939055526, + "learning_rate": 3.239243243657318e-05, + "loss": 0.5614978075027466, + "step": 327 + }, + { + "epoch": 1.0825082508250825, + "grad_norm": 0.3354690532522152, + "learning_rate": 3.233205174923472e-05, + "loss": 0.4828110635280609, + "step": 328 + }, + { + "epoch": 1.0858085808580857, + "grad_norm": 0.3296040603044689, + "learning_rate": 3.22714891638884e-05, + "loss": 0.5437847971916199, + "step": 329 + }, + { + "epoch": 1.0891089108910892, + "grad_norm": 0.3295415767468974, + "learning_rate": 3.221074557383355e-05, + "loss": 0.6240063309669495, + "step": 
330 + }, + { + "epoch": 1.0924092409240924, + "grad_norm": 0.3032628226796708, + "learning_rate": 3.2149821875039325e-05, + "loss": 0.5435442328453064, + "step": 331 + }, + { + "epoch": 1.0957095709570956, + "grad_norm": 0.30875440813945676, + "learning_rate": 3.20887189661315e-05, + "loss": 0.5240401029586792, + "step": 332 + }, + { + "epoch": 1.099009900990099, + "grad_norm": 0.3043121620505056, + "learning_rate": 3.202743774837919e-05, + "loss": 0.5227692127227783, + "step": 333 + }, + { + "epoch": 1.1023102310231023, + "grad_norm": 0.3439754692795775, + "learning_rate": 3.196597912568157e-05, + "loss": 0.5607417821884155, + "step": 334 + }, + { + "epoch": 1.1056105610561056, + "grad_norm": 0.29691798670137787, + "learning_rate": 3.1904344004554536e-05, + "loss": 0.5607600808143616, + "step": 335 + }, + { + "epoch": 1.108910891089109, + "grad_norm": 0.32493088910689055, + "learning_rate": 3.184253329411737e-05, + "loss": 0.47135430574417114, + "step": 336 + }, + { + "epoch": 1.1122112211221122, + "grad_norm": 0.3202945703052858, + "learning_rate": 3.178054790607924e-05, + "loss": 0.5708764791488647, + "step": 337 + }, + { + "epoch": 1.1155115511551155, + "grad_norm": 0.3164605548495645, + "learning_rate": 3.1718388754725883e-05, + "loss": 0.5522497296333313, + "step": 338 + }, + { + "epoch": 1.118811881188119, + "grad_norm": 0.3449586600316318, + "learning_rate": 3.1656056756906e-05, + "loss": 0.5556532144546509, + "step": 339 + }, + { + "epoch": 1.1221122112211221, + "grad_norm": 0.3130025484639745, + "learning_rate": 3.1593552832017795e-05, + "loss": 0.5727676153182983, + "step": 340 + }, + { + "epoch": 1.1254125412541254, + "grad_norm": 0.3195703179740936, + "learning_rate": 3.153087790199541e-05, + "loss": 0.5131651759147644, + "step": 341 + }, + { + "epoch": 1.1287128712871288, + "grad_norm": 0.3191177264656739, + "learning_rate": 3.146803289129528e-05, + "loss": 0.5143063068389893, + "step": 342 + }, + { + "epoch": 1.132013201320132, + "grad_norm": 
0.33398757419035885, + "learning_rate": 3.1405018726882595e-05, + "loss": 0.509161114692688, + "step": 343 + }, + { + "epoch": 1.1353135313531353, + "grad_norm": 0.33058725446313514, + "learning_rate": 3.13418363382175e-05, + "loss": 0.5213526487350464, + "step": 344 + }, + { + "epoch": 1.1386138613861387, + "grad_norm": 0.3226863318187914, + "learning_rate": 3.127848665724149e-05, + "loss": 0.5465434789657593, + "step": 345 + }, + { + "epoch": 1.141914191419142, + "grad_norm": 0.6179658385179007, + "learning_rate": 3.1214970618363626e-05, + "loss": 0.5342190265655518, + "step": 346 + }, + { + "epoch": 1.1452145214521452, + "grad_norm": 0.47777163001134637, + "learning_rate": 3.115128915844672e-05, + "loss": 0.541754424571991, + "step": 347 + }, + { + "epoch": 1.1485148514851484, + "grad_norm": 0.33931974771490697, + "learning_rate": 3.10874432167936e-05, + "loss": 0.5318331122398376, + "step": 348 + }, + { + "epoch": 1.1518151815181519, + "grad_norm": 0.32111740987941506, + "learning_rate": 3.1023433735133134e-05, + "loss": 0.4972509741783142, + "step": 349 + }, + { + "epoch": 1.155115511551155, + "grad_norm": 0.30074948382432587, + "learning_rate": 3.095926165760647e-05, + "loss": 0.5417294502258301, + "step": 350 + }, + { + "epoch": 1.1584158415841583, + "grad_norm": 0.3410522798436207, + "learning_rate": 3.089492793075302e-05, + "loss": 0.554945707321167, + "step": 351 + }, + { + "epoch": 1.1617161716171618, + "grad_norm": 0.3254774061643724, + "learning_rate": 3.083043350349653e-05, + "loss": 0.5204564929008484, + "step": 352 + }, + { + "epoch": 1.165016501650165, + "grad_norm": 0.3088402728006412, + "learning_rate": 3.076577932713108e-05, + "loss": 0.4856947064399719, + "step": 353 + }, + { + "epoch": 1.1683168316831682, + "grad_norm": 0.2896918095760776, + "learning_rate": 3.0700966355307055e-05, + "loss": 0.5269368886947632, + "step": 354 + }, + { + "epoch": 1.1716171617161717, + "grad_norm": 0.32747543865706225, + "learning_rate": 3.063599554401708e-05, + 
"loss": 0.5811939239501953, + "step": 355 + }, + { + "epoch": 1.174917491749175, + "grad_norm": 0.29324577597304957, + "learning_rate": 3.057086785158189e-05, + "loss": 0.5636904239654541, + "step": 356 + }, + { + "epoch": 1.1782178217821782, + "grad_norm": 0.31779620334412045, + "learning_rate": 3.050558423863626e-05, + "loss": 0.546089768409729, + "step": 357 + }, + { + "epoch": 1.1815181518151816, + "grad_norm": 0.3093045991582328, + "learning_rate": 3.0440145668114774e-05, + "loss": 0.5239901542663574, + "step": 358 + }, + { + "epoch": 1.1848184818481848, + "grad_norm": 0.31848934088179354, + "learning_rate": 3.0374553105237637e-05, + "loss": 0.5833466053009033, + "step": 359 + }, + { + "epoch": 1.188118811881188, + "grad_norm": 0.33803859097620154, + "learning_rate": 3.0308807517496456e-05, + "loss": 0.5060774087905884, + "step": 360 + }, + { + "epoch": 1.1914191419141915, + "grad_norm": 0.31145081064149094, + "learning_rate": 3.0242909874639953e-05, + "loss": 0.5164307355880737, + "step": 361 + }, + { + "epoch": 1.1947194719471947, + "grad_norm": 0.29765085452905116, + "learning_rate": 3.0176861148659672e-05, + "loss": 0.49949395656585693, + "step": 362 + }, + { + "epoch": 1.198019801980198, + "grad_norm": 0.3296486034239661, + "learning_rate": 3.0110662313775623e-05, + "loss": 0.5581181049346924, + "step": 363 + }, + { + "epoch": 1.2013201320132012, + "grad_norm": 0.3116631729941006, + "learning_rate": 3.0044314346421938e-05, + "loss": 0.5657376646995544, + "step": 364 + }, + { + "epoch": 1.2046204620462047, + "grad_norm": 0.33012695180790946, + "learning_rate": 2.9977818225232443e-05, + "loss": 0.5269935131072998, + "step": 365 + }, + { + "epoch": 1.2079207920792079, + "grad_norm": 0.31869984664933465, + "learning_rate": 2.991117493102626e-05, + "loss": 0.5385931730270386, + "step": 366 + }, + { + "epoch": 1.2112211221122111, + "grad_norm": 0.30491226427581125, + "learning_rate": 2.984438544679329e-05, + "loss": 0.5615143179893494, + "step": 367 + }, + { + 
"epoch": 1.2145214521452146, + "grad_norm": 0.32195999076013593, + "learning_rate": 2.9777450757679754e-05, + "loss": 0.5175333023071289, + "step": 368 + }, + { + "epoch": 1.2178217821782178, + "grad_norm": 0.30930257180361886, + "learning_rate": 2.971037185097364e-05, + "loss": 0.565494179725647, + "step": 369 + }, + { + "epoch": 1.221122112211221, + "grad_norm": 0.34237830645177886, + "learning_rate": 2.9643149716090146e-05, + "loss": 0.5519120693206787, + "step": 370 + }, + { + "epoch": 1.2244224422442245, + "grad_norm": 0.30959351563618437, + "learning_rate": 2.9575785344557114e-05, + "loss": 0.49374374747276306, + "step": 371 + }, + { + "epoch": 1.2277227722772277, + "grad_norm": 0.31310768619122714, + "learning_rate": 2.950827973000034e-05, + "loss": 0.5608875751495361, + "step": 372 + }, + { + "epoch": 1.231023102310231, + "grad_norm": 0.31986895424613543, + "learning_rate": 2.944063386812899e-05, + "loss": 0.5866271257400513, + "step": 373 + }, + { + "epoch": 1.2343234323432344, + "grad_norm": 0.3359900469491975, + "learning_rate": 2.9372848756720867e-05, + "loss": 0.5342913269996643, + "step": 374 + }, + { + "epoch": 1.2376237623762376, + "grad_norm": 0.2956484140793021, + "learning_rate": 2.9304925395607696e-05, + "loss": 0.5539537668228149, + "step": 375 + }, + { + "epoch": 1.2409240924092408, + "grad_norm": 0.3239136306261367, + "learning_rate": 2.9236864786660423e-05, + "loss": 0.5614147186279297, + "step": 376 + }, + { + "epoch": 1.2442244224422443, + "grad_norm": 0.3311932744032855, + "learning_rate": 2.9168667933774356e-05, + "loss": 0.46689367294311523, + "step": 377 + }, + { + "epoch": 1.2475247524752475, + "grad_norm": 0.3291299090174619, + "learning_rate": 2.910033584285444e-05, + "loss": 0.5383083820343018, + "step": 378 + }, + { + "epoch": 1.2508250825082508, + "grad_norm": 0.3013900588246958, + "learning_rate": 2.903186952180037e-05, + "loss": 0.5349752902984619, + "step": 379 + }, + { + "epoch": 1.2541254125412542, + "grad_norm": 
0.3219145450840317, + "learning_rate": 2.8963269980491743e-05, + "loss": 0.5792303681373596, + "step": 380 + }, + { + "epoch": 1.2574257425742574, + "grad_norm": 0.2840550960191948, + "learning_rate": 2.8894538230773147e-05, + "loss": 0.524924099445343, + "step": 381 + }, + { + "epoch": 1.2607260726072607, + "grad_norm": 0.3172399675943548, + "learning_rate": 2.882567528643925e-05, + "loss": 0.5137406587600708, + "step": 382 + }, + { + "epoch": 1.2640264026402641, + "grad_norm": 0.2893676822687234, + "learning_rate": 2.8756682163219857e-05, + "loss": 0.5196574926376343, + "step": 383 + }, + { + "epoch": 1.2673267326732673, + "grad_norm": 0.31363904787626334, + "learning_rate": 2.8687559878764903e-05, + "loss": 0.585644006729126, + "step": 384 + }, + { + "epoch": 1.2706270627062706, + "grad_norm": 0.3310272877884813, + "learning_rate": 2.8618309452629445e-05, + "loss": 0.5973786115646362, + "step": 385 + }, + { + "epoch": 1.273927392739274, + "grad_norm": 0.3201222210217655, + "learning_rate": 2.854893190625865e-05, + "loss": 0.5909825563430786, + "step": 386 + }, + { + "epoch": 1.2772277227722773, + "grad_norm": 0.3507731714316878, + "learning_rate": 2.84794282629727e-05, + "loss": 0.5903690457344055, + "step": 387 + }, + { + "epoch": 1.2805280528052805, + "grad_norm": 0.31011243056320775, + "learning_rate": 2.840979954795171e-05, + "loss": 0.5316457152366638, + "step": 388 + }, + { + "epoch": 1.283828382838284, + "grad_norm": 0.32950464198309637, + "learning_rate": 2.8340046788220613e-05, + "loss": 0.5080389976501465, + "step": 389 + }, + { + "epoch": 1.2871287128712872, + "grad_norm": 0.37769184930606736, + "learning_rate": 2.8270171012633994e-05, + "loss": 0.6137889623641968, + "step": 390 + }, + { + "epoch": 1.2904290429042904, + "grad_norm": 0.34430823745531935, + "learning_rate": 2.8200173251860928e-05, + "loss": 0.5433805584907532, + "step": 391 + }, + { + "epoch": 1.2937293729372938, + "grad_norm": 0.356563736773021, + "learning_rate": 
2.8130054538369775e-05, + "loss": 0.4965590834617615, + "step": 392 + }, + { + "epoch": 1.297029702970297, + "grad_norm": 0.29380923244218154, + "learning_rate": 2.805981590641295e-05, + "loss": 0.5361340045928955, + "step": 393 + }, + { + "epoch": 1.3003300330033003, + "grad_norm": 0.31403525376793245, + "learning_rate": 2.7989458392011678e-05, + "loss": 0.47011327743530273, + "step": 394 + }, + { + "epoch": 1.3036303630363038, + "grad_norm": 0.30710914438533876, + "learning_rate": 2.7918983032940666e-05, + "loss": 0.5893687605857849, + "step": 395 + }, + { + "epoch": 1.306930693069307, + "grad_norm": 0.3126943781985397, + "learning_rate": 2.7848390868712886e-05, + "loss": 0.5219327211380005, + "step": 396 + }, + { + "epoch": 1.3102310231023102, + "grad_norm": 0.35585146532127665, + "learning_rate": 2.7777682940564142e-05, + "loss": 0.5652155876159668, + "step": 397 + }, + { + "epoch": 1.3135313531353137, + "grad_norm": 0.41906023992763497, + "learning_rate": 2.7706860291437784e-05, + "loss": 0.5361950397491455, + "step": 398 + }, + { + "epoch": 1.316831683168317, + "grad_norm": 0.29071400108766793, + "learning_rate": 2.763592396596929e-05, + "loss": 0.5355206727981567, + "step": 399 + }, + { + "epoch": 1.3201320132013201, + "grad_norm": 0.298123677847084, + "learning_rate": 2.756487501047086e-05, + "loss": 0.5082858800888062, + "step": 400 + }, + { + "epoch": 1.3234323432343233, + "grad_norm": 0.3144050740212562, + "learning_rate": 2.7493714472916013e-05, + "loss": 0.5282934904098511, + "step": 401 + }, + { + "epoch": 1.3267326732673268, + "grad_norm": 0.29396121691648713, + "learning_rate": 2.7422443402924074e-05, + "loss": 0.5502887964248657, + "step": 402 + }, + { + "epoch": 1.33003300330033, + "grad_norm": 0.2854429234726643, + "learning_rate": 2.7351062851744747e-05, + "loss": 0.5374204516410828, + "step": 403 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.30308752538818784, + "learning_rate": 2.7279573872242574e-05, + "loss": 0.5602293014526367, + 
"step": 404 + }, + { + "epoch": 1.3366336633663367, + "grad_norm": 0.30975657746221447, + "learning_rate": 2.7207977518881418e-05, + "loss": 0.5321286916732788, + "step": 405 + }, + { + "epoch": 1.33993399339934, + "grad_norm": 0.28965457921713383, + "learning_rate": 2.713627484770892e-05, + "loss": 0.5523560047149658, + "step": 406 + }, + { + "epoch": 1.3432343234323432, + "grad_norm": 0.30598816879566076, + "learning_rate": 2.706446691634089e-05, + "loss": 0.47019705176353455, + "step": 407 + }, + { + "epoch": 1.3465346534653464, + "grad_norm": 0.2977261513860205, + "learning_rate": 2.6992554783945748e-05, + "loss": 0.540359616279602, + "step": 408 + }, + { + "epoch": 1.3498349834983498, + "grad_norm": 0.2845048826043699, + "learning_rate": 2.6920539511228874e-05, + "loss": 0.561464786529541, + "step": 409 + }, + { + "epoch": 1.353135313531353, + "grad_norm": 0.2939741197740927, + "learning_rate": 2.6848422160416956e-05, + "loss": 0.5429259538650513, + "step": 410 + }, + { + "epoch": 1.3564356435643563, + "grad_norm": 0.2968609589915083, + "learning_rate": 2.677620379524237e-05, + "loss": 0.5452640652656555, + "step": 411 + }, + { + "epoch": 1.3597359735973598, + "grad_norm": 0.28949363661635646, + "learning_rate": 2.670388548092741e-05, + "loss": 0.49627864360809326, + "step": 412 + }, + { + "epoch": 1.363036303630363, + "grad_norm": 0.328169978832012, + "learning_rate": 2.663146828416867e-05, + "loss": 0.5331633687019348, + "step": 413 + }, + { + "epoch": 1.3663366336633662, + "grad_norm": 0.2926434963884909, + "learning_rate": 2.6558953273121216e-05, + "loss": 0.5447151064872742, + "step": 414 + }, + { + "epoch": 1.3696369636963697, + "grad_norm": 0.2863360845432002, + "learning_rate": 2.648634151738292e-05, + "loss": 0.5467007160186768, + "step": 415 + }, + { + "epoch": 1.372937293729373, + "grad_norm": 0.33044933855099695, + "learning_rate": 2.6413634087978602e-05, + "loss": 0.5804279446601868, + "step": 416 + }, + { + "epoch": 1.3762376237623761, + 
"grad_norm": 0.29168904019746145, + "learning_rate": 2.63408320573443e-05, + "loss": 0.5323517322540283, + "step": 417 + }, + { + "epoch": 1.3795379537953796, + "grad_norm": 0.3046417110987717, + "learning_rate": 2.6267936499311402e-05, + "loss": 0.5452409982681274, + "step": 418 + }, + { + "epoch": 1.3828382838283828, + "grad_norm": 0.2878853361033164, + "learning_rate": 2.619494848909084e-05, + "loss": 0.4622665047645569, + "step": 419 + }, + { + "epoch": 1.386138613861386, + "grad_norm": 0.3129938954769346, + "learning_rate": 2.6121869103257206e-05, + "loss": 0.531772255897522, + "step": 420 + }, + { + "epoch": 1.3894389438943895, + "grad_norm": 0.3044320552061303, + "learning_rate": 2.6048699419732897e-05, + "loss": 0.519554853439331, + "step": 421 + }, + { + "epoch": 1.3927392739273927, + "grad_norm": 0.32616258357306027, + "learning_rate": 2.5975440517772187e-05, + "loss": 0.545585572719574, + "step": 422 + }, + { + "epoch": 1.396039603960396, + "grad_norm": 0.297995845019565, + "learning_rate": 2.5902093477945345e-05, + "loss": 0.5641547441482544, + "step": 423 + }, + { + "epoch": 1.3993399339933994, + "grad_norm": 0.28406971495281874, + "learning_rate": 2.5828659382122655e-05, + "loss": 0.5578028559684753, + "step": 424 + }, + { + "epoch": 1.4026402640264026, + "grad_norm": 0.35618435421860006, + "learning_rate": 2.5755139313458484e-05, + "loss": 0.5931404232978821, + "step": 425 + }, + { + "epoch": 1.4059405940594059, + "grad_norm": 0.3227282264542969, + "learning_rate": 2.5681534356375314e-05, + "loss": 0.5486891865730286, + "step": 426 + }, + { + "epoch": 1.4092409240924093, + "grad_norm": 0.31220449886262164, + "learning_rate": 2.5607845596547706e-05, + "loss": 0.5007671117782593, + "step": 427 + }, + { + "epoch": 1.4125412541254125, + "grad_norm": 0.2970377848116104, + "learning_rate": 2.5534074120886346e-05, + "loss": 0.5044519901275635, + "step": 428 + }, + { + "epoch": 1.4158415841584158, + "grad_norm": 0.30667327850480125, + "learning_rate": 
2.5460221017521952e-05, + "loss": 0.5227789878845215, + "step": 429 + }, + { + "epoch": 1.4191419141914192, + "grad_norm": 0.2902458759439887, + "learning_rate": 2.538628737578926e-05, + "loss": 0.5530189871788025, + "step": 430 + }, + { + "epoch": 1.4224422442244224, + "grad_norm": 0.3114416510328153, + "learning_rate": 2.5312274286210966e-05, + "loss": 0.508142352104187, + "step": 431 + }, + { + "epoch": 1.4257425742574257, + "grad_norm": 0.30284970816559353, + "learning_rate": 2.523818284048159e-05, + "loss": 0.5497263669967651, + "step": 432 + }, + { + "epoch": 1.4290429042904291, + "grad_norm": 0.3619418905679721, + "learning_rate": 2.5164014131451443e-05, + "loss": 0.5477034449577332, + "step": 433 + }, + { + "epoch": 1.4323432343234324, + "grad_norm": 0.28668741491270383, + "learning_rate": 2.508976925311045e-05, + "loss": 0.5091728568077087, + "step": 434 + }, + { + "epoch": 1.4356435643564356, + "grad_norm": 0.2922234358135184, + "learning_rate": 2.501544930057203e-05, + "loss": 0.5022713541984558, + "step": 435 + }, + { + "epoch": 1.438943894389439, + "grad_norm": 0.29994035273286174, + "learning_rate": 2.494105537005697e-05, + "loss": 0.5401599407196045, + "step": 436 + }, + { + "epoch": 1.4422442244224423, + "grad_norm": 0.27863085551634303, + "learning_rate": 2.4866588558877208e-05, + "loss": 0.5632063150405884, + "step": 437 + }, + { + "epoch": 1.4455445544554455, + "grad_norm": 0.2968792338733857, + "learning_rate": 2.479204996541969e-05, + "loss": 0.552355170249939, + "step": 438 + }, + { + "epoch": 1.448844884488449, + "grad_norm": 0.3222205976590156, + "learning_rate": 2.4717440689130154e-05, + "loss": 0.5604996681213379, + "step": 439 + }, + { + "epoch": 1.4521452145214522, + "grad_norm": 0.2781451863798608, + "learning_rate": 2.4642761830496893e-05, + "loss": 0.4961245656013489, + "step": 440 + }, + { + "epoch": 1.4554455445544554, + "grad_norm": 0.3327533816855903, + "learning_rate": 2.4568014491034565e-05, + "loss": 0.5403590202331543, + 
"step": 441 + }, + { + "epoch": 1.4587458745874589, + "grad_norm": 0.2944499869326328, + "learning_rate": 2.4493199773267902e-05, + "loss": 0.4753378629684448, + "step": 442 + }, + { + "epoch": 1.462046204620462, + "grad_norm": 0.30936599048377306, + "learning_rate": 2.4418318780715477e-05, + "loss": 0.5125438570976257, + "step": 443 + }, + { + "epoch": 1.4653465346534653, + "grad_norm": 0.3047486735791836, + "learning_rate": 2.434337261787342e-05, + "loss": 0.5670269727706909, + "step": 444 + }, + { + "epoch": 1.4686468646864688, + "grad_norm": 0.3348418102837006, + "learning_rate": 2.426836239019911e-05, + "loss": 0.5538198947906494, + "step": 445 + }, + { + "epoch": 1.471947194719472, + "grad_norm": 0.2790312641462961, + "learning_rate": 2.4193289204094893e-05, + "loss": 0.5012328028678894, + "step": 446 + }, + { + "epoch": 1.4752475247524752, + "grad_norm": 0.30485310749783334, + "learning_rate": 2.4118154166891762e-05, + "loss": 0.538119912147522, + "step": 447 + }, + { + "epoch": 1.4785478547854787, + "grad_norm": 0.32398781026753815, + "learning_rate": 2.4042958386833003e-05, + "loss": 0.5252339839935303, + "step": 448 + }, + { + "epoch": 1.481848184818482, + "grad_norm": 0.326928536480608, + "learning_rate": 2.3967702973057853e-05, + "loss": 0.5367081761360168, + "step": 449 + }, + { + "epoch": 1.4851485148514851, + "grad_norm": 0.3044938562463835, + "learning_rate": 2.3892389035585167e-05, + "loss": 0.5091884136199951, + "step": 450 + }, + { + "epoch": 1.4884488448844886, + "grad_norm": 0.2897824690201277, + "learning_rate": 2.3817017685297016e-05, + "loss": 0.5079891681671143, + "step": 451 + }, + { + "epoch": 1.4917491749174918, + "grad_norm": 0.2966882318097961, + "learning_rate": 2.3741590033922313e-05, + "loss": 0.511939287185669, + "step": 452 + }, + { + "epoch": 1.495049504950495, + "grad_norm": 0.28797637565211376, + "learning_rate": 2.3666107194020404e-05, + "loss": 0.5070478916168213, + "step": 453 + }, + { + "epoch": 1.4983498349834983, + 
"grad_norm": 0.29050652670321586, + "learning_rate": 2.3590570278964682e-05, + "loss": 0.547492504119873, + "step": 454 + }, + { + "epoch": 1.5016501650165015, + "grad_norm": 0.311874965448668, + "learning_rate": 2.3514980402926132e-05, + "loss": 0.5386558771133423, + "step": 455 + }, + { + "epoch": 1.504950495049505, + "grad_norm": 0.26980126113979913, + "learning_rate": 2.3439338680856943e-05, + "loss": 0.48668172955513, + "step": 456 + }, + { + "epoch": 1.5082508250825084, + "grad_norm": 0.31689121328788056, + "learning_rate": 2.3363646228474002e-05, + "loss": 0.5497942566871643, + "step": 457 + }, + { + "epoch": 1.5115511551155114, + "grad_norm": 0.3648919358675907, + "learning_rate": 2.328790416224248e-05, + "loss": 0.5267748832702637, + "step": 458 + }, + { + "epoch": 1.5148514851485149, + "grad_norm": 0.3191029117024018, + "learning_rate": 2.3212113599359368e-05, + "loss": 0.5578982830047607, + "step": 459 + }, + { + "epoch": 1.5181518151815183, + "grad_norm": 0.30610891906133464, + "learning_rate": 2.3136275657736956e-05, + "loss": 0.5136545896530151, + "step": 460 + }, + { + "epoch": 1.5214521452145213, + "grad_norm": 0.28466532575384307, + "learning_rate": 2.3060391455986403e-05, + "loss": 0.5718669891357422, + "step": 461 + }, + { + "epoch": 1.5247524752475248, + "grad_norm": 0.3064265170567389, + "learning_rate": 2.2984462113401184e-05, + "loss": 0.5427108407020569, + "step": 462 + }, + { + "epoch": 1.528052805280528, + "grad_norm": 0.28495826208338726, + "learning_rate": 2.2908488749940596e-05, + "loss": 0.5293564200401306, + "step": 463 + }, + { + "epoch": 1.5313531353135312, + "grad_norm": 0.3073240786964915, + "learning_rate": 2.2832472486213275e-05, + "loss": 0.550743579864502, + "step": 464 + }, + { + "epoch": 1.5346534653465347, + "grad_norm": 0.30789089349395116, + "learning_rate": 2.2756414443460602e-05, + "loss": 0.5957387685775757, + "step": 465 + }, + { + "epoch": 1.537953795379538, + "grad_norm": 0.2840660845057486, + "learning_rate": 
2.2680315743540234e-05, + "loss": 0.4994407892227173, + "step": 466 + }, + { + "epoch": 1.5412541254125411, + "grad_norm": 0.2912314912557071, + "learning_rate": 2.260417750890949e-05, + "loss": 0.5120857954025269, + "step": 467 + }, + { + "epoch": 1.5445544554455446, + "grad_norm": 0.3024618438133355, + "learning_rate": 2.2528000862608845e-05, + "loss": 0.5727359056472778, + "step": 468 + }, + { + "epoch": 1.5478547854785478, + "grad_norm": 0.30379584493476613, + "learning_rate": 2.2451786928245344e-05, + "loss": 0.584964394569397, + "step": 469 + }, + { + "epoch": 1.551155115511551, + "grad_norm": 0.2782374360382863, + "learning_rate": 2.237553682997603e-05, + "loss": 0.5507112741470337, + "step": 470 + }, + { + "epoch": 1.5544554455445545, + "grad_norm": 0.26333814455393634, + "learning_rate": 2.2299251692491364e-05, + "loss": 0.49136701226234436, + "step": 471 + }, + { + "epoch": 1.5577557755775577, + "grad_norm": 0.31673569076077385, + "learning_rate": 2.2222932640998635e-05, + "loss": 0.5374805927276611, + "step": 472 + }, + { + "epoch": 1.561056105610561, + "grad_norm": 0.29370656251116817, + "learning_rate": 2.2146580801205362e-05, + "loss": 0.523996114730835, + "step": 473 + }, + { + "epoch": 1.5643564356435644, + "grad_norm": 0.27277397989040114, + "learning_rate": 2.207019729930271e-05, + "loss": 0.48198428750038147, + "step": 474 + }, + { + "epoch": 1.5676567656765676, + "grad_norm": 0.2861287068823064, + "learning_rate": 2.199378326194883e-05, + "loss": 0.5148699879646301, + "step": 475 + }, + { + "epoch": 1.5709570957095709, + "grad_norm": 0.2981231032466442, + "learning_rate": 2.1917339816252303e-05, + "loss": 0.5297671556472778, + "step": 476 + }, + { + "epoch": 1.5742574257425743, + "grad_norm": 0.2775943923870632, + "learning_rate": 2.1840868089755465e-05, + "loss": 0.5082278847694397, + "step": 477 + }, + { + "epoch": 1.5775577557755776, + "grad_norm": 0.2988631140370514, + "learning_rate": 2.176436921041779e-05, + "loss": 0.4755392372608185, + 
"step": 478 + }, + { + "epoch": 1.5808580858085808, + "grad_norm": 0.28707182004966697, + "learning_rate": 2.1687844306599275e-05, + "loss": 0.5249454975128174, + "step": 479 + }, + { + "epoch": 1.5841584158415842, + "grad_norm": 0.3023499942723386, + "learning_rate": 2.161129450704376e-05, + "loss": 0.5626166462898254, + "step": 480 + }, + { + "epoch": 1.5874587458745875, + "grad_norm": 0.28182475866947054, + "learning_rate": 2.1534720940862318e-05, + "loss": 0.5590533018112183, + "step": 481 + }, + { + "epoch": 1.5907590759075907, + "grad_norm": 0.2724331542693392, + "learning_rate": 2.1458124737516557e-05, + "loss": 0.5146170854568481, + "step": 482 + }, + { + "epoch": 1.5940594059405941, + "grad_norm": 0.28834268248771533, + "learning_rate": 2.1381507026802007e-05, + "loss": 0.5633066296577454, + "step": 483 + }, + { + "epoch": 1.5973597359735974, + "grad_norm": 0.29376551657635425, + "learning_rate": 2.130486893883141e-05, + "loss": 0.5273865461349487, + "step": 484 + }, + { + "epoch": 1.6006600660066006, + "grad_norm": 0.277893471974935, + "learning_rate": 2.1228211604018088e-05, + "loss": 0.5040723085403442, + "step": 485 + }, + { + "epoch": 1.603960396039604, + "grad_norm": 0.2901419412347278, + "learning_rate": 2.1151536153059254e-05, + "loss": 0.5254411697387695, + "step": 486 + }, + { + "epoch": 1.6072607260726073, + "grad_norm": 0.29340041503520936, + "learning_rate": 2.1074843716919323e-05, + "loss": 0.5789728760719299, + "step": 487 + }, + { + "epoch": 1.6105610561056105, + "grad_norm": 0.2858502686555999, + "learning_rate": 2.0998135426813245e-05, + "loss": 0.5521235466003418, + "step": 488 + }, + { + "epoch": 1.613861386138614, + "grad_norm": 0.2770947277408911, + "learning_rate": 2.092141241418984e-05, + "loss": 0.4702959954738617, + "step": 489 + }, + { + "epoch": 1.6171617161716172, + "grad_norm": 0.29713285242144816, + "learning_rate": 2.0844675810715046e-05, + "loss": 0.4960707128047943, + "step": 490 + }, + { + "epoch": 1.6204620462046204, + 
"grad_norm": 0.2800759957297699, + "learning_rate": 2.076792674825529e-05, + "loss": 0.5334826111793518, + "step": 491 + }, + { + "epoch": 1.6237623762376239, + "grad_norm": 0.4465546145157964, + "learning_rate": 2.0691166358860775e-05, + "loss": 0.5604894161224365, + "step": 492 + }, + { + "epoch": 1.627062706270627, + "grad_norm": 0.2895889767199155, + "learning_rate": 2.061439577474875e-05, + "loss": 0.5565654635429382, + "step": 493 + }, + { + "epoch": 1.6303630363036303, + "grad_norm": 0.2663082120203026, + "learning_rate": 2.0537616128286875e-05, + "loss": 0.541640043258667, + "step": 494 + }, + { + "epoch": 1.6336633663366338, + "grad_norm": 0.27975047407467746, + "learning_rate": 2.0460828551976436e-05, + "loss": 0.5247132182121277, + "step": 495 + }, + { + "epoch": 1.636963696369637, + "grad_norm": 0.30554958978585, + "learning_rate": 2.0384034178435727e-05, + "loss": 0.533937394618988, + "step": 496 + }, + { + "epoch": 1.6402640264026402, + "grad_norm": 0.29094539458240765, + "learning_rate": 2.0307234140383264e-05, + "loss": 0.5857927799224854, + "step": 497 + }, + { + "epoch": 1.6435643564356437, + "grad_norm": 0.2718482098386275, + "learning_rate": 2.0230429570621134e-05, + "loss": 0.5191807746887207, + "step": 498 + }, + { + "epoch": 1.6468646864686467, + "grad_norm": 0.28523897670587156, + "learning_rate": 2.0153621602018276e-05, + "loss": 0.5255881547927856, + "step": 499 + }, + { + "epoch": 1.6501650165016502, + "grad_norm": 0.27057309315143646, + "learning_rate": 2.0076811367493736e-05, + "loss": 0.5134017467498779, + "step": 500 + }, + { + "epoch": 1.6534653465346536, + "grad_norm": 0.2603322919481828, + "learning_rate": 2e-05, + "loss": 0.4548872113227844, + "step": 501 + }, + { + "epoch": 1.6567656765676566, + "grad_norm": 0.2841830282558966, + "learning_rate": 1.9923188632506268e-05, + "loss": 0.4879235625267029, + "step": 502 + }, + { + "epoch": 1.66006600660066, + "grad_norm": 0.2718072353452213, + "learning_rate": 1.9846378397981737e-05, + 
"loss": 0.5488070249557495, + "step": 503 + }, + { + "epoch": 1.6633663366336635, + "grad_norm": 0.26980717544426264, + "learning_rate": 1.976957042937887e-05, + "loss": 0.474858820438385, + "step": 504 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.2729038695715346, + "learning_rate": 1.969276585961674e-05, + "loss": 0.573983907699585, + "step": 505 + }, + { + "epoch": 1.66996699669967, + "grad_norm": 0.2754435399081945, + "learning_rate": 1.9615965821564284e-05, + "loss": 0.5299487709999084, + "step": 506 + }, + { + "epoch": 1.6732673267326734, + "grad_norm": 0.28078214205826996, + "learning_rate": 1.9539171448023568e-05, + "loss": 0.580963134765625, + "step": 507 + }, + { + "epoch": 1.6765676567656764, + "grad_norm": 0.28056872169008745, + "learning_rate": 1.946238387171313e-05, + "loss": 0.5240850448608398, + "step": 508 + }, + { + "epoch": 1.6798679867986799, + "grad_norm": 0.27579932032687055, + "learning_rate": 1.9385604225251245e-05, + "loss": 0.5397930145263672, + "step": 509 + }, + { + "epoch": 1.6831683168316833, + "grad_norm": 0.2649239844230271, + "learning_rate": 1.9308833641139235e-05, + "loss": 0.4949077367782593, + "step": 510 + }, + { + "epoch": 1.6864686468646863, + "grad_norm": 0.26821293718742795, + "learning_rate": 1.9232073251744715e-05, + "loss": 0.4906027913093567, + "step": 511 + }, + { + "epoch": 1.6897689768976898, + "grad_norm": 0.30180544906142204, + "learning_rate": 1.9155324189284957e-05, + "loss": 0.562363862991333, + "step": 512 + }, + { + "epoch": 1.693069306930693, + "grad_norm": 0.26560887539548794, + "learning_rate": 1.9078587585810167e-05, + "loss": 0.5347090363502502, + "step": 513 + }, + { + "epoch": 1.6963696369636962, + "grad_norm": 0.28206984650870465, + "learning_rate": 1.900186457318676e-05, + "loss": 0.5554836988449097, + "step": 514 + }, + { + "epoch": 1.6996699669966997, + "grad_norm": 0.2667791650009087, + "learning_rate": 1.8925156283080684e-05, + "loss": 0.5179104208946228, + "step": 515 + }, + { + 
"epoch": 1.702970297029703, + "grad_norm": 0.2759730227945326, + "learning_rate": 1.8848463846940756e-05, + "loss": 0.552240252494812, + "step": 516 + }, + { + "epoch": 1.7062706270627062, + "grad_norm": 0.34634391778922186, + "learning_rate": 1.8771788395981915e-05, + "loss": 0.534430980682373, + "step": 517 + }, + { + "epoch": 1.7095709570957096, + "grad_norm": 0.26711110641337843, + "learning_rate": 1.8695131061168598e-05, + "loss": 0.5601803064346313, + "step": 518 + }, + { + "epoch": 1.7128712871287128, + "grad_norm": 0.3479876576460715, + "learning_rate": 1.8618492973198e-05, + "loss": 0.5119711756706238, + "step": 519 + }, + { + "epoch": 1.716171617161716, + "grad_norm": 0.32608510378908223, + "learning_rate": 1.8541875262483446e-05, + "loss": 0.5632577538490295, + "step": 520 + }, + { + "epoch": 1.7194719471947195, + "grad_norm": 0.2744236737297373, + "learning_rate": 1.8465279059137686e-05, + "loss": 0.5499478578567505, + "step": 521 + }, + { + "epoch": 1.7227722772277227, + "grad_norm": 0.2835433030263243, + "learning_rate": 1.8388705492956244e-05, + "loss": 0.5176683664321899, + "step": 522 + }, + { + "epoch": 1.726072607260726, + "grad_norm": 0.30494439216544983, + "learning_rate": 1.8312155693400735e-05, + "loss": 0.49528205394744873, + "step": 523 + }, + { + "epoch": 1.7293729372937294, + "grad_norm": 0.26710805184601655, + "learning_rate": 1.8235630789582213e-05, + "loss": 0.5684216022491455, + "step": 524 + }, + { + "epoch": 1.7326732673267327, + "grad_norm": 0.3852411183060649, + "learning_rate": 1.815913191024454e-05, + "loss": 0.5375942587852478, + "step": 525 + }, + { + "epoch": 1.7359735973597359, + "grad_norm": 0.33262500157086355, + "learning_rate": 1.8082660183747704e-05, + "loss": 0.5541956424713135, + "step": 526 + }, + { + "epoch": 1.7392739273927393, + "grad_norm": 0.28642691265671333, + "learning_rate": 1.8006216738051175e-05, + "loss": 0.5304872393608093, + "step": 527 + }, + { + "epoch": 1.7425742574257426, + "grad_norm": 
0.2734388390360432, + "learning_rate": 1.7929802700697297e-05, + "loss": 0.48648735880851746, + "step": 528 + }, + { + "epoch": 1.7458745874587458, + "grad_norm": 0.28617564742207474, + "learning_rate": 1.7853419198794638e-05, + "loss": 0.49221059679985046, + "step": 529 + }, + { + "epoch": 1.7491749174917492, + "grad_norm": 0.2790947673251484, + "learning_rate": 1.7777067359001375e-05, + "loss": 0.5652948021888733, + "step": 530 + }, + { + "epoch": 1.7524752475247525, + "grad_norm": 0.2853703561489374, + "learning_rate": 1.7700748307508643e-05, + "loss": 0.5187686681747437, + "step": 531 + }, + { + "epoch": 1.7557755775577557, + "grad_norm": 0.2853976224574607, + "learning_rate": 1.7624463170023974e-05, + "loss": 0.5013114809989929, + "step": 532 + }, + { + "epoch": 1.7590759075907592, + "grad_norm": 0.2619757068753479, + "learning_rate": 1.7548213071754663e-05, + "loss": 0.47477245330810547, + "step": 533 + }, + { + "epoch": 1.7623762376237624, + "grad_norm": 0.29220608585061886, + "learning_rate": 1.7471999137391162e-05, + "loss": 0.5600515007972717, + "step": 534 + }, + { + "epoch": 1.7656765676567656, + "grad_norm": 0.2680464906367101, + "learning_rate": 1.7395822491090513e-05, + "loss": 0.5017521381378174, + "step": 535 + }, + { + "epoch": 1.768976897689769, + "grad_norm": 0.3164936697237469, + "learning_rate": 1.7319684256459773e-05, + "loss": 0.48718830943107605, + "step": 536 + }, + { + "epoch": 1.7722772277227723, + "grad_norm": 0.26576630911317906, + "learning_rate": 1.72435855565394e-05, + "loss": 0.5348131060600281, + "step": 537 + }, + { + "epoch": 1.7755775577557755, + "grad_norm": 0.3785718389935733, + "learning_rate": 1.716752751378673e-05, + "loss": 0.5132070183753967, + "step": 538 + }, + { + "epoch": 1.778877887788779, + "grad_norm": 0.2912227396538846, + "learning_rate": 1.7091511250059407e-05, + "loss": 0.5194598436355591, + "step": 539 + }, + { + "epoch": 1.7821782178217822, + "grad_norm": 0.25340183641995817, + "learning_rate": 
1.701553788659883e-05, + "loss": 0.4950656294822693, + "step": 540 + }, + { + "epoch": 1.7854785478547854, + "grad_norm": 0.32993048381725726, + "learning_rate": 1.6939608544013603e-05, + "loss": 0.5465744137763977, + "step": 541 + }, + { + "epoch": 1.7887788778877889, + "grad_norm": 0.33326548174687204, + "learning_rate": 1.6863724342263047e-05, + "loss": 0.5328625440597534, + "step": 542 + }, + { + "epoch": 1.7920792079207921, + "grad_norm": 0.2747817812302539, + "learning_rate": 1.6787886400640645e-05, + "loss": 0.483689546585083, + "step": 543 + }, + { + "epoch": 1.7953795379537953, + "grad_norm": 0.2619017709081145, + "learning_rate": 1.6712095837757525e-05, + "loss": 0.5225390195846558, + "step": 544 + }, + { + "epoch": 1.7986798679867988, + "grad_norm": 0.2718453161830156, + "learning_rate": 1.6636353771526005e-05, + "loss": 0.5168595314025879, + "step": 545 + }, + { + "epoch": 1.801980198019802, + "grad_norm": 0.2915579523683445, + "learning_rate": 1.6560661319143064e-05, + "loss": 0.5257725119590759, + "step": 546 + }, + { + "epoch": 1.8052805280528053, + "grad_norm": 0.2767711815305055, + "learning_rate": 1.648501959707387e-05, + "loss": 0.5023485422134399, + "step": 547 + }, + { + "epoch": 1.8085808580858087, + "grad_norm": 0.267570701584644, + "learning_rate": 1.6409429721035324e-05, + "loss": 0.48897239565849304, + "step": 548 + }, + { + "epoch": 1.811881188118812, + "grad_norm": 0.28714006005114934, + "learning_rate": 1.63338928059796e-05, + "loss": 0.5318676829338074, + "step": 549 + }, + { + "epoch": 1.8151815181518152, + "grad_norm": 0.2802563301473015, + "learning_rate": 1.6258409966077693e-05, + "loss": 0.4996787905693054, + "step": 550 + }, + { + "epoch": 1.8184818481848186, + "grad_norm": 0.28354713397276166, + "learning_rate": 1.6182982314702987e-05, + "loss": 0.4833434820175171, + "step": 551 + }, + { + "epoch": 1.8217821782178216, + "grad_norm": 0.2904168234412241, + "learning_rate": 1.6107610964414836e-05, + "loss": 0.5050291419029236, + 
"step": 552 + }, + { + "epoch": 1.825082508250825, + "grad_norm": 0.2859100119195952, + "learning_rate": 1.6032297026942154e-05, + "loss": 0.5423529148101807, + "step": 553 + }, + { + "epoch": 1.8283828382838285, + "grad_norm": 0.2700093369793658, + "learning_rate": 1.5957041613167007e-05, + "loss": 0.5670536756515503, + "step": 554 + }, + { + "epoch": 1.8316831683168315, + "grad_norm": 0.2784484594925466, + "learning_rate": 1.5881845833108245e-05, + "loss": 0.5148528814315796, + "step": 555 + }, + { + "epoch": 1.834983498349835, + "grad_norm": 0.2795083034807244, + "learning_rate": 1.5806710795905113e-05, + "loss": 0.5441350340843201, + "step": 556 + }, + { + "epoch": 1.8382838283828384, + "grad_norm": 0.27706485047893287, + "learning_rate": 1.5731637609800897e-05, + "loss": 0.5338016748428345, + "step": 557 + }, + { + "epoch": 1.8415841584158414, + "grad_norm": 0.281671337152691, + "learning_rate": 1.5656627382126587e-05, + "loss": 0.522803783416748, + "step": 558 + }, + { + "epoch": 1.844884488448845, + "grad_norm": 0.2867314215651197, + "learning_rate": 1.5581681219284523e-05, + "loss": 0.5079183578491211, + "step": 559 + }, + { + "epoch": 1.8481848184818483, + "grad_norm": 0.2880604655799914, + "learning_rate": 1.5506800226732104e-05, + "loss": 0.5360547304153442, + "step": 560 + }, + { + "epoch": 1.8514851485148514, + "grad_norm": 0.276328956502413, + "learning_rate": 1.5431985508965438e-05, + "loss": 0.5137909650802612, + "step": 561 + }, + { + "epoch": 1.8547854785478548, + "grad_norm": 0.26198432963654783, + "learning_rate": 1.5357238169503107e-05, + "loss": 0.513020396232605, + "step": 562 + }, + { + "epoch": 1.858085808580858, + "grad_norm": 0.3155751914603546, + "learning_rate": 1.5282559310869856e-05, + "loss": 0.5015939474105835, + "step": 563 + }, + { + "epoch": 1.8613861386138613, + "grad_norm": 0.2654102353913447, + "learning_rate": 1.5207950034580317e-05, + "loss": 0.5012743473052979, + "step": 564 + }, + { + "epoch": 1.8646864686468647, + 
"grad_norm": 0.27309132142690246, + "learning_rate": 1.5133411441122799e-05, + "loss": 0.48864254355430603, + "step": 565 + }, + { + "epoch": 1.867986798679868, + "grad_norm": 0.6058665885379618, + "learning_rate": 1.5058944629943044e-05, + "loss": 0.437102347612381, + "step": 566 + }, + { + "epoch": 1.8712871287128712, + "grad_norm": 0.2718164602566872, + "learning_rate": 1.4984550699427978e-05, + "loss": 0.5518525838851929, + "step": 567 + }, + { + "epoch": 1.8745874587458746, + "grad_norm": 0.2832474093938169, + "learning_rate": 1.4910230746889559e-05, + "loss": 0.5618141889572144, + "step": 568 + }, + { + "epoch": 1.8778877887788779, + "grad_norm": 0.2790138686096534, + "learning_rate": 1.4835985868548557e-05, + "loss": 0.4990406632423401, + "step": 569 + }, + { + "epoch": 1.881188118811881, + "grad_norm": 0.26198363334655667, + "learning_rate": 1.4761817159518415e-05, + "loss": 0.5004926919937134, + "step": 570 + }, + { + "epoch": 1.8844884488448845, + "grad_norm": 0.28233065536105734, + "learning_rate": 1.4687725713789042e-05, + "loss": 0.5166051983833313, + "step": 571 + }, + { + "epoch": 1.8877887788778878, + "grad_norm": 0.2654807250852616, + "learning_rate": 1.461371262421074e-05, + "loss": 0.5510391592979431, + "step": 572 + }, + { + "epoch": 1.891089108910891, + "grad_norm": 0.2766439695892797, + "learning_rate": 1.4539778982478061e-05, + "loss": 0.5305938720703125, + "step": 573 + }, + { + "epoch": 1.8943894389438944, + "grad_norm": 0.35617765802983586, + "learning_rate": 1.4465925879113663e-05, + "loss": 0.562718391418457, + "step": 574 + }, + { + "epoch": 1.8976897689768977, + "grad_norm": 0.26373250902859363, + "learning_rate": 1.4392154403452294e-05, + "loss": 0.541257381439209, + "step": 575 + }, + { + "epoch": 1.900990099009901, + "grad_norm": 0.2584596806712207, + "learning_rate": 1.4318465643624696e-05, + "loss": 0.556663990020752, + "step": 576 + }, + { + "epoch": 1.9042904290429044, + "grad_norm": 0.2655751613308258, + "learning_rate": 
1.4244860686541522e-05, + "loss": 0.5691581964492798, + "step": 577 + }, + { + "epoch": 1.9075907590759076, + "grad_norm": 0.3146864569567829, + "learning_rate": 1.4171340617877349e-05, + "loss": 0.513170063495636, + "step": 578 + }, + { + "epoch": 1.9108910891089108, + "grad_norm": 0.288458498752148, + "learning_rate": 1.4097906522054656e-05, + "loss": 0.5679588317871094, + "step": 579 + }, + { + "epoch": 1.9141914191419143, + "grad_norm": 0.2858005511149637, + "learning_rate": 1.4024559482227818e-05, + "loss": 0.513796329498291, + "step": 580 + }, + { + "epoch": 1.9174917491749175, + "grad_norm": 0.25543101337641916, + "learning_rate": 1.3951300580267108e-05, + "loss": 0.4618416428565979, + "step": 581 + }, + { + "epoch": 1.9207920792079207, + "grad_norm": 0.2670194314216259, + "learning_rate": 1.3878130896742796e-05, + "loss": 0.5491312742233276, + "step": 582 + }, + { + "epoch": 1.9240924092409242, + "grad_norm": 0.24204031552297342, + "learning_rate": 1.3805051510909164e-05, + "loss": 0.5524745583534241, + "step": 583 + }, + { + "epoch": 1.9273927392739274, + "grad_norm": 0.25091865473771396, + "learning_rate": 1.3732063500688604e-05, + "loss": 0.5232075452804565, + "step": 584 + }, + { + "epoch": 1.9306930693069306, + "grad_norm": 0.26059464209400784, + "learning_rate": 1.3659167942655702e-05, + "loss": 0.5257346034049988, + "step": 585 + }, + { + "epoch": 1.933993399339934, + "grad_norm": 0.2814401591736557, + "learning_rate": 1.35863659120214e-05, + "loss": 0.5196455717086792, + "step": 586 + }, + { + "epoch": 1.9372937293729373, + "grad_norm": 0.2624714306516865, + "learning_rate": 1.3513658482617085e-05, + "loss": 0.5122568011283875, + "step": 587 + }, + { + "epoch": 1.9405940594059405, + "grad_norm": 0.2644911414307543, + "learning_rate": 1.3441046726878786e-05, + "loss": 0.5236790180206299, + "step": 588 + }, + { + "epoch": 1.943894389438944, + "grad_norm": 0.2699458396883844, + "learning_rate": 1.3368531715831337e-05, + "loss": 0.5508555173873901, + 
"step": 589 + }, + { + "epoch": 1.9471947194719472, + "grad_norm": 0.26005129022694123, + "learning_rate": 1.3296114519072594e-05, + "loss": 0.4742932617664337, + "step": 590 + }, + { + "epoch": 1.9504950495049505, + "grad_norm": 0.2530711129220065, + "learning_rate": 1.3223796204757638e-05, + "loss": 0.5406354665756226, + "step": 591 + }, + { + "epoch": 1.953795379537954, + "grad_norm": 0.26847075280504556, + "learning_rate": 1.3151577839583043e-05, + "loss": 0.508262038230896, + "step": 592 + }, + { + "epoch": 1.9570957095709571, + "grad_norm": 0.2601716190776577, + "learning_rate": 1.3079460488771136e-05, + "loss": 0.5260204672813416, + "step": 593 + }, + { + "epoch": 1.9603960396039604, + "grad_norm": 0.2597900374740898, + "learning_rate": 1.3007445216054257e-05, + "loss": 0.522408127784729, + "step": 594 + }, + { + "epoch": 1.9636963696369638, + "grad_norm": 0.23858694591096777, + "learning_rate": 1.2935533083659114e-05, + "loss": 0.4849371910095215, + "step": 595 + }, + { + "epoch": 1.966996699669967, + "grad_norm": 0.26399518807159883, + "learning_rate": 1.2863725152291091e-05, + "loss": 0.5319019556045532, + "step": 596 + }, + { + "epoch": 1.9702970297029703, + "grad_norm": 0.2797422170192374, + "learning_rate": 1.2792022481118587e-05, + "loss": 0.5562412738800049, + "step": 597 + }, + { + "epoch": 1.9735973597359737, + "grad_norm": 0.2537907416959109, + "learning_rate": 1.2720426127757431e-05, + "loss": 0.49608999490737915, + "step": 598 + }, + { + "epoch": 1.976897689768977, + "grad_norm": 0.2521690484869479, + "learning_rate": 1.2648937148255253e-05, + "loss": 0.5082768201828003, + "step": 599 + }, + { + "epoch": 1.9801980198019802, + "grad_norm": 0.2572245668654862, + "learning_rate": 1.2577556597075933e-05, + "loss": 0.5706614255905151, + "step": 600 + }, + { + "epoch": 1.9834983498349836, + "grad_norm": 0.2697883750179181, + "learning_rate": 1.2506285527083991e-05, + "loss": 0.5366507768630981, + "step": 601 + }, + { + "epoch": 1.9867986798679866, + 
"grad_norm": 0.26402819852563175, + "learning_rate": 1.2435124989529139e-05, + "loss": 0.5462816953659058, + "step": 602 + }, + { + "epoch": 1.99009900990099, + "grad_norm": 0.246894878071046, + "learning_rate": 1.236407603403072e-05, + "loss": 0.5050650238990784, + "step": 603 + }, + { + "epoch": 1.9933993399339935, + "grad_norm": 0.477370357077484, + "learning_rate": 1.2293139708562221e-05, + "loss": 0.4915675222873688, + "step": 604 + }, + { + "epoch": 1.9966996699669965, + "grad_norm": 0.2657795870076786, + "learning_rate": 1.2222317059435863e-05, + "loss": 0.5807889103889465, + "step": 605 + }, + { + "epoch": 2.0, + "grad_norm": 0.2770967943671612, + "learning_rate": 1.2151609131287124e-05, + "loss": 0.49173152446746826, + "step": 606 + }, + { + "epoch": 2.0033003300330035, + "grad_norm": 0.7014931959992592, + "learning_rate": 1.2081016967059336e-05, + "loss": 0.4426806569099426, + "step": 607 + }, + { + "epoch": 2.0066006600660065, + "grad_norm": 0.3040348249510974, + "learning_rate": 1.201054160798833e-05, + "loss": 0.45669305324554443, + "step": 608 + }, + { + "epoch": 2.00990099009901, + "grad_norm": 0.31030490189011145, + "learning_rate": 1.1940184093587047e-05, + "loss": 0.4638911783695221, + "step": 609 + }, + { + "epoch": 2.0132013201320134, + "grad_norm": 0.36234285165121427, + "learning_rate": 1.186994546163023e-05, + "loss": 0.4541138708591461, + "step": 610 + }, + { + "epoch": 2.0165016501650164, + "grad_norm": 0.38564024677228226, + "learning_rate": 1.1799826748139079e-05, + "loss": 0.49081191420555115, + "step": 611 + }, + { + "epoch": 2.01980198019802, + "grad_norm": 0.3266656962672454, + "learning_rate": 1.1729828987366009e-05, + "loss": 0.4794033169746399, + "step": 612 + }, + { + "epoch": 2.0231023102310233, + "grad_norm": 0.291304204290645, + "learning_rate": 1.165995321177939e-05, + "loss": 0.4142993688583374, + "step": 613 + }, + { + "epoch": 2.0264026402640263, + "grad_norm": 0.33294658416576944, + "learning_rate": 1.159020045204829e-05, 
+ "loss": 0.47322210669517517, + "step": 614 + }, + { + "epoch": 2.0297029702970297, + "grad_norm": 0.3539618583487969, + "learning_rate": 1.15205717370273e-05, + "loss": 0.4899124503135681, + "step": 615 + }, + { + "epoch": 2.033003300330033, + "grad_norm": 0.2952110750729378, + "learning_rate": 1.1451068093741355e-05, + "loss": 0.4857853055000305, + "step": 616 + }, + { + "epoch": 2.036303630363036, + "grad_norm": 0.28290377247578213, + "learning_rate": 1.1381690547370559e-05, + "loss": 0.4790021479129791, + "step": 617 + }, + { + "epoch": 2.0396039603960396, + "grad_norm": 0.2902876717109542, + "learning_rate": 1.13124401212351e-05, + "loss": 0.4519282281398773, + "step": 618 + }, + { + "epoch": 2.042904290429043, + "grad_norm": 0.32584221310071065, + "learning_rate": 1.1243317836780138e-05, + "loss": 0.4738570749759674, + "step": 619 + }, + { + "epoch": 2.046204620462046, + "grad_norm": 0.3093985088780693, + "learning_rate": 1.1174324713560751e-05, + "loss": 0.5111795663833618, + "step": 620 + }, + { + "epoch": 2.0495049504950495, + "grad_norm": 0.2707360386310654, + "learning_rate": 1.1105461769226858e-05, + "loss": 0.4750926196575165, + "step": 621 + }, + { + "epoch": 2.052805280528053, + "grad_norm": 0.3107814822051771, + "learning_rate": 1.1036730019508259e-05, + "loss": 0.4580341577529907, + "step": 622 + }, + { + "epoch": 2.056105610561056, + "grad_norm": 0.28803288143665157, + "learning_rate": 1.0968130478199635e-05, + "loss": 0.43322116136550903, + "step": 623 + }, + { + "epoch": 2.0594059405940595, + "grad_norm": 0.2810686637672446, + "learning_rate": 1.0899664157145562e-05, + "loss": 0.5015532374382019, + "step": 624 + }, + { + "epoch": 2.062706270627063, + "grad_norm": 0.28464578766110366, + "learning_rate": 1.0831332066225645e-05, + "loss": 0.4508541226387024, + "step": 625 + }, + { + "epoch": 2.066006600660066, + "grad_norm": 0.2904901154874499, + "learning_rate": 1.0763135213339589e-05, + "loss": 0.49554720520973206, + "step": 626 + }, + { + 
"epoch": 2.0693069306930694, + "grad_norm": 0.27820378239401394, + "learning_rate": 1.0695074604392305e-05, + "loss": 0.4523652493953705, + "step": 627 + }, + { + "epoch": 2.072607260726073, + "grad_norm": 0.2794675014886217, + "learning_rate": 1.0627151243279136e-05, + "loss": 0.44413498044013977, + "step": 628 + }, + { + "epoch": 2.075907590759076, + "grad_norm": 0.30159300158430347, + "learning_rate": 1.055936613187101e-05, + "loss": 0.4645534157752991, + "step": 629 + }, + { + "epoch": 2.0792079207920793, + "grad_norm": 0.26698861915138783, + "learning_rate": 1.0491720269999663e-05, + "loss": 0.44823265075683594, + "step": 630 + }, + { + "epoch": 2.0825082508250823, + "grad_norm": 0.2813791646704669, + "learning_rate": 1.0424214655442891e-05, + "loss": 0.45181727409362793, + "step": 631 + }, + { + "epoch": 2.0858085808580857, + "grad_norm": 0.28721240697359884, + "learning_rate": 1.0356850283909852e-05, + "loss": 0.5371145009994507, + "step": 632 + }, + { + "epoch": 2.089108910891089, + "grad_norm": 0.26030729348418064, + "learning_rate": 1.0289628149026369e-05, + "loss": 0.4564274847507477, + "step": 633 + }, + { + "epoch": 2.092409240924092, + "grad_norm": 0.3008427259435641, + "learning_rate": 1.0222549242320254e-05, + "loss": 0.4490276873111725, + "step": 634 + }, + { + "epoch": 2.0957095709570956, + "grad_norm": 0.27241405218961473, + "learning_rate": 1.0155614553206715e-05, + "loss": 0.4663650095462799, + "step": 635 + }, + { + "epoch": 2.099009900990099, + "grad_norm": 0.2814271376941218, + "learning_rate": 1.0088825068973746e-05, + "loss": 0.46265488862991333, + "step": 636 + }, + { + "epoch": 2.102310231023102, + "grad_norm": 0.27083223857822414, + "learning_rate": 1.002218177476756e-05, + "loss": 0.45717963576316833, + "step": 637 + }, + { + "epoch": 2.1056105610561056, + "grad_norm": 0.27321625989679976, + "learning_rate": 9.955685653578068e-06, + "loss": 0.47119495272636414, + "step": 638 + }, + { + "epoch": 2.108910891089109, + "grad_norm": 
0.2756031623165562, + "learning_rate": 9.88933768622439e-06, + "loss": 0.46565738320350647, + "step": 639 + }, + { + "epoch": 2.112211221122112, + "grad_norm": 0.26745369116167694, + "learning_rate": 9.823138851340337e-06, + "loss": 0.45610398054122925, + "step": 640 + }, + { + "epoch": 2.1155115511551155, + "grad_norm": 0.2722722292829376, + "learning_rate": 9.75709012536005e-06, + "loss": 0.4907280206680298, + "step": 641 + }, + { + "epoch": 2.118811881188119, + "grad_norm": 0.3111977337695957, + "learning_rate": 9.691192482503546e-06, + "loss": 0.500091552734375, + "step": 642 + }, + { + "epoch": 2.122112211221122, + "grad_norm": 0.2648612882642695, + "learning_rate": 9.625446894762371e-06, + "loss": 0.4330231547355652, + "step": 643 + }, + { + "epoch": 2.1254125412541254, + "grad_norm": 0.2809597353379975, + "learning_rate": 9.559854331885233e-06, + "loss": 0.4750261902809143, + "step": 644 + }, + { + "epoch": 2.128712871287129, + "grad_norm": 0.28201431758911444, + "learning_rate": 9.49441576136374e-06, + "loss": 0.4567373991012573, + "step": 645 + }, + { + "epoch": 2.132013201320132, + "grad_norm": 0.2901654659031683, + "learning_rate": 9.429132148418116e-06, + "loss": 0.4601932168006897, + "step": 646 + }, + { + "epoch": 2.1353135313531353, + "grad_norm": 0.2792782648133288, + "learning_rate": 9.364004455982931e-06, + "loss": 0.4909035265445709, + "step": 647 + }, + { + "epoch": 2.1386138613861387, + "grad_norm": 0.2531215125004539, + "learning_rate": 9.299033644692948e-06, + "loss": 0.4443170428276062, + "step": 648 + }, + { + "epoch": 2.1419141914191417, + "grad_norm": 0.2676386529649011, + "learning_rate": 9.234220672868928e-06, + "loss": 0.46534985303878784, + "step": 649 + }, + { + "epoch": 2.145214521452145, + "grad_norm": 0.2667778492620529, + "learning_rate": 9.169566496503476e-06, + "loss": 0.4351472854614258, + "step": 650 + }, + { + "epoch": 2.1485148514851486, + "grad_norm": 0.26819623679400084, + "learning_rate": 9.105072069246983e-06, + "loss": 
0.41445475816726685, + "step": 651 + }, + { + "epoch": 2.1518151815181517, + "grad_norm": 0.2627848025641513, + "learning_rate": 9.040738342393532e-06, + "loss": 0.475847989320755, + "step": 652 + }, + { + "epoch": 2.155115511551155, + "grad_norm": 0.26883146792086515, + "learning_rate": 8.976566264866876e-06, + "loss": 0.48487618565559387, + "step": 653 + }, + { + "epoch": 2.1584158415841586, + "grad_norm": 0.2373773636564882, + "learning_rate": 8.912556783206414e-06, + "loss": 0.4661785364151001, + "step": 654 + }, + { + "epoch": 2.1617161716171616, + "grad_norm": 0.25939800378632233, + "learning_rate": 8.84871084155328e-06, + "loss": 0.48009538650512695, + "step": 655 + }, + { + "epoch": 2.165016501650165, + "grad_norm": 0.26858346089342566, + "learning_rate": 8.785029381636387e-06, + "loss": 0.45644935965538025, + "step": 656 + }, + { + "epoch": 2.1683168316831685, + "grad_norm": 0.25509808532967904, + "learning_rate": 8.721513342758516e-06, + "loss": 0.4896699786186218, + "step": 657 + }, + { + "epoch": 2.1716171617161715, + "grad_norm": 0.2678040151014407, + "learning_rate": 8.658163661782507e-06, + "loss": 0.4286258816719055, + "step": 658 + }, + { + "epoch": 2.174917491749175, + "grad_norm": 0.25541690613787077, + "learning_rate": 8.59498127311742e-06, + "loss": 0.42029869556427, + "step": 659 + }, + { + "epoch": 2.1782178217821784, + "grad_norm": 0.2748486648157056, + "learning_rate": 8.531967108704722e-06, + "loss": 0.48522356152534485, + "step": 660 + }, + { + "epoch": 2.1815181518151814, + "grad_norm": 0.37918495336042346, + "learning_rate": 8.4691220980046e-06, + "loss": 0.461814284324646, + "step": 661 + }, + { + "epoch": 2.184818481848185, + "grad_norm": 0.2581277433441387, + "learning_rate": 8.406447167982205e-06, + "loss": 0.49913299083709717, + "step": 662 + }, + { + "epoch": 2.1881188118811883, + "grad_norm": 0.2804949954645611, + "learning_rate": 8.343943243094008e-06, + "loss": 0.4936009645462036, + "step": 663 + }, + { + "epoch": 
2.1914191419141913, + "grad_norm": 0.2621319196989517, + "learning_rate": 8.281611245274123e-06, + "loss": 0.44817712903022766, + "step": 664 + }, + { + "epoch": 2.1947194719471947, + "grad_norm": 0.26441078845804705, + "learning_rate": 8.219452093920763e-06, + "loss": 0.482817143201828, + "step": 665 + }, + { + "epoch": 2.198019801980198, + "grad_norm": 0.25954690482303255, + "learning_rate": 8.157466705882645e-06, + "loss": 0.4643383026123047, + "step": 666 + }, + { + "epoch": 2.201320132013201, + "grad_norm": 0.26531559844936237, + "learning_rate": 8.095655995445472e-06, + "loss": 0.4797602593898773, + "step": 667 + }, + { + "epoch": 2.2046204620462047, + "grad_norm": 0.26505896756203806, + "learning_rate": 8.03402087431844e-06, + "loss": 0.44109994173049927, + "step": 668 + }, + { + "epoch": 2.207920792079208, + "grad_norm": 0.24679836702691405, + "learning_rate": 7.972562251620817e-06, + "loss": 0.46359869837760925, + "step": 669 + }, + { + "epoch": 2.211221122112211, + "grad_norm": 0.23925371744802634, + "learning_rate": 7.9112810338685e-06, + "loss": 0.4576035141944885, + "step": 670 + }, + { + "epoch": 2.2145214521452146, + "grad_norm": 0.2854541383231889, + "learning_rate": 7.850178124960678e-06, + "loss": 0.40902045369148254, + "step": 671 + }, + { + "epoch": 2.217821782178218, + "grad_norm": 0.2726752140080075, + "learning_rate": 7.789254426166454e-06, + "loss": 0.45797932147979736, + "step": 672 + }, + { + "epoch": 2.221122112211221, + "grad_norm": 0.2463208855251595, + "learning_rate": 7.728510836111602e-06, + "loss": 0.43204474449157715, + "step": 673 + }, + { + "epoch": 2.2244224422442245, + "grad_norm": 0.2632084235311744, + "learning_rate": 7.667948250765278e-06, + "loss": 0.46007901430130005, + "step": 674 + }, + { + "epoch": 2.227722772277228, + "grad_norm": 0.2508043419515415, + "learning_rate": 7.607567563426823e-06, + "loss": 0.46342402696609497, + "step": 675 + }, + { + "epoch": 2.231023102310231, + "grad_norm": 0.25728063807342477, + 
"learning_rate": 7.5473696647125605e-06, + "loss": 0.48953354358673096, + "step": 676 + }, + { + "epoch": 2.2343234323432344, + "grad_norm": 0.2667124077929822, + "learning_rate": 7.487355442542696e-06, + "loss": 0.5022163391113281, + "step": 677 + }, + { + "epoch": 2.237623762376238, + "grad_norm": 0.2666199657154719, + "learning_rate": 7.4275257821281995e-06, + "loss": 0.5144001245498657, + "step": 678 + }, + { + "epoch": 2.240924092409241, + "grad_norm": 0.2598091753134079, + "learning_rate": 7.3678815659577505e-06, + "loss": 0.489937961101532, + "step": 679 + }, + { + "epoch": 2.2442244224422443, + "grad_norm": 0.25000738365352393, + "learning_rate": 7.3084236737847125e-06, + "loss": 0.48842746019363403, + "step": 680 + }, + { + "epoch": 2.2475247524752477, + "grad_norm": 0.2672754249714767, + "learning_rate": 7.249152982614176e-06, + "loss": 0.5024458765983582, + "step": 681 + }, + { + "epoch": 2.2508250825082508, + "grad_norm": 0.25558161311007577, + "learning_rate": 7.190070366690014e-06, + "loss": 0.46162086725234985, + "step": 682 + }, + { + "epoch": 2.254125412541254, + "grad_norm": 0.24807827286497117, + "learning_rate": 7.13117669748199e-06, + "loss": 0.44991785287857056, + "step": 683 + }, + { + "epoch": 2.2574257425742577, + "grad_norm": 0.24635539567650763, + "learning_rate": 7.072472843672877e-06, + "loss": 0.43738633394241333, + "step": 684 + }, + { + "epoch": 2.2607260726072607, + "grad_norm": 0.25605350464823584, + "learning_rate": 7.013959671145691e-06, + "loss": 0.46122169494628906, + "step": 685 + }, + { + "epoch": 2.264026402640264, + "grad_norm": 0.24205320356251103, + "learning_rate": 6.955638042970896e-06, + "loss": 0.4504377841949463, + "step": 686 + }, + { + "epoch": 2.2673267326732676, + "grad_norm": 0.2570116198268661, + "learning_rate": 6.897508819393645e-06, + "loss": 0.4620972275733948, + "step": 687 + }, + { + "epoch": 2.2706270627062706, + "grad_norm": 0.2629731642768507, + "learning_rate": 6.8395728578211525e-06, + "loss": 
0.5271490216255188, + "step": 688 + }, + { + "epoch": 2.273927392739274, + "grad_norm": 1.9898738742816064, + "learning_rate": 6.781831012810001e-06, + "loss": 0.4448450803756714, + "step": 689 + }, + { + "epoch": 2.2772277227722775, + "grad_norm": 0.3213733503923664, + "learning_rate": 6.72428413605354e-06, + "loss": 0.4602925181388855, + "step": 690 + }, + { + "epoch": 2.2805280528052805, + "grad_norm": 0.26788259096559774, + "learning_rate": 6.6669330763693485e-06, + "loss": 0.4722862243652344, + "step": 691 + }, + { + "epoch": 2.283828382838284, + "grad_norm": 0.25272077157298134, + "learning_rate": 6.609778679686694e-06, + "loss": 0.47454553842544556, + "step": 692 + }, + { + "epoch": 2.287128712871287, + "grad_norm": 0.24015565864939845, + "learning_rate": 6.552821789034067e-06, + "loss": 0.4750802516937256, + "step": 693 + }, + { + "epoch": 2.2904290429042904, + "grad_norm": 0.2559036200154721, + "learning_rate": 6.496063244526723e-06, + "loss": 0.4640570282936096, + "step": 694 + }, + { + "epoch": 2.293729372937294, + "grad_norm": 0.25061879602537984, + "learning_rate": 6.439503883354323e-06, + "loss": 0.47181540727615356, + "step": 695 + }, + { + "epoch": 2.297029702970297, + "grad_norm": 0.24588968301020392, + "learning_rate": 6.3831445397685755e-06, + "loss": 0.4335097372531891, + "step": 696 + }, + { + "epoch": 2.3003300330033003, + "grad_norm": 0.26057507812572134, + "learning_rate": 6.3269860450709016e-06, + "loss": 0.5158364772796631, + "step": 697 + }, + { + "epoch": 2.3036303630363038, + "grad_norm": 0.24767301357183136, + "learning_rate": 6.271029227600216e-06, + "loss": 0.497075617313385, + "step": 698 + }, + { + "epoch": 2.3069306930693068, + "grad_norm": 0.2612680212099097, + "learning_rate": 6.215274912720697e-06, + "loss": 0.4946526288986206, + "step": 699 + }, + { + "epoch": 2.31023102310231, + "grad_norm": 0.25694731286364175, + "learning_rate": 6.159723922809577e-06, + "loss": 0.4632418155670166, + "step": 700 + }, + { + "epoch": 
2.3135313531353137, + "grad_norm": 0.26826842519558464, + "learning_rate": 6.10437707724507e-06, + "loss": 0.4936927258968353, + "step": 701 + }, + { + "epoch": 2.3168316831683167, + "grad_norm": 0.3039451981089408, + "learning_rate": 6.049235192394242e-06, + "loss": 0.4373137056827545, + "step": 702 + }, + { + "epoch": 2.32013201320132, + "grad_norm": 0.2502753739217944, + "learning_rate": 5.994299081600996e-06, + "loss": 0.49224400520324707, + "step": 703 + }, + { + "epoch": 2.3234323432343236, + "grad_norm": 0.25232784831466315, + "learning_rate": 5.939569555174045e-06, + "loss": 0.453000545501709, + "step": 704 + }, + { + "epoch": 2.3267326732673266, + "grad_norm": 0.2443845287083898, + "learning_rate": 5.885047420374992e-06, + "loss": 0.4201410114765167, + "step": 705 + }, + { + "epoch": 2.33003300330033, + "grad_norm": 0.2757856931959748, + "learning_rate": 5.830733481406415e-06, + "loss": 0.4817071557044983, + "step": 706 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.23548633980687703, + "learning_rate": 5.776628539399975e-06, + "loss": 0.42609190940856934, + "step": 707 + }, + { + "epoch": 2.3366336633663365, + "grad_norm": 0.2484780532867763, + "learning_rate": 5.722733392404652e-06, + "loss": 0.46225881576538086, + "step": 708 + }, + { + "epoch": 2.33993399339934, + "grad_norm": 0.28677279656296756, + "learning_rate": 5.669048835374933e-06, + "loss": 0.49061962962150574, + "step": 709 + }, + { + "epoch": 2.3432343234323434, + "grad_norm": 0.25600200089074804, + "learning_rate": 5.615575660159089e-06, + "loss": 0.4506024122238159, + "step": 710 + }, + { + "epoch": 2.3465346534653464, + "grad_norm": 0.23921559671813297, + "learning_rate": 5.562314655487522e-06, + "loss": 0.4433022141456604, + "step": 711 + }, + { + "epoch": 2.34983498349835, + "grad_norm": 0.26708565402858225, + "learning_rate": 5.5092666069611055e-06, + "loss": 0.45988917350769043, + "step": 712 + }, + { + "epoch": 2.3531353135313533, + "grad_norm": 0.2294068192725238, + 
"learning_rate": 5.4564322970396154e-06, + "loss": 0.44675180315971375, + "step": 713 + }, + { + "epoch": 2.3564356435643563, + "grad_norm": 0.2431380886271115, + "learning_rate": 5.403812505030157e-06, + "loss": 0.46991807222366333, + "step": 714 + }, + { + "epoch": 2.3597359735973598, + "grad_norm": 0.2412850801003648, + "learning_rate": 5.351408007075714e-06, + "loss": 0.49208664894104004, + "step": 715 + }, + { + "epoch": 2.363036303630363, + "grad_norm": 2.760535806072788, + "learning_rate": 5.299219576143673e-06, + "loss": 0.48280128836631775, + "step": 716 + }, + { + "epoch": 2.366336633663366, + "grad_norm": 0.24609236023763137, + "learning_rate": 5.247247982014414e-06, + "loss": 0.4491961896419525, + "step": 717 + }, + { + "epoch": 2.3696369636963697, + "grad_norm": 0.24672380739006747, + "learning_rate": 5.195493991269991e-06, + "loss": 0.4943190813064575, + "step": 718 + }, + { + "epoch": 2.372937293729373, + "grad_norm": 0.27378763646010795, + "learning_rate": 5.143958367282795e-06, + "loss": 0.4586840867996216, + "step": 719 + }, + { + "epoch": 2.376237623762376, + "grad_norm": 0.2422334792581867, + "learning_rate": 5.0926418702042914e-06, + "loss": 0.46227943897247314, + "step": 720 + }, + { + "epoch": 2.3795379537953796, + "grad_norm": 0.23796137337817433, + "learning_rate": 5.041545256953839e-06, + "loss": 0.45386868715286255, + "step": 721 + }, + { + "epoch": 2.382838283828383, + "grad_norm": 0.24415832537414764, + "learning_rate": 4.990669281207492e-06, + "loss": 0.5026980042457581, + "step": 722 + }, + { + "epoch": 2.386138613861386, + "grad_norm": 0.247792875546048, + "learning_rate": 4.940014693386909e-06, + "loss": 0.4834757447242737, + "step": 723 + }, + { + "epoch": 2.3894389438943895, + "grad_norm": 0.43027345510854853, + "learning_rate": 4.889582240648254e-06, + "loss": 0.44382545351982117, + "step": 724 + }, + { + "epoch": 2.3927392739273925, + "grad_norm": 0.2519737312346543, + "learning_rate": 4.839372666871212e-06, + "loss": 
0.45313894748687744, + "step": 725 + }, + { + "epoch": 2.396039603960396, + "grad_norm": 0.23932824454201898, + "learning_rate": 4.789386712647994e-06, + "loss": 0.4597586393356323, + "step": 726 + }, + { + "epoch": 2.3993399339933994, + "grad_norm": 0.23075224453442636, + "learning_rate": 4.739625115272408e-06, + "loss": 0.4427994191646576, + "step": 727 + }, + { + "epoch": 2.4026402640264024, + "grad_norm": 0.24450312969705348, + "learning_rate": 4.690088608729007e-06, + "loss": 0.4459637403488159, + "step": 728 + }, + { + "epoch": 2.405940594059406, + "grad_norm": 0.2516039358654293, + "learning_rate": 4.640777923682247e-06, + "loss": 0.5043150186538696, + "step": 729 + }, + { + "epoch": 2.4092409240924093, + "grad_norm": 0.26743057517217783, + "learning_rate": 4.5916937874657055e-06, + "loss": 0.4942860007286072, + "step": 730 + }, + { + "epoch": 2.4125412541254123, + "grad_norm": 0.25489023032736696, + "learning_rate": 4.5428369240713655e-06, + "loss": 0.4572402834892273, + "step": 731 + }, + { + "epoch": 2.4158415841584158, + "grad_norm": 0.24954926782274506, + "learning_rate": 4.494208054138934e-06, + "loss": 0.44927412271499634, + "step": 732 + }, + { + "epoch": 2.419141914191419, + "grad_norm": 0.24684795220524788, + "learning_rate": 4.445807894945211e-06, + "loss": 0.461928129196167, + "step": 733 + }, + { + "epoch": 2.4224422442244222, + "grad_norm": 0.2375757440633774, + "learning_rate": 4.397637160393493e-06, + "loss": 0.46279191970825195, + "step": 734 + }, + { + "epoch": 2.4257425742574257, + "grad_norm": 0.24407488686385456, + "learning_rate": 4.349696561003076e-06, + "loss": 0.48653045296669006, + "step": 735 + }, + { + "epoch": 2.429042904290429, + "grad_norm": 0.2443771510662661, + "learning_rate": 4.301986803898752e-06, + "loss": 0.4587661027908325, + "step": 736 + }, + { + "epoch": 2.432343234323432, + "grad_norm": 0.25142970699984885, + "learning_rate": 4.2545085928003906e-06, + "loss": 0.4946083426475525, + "step": 737 + }, + { + "epoch": 
2.4356435643564356, + "grad_norm": 0.2446760243354809, + "learning_rate": 4.207262628012534e-06, + "loss": 0.4614926278591156, + "step": 738 + }, + { + "epoch": 2.438943894389439, + "grad_norm": 0.24323846273380414, + "learning_rate": 4.160249606414109e-06, + "loss": 0.46377992630004883, + "step": 739 + }, + { + "epoch": 2.442244224422442, + "grad_norm": 0.2554844227936452, + "learning_rate": 4.1134702214481126e-06, + "loss": 0.4217844009399414, + "step": 740 + }, + { + "epoch": 2.4455445544554455, + "grad_norm": 0.40365970056175393, + "learning_rate": 4.066925163111406e-06, + "loss": 0.4616321325302124, + "step": 741 + }, + { + "epoch": 2.448844884488449, + "grad_norm": 0.23727547629912737, + "learning_rate": 4.020615117944515e-06, + "loss": 0.48755043745040894, + "step": 742 + }, + { + "epoch": 2.452145214521452, + "grad_norm": 0.2636488971277773, + "learning_rate": 3.974540769021529e-06, + "loss": 0.47338151931762695, + "step": 743 + }, + { + "epoch": 2.4554455445544554, + "grad_norm": 0.26687939105998304, + "learning_rate": 3.928702795940007e-06, + "loss": 0.47220849990844727, + "step": 744 + }, + { + "epoch": 2.458745874587459, + "grad_norm": 0.23440870124340746, + "learning_rate": 3.883101874810966e-06, + "loss": 0.4117845296859741, + "step": 745 + }, + { + "epoch": 2.462046204620462, + "grad_norm": 0.2389531188545627, + "learning_rate": 3.8377386782488875e-06, + "loss": 0.44338276982307434, + "step": 746 + }, + { + "epoch": 2.4653465346534653, + "grad_norm": 0.28253943840492757, + "learning_rate": 3.7926138753618257e-06, + "loss": 0.470272958278656, + "step": 747 + }, + { + "epoch": 2.4686468646864688, + "grad_norm": 0.2533414456878978, + "learning_rate": 3.747728131741517e-06, + "loss": 0.4825139045715332, + "step": 748 + }, + { + "epoch": 2.4719471947194718, + "grad_norm": 0.22813621303002277, + "learning_rate": 3.703082109453575e-06, + "loss": 0.43612140417099, + "step": 749 + }, + { + "epoch": 2.4752475247524752, + "grad_norm": 0.22709733679425215, + 
"learning_rate": 3.6586764670277065e-06, + "loss": 0.4573146402835846, + "step": 750 + }, + { + "epoch": 2.4785478547854787, + "grad_norm": 0.24807030489347143, + "learning_rate": 3.61451185944802e-06, + "loss": 0.4419093430042267, + "step": 751 + }, + { + "epoch": 2.4818481848184817, + "grad_norm": 0.23735191741997233, + "learning_rate": 3.570588938143353e-06, + "loss": 0.440906822681427, + "step": 752 + }, + { + "epoch": 2.485148514851485, + "grad_norm": 0.24792760735437452, + "learning_rate": 3.5269083509776735e-06, + "loss": 0.432383269071579, + "step": 753 + }, + { + "epoch": 2.4884488448844886, + "grad_norm": 0.24788857238042053, + "learning_rate": 3.4834707422404957e-06, + "loss": 0.4615401029586792, + "step": 754 + }, + { + "epoch": 2.4917491749174916, + "grad_norm": 0.29288725170403773, + "learning_rate": 3.440276752637417e-06, + "loss": 0.43933019042015076, + "step": 755 + }, + { + "epoch": 2.495049504950495, + "grad_norm": 0.24422605775888084, + "learning_rate": 3.3973270192806427e-06, + "loss": 0.4651945233345032, + "step": 756 + }, + { + "epoch": 2.4983498349834985, + "grad_norm": 0.3408455968625333, + "learning_rate": 3.3546221756795874e-06, + "loss": 0.4423069953918457, + "step": 757 + }, + { + "epoch": 2.5016501650165015, + "grad_norm": 0.32517130275625505, + "learning_rate": 3.3121628517315373e-06, + "loss": 0.4905679225921631, + "step": 758 + }, + { + "epoch": 2.504950495049505, + "grad_norm": 0.24015956320352147, + "learning_rate": 3.2699496737123758e-06, + "loss": 0.46989548206329346, + "step": 759 + }, + { + "epoch": 2.5082508250825084, + "grad_norm": 0.24393784259324253, + "learning_rate": 3.2279832642673025e-06, + "loss": 0.5168344378471375, + "step": 760 + }, + { + "epoch": 2.5115511551155114, + "grad_norm": 0.2446798962745333, + "learning_rate": 3.186264242401693e-06, + "loss": 0.46055924892425537, + "step": 761 + }, + { + "epoch": 2.514851485148515, + "grad_norm": 0.2561165095643357, + "learning_rate": 3.144793223471949e-06, + "loss": 
0.5135318040847778, + "step": 762 + }, + { + "epoch": 2.5181518151815183, + "grad_norm": 1.1234233736547772, + "learning_rate": 3.1035708191764246e-06, + "loss": 0.5026534199714661, + "step": 763 + }, + { + "epoch": 2.5214521452145213, + "grad_norm": 0.23866674349332329, + "learning_rate": 3.0625976375463938e-06, + "loss": 0.43348389863967896, + "step": 764 + }, + { + "epoch": 2.5247524752475248, + "grad_norm": 0.2295043927466033, + "learning_rate": 3.021874282937103e-06, + "loss": 0.4620594382286072, + "step": 765 + }, + { + "epoch": 2.5280528052805282, + "grad_norm": 0.25250691113798673, + "learning_rate": 2.9814013560188425e-06, + "loss": 0.4646865725517273, + "step": 766 + }, + { + "epoch": 2.5313531353135312, + "grad_norm": 0.2396511266141401, + "learning_rate": 2.9411794537680795e-06, + "loss": 0.46846333146095276, + "step": 767 + }, + { + "epoch": 2.5346534653465347, + "grad_norm": 0.24818691561244743, + "learning_rate": 2.901209169458672e-06, + "loss": 0.487953782081604, + "step": 768 + }, + { + "epoch": 2.537953795379538, + "grad_norm": 0.24296952409375147, + "learning_rate": 2.861491092653115e-06, + "loss": 0.4543481469154358, + "step": 769 + }, + { + "epoch": 2.541254125412541, + "grad_norm": 0.24368208278529027, + "learning_rate": 2.822025809193818e-06, + "loss": 0.4961584806442261, + "step": 770 + }, + { + "epoch": 2.5445544554455446, + "grad_norm": 0.2377375055697493, + "learning_rate": 2.7828139011944967e-06, + "loss": 0.44123750925064087, + "step": 771 + }, + { + "epoch": 2.547854785478548, + "grad_norm": 0.2301227484744363, + "learning_rate": 2.743855947031575e-06, + "loss": 0.43014320731163025, + "step": 772 + }, + { + "epoch": 2.551155115511551, + "grad_norm": 0.2250422650499226, + "learning_rate": 2.7051525213356546e-06, + "loss": 0.4774499535560608, + "step": 773 + }, + { + "epoch": 2.5544554455445545, + "grad_norm": 0.23823454905644054, + "learning_rate": 2.6667041949830186e-06, + "loss": 0.44963133335113525, + "step": 774 + }, + { + "epoch": 
2.557755775577558, + "grad_norm": 0.2554981481850554, + "learning_rate": 2.6285115350872524e-06, + "loss": 0.4840245842933655, + "step": 775 + }, + { + "epoch": 2.561056105610561, + "grad_norm": 0.2589754738757413, + "learning_rate": 2.5905751049908466e-06, + "loss": 0.5490096807479858, + "step": 776 + }, + { + "epoch": 2.5643564356435644, + "grad_norm": 0.30754095371590884, + "learning_rate": 2.5528954642568947e-06, + "loss": 0.4965711832046509, + "step": 777 + }, + { + "epoch": 2.567656765676568, + "grad_norm": 0.2261872478084121, + "learning_rate": 2.5154731686608424e-06, + "loss": 0.4518459439277649, + "step": 778 + }, + { + "epoch": 2.570957095709571, + "grad_norm": 0.24374764034742216, + "learning_rate": 2.4783087701823026e-06, + "loss": 0.5022287964820862, + "step": 779 + }, + { + "epoch": 2.5742574257425743, + "grad_norm": 0.2531412256958666, + "learning_rate": 2.441402816996876e-06, + "loss": 0.47195330262184143, + "step": 780 + }, + { + "epoch": 2.5775577557755778, + "grad_norm": 0.25588546327446415, + "learning_rate": 2.4047558534681124e-06, + "loss": 0.5155715346336365, + "step": 781 + }, + { + "epoch": 2.580858085808581, + "grad_norm": 0.26863032492519423, + "learning_rate": 2.3683684201394507e-06, + "loss": 0.46963661909103394, + "step": 782 + }, + { + "epoch": 2.5841584158415842, + "grad_norm": 0.2303264290466175, + "learning_rate": 2.3322410537262495e-06, + "loss": 0.4279938340187073, + "step": 783 + }, + { + "epoch": 2.5874587458745877, + "grad_norm": 0.24160002325917174, + "learning_rate": 2.296374287107883e-06, + "loss": 0.47818487882614136, + "step": 784 + }, + { + "epoch": 2.5907590759075907, + "grad_norm": 0.23493031875502465, + "learning_rate": 2.260768649319869e-06, + "loss": 0.4445609152317047, + "step": 785 + }, + { + "epoch": 2.594059405940594, + "grad_norm": 0.2545526596288379, + "learning_rate": 2.2254246655460765e-06, + "loss": 0.4838835895061493, + "step": 786 + }, + { + "epoch": 2.5973597359735976, + "grad_norm": 0.24631479441885146, 
+ "learning_rate": 2.1903428571109566e-06, + "loss": 0.4454101324081421, + "step": 787 + }, + { + "epoch": 2.6006600660066006, + "grad_norm": 0.2399303225290425, + "learning_rate": 2.1555237414718854e-06, + "loss": 0.46468472480773926, + "step": 788 + }, + { + "epoch": 2.603960396039604, + "grad_norm": 0.24533578787784271, + "learning_rate": 2.1209678322115133e-06, + "loss": 0.508684515953064, + "step": 789 + }, + { + "epoch": 2.6072607260726075, + "grad_norm": 0.23699012050293838, + "learning_rate": 2.0866756390301778e-06, + "loss": 0.46998751163482666, + "step": 790 + }, + { + "epoch": 2.6105610561056105, + "grad_norm": 0.22442653448303418, + "learning_rate": 2.0526476677384123e-06, + "loss": 0.41589513421058655, + "step": 791 + }, + { + "epoch": 2.613861386138614, + "grad_norm": 0.23870429201603713, + "learning_rate": 2.018884420249474e-06, + "loss": 0.4948643445968628, + "step": 792 + }, + { + "epoch": 2.6171617161716174, + "grad_norm": 0.23103305184303033, + "learning_rate": 1.9853863945719243e-06, + "loss": 0.4494874179363251, + "step": 793 + }, + { + "epoch": 2.6204620462046204, + "grad_norm": 0.23980252076908543, + "learning_rate": 1.9521540848023113e-06, + "loss": 0.42173343896865845, + "step": 794 + }, + { + "epoch": 2.623762376237624, + "grad_norm": 0.24254851053091633, + "learning_rate": 1.9191879811178605e-06, + "loss": 0.4319555461406708, + "step": 795 + }, + { + "epoch": 2.6270627062706273, + "grad_norm": 0.21769714480169441, + "learning_rate": 1.8864885697692582e-06, + "loss": 0.40467706322669983, + "step": 796 + }, + { + "epoch": 2.6303630363036303, + "grad_norm": 0.23815188307796767, + "learning_rate": 1.8540563330734662e-06, + "loss": 0.5141273736953735, + "step": 797 + }, + { + "epoch": 2.633663366336634, + "grad_norm": 0.23237959155910853, + "learning_rate": 1.8218917494066212e-06, + "loss": 0.44990289211273193, + "step": 798 + }, + { + "epoch": 2.6369636963696372, + "grad_norm": 0.2393948822814923, + "learning_rate": 1.7899952931969756e-06, + 
"loss": 0.4878673553466797, + "step": 799 + }, + { + "epoch": 2.6402640264026402, + "grad_norm": 0.22595932266177446, + "learning_rate": 1.7583674349178803e-06, + "loss": 0.46406376361846924, + "step": 800 + }, + { + "epoch": 2.6435643564356437, + "grad_norm": 0.22163499847677615, + "learning_rate": 1.7270086410808762e-06, + "loss": 0.44470641016960144, + "step": 801 + }, + { + "epoch": 2.6468646864686467, + "grad_norm": 0.23461158504190754, + "learning_rate": 1.695919374228796e-06, + "loss": 0.5306479930877686, + "step": 802 + }, + { + "epoch": 2.65016501650165, + "grad_norm": 0.23844670077139818, + "learning_rate": 1.6651000929289462e-06, + "loss": 0.4570600390434265, + "step": 803 + }, + { + "epoch": 2.6534653465346536, + "grad_norm": 0.24202990025785212, + "learning_rate": 1.6345512517663275e-06, + "loss": 0.48561781644821167, + "step": 804 + }, + { + "epoch": 2.6567656765676566, + "grad_norm": 0.23785932147050265, + "learning_rate": 1.6042733013369604e-06, + "loss": 0.4666748642921448, + "step": 805 + }, + { + "epoch": 2.66006600660066, + "grad_norm": 0.2420529385568233, + "learning_rate": 1.5742666882412106e-06, + "loss": 0.4761434495449066, + "step": 806 + }, + { + "epoch": 2.6633663366336635, + "grad_norm": 0.23716960917200494, + "learning_rate": 1.5445318550772204e-06, + "loss": 0.4475252628326416, + "step": 807 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.2477540352529907, + "learning_rate": 1.5150692404343637e-06, + "loss": 0.5299564599990845, + "step": 808 + }, + { + "epoch": 2.66996699669967, + "grad_norm": 0.23933028255710986, + "learning_rate": 1.4858792788867904e-06, + "loss": 0.518581748008728, + "step": 809 + }, + { + "epoch": 2.6732673267326734, + "grad_norm": 0.2332077440459636, + "learning_rate": 1.4569624009870165e-06, + "loss": 0.5162506103515625, + "step": 810 + }, + { + "epoch": 2.6765676567656764, + "grad_norm": 0.23396257763770162, + "learning_rate": 1.4283190332595665e-06, + "loss": 0.4762595593929291, + "step": 811 + }, + { 
+ "epoch": 2.67986798679868, + "grad_norm": 0.24891326451914347, + "learning_rate": 1.3999495981946764e-06, + "loss": 0.44347697496414185, + "step": 812 + }, + { + "epoch": 2.6831683168316833, + "grad_norm": 0.22951918904681498, + "learning_rate": 1.3718545142420768e-06, + "loss": 0.4344146251678467, + "step": 813 + }, + { + "epoch": 2.6864686468646863, + "grad_norm": 0.23863686607461265, + "learning_rate": 1.344034195804813e-06, + "loss": 0.4936307668685913, + "step": 814 + }, + { + "epoch": 2.68976897689769, + "grad_norm": 0.23758007083024585, + "learning_rate": 1.3164890532331386e-06, + "loss": 0.43635520339012146, + "step": 815 + }, + { + "epoch": 2.693069306930693, + "grad_norm": 0.24550816708533926, + "learning_rate": 1.2892194928184499e-06, + "loss": 0.48006054759025574, + "step": 816 + }, + { + "epoch": 2.6963696369636962, + "grad_norm": 0.22610358677951214, + "learning_rate": 1.2622259167873008e-06, + "loss": 0.4296647906303406, + "step": 817 + }, + { + "epoch": 2.6996699669966997, + "grad_norm": 0.3871947383123805, + "learning_rate": 1.2355087232954754e-06, + "loss": 0.47840994596481323, + "step": 818 + }, + { + "epoch": 2.7029702970297027, + "grad_norm": 0.21432181977841594, + "learning_rate": 1.209068306422112e-06, + "loss": 0.41459953784942627, + "step": 819 + }, + { + "epoch": 2.706270627062706, + "grad_norm": 0.24313471794627498, + "learning_rate": 1.1829050561638766e-06, + "loss": 0.4278629422187805, + "step": 820 + }, + { + "epoch": 2.7095709570957096, + "grad_norm": 0.24379358416226346, + "learning_rate": 1.1570193584292323e-06, + "loss": 0.44538602232933044, + "step": 821 + }, + { + "epoch": 2.7128712871287126, + "grad_norm": 0.23094639733408046, + "learning_rate": 1.1314115950327365e-06, + "loss": 0.4757949709892273, + "step": 822 + }, + { + "epoch": 2.716171617161716, + "grad_norm": 0.22182336808333136, + "learning_rate": 1.106082143689402e-06, + "loss": 0.49131542444229126, + "step": 823 + }, + { + "epoch": 2.7194719471947195, + "grad_norm": 
0.2534124798335607, + "learning_rate": 1.0810313780091408e-06, + "loss": 0.4917967915534973, + "step": 824 + }, + { + "epoch": 2.7227722772277225, + "grad_norm": 0.23670068032674005, + "learning_rate": 1.056259667491244e-06, + "loss": 0.4949303865432739, + "step": 825 + }, + { + "epoch": 2.726072607260726, + "grad_norm": 0.23770304320813665, + "learning_rate": 1.0317673775189374e-06, + "loss": 0.4287925958633423, + "step": 826 + }, + { + "epoch": 2.7293729372937294, + "grad_norm": 0.2425418928573913, + "learning_rate": 1.007554869353975e-06, + "loss": 0.5059949159622192, + "step": 827 + }, + { + "epoch": 2.7326732673267324, + "grad_norm": 0.25049371554006, + "learning_rate": 9.83622500131336e-07, + "loss": 0.47914958000183105, + "step": 828 + }, + { + "epoch": 2.735973597359736, + "grad_norm": 0.24168515794090734, + "learning_rate": 9.599706228539452e-07, + "loss": 0.5237720608711243, + "step": 829 + }, + { + "epoch": 2.7392739273927393, + "grad_norm": 0.23836969767457952, + "learning_rate": 9.365995863874566e-07, + "loss": 0.4628916382789612, + "step": 830 + }, + { + "epoch": 2.7425742574257423, + "grad_norm": 0.22835633263617844, + "learning_rate": 9.135097354551203e-07, + "loss": 0.49988898634910583, + "step": 831 + }, + { + "epoch": 2.745874587458746, + "grad_norm": 0.2229937423966958, + "learning_rate": 8.907014106327039e-07, + "loss": 0.4631851315498352, + "step": 832 + }, + { + "epoch": 2.7491749174917492, + "grad_norm": 0.24485133529173167, + "learning_rate": 8.681749483434387e-07, + "loss": 0.47001713514328003, + "step": 833 + }, + { + "epoch": 2.7524752475247523, + "grad_norm": 0.23400965677751775, + "learning_rate": 8.459306808530999e-07, + "loss": 0.4437292218208313, + "step": 834 + }, + { + "epoch": 2.7557755775577557, + "grad_norm": 0.26632452732629835, + "learning_rate": 8.239689362650694e-07, + "loss": 0.5006406903266907, + "step": 835 + }, + { + "epoch": 2.759075907590759, + "grad_norm": 0.23471614589516374, + "learning_rate": 
8.022900385155185e-07, + "loss": 0.45732003450393677, + "step": 836 + }, + { + "epoch": 2.762376237623762, + "grad_norm": 0.47225644675751677, + "learning_rate": 7.808943073686159e-07, + "loss": 0.5012909173965454, + "step": 837 + }, + { + "epoch": 2.7656765676567656, + "grad_norm": 0.25510766784506034, + "learning_rate": 7.597820584118221e-07, + "loss": 0.5104090571403503, + "step": 838 + }, + { + "epoch": 2.768976897689769, + "grad_norm": 0.22536004830501363, + "learning_rate": 7.38953603051229e-07, + "loss": 0.44415900111198425, + "step": 839 + }, + { + "epoch": 2.772277227722772, + "grad_norm": 0.23868123290562657, + "learning_rate": 7.184092485069638e-07, + "loss": 0.46958473324775696, + "step": 840 + }, + { + "epoch": 2.7755775577557755, + "grad_norm": 0.22685199851447227, + "learning_rate": 6.981492978086634e-07, + "loss": 0.4305083155632019, + "step": 841 + }, + { + "epoch": 2.778877887788779, + "grad_norm": 0.2363937135429503, + "learning_rate": 6.78174049791005e-07, + "loss": 0.4812752604484558, + "step": 842 + }, + { + "epoch": 2.782178217821782, + "grad_norm": 0.23536493344498524, + "learning_rate": 6.584837990892889e-07, + "loss": 0.522142231464386, + "step": 843 + }, + { + "epoch": 2.7854785478547854, + "grad_norm": 0.2629089101886439, + "learning_rate": 6.390788361351053e-07, + "loss": 0.4789726138114929, + "step": 844 + }, + { + "epoch": 2.788778877887789, + "grad_norm": 0.221963892758326, + "learning_rate": 6.199594471520453e-07, + "loss": 0.44507476687431335, + "step": 845 + }, + { + "epoch": 2.792079207920792, + "grad_norm": 0.23452674626378717, + "learning_rate": 6.011259141514747e-07, + "loss": 0.47613948583602905, + "step": 846 + }, + { + "epoch": 2.7953795379537953, + "grad_norm": 0.22167932095355114, + "learning_rate": 5.825785149283758e-07, + "loss": 0.44828763604164124, + "step": 847 + }, + { + "epoch": 2.798679867986799, + "grad_norm": 0.3027768768548174, + "learning_rate": 5.64317523057254e-07, + "loss": 0.4695909321308136, + "step": 848 
+ }, + { + "epoch": 2.801980198019802, + "grad_norm": 0.2349539472452322, + "learning_rate": 5.463432078881093e-07, + "loss": 0.48341453075408936, + "step": 849 + }, + { + "epoch": 2.8052805280528053, + "grad_norm": 0.21333400051209225, + "learning_rate": 5.286558345424397e-07, + "loss": 0.47008436918258667, + "step": 850 + }, + { + "epoch": 2.8085808580858087, + "grad_norm": 0.2369125413431687, + "learning_rate": 5.112556639093536e-07, + "loss": 0.5081039071083069, + "step": 851 + }, + { + "epoch": 2.8118811881188117, + "grad_norm": 0.23230496066562498, + "learning_rate": 4.941429526417163e-07, + "loss": 0.49790090322494507, + "step": 852 + }, + { + "epoch": 2.815181518151815, + "grad_norm": 0.2314377157636827, + "learning_rate": 4.773179531523542e-07, + "loss": 0.476767897605896, + "step": 853 + }, + { + "epoch": 2.8184818481848186, + "grad_norm": 0.234974793768271, + "learning_rate": 4.6078091361034585e-07, + "loss": 0.5067446231842041, + "step": 854 + }, + { + "epoch": 2.8217821782178216, + "grad_norm": 0.2229121342330284, + "learning_rate": 4.4453207793735185e-07, + "loss": 0.45703452825546265, + "step": 855 + }, + { + "epoch": 2.825082508250825, + "grad_norm": 0.25006675020075053, + "learning_rate": 4.285716858040223e-07, + "loss": 0.4193270206451416, + "step": 856 + }, + { + "epoch": 2.8283828382838285, + "grad_norm": 0.2214334357956483, + "learning_rate": 4.128999726264549e-07, + "loss": 0.4367069602012634, + "step": 857 + }, + { + "epoch": 2.8316831683168315, + "grad_norm": 0.23745672544685706, + "learning_rate": 3.9751716956273113e-07, + "loss": 0.46601590514183044, + "step": 858 + }, + { + "epoch": 2.834983498349835, + "grad_norm": 0.23728948504727357, + "learning_rate": 3.824235035095036e-07, + "loss": 0.4801405072212219, + "step": 859 + }, + { + "epoch": 2.8382838283828384, + "grad_norm": 0.2305722834125333, + "learning_rate": 3.676191970986409e-07, + "loss": 0.4729960262775421, + "step": 860 + }, + { + "epoch": 2.8415841584158414, + "grad_norm": 
0.2565962552578653, + "learning_rate": 3.531044686939611e-07, + "loss": 0.453819215297699, + "step": 861 + }, + { + "epoch": 2.844884488448845, + "grad_norm": 0.2345568934684747, + "learning_rate": 3.388795323879923e-07, + "loss": 0.4655516743659973, + "step": 862 + }, + { + "epoch": 2.8481848184818483, + "grad_norm": 0.2602122051468819, + "learning_rate": 3.249445979988286e-07, + "loss": 0.4915505647659302, + "step": 863 + }, + { + "epoch": 2.8514851485148514, + "grad_norm": 0.227534967530927, + "learning_rate": 3.112998710670279e-07, + "loss": 0.46072205901145935, + "step": 864 + }, + { + "epoch": 2.854785478547855, + "grad_norm": 0.2372527927247435, + "learning_rate": 2.979455528525854e-07, + "loss": 0.47496911883354187, + "step": 865 + }, + { + "epoch": 2.8580858085808583, + "grad_norm": 0.2396587074165527, + "learning_rate": 2.8488184033195867e-07, + "loss": 0.4863288402557373, + "step": 866 + }, + { + "epoch": 2.8613861386138613, + "grad_norm": 0.23166629272471134, + "learning_rate": 2.721089261951626e-07, + "loss": 0.4543803930282593, + "step": 867 + }, + { + "epoch": 2.8646864686468647, + "grad_norm": 0.2431611152190322, + "learning_rate": 2.5962699884293894e-07, + "loss": 0.4589266777038574, + "step": 868 + }, + { + "epoch": 2.867986798679868, + "grad_norm": 0.2225895431580723, + "learning_rate": 2.474362423839627e-07, + "loss": 0.45603302121162415, + "step": 869 + }, + { + "epoch": 2.871287128712871, + "grad_norm": 0.2221408751585563, + "learning_rate": 2.3553683663213088e-07, + "loss": 0.4547184109687805, + "step": 870 + }, + { + "epoch": 2.8745874587458746, + "grad_norm": 0.24123343867414457, + "learning_rate": 2.2392895710391604e-07, + "loss": 0.4900602102279663, + "step": 871 + }, + { + "epoch": 2.877887788778878, + "grad_norm": 0.2412441535157341, + "learning_rate": 2.126127750157725e-07, + "loss": 0.48706525564193726, + "step": 872 + }, + { + "epoch": 2.881188118811881, + "grad_norm": 0.24173675884162568, + "learning_rate": 2.0158845728160958e-07, + 
"loss": 0.4726618230342865, + "step": 873 + }, + { + "epoch": 2.8844884488448845, + "grad_norm": 0.25907893004745514, + "learning_rate": 1.9085616651033147e-07, + "loss": 0.45884019136428833, + "step": 874 + }, + { + "epoch": 2.887788778877888, + "grad_norm": 0.2641670850826395, + "learning_rate": 1.804160610034411e-07, + "loss": 0.4787840247154236, + "step": 875 + }, + { + "epoch": 2.891089108910891, + "grad_norm": 0.24253910042279672, + "learning_rate": 1.702682947527001e-07, + "loss": 0.4758448004722595, + "step": 876 + }, + { + "epoch": 2.8943894389438944, + "grad_norm": 0.2279011748861112, + "learning_rate": 1.6041301743786596e-07, + "loss": 0.47089093923568726, + "step": 877 + }, + { + "epoch": 2.897689768976898, + "grad_norm": 0.29849498701163135, + "learning_rate": 1.5085037442446937e-07, + "loss": 0.46921056509017944, + "step": 878 + }, + { + "epoch": 2.900990099009901, + "grad_norm": 0.2344970489799305, + "learning_rate": 1.415805067616871e-07, + "loss": 0.5218731164932251, + "step": 879 + }, + { + "epoch": 2.9042904290429044, + "grad_norm": 0.2254215991599414, + "learning_rate": 1.3260355118025036e-07, + "loss": 0.43099671602249146, + "step": 880 + }, + { + "epoch": 2.907590759075908, + "grad_norm": 0.23874830724823604, + "learning_rate": 1.2391964009043078e-07, + "loss": 0.48290592432022095, + "step": 881 + }, + { + "epoch": 2.910891089108911, + "grad_norm": 0.23943766068140404, + "learning_rate": 1.1552890158009311e-07, + "loss": 0.4634360074996948, + "step": 882 + }, + { + "epoch": 2.9141914191419143, + "grad_norm": 0.2453653346062948, + "learning_rate": 1.0743145941279453e-07, + "loss": 0.5041622519493103, + "step": 883 + }, + { + "epoch": 2.9174917491749177, + "grad_norm": 0.21518547033713775, + "learning_rate": 9.962743302596612e-08, + "loss": 0.480410099029541, + "step": 884 + }, + { + "epoch": 2.9207920792079207, + "grad_norm": 0.24487326504708118, + "learning_rate": 9.211693752915419e-08, + "loss": 0.49919891357421875, + "step": 885 + }, + { + 
"epoch": 2.924092409240924, + "grad_norm": 0.23373083594094138, + "learning_rate": 8.490008370231506e-08, + "loss": 0.508806586265564, + "step": 886 + }, + { + "epoch": 2.9273927392739276, + "grad_norm": 0.23076843849897602, + "learning_rate": 7.797697799418525e-08, + "loss": 0.4233350157737732, + "step": 887 + }, + { + "epoch": 2.9306930693069306, + "grad_norm": 0.2406032429252954, + "learning_rate": 7.134772252071154e-08, + "loss": 0.4577901363372803, + "step": 888 + }, + { + "epoch": 2.933993399339934, + "grad_norm": 0.22213331512067527, + "learning_rate": 6.501241506354561e-08, + "loss": 0.4028077721595764, + "step": 889 + }, + { + "epoch": 2.9372937293729375, + "grad_norm": 0.23681508976522572, + "learning_rate": 5.897114906859402e-08, + "loss": 0.48321446776390076, + "step": 890 + }, + { + "epoch": 2.9405940594059405, + "grad_norm": 0.27558742916404966, + "learning_rate": 5.322401364465491e-08, + "loss": 0.48732608556747437, + "step": 891 + }, + { + "epoch": 2.943894389438944, + "grad_norm": 0.22725537704850798, + "learning_rate": 4.777109356208565e-08, + "loss": 0.46879494190216064, + "step": 892 + }, + { + "epoch": 2.9471947194719474, + "grad_norm": 0.23495776431163154, + "learning_rate": 4.261246925156837e-08, + "loss": 0.4858628511428833, + "step": 893 + }, + { + "epoch": 2.9504950495049505, + "grad_norm": 0.22802725333151694, + "learning_rate": 3.7748216802913077e-08, + "loss": 0.48119616508483887, + "step": 894 + }, + { + "epoch": 2.953795379537954, + "grad_norm": 0.22512889420077337, + "learning_rate": 3.3178407963938564e-08, + "loss": 0.4994167983531952, + "step": 895 + }, + { + "epoch": 2.9570957095709574, + "grad_norm": 0.23739211802797258, + "learning_rate": 2.8903110139417712e-08, + "loss": 0.46394845843315125, + "step": 896 + }, + { + "epoch": 2.9603960396039604, + "grad_norm": 0.2476698533912655, + "learning_rate": 2.4922386390076047e-08, + "loss": 0.42504560947418213, + "step": 897 + }, + { + "epoch": 2.963696369636964, + "grad_norm": 
0.24523827629331452, + "learning_rate": 2.1236295431670275e-08, + "loss": 0.4186960756778717, + "step": 898 + }, + { + "epoch": 2.9669966996699673, + "grad_norm": 0.22738870735932892, + "learning_rate": 1.7844891634113402e-08, + "loss": 0.4529160261154175, + "step": 899 + }, + { + "epoch": 2.9702970297029703, + "grad_norm": 0.23734524364327658, + "learning_rate": 1.4748225020679851e-08, + "loss": 0.44012153148651123, + "step": 900 + }, + { + "epoch": 2.9735973597359737, + "grad_norm": 0.23103066951863727, + "learning_rate": 1.1946341267263794e-08, + "loss": 0.4775368571281433, + "step": 901 + }, + { + "epoch": 2.976897689768977, + "grad_norm": 0.22618868632744704, + "learning_rate": 9.439281701704162e-09, + "loss": 0.4465276002883911, + "step": 902 + }, + { + "epoch": 2.98019801980198, + "grad_norm": 0.24271367480309458, + "learning_rate": 7.227083303180671e-09, + "loss": 0.4674132168292999, + "step": 903 + }, + { + "epoch": 2.9834983498349836, + "grad_norm": 0.23142674174926925, + "learning_rate": 5.30977870166316e-09, + "loss": 0.4751841127872467, + "step": 904 + }, + { + "epoch": 2.9867986798679866, + "grad_norm": 0.24061959007170008, + "learning_rate": 3.687396177434188e-09, + "loss": 0.4587743580341339, + "step": 905 + }, + { + "epoch": 2.99009900990099, + "grad_norm": 0.22301171950064888, + "learning_rate": 2.359959660667155e-09, + "loss": 0.4815826416015625, + "step": 906 + }, + { + "epoch": 2.9933993399339935, + "grad_norm": 0.22240717058445192, + "learning_rate": 1.3274887310732454e-09, + "loss": 0.45863479375839233, + "step": 907 + }, + { + "epoch": 2.9966996699669965, + "grad_norm": 0.23321307876392341, + "learning_rate": 5.899986176260974e-10, + "loss": 0.4888804256916046, + "step": 908 + }, + { + "epoch": 3.0, + "grad_norm": 0.2343821134475686, + "learning_rate": 1.475001983131108e-10, + "loss": 0.46804267168045044, + "step": 909 + } + ], + "logging_steps": 1, + "max_steps": 909, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 
500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1274755977576448.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-909/training_args.bin b/checkpoint-909/training_args.bin new file mode 100644 index 0000000..f7e3756 --- /dev/null +++ b/checkpoint-909/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2299c356bc8becdedc07e7f99b268be8ab4a91e2c4e99a06f13c8e908a3188e4 +size 7313 diff --git a/checkpoint-909/zero_to_fp32.py b/checkpoint-909/zero_to_fp32.py new file mode 100644 index 0000000..5995d6e --- /dev/null +++ b/checkpoint-909/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint.
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/config.json b/config.json new file mode 100644 index 0000000..6dae724 --- /dev/null +++ b/config.json @@ -0,0 +1,71 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 5000000, + "rope_type": "default" + }, + 
"sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.2.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..1701c94 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "5.2.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..51a41cb --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d6efc5a0f14fc39e6749d9f160ed30c10f3001f9d49f5cbd909c63f514073d1 +size 8044982080 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..c7afbed --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506 +size 11422650 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..79dfc69 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,30 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 262144, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 
0000000..31ec503 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 1274755977576448.0, + "train_loss": 0.5495063810720958, + "train_runtime": 34913.8374, + "train_samples_per_second": 3.33, + "train_steps_per_second": 0.026 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..0883b4e --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1119 @@ +{"current_steps": 236, "total_steps": 1302, "loss": 0.4300587773323059, "lr": 3.9226548017183946e-05, "epoch": 0.544405997693195, "percentage": 18.13, "elapsed_time": "13:39:56", "remaining_time": "2 days, 13:43:40"} +{"current_steps": 1, "total_steps": 909, "loss": 1.2079360485076904, "lr": 0.0, "epoch": 0.0033003300330033004, "percentage": 0.11, "elapsed_time": "0:00:36", "remaining_time": "9:10:52"} +{"current_steps": 2, "total_steps": 909, "loss": 1.123347520828247, "lr": 4.395604395604396e-07, "epoch": 0.006600660066006601, "percentage": 0.22, "elapsed_time": "0:01:39", "remaining_time": "12:32:13"} +{"current_steps": 3, "total_steps": 909, "loss": 1.261695384979248, "lr": 8.791208791208792e-07, "epoch": 0.009900990099009901, "percentage": 0.33, "elapsed_time": "0:02:08", "remaining_time": "10:44:23"} +{"current_steps": 4, "total_steps": 909, "loss": 1.1276888847351074, "lr": 1.3186813186813187e-06, "epoch": 0.013201320132013201, "percentage": 0.44, "elapsed_time": "0:02:49", "remaining_time": "10:40:46"} +{"current_steps": 5, "total_steps": 909, "loss": 1.2254480123519897, "lr": 1.7582417582417585e-06, "epoch": 0.0165016501650165, "percentage": 0.55, "elapsed_time": "0:03:22", "remaining_time": "10:08:44"} +{"current_steps": 237, "total_steps": 1302, "loss": 0.44064006209373474, "lr": 3.9211701385812556e-05, "epoch": 0.5467128027681661, "percentage": 18.2, "elapsed_time": "13:43:08", "remaining_time": "2 days, 13:38:53"} +{"current_steps": 6, "total_steps": 909, "loss": 1.1809396743774414, "lr": 2.197802197802198e-06, 
"epoch": 0.019801980198019802, "percentage": 0.66, "elapsed_time": "0:03:54", "remaining_time": "9:48:33"} +{"current_steps": 7, "total_steps": 909, "loss": 1.2000095844268799, "lr": 2.6373626373626375e-06, "epoch": 0.0231023102310231, "percentage": 0.77, "elapsed_time": "0:04:36", "remaining_time": "9:52:49"} +{"current_steps": 8, "total_steps": 909, "loss": 1.0248074531555176, "lr": 3.0769230769230774e-06, "epoch": 0.026402640264026403, "percentage": 0.88, "elapsed_time": "0:05:20", "remaining_time": "10:02:11"} +{"current_steps": 9, "total_steps": 909, "loss": 1.0840561389923096, "lr": 3.516483516483517e-06, "epoch": 0.0297029702970297, "percentage": 0.99, "elapsed_time": "0:06:05", "remaining_time": "10:09:14"} +{"current_steps": 10, "total_steps": 909, "loss": 0.955639123916626, "lr": 3.9560439560439565e-06, "epoch": 0.033003300330033, "percentage": 1.1, "elapsed_time": "0:06:42", "remaining_time": "10:02:42"} +{"current_steps": 11, "total_steps": 909, "loss": 0.9281604290008545, "lr": 4.395604395604396e-06, "epoch": 0.036303630363036306, "percentage": 1.21, "elapsed_time": "0:07:18", "remaining_time": "9:56:42"} +{"current_steps": 238, "total_steps": 1302, "loss": 0.43503716588020325, "lr": 3.9196716476940116e-05, "epoch": 0.5490196078431373, "percentage": 18.28, "elapsed_time": "13:46:43", "remaining_time": "2 days, 13:35:57"} +{"current_steps": 12, "total_steps": 909, "loss": 0.9079018831253052, "lr": 4.8351648351648355e-06, "epoch": 0.039603960396039604, "percentage": 1.32, "elapsed_time": "0:07:45", "remaining_time": "9:40:24"} +{"current_steps": 13, "total_steps": 909, "loss": 0.9039217233657837, "lr": 5.274725274725275e-06, "epoch": 0.0429042904290429, "percentage": 1.43, "elapsed_time": "0:08:23", "remaining_time": "9:38:27"} +{"current_steps": 14, "total_steps": 909, "loss": 0.8910936117172241, "lr": 5.7142857142857145e-06, "epoch": 0.0462046204620462, "percentage": 1.54, "elapsed_time": "0:08:53", "remaining_time": "9:28:39"} +{"current_steps": 15, 
"total_steps": 909, "loss": 0.895532488822937, "lr": 6.153846153846155e-06, "epoch": 0.04950495049504951, "percentage": 1.65, "elapsed_time": "0:09:32", "remaining_time": "9:28:44"} +{"current_steps": 16, "total_steps": 909, "loss": 0.8889240622520447, "lr": 6.5934065934065935e-06, "epoch": 0.052805280528052806, "percentage": 1.76, "elapsed_time": "0:10:08", "remaining_time": "9:26:20"} +{"current_steps": 17, "total_steps": 909, "loss": 0.8499570488929749, "lr": 7.032967032967034e-06, "epoch": 0.056105610561056105, "percentage": 1.87, "elapsed_time": "0:10:40", "remaining_time": "9:19:44"} +{"current_steps": 239, "total_steps": 1302, "loss": 0.4208536744117737, "lr": 3.9181593398421495e-05, "epoch": 0.5513264129181085, "percentage": 18.36, "elapsed_time": "13:50:12", "remaining_time": "2 days, 13:32:32"} +{"current_steps": 18, "total_steps": 909, "loss": 0.839992105960846, "lr": 7.472527472527473e-06, "epoch": 0.0594059405940594, "percentage": 1.98, "elapsed_time": "0:11:19", "remaining_time": "9:20:39"} +{"current_steps": 19, "total_steps": 909, "loss": 0.7718420028686523, "lr": 7.912087912087913e-06, "epoch": 0.0627062706270627, "percentage": 2.09, "elapsed_time": "0:12:04", "remaining_time": "9:25:34"} +{"current_steps": 20, "total_steps": 909, "loss": 0.7865867614746094, "lr": 8.351648351648353e-06, "epoch": 0.066006600660066, "percentage": 2.2, "elapsed_time": "0:12:41", "remaining_time": "9:24:25"} +{"current_steps": 21, "total_steps": 909, "loss": 0.7982739806175232, "lr": 8.791208791208792e-06, "epoch": 0.06930693069306931, "percentage": 2.31, "elapsed_time": "0:13:26", "remaining_time": "9:28:04"} +{"current_steps": 22, "total_steps": 909, "loss": 0.7846421599388123, "lr": 9.230769230769232e-06, "epoch": 0.07260726072607261, "percentage": 2.42, "elapsed_time": "0:14:09", "remaining_time": "9:30:47"} +{"current_steps": 240, "total_steps": 1302, "loss": 0.4130966067314148, "lr": 3.9166332259106076e-05, "epoch": 0.5536332179930796, "percentage": 18.43, 
"elapsed_time": "13:53:44", "remaining_time": "2 days, 13:29:17"} +{"current_steps": 23, "total_steps": 909, "loss": 0.7005743980407715, "lr": 9.670329670329671e-06, "epoch": 0.07590759075907591, "percentage": 2.53, "elapsed_time": "0:14:53", "remaining_time": "9:33:56"} +{"current_steps": 24, "total_steps": 909, "loss": 0.7084314227104187, "lr": 1.010989010989011e-05, "epoch": 0.07920792079207921, "percentage": 2.64, "elapsed_time": "0:15:40", "remaining_time": "9:37:57"} +{"current_steps": 25, "total_steps": 909, "loss": 0.7310304641723633, "lr": 1.054945054945055e-05, "epoch": 0.08250825082508251, "percentage": 2.75, "elapsed_time": "0:16:11", "remaining_time": "9:32:17"} +{"current_steps": 26, "total_steps": 909, "loss": 0.7056888341903687, "lr": 1.098901098901099e-05, "epoch": 0.0858085808580858, "percentage": 2.86, "elapsed_time": "0:16:46", "remaining_time": "9:29:36"} +{"current_steps": 27, "total_steps": 909, "loss": 0.6987950205802917, "lr": 1.1428571428571429e-05, "epoch": 0.0891089108910891, "percentage": 2.97, "elapsed_time": "0:17:13", "remaining_time": "9:22:54"} +{"current_steps": 28, "total_steps": 909, "loss": 0.7319807410240173, "lr": 1.186813186813187e-05, "epoch": 0.0924092409240924, "percentage": 3.08, "elapsed_time": "0:17:53", "remaining_time": "9:22:42"} +{"current_steps": 241, "total_steps": 1302, "loss": 0.4009060263633728, "lr": 3.915093316883691e-05, "epoch": 0.5559400230680508, "percentage": 18.51, "elapsed_time": "13:57:23", "remaining_time": "2 days, 13:26:34"} +{"current_steps": 29, "total_steps": 909, "loss": 0.6983063220977783, "lr": 1.230769230769231e-05, "epoch": 0.09570957095709572, "percentage": 3.19, "elapsed_time": "0:18:28", "remaining_time": "9:20:22"} +{"current_steps": 30, "total_steps": 909, "loss": 0.70492023229599, "lr": 1.2747252747252747e-05, "epoch": 0.09900990099009901, "percentage": 3.3, "elapsed_time": "0:19:05", "remaining_time": "9:19:35"} +{"current_steps": 31, "total_steps": 909, "loss": 0.7376629114151001, 
"lr": 1.3186813186813187e-05, "epoch": 0.10231023102310231, "percentage": 3.41, "elapsed_time": "0:19:46", "remaining_time": "9:20:02"} +{"current_steps": 32, "total_steps": 909, "loss": 0.6623936295509338, "lr": 1.3626373626373627e-05, "epoch": 0.10561056105610561, "percentage": 3.52, "elapsed_time": "0:20:27", "remaining_time": "9:20:28"} +{"current_steps": 33, "total_steps": 909, "loss": 0.7136330604553223, "lr": 1.4065934065934068e-05, "epoch": 0.10891089108910891, "percentage": 3.63, "elapsed_time": "0:21:04", "remaining_time": "9:19:24"} +{"current_steps": 242, "total_steps": 1302, "loss": 0.4030718207359314, "lr": 3.913539623844999e-05, "epoch": 0.558246828143022, "percentage": 18.59, "elapsed_time": "14:00:59", "remaining_time": "2 days, 13:23:40"} +{"current_steps": 34, "total_steps": 909, "loss": 0.7113747596740723, "lr": 1.4505494505494506e-05, "epoch": 0.11221122112211221, "percentage": 3.74, "elapsed_time": "0:21:40", "remaining_time": "9:18:01"} +{"current_steps": 35, "total_steps": 909, "loss": 0.8252867460250854, "lr": 1.4945054945054947e-05, "epoch": 0.11551155115511551, "percentage": 3.85, "elapsed_time": "0:22:16", "remaining_time": "9:16:19"} +{"current_steps": 36, "total_steps": 909, "loss": 0.7406599521636963, "lr": 1.5384615384615387e-05, "epoch": 0.1188118811881188, "percentage": 3.96, "elapsed_time": "0:23:00", "remaining_time": "9:18:04"} +{"current_steps": 37, "total_steps": 909, "loss": 0.6572297811508179, "lr": 1.5824175824175826e-05, "epoch": 0.12211221122112212, "percentage": 4.07, "elapsed_time": "0:23:32", "remaining_time": "9:14:59"} +{"current_steps": 38, "total_steps": 909, "loss": 0.7520949840545654, "lr": 1.6263736263736265e-05, "epoch": 0.1254125412541254, "percentage": 4.18, "elapsed_time": "0:24:02", "remaining_time": "9:11:05"} +{"current_steps": 39, "total_steps": 909, "loss": 0.6861323118209839, "lr": 1.6703296703296707e-05, "epoch": 0.12871287128712872, "percentage": 4.29, "elapsed_time": "0:24:38", "remaining_time": 
"9:09:37"} +{"current_steps": 243, "total_steps": 1302, "loss": 0.43782880902290344, "lr": 3.911972157977339e-05, "epoch": 0.5605536332179931, "percentage": 18.66, "elapsed_time": "14:04:29", "remaining_time": "2 days, 13:20:21"} +{"current_steps": 40, "total_steps": 909, "loss": 0.6818518042564392, "lr": 1.7142857142857142e-05, "epoch": 0.132013201320132, "percentage": 4.4, "elapsed_time": "0:25:20", "remaining_time": "9:10:35"} +{"current_steps": 41, "total_steps": 909, "loss": 0.6663186550140381, "lr": 1.7582417582417584e-05, "epoch": 0.1353135313531353, "percentage": 4.51, "elapsed_time": "0:25:57", "remaining_time": "9:09:36"} +{"current_steps": 42, "total_steps": 909, "loss": 0.6492191553115845, "lr": 1.8021978021978023e-05, "epoch": 0.13861386138613863, "percentage": 4.62, "elapsed_time": "0:26:38", "remaining_time": "9:10:04"} +{"current_steps": 43, "total_steps": 909, "loss": 0.6444741487503052, "lr": 1.8461538461538465e-05, "epoch": 0.1419141914191419, "percentage": 4.73, "elapsed_time": "0:27:10", "remaining_time": "9:07:16"} +{"current_steps": 44, "total_steps": 909, "loss": 0.6476814150810242, "lr": 1.8901098901098903e-05, "epoch": 0.14521452145214522, "percentage": 4.84, "elapsed_time": "0:28:00", "remaining_time": "9:10:34"} +{"current_steps": 45, "total_steps": 909, "loss": 0.6660827994346619, "lr": 1.9340659340659342e-05, "epoch": 0.1485148514851485, "percentage": 4.95, "elapsed_time": "0:28:34", "remaining_time": "9:08:36"} +{"current_steps": 244, "total_steps": 1302, "loss": 0.4285259544849396, "lr": 3.910390930562653e-05, "epoch": 0.5628604382929643, "percentage": 18.74, "elapsed_time": "14:08:07", "remaining_time": "2 days, 13:17:29"} +{"current_steps": 46, "total_steps": 909, "loss": 0.6924091577529907, "lr": 1.9780219780219784e-05, "epoch": 0.15181518151815182, "percentage": 5.06, "elapsed_time": "0:29:12", "remaining_time": "9:08:04"} +{"current_steps": 47, "total_steps": 909, "loss": 0.6899605989456177, "lr": 2.021978021978022e-05, "epoch": 
0.1551155115511551, "percentage": 5.17, "elapsed_time": "0:29:51", "remaining_time": "9:07:45"} +{"current_steps": 48, "total_steps": 909, "loss": 0.7242028713226318, "lr": 2.0659340659340665e-05, "epoch": 0.15841584158415842, "percentage": 5.28, "elapsed_time": "0:30:22", "remaining_time": "9:04:44"} +{"current_steps": 49, "total_steps": 909, "loss": 0.6203902959823608, "lr": 2.10989010989011e-05, "epoch": 0.1617161716171617, "percentage": 5.39, "elapsed_time": "0:30:55", "remaining_time": "9:02:43"} +{"current_steps": 50, "total_steps": 909, "loss": 0.6420010328292847, "lr": 2.153846153846154e-05, "epoch": 0.16501650165016502, "percentage": 5.5, "elapsed_time": "0:31:41", "remaining_time": "9:04:31"} +{"current_steps": 51, "total_steps": 909, "loss": 0.7517598867416382, "lr": 2.197802197802198e-05, "epoch": 0.16831683168316833, "percentage": 5.61, "elapsed_time": "0:32:13", "remaining_time": "9:02:00"} +{"current_steps": 245, "total_steps": 1302, "loss": 0.42723968625068665, "lr": 3.9087959529819295e-05, "epoch": 0.5651672433679354, "percentage": 18.82, "elapsed_time": "14:11:39", "remaining_time": "2 days, 13:14:17"} +{"current_steps": 52, "total_steps": 909, "loss": 0.6568161249160767, "lr": 2.241758241758242e-05, "epoch": 0.1716171617161716, "percentage": 5.72, "elapsed_time": "0:32:59", "remaining_time": "9:03:50"} +{"current_steps": 53, "total_steps": 909, "loss": 0.7348504662513733, "lr": 2.2857142857142858e-05, "epoch": 0.17491749174917492, "percentage": 5.83, "elapsed_time": "0:33:29", "remaining_time": "9:00:55"} +{"current_steps": 54, "total_steps": 909, "loss": 0.6164949536323547, "lr": 2.32967032967033e-05, "epoch": 0.1782178217821782, "percentage": 5.94, "elapsed_time": "0:34:07", "remaining_time": "9:00:14"} +{"current_steps": 55, "total_steps": 909, "loss": 0.6505363583564758, "lr": 2.373626373626374e-05, "epoch": 0.18151815181518152, "percentage": 6.05, "elapsed_time": "0:34:55", "remaining_time": "9:02:21"} +{"current_steps": 56, "total_steps": 
909, "loss": 0.7562520503997803, "lr": 2.4175824175824177e-05, "epoch": 0.1848184818481848, "percentage": 6.16, "elapsed_time": "0:35:16", "remaining_time": "8:57:22"} +{"current_steps": 246, "total_steps": 1302, "loss": 0.42523980140686035, "lr": 3.907187236715127e-05, "epoch": 0.5674740484429066, "percentage": 18.89, "elapsed_time": "14:14:57", "remaining_time": "2 days, 13:10:02"} +{"current_steps": 57, "total_steps": 909, "loss": 0.6943148374557495, "lr": 2.461538461538462e-05, "epoch": 0.18811881188118812, "percentage": 6.27, "elapsed_time": "0:35:53", "remaining_time": "8:56:35"} +{"current_steps": 58, "total_steps": 909, "loss": 0.6571655869483948, "lr": 2.5054945054945058e-05, "epoch": 0.19141914191419143, "percentage": 6.38, "elapsed_time": "0:36:30", "remaining_time": "8:55:38"} +{"current_steps": 59, "total_steps": 909, "loss": 0.7229321002960205, "lr": 2.5494505494505493e-05, "epoch": 0.19471947194719472, "percentage": 6.49, "elapsed_time": "0:37:04", "remaining_time": "8:54:06"} +{"current_steps": 60, "total_steps": 909, "loss": 0.6307672262191772, "lr": 2.593406593406594e-05, "epoch": 0.19801980198019803, "percentage": 6.6, "elapsed_time": "0:37:36", "remaining_time": "8:52:13"} +{"current_steps": 61, "total_steps": 909, "loss": 0.6336506009101868, "lr": 2.6373626373626374e-05, "epoch": 0.20132013201320131, "percentage": 6.71, "elapsed_time": "0:38:12", "remaining_time": "8:51:06"} +{"current_steps": 62, "total_steps": 909, "loss": 0.6492213010787964, "lr": 2.6813186813186813e-05, "epoch": 0.20462046204620463, "percentage": 6.82, "elapsed_time": "0:38:48", "remaining_time": "8:50:07"} +{"current_steps": 247, "total_steps": 1302, "loss": 0.43423518538475037, "lr": 3.9055647933410854e-05, "epoch": 0.5697808535178778, "percentage": 18.97, "elapsed_time": "14:18:24", "remaining_time": "2 days, 13:06:26"} +{"current_steps": 63, "total_steps": 909, "loss": 0.6763280034065247, "lr": 2.7252747252747255e-05, "epoch": 0.2079207920792079, "percentage": 6.93, 
"elapsed_time": "0:39:18", "remaining_time": "8:47:52"} +{"current_steps": 64, "total_steps": 909, "loss": 0.7322396039962769, "lr": 2.7692307692307694e-05, "epoch": 0.21122112211221122, "percentage": 7.04, "elapsed_time": "0:39:52", "remaining_time": "8:46:24"} +{"current_steps": 65, "total_steps": 909, "loss": 0.7080870270729065, "lr": 2.8131868131868136e-05, "epoch": 0.2145214521452145, "percentage": 7.15, "elapsed_time": "0:40:23", "remaining_time": "8:44:26"} +{"current_steps": 66, "total_steps": 909, "loss": 0.6054466962814331, "lr": 2.8571428571428574e-05, "epoch": 0.21782178217821782, "percentage": 7.26, "elapsed_time": "0:41:12", "remaining_time": "8:46:19"} +{"current_steps": 67, "total_steps": 909, "loss": 0.6782290935516357, "lr": 2.9010989010989013e-05, "epoch": 0.22112211221122113, "percentage": 7.37, "elapsed_time": "0:41:38", "remaining_time": "8:43:23"} +{"current_steps": 248, "total_steps": 1302, "loss": 0.43340083956718445, "lr": 3.9039286345374526e-05, "epoch": 0.5720876585928489, "percentage": 19.05, "elapsed_time": "14:21:27", "remaining_time": "2 days, 13:01:13"} +{"current_steps": 68, "total_steps": 909, "loss": 0.6804753541946411, "lr": 2.9450549450549455e-05, "epoch": 0.22442244224422442, "percentage": 7.48, "elapsed_time": "0:42:11", "remaining_time": "8:41:49"} +{"current_steps": 69, "total_steps": 909, "loss": 0.6493992805480957, "lr": 2.9890109890109894e-05, "epoch": 0.22772277227722773, "percentage": 7.59, "elapsed_time": "0:42:42", "remaining_time": "8:39:57"} +{"current_steps": 70, "total_steps": 909, "loss": 0.6263789534568787, "lr": 3.0329670329670332e-05, "epoch": 0.23102310231023102, "percentage": 7.7, "elapsed_time": "0:43:30", "remaining_time": "8:41:26"} +{"current_steps": 71, "total_steps": 909, "loss": 0.6960322856903076, "lr": 3.0769230769230774e-05, "epoch": 0.23432343234323433, "percentage": 7.81, "elapsed_time": "0:44:28", "remaining_time": "8:44:50"} +{"current_steps": 72, "total_steps": 909, "loss": 
0.6146604418754578, "lr": 3.120879120879121e-05, "epoch": 0.2376237623762376, "percentage": 7.92, "elapsed_time": "0:44:55", "remaining_time": "8:42:14"} +{"current_steps": 73, "total_steps": 909, "loss": 0.6361377239227295, "lr": 3.164835164835165e-05, "epoch": 0.24092409240924093, "percentage": 8.03, "elapsed_time": "0:45:21", "remaining_time": "8:39:31"} +{"current_steps": 249, "total_steps": 1302, "loss": 0.44009676575660706, "lr": 3.902278772080588e-05, "epoch": 0.5743944636678201, "percentage": 19.12, "elapsed_time": "14:24:55", "remaining_time": "2 days, 12:57:40"} +{"current_steps": 74, "total_steps": 909, "loss": 0.636134147644043, "lr": 3.2087912087912094e-05, "epoch": 0.24422442244224424, "percentage": 8.14, "elapsed_time": "0:46:00", "remaining_time": "8:39:12"} +{"current_steps": 75, "total_steps": 909, "loss": 0.5936564803123474, "lr": 3.252747252747253e-05, "epoch": 0.24752475247524752, "percentage": 8.25, "elapsed_time": "0:46:37", "remaining_time": "8:38:24"} +{"current_steps": 76, "total_steps": 909, "loss": 0.6001103520393372, "lr": 3.296703296703297e-05, "epoch": 0.2508250825082508, "percentage": 8.36, "elapsed_time": "0:47:10", "remaining_time": "8:37:01"} +{"current_steps": 77, "total_steps": 909, "loss": 0.6254594326019287, "lr": 3.340659340659341e-05, "epoch": 0.25412541254125415, "percentage": 8.47, "elapsed_time": "0:47:49", "remaining_time": "8:36:49"} +{"current_steps": 78, "total_steps": 909, "loss": 0.6457959413528442, "lr": 3.384615384615385e-05, "epoch": 0.25742574257425743, "percentage": 8.58, "elapsed_time": "0:48:22", "remaining_time": "8:35:27"} +{"current_steps": 79, "total_steps": 909, "loss": 0.6186954975128174, "lr": 3.4285714285714284e-05, "epoch": 0.2607260726072607, "percentage": 8.69, "elapsed_time": "0:49:01", "remaining_time": "8:35:08"} +{"current_steps": 250, "total_steps": 1302, "loss": 0.43056797981262207, "lr": 3.900615217845488e-05, "epoch": 0.5767012687427913, "percentage": 19.2, "elapsed_time": "14:28:29", 
"remaining_time": "2 days, 12:54:34"} +{"current_steps": 80, "total_steps": 909, "loss": 0.6175529956817627, "lr": 3.4725274725274726e-05, "epoch": 0.264026402640264, "percentage": 8.8, "elapsed_time": "0:49:39", "remaining_time": "8:34:35"} +{"current_steps": 81, "total_steps": 909, "loss": 0.6694468259811401, "lr": 3.516483516483517e-05, "epoch": 0.26732673267326734, "percentage": 8.91, "elapsed_time": "0:50:21", "remaining_time": "8:34:49"} +{"current_steps": 82, "total_steps": 909, "loss": 0.627490222454071, "lr": 3.56043956043956e-05, "epoch": 0.2706270627062706, "percentage": 9.02, "elapsed_time": "0:51:01", "remaining_time": "8:34:32"} +{"current_steps": 83, "total_steps": 909, "loss": 0.6410495638847351, "lr": 3.6043956043956045e-05, "epoch": 0.2739273927392739, "percentage": 9.13, "elapsed_time": "0:51:39", "remaining_time": "8:34:03"} +{"current_steps": 84, "total_steps": 909, "loss": 0.6305102109909058, "lr": 3.648351648351649e-05, "epoch": 0.27722772277227725, "percentage": 9.24, "elapsed_time": "0:52:20", "remaining_time": "8:34:06"} +{"current_steps": 251, "total_steps": 1302, "loss": 0.42200613021850586, "lr": 3.8989379838056945e-05, "epoch": 0.5790080738177624, "percentage": 19.28, "elapsed_time": "14:31:57", "remaining_time": "2 days, 12:51:08"} +{"current_steps": 85, "total_steps": 909, "loss": 0.6558895111083984, "lr": 3.692307692307693e-05, "epoch": 0.28052805280528054, "percentage": 9.35, "elapsed_time": "0:52:54", "remaining_time": "8:32:52"} +{"current_steps": 86, "total_steps": 909, "loss": 0.6029388308525085, "lr": 3.7362637362637365e-05, "epoch": 0.2838283828382838, "percentage": 9.46, "elapsed_time": "0:53:30", "remaining_time": "8:32:04"} +{"current_steps": 87, "total_steps": 909, "loss": 0.6551017761230469, "lr": 3.7802197802197807e-05, "epoch": 0.2871287128712871, "percentage": 9.57, "elapsed_time": "0:53:58", "remaining_time": "8:29:53"} +{"current_steps": 88, "total_steps": 909, "loss": 0.6588809490203857, "lr": 
3.824175824175825e-05, "epoch": 0.29042904290429045, "percentage": 9.68, "elapsed_time": "0:54:29", "remaining_time": "8:28:22"} +{"current_steps": 89, "total_steps": 909, "loss": 0.614648699760437, "lr": 3.8681318681318684e-05, "epoch": 0.29372937293729373, "percentage": 9.79, "elapsed_time": "0:54:57", "remaining_time": "8:26:22"} +{"current_steps": 90, "total_steps": 909, "loss": 0.7034356594085693, "lr": 3.9120879120879126e-05, "epoch": 0.297029702970297, "percentage": 9.9, "elapsed_time": "0:55:20", "remaining_time": "8:23:35"} +{"current_steps": 91, "total_steps": 909, "loss": 0.6908263564109802, "lr": 3.956043956043957e-05, "epoch": 0.30033003300330036, "percentage": 10.01, "elapsed_time": "0:55:51", "remaining_time": "8:22:02"} +{"current_steps": 252, "total_steps": 1302, "loss": 0.43741437792778015, "lr": 3.897247082033211e-05, "epoch": 0.5813148788927336, "percentage": 19.35, "elapsed_time": "14:35:27", "remaining_time": "2 days, 12:47:43"} +{"current_steps": 92, "total_steps": 909, "loss": 0.6882215738296509, "lr": 4e-05, "epoch": 0.30363036303630364, "percentage": 10.12, "elapsed_time": "0:56:17", "remaining_time": "8:19:51"} +{"current_steps": 93, "total_steps": 909, "loss": 0.6377270221710205, "lr": 3.999985249980169e-05, "epoch": 0.3069306930693069, "percentage": 10.23, "elapsed_time": "0:56:54", "remaining_time": "8:19:18"} +{"current_steps": 94, "total_steps": 909, "loss": 0.6735270619392395, "lr": 3.999941000138238e-05, "epoch": 0.3102310231023102, "percentage": 10.34, "elapsed_time": "0:57:22", "remaining_time": "8:17:24"} +{"current_steps": 95, "total_steps": 909, "loss": 0.6934541463851929, "lr": 3.999867251126893e-05, "epoch": 0.31353135313531355, "percentage": 10.45, "elapsed_time": "0:57:56", "remaining_time": "8:16:28"} +{"current_steps": 96, "total_steps": 909, "loss": 0.6367039084434509, "lr": 3.9997640040339335e-05, "epoch": 0.31683168316831684, "percentage": 10.56, "elapsed_time": "0:58:31", "remaining_time": "8:15:36"} 
+{"current_steps": 97, "total_steps": 909, "loss": 0.6274522542953491, "lr": 3.999631260382257e-05, "epoch": 0.3201320132013201, "percentage": 10.67, "elapsed_time": "0:59:11", "remaining_time": "8:15:27"} +{"current_steps": 253, "total_steps": 1302, "loss": 0.43319445848464966, "lr": 3.895542524698417e-05, "epoch": 0.5836216839677048, "percentage": 19.43, "elapsed_time": "14:39:00", "remaining_time": "2 days, 12:44:35"} +{"current_steps": 98, "total_steps": 909, "loss": 0.5874066352844238, "lr": 3.999469022129834e-05, "epoch": 0.3234323432343234, "percentage": 10.78, "elapsed_time": "0:59:56", "remaining_time": "8:16:06"} +{"current_steps": 99, "total_steps": 909, "loss": 0.6175942420959473, "lr": 3.9992772916696824e-05, "epoch": 0.32673267326732675, "percentage": 10.89, "elapsed_time": "1:00:41", "remaining_time": "8:16:30"} +{"current_steps": 100, "total_steps": 909, "loss": 0.5625832080841064, "lr": 3.99905607182983e-05, "epoch": 0.33003300330033003, "percentage": 11.0, "elapsed_time": "1:01:25", "remaining_time": "8:16:54"} +{"current_steps": 101, "total_steps": 909, "loss": 0.6153020262718201, "lr": 3.998805365873274e-05, "epoch": 0.3333333333333333, "percentage": 11.11, "elapsed_time": "1:02:18", "remaining_time": "8:18:25"} +{"current_steps": 102, "total_steps": 909, "loss": 0.5585426092147827, "lr": 3.998525177497932e-05, "epoch": 0.33663366336633666, "percentage": 11.22, "elapsed_time": "1:02:55", "remaining_time": "8:17:51"} +{"current_steps": 254, "total_steps": 1302, "loss": 0.4421916902065277, "lr": 3.893824324069975e-05, "epoch": 0.5859284890426759, "percentage": 19.51, "elapsed_time": "14:42:34", "remaining_time": "2 days, 12:41:29"} +{"current_steps": 103, "total_steps": 909, "loss": 0.6586359739303589, "lr": 3.998215510836589e-05, "epoch": 0.33993399339933994, "percentage": 11.33, "elapsed_time": "1:03:33", "remaining_time": "8:17:19"} +{"current_steps": 104, "total_steps": 909, "loss": 0.62096107006073, "lr": 3.997876370456833e-05, "epoch": 
0.3432343234323432, "percentage": 11.44, "elapsed_time": "1:04:05", "remaining_time": "8:16:08"} +{"current_steps": 105, "total_steps": 909, "loss": 0.6059336066246033, "lr": 3.997507761360993e-05, "epoch": 0.3465346534653465, "percentage": 11.55, "elapsed_time": "1:04:46", "remaining_time": "8:16:02"} +{"current_steps": 106, "total_steps": 909, "loss": 0.617970883846283, "lr": 3.997109688986059e-05, "epoch": 0.34983498349834985, "percentage": 11.66, "elapsed_time": "1:05:42", "remaining_time": "8:17:49"} +{"current_steps": 255, "total_steps": 1302, "loss": 0.4514671266078949, "lr": 3.892092492514751e-05, "epoch": 0.5882352941176471, "percentage": 19.59, "elapsed_time": "14:45:37", "remaining_time": "2 days, 12:36:15"} +{"current_steps": 107, "total_steps": 909, "loss": 0.6453397274017334, "lr": 3.9966821592036066e-05, "epoch": 0.35313531353135313, "percentage": 11.77, "elapsed_time": "1:06:24", "remaining_time": "8:17:46"} +{"current_steps": 108, "total_steps": 909, "loss": 0.6371763348579407, "lr": 3.996225178319709e-05, "epoch": 0.3564356435643564, "percentage": 11.88, "elapsed_time": "1:07:00", "remaining_time": "8:16:59"} +{"current_steps": 109, "total_steps": 909, "loss": 0.5971124172210693, "lr": 3.9957387530748435e-05, "epoch": 0.35973597359735976, "percentage": 11.99, "elapsed_time": "1:07:42", "remaining_time": "8:16:56"} +{"current_steps": 110, "total_steps": 909, "loss": 0.5679532289505005, "lr": 3.995222890643792e-05, "epoch": 0.36303630363036304, "percentage": 12.1, "elapsed_time": "1:08:32", "remaining_time": "8:17:48"} +{"current_steps": 111, "total_steps": 909, "loss": 0.5988069772720337, "lr": 3.9946775986355346e-05, "epoch": 0.36633663366336633, "percentage": 12.21, "elapsed_time": "1:09:14", "remaining_time": "8:17:48"} +{"current_steps": 256, "total_steps": 1302, "loss": 0.4406735897064209, "lr": 3.890347042497717e-05, "epoch": 0.5905420991926182, "percentage": 19.66, "elapsed_time": "14:49:10", "remaining_time": "2 days, 12:33:05"} 
+{"current_steps": 112, "total_steps": 909, "loss": 0.6352983713150024, "lr": 3.994102885093141e-05, "epoch": 0.3696369636963696, "percentage": 12.32, "elapsed_time": "1:09:56", "remaining_time": "8:17:40"} +{"current_steps": 113, "total_steps": 909, "loss": 0.58957839012146, "lr": 3.993498758493646e-05, "epoch": 0.37293729372937295, "percentage": 12.43, "elapsed_time": "1:10:50", "remaining_time": "8:19:02"} +{"current_steps": 114, "total_steps": 909, "loss": 0.6396822929382324, "lr": 3.992865227747929e-05, "epoch": 0.37623762376237624, "percentage": 12.54, "elapsed_time": "1:11:30", "remaining_time": "8:18:43"} +{"current_steps": 115, "total_steps": 909, "loss": 0.6314754486083984, "lr": 3.992202302200582e-05, "epoch": 0.3795379537953795, "percentage": 12.65, "elapsed_time": "1:12:04", "remaining_time": "8:17:35"} +{"current_steps": 116, "total_steps": 909, "loss": 0.673650860786438, "lr": 3.991509991629769e-05, "epoch": 0.38283828382838286, "percentage": 12.76, "elapsed_time": "1:12:37", "remaining_time": "8:16:26"} +{"current_steps": 117, "total_steps": 909, "loss": 0.5813701152801514, "lr": 3.990788306247085e-05, "epoch": 0.38613861386138615, "percentage": 12.87, "elapsed_time": "1:13:11", "remaining_time": "8:15:24"} +{"current_steps": 257, "total_steps": 1302, "loss": 0.4251011312007904, "lr": 3.8885879865818656e-05, "epoch": 0.5928489042675894, "percentage": 19.74, "elapsed_time": "14:52:42", "remaining_time": "2 days, 12:29:52"} +{"current_steps": 118, "total_steps": 909, "loss": 0.6419334411621094, "lr": 3.990037256697404e-05, "epoch": 0.38943894389438943, "percentage": 12.98, "elapsed_time": "1:13:46", "remaining_time": "8:14:33"} +{"current_steps": 119, "total_steps": 909, "loss": 0.6319208145141602, "lr": 3.989256854058721e-05, "epoch": 0.3927392739273927, "percentage": 13.09, "elapsed_time": "1:14:22", "remaining_time": "8:13:43"} +{"current_steps": 120, "total_steps": 909, "loss": 0.5989845991134644, "lr": 3.988447109841991e-05, "epoch": 
0.39603960396039606, "percentage": 13.2, "elapsed_time": "1:14:56", "remaining_time": "8:12:45"} +{"current_steps": 121, "total_steps": 909, "loss": 0.5853303670883179, "lr": 3.987608035990957e-05, "epoch": 0.39933993399339934, "percentage": 13.31, "elapsed_time": "1:15:30", "remaining_time": "8:11:41"} +{"current_steps": 122, "total_steps": 909, "loss": 0.6115257143974304, "lr": 3.986739644881975e-05, "epoch": 0.40264026402640263, "percentage": 13.42, "elapsed_time": "1:15:58", "remaining_time": "8:10:04"} +{"current_steps": 123, "total_steps": 909, "loss": 0.6440504789352417, "lr": 3.985841949323831e-05, "epoch": 0.40594059405940597, "percentage": 13.53, "elapsed_time": "1:16:36", "remaining_time": "8:09:30"} +{"current_steps": 258, "total_steps": 1302, "loss": 0.42767131328582764, "lr": 3.88681533742812e-05, "epoch": 0.5951557093425606, "percentage": 19.82, "elapsed_time": "14:56:03", "remaining_time": "2 days, 12:25:53"} +{"current_steps": 124, "total_steps": 909, "loss": 0.5765030384063721, "lr": 3.984914962557553e-05, "epoch": 0.40924092409240925, "percentage": 13.64, "elapsed_time": "1:17:21", "remaining_time": "8:09:46"} +{"current_steps": 125, "total_steps": 909, "loss": 0.6387556791305542, "lr": 3.983958698256214e-05, "epoch": 0.41254125412541254, "percentage": 13.75, "elapsed_time": "1:17:58", "remaining_time": "8:09:04"} +{"current_steps": 126, "total_steps": 909, "loss": 0.6263147592544556, "lr": 3.98297317052473e-05, "epoch": 0.4158415841584158, "percentage": 13.86, "elapsed_time": "1:18:33", "remaining_time": "8:08:11"} +{"current_steps": 127, "total_steps": 909, "loss": 0.6091845035552979, "lr": 3.981958393899656e-05, "epoch": 0.41914191419141916, "percentage": 13.97, "elapsed_time": "1:19:14", "remaining_time": "8:07:53"} +{"current_steps": 128, "total_steps": 909, "loss": 0.6458015441894531, "lr": 3.980914383348967e-05, "epoch": 0.42244224422442245, "percentage": 14.08, "elapsed_time": "1:19:44", "remaining_time": "8:06:35"} +{"current_steps": 
259, "total_steps": 1302, "loss": 0.42612290382385254, "lr": 3.885029107795239e-05, "epoch": 0.5974625144175317, "percentage": 19.89, "elapsed_time": "14:59:10", "remaining_time": "2 days, 12:21:01"} +{"current_steps": 129, "total_steps": 909, "loss": 0.6115552186965942, "lr": 3.9798411542718395e-05, "epoch": 0.42574257425742573, "percentage": 14.19, "elapsed_time": "1:20:16", "remaining_time": "8:05:24"} +{"current_steps": 130, "total_steps": 909, "loss": 0.6427993774414062, "lr": 3.978738722498423e-05, "epoch": 0.429042904290429, "percentage": 14.3, "elapsed_time": "1:21:02", "remaining_time": "8:05:39"} +{"current_steps": 131, "total_steps": 909, "loss": 0.6121467351913452, "lr": 3.977607104289609e-05, "epoch": 0.43234323432343236, "percentage": 14.41, "elapsed_time": "1:21:38", "remaining_time": "8:04:49"} +{"current_steps": 132, "total_steps": 909, "loss": 0.5951442718505859, "lr": 3.9764463163367875e-05, "epoch": 0.43564356435643564, "percentage": 14.52, "elapsed_time": "1:22:10", "remaining_time": "8:03:44"} +{"current_steps": 133, "total_steps": 909, "loss": 0.6639472842216492, "lr": 3.9752563757616045e-05, "epoch": 0.4389438943894389, "percentage": 14.63, "elapsed_time": "1:22:36", "remaining_time": "8:02:00"} +{"current_steps": 260, "total_steps": 1302, "loss": 0.42033272981643677, "lr": 3.883229310539731e-05, "epoch": 0.5997693194925029, "percentage": 19.97, "elapsed_time": "15:02:20", "remaining_time": "2 days, 12:16:18"} +{"current_steps": 134, "total_steps": 909, "loss": 0.6084764003753662, "lr": 3.974037300115706e-05, "epoch": 0.44224422442244227, "percentage": 14.74, "elapsed_time": "1:23:11", "remaining_time": "8:01:07"} +{"current_steps": 135, "total_steps": 909, "loss": 0.6211085915565491, "lr": 3.972789107380484e-05, "epoch": 0.44554455445544555, "percentage": 14.85, "elapsed_time": "1:23:50", "remaining_time": "8:00:40"} +{"current_steps": 136, "total_steps": 909, "loss": 0.6098147034645081, "lr": 3.9715118159668046e-05, "epoch": 
0.44884488448844884, "percentage": 14.96, "elapsed_time": "1:24:22", "remaining_time": "7:59:34"} +{"current_steps": 137, "total_steps": 909, "loss": 0.6155884861946106, "lr": 3.970205444714742e-05, "epoch": 0.4521452145214521, "percentage": 15.07, "elapsed_time": "1:24:59", "remaining_time": "7:58:54"} +{"current_steps": 138, "total_steps": 909, "loss": 0.5984665155410767, "lr": 3.9688700128932975e-05, "epoch": 0.45544554455445546, "percentage": 15.18, "elapsed_time": "1:25:34", "remaining_time": "7:58:07"} +{"current_steps": 139, "total_steps": 909, "loss": 0.6656880378723145, "lr": 3.967505540200117e-05, "epoch": 0.45874587458745875, "percentage": 15.29, "elapsed_time": "1:26:07", "remaining_time": "7:57:03"} +{"current_steps": 261, "total_steps": 1302, "loss": 0.44159603118896484, "lr": 3.881415958615757e-05, "epoch": 0.6020761245674741, "percentage": 20.05, "elapsed_time": "15:05:33", "remaining_time": "2 days, 12:11:50"} +{"current_steps": 140, "total_steps": 909, "loss": 0.6607398390769958, "lr": 3.966112046761201e-05, "epoch": 0.46204620462046203, "percentage": 15.4, "elapsed_time": "1:26:43", "remaining_time": "7:56:20"} +{"current_steps": 141, "total_steps": 909, "loss": 0.6578342914581299, "lr": 3.9646895531306046e-05, "epoch": 0.46534653465346537, "percentage": 15.51, "elapsed_time": "1:27:28", "remaining_time": "7:56:27"} +{"current_steps": 142, "total_steps": 909, "loss": 0.6103699803352356, "lr": 3.963238080290136e-05, "epoch": 0.46864686468646866, "percentage": 15.62, "elapsed_time": "1:28:05", "remaining_time": "7:55:51"} +{"current_steps": 143, "total_steps": 909, "loss": 0.5484676957130432, "lr": 3.96175764964905e-05, "epoch": 0.47194719471947194, "percentage": 15.73, "elapsed_time": "1:28:56", "remaining_time": "7:56:25"} +{"current_steps": 144, "total_steps": 909, "loss": 0.578776478767395, "lr": 3.960248283043727e-05, "epoch": 0.4752475247524752, "percentage": 15.84, "elapsed_time": "1:29:38", "remaining_time": "7:56:12"} +{"current_steps": 
262, "total_steps": 1302, "loss": 0.4198034405708313, "lr": 3.879589065075035e-05, "epoch": 0.6043829296424452, "percentage": 20.12, "elapsed_time": "15:09:10", "remaining_time": "2 days, 12:08:55"} +{"current_steps": 145, "total_steps": 909, "loss": 0.6184446811676025, "lr": 3.958710002737355e-05, "epoch": 0.47854785478547857, "percentage": 15.95, "elapsed_time": "1:30:16", "remaining_time": "7:55:38"} +{"current_steps": 146, "total_steps": 909, "loss": 0.6307916045188904, "lr": 3.9571428314195984e-05, "epoch": 0.48184818481848185, "percentage": 16.06, "elapsed_time": "1:30:45", "remaining_time": "7:54:20"} +{"current_steps": 147, "total_steps": 909, "loss": 0.6064697504043579, "lr": 3.955546792206265e-05, "epoch": 0.48514851485148514, "percentage": 16.17, "elapsed_time": "1:31:21", "remaining_time": "7:53:34"} +{"current_steps": 148, "total_steps": 909, "loss": 0.6055655479431152, "lr": 3.953921908638966e-05, "epoch": 0.4884488448844885, "percentage": 16.28, "elapsed_time": "1:31:59", "remaining_time": "7:53:01"} +{"current_steps": 149, "total_steps": 909, "loss": 0.5856431126594543, "lr": 3.952268204684765e-05, "epoch": 0.49174917491749176, "percentage": 16.39, "elapsed_time": "1:32:43", "remaining_time": "7:52:56"} +{"current_steps": 150, "total_steps": 909, "loss": 0.6634635925292969, "lr": 3.950585704735829e-05, "epoch": 0.49504950495049505, "percentage": 16.5, "elapsed_time": "1:33:18", "remaining_time": "7:52:08"} +{"current_steps": 263, "total_steps": 1302, "loss": 0.41800999641418457, "lr": 3.8777486430667574e-05, "epoch": 0.6066897347174164, "percentage": 20.2, "elapsed_time": "15:12:43", "remaining_time": "2 days, 12:05:45"} +{"current_steps": 151, "total_steps": 909, "loss": 0.5880753397941589, "lr": 3.948874433609065e-05, "epoch": 0.49834983498349833, "percentage": 16.61, "elapsed_time": "1:34:20", "remaining_time": "7:53:34"} +{"current_steps": 152, "total_steps": 909, "loss": 0.5594221949577332, "lr": 3.947134416545757e-05, "epoch": 
0.5016501650165016, "percentage": 16.72, "elapsed_time": "1:34:49", "remaining_time": "7:52:15"} +{"current_steps": 153, "total_steps": 909, "loss": 0.664652407169342, "lr": 3.94536567921119e-05, "epoch": 0.504950495049505, "percentage": 16.83, "elapsed_time": "1:35:18", "remaining_time": "7:50:55"} +{"current_steps": 154, "total_steps": 909, "loss": 0.6002815961837769, "lr": 3.9435682476942755e-05, "epoch": 0.5082508250825083, "percentage": 16.94, "elapsed_time": "1:35:57", "remaining_time": "7:50:25"} +{"current_steps": 264, "total_steps": 1302, "loss": 0.42167818546295166, "lr": 3.875894705837482e-05, "epoch": 0.6089965397923875, "percentage": 20.28, "elapsed_time": "15:15:57", "remaining_time": "2 days, 12:01:23"} +{"current_steps": 155, "total_steps": 909, "loss": 0.5905177593231201, "lr": 3.941742148507163e-05, "epoch": 0.5115511551155115, "percentage": 17.05, "elapsed_time": "1:36:41", "remaining_time": "7:50:21"} +{"current_steps": 156, "total_steps": 909, "loss": 0.5636795163154602, "lr": 3.939887408584853e-05, "epoch": 0.5148514851485149, "percentage": 17.16, "elapsed_time": "1:37:24", "remaining_time": "7:50:10"} +{"current_steps": 157, "total_steps": 909, "loss": 0.5639582276344299, "lr": 3.938004055284796e-05, "epoch": 0.5181518151815182, "percentage": 17.27, "elapsed_time": "1:38:03", "remaining_time": "7:49:42"} +{"current_steps": 158, "total_steps": 909, "loss": 0.6515591144561768, "lr": 3.9360921163864895e-05, "epoch": 0.5214521452145214, "percentage": 17.38, "elapsed_time": "1:38:40", "remaining_time": "7:48:59"} +{"current_steps": 159, "total_steps": 909, "loss": 0.5721683502197266, "lr": 3.934151620091071e-05, "epoch": 0.5247524752475248, "percentage": 17.49, "elapsed_time": "1:39:18", "remaining_time": "7:48:26"} +{"current_steps": 160, "total_steps": 909, "loss": 0.5801802277565002, "lr": 3.9321825950209e-05, "epoch": 0.528052805280528, "percentage": 17.6, "elapsed_time": "1:39:56", "remaining_time": "7:47:51"} +{"current_steps": 265, 
"total_steps": 1302, "loss": 0.439184308052063, "lr": 3.874027266731045e-05, "epoch": 0.6113033448673587, "percentage": 20.35, "elapsed_time": "15:19:32", "remaining_time": "2 days, 11:58:23"} +{"current_steps": 161, "total_steps": 909, "loss": 0.603084921836853, "lr": 3.9301850702191344e-05, "epoch": 0.5313531353135313, "percentage": 17.71, "elapsed_time": "1:40:45", "remaining_time": "7:48:07"} +{"current_steps": 162, "total_steps": 909, "loss": 0.6376925110816956, "lr": 3.928159075149304e-05, "epoch": 0.5346534653465347, "percentage": 17.82, "elapsed_time": "1:41:23", "remaining_time": "7:47:31"} +{"current_steps": 163, "total_steps": 909, "loss": 0.5764102935791016, "lr": 3.926104639694877e-05, "epoch": 0.5379537953795379, "percentage": 17.93, "elapsed_time": "1:41:58", "remaining_time": "7:46:42"} +{"current_steps": 164, "total_steps": 909, "loss": 0.6102188229560852, "lr": 3.924021794158818e-05, "epoch": 0.5412541254125413, "percentage": 18.04, "elapsed_time": "1:42:42", "remaining_time": "7:46:32"} +{"current_steps": 165, "total_steps": 909, "loss": 0.5833287835121155, "lr": 3.921910569263139e-05, "epoch": 0.5445544554455446, "percentage": 18.15, "elapsed_time": "1:43:45", "remaining_time": "7:47:49"} +{"current_steps": 266, "total_steps": 1302, "loss": 0.43136483430862427, "lr": 3.872146339188465e-05, "epoch": 0.6136101499423299, "percentage": 20.43, "elapsed_time": "15:23:12", "remaining_time": "2 days, 11:55:39"} +{"current_steps": 166, "total_steps": 909, "loss": 0.5891385078430176, "lr": 3.919770996148448e-05, "epoch": 0.5478547854785478, "percentage": 18.26, "elapsed_time": "1:44:22", "remaining_time": "7:47:12"} +{"current_steps": 167, "total_steps": 909, "loss": 0.5838547348976135, "lr": 3.917603106373493e-05, "epoch": 0.5511551155115512, "percentage": 18.37, "elapsed_time": "1:45:03", "remaining_time": "7:46:48"} +{"current_steps": 168, "total_steps": 909, "loss": 0.5727800726890564, "lr": 3.9154069319146904e-05, "epoch": 0.5544554455445545, 
"percentage": 18.48, "elapsed_time": "1:45:49", "remaining_time": "7:46:44"} +{"current_steps": 169, "total_steps": 909, "loss": 0.6102641224861145, "lr": 3.913182505165656e-05, "epoch": 0.5577557755775577, "percentage": 18.59, "elapsed_time": "1:46:25", "remaining_time": "7:46:01"} +{"current_steps": 170, "total_steps": 909, "loss": 0.5718260407447815, "lr": 3.91092985893673e-05, "epoch": 0.5610561056105611, "percentage": 18.7, "elapsed_time": "1:47:03", "remaining_time": "7:45:22"} +{"current_steps": 267, "total_steps": 1302, "loss": 0.4059191942214966, "lr": 3.870251936747843e-05, "epoch": 0.615916955017301, "percentage": 20.51, "elapsed_time": "15:26:53", "remaining_time": "2 days, 11:53:00"} +{"current_steps": 171, "total_steps": 909, "loss": 0.6308504939079285, "lr": 3.908649026454488e-05, "epoch": 0.5643564356435643, "percentage": 18.81, "elapsed_time": "1:47:45", "remaining_time": "7:45:02"} +{"current_steps": 172, "total_steps": 909, "loss": 0.6089432835578918, "lr": 3.906340041361255e-05, "epoch": 0.5676567656765676, "percentage": 18.92, "elapsed_time": "1:48:11", "remaining_time": "7:43:35"} +{"current_steps": 173, "total_steps": 909, "loss": 0.6583501696586609, "lr": 3.904002937714606e-05, "epoch": 0.570957095709571, "percentage": 19.03, "elapsed_time": "1:48:40", "remaining_time": "7:42:20"} +{"current_steps": 174, "total_steps": 909, "loss": 0.6108609437942505, "lr": 3.9016377499868666e-05, "epoch": 0.5742574257425742, "percentage": 19.14, "elapsed_time": "1:49:20", "remaining_time": "7:41:50"} +{"current_steps": 175, "total_steps": 909, "loss": 0.63509202003479, "lr": 3.899244513064603e-05, "epoch": 0.5775577557755776, "percentage": 19.25, "elapsed_time": "1:50:07", "remaining_time": "7:41:53"} +{"current_steps": 176, "total_steps": 909, "loss": 0.5759241580963135, "lr": 3.896823262248107e-05, "epoch": 0.5808580858085809, "percentage": 19.36, "elapsed_time": "1:50:55", "remaining_time": "7:42:00"} +{"current_steps": 268, "total_steps": 1302, "loss": 
0.41643980145454407, "lr": 3.868344073044266e-05, "epoch": 0.6182237600922722, "percentage": 20.58, "elapsed_time": "15:30:27", "remaining_time": "2 days, 11:49:55"} +{"current_steps": 177, "total_steps": 909, "loss": 0.6148169040679932, "lr": 3.8943740332508754e-05, "epoch": 0.5841584158415841, "percentage": 19.47, "elapsed_time": "1:51:34", "remaining_time": "7:41:26"} +{"current_steps": 178, "total_steps": 909, "loss": 0.5266364216804504, "lr": 3.891896862199086e-05, "epoch": 0.5874587458745875, "percentage": 19.58, "elapsed_time": "1:52:07", "remaining_time": "7:40:29"} +{"current_steps": 179, "total_steps": 909, "loss": 0.5626640319824219, "lr": 3.88939178563106e-05, "epoch": 0.5907590759075908, "percentage": 19.69, "elapsed_time": "1:52:58", "remaining_time": "7:40:43"} +{"current_steps": 180, "total_steps": 909, "loss": 0.6063880920410156, "lr": 3.886858840496727e-05, "epoch": 0.594059405940594, "percentage": 19.8, "elapsed_time": "1:53:33", "remaining_time": "7:39:55"} +{"current_steps": 181, "total_steps": 909, "loss": 0.5979235768318176, "lr": 3.884298064157077e-05, "epoch": 0.5973597359735974, "percentage": 19.91, "elapsed_time": "1:54:11", "remaining_time": "7:39:16"} +{"current_steps": 269, "total_steps": 1302, "loss": 0.43124985694885254, "lr": 3.866422761809712e-05, "epoch": 0.6205305651672434, "percentage": 20.66, "elapsed_time": "15:34:05", "remaining_time": "2 days, 11:47:02"} +{"current_steps": 182, "total_steps": 909, "loss": 0.6628611087799072, "lr": 3.881709494383612e-05, "epoch": 0.6006600660066007, "percentage": 20.02, "elapsed_time": "1:54:43", "remaining_time": "7:38:17"} +{"current_steps": 183, "total_steps": 909, "loss": 0.6215270757675171, "lr": 3.879093169357789e-05, "epoch": 0.6039603960396039, "percentage": 20.13, "elapsed_time": "1:55:21", "remaining_time": "7:37:37"} +{"current_steps": 184, "total_steps": 909, "loss": 0.6148592233657837, "lr": 3.876449127670452e-05, "epoch": 0.6072607260726073, "percentage": 20.24, "elapsed_time": 
"1:55:58", "remaining_time": "7:36:57"} +{"current_steps": 185, "total_steps": 909, "loss": 0.6254778504371643, "lr": 3.87377740832127e-05, "epoch": 0.6105610561056105, "percentage": 20.35, "elapsed_time": "1:56:36", "remaining_time": "7:36:21"} +{"current_steps": 186, "total_steps": 909, "loss": 0.6025378704071045, "lr": 3.871078050718155e-05, "epoch": 0.6138613861386139, "percentage": 20.46, "elapsed_time": "1:57:11", "remaining_time": "7:35:33"} +{"current_steps": 187, "total_steps": 909, "loss": 0.5887518525123596, "lr": 3.8683510946766866e-05, "epoch": 0.6171617161716172, "percentage": 20.57, "elapsed_time": "1:57:39", "remaining_time": "7:34:15"} +{"current_steps": 188, "total_steps": 909, "loss": 0.6180317401885986, "lr": 3.865596580419519e-05, "epoch": 0.6204620462046204, "percentage": 20.68, "elapsed_time": "1:58:06", "remaining_time": "7:32:57"} +{"current_steps": 270, "total_steps": 1302, "loss": 0.4011824429035187, "lr": 3.8644880168729456e-05, "epoch": 0.6228373702422145, "percentage": 20.74, "elapsed_time": "15:37:36", "remaining_time": "2 days, 11:43:45"} +{"current_steps": 189, "total_steps": 909, "loss": 0.5970651507377625, "lr": 3.8628145485757925e-05, "epoch": 0.6237623762376238, "percentage": 20.79, "elapsed_time": "1:58:55", "remaining_time": "7:33:03"} +{"current_steps": 190, "total_steps": 909, "loss": 0.6027296781539917, "lr": 3.860005040180533e-05, "epoch": 0.6270627062706271, "percentage": 20.9, "elapsed_time": "1:59:29", "remaining_time": "7:32:11"} +{"current_steps": 191, "total_steps": 909, "loss": 0.6326305270195007, "lr": 3.857168096674044e-05, "epoch": 0.6303630363036303, "percentage": 21.01, "elapsed_time": "1:59:57", "remaining_time": "7:30:56"} +{"current_steps": 192, "total_steps": 909, "loss": 0.6508482694625854, "lr": 3.854303759901299e-05, "epoch": 0.6336633663366337, "percentage": 21.12, "elapsed_time": "2:00:41", "remaining_time": "7:30:43"} +{"current_steps": 193, "total_steps": 909, "loss": 0.6088548302650452, "lr": 
3.851412072111322e-05, "epoch": 0.636963696369637, "percentage": 21.23, "elapsed_time": "2:01:12", "remaining_time": "7:29:40"} +{"current_steps": 271, "total_steps": 1302, "loss": 0.4177245497703552, "lr": 3.8625398521594214e-05, "epoch": 0.6251441753171857, "percentage": 20.81, "elapsed_time": "15:41:07", "remaining_time": "2 days, 11:40:26"} +{"current_steps": 194, "total_steps": 909, "loss": 0.5975607633590698, "lr": 3.8484930759565645e-05, "epoch": 0.6402640264026402, "percentage": 21.34, "elapsed_time": "2:02:00", "remaining_time": "7:29:40"} +{"current_steps": 195, "total_steps": 909, "loss": 0.5467930436134338, "lr": 3.845546814492279e-05, "epoch": 0.6435643564356436, "percentage": 21.45, "elapsed_time": "2:02:42", "remaining_time": "7:29:16"} +{"current_steps": 196, "total_steps": 909, "loss": 0.583969235420227, "lr": 3.8425733311758795e-05, "epoch": 0.6468646864686468, "percentage": 21.56, "elapsed_time": "2:03:25", "remaining_time": "7:28:58"} +{"current_steps": 197, "total_steps": 909, "loss": 0.6007376909255981, "lr": 3.8395726698663045e-05, "epoch": 0.6501650165016502, "percentage": 21.67, "elapsed_time": "2:03:55", "remaining_time": "7:27:53"} +{"current_steps": 198, "total_steps": 909, "loss": 0.5971908569335938, "lr": 3.836544874823368e-05, "epoch": 0.6534653465346535, "percentage": 21.78, "elapsed_time": "2:04:33", "remaining_time": "7:27:15"} +{"current_steps": 272, "total_steps": 1302, "loss": 0.4108453691005707, "lr": 3.8605782816911866e-05, "epoch": 0.6274509803921569, "percentage": 20.89, "elapsed_time": "15:44:36", "remaining_time": "2 days, 11:37:02"} +{"current_steps": 199, "total_steps": 909, "loss": 0.592069685459137, "lr": 3.8334899907071064e-05, "epoch": 0.6567656765676567, "percentage": 21.89, "elapsed_time": "2:05:22", "remaining_time": "7:27:17"} +{"current_steps": 200, "total_steps": 909, "loss": 0.6188071966171265, "lr": 3.830408062577121e-05, "epoch": 0.6600660066006601, "percentage": 22.0, "elapsed_time": "2:06:19", 
"remaining_time": "7:27:50"} +{"current_steps": 201, "total_steps": 909, "loss": 0.5976923704147339, "lr": 3.827299135891913e-05, "epoch": 0.6633663366336634, "percentage": 22.11, "elapsed_time": "2:06:47", "remaining_time": "7:26:37"} +{"current_steps": 202, "total_steps": 909, "loss": 0.6120954155921936, "lr": 3.8241632565082124e-05, "epoch": 0.6666666666666666, "percentage": 22.22, "elapsed_time": "2:07:20", "remaining_time": "7:25:42"} +{"current_steps": 203, "total_steps": 909, "loss": 0.6661979556083679, "lr": 3.821000470680303e-05, "epoch": 0.66996699669967, "percentage": 22.33, "elapsed_time": "2:07:51", "remaining_time": "7:24:40"} +{"current_steps": 204, "total_steps": 909, "loss": 0.5853559970855713, "lr": 3.8178108250593384e-05, "epoch": 0.6732673267326733, "percentage": 22.44, "elapsed_time": "2:08:36", "remaining_time": "7:24:27"} +{"current_steps": 273, "total_steps": 1302, "loss": 0.4276476800441742, "lr": 3.858603319586774e-05, "epoch": 0.629757785467128, "percentage": 20.97, "elapsed_time": "15:48:06", "remaining_time": "2 days, 11:33:37"} +{"current_steps": 205, "total_steps": 909, "loss": 0.6648768186569214, "lr": 3.814594366692654e-05, "epoch": 0.6765676567656765, "percentage": 22.55, "elapsed_time": "2:09:10", "remaining_time": "7:23:36"} +{"current_steps": 206, "total_steps": 909, "loss": 0.5893838405609131, "lr": 3.8113511430230745e-05, "epoch": 0.6798679867986799, "percentage": 22.66, "elapsed_time": "2:09:40", "remaining_time": "7:22:32"} +{"current_steps": 207, "total_steps": 909, "loss": 0.6177140474319458, "lr": 3.808081201888214e-05, "epoch": 0.6831683168316832, "percentage": 22.77, "elapsed_time": "2:10:16", "remaining_time": "7:21:47"} +{"current_steps": 208, "total_steps": 909, "loss": 0.5793695449829102, "lr": 3.8047845915197695e-05, "epoch": 0.6864686468646864, "percentage": 22.88, "elapsed_time": "2:10:52", "remaining_time": "7:21:05"} +{"current_steps": 209, "total_steps": 909, "loss": 0.5571605563163757, "lr": 
3.8014613605428084e-05, "epoch": 0.6897689768976898, "percentage": 22.99, "elapsed_time": "2:11:37", "remaining_time": "7:20:52"} +{"current_steps": 274, "total_steps": 1302, "loss": 0.42278772592544556, "lr": 3.856614980061106e-05, "epoch": 0.6320645905420992, "percentage": 21.04, "elapsed_time": "15:51:26", "remaining_time": "2 days, 11:29:37"} +{"current_steps": 210, "total_steps": 909, "loss": 0.5945760011672974, "lr": 3.798111557975053e-05, "epoch": 0.693069306930693, "percentage": 23.1, "elapsed_time": "2:12:16", "remaining_time": "7:20:18"} +{"current_steps": 211, "total_steps": 909, "loss": 0.600873589515686, "lr": 3.7947352332261586e-05, "epoch": 0.6963696369636964, "percentage": 23.21, "elapsed_time": "2:13:03", "remaining_time": "7:20:08"} +{"current_steps": 212, "total_steps": 909, "loss": 0.6234852075576782, "lr": 3.791332436096983e-05, "epoch": 0.6996699669966997, "percentage": 23.32, "elapsed_time": "2:13:29", "remaining_time": "7:18:53"} +{"current_steps": 213, "total_steps": 909, "loss": 0.6129578948020935, "lr": 3.7879032167788494e-05, "epoch": 0.7029702970297029, "percentage": 23.43, "elapsed_time": "2:14:06", "remaining_time": "7:18:11"} +{"current_steps": 214, "total_steps": 909, "loss": 0.6204475164413452, "lr": 3.784447625852812e-05, "epoch": 0.7062706270627063, "percentage": 23.54, "elapsed_time": "2:14:38", "remaining_time": "7:17:17"} +{"current_steps": 275, "total_steps": 1302, "loss": 0.45093801617622375, "lr": 3.8546132774253885e-05, "epoch": 0.6343713956170703, "percentage": 21.12, "elapsed_time": "15:54:42", "remaining_time": "2 days, 11:25:22"} +{"current_steps": 215, "total_steps": 909, "loss": 0.6734122037887573, "lr": 3.780965714288905e-05, "epoch": 0.7095709570957096, "percentage": 23.65, "elapsed_time": "2:15:18", "remaining_time": "7:16:45"} +{"current_steps": 216, "total_steps": 909, "loss": 0.5678560137748718, "lr": 3.777457533445393e-05, "epoch": 0.7128712871287128, "percentage": 23.76, "elapsed_time": "2:15:51", 
"remaining_time": "7:15:52"} +{"current_steps": 217, "total_steps": 909, "loss": 0.5784683227539062, "lr": 3.7739231350680135e-05, "epoch": 0.7161716171617162, "percentage": 23.87, "elapsed_time": "2:16:27", "remaining_time": "7:15:09"} +{"current_steps": 218, "total_steps": 909, "loss": 0.6060354113578796, "lr": 3.7703625712892125e-05, "epoch": 0.7194719471947195, "percentage": 23.98, "elapsed_time": "2:17:00", "remaining_time": "7:14:16"} +{"current_steps": 219, "total_steps": 909, "loss": 0.6248741745948792, "lr": 3.766775894627376e-05, "epoch": 0.7227722772277227, "percentage": 24.09, "elapsed_time": "2:17:40", "remaining_time": "7:13:45"} +{"current_steps": 220, "total_steps": 909, "loss": 0.6014479398727417, "lr": 3.7631631579860553e-05, "epoch": 0.7260726072607261, "percentage": 24.2, "elapsed_time": "2:18:12", "remaining_time": "7:12:50"} +{"current_steps": 276, "total_steps": 1302, "loss": 0.4173239767551422, "lr": 3.852598226087008e-05, "epoch": 0.6366782006920415, "percentage": 21.2, "elapsed_time": "15:58:13", "remaining_time": "2 days, 11:22:07"} +{"current_steps": 221, "total_steps": 909, "loss": 0.6283233761787415, "lr": 3.759524414653189e-05, "epoch": 0.7293729372937293, "percentage": 24.31, "elapsed_time": "2:18:57", "remaining_time": "7:12:36"} +{"current_steps": 222, "total_steps": 909, "loss": 0.5710185766220093, "lr": 3.755859718300313e-05, "epoch": 0.7326732673267327, "percentage": 24.42, "elapsed_time": "2:19:32", "remaining_time": "7:11:50"} +{"current_steps": 223, "total_steps": 909, "loss": 0.6007407903671265, "lr": 3.75216912298177e-05, "epoch": 0.735973597359736, "percentage": 24.53, "elapsed_time": "2:20:06", "remaining_time": "7:10:58"} +{"current_steps": 224, "total_steps": 909, "loss": 0.6852575540542603, "lr": 3.748452683133916e-05, "epoch": 0.7392739273927392, "percentage": 24.64, "elapsed_time": "2:20:53", "remaining_time": "7:10:52"} +{"current_steps": 225, "total_steps": 909, "loss": 0.6270833611488342, "lr": 
3.7447104535743115e-05, "epoch": 0.7425742574257426, "percentage": 24.75, "elapsed_time": "2:21:36", "remaining_time": "7:10:28"} +{"current_steps": 226, "total_steps": 909, "loss": 0.5925471782684326, "lr": 3.740942489500916e-05, "epoch": 0.7458745874587459, "percentage": 24.86, "elapsed_time": "2:22:25", "remaining_time": "7:10:26"} +{"current_steps": 277, "total_steps": 1302, "loss": 0.43407827615737915, "lr": 3.850569840549434e-05, "epoch": 0.6389850057670127, "percentage": 21.27, "elapsed_time": "16:01:53", "remaining_time": "2 days, 11:19:19"} +{"current_steps": 227, "total_steps": 909, "loss": 0.573570728302002, "lr": 3.737148846491275e-05, "epoch": 0.7491749174917491, "percentage": 24.97, "elapsed_time": "2:23:04", "remaining_time": "7:09:50"} +{"current_steps": 228, "total_steps": 909, "loss": 0.6088368892669678, "lr": 3.7333295805016986e-05, "epoch": 0.7524752475247525, "percentage": 25.08, "elapsed_time": "2:23:47", "remaining_time": "7:09:27"} +{"current_steps": 229, "total_steps": 909, "loss": 0.5496470332145691, "lr": 3.729484747866435e-05, "epoch": 0.7557755775577558, "percentage": 25.19, "elapsed_time": "2:24:43", "remaining_time": "7:09:46"} +{"current_steps": 230, "total_steps": 909, "loss": 0.6008220314979553, "lr": 3.725614405296843e-05, "epoch": 0.759075907590759, "percentage": 25.3, "elapsed_time": "2:25:29", "remaining_time": "7:09:31"} +{"current_steps": 278, "total_steps": 1302, "loss": 0.4158100187778473, "lr": 3.848528135412104e-05, "epoch": 0.6412918108419838, "percentage": 21.35, "elapsed_time": "16:05:26", "remaining_time": "2 days, 11:16:09"} +{"current_steps": 231, "total_steps": 909, "loss": 0.5982120037078857, "lr": 3.721718609880551e-05, "epoch": 0.7623762376237624, "percentage": 25.41, "elapsed_time": "2:26:32", "remaining_time": "7:10:06"} +{"current_steps": 232, "total_steps": 909, "loss": 0.6404559016227722, "lr": 3.717797419080618e-05, "epoch": 0.7656765676567657, "percentage": 25.52, "elapsed_time": "2:27:05", 
"remaining_time": "7:09:12"} +{"current_steps": 233, "total_steps": 909, "loss": 0.5875239372253418, "lr": 3.713850890734689e-05, "epoch": 0.768976897689769, "percentage": 25.63, "elapsed_time": "2:27:42", "remaining_time": "7:08:32"} +{"current_steps": 234, "total_steps": 909, "loss": 0.5962772369384766, "lr": 3.709879083054133e-05, "epoch": 0.7722772277227723, "percentage": 25.74, "elapsed_time": "2:28:22", "remaining_time": "7:08:01"} +{"current_steps": 235, "total_steps": 909, "loss": 0.5764110684394836, "lr": 3.705882054623192e-05, "epoch": 0.7755775577557755, "percentage": 25.85, "elapsed_time": "2:29:03", "remaining_time": "7:07:30"} +{"current_steps": 279, "total_steps": 1302, "loss": 0.41657555103302, "lr": 3.846473125370328e-05, "epoch": 0.643598615916955, "percentage": 21.43, "elapsed_time": "16:08:54", "remaining_time": "2 days, 11:12:41"} +{"current_steps": 236, "total_steps": 909, "loss": 0.5635858178138733, "lr": 3.7018598643981165e-05, "epoch": 0.7788778877887789, "percentage": 25.96, "elapsed_time": "2:29:55", "remaining_time": "7:07:32"} +{"current_steps": 237, "total_steps": 909, "loss": 0.5880881547927856, "lr": 3.69781257170629e-05, "epoch": 0.7821782178217822, "percentage": 26.07, "elapsed_time": "2:30:33", "remaining_time": "7:06:52"} +{"current_steps": 238, "total_steps": 909, "loss": 0.5644733905792236, "lr": 3.6937402362453606e-05, "epoch": 0.7854785478547854, "percentage": 26.18, "elapsed_time": "2:31:13", "remaining_time": "7:06:20"} +{"current_steps": 239, "total_steps": 909, "loss": 0.6431151032447815, "lr": 3.689642918082358e-05, "epoch": 0.7887788778877888, "percentage": 26.29, "elapsed_time": "2:31:49", "remaining_time": "7:05:36"} +{"current_steps": 240, "total_steps": 909, "loss": 0.5848085880279541, "lr": 3.6855206776528055e-05, "epoch": 0.7920792079207921, "percentage": 26.4, "elapsed_time": "2:32:37", "remaining_time": "7:05:27"} +{"current_steps": 280, "total_steps": 1302, "loss": 0.44225457310676575, "lr": 
3.844404825215177e-05, "epoch": 0.6459054209919262, "percentage": 21.51, "elapsed_time": "16:12:05", "remaining_time": "2 days, 11:08:08"} +{"current_steps": 241, "total_steps": 909, "loss": 0.590021550655365, "lr": 3.681373575759831e-05, "epoch": 0.7953795379537953, "percentage": 26.51, "elapsed_time": "2:33:09", "remaining_time": "7:04:32"} +{"current_steps": 242, "total_steps": 909, "loss": 0.6217919588088989, "lr": 3.67720167357327e-05, "epoch": 0.7986798679867987, "percentage": 26.62, "elapsed_time": "2:33:53", "remaining_time": "7:04:09"} +{"current_steps": 243, "total_steps": 909, "loss": 0.6075180172920227, "lr": 3.673005032628763e-05, "epoch": 0.801980198019802, "percentage": 26.73, "elapsed_time": "2:34:35", "remaining_time": "7:03:42"} +{"current_steps": 244, "total_steps": 909, "loss": 0.6078404188156128, "lr": 3.668783714826846e-05, "epoch": 0.8052805280528053, "percentage": 26.84, "elapsed_time": "2:35:12", "remaining_time": "7:03:00"} +{"current_steps": 245, "total_steps": 909, "loss": 0.6297526955604553, "lr": 3.664537782432042e-05, "epoch": 0.8085808580858086, "percentage": 26.95, "elapsed_time": "2:35:48", "remaining_time": "7:02:15"} +{"current_steps": 281, "total_steps": 1302, "loss": 0.4268050789833069, "lr": 3.84232324983338e-05, "epoch": 0.6482122260668973, "percentage": 21.58, "elapsed_time": "16:15:35", "remaining_time": "2 days, 11:04:46"} +{"current_steps": 246, "total_steps": 909, "loss": 0.5684514045715332, "lr": 3.660267298071936e-05, "epoch": 0.8118811881188119, "percentage": 27.06, "elapsed_time": "2:36:19", "remaining_time": "7:01:19"} +{"current_steps": 247, "total_steps": 909, "loss": 0.6192148327827454, "lr": 3.655972324736259e-05, "epoch": 0.8151815181518152, "percentage": 27.17, "elapsed_time": "2:36:58", "remaining_time": "7:00:43"} +{"current_steps": 248, "total_steps": 909, "loss": 0.5900243520736694, "lr": 3.6516529257759506e-05, "epoch": 0.8184818481848185, "percentage": 27.28, "elapsed_time": "2:37:29", "remaining_time": 
"6:59:47"} +{"current_steps": 249, "total_steps": 909, "loss": 0.5941751599311829, "lr": 3.6473091649022337e-05, "epoch": 0.8217821782178217, "percentage": 27.39, "elapsed_time": "2:38:25", "remaining_time": "6:59:55"} +{"current_steps": 250, "total_steps": 909, "loss": 0.5744310021400452, "lr": 3.6429411061856645e-05, "epoch": 0.8250825082508251, "percentage": 27.5, "elapsed_time": "2:39:10", "remaining_time": "6:59:36"} +{"current_steps": 282, "total_steps": 1302, "loss": 0.41612792015075684, "lr": 3.840228414207214e-05, "epoch": 0.6505190311418685, "percentage": 21.66, "elapsed_time": "16:19:05", "remaining_time": "2 days, 11:01:25"} +{"current_steps": 251, "total_steps": 909, "loss": 0.5985124707221985, "lr": 3.6385488140551985e-05, "epoch": 0.8283828382838284, "percentage": 27.61, "elapsed_time": "2:39:59", "remaining_time": "6:59:24"} +{"current_steps": 252, "total_steps": 909, "loss": 0.581912636756897, "lr": 3.6341323532972294e-05, "epoch": 0.8316831683168316, "percentage": 27.72, "elapsed_time": "2:40:35", "remaining_time": "6:58:41"} +{"current_steps": 253, "total_steps": 909, "loss": 0.586786150932312, "lr": 3.629691789054643e-05, "epoch": 0.834983498349835, "percentage": 27.83, "elapsed_time": "2:41:18", "remaining_time": "6:58:14"} +{"current_steps": 254, "total_steps": 909, "loss": 0.6312603950500488, "lr": 3.625227186825848e-05, "epoch": 0.8382838283828383, "percentage": 27.94, "elapsed_time": "2:42:00", "remaining_time": "6:57:46"} +{"current_steps": 255, "total_steps": 909, "loss": 0.5886626243591309, "lr": 3.620738612463818e-05, "epoch": 0.8415841584158416, "percentage": 28.05, "elapsed_time": "2:42:33", "remaining_time": "6:56:54"} +{"current_steps": 283, "total_steps": 1302, "loss": 0.3921445608139038, "lr": 3.838120333414397e-05, "epoch": 0.6528258362168397, "percentage": 21.74, "elapsed_time": "16:22:36", "remaining_time": "2 days, 10:58:04"} +{"current_steps": 256, "total_steps": 909, "loss": 0.5892266035079956, "lr": 3.6162261321751114e-05, 
"epoch": 0.8448844884488449, "percentage": 28.16, "elapsed_time": "2:43:17", "remaining_time": "6:56:32"} +{"current_steps": 257, "total_steps": 909, "loss": 0.5472115278244019, "lr": 3.6116898125189045e-05, "epoch": 0.8481848184818482, "percentage": 28.27, "elapsed_time": "2:44:03", "remaining_time": "6:56:11"} +{"current_steps": 258, "total_steps": 909, "loss": 0.5981796383857727, "lr": 3.6071297204059995e-05, "epoch": 0.8514851485148515, "percentage": 28.38, "elapsed_time": "2:44:46", "remaining_time": "6:55:46"} +{"current_steps": 259, "total_steps": 909, "loss": 0.6708342432975769, "lr": 3.6025459230978475e-05, "epoch": 0.8547854785478548, "percentage": 28.49, "elapsed_time": "2:45:29", "remaining_time": "6:55:20"} +{"current_steps": 260, "total_steps": 909, "loss": 0.6306079626083374, "lr": 3.597938488205549e-05, "epoch": 0.858085808580858, "percentage": 28.6, "elapsed_time": "2:46:05", "remaining_time": "6:54:35"} +{"current_steps": 261, "total_steps": 909, "loss": 0.6098329424858093, "lr": 3.59330748368886e-05, "epoch": 0.8613861386138614, "percentage": 28.71, "elapsed_time": "2:46:37", "remaining_time": "6:53:42"} +{"current_steps": 284, "total_steps": 1302, "loss": 0.4084510803222656, "lr": 3.835999022627983e-05, "epoch": 0.6551326412918108, "percentage": 21.81, "elapsed_time": "16:26:12", "remaining_time": "2 days, 10:55:03"} +{"current_steps": 262, "total_steps": 909, "loss": 0.5617724061012268, "lr": 3.588652977855189e-05, "epoch": 0.8646864686468647, "percentage": 28.82, "elapsed_time": "2:47:21", "remaining_time": "6:53:17"} +{"current_steps": 263, "total_steps": 909, "loss": 0.5780894756317139, "lr": 3.58397503935859e-05, "epoch": 0.8679867986798679, "percentage": 28.93, "elapsed_time": "2:48:01", "remaining_time": "6:52:42"} +{"current_steps": 264, "total_steps": 909, "loss": 0.578921377658844, "lr": 3.5792737371987477e-05, "epoch": 0.8712871287128713, "percentage": 29.04, "elapsed_time": "2:48:42", "remaining_time": "6:52:11"} +{"current_steps": 
265, "total_steps": 909, "loss": 0.614944577217102, "lr": 3.574549140719962e-05, "epoch": 0.8745874587458746, "percentage": 29.15, "elapsed_time": "2:49:21", "remaining_time": "6:51:33"} +{"current_steps": 266, "total_steps": 909, "loss": 0.6269869208335876, "lr": 3.569801319610125e-05, "epoch": 0.8778877887788779, "percentage": 29.26, "elapsed_time": "2:49:52", "remaining_time": "6:50:37"} +{"current_steps": 267, "total_steps": 909, "loss": 0.6045581102371216, "lr": 3.565030343899693e-05, "epoch": 0.8811881188118812, "percentage": 29.37, "elapsed_time": "2:50:25", "remaining_time": "6:49:46"} +{"current_steps": 285, "total_steps": 1302, "loss": 0.42947059869766235, "lr": 3.833864497116247e-05, "epoch": 0.657439446366782, "percentage": 21.89, "elapsed_time": "16:29:50", "remaining_time": "2 days, 10:52:10"} +{"current_steps": 268, "total_steps": 909, "loss": 0.5872907638549805, "lr": 3.5602362839606514e-05, "epoch": 0.8844884488448845, "percentage": 29.48, "elapsed_time": "2:51:11", "remaining_time": "6:49:26"} +{"current_steps": 269, "total_steps": 909, "loss": 0.6283375024795532, "lr": 3.55541921050548e-05, "epoch": 0.8877887788778878, "percentage": 29.59, "elapsed_time": "2:51:45", "remaining_time": "6:48:38"} +{"current_steps": 270, "total_steps": 909, "loss": 0.5747002363204956, "lr": 3.5505791945861076e-05, "epoch": 0.8910891089108911, "percentage": 29.7, "elapsed_time": "2:52:31", "remaining_time": "6:48:19"} +{"current_steps": 271, "total_steps": 909, "loss": 0.6205827593803406, "lr": 3.545716307592864e-05, "epoch": 0.8943894389438944, "percentage": 29.81, "elapsed_time": "2:53:05", "remaining_time": "6:47:29"} +{"current_steps": 272, "total_steps": 909, "loss": 0.5987251400947571, "lr": 3.54083062125343e-05, "epoch": 0.8976897689768977, "percentage": 29.92, "elapsed_time": "2:53:36", "remaining_time": "6:46:33"} +{"current_steps": 286, "total_steps": 1302, "loss": 0.414999395608902, "lr": 3.831716772242578e-05, "epoch": 0.6597462514417531, "percentage": 
21.97, "elapsed_time": "16:33:16", "remaining_time": "2 days, 10:48:33"} +{"current_steps": 273, "total_steps": 909, "loss": 0.6275356411933899, "lr": 3.535922207631776e-05, "epoch": 0.900990099009901, "percentage": 30.03, "elapsed_time": "2:54:04", "remaining_time": "6:45:33"} +{"current_steps": 274, "total_steps": 909, "loss": 0.6097655892372131, "lr": 3.5309911391270996e-05, "epoch": 0.9042904290429042, "percentage": 30.14, "elapsed_time": "2:54:38", "remaining_time": "6:44:43"} +{"current_steps": 275, "total_steps": 909, "loss": 0.544170618057251, "lr": 3.52603748847276e-05, "epoch": 0.9075907590759076, "percentage": 30.25, "elapsed_time": "2:55:18", "remaining_time": "6:44:10"} +{"current_steps": 276, "total_steps": 909, "loss": 0.5723366141319275, "lr": 3.521061328735202e-05, "epoch": 0.9108910891089109, "percentage": 30.36, "elapsed_time": "2:56:07", "remaining_time": "6:43:56"} +{"current_steps": 277, "total_steps": 909, "loss": 0.5801889896392822, "lr": 3.516062733312879e-05, "epoch": 0.9141914191419142, "percentage": 30.47, "elapsed_time": "2:56:48", "remaining_time": "6:43:25"} +{"current_steps": 287, "total_steps": 1302, "loss": 0.4007243514060974, "lr": 3.82955586346537e-05, "epoch": 0.6620530565167243, "percentage": 22.04, "elapsed_time": "16:36:47", "remaining_time": "2 days, 10:45:15"} +{"current_steps": 278, "total_steps": 909, "loss": 0.5942766666412354, "lr": 3.511041775935175e-05, "epoch": 0.9174917491749175, "percentage": 30.58, "elapsed_time": "2:57:23", "remaining_time": "6:42:39"} +{"current_steps": 279, "total_steps": 909, "loss": 0.5604017972946167, "lr": 3.50599853066131e-05, "epoch": 0.9207920792079208, "percentage": 30.69, "elapsed_time": "2:58:01", "remaining_time": "6:42:00"} +{"current_steps": 280, "total_steps": 909, "loss": 0.6151460409164429, "lr": 3.500933071879251e-05, "epoch": 0.9240924092409241, "percentage": 30.8, "elapsed_time": "2:58:31", "remaining_time": "6:41:02"} +{"current_steps": 281, "total_steps": 909, "loss": 
0.5854936838150024, "lr": 3.495845474304616e-05, "epoch": 0.9273927392739274, "percentage": 30.91, "elapsed_time": "2:59:08", "remaining_time": "6:40:21"} +{"current_steps": 282, "total_steps": 909, "loss": 0.5586672425270081, "lr": 3.490735812979572e-05, "epoch": 0.9306930693069307, "percentage": 31.02, "elapsed_time": "2:59:45", "remaining_time": "6:39:39"} +{"current_steps": 283, "total_steps": 909, "loss": 0.578475832939148, "lr": 3.485604163271721e-05, "epoch": 0.933993399339934, "percentage": 31.13, "elapsed_time": "3:00:26", "remaining_time": "6:39:07"} +{"current_steps": 288, "total_steps": 1302, "loss": 0.46211814880371094, "lr": 3.827381786337907e-05, "epoch": 0.6643598615916955, "percentage": 22.12, "elapsed_time": "16:40:07", "remaining_time": "2 days, 10:41:15"} +{"current_steps": 284, "total_steps": 909, "loss": 0.5236382484436035, "lr": 3.4804506008730015e-05, "epoch": 0.9372937293729373, "percentage": 31.24, "elapsed_time": "3:01:10", "remaining_time": "6:38:43"} +{"current_steps": 285, "total_steps": 909, "loss": 0.5964822769165039, "lr": 3.475275201798559e-05, "epoch": 0.9405940594059405, "percentage": 31.35, "elapsed_time": "3:01:59", "remaining_time": "6:38:28"} +{"current_steps": 286, "total_steps": 909, "loss": 0.5551598072052002, "lr": 3.4700780423856334e-05, "epoch": 0.9438943894389439, "percentage": 31.46, "elapsed_time": "3:02:39", "remaining_time": "6:37:53"} +{"current_steps": 287, "total_steps": 909, "loss": 0.6095103621482849, "lr": 3.464859199292429e-05, "epoch": 0.9471947194719472, "percentage": 31.57, "elapsed_time": "3:03:20", "remaining_time": "6:37:20"} +{"current_steps": 288, "total_steps": 909, "loss": 0.5893416404724121, "lr": 3.4596187494969846e-05, "epoch": 0.9504950495049505, "percentage": 31.68, "elapsed_time": "3:04:13", "remaining_time": "6:37:13"} +{"current_steps": 289, "total_steps": 1302, "loss": 0.4074893891811371, "lr": 3.825194556508254e-05, "epoch": 0.6666666666666666, "percentage": 22.2, "elapsed_time": 
"16:43:40", "remaining_time": "2 days, 10:38:03"} +{"current_steps": 289, "total_steps": 909, "loss": 0.5992231965065002, "lr": 3.454356770296039e-05, "epoch": 0.9537953795379538, "percentage": 31.79, "elapsed_time": "3:04:52", "remaining_time": "6:36:37"} +{"current_steps": 290, "total_steps": 909, "loss": 0.6071972250938416, "lr": 3.4490733393038895e-05, "epoch": 0.9570957095709571, "percentage": 31.9, "elapsed_time": "3:05:30", "remaining_time": "6:35:58"} +{"current_steps": 291, "total_steps": 909, "loss": 0.5836942195892334, "lr": 3.443768534451248e-05, "epoch": 0.9603960396039604, "percentage": 32.01, "elapsed_time": "3:06:09", "remaining_time": "6:35:20"} +{"current_steps": 292, "total_steps": 909, "loss": 0.5707553625106812, "lr": 3.4384424339840916e-05, "epoch": 0.9636963696369637, "percentage": 32.12, "elapsed_time": "3:06:59", "remaining_time": "6:35:07"} +{"current_steps": 293, "total_steps": 909, "loss": 0.5883970260620117, "lr": 3.4330951164625075e-05, "epoch": 0.966996699669967, "percentage": 32.23, "elapsed_time": "3:07:29", "remaining_time": "6:34:11"} +{"current_steps": 290, "total_steps": 1302, "loss": 0.4242897033691406, "lr": 3.822994189719144e-05, "epoch": 0.6689734717416378, "percentage": 22.27, "elapsed_time": "16:47:13", "remaining_time": "2 days, 10:34:50"} +{"current_steps": 294, "total_steps": 909, "loss": 0.6281589269638062, "lr": 3.427726660759535e-05, "epoch": 0.9702970297029703, "percentage": 32.34, "elapsed_time": "3:08:01", "remaining_time": "6:33:19"} +{"current_steps": 295, "total_steps": 909, "loss": 0.6641702651977539, "lr": 3.422337146060003e-05, "epoch": 0.9735973597359736, "percentage": 32.45, "elapsed_time": "3:08:44", "remaining_time": "6:32:50"} +{"current_steps": 296, "total_steps": 909, "loss": 0.6398966312408447, "lr": 3.4169266518593596e-05, "epoch": 0.976897689768977, "percentage": 32.56, "elapsed_time": "3:09:22", "remaining_time": "6:32:11"} +{"current_steps": 297, "total_steps": 909, "loss": 0.6376276016235352, 
"lr": 3.411495257962501e-05, "epoch": 0.9801980198019802, "percentage": 32.67, "elapsed_time": "3:09:51", "remaining_time": "6:31:13"} +{"current_steps": 298, "total_steps": 909, "loss": 0.648975133895874, "lr": 3.406043044482596e-05, "epoch": 0.9834983498349835, "percentage": 32.78, "elapsed_time": "3:10:25", "remaining_time": "6:30:25"} +{"current_steps": 299, "total_steps": 909, "loss": 0.6201390624046326, "lr": 3.4005700918399016e-05, "epoch": 0.9867986798679867, "percentage": 32.89, "elapsed_time": "3:11:08", "remaining_time": "6:29:57"} +{"current_steps": 291, "total_steps": 1302, "loss": 0.42089277505874634, "lr": 3.820780701807864e-05, "epoch": 0.671280276816609, "percentage": 22.35, "elapsed_time": "16:50:39", "remaining_time": "2 days, 10:31:16"} +{"current_steps": 300, "total_steps": 909, "loss": 0.6103875637054443, "lr": 3.395076480760576e-05, "epoch": 0.9900990099009901, "percentage": 33.0, "elapsed_time": "3:11:41", "remaining_time": "6:29:07"} +{"current_steps": 301, "total_steps": 909, "loss": 0.5486876368522644, "lr": 3.3895622922754936e-05, "epoch": 0.9933993399339934, "percentage": 33.11, "elapsed_time": "3:12:15", "remaining_time": "6:28:20"} +{"current_steps": 302, "total_steps": 909, "loss": 0.5980846285820007, "lr": 3.384027607719043e-05, "epoch": 0.9966996699669967, "percentage": 33.22, "elapsed_time": "3:12:54", "remaining_time": "6:27:42"} +{"current_steps": 303, "total_steps": 909, "loss": 0.5986801385879517, "lr": 3.378472508727931e-05, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "3:13:29", "remaining_time": "6:26:59"} +{"current_steps": 304, "total_steps": 909, "loss": 0.5586727857589722, "lr": 3.372897077239979e-05, "epoch": 1.0033003300330032, "percentage": 33.44, "elapsed_time": "3:14:02", "remaining_time": "6:26:10"} +{"current_steps": 292, "total_steps": 1302, "loss": 0.4205603003501892, "lr": 3.8185541087061395e-05, "epoch": 0.6735870818915801, "percentage": 22.43, "elapsed_time": "16:54:06", "remaining_time": "2 days, 
10:27:40"} +{"current_steps": 305, "total_steps": 909, "loss": 0.5393255949020386, "lr": 3.36730139549291e-05, "epoch": 1.0066006600660067, "percentage": 33.55, "elapsed_time": "3:14:42", "remaining_time": "6:25:34"} +{"current_steps": 306, "total_steps": 909, "loss": 0.5377227067947388, "lr": 3.361685546023143e-05, "epoch": 1.00990099009901, "percentage": 33.66, "elapsed_time": "3:15:14", "remaining_time": "6:24:44"} +{"current_steps": 307, "total_steps": 909, "loss": 0.5223784446716309, "lr": 3.356049611664568e-05, "epoch": 1.0132013201320131, "percentage": 33.77, "elapsed_time": "3:15:49", "remaining_time": "6:24:00"} +{"current_steps": 308, "total_steps": 909, "loss": 0.5502469539642334, "lr": 3.350393675547328e-05, "epoch": 1.0165016501650166, "percentage": 33.88, "elapsed_time": "3:16:20", "remaining_time": "6:23:06"} +{"current_steps": 309, "total_steps": 909, "loss": 0.5626603960990906, "lr": 3.3447178210965936e-05, "epoch": 1.0198019801980198, "percentage": 33.99, "elapsed_time": "3:16:59", "remaining_time": "6:22:30"} +{"current_steps": 310, "total_steps": 909, "loss": 0.48262274265289307, "lr": 3.3390221320313303e-05, "epoch": 1.023102310231023, "percentage": 34.1, "elapsed_time": "3:17:41", "remaining_time": "6:22:00"} +{"current_steps": 293, "total_steps": 1302, "loss": 0.40558916330337524, "lr": 3.8163144264400244e-05, "epoch": 0.6758938869665513, "percentage": 22.5, "elapsed_time": "16:57:35", "remaining_time": "2 days, 10:24:17"} +{"current_steps": 311, "total_steps": 909, "loss": 0.5850967168807983, "lr": 3.333306692363065e-05, "epoch": 1.0264026402640265, "percentage": 34.21, "elapsed_time": "3:18:36", "remaining_time": "6:21:54"} +{"current_steps": 312, "total_steps": 909, "loss": 0.5444281697273254, "lr": 3.3275715863946466e-05, "epoch": 1.0297029702970297, "percentage": 34.32, "elapsed_time": "3:19:04", "remaining_time": "6:20:56"} +{"current_steps": 313, "total_steps": 909, "loss": 0.5329654216766357, "lr": 3.3218168987190004e-05, "epoch": 
1.033003300330033, "percentage": 34.43, "elapsed_time": "3:19:35", "remaining_time": "6:20:03"} +{"current_steps": 314, "total_steps": 909, "loss": 0.5276832580566406, "lr": 3.316042714217885e-05, "epoch": 1.0363036303630364, "percentage": 34.54, "elapsed_time": "3:20:13", "remaining_time": "6:19:23"} +{"current_steps": 315, "total_steps": 909, "loss": 0.5344791412353516, "lr": 3.310249118060636e-05, "epoch": 1.0396039603960396, "percentage": 34.65, "elapsed_time": "3:20:42", "remaining_time": "6:18:29"} +{"current_steps": 316, "total_steps": 909, "loss": 0.5479785203933716, "lr": 3.304436195702911e-05, "epoch": 1.0429042904290429, "percentage": 34.76, "elapsed_time": "3:21:25", "remaining_time": "6:17:58"} +{"current_steps": 294, "total_steps": 1302, "loss": 0.41276469826698303, "lr": 3.814061671129779e-05, "epoch": 0.6782006920415224, "percentage": 22.58, "elapsed_time": "17:01:10", "remaining_time": "2 days, 10:21:11"} +{"current_steps": 317, "total_steps": 909, "loss": 0.5223082900047302, "lr": 3.298604032885431e-05, "epoch": 1.046204620462046, "percentage": 34.87, "elapsed_time": "3:22:01", "remaining_time": "6:17:17"} +{"current_steps": 318, "total_steps": 909, "loss": 0.5667799711227417, "lr": 3.292752715632713e-05, "epoch": 1.0495049504950495, "percentage": 34.98, "elapsed_time": "3:22:43", "remaining_time": "6:16:45"} +{"current_steps": 319, "total_steps": 909, "loss": 0.5194317698478699, "lr": 3.2868823302518016e-05, "epoch": 1.0528052805280528, "percentage": 35.09, "elapsed_time": "3:23:20", "remaining_time": "6:16:05"} +{"current_steps": 320, "total_steps": 909, "loss": 0.4911007285118103, "lr": 3.2809929633309985e-05, "epoch": 1.056105610561056, "percentage": 35.2, "elapsed_time": "3:24:00", "remaining_time": "6:15:30"} +{"current_steps": 321, "total_steps": 909, "loss": 0.5269002914428711, "lr": 3.2750847017385826e-05, "epoch": 1.0594059405940595, "percentage": 35.31, "elapsed_time": "3:24:47", "remaining_time": "6:15:08"} +{"current_steps": 295, 
"total_steps": 1302, "loss": 0.3919031023979187, "lr": 3.811795858989761e-05, "epoch": 0.6805074971164936, "percentage": 22.66, "elapsed_time": "17:04:52", "remaining_time": "2 days, 10:18:28"} +{"current_steps": 322, "total_steps": 909, "loss": 0.5124789476394653, "lr": 3.269157632621529e-05, "epoch": 1.0627062706270627, "percentage": 35.42, "elapsed_time": "3:25:30", "remaining_time": "6:14:38"} +{"current_steps": 323, "total_steps": 909, "loss": 0.5483890771865845, "lr": 3.263211843404225e-05, "epoch": 1.066006600660066, "percentage": 35.53, "elapsed_time": "3:25:56", "remaining_time": "6:13:37"} +{"current_steps": 324, "total_steps": 909, "loss": 0.5582579374313354, "lr": 3.25724742178718e-05, "epoch": 1.0693069306930694, "percentage": 35.64, "elapsed_time": "3:26:23", "remaining_time": "6:12:38"} +{"current_steps": 325, "total_steps": 909, "loss": 0.5662975907325745, "lr": 3.2512644557457304e-05, "epoch": 1.0726072607260726, "percentage": 35.75, "elapsed_time": "3:26:47", "remaining_time": "6:11:35"} +{"current_steps": 326, "total_steps": 909, "loss": 0.5502511858940125, "lr": 3.2452630335287445e-05, "epoch": 1.0759075907590758, "percentage": 35.86, "elapsed_time": "3:27:30", "remaining_time": "6:11:05"} +{"current_steps": 327, "total_steps": 909, "loss": 0.5614978075027466, "lr": 3.239243243657318e-05, "epoch": 1.0792079207920793, "percentage": 35.97, "elapsed_time": "3:28:04", "remaining_time": "6:10:20"} +{"current_steps": 328, "total_steps": 909, "loss": 0.4828110635280609, "lr": 3.233205174923472e-05, "epoch": 1.0825082508250825, "percentage": 36.08, "elapsed_time": "3:28:46", "remaining_time": "6:09:48"} +{"current_steps": 296, "total_steps": 1302, "loss": 0.42677876353263855, "lr": 3.809517006328305e-05, "epoch": 0.6828143021914648, "percentage": 22.73, "elapsed_time": "17:08:24", "remaining_time": "2 days, 10:15:12"} +{"current_steps": 329, "total_steps": 909, "loss": 0.5437847971916199, "lr": 3.22714891638884e-05, "epoch": 1.0858085808580857, 
"percentage": 36.19, "elapsed_time": "3:29:32", "remaining_time": "6:09:25"} +{"current_steps": 330, "total_steps": 909, "loss": 0.6240063309669495, "lr": 3.221074557383355e-05, "epoch": 1.0891089108910892, "percentage": 36.3, "elapsed_time": "3:30:06", "remaining_time": "6:08:37"} +{"current_steps": 331, "total_steps": 909, "loss": 0.5435442328453064, "lr": 3.2149821875039325e-05, "epoch": 1.0924092409240924, "percentage": 36.41, "elapsed_time": "3:30:45", "remaining_time": "6:08:01"} +{"current_steps": 332, "total_steps": 909, "loss": 0.5240401029586792, "lr": 3.20887189661315e-05, "epoch": 1.0957095709570956, "percentage": 36.52, "elapsed_time": "3:31:35", "remaining_time": "6:07:44"} +{"current_steps": 333, "total_steps": 909, "loss": 0.5227692127227783, "lr": 3.202743774837919e-05, "epoch": 1.099009900990099, "percentage": 36.63, "elapsed_time": "3:32:11", "remaining_time": "6:07:02"} +{"current_steps": 297, "total_steps": 1302, "loss": 0.373681902885437, "lr": 3.807225129547603e-05, "epoch": 0.6851211072664359, "percentage": 22.81, "elapsed_time": "17:12:04", "remaining_time": "2 days, 10:12:23"} +{"current_steps": 334, "total_steps": 909, "loss": 0.5607417821884155, "lr": 3.196597912568157e-05, "epoch": 1.1023102310231023, "percentage": 36.74, "elapsed_time": "3:32:48", "remaining_time": "6:06:22"} +{"current_steps": 335, "total_steps": 909, "loss": 0.5607600808143616, "lr": 3.1904344004554536e-05, "epoch": 1.1056105610561056, "percentage": 36.85, "elapsed_time": "3:33:27", "remaining_time": "6:05:45"} +{"current_steps": 336, "total_steps": 909, "loss": 0.47135430574417114, "lr": 3.184253329411737e-05, "epoch": 1.108910891089109, "percentage": 36.96, "elapsed_time": "3:34:18", "remaining_time": "6:05:27"} +{"current_steps": 337, "total_steps": 909, "loss": 0.5708764791488647, "lr": 3.178054790607924e-05, "epoch": 1.1122112211221122, "percentage": 37.07, "elapsed_time": "3:35:05", "remaining_time": "6:05:04"} +{"current_steps": 338, "total_steps": 909, 
"loss": 0.5522497296333313, "lr": 3.1718388754725883e-05, "epoch": 1.1155115511551155, "percentage": 37.18, "elapsed_time": "3:35:43", "remaining_time": "6:04:26"} +{"current_steps": 339, "total_steps": 909, "loss": 0.5556532144546509, "lr": 3.1656056756906e-05, "epoch": 1.118811881188119, "percentage": 37.29, "elapsed_time": "3:36:18", "remaining_time": "6:03:41"} +{"current_steps": 298, "total_steps": 1302, "loss": 0.39785879850387573, "lr": 3.804920245143592e-05, "epoch": 0.6874279123414071, "percentage": 22.89, "elapsed_time": "17:15:46", "remaining_time": "2 days, 10:09:38"} +{"current_steps": 340, "total_steps": 909, "loss": 0.5727676153182983, "lr": 3.1593552832017795e-05, "epoch": 1.1221122112211221, "percentage": 37.4, "elapsed_time": "3:37:02", "remaining_time": "6:03:13"} +{"current_steps": 341, "total_steps": 909, "loss": 0.5131651759147644, "lr": 3.153087790199541e-05, "epoch": 1.1254125412541254, "percentage": 37.51, "elapsed_time": "3:37:49", "remaining_time": "6:02:50"} +{"current_steps": 342, "total_steps": 909, "loss": 0.5143063068389893, "lr": 3.146803289129528e-05, "epoch": 1.1287128712871288, "percentage": 37.62, "elapsed_time": "3:38:23", "remaining_time": "6:02:03"} +{"current_steps": 343, "total_steps": 909, "loss": 0.509161114692688, "lr": 3.1405018726882595e-05, "epoch": 1.132013201320132, "percentage": 37.73, "elapsed_time": "3:39:04", "remaining_time": "6:01:31"} +{"current_steps": 344, "total_steps": 909, "loss": 0.5213526487350464, "lr": 3.13418363382175e-05, "epoch": 1.1353135313531353, "percentage": 37.84, "elapsed_time": "3:39:40", "remaining_time": "6:00:47"} +{"current_steps": 299, "total_steps": 1302, "loss": 0.4215731620788574, "lr": 3.802602369705831e-05, "epoch": 0.6897347174163783, "percentage": 22.96, "elapsed_time": "17:19:22", "remaining_time": "2 days, 10:06:36"} +{"current_steps": 345, "total_steps": 909, "loss": 0.5465434789657593, "lr": 3.127848665724149e-05, "epoch": 1.1386138613861387, "percentage": 37.95, 
"elapsed_time": "3:40:28", "remaining_time": "6:00:25"} +{"current_steps": 346, "total_steps": 909, "loss": 0.5342190265655518, "lr": 3.1214970618363626e-05, "epoch": 1.141914191419142, "percentage": 38.06, "elapsed_time": "3:41:15", "remaining_time": "6:00:01"} +{"current_steps": 347, "total_steps": 909, "loss": 0.541754424571991, "lr": 3.115128915844672e-05, "epoch": 1.1452145214521452, "percentage": 38.17, "elapsed_time": "3:42:03", "remaining_time": "5:59:37"} +{"current_steps": 348, "total_steps": 909, "loss": 0.5318331122398376, "lr": 3.10874432167936e-05, "epoch": 1.1485148514851484, "percentage": 38.28, "elapsed_time": "3:42:31", "remaining_time": "5:58:43"} +{"current_steps": 349, "total_steps": 909, "loss": 0.4972509741783142, "lr": 3.1023433735133134e-05, "epoch": 1.1518151815181519, "percentage": 38.39, "elapsed_time": "3:43:17", "remaining_time": "5:58:17"} +{"current_steps": 300, "total_steps": 1302, "loss": 0.40535813570022583, "lr": 3.800271519917384e-05, "epoch": 0.6920415224913494, "percentage": 23.04, "elapsed_time": "17:22:50", "remaining_time": "2 days, 10:03:06"} +{"current_steps": 350, "total_steps": 909, "loss": 0.5417294502258301, "lr": 3.095926165760647e-05, "epoch": 1.155115511551155, "percentage": 38.5, "elapsed_time": "3:43:53", "remaining_time": "5:57:35"} +{"current_steps": 351, "total_steps": 909, "loss": 0.554945707321167, "lr": 3.089492793075302e-05, "epoch": 1.1584158415841583, "percentage": 38.61, "elapsed_time": "3:44:37", "remaining_time": "5:57:05"} +{"current_steps": 352, "total_steps": 909, "loss": 0.5204564929008484, "lr": 3.083043350349653e-05, "epoch": 1.1617161716171618, "percentage": 38.72, "elapsed_time": "3:45:19", "remaining_time": "5:56:32"} +{"current_steps": 353, "total_steps": 909, "loss": 0.4856947064399719, "lr": 3.076577932713108e-05, "epoch": 1.165016501650165, "percentage": 38.83, "elapsed_time": "3:45:59", "remaining_time": "5:55:57"} +{"current_steps": 354, "total_steps": 909, "loss": 0.5269368886947632, 
"lr": 3.0700966355307055e-05, "epoch": 1.1683168316831682, "percentage": 38.94, "elapsed_time": "3:46:30", "remaining_time": "5:55:07"} +{"current_steps": 301, "total_steps": 1302, "loss": 0.42303597927093506, "lr": 3.7979277125546954e-05, "epoch": 0.6943483275663207, "percentage": 23.12, "elapsed_time": "17:26:22", "remaining_time": "2 days, 9:59:46"} +{"current_steps": 355, "total_steps": 909, "loss": 0.5811939239501953, "lr": 3.063599554401708e-05, "epoch": 1.1716171617161717, "percentage": 39.05, "elapsed_time": "3:47:18", "remaining_time": "5:54:43"} +{"current_steps": 356, "total_steps": 909, "loss": 0.5636904239654541, "lr": 3.057086785158189e-05, "epoch": 1.174917491749175, "percentage": 39.16, "elapsed_time": "3:47:50", "remaining_time": "5:53:55"} +{"current_steps": 357, "total_steps": 909, "loss": 0.546089768409729, "lr": 3.050558423863626e-05, "epoch": 1.1782178217821782, "percentage": 39.27, "elapsed_time": "3:48:30", "remaining_time": "5:53:18"} +{"current_steps": 358, "total_steps": 909, "loss": 0.5239901542663574, "lr": 3.0440145668114774e-05, "epoch": 1.1815181518151816, "percentage": 39.38, "elapsed_time": "3:49:05", "remaining_time": "5:52:35"} +{"current_steps": 359, "total_steps": 909, "loss": 0.5833466053009033, "lr": 3.0374553105237637e-05, "epoch": 1.1848184818481848, "percentage": 39.49, "elapsed_time": "3:49:42", "remaining_time": "5:51:54"} +{"current_steps": 302, "total_steps": 1302, "loss": 0.4238996207714081, "lr": 3.795570964487476e-05, "epoch": 0.6966551326412919, "percentage": 23.2, "elapsed_time": "17:30:00", "remaining_time": "2 days, 9:56:49"} +{"current_steps": 360, "total_steps": 909, "loss": 0.5060774087905884, "lr": 3.0308807517496456e-05, "epoch": 1.188118811881188, "percentage": 39.6, "elapsed_time": "3:50:35", "remaining_time": "5:51:39"} +{"current_steps": 361, "total_steps": 909, "loss": 0.5164307355880737, "lr": 3.0242909874639953e-05, "epoch": 1.1914191419141915, "percentage": 39.71, "elapsed_time": "3:51:09", 
"remaining_time": "5:50:54"} +{"current_steps": 362, "total_steps": 909, "loss": 0.49949395656585693, "lr": 3.0176861148659672e-05, "epoch": 1.1947194719471947, "percentage": 39.82, "elapsed_time": "3:51:49", "remaining_time": "5:50:17"} +{"current_steps": 363, "total_steps": 909, "loss": 0.5581181049346924, "lr": 3.0110662313775623e-05, "epoch": 1.198019801980198, "percentage": 39.93, "elapsed_time": "3:52:40", "remaining_time": "5:49:58"} +{"current_steps": 364, "total_steps": 909, "loss": 0.5657376646995544, "lr": 3.0044314346421938e-05, "epoch": 1.2013201320132012, "percentage": 40.04, "elapsed_time": "3:53:16", "remaining_time": "5:49:16"} +{"current_steps": 365, "total_steps": 909, "loss": 0.5269935131072998, "lr": 2.9977818225232443e-05, "epoch": 1.2046204620462047, "percentage": 40.15, "elapsed_time": "3:53:56", "remaining_time": "5:48:39"} +{"current_steps": 303, "total_steps": 1302, "loss": 0.41816985607147217, "lr": 3.793201292678578e-05, "epoch": 0.698961937716263, "percentage": 23.27, "elapsed_time": "17:33:33", "remaining_time": "2 days, 9:53:37"} +{"current_steps": 366, "total_steps": 909, "loss": 0.5385931730270386, "lr": 2.991117493102626e-05, "epoch": 1.2079207920792079, "percentage": 40.26, "elapsed_time": "3:54:34", "remaining_time": "5:48:00"} +{"current_steps": 367, "total_steps": 909, "loss": 0.5615143179893494, "lr": 2.984438544679329e-05, "epoch": 1.2112211221122111, "percentage": 40.37, "elapsed_time": "3:55:11", "remaining_time": "5:47:19"} +{"current_steps": 368, "total_steps": 909, "loss": 0.5175333023071289, "lr": 2.9777450757679754e-05, "epoch": 1.2145214521452146, "percentage": 40.48, "elapsed_time": "3:55:38", "remaining_time": "5:46:25"} +{"current_steps": 369, "total_steps": 909, "loss": 0.565494179725647, "lr": 2.971037185097364e-05, "epoch": 1.2178217821782178, "percentage": 40.59, "elapsed_time": "3:56:25", "remaining_time": "5:45:59"} +{"current_steps": 370, "total_steps": 909, "loss": 0.5519120693206787, "lr": 
2.9643149716090146e-05, "epoch": 1.221122112211221, "percentage": 40.7, "elapsed_time": "3:56:54", "remaining_time": "5:45:07"} +{"current_steps": 304, "total_steps": 1302, "loss": 0.44623321294784546, "lr": 3.790818714183871e-05, "epoch": 0.7012687427912342, "percentage": 23.35, "elapsed_time": "17:36:43", "remaining_time": "2 days, 9:49:05"} +{"current_steps": 371, "total_steps": 909, "loss": 0.49374374747276306, "lr": 2.9575785344557114e-05, "epoch": 1.2244224422442245, "percentage": 40.81, "elapsed_time": "3:57:47", "remaining_time": "5:44:50"} +{"current_steps": 372, "total_steps": 909, "loss": 0.5608875751495361, "lr": 2.950827973000034e-05, "epoch": 1.2277227722772277, "percentage": 40.92, "elapsed_time": "3:58:14", "remaining_time": "5:43:54"} +{"current_steps": 373, "total_steps": 909, "loss": 0.5866271257400513, "lr": 2.944063386812899e-05, "epoch": 1.231023102310231, "percentage": 41.03, "elapsed_time": "3:58:47", "remaining_time": "5:43:08"} +{"current_steps": 374, "total_steps": 909, "loss": 0.5342913269996643, "lr": 2.9372848756720867e-05, "epoch": 1.2343234323432344, "percentage": 41.14, "elapsed_time": "3:59:33", "remaining_time": "5:42:40"} +{"current_steps": 375, "total_steps": 909, "loss": 0.5539537668228149, "lr": 2.9304925395607696e-05, "epoch": 1.2376237623762376, "percentage": 41.25, "elapsed_time": "4:00:08", "remaining_time": "5:41:57"} +{"current_steps": 376, "total_steps": 909, "loss": 0.5614147186279297, "lr": 2.9236864786660423e-05, "epoch": 1.2409240924092408, "percentage": 41.36, "elapsed_time": "4:00:38", "remaining_time": "5:41:07"} +{"current_steps": 305, "total_steps": 1302, "loss": 0.44612938165664673, "lr": 3.7884232461521236e-05, "epoch": 0.7035755478662054, "percentage": 23.43, "elapsed_time": "17:40:13", "remaining_time": "2 days, 9:45:43"} +{"current_steps": 377, "total_steps": 909, "loss": 0.46689367294311523, "lr": 2.9168667933774356e-05, "epoch": 1.2442244224422443, "percentage": 41.47, "elapsed_time": "4:01:14", 
"remaining_time": "5:40:25"} +{"current_steps": 378, "total_steps": 909, "loss": 0.5383083820343018, "lr": 2.910033584285444e-05, "epoch": 1.2475247524752475, "percentage": 41.58, "elapsed_time": "4:01:46", "remaining_time": "5:39:37"} +{"current_steps": 379, "total_steps": 909, "loss": 0.5349752902984619, "lr": 2.903186952180037e-05, "epoch": 1.2508250825082508, "percentage": 41.69, "elapsed_time": "4:02:20", "remaining_time": "5:38:53"} +{"current_steps": 380, "total_steps": 909, "loss": 0.5792303681373596, "lr": 2.8963269980491743e-05, "epoch": 1.2541254125412542, "percentage": 41.8, "elapsed_time": "4:02:46", "remaining_time": "5:37:58"} +{"current_steps": 381, "total_steps": 909, "loss": 0.524924099445343, "lr": 2.8894538230773147e-05, "epoch": 1.2574257425742574, "percentage": 41.91, "elapsed_time": "4:03:20", "remaining_time": "5:37:13"} +{"current_steps": 382, "total_steps": 909, "loss": 0.5137406587600708, "lr": 2.882567528643925e-05, "epoch": 1.2607260726072607, "percentage": 42.02, "elapsed_time": "4:04:00", "remaining_time": "5:36:38"} +{"current_steps": 306, "total_steps": 1302, "loss": 0.4025178551673889, "lr": 3.786014905824875e-05, "epoch": 0.7058823529411765, "percentage": 23.5, "elapsed_time": "17:43:47", "remaining_time": "2 days, 9:42:32"} +{"current_steps": 383, "total_steps": 909, "loss": 0.5196574926376343, "lr": 2.8756682163219857e-05, "epoch": 1.2640264026402641, "percentage": 42.13, "elapsed_time": "4:04:41", "remaining_time": "5:36:02"} +{"current_steps": 384, "total_steps": 909, "loss": 0.585644006729126, "lr": 2.8687559878764903e-05, "epoch": 1.2673267326732673, "percentage": 42.24, "elapsed_time": "4:05:30", "remaining_time": "5:35:39"} +{"current_steps": 385, "total_steps": 909, "loss": 0.5973786115646362, "lr": 2.8618309452629445e-05, "epoch": 1.2706270627062706, "percentage": 42.35, "elapsed_time": "4:06:06", "remaining_time": "5:34:57"} +{"current_steps": 386, "total_steps": 909, "loss": 0.5909825563430786, "lr": 
2.854893190625865e-05, "epoch": 1.273927392739274, "percentage": 42.46, "elapsed_time": "4:06:40", "remaining_time": "5:34:13"} +{"current_steps": 387, "total_steps": 909, "loss": 0.5903690457344055, "lr": 2.84794282629727e-05, "epoch": 1.2772277227722773, "percentage": 42.57, "elapsed_time": "4:07:12", "remaining_time": "5:33:26"} +{"current_steps": 388, "total_steps": 909, "loss": 0.5316457152366638, "lr": 2.840979954795171e-05, "epoch": 1.2805280528052805, "percentage": 42.68, "elapsed_time": "4:07:47", "remaining_time": "5:32:43"} +{"current_steps": 307, "total_steps": 1302, "loss": 0.42514878511428833, "lr": 3.783593710536316e-05, "epoch": 0.7081891580161477, "percentage": 23.58, "elapsed_time": "17:47:16", "remaining_time": "2 days, 9:39:06"} +{"current_steps": 389, "total_steps": 909, "loss": 0.5080389976501465, "lr": 2.8340046788220613e-05, "epoch": 1.283828382838284, "percentage": 42.79, "elapsed_time": "4:08:26", "remaining_time": "5:32:05"} +{"current_steps": 390, "total_steps": 909, "loss": 0.6137889623641968, "lr": 2.8270171012633994e-05, "epoch": 1.2871287128712872, "percentage": 42.9, "elapsed_time": "4:09:05", "remaining_time": "5:31:29"} +{"current_steps": 391, "total_steps": 909, "loss": 0.5433805584907532, "lr": 2.8200173251860928e-05, "epoch": 1.2904290429042904, "percentage": 43.01, "elapsed_time": "4:09:38", "remaining_time": "5:30:42"} +{"current_steps": 392, "total_steps": 909, "loss": 0.4965590834617615, "lr": 2.8130054538369775e-05, "epoch": 1.2937293729372938, "percentage": 43.12, "elapsed_time": "4:10:18", "remaining_time": "5:30:07"} +{"current_steps": 393, "total_steps": 909, "loss": 0.5361340045928955, "lr": 2.805981590641295e-05, "epoch": 1.297029702970297, "percentage": 43.23, "elapsed_time": "4:11:02", "remaining_time": "5:29:36"} +{"current_steps": 308, "total_steps": 1302, "loss": 0.42905813455581665, "lr": 3.781159677713162e-05, "epoch": 0.7104959630911188, "percentage": 23.66, "elapsed_time": "17:50:51", "remaining_time": "2 
days, 9:35:58"} +{"current_steps": 394, "total_steps": 909, "loss": 0.47011327743530273, "lr": 2.7989458392011678e-05, "epoch": 1.3003300330033003, "percentage": 43.34, "elapsed_time": "4:11:34", "remaining_time": "5:28:50"} +{"current_steps": 395, "total_steps": 909, "loss": 0.5893687605857849, "lr": 2.7918983032940666e-05, "epoch": 1.3036303630363038, "percentage": 43.45, "elapsed_time": "4:12:06", "remaining_time": "5:28:03"} +{"current_steps": 396, "total_steps": 909, "loss": 0.5219327211380005, "lr": 2.7848390868712886e-05, "epoch": 1.306930693069307, "percentage": 43.56, "elapsed_time": "4:12:47", "remaining_time": "5:27:29"} +{"current_steps": 397, "total_steps": 909, "loss": 0.5652155876159668, "lr": 2.7777682940564142e-05, "epoch": 1.3102310231023102, "percentage": 43.67, "elapsed_time": "4:13:26", "remaining_time": "5:26:51"} +{"current_steps": 398, "total_steps": 909, "loss": 0.5361950397491455, "lr": 2.7706860291437784e-05, "epoch": 1.3135313531353137, "percentage": 43.78, "elapsed_time": "4:14:00", "remaining_time": "5:26:07"} +{"current_steps": 399, "total_steps": 909, "loss": 0.5355206727981567, "lr": 2.763592396596929e-05, "epoch": 1.316831683168317, "percentage": 43.89, "elapsed_time": "4:14:37", "remaining_time": "5:25:27"} +{"current_steps": 309, "total_steps": 1302, "loss": 0.4227738082408905, "lr": 3.7787128248745244e-05, "epoch": 0.71280276816609, "percentage": 23.73, "elapsed_time": "17:54:16", "remaining_time": "2 days, 9:32:18"} +{"current_steps": 400, "total_steps": 909, "loss": 0.5082858800888062, "lr": 2.756487501047086e-05, "epoch": 1.3201320132013201, "percentage": 44.0, "elapsed_time": "4:15:21", "remaining_time": "5:24:56"} +{"current_steps": 401, "total_steps": 909, "loss": 0.5282934904098511, "lr": 2.7493714472916013e-05, "epoch": 1.3234323432343233, "percentage": 44.11, "elapsed_time": "4:15:47", "remaining_time": "5:24:02"} +{"current_steps": 402, "total_steps": 909, "loss": 0.5502887964248657, "lr": 2.7422443402924074e-05, 
"epoch": 1.3267326732673268, "percentage": 44.22, "elapsed_time": "4:16:19", "remaining_time": "5:23:16"} +{"current_steps": 403, "total_steps": 909, "loss": 0.5374204516410828, "lr": 2.7351062851744747e-05, "epoch": 1.33003300330033, "percentage": 44.33, "elapsed_time": "4:17:01", "remaining_time": "5:22:43"} +{"current_steps": 404, "total_steps": 909, "loss": 0.5602293014526367, "lr": 2.7279573872242574e-05, "epoch": 1.3333333333333333, "percentage": 44.44, "elapsed_time": "4:17:40", "remaining_time": "5:22:05"} +{"current_steps": 405, "total_steps": 909, "loss": 0.5321286916732788, "lr": 2.7207977518881418e-05, "epoch": 1.3366336633663367, "percentage": 44.55, "elapsed_time": "4:18:18", "remaining_time": "5:21:26"} +{"current_steps": 310, "total_steps": 1302, "loss": 0.4216471314430237, "lr": 3.776253169631791e-05, "epoch": 0.7151095732410612, "percentage": 23.81, "elapsed_time": "17:57:48", "remaining_time": "2 days, 9:29:00"} +{"current_steps": 406, "total_steps": 909, "loss": 0.5523560047149658, "lr": 2.713627484770892e-05, "epoch": 1.33993399339934, "percentage": 44.66, "elapsed_time": "4:18:50", "remaining_time": "5:20:41"} +{"current_steps": 407, "total_steps": 909, "loss": 0.47019705176353455, "lr": 2.706446691634089e-05, "epoch": 1.3432343234323432, "percentage": 44.77, "elapsed_time": "4:19:24", "remaining_time": "5:19:56"} +{"current_steps": 408, "total_steps": 909, "loss": 0.540359616279602, "lr": 2.6992554783945748e-05, "epoch": 1.3465346534653464, "percentage": 44.88, "elapsed_time": "4:19:59", "remaining_time": "5:19:15"} +{"current_steps": 409, "total_steps": 909, "loss": 0.561464786529541, "lr": 2.6920539511228874e-05, "epoch": 1.3498349834983498, "percentage": 44.99, "elapsed_time": "4:20:34", "remaining_time": "5:18:32"} +{"current_steps": 410, "total_steps": 909, "loss": 0.5429259538650513, "lr": 2.6848422160416956e-05, "epoch": 1.353135313531353, "percentage": 45.1, "elapsed_time": "4:21:04", "remaining_time": "5:17:44"} +{"current_steps": 
411, "total_steps": 909, "loss": 0.5452640652656555, "lr": 2.677620379524237e-05, "epoch": 1.3564356435643563, "percentage": 45.21, "elapsed_time": "4:21:38", "remaining_time": "5:17:01"} +{"current_steps": 311, "total_steps": 1302, "loss": 0.4122268557548523, "lr": 3.773780729688495e-05, "epoch": 0.7174163783160323, "percentage": 23.89, "elapsed_time": "18:01:16", "remaining_time": "2 days, 9:25:28"} +{"current_steps": 412, "total_steps": 909, "loss": 0.49627864360809326, "lr": 2.670388548092741e-05, "epoch": 1.3597359735973598, "percentage": 45.32, "elapsed_time": "4:22:15", "remaining_time": "5:16:21"} +{"current_steps": 413, "total_steps": 909, "loss": 0.5331633687019348, "lr": 2.663146828416867e-05, "epoch": 1.363036303630363, "percentage": 45.43, "elapsed_time": "4:22:46", "remaining_time": "5:15:35"} +{"current_steps": 414, "total_steps": 909, "loss": 0.5447151064872742, "lr": 2.6558953273121216e-05, "epoch": 1.3663366336633662, "percentage": 45.54, "elapsed_time": "4:23:14", "remaining_time": "5:14:44"} +{"current_steps": 415, "total_steps": 909, "loss": 0.5467007160186768, "lr": 2.648634151738292e-05, "epoch": 1.3696369636963697, "percentage": 45.65, "elapsed_time": "4:23:54", "remaining_time": "5:14:08"} +{"current_steps": 416, "total_steps": 909, "loss": 0.5804279446601868, "lr": 2.6413634087978602e-05, "epoch": 1.372937293729373, "percentage": 45.76, "elapsed_time": "4:24:28", "remaining_time": "5:13:25"} +{"current_steps": 417, "total_steps": 909, "loss": 0.5323517322540283, "lr": 2.63408320573443e-05, "epoch": 1.3762376237623761, "percentage": 45.87, "elapsed_time": "4:24:57", "remaining_time": "5:12:36"} +{"current_steps": 312, "total_steps": 1302, "loss": 0.4421166479587555, "lr": 3.771295522840186e-05, "epoch": 0.7197231833910035, "percentage": 23.96, "elapsed_time": "18:04:45", "remaining_time": "2 days, 9:22:01"} +{"current_steps": 418, "total_steps": 909, "loss": 0.5452409982681274, "lr": 2.6267936499311402e-05, "epoch": 1.3795379537953796, 
"percentage": 45.98, "elapsed_time": "4:25:28", "remaining_time": "5:11:50"} +{"current_steps": 419, "total_steps": 909, "loss": 0.4622665047645569, "lr": 2.619494848909084e-05, "epoch": 1.3828382838283828, "percentage": 46.09, "elapsed_time": "4:26:31", "remaining_time": "5:11:41"} +{"current_steps": 420, "total_steps": 909, "loss": 0.531772255897522, "lr": 2.6121869103257206e-05, "epoch": 1.386138613861386, "percentage": 46.2, "elapsed_time": "4:27:16", "remaining_time": "5:11:11"} +{"current_steps": 421, "total_steps": 909, "loss": 0.519554853439331, "lr": 2.6048699419732897e-05, "epoch": 1.3894389438943895, "percentage": 46.31, "elapsed_time": "4:27:43", "remaining_time": "5:10:19"} +{"current_steps": 422, "total_steps": 909, "loss": 0.545585572719574, "lr": 2.5975440517772187e-05, "epoch": 1.3927392739273927, "percentage": 46.42, "elapsed_time": "4:28:24", "remaining_time": "5:09:44"} +{"current_steps": 313, "total_steps": 1302, "loss": 0.4431558847427368, "lr": 3.7687975669743063e-05, "epoch": 0.7220299884659747, "percentage": 24.04, "elapsed_time": "18:08:03", "remaining_time": "2 days, 9:17:59"} +{"current_steps": 423, "total_steps": 909, "loss": 0.5641547441482544, "lr": 2.5902093477945345e-05, "epoch": 1.396039603960396, "percentage": 46.53, "elapsed_time": "4:28:56", "remaining_time": "5:08:59"} +{"current_steps": 424, "total_steps": 909, "loss": 0.5578028559684753, "lr": 2.5828659382122655e-05, "epoch": 1.3993399339933994, "percentage": 46.64, "elapsed_time": "4:29:32", "remaining_time": "5:08:19"} +{"current_steps": 425, "total_steps": 909, "loss": 0.5931404232978821, "lr": 2.5755139313458484e-05, "epoch": 1.4026402640264026, "percentage": 46.75, "elapsed_time": "4:30:04", "remaining_time": "5:07:34"} +{"current_steps": 426, "total_steps": 909, "loss": 0.5486891865730286, "lr": 2.5681534356375314e-05, "epoch": 1.4059405940594059, "percentage": 46.86, "elapsed_time": "4:30:36", "remaining_time": "5:06:49"} +{"current_steps": 427, "total_steps": 909, 
"loss": 0.5007671117782593, "lr": 2.5607845596547706e-05, "epoch": 1.4092409240924093, "percentage": 46.97, "elapsed_time": "4:31:12", "remaining_time": "5:06:08"} +{"current_steps": 428, "total_steps": 909, "loss": 0.5044519901275635, "lr": 2.5534074120886346e-05, "epoch": 1.4125412541254125, "percentage": 47.08, "elapsed_time": "4:31:45", "remaining_time": "5:05:24"} +{"current_steps": 314, "total_steps": 1302, "loss": 0.4280857443809509, "lr": 3.7662868800700605e-05, "epoch": 0.7243367935409458, "percentage": 24.12, "elapsed_time": "18:11:17", "remaining_time": "2 days, 9:13:43"} +{"current_steps": 429, "total_steps": 909, "loss": 0.5227789878845215, "lr": 2.5460221017521952e-05, "epoch": 1.4158415841584158, "percentage": 47.19, "elapsed_time": "4:32:24", "remaining_time": "5:04:47"} +{"current_steps": 430, "total_steps": 909, "loss": 0.5530189871788025, "lr": 2.538628737578926e-05, "epoch": 1.4191419141914192, "percentage": 47.3, "elapsed_time": "4:32:55", "remaining_time": "5:04:01"} +{"current_steps": 431, "total_steps": 909, "loss": 0.508142352104187, "lr": 2.5312274286210966e-05, "epoch": 1.4224422442244224, "percentage": 47.41, "elapsed_time": "4:33:30", "remaining_time": "5:03:20"} +{"current_steps": 432, "total_steps": 909, "loss": 0.5497263669967651, "lr": 2.523818284048159e-05, "epoch": 1.4257425742574257, "percentage": 47.52, "elapsed_time": "4:34:01", "remaining_time": "5:02:34"} +{"current_steps": 433, "total_steps": 909, "loss": 0.5477034449577332, "lr": 2.5164014131451443e-05, "epoch": 1.4290429042904291, "percentage": 47.63, "elapsed_time": "4:34:41", "remaining_time": "5:01:58"} +{"current_steps": 315, "total_steps": 1302, "loss": 0.3967229127883911, "lr": 3.763763480198284e-05, "epoch": 0.726643598615917, "percentage": 24.19, "elapsed_time": "18:14:32", "remaining_time": "2 days, 9:09:34"} +{"current_steps": 434, "total_steps": 909, "loss": 0.5091728568077087, "lr": 2.508976925311045e-05, "epoch": 1.4323432343234324, "percentage": 47.74, 
"elapsed_time": "4:35:23", "remaining_time": "5:01:24"} +{"current_steps": 435, "total_steps": 909, "loss": 0.5022713541984558, "lr": 2.501544930057203e-05, "epoch": 1.4356435643564356, "percentage": 47.85, "elapsed_time": "4:35:56", "remaining_time": "5:00:40"} +{"current_steps": 436, "total_steps": 909, "loss": 0.5401599407196045, "lr": 2.494105537005697e-05, "epoch": 1.438943894389439, "percentage": 47.96, "elapsed_time": "4:36:30", "remaining_time": "4:59:58"} +{"current_steps": 437, "total_steps": 909, "loss": 0.5632063150405884, "lr": 2.4866588558877208e-05, "epoch": 1.4422442244224423, "percentage": 48.07, "elapsed_time": "4:37:02", "remaining_time": "4:59:14"} +{"current_steps": 438, "total_steps": 909, "loss": 0.552355170249939, "lr": 2.479204996541969e-05, "epoch": 1.4455445544554455, "percentage": 48.18, "elapsed_time": "4:37:37", "remaining_time": "4:58:32"} +{"current_steps": 439, "total_steps": 909, "loss": 0.5604996681213379, "lr": 2.4717440689130154e-05, "epoch": 1.448844884488449, "percentage": 48.29, "elapsed_time": "4:38:09", "remaining_time": "4:57:47"} +{"current_steps": 316, "total_steps": 1302, "loss": 0.41480526328086853, "lr": 3.761227385521316e-05, "epoch": 0.7289504036908881, "percentage": 24.27, "elapsed_time": "18:17:57", "remaining_time": "2 days, 9:05:55"} +{"current_steps": 440, "total_steps": 909, "loss": 0.4961245656013489, "lr": 2.4642761830496893e-05, "epoch": 1.4521452145214522, "percentage": 48.4, "elapsed_time": "4:39:00", "remaining_time": "4:57:24"} +{"current_steps": 441, "total_steps": 909, "loss": 0.5403590202331543, "lr": 2.4568014491034565e-05, "epoch": 1.4554455445544554, "percentage": 48.51, "elapsed_time": "4:39:45", "remaining_time": "4:56:53"} +{"current_steps": 442, "total_steps": 909, "loss": 0.4753378629684448, "lr": 2.4493199773267902e-05, "epoch": 1.4587458745874589, "percentage": 48.62, "elapsed_time": "4:40:35", "remaining_time": "4:56:27"} +{"current_steps": 443, "total_steps": 909, "loss": 
0.5125438570976257, "lr": 2.4418318780715477e-05, "epoch": 1.462046204620462, "percentage": 48.73, "elapsed_time": "4:41:07", "remaining_time": "4:55:43"} +{"current_steps": 444, "total_steps": 909, "loss": 0.5670269727706909, "lr": 2.434337261787342e-05, "epoch": 1.4653465346534653, "percentage": 48.84, "elapsed_time": "4:41:43", "remaining_time": "4:55:03"} +{"current_steps": 317, "total_steps": 1302, "loss": 0.4176313281059265, "lr": 3.7586786142928684e-05, "epoch": 0.7312572087658593, "percentage": 24.35, "elapsed_time": "18:21:29", "remaining_time": "2 days, 9:02:37"} +{"current_steps": 445, "total_steps": 909, "loss": 0.5538198947906494, "lr": 2.426836239019911e-05, "epoch": 1.4686468646864688, "percentage": 48.95, "elapsed_time": "4:42:13", "remaining_time": "4:54:16"} +{"current_steps": 446, "total_steps": 909, "loss": 0.5012328028678894, "lr": 2.4193289204094893e-05, "epoch": 1.471947194719472, "percentage": 49.06, "elapsed_time": "4:42:48", "remaining_time": "4:53:35"} +{"current_steps": 447, "total_steps": 909, "loss": 0.538119912147522, "lr": 2.4118154166891762e-05, "epoch": 1.4752475247524752, "percentage": 49.17, "elapsed_time": "4:43:24", "remaining_time": "4:52:54"} +{"current_steps": 448, "total_steps": 909, "loss": 0.5252339839935303, "lr": 2.4042958386833003e-05, "epoch": 1.4785478547854787, "percentage": 49.28, "elapsed_time": "4:44:00", "remaining_time": "4:52:15"} +{"current_steps": 449, "total_steps": 909, "loss": 0.5367081761360168, "lr": 2.3967702973057853e-05, "epoch": 1.481848184818482, "percentage": 49.39, "elapsed_time": "4:44:37", "remaining_time": "4:51:36"} +{"current_steps": 450, "total_steps": 909, "loss": 0.5091884136199951, "lr": 2.3892389035585167e-05, "epoch": 1.4851485148514851, "percentage": 49.5, "elapsed_time": "4:45:15", "remaining_time": "4:50:57"} +{"current_steps": 318, "total_steps": 1302, "loss": 0.41284921765327454, "lr": 3.756117184857889e-05, "epoch": 0.7335640138408305, "percentage": 24.42, "elapsed_time": 
"18:24:54", "remaining_time": "2 days, 8:58:56"} +{"current_steps": 451, "total_steps": 909, "loss": 0.5079891681671143, "lr": 2.3817017685297016e-05, "epoch": 1.4884488448844886, "percentage": 49.61, "elapsed_time": "4:45:57", "remaining_time": "4:50:23"} +{"current_steps": 452, "total_steps": 909, "loss": 0.511939287185669, "lr": 2.3741590033922313e-05, "epoch": 1.4917491749174918, "percentage": 49.72, "elapsed_time": "4:46:47", "remaining_time": "4:49:57"} +{"current_steps": 453, "total_steps": 909, "loss": 0.5070478916168213, "lr": 2.3666107194020404e-05, "epoch": 1.495049504950495, "percentage": 49.83, "elapsed_time": "4:47:19", "remaining_time": "4:49:13"} +{"current_steps": 454, "total_steps": 909, "loss": 0.547492504119873, "lr": 2.3590570278964682e-05, "epoch": 1.4983498349834983, "percentage": 49.94, "elapsed_time": "4:47:55", "remaining_time": "4:48:33"} +{"current_steps": 455, "total_steps": 909, "loss": 0.5386558771133423, "lr": 2.3514980402926132e-05, "epoch": 1.5016501650165015, "percentage": 50.06, "elapsed_time": "4:48:27", "remaining_time": "4:47:49"} +{"current_steps": 319, "total_steps": 1302, "loss": 0.44936639070510864, "lr": 3.753543115652439e-05, "epoch": 0.7358708189158016, "percentage": 24.5, "elapsed_time": "18:28:10", "remaining_time": "2 days, 8:54:51"} +{"current_steps": 456, "total_steps": 909, "loss": 0.48668172955513, "lr": 2.3439338680856943e-05, "epoch": 1.504950495049505, "percentage": 50.17, "elapsed_time": "4:49:11", "remaining_time": "4:47:17"} +{"current_steps": 457, "total_steps": 909, "loss": 0.5497942566871643, "lr": 2.3363646228474002e-05, "epoch": 1.5082508250825084, "percentage": 50.28, "elapsed_time": "4:49:45", "remaining_time": "4:46:35"} +{"current_steps": 458, "total_steps": 909, "loss": 0.5267748832702637, "lr": 2.328790416224248e-05, "epoch": 1.5115511551155114, "percentage": 50.39, "elapsed_time": "4:50:28", "remaining_time": "4:46:01"} +{"current_steps": 459, "total_steps": 909, "loss": 0.5578982830047607, 
"lr": 2.3212113599359368e-05, "epoch": 1.5148514851485149, "percentage": 50.5, "elapsed_time": "4:50:59", "remaining_time": "4:45:17"} +{"current_steps": 460, "total_steps": 909, "loss": 0.5136545896530151, "lr": 2.3136275657736956e-05, "epoch": 1.5181518151815183, "percentage": 50.61, "elapsed_time": "4:51:44", "remaining_time": "4:44:45"} +{"current_steps": 320, "total_steps": 1302, "loss": 0.4260580539703369, "lr": 3.750956425203552e-05, "epoch": 0.7381776239907728, "percentage": 24.58, "elapsed_time": "18:31:42", "remaining_time": "2 days, 8:51:34"} +{"current_steps": 461, "total_steps": 909, "loss": 0.5718669891357422, "lr": 2.3060391455986403e-05, "epoch": 1.5214521452145213, "percentage": 50.72, "elapsed_time": "4:52:22", "remaining_time": "4:44:08"} +{"current_steps": 462, "total_steps": 909, "loss": 0.5427108407020569, "lr": 2.2984462113401184e-05, "epoch": 1.5247524752475248, "percentage": 50.83, "elapsed_time": "4:53:07", "remaining_time": "4:43:36"} +{"current_steps": 463, "total_steps": 909, "loss": 0.5293564200401306, "lr": 2.2908488749940596e-05, "epoch": 1.528052805280528, "percentage": 50.94, "elapsed_time": "4:53:39", "remaining_time": "4:42:52"} +{"current_steps": 464, "total_steps": 909, "loss": 0.550743579864502, "lr": 2.2832472486213275e-05, "epoch": 1.5313531353135312, "percentage": 51.05, "elapsed_time": "4:54:14", "remaining_time": "4:42:11"} +{"current_steps": 465, "total_steps": 909, "loss": 0.5957387685775757, "lr": 2.2756414443460602e-05, "epoch": 1.5346534653465347, "percentage": 51.16, "elapsed_time": "4:54:59", "remaining_time": "4:41:40"} +{"current_steps": 321, "total_steps": 1302, "loss": 0.42194247245788574, "lr": 3.7483571321291064e-05, "epoch": 0.740484429065744, "percentage": 24.65, "elapsed_time": "18:35:07", "remaining_time": "2 days, 8:47:54"} +{"current_steps": 466, "total_steps": 909, "loss": 0.4994407892227173, "lr": 2.2680315743540234e-05, "epoch": 1.537953795379538, "percentage": 51.27, "elapsed_time": "4:55:48", 
"remaining_time": "4:41:12"} +{"current_steps": 467, "total_steps": 909, "loss": 0.5120857954025269, "lr": 2.260417750890949e-05, "epoch": 1.5412541254125411, "percentage": 51.38, "elapsed_time": "4:56:36", "remaining_time": "4:40:44"} +{"current_steps": 468, "total_steps": 909, "loss": 0.5727359056472778, "lr": 2.2528000862608845e-05, "epoch": 1.5445544554455446, "percentage": 51.49, "elapsed_time": "4:57:20", "remaining_time": "4:40:11"} +{"current_steps": 469, "total_steps": 909, "loss": 0.584964394569397, "lr": 2.2451786928245344e-05, "epoch": 1.5478547854785478, "percentage": 51.6, "elapsed_time": "4:57:56", "remaining_time": "4:39:30"} +{"current_steps": 470, "total_steps": 909, "loss": 0.5507112741470337, "lr": 2.237553682997603e-05, "epoch": 1.551155115511551, "percentage": 51.71, "elapsed_time": "4:58:37", "remaining_time": "4:38:56"} +{"current_steps": 322, "total_steps": 1302, "loss": 0.42118650674819946, "lr": 3.745745255137685e-05, "epoch": 0.7427912341407151, "percentage": 24.73, "elapsed_time": "18:38:39", "remaining_time": "2 days, 8:44:35"} +{"current_steps": 471, "total_steps": 909, "loss": 0.49136701226234436, "lr": 2.2299251692491364e-05, "epoch": 1.5544554455445545, "percentage": 51.82, "elapsed_time": "4:59:39", "remaining_time": "4:38:39"} +{"current_steps": 472, "total_steps": 909, "loss": 0.5374805927276611, "lr": 2.2222932640998635e-05, "epoch": 1.5577557755775577, "percentage": 51.93, "elapsed_time": "5:00:22", "remaining_time": "4:38:06"} +{"current_steps": 473, "total_steps": 909, "loss": 0.523996114730835, "lr": 2.2146580801205362e-05, "epoch": 1.561056105610561, "percentage": 52.04, "elapsed_time": "5:01:00", "remaining_time": "4:37:28"} +{"current_steps": 474, "total_steps": 909, "loss": 0.48198428750038147, "lr": 2.207019729930271e-05, "epoch": 1.5643564356435644, "percentage": 52.15, "elapsed_time": "5:01:47", "remaining_time": "4:36:57"} +{"current_steps": 475, "total_steps": 909, "loss": 0.5148699879646301, "lr": 
2.199378326194883e-05, "epoch": 1.5676567656765676, "percentage": 52.26, "elapsed_time": "5:02:19", "remaining_time": "4:36:14"} +{"current_steps": 323, "total_steps": 1302, "loss": 0.42712563276290894, "lr": 3.7431208130284486e-05, "epoch": 0.7450980392156863, "percentage": 24.81, "elapsed_time": "18:42:04", "remaining_time": "2 days, 8:40:58"} +{"current_steps": 476, "total_steps": 909, "loss": 0.5297671556472778, "lr": 2.1917339816252303e-05, "epoch": 1.5709570957095709, "percentage": 52.37, "elapsed_time": "5:03:02", "remaining_time": "4:35:40"} +{"current_steps": 477, "total_steps": 909, "loss": 0.5082278847694397, "lr": 2.1840868089755465e-05, "epoch": 1.5742574257425743, "percentage": 52.48, "elapsed_time": "5:03:44", "remaining_time": "4:35:05"} +{"current_steps": 478, "total_steps": 909, "loss": 0.4755392372608185, "lr": 2.176436921041779e-05, "epoch": 1.5775577557755776, "percentage": 52.59, "elapsed_time": "5:04:19", "remaining_time": "4:34:24"} +{"current_steps": 479, "total_steps": 909, "loss": 0.5249454975128174, "lr": 2.1687844306599275e-05, "epoch": 1.5808580858085808, "percentage": 52.7, "elapsed_time": "5:05:01", "remaining_time": "4:33:48"} +{"current_steps": 480, "total_steps": 909, "loss": 0.5626166462898254, "lr": 2.161129450704376e-05, "epoch": 1.5841584158415842, "percentage": 52.81, "elapsed_time": "5:05:42", "remaining_time": "4:33:13"} +{"current_steps": 324, "total_steps": 1302, "loss": 0.4078589677810669, "lr": 3.740483824690993e-05, "epoch": 0.7474048442906575, "percentage": 24.88, "elapsed_time": "18:45:35", "remaining_time": "2 days, 8:37:36"} +{"current_steps": 481, "total_steps": 909, "loss": 0.5590533018112183, "lr": 2.1534720940862318e-05, "epoch": 1.5874587458745875, "percentage": 52.92, "elapsed_time": "5:06:20", "remaining_time": "4:32:35"} +{"current_steps": 482, "total_steps": 909, "loss": 0.5146170854568481, "lr": 2.1458124737516557e-05, "epoch": 1.5907590759075907, "percentage": 53.03, "elapsed_time": "5:07:08", 
"remaining_time": "4:32:05"} +{"current_steps": 483, "total_steps": 909, "loss": 0.5633066296577454, "lr": 2.1381507026802007e-05, "epoch": 1.5940594059405941, "percentage": 53.14, "elapsed_time": "5:07:35", "remaining_time": "4:31:17"} +{"current_steps": 484, "total_steps": 909, "loss": 0.5273865461349487, "lr": 2.130486893883141e-05, "epoch": 1.5973597359735974, "percentage": 53.25, "elapsed_time": "5:08:23", "remaining_time": "4:30:48"} +{"current_steps": 485, "total_steps": 909, "loss": 0.5040723085403442, "lr": 2.1228211604018088e-05, "epoch": 1.6006600660066006, "percentage": 53.36, "elapsed_time": "5:08:59", "remaining_time": "4:30:08"} +{"current_steps": 486, "total_steps": 909, "loss": 0.5254411697387695, "lr": 2.1151536153059254e-05, "epoch": 1.603960396039604, "percentage": 53.47, "elapsed_time": "5:09:32", "remaining_time": "4:29:25"} +{"current_steps": 325, "total_steps": 1302, "loss": 0.41842761635780334, "lr": 3.737834309105218e-05, "epoch": 0.7497116493656286, "percentage": 24.96, "elapsed_time": "18:49:06", "remaining_time": "2 days, 8:34:17"} +{"current_steps": 487, "total_steps": 909, "loss": 0.5789728760719299, "lr": 2.1074843716919323e-05, "epoch": 1.6072607260726073, "percentage": 53.58, "elapsed_time": "5:10:13", "remaining_time": "4:28:49"} +{"current_steps": 488, "total_steps": 909, "loss": 0.5521235466003418, "lr": 2.0998135426813245e-05, "epoch": 1.6105610561056105, "percentage": 53.69, "elapsed_time": "5:10:40", "remaining_time": "4:28:01"} +{"current_steps": 489, "total_steps": 909, "loss": 0.4702959954738617, "lr": 2.092141241418984e-05, "epoch": 1.613861386138614, "percentage": 53.8, "elapsed_time": "5:11:14", "remaining_time": "4:27:19"} +{"current_steps": 490, "total_steps": 909, "loss": 0.4960707128047943, "lr": 2.0844675810715046e-05, "epoch": 1.6171617161716172, "percentage": 53.91, "elapsed_time": "5:11:53", "remaining_time": "4:26:42"} +{"current_steps": 491, "total_steps": 909, "loss": 0.5334826111793518, "lr": 
2.076792674825529e-05, "epoch": 1.6204620462046204, "percentage": 54.02, "elapsed_time": "5:12:29", "remaining_time": "4:26:01"} +{"current_steps": 326, "total_steps": 1302, "loss": 0.41437628865242004, "lr": 3.735172285341188e-05, "epoch": 0.7520184544405998, "percentage": 25.04, "elapsed_time": "18:52:43", "remaining_time": "2 days, 8:31:13"} +{"current_steps": 492, "total_steps": 909, "loss": 0.5604894161224365, "lr": 2.0691166358860775e-05, "epoch": 1.6237623762376239, "percentage": 54.13, "elapsed_time": "5:13:25", "remaining_time": "4:25:38"} +{"current_steps": 493, "total_steps": 909, "loss": 0.5565654635429382, "lr": 2.061439577474875e-05, "epoch": 1.627062706270627, "percentage": 54.24, "elapsed_time": "5:14:02", "remaining_time": "4:24:59"} +{"current_steps": 494, "total_steps": 909, "loss": 0.541640043258667, "lr": 2.0537616128286875e-05, "epoch": 1.6303630363036303, "percentage": 54.35, "elapsed_time": "5:14:41", "remaining_time": "4:24:21"} +{"current_steps": 495, "total_steps": 909, "loss": 0.5247132182121277, "lr": 2.0460828551976436e-05, "epoch": 1.6336633663366338, "percentage": 54.46, "elapsed_time": "5:15:18", "remaining_time": "4:23:42"} +{"current_steps": 496, "total_steps": 909, "loss": 0.533937394618988, "lr": 2.0384034178435727e-05, "epoch": 1.636963696369637, "percentage": 54.57, "elapsed_time": "5:15:50", "remaining_time": "4:22:59"} +{"current_steps": 497, "total_steps": 909, "loss": 0.5857927799224854, "lr": 2.0307234140383264e-05, "epoch": 1.6402640264026402, "percentage": 54.68, "elapsed_time": "5:16:42", "remaining_time": "4:22:32"} +{"current_steps": 327, "total_steps": 1302, "loss": 0.4471297860145569, "lr": 3.7324977725589974e-05, "epoch": 0.754325259515571, "percentage": 25.12, "elapsed_time": "18:56:08", "remaining_time": "2 days, 8:27:33"} +{"current_steps": 498, "total_steps": 909, "loss": 0.5191807746887207, "lr": 2.0230429570621134e-05, "epoch": 1.6435643564356437, "percentage": 54.79, "elapsed_time": "5:17:17", 
"remaining_time": "4:21:51"} +{"current_steps": 499, "total_steps": 909, "loss": 0.5255881547927856, "lr": 2.0153621602018276e-05, "epoch": 1.6468646864686467, "percentage": 54.9, "elapsed_time": "5:18:08", "remaining_time": "4:21:24"} +{"current_steps": 500, "total_steps": 909, "loss": 0.5134017467498779, "lr": 2.0076811367493736e-05, "epoch": 1.6501650165016502, "percentage": 55.01, "elapsed_time": "5:18:44", "remaining_time": "4:20:43"} +{"current_steps": 501, "total_steps": 909, "loss": 0.4548872113227844, "lr": 2e-05, "epoch": 1.6534653465346536, "percentage": 55.12, "elapsed_time": "5:19:46", "remaining_time": "4:20:25"} +{"current_steps": 328, "total_steps": 1302, "loss": 0.41949188709259033, "lr": 3.72981079000863e-05, "epoch": 0.7566320645905421, "percentage": 25.19, "elapsed_time": "18:59:35", "remaining_time": "2 days, 8:24:01"} +{"current_steps": 502, "total_steps": 909, "loss": 0.4879235625267029, "lr": 1.9923188632506268e-05, "epoch": 1.6567656765676566, "percentage": 55.23, "elapsed_time": "5:20:32", "remaining_time": "4:19:52"} +{"current_steps": 503, "total_steps": 909, "loss": 0.5488070249557495, "lr": 1.9846378397981737e-05, "epoch": 1.66006600660066, "percentage": 55.34, "elapsed_time": "5:21:15", "remaining_time": "4:19:18"} +{"current_steps": 504, "total_steps": 909, "loss": 0.474858820438385, "lr": 1.976957042937887e-05, "epoch": 1.6633663366336635, "percentage": 55.45, "elapsed_time": "5:22:08", "remaining_time": "4:18:51"} +{"current_steps": 505, "total_steps": 909, "loss": 0.573983907699585, "lr": 1.969276585961674e-05, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "5:22:49", "remaining_time": "4:18:15"} +{"current_steps": 329, "total_steps": 1302, "loss": 0.4263257086277008, "lr": 3.727111357029821e-05, "epoch": 0.7589388696655133, "percentage": 25.27, "elapsed_time": "19:02:35", "remaining_time": "2 days, 8:19:10"} +{"current_steps": 506, "total_steps": 909, "loss": 0.5299487709999084, "lr": 1.9615965821564284e-05, 
"epoch": 1.66996699669967, "percentage": 55.67, "elapsed_time": "5:23:15", "remaining_time": "4:17:27"} +{"current_steps": 507, "total_steps": 909, "loss": 0.580963134765625, "lr": 1.9539171448023568e-05, "epoch": 1.6732673267326734, "percentage": 55.78, "elapsed_time": "5:23:51", "remaining_time": "4:16:47"} +{"current_steps": 508, "total_steps": 909, "loss": 0.5240850448608398, "lr": 1.946238387171313e-05, "epoch": 1.6765676567656764, "percentage": 55.89, "elapsed_time": "5:24:32", "remaining_time": "4:16:10"} +{"current_steps": 509, "total_steps": 909, "loss": 0.5397930145263672, "lr": 1.9385604225251245e-05, "epoch": 1.6798679867986799, "percentage": 56.0, "elapsed_time": "5:25:06", "remaining_time": "4:15:29"} +{"current_steps": 510, "total_steps": 909, "loss": 0.4949077367782593, "lr": 1.9308833641139235e-05, "epoch": 1.6831683168316833, "percentage": 56.11, "elapsed_time": "5:25:43", "remaining_time": "4:14:50"} +{"current_steps": 511, "total_steps": 909, "loss": 0.4906027913093567, "lr": 1.9232073251744715e-05, "epoch": 1.6864686468646863, "percentage": 56.22, "elapsed_time": "5:26:27", "remaining_time": "4:14:16"} +{"current_steps": 330, "total_steps": 1302, "loss": 0.3839571475982666, "lr": 3.72439949305192e-05, "epoch": 0.7612456747404844, "percentage": 25.35, "elapsed_time": "19:06:09", "remaining_time": "2 days, 8:15:57"} +{"current_steps": 512, "total_steps": 909, "loss": 0.562363862991333, "lr": 1.9155324189284957e-05, "epoch": 1.6897689768976898, "percentage": 56.33, "elapsed_time": "5:26:57", "remaining_time": "4:13:31"} +{"current_steps": 513, "total_steps": 909, "loss": 0.5347090363502502, "lr": 1.9078587585810167e-05, "epoch": 1.693069306930693, "percentage": 56.44, "elapsed_time": "5:27:41", "remaining_time": "4:12:57"} +{"current_steps": 514, "total_steps": 909, "loss": 0.5554836988449097, "lr": 1.900186457318676e-05, "epoch": 1.6963696369636962, "percentage": 56.55, "elapsed_time": "5:28:18", "remaining_time": "4:12:18"} +{"current_steps": 
515, "total_steps": 909, "loss": 0.5179104208946228, "lr": 1.8925156283080684e-05, "epoch": 1.6996699669966997, "percentage": 56.66, "elapsed_time": "5:28:54", "remaining_time": "4:11:37"} +{"current_steps": 516, "total_steps": 909, "loss": 0.552240252494812, "lr": 1.8848463846940756e-05, "epoch": 1.702970297029703, "percentage": 56.77, "elapsed_time": "5:29:37", "remaining_time": "4:11:02"} +{"current_steps": 331, "total_steps": 1302, "loss": 0.4604438245296478, "lr": 3.721675217593747e-05, "epoch": 0.7635524798154556, "percentage": 25.42, "elapsed_time": "19:09:17", "remaining_time": "2 days, 8:11:28"} +{"current_steps": 517, "total_steps": 909, "loss": 0.534430980682373, "lr": 1.8771788395981915e-05, "epoch": 1.7062706270627062, "percentage": 56.88, "elapsed_time": "5:29:58", "remaining_time": "4:10:11"} +{"current_steps": 518, "total_steps": 909, "loss": 0.5601803064346313, "lr": 1.8695131061168598e-05, "epoch": 1.7095709570957096, "percentage": 56.99, "elapsed_time": "5:30:33", "remaining_time": "4:09:31"} +{"current_steps": 519, "total_steps": 909, "loss": 0.5119711756706238, "lr": 1.8618492973198e-05, "epoch": 1.7128712871287128, "percentage": 57.1, "elapsed_time": "5:31:06", "remaining_time": "4:08:48"} +{"current_steps": 520, "total_steps": 909, "loss": 0.5632577538490295, "lr": 1.8541875262483446e-05, "epoch": 1.716171617161716, "percentage": 57.21, "elapsed_time": "5:31:39", "remaining_time": "4:08:06"} +{"current_steps": 521, "total_steps": 909, "loss": 0.5499478578567505, "lr": 1.8465279059137686e-05, "epoch": 1.7194719471947195, "percentage": 57.32, "elapsed_time": "5:32:15", "remaining_time": "4:07:26"} +{"current_steps": 522, "total_steps": 909, "loss": 0.5176683664321899, "lr": 1.8388705492956244e-05, "epoch": 1.7227722772277227, "percentage": 57.43, "elapsed_time": "5:32:58", "remaining_time": "4:06:51"} +{"current_steps": 332, "total_steps": 1302, "loss": 0.42054101824760437, "lr": 3.718938550263459e-05, "epoch": 0.7658592848904268, "percentage": 
25.5, "elapsed_time": "19:12:52", "remaining_time": "2 days, 8:08:19"} +{"current_steps": 523, "total_steps": 909, "loss": 0.49528205394744873, "lr": 1.8312155693400735e-05, "epoch": 1.726072607260726, "percentage": 57.54, "elapsed_time": "5:33:32", "remaining_time": "4:06:10"} +{"current_steps": 524, "total_steps": 909, "loss": 0.5684216022491455, "lr": 1.8235630789582213e-05, "epoch": 1.7293729372937294, "percentage": 57.65, "elapsed_time": "5:34:13", "remaining_time": "4:05:33"} +{"current_steps": 525, "total_steps": 909, "loss": 0.5375942587852478, "lr": 1.815913191024454e-05, "epoch": 1.7326732673267327, "percentage": 57.76, "elapsed_time": "5:34:50", "remaining_time": "4:04:54"} +{"current_steps": 526, "total_steps": 909, "loss": 0.5541956424713135, "lr": 1.8082660183747704e-05, "epoch": 1.7359735973597359, "percentage": 57.87, "elapsed_time": "5:35:20", "remaining_time": "4:04:10"} +{"current_steps": 527, "total_steps": 909, "loss": 0.5304872393608093, "lr": 1.8006216738051175e-05, "epoch": 1.7392739273927393, "percentage": 57.98, "elapsed_time": "5:35:53", "remaining_time": "4:03:28"} +{"current_steps": 528, "total_steps": 909, "loss": 0.48648735880851746, "lr": 1.7929802700697297e-05, "epoch": 1.7425742574257426, "percentage": 58.09, "elapsed_time": "5:36:24", "remaining_time": "4:02:44"} +{"current_steps": 529, "total_steps": 909, "loss": 0.49221059679985046, "lr": 1.7853419198794638e-05, "epoch": 1.7458745874587458, "percentage": 58.2, "elapsed_time": "5:36:58", "remaining_time": "4:02:03"} +{"current_steps": 333, "total_steps": 1302, "loss": 0.4193068742752075, "lr": 3.716189510758399e-05, "epoch": 0.7681660899653979, "percentage": 25.58, "elapsed_time": "19:16:24", "remaining_time": "2 days, 8:05:03"} +{"current_steps": 530, "total_steps": 909, "loss": 0.5652948021888733, "lr": 1.7777067359001375e-05, "epoch": 1.7491749174917492, "percentage": 58.31, "elapsed_time": "5:37:31", "remaining_time": "4:01:21"} +{"current_steps": 531, "total_steps": 909, 
"loss": 0.5187686681747437, "lr": 1.7700748307508643e-05, "epoch": 1.7524752475247525, "percentage": 58.42, "elapsed_time": "5:38:06", "remaining_time": "4:00:41"} +{"current_steps": 532, "total_steps": 909, "loss": 0.5013114809989929, "lr": 1.7624463170023974e-05, "epoch": 1.7557755775577557, "percentage": 58.53, "elapsed_time": "5:38:50", "remaining_time": "4:00:07"} +{"current_steps": 533, "total_steps": 909, "loss": 0.47477245330810547, "lr": 1.7548213071754663e-05, "epoch": 1.7590759075907592, "percentage": 58.64, "elapsed_time": "5:39:24", "remaining_time": "3:59:25"} +{"current_steps": 534, "total_steps": 909, "loss": 0.5600515007972717, "lr": 1.7471999137391162e-05, "epoch": 1.7623762376237624, "percentage": 58.75, "elapsed_time": "5:40:01", "remaining_time": "3:58:47"} +{"current_steps": 334, "total_steps": 1302, "loss": 0.44868984818458557, "lr": 3.713428118864964e-05, "epoch": 0.7704728950403691, "percentage": 25.65, "elapsed_time": "19:20:07", "remaining_time": "2 days, 8:02:16"} +{"current_steps": 535, "total_steps": 909, "loss": 0.5017521381378174, "lr": 1.7395822491090513e-05, "epoch": 1.7656765676567656, "percentage": 58.86, "elapsed_time": "5:40:45", "remaining_time": "3:58:12"} +{"current_steps": 536, "total_steps": 909, "loss": 0.48718830943107605, "lr": 1.7319684256459773e-05, "epoch": 1.768976897689769, "percentage": 58.97, "elapsed_time": "5:41:24", "remaining_time": "3:57:35"} +{"current_steps": 537, "total_steps": 909, "loss": 0.5348131060600281, "lr": 1.72435855565394e-05, "epoch": 1.7722772277227723, "percentage": 59.08, "elapsed_time": "5:42:08", "remaining_time": "3:57:00"} +{"current_steps": 538, "total_steps": 909, "loss": 0.5132070183753967, "lr": 1.716752751378673e-05, "epoch": 1.7755775577557755, "percentage": 59.19, "elapsed_time": "5:42:41", "remaining_time": "3:56:19"} +{"current_steps": 539, "total_steps": 909, "loss": 0.5194598436355591, "lr": 1.7091511250059407e-05, "epoch": 1.778877887788779, "percentage": 59.3, 
"elapsed_time": "5:43:29", "remaining_time": "3:55:47"} +{"current_steps": 335, "total_steps": 1302, "loss": 0.39721131324768066, "lr": 3.7106543944584545e-05, "epoch": 0.7727797001153403, "percentage": 25.73, "elapsed_time": "19:23:21", "remaining_time": "2 days, 7:58:07"} +{"current_steps": 540, "total_steps": 909, "loss": 0.4950656294822693, "lr": 1.701553788659883e-05, "epoch": 1.7821782178217822, "percentage": 59.41, "elapsed_time": "5:44:17", "remaining_time": "3:55:15"} +{"current_steps": 541, "total_steps": 909, "loss": 0.5465744137763977, "lr": 1.6939608544013603e-05, "epoch": 1.7854785478547854, "percentage": 59.52, "elapsed_time": "5:44:56", "remaining_time": "3:54:38"} +{"current_steps": 542, "total_steps": 909, "loss": 0.5328625440597534, "lr": 1.6863724342263047e-05, "epoch": 1.7887788778877889, "percentage": 59.63, "elapsed_time": "5:45:22", "remaining_time": "3:53:51"} +{"current_steps": 543, "total_steps": 909, "loss": 0.483689546585083, "lr": 1.6787886400640645e-05, "epoch": 1.7920792079207921, "percentage": 59.74, "elapsed_time": "5:46:03", "remaining_time": "3:53:15"} +{"current_steps": 544, "total_steps": 909, "loss": 0.5225390195846558, "lr": 1.6712095837757525e-05, "epoch": 1.7953795379537953, "percentage": 59.85, "elapsed_time": "5:47:04", "remaining_time": "3:52:52"} +{"current_steps": 336, "total_steps": 1302, "loss": 0.40162885189056396, "lr": 3.707868357502938e-05, "epoch": 0.7750865051903114, "percentage": 25.81, "elapsed_time": "19:26:57", "remaining_time": "2 days, 7:54:59"} +{"current_steps": 545, "total_steps": 909, "loss": 0.5168595314025879, "lr": 1.6636353771526005e-05, "epoch": 1.7986798679867988, "percentage": 59.96, "elapsed_time": "5:47:42", "remaining_time": "3:52:13"} +{"current_steps": 546, "total_steps": 909, "loss": 0.5257725119590759, "lr": 1.6560661319143064e-05, "epoch": 1.801980198019802, "percentage": 60.07, "elapsed_time": "5:48:40", "remaining_time": "3:51:48"} +{"current_steps": 547, "total_steps": 909, "loss": 
0.5023485422134399, "lr": 1.648501959707387e-05, "epoch": 1.8052805280528053, "percentage": 60.18, "elapsed_time": "5:49:21", "remaining_time": "3:51:12"} +{"current_steps": 548, "total_steps": 909, "loss": 0.48897239565849304, "lr": 1.6409429721035324e-05, "epoch": 1.8085808580858087, "percentage": 60.29, "elapsed_time": "5:49:58", "remaining_time": "3:50:32"} +{"current_steps": 549, "total_steps": 909, "loss": 0.5318676829338074, "lr": 1.63338928059796e-05, "epoch": 1.811881188118812, "percentage": 60.4, "elapsed_time": "5:50:33", "remaining_time": "3:49:52"} +{"current_steps": 337, "total_steps": 1302, "loss": 0.43235182762145996, "lr": 3.705070028051101e-05, "epoch": 0.7773933102652826, "percentage": 25.88, "elapsed_time": "19:30:29", "remaining_time": "2 days, 7:51:41"} +{"current_steps": 550, "total_steps": 909, "loss": 0.4996787905693054, "lr": 1.6258409966077693e-05, "epoch": 1.8151815181518152, "percentage": 60.51, "elapsed_time": "5:51:09", "remaining_time": "3:49:12"} +{"current_steps": 551, "total_steps": 909, "loss": 0.4833434820175171, "lr": 1.6182982314702987e-05, "epoch": 1.8184818481848186, "percentage": 60.62, "elapsed_time": "5:51:48", "remaining_time": "3:48:34"} +{"current_steps": 552, "total_steps": 909, "loss": 0.5050291419029236, "lr": 1.6107610964414836e-05, "epoch": 1.8217821782178216, "percentage": 60.73, "elapsed_time": "5:52:29", "remaining_time": "3:47:58"} +{"current_steps": 553, "total_steps": 909, "loss": 0.5423529148101807, "lr": 1.6032297026942154e-05, "epoch": 1.825082508250825, "percentage": 60.84, "elapsed_time": "5:53:20", "remaining_time": "3:47:28"} +{"current_steps": 554, "total_steps": 909, "loss": 0.5670536756515503, "lr": 1.5957041613167007e-05, "epoch": 1.8283828382838285, "percentage": 60.95, "elapsed_time": "5:53:52", "remaining_time": "3:46:45"} +{"current_steps": 338, "total_steps": 1302, "loss": 0.4222877025604248, "lr": 3.702259426244105e-05, "epoch": 0.7797001153402537, "percentage": 25.96, "elapsed_time": 
"19:33:56", "remaining_time": "2 days, 7:48:10"} +{"current_steps": 555, "total_steps": 909, "loss": 0.5148528814315796, "lr": 1.5881845833108245e-05, "epoch": 1.8316831683168315, "percentage": 61.06, "elapsed_time": "5:54:37", "remaining_time": "3:46:11"} +{"current_steps": 556, "total_steps": 909, "loss": 0.5441350340843201, "lr": 1.5806710795905113e-05, "epoch": 1.834983498349835, "percentage": 61.17, "elapsed_time": "5:55:28", "remaining_time": "3:45:41"} +{"current_steps": 557, "total_steps": 909, "loss": 0.5338016748428345, "lr": 1.5731637609800897e-05, "epoch": 1.8382838283828384, "percentage": 61.28, "elapsed_time": "5:55:55", "remaining_time": "3:44:55"} +{"current_steps": 558, "total_steps": 909, "loss": 0.522803783416748, "lr": 1.5656627382126587e-05, "epoch": 1.8415841584158414, "percentage": 61.39, "elapsed_time": "5:56:29", "remaining_time": "3:44:14"} +{"current_steps": 559, "total_steps": 909, "loss": 0.5079183578491211, "lr": 1.5581681219284523e-05, "epoch": 1.844884488448845, "percentage": 61.5, "elapsed_time": "5:57:20", "remaining_time": "3:43:44"} +{"current_steps": 560, "total_steps": 909, "loss": 0.5360547304153442, "lr": 1.5506800226732104e-05, "epoch": 1.8481848184818483, "percentage": 61.61, "elapsed_time": "5:57:58", "remaining_time": "3:43:05"} +{"current_steps": 339, "total_steps": 1302, "loss": 0.4298175573348999, "lr": 3.699436572311444e-05, "epoch": 0.7820069204152249, "percentage": 26.04, "elapsed_time": "19:37:31", "remaining_time": "2 days, 7:45:00"} +{"current_steps": 561, "total_steps": 909, "loss": 0.5137909650802612, "lr": 1.5431985508965438e-05, "epoch": 1.8514851485148514, "percentage": 61.72, "elapsed_time": "5:58:36", "remaining_time": "3:42:27"} +{"current_steps": 562, "total_steps": 909, "loss": 0.513020396232605, "lr": 1.5357238169503107e-05, "epoch": 1.8547854785478548, "percentage": 61.83, "elapsed_time": "5:59:19", "remaining_time": "3:41:51"} +{"current_steps": 563, "total_steps": 909, "loss": 0.5015939474105835, 
"lr": 1.5282559310869856e-05, "epoch": 1.858085808580858, "percentage": 61.94, "elapsed_time": "5:59:51", "remaining_time": "3:41:09"} +{"current_steps": 564, "total_steps": 909, "loss": 0.5012743473052979, "lr": 1.5207950034580317e-05, "epoch": 1.8613861386138613, "percentage": 62.05, "elapsed_time": "6:00:38", "remaining_time": "3:40:36"} +{"current_steps": 565, "total_steps": 909, "loss": 0.48864254355430603, "lr": 1.5133411441122799e-05, "epoch": 1.8646864686468647, "percentage": 62.16, "elapsed_time": "6:01:18", "remaining_time": "3:39:58"} +{"current_steps": 340, "total_steps": 1302, "loss": 0.43021804094314575, "lr": 3.6966014865707994e-05, "epoch": 0.7843137254901961, "percentage": 26.11, "elapsed_time": "19:41:03", "remaining_time": "2 days, 7:41:43"} +{"current_steps": 566, "total_steps": 909, "loss": 0.437102347612381, "lr": 1.5058944629943044e-05, "epoch": 1.867986798679868, "percentage": 62.27, "elapsed_time": "6:02:07", "remaining_time": "3:39:26"} +{"current_steps": 567, "total_steps": 909, "loss": 0.5518525838851929, "lr": 1.4984550699427978e-05, "epoch": 1.8712871287128712, "percentage": 62.38, "elapsed_time": "6:02:43", "remaining_time": "3:38:47"} +{"current_steps": 568, "total_steps": 909, "loss": 0.5618141889572144, "lr": 1.4910230746889559e-05, "epoch": 1.8745874587458746, "percentage": 62.49, "elapsed_time": "6:03:16", "remaining_time": "3:38:05"} +{"current_steps": 569, "total_steps": 909, "loss": 0.4990406632423401, "lr": 1.4835985868548557e-05, "epoch": 1.8778877887788779, "percentage": 62.6, "elapsed_time": "6:03:57", "remaining_time": "3:37:28"} +{"current_steps": 570, "total_steps": 909, "loss": 0.5004926919937134, "lr": 1.4761817159518415e-05, "epoch": 1.881188118811881, "percentage": 62.71, "elapsed_time": "6:04:41", "remaining_time": "3:36:53"} +{"current_steps": 341, "total_steps": 1302, "loss": 0.4211689233779907, "lr": 3.6937541894278875e-05, "epoch": 0.7866205305651672, "percentage": 26.19, "elapsed_time": "19:44:36", 
"remaining_time": "2 days, 7:38:25"} +{"current_steps": 571, "total_steps": 909, "loss": 0.5166051983833313, "lr": 1.4687725713789042e-05, "epoch": 1.8844884488448845, "percentage": 62.82, "elapsed_time": "6:05:20", "remaining_time": "3:36:15"} +{"current_steps": 572, "total_steps": 909, "loss": 0.5510391592979431, "lr": 1.461371262421074e-05, "epoch": 1.8877887788778878, "percentage": 62.93, "elapsed_time": "6:05:53", "remaining_time": "3:35:34"} +{"current_steps": 573, "total_steps": 909, "loss": 0.5305938720703125, "lr": 1.4539778982478061e-05, "epoch": 1.891089108910891, "percentage": 63.04, "elapsed_time": "6:06:24", "remaining_time": "3:34:51"} +{"current_steps": 574, "total_steps": 909, "loss": 0.562718391418457, "lr": 1.4465925879113663e-05, "epoch": 1.8943894389438944, "percentage": 63.15, "elapsed_time": "6:07:06", "remaining_time": "3:34:14"} +{"current_steps": 575, "total_steps": 909, "loss": 0.541257381439209, "lr": 1.4392154403452294e-05, "epoch": 1.8976897689768977, "percentage": 63.26, "elapsed_time": "6:07:39", "remaining_time": "3:33:33"} +{"current_steps": 576, "total_steps": 909, "loss": 0.556663990020752, "lr": 1.4318465643624696e-05, "epoch": 1.900990099009901, "percentage": 63.37, "elapsed_time": "6:08:08", "remaining_time": "3:32:49"} +{"current_steps": 342, "total_steps": 1302, "loss": 0.40474146604537964, "lr": 3.6908947013763205e-05, "epoch": 0.7889273356401384, "percentage": 26.27, "elapsed_time": "19:48:06", "remaining_time": "2 days, 7:35:02"} +{"current_steps": 577, "total_steps": 909, "loss": 0.5691581964492798, "lr": 1.4244860686541522e-05, "epoch": 1.9042904290429044, "percentage": 63.48, "elapsed_time": "6:08:42", "remaining_time": "3:32:09"} +{"current_steps": 578, "total_steps": 909, "loss": 0.513170063495636, "lr": 1.4171340617877349e-05, "epoch": 1.9075907590759076, "percentage": 63.59, "elapsed_time": "6:09:32", "remaining_time": "3:31:37"} +{"current_steps": 579, "total_steps": 909, "loss": 0.5679588317871094, "lr": 
1.4097906522054656e-05, "epoch": 1.9108910891089108, "percentage": 63.7, "elapsed_time": "6:10:03", "remaining_time": "3:30:55"} +{"current_steps": 580, "total_steps": 909, "loss": 0.513796329498291, "lr": 1.4024559482227818e-05, "epoch": 1.9141914191419143, "percentage": 63.81, "elapsed_time": "6:10:52", "remaining_time": "3:30:22"} +{"current_steps": 581, "total_steps": 909, "loss": 0.4618416428565979, "lr": 1.3951300580267108e-05, "epoch": 1.9174917491749175, "percentage": 63.92, "elapsed_time": "6:11:34", "remaining_time": "3:29:46"} +{"current_steps": 343, "total_steps": 1302, "loss": 0.4009843170642853, "lr": 3.688023042997453e-05, "epoch": 0.7912341407151096, "percentage": 26.34, "elapsed_time": "19:51:31", "remaining_time": "2 days, 7:31:23"} +{"current_steps": 582, "total_steps": 909, "loss": 0.5491312742233276, "lr": 1.3878130896742796e-05, "epoch": 1.9207920792079207, "percentage": 64.03, "elapsed_time": "6:12:16", "remaining_time": "3:29:09"} +{"current_steps": 583, "total_steps": 909, "loss": 0.5524745583534241, "lr": 1.3805051510909164e-05, "epoch": 1.9240924092409242, "percentage": 64.14, "elapsed_time": "6:12:57", "remaining_time": "3:28:33"} +{"current_steps": 584, "total_steps": 909, "loss": 0.5232075452804565, "lr": 1.3732063500688604e-05, "epoch": 1.9273927392739274, "percentage": 64.25, "elapsed_time": "6:13:44", "remaining_time": "3:27:59"} +{"current_steps": 585, "total_steps": 909, "loss": 0.5257346034049988, "lr": 1.3659167942655702e-05, "epoch": 1.9306930693069306, "percentage": 64.36, "elapsed_time": "6:14:24", "remaining_time": "3:27:21"} +{"current_steps": 586, "total_steps": 909, "loss": 0.5196455717086792, "lr": 1.35863659120214e-05, "epoch": 1.933993399339934, "percentage": 64.47, "elapsed_time": "6:14:51", "remaining_time": "3:26:37"} +{"current_steps": 344, "total_steps": 1302, "loss": 0.43038254976272583, "lr": 3.6851392349602396e-05, "epoch": 0.7935409457900807, "percentage": 26.42, "elapsed_time": "19:54:39", "remaining_time": 
"2 days, 7:26:58"} +{"current_steps": 587, "total_steps": 909, "loss": 0.5122568011283875, "lr": 1.3513658482617085e-05, "epoch": 1.9372937293729373, "percentage": 64.58, "elapsed_time": "6:15:32", "remaining_time": "3:26:00"} +{"current_steps": 588, "total_steps": 909, "loss": 0.5236790180206299, "lr": 1.3441046726878786e-05, "epoch": 1.9405940594059405, "percentage": 64.69, "elapsed_time": "6:16:05", "remaining_time": "3:25:19"} +{"current_steps": 589, "total_steps": 909, "loss": 0.5508555173873901, "lr": 1.3368531715831337e-05, "epoch": 1.943894389438944, "percentage": 64.8, "elapsed_time": "6:16:40", "remaining_time": "3:24:38"} +{"current_steps": 590, "total_steps": 909, "loss": 0.4742932617664337, "lr": 1.3296114519072594e-05, "epoch": 1.9471947194719472, "percentage": 64.91, "elapsed_time": "6:17:31", "remaining_time": "3:24:07"} +{"current_steps": 591, "total_steps": 909, "loss": 0.5406354665756226, "lr": 1.3223796204757638e-05, "epoch": 1.9504950495049505, "percentage": 65.02, "elapsed_time": "6:18:03", "remaining_time": "3:23:25"} +{"current_steps": 345, "total_steps": 1302, "loss": 0.3898426294326782, "lr": 3.682243298021079e-05, "epoch": 0.7958477508650519, "percentage": 26.5, "elapsed_time": "19:58:00", "remaining_time": "2 days, 7:23:09"} +{"current_steps": 592, "total_steps": 909, "loss": 0.508262038230896, "lr": 1.3151577839583043e-05, "epoch": 1.953795379537954, "percentage": 65.13, "elapsed_time": "6:18:44", "remaining_time": "3:22:48"} +{"current_steps": 593, "total_steps": 909, "loss": 0.5260204672813416, "lr": 1.3079460488771136e-05, "epoch": 1.9570957095709571, "percentage": 65.24, "elapsed_time": "6:19:28", "remaining_time": "3:22:13"} +{"current_steps": 594, "total_steps": 909, "loss": 0.522408127784729, "lr": 1.3007445216054257e-05, "epoch": 1.9603960396039604, "percentage": 65.35, "elapsed_time": "6:20:09", "remaining_time": "3:21:35"} +{"current_steps": 595, "total_steps": 909, "loss": 0.4849371910095215, "lr": 1.2935533083659114e-05, 
"epoch": 1.9636963696369638, "percentage": 65.46, "elapsed_time": "6:20:50", "remaining_time": "3:20:58"} +{"current_steps": 596, "total_steps": 909, "loss": 0.5319019556045532, "lr": 1.2863725152291091e-05, "epoch": 1.966996699669967, "percentage": 65.57, "elapsed_time": "6:21:23", "remaining_time": "3:20:17"} +{"current_steps": 346, "total_steps": 1302, "loss": 0.42124250531196594, "lr": 3.679335253023671e-05, "epoch": 0.798154555940023, "percentage": 26.57, "elapsed_time": "20:01:15", "remaining_time": "2 days, 7:19:04"} +{"current_steps": 597, "total_steps": 909, "loss": 0.5562412738800049, "lr": 1.2792022481118587e-05, "epoch": 1.9702970297029703, "percentage": 65.68, "elapsed_time": "6:22:00", "remaining_time": "3:19:38"} +{"current_steps": 598, "total_steps": 909, "loss": 0.49608999490737915, "lr": 1.2720426127757431e-05, "epoch": 1.9735973597359737, "percentage": 65.79, "elapsed_time": "6:22:30", "remaining_time": "3:18:55"} +{"current_steps": 599, "total_steps": 909, "loss": 0.5082768201828003, "lr": 1.2648937148255253e-05, "epoch": 1.976897689768977, "percentage": 65.9, "elapsed_time": "6:23:15", "remaining_time": "3:18:20"} +{"current_steps": 600, "total_steps": 909, "loss": 0.5706614255905151, "lr": 1.2577556597075933e-05, "epoch": 1.9801980198019802, "percentage": 66.01, "elapsed_time": "6:23:51", "remaining_time": "3:17:41"} +{"current_steps": 601, "total_steps": 909, "loss": 0.5366507768630981, "lr": 1.2506285527083991e-05, "epoch": 1.9834983498349836, "percentage": 66.12, "elapsed_time": "6:24:32", "remaining_time": "3:17:04"} +{"current_steps": 602, "total_steps": 909, "loss": 0.5462816953659058, "lr": 1.2435124989529139e-05, "epoch": 1.9867986798679866, "percentage": 66.23, "elapsed_time": "6:25:10", "remaining_time": "3:16:25"} +{"current_steps": 347, "total_steps": 1302, "loss": 0.4331362843513489, "lr": 3.676415120898863e-05, "epoch": 0.8004613610149942, "percentage": 26.65, "elapsed_time": "20:04:43", "remaining_time": "2 days, 7:15:36"} 
+{"current_steps": 603, "total_steps": 909, "loss": 0.5050650238990784, "lr": 1.236407603403072e-05, "epoch": 1.99009900990099, "percentage": 66.34, "elapsed_time": "6:25:54", "remaining_time": "3:15:49"} +{"current_steps": 604, "total_steps": 909, "loss": 0.4915675222873688, "lr": 1.2293139708562221e-05, "epoch": 1.9933993399339935, "percentage": 66.45, "elapsed_time": "6:26:26", "remaining_time": "3:15:08"} +{"current_steps": 605, "total_steps": 909, "loss": 0.5807889103889465, "lr": 1.2222317059435863e-05, "epoch": 1.9966996699669965, "percentage": 66.56, "elapsed_time": "6:26:56", "remaining_time": "3:14:26"} +{"current_steps": 606, "total_steps": 909, "loss": 0.49173152446746826, "lr": 1.2151609131287124e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "6:27:39", "remaining_time": "3:13:49"} +{"current_steps": 607, "total_steps": 909, "loss": 0.4426806569099426, "lr": 1.2081016967059336e-05, "epoch": 2.0033003300330035, "percentage": 66.78, "elapsed_time": "6:28:12", "remaining_time": "3:13:08"} +{"current_steps": 348, "total_steps": 1302, "loss": 0.41149985790252686, "lr": 3.673482922664501e-05, "epoch": 0.8027681660899654, "percentage": 26.73, "elapsed_time": "20:07:53", "remaining_time": "2 days, 7:11:17"} +{"current_steps": 608, "total_steps": 909, "loss": 0.45669305324554443, "lr": 1.201054160798833e-05, "epoch": 2.0066006600660065, "percentage": 66.89, "elapsed_time": "6:28:42", "remaining_time": "3:12:26"} +{"current_steps": 609, "total_steps": 909, "loss": 0.4638911783695221, "lr": 1.1940184093587047e-05, "epoch": 2.00990099009901, "percentage": 67.0, "elapsed_time": "6:29:22", "remaining_time": "3:11:48"} +{"current_steps": 610, "total_steps": 909, "loss": 0.4541138708591461, "lr": 1.186994546163023e-05, "epoch": 2.0132013201320134, "percentage": 67.11, "elapsed_time": "6:30:17", "remaining_time": "3:11:18"} +{"current_steps": 611, "total_steps": 909, "loss": 0.49081191420555115, "lr": 1.1799826748139079e-05, "epoch": 2.0165016501650164, 
"percentage": 67.22, "elapsed_time": "6:30:51", "remaining_time": "3:10:37"} +{"current_steps": 612, "total_steps": 909, "loss": 0.4794033169746399, "lr": 1.1729828987366009e-05, "epoch": 2.01980198019802, "percentage": 67.33, "elapsed_time": "6:31:31", "remaining_time": "3:10:00"} +{"current_steps": 349, "total_steps": 1302, "loss": 0.4314206838607788, "lr": 3.670538679425276e-05, "epoch": 0.8050749711649365, "percentage": 26.8, "elapsed_time": "20:11:05", "remaining_time": "2 days, 7:07:04"} +{"current_steps": 613, "total_steps": 909, "loss": 0.4142993688583374, "lr": 1.165995321177939e-05, "epoch": 2.0231023102310233, "percentage": 67.44, "elapsed_time": "6:32:11", "remaining_time": "3:09:22"} +{"current_steps": 614, "total_steps": 909, "loss": 0.47322210669517517, "lr": 1.159020045204829e-05, "epoch": 2.0264026402640263, "percentage": 67.55, "elapsed_time": "6:32:45", "remaining_time": "3:08:42"} +{"current_steps": 615, "total_steps": 909, "loss": 0.4899124503135681, "lr": 1.15205717370273e-05, "epoch": 2.0297029702970297, "percentage": 67.66, "elapsed_time": "6:33:13", "remaining_time": "3:07:58"} +{"current_steps": 616, "total_steps": 909, "loss": 0.4857853055000305, "lr": 1.1451068093741355e-05, "epoch": 2.033003300330033, "percentage": 67.77, "elapsed_time": "6:33:56", "remaining_time": "3:07:22"} +{"current_steps": 617, "total_steps": 909, "loss": 0.4790021479129791, "lr": 1.1381690547370559e-05, "epoch": 2.036303630363036, "percentage": 67.88, "elapsed_time": "6:34:34", "remaining_time": "3:06:44"} +{"current_steps": 618, "total_steps": 909, "loss": 0.4519282281398773, "lr": 1.13124401212351e-05, "epoch": 2.0396039603960396, "percentage": 67.99, "elapsed_time": "6:35:03", "remaining_time": "3:06:01"} +{"current_steps": 350, "total_steps": 1302, "loss": 0.42710763216018677, "lr": 3.667582412372577e-05, "epoch": 0.8073817762399077, "percentage": 26.88, "elapsed_time": "20:14:34", "remaining_time": "2 days, 7:03:39"} +{"current_steps": 619, "total_steps": 
909, "loss": 0.4738570749759674, "lr": 1.1243317836780138e-05, "epoch": 2.042904290429043, "percentage": 68.1, "elapsed_time": "6:35:34", "remaining_time": "3:05:19"} +{"current_steps": 620, "total_steps": 909, "loss": 0.5111795663833618, "lr": 1.1174324713560751e-05, "epoch": 2.046204620462046, "percentage": 68.21, "elapsed_time": "6:36:00", "remaining_time": "3:04:35"} +{"current_steps": 621, "total_steps": 909, "loss": 0.4750926196575165, "lr": 1.1105461769226858e-05, "epoch": 2.0495049504950495, "percentage": 68.32, "elapsed_time": "6:36:45", "remaining_time": "3:04:00"} +{"current_steps": 622, "total_steps": 909, "loss": 0.4580341577529907, "lr": 1.1036730019508259e-05, "epoch": 2.052805280528053, "percentage": 68.43, "elapsed_time": "6:37:17", "remaining_time": "3:03:18"} +{"current_steps": 623, "total_steps": 909, "loss": 0.43322116136550903, "lr": 1.0968130478199635e-05, "epoch": 2.056105610561056, "percentage": 68.54, "elapsed_time": "6:37:52", "remaining_time": "3:02:39"} +{"current_steps": 624, "total_steps": 909, "loss": 0.5015532374382019, "lr": 1.0899664157145562e-05, "epoch": 2.0594059405940595, "percentage": 68.65, "elapsed_time": "6:38:21", "remaining_time": "3:01:56"} +{"current_steps": 351, "total_steps": 1302, "loss": 0.41166919469833374, "lr": 3.6646141427843315e-05, "epoch": 0.8096885813148789, "percentage": 26.96, "elapsed_time": "20:17:50", "remaining_time": "2 days, 6:59:36"} +{"current_steps": 625, "total_steps": 909, "loss": 0.4508541226387024, "lr": 1.0831332066225645e-05, "epoch": 2.062706270627063, "percentage": 68.76, "elapsed_time": "6:38:54", "remaining_time": "3:01:15"} +{"current_steps": 626, "total_steps": 909, "loss": 0.49554720520973206, "lr": 1.0763135213339589e-05, "epoch": 2.066006600660066, "percentage": 68.87, "elapsed_time": "6:39:37", "remaining_time": "3:00:39"} +{"current_steps": 627, "total_steps": 909, "loss": 0.4523652493953705, "lr": 1.0695074604392305e-05, "epoch": 2.0693069306930694, "percentage": 68.98, 
"elapsed_time": "6:40:11", "remaining_time": "2:59:59"} +{"current_steps": 628, "total_steps": 909, "loss": 0.44413498044013977, "lr": 1.0627151243279136e-05, "epoch": 2.072607260726073, "percentage": 69.09, "elapsed_time": "6:40:47", "remaining_time": "2:59:20"} +{"current_steps": 629, "total_steps": 909, "loss": 0.4645534157752991, "lr": 1.055936613187101e-05, "epoch": 2.075907590759076, "percentage": 69.2, "elapsed_time": "6:41:30", "remaining_time": "2:58:43"} +{"current_steps": 352, "total_steps": 1302, "loss": 0.41456669569015503, "lr": 3.6616338920248586e-05, "epoch": 0.81199538638985, "percentage": 27.04, "elapsed_time": "20:21:28", "remaining_time": "2 days, 6:56:34"} +{"current_steps": 630, "total_steps": 909, "loss": 0.44823265075683594, "lr": 1.0491720269999663e-05, "epoch": 2.0792079207920793, "percentage": 69.31, "elapsed_time": "6:42:10", "remaining_time": "2:58:06"} +{"current_steps": 631, "total_steps": 909, "loss": 0.45181727409362793, "lr": 1.0424214655442891e-05, "epoch": 2.0825082508250823, "percentage": 69.42, "elapsed_time": "6:43:00", "remaining_time": "2:57:33"} +{"current_steps": 632, "total_steps": 909, "loss": 0.5371145009994507, "lr": 1.0356850283909852e-05, "epoch": 2.0858085808580857, "percentage": 69.53, "elapsed_time": "6:43:38", "remaining_time": "2:56:54"} +{"current_steps": 633, "total_steps": 909, "loss": 0.4564274847507477, "lr": 1.0289628149026369e-05, "epoch": 2.089108910891089, "percentage": 69.64, "elapsed_time": "6:44:18", "remaining_time": "2:56:17"} +{"current_steps": 634, "total_steps": 909, "loss": 0.4490276873111725, "lr": 1.0222549242320254e-05, "epoch": 2.092409240924092, "percentage": 69.75, "elapsed_time": "6:45:13", "remaining_time": "2:55:46"} +{"current_steps": 353, "total_steps": 1302, "loss": 0.4078449010848999, "lr": 3.658641681544711e-05, "epoch": 0.8143021914648212, "percentage": 27.11, "elapsed_time": "20:25:01", "remaining_time": "2 days, 6:53:20"} +{"current_steps": 635, "total_steps": 909, "loss": 
0.4663650095462799, "lr": 1.0155614553206715e-05, "epoch": 2.0957095709570956, "percentage": 69.86, "elapsed_time": "6:45:55", "remaining_time": "2:55:09"} +{"current_steps": 636, "total_steps": 909, "loss": 0.46265488862991333, "lr": 1.0088825068973746e-05, "epoch": 2.099009900990099, "percentage": 69.97, "elapsed_time": "6:46:25", "remaining_time": "2:54:27"} +{"current_steps": 637, "total_steps": 909, "loss": 0.45717963576316833, "lr": 1.002218177476756e-05, "epoch": 2.102310231023102, "percentage": 70.08, "elapsed_time": "6:47:04", "remaining_time": "2:53:49"} +{"current_steps": 638, "total_steps": 909, "loss": 0.47119495272636414, "lr": 9.955685653578068e-06, "epoch": 2.1056105610561056, "percentage": 70.19, "elapsed_time": "6:47:59", "remaining_time": "2:53:18"} +{"current_steps": 639, "total_steps": 909, "loss": 0.46565738320350647, "lr": 9.88933768622439e-06, "epoch": 2.108910891089109, "percentage": 70.3, "elapsed_time": "6:48:32", "remaining_time": "2:52:37"} +{"current_steps": 640, "total_steps": 909, "loss": 0.45610398054122925, "lr": 9.823138851340337e-06, "epoch": 2.112211221122112, "percentage": 70.41, "elapsed_time": "6:49:03", "remaining_time": "2:51:56"} +{"current_steps": 354, "total_steps": 1302, "loss": 0.3844146430492401, "lr": 3.655637532880523e-05, "epoch": 0.8166089965397924, "percentage": 27.19, "elapsed_time": "20:28:36", "remaining_time": "2 days, 6:50:11"} +{"current_steps": 641, "total_steps": 909, "loss": 0.4907280206680298, "lr": 9.75709012536005e-06, "epoch": 2.1155115511551155, "percentage": 70.52, "elapsed_time": "6:49:43", "remaining_time": "2:51:18"} +{"current_steps": 642, "total_steps": 909, "loss": 0.500091552734375, "lr": 9.691192482503546e-06, "epoch": 2.118811881188119, "percentage": 70.63, "elapsed_time": "6:50:22", "remaining_time": "2:50:40"} +{"current_steps": 643, "total_steps": 909, "loss": 0.4330231547355652, "lr": 9.625446894762371e-06, "epoch": 2.122112211221122, "percentage": 70.74, "elapsed_time": "6:51:00", 
"remaining_time": "2:50:01"} +{"current_steps": 644, "total_steps": 909, "loss": 0.4750261902809143, "lr": 9.559854331885233e-06, "epoch": 2.1254125412541254, "percentage": 70.85, "elapsed_time": "6:51:38", "remaining_time": "2:49:23"} +{"current_steps": 645, "total_steps": 909, "loss": 0.4567373991012573, "lr": 9.49441576136374e-06, "epoch": 2.128712871287129, "percentage": 70.96, "elapsed_time": "6:52:09", "remaining_time": "2:48:41"} +{"current_steps": 355, "total_steps": 1302, "loss": 0.4534158706665039, "lr": 3.6526214676548553e-05, "epoch": 0.8189158016147635, "percentage": 27.27, "elapsed_time": "20:31:50", "remaining_time": "2 days, 6:46:04"} +{"current_steps": 646, "total_steps": 909, "loss": 0.4601932168006897, "lr": 9.429132148418116e-06, "epoch": 2.132013201320132, "percentage": 71.07, "elapsed_time": "6:52:39", "remaining_time": "2:48:00"} +{"current_steps": 647, "total_steps": 909, "loss": 0.4909035265445709, "lr": 9.364004455982931e-06, "epoch": 2.1353135313531353, "percentage": 71.18, "elapsed_time": "6:53:10", "remaining_time": "2:47:18"} +{"current_steps": 648, "total_steps": 909, "loss": 0.4443170428276062, "lr": 9.299033644692948e-06, "epoch": 2.1386138613861387, "percentage": 71.29, "elapsed_time": "6:53:47", "remaining_time": "2:46:40"} +{"current_steps": 649, "total_steps": 909, "loss": 0.46534985303878784, "lr": 9.234220672868928e-06, "epoch": 2.1419141914191417, "percentage": 71.4, "elapsed_time": "6:54:20", "remaining_time": "2:45:59"} +{"current_steps": 650, "total_steps": 909, "loss": 0.4351472854614258, "lr": 9.169566496503476e-06, "epoch": 2.145214521452145, "percentage": 71.51, "elapsed_time": "6:54:55", "remaining_time": "2:45:19"} +{"current_steps": 651, "total_steps": 909, "loss": 0.41445475816726685, "lr": 9.105072069246983e-06, "epoch": 2.1485148514851486, "percentage": 71.62, "elapsed_time": "6:55:39", "remaining_time": "2:44:43"} +{"current_steps": 356, "total_steps": 1302, "loss": 0.4048612117767334, "lr": 
3.649593507576039e-05, "epoch": 0.8212226066897347, "percentage": 27.34, "elapsed_time": "20:35:21", "remaining_time": "2 days, 6:42:43"} +{"current_steps": 652, "total_steps": 909, "loss": 0.475847989320755, "lr": 9.040738342393532e-06, "epoch": 2.1518151815181517, "percentage": 71.73, "elapsed_time": "6:56:10", "remaining_time": "2:44:02"} +{"current_steps": 653, "total_steps": 909, "loss": 0.48487618565559387, "lr": 8.976566264866876e-06, "epoch": 2.155115511551155, "percentage": 71.84, "elapsed_time": "6:56:36", "remaining_time": "2:43:19"} +{"current_steps": 654, "total_steps": 909, "loss": 0.4661785364151001, "lr": 8.912556783206414e-06, "epoch": 2.1584158415841586, "percentage": 71.95, "elapsed_time": "6:57:32", "remaining_time": "2:42:48"} +{"current_steps": 655, "total_steps": 909, "loss": 0.48009538650512695, "lr": 8.84871084155328e-06, "epoch": 2.1617161716171616, "percentage": 72.06, "elapsed_time": "6:58:06", "remaining_time": "2:42:08"} +{"current_steps": 656, "total_steps": 909, "loss": 0.45644935965538025, "lr": 8.785029381636387e-06, "epoch": 2.165016501650165, "percentage": 72.17, "elapsed_time": "6:58:42", "remaining_time": "2:41:28"} +{"current_steps": 357, "total_steps": 1302, "loss": 0.42552047967910767, "lr": 3.646553674438017e-05, "epoch": 0.8235294117647058, "percentage": 27.42, "elapsed_time": "20:38:14", "remaining_time": "2 days, 6:37:42"} +{"current_steps": 657, "total_steps": 909, "loss": 0.4896699786186218, "lr": 8.721513342758516e-06, "epoch": 2.1683168316831685, "percentage": 72.28, "elapsed_time": "6:59:19", "remaining_time": "2:40:50"} +{"current_steps": 658, "total_steps": 909, "loss": 0.4286258816719055, "lr": 8.658163661782507e-06, "epoch": 2.1716171617161715, "percentage": 72.39, "elapsed_time": "6:59:57", "remaining_time": "2:40:11"} +{"current_steps": 659, "total_steps": 909, "loss": 0.42029869556427, "lr": 8.59498127311742e-06, "epoch": 2.174917491749175, "percentage": 72.5, "elapsed_time": "7:00:50", "remaining_time": 
"2:39:38"} +{"current_steps": 660, "total_steps": 909, "loss": 0.48522356152534485, "lr": 8.531967108704722e-06, "epoch": 2.1782178217821784, "percentage": 72.61, "elapsed_time": "7:01:23", "remaining_time": "2:38:58"} +{"current_steps": 661, "total_steps": 909, "loss": 0.461814284324646, "lr": 8.4691220980046e-06, "epoch": 2.1815181518151814, "percentage": 72.72, "elapsed_time": "7:01:52", "remaining_time": "2:38:17"} +{"current_steps": 358, "total_steps": 1302, "loss": 0.43928390741348267, "lr": 3.643501990120192e-05, "epoch": 0.825836216839677, "percentage": 27.5, "elapsed_time": "20:41:47", "remaining_time": "2 days, 6:34:27"} +{"current_steps": 662, "total_steps": 909, "loss": 0.49913299083709717, "lr": 8.406447167982205e-06, "epoch": 2.184818481848185, "percentage": 72.83, "elapsed_time": "7:02:43", "remaining_time": "2:37:43"} +{"current_steps": 663, "total_steps": 909, "loss": 0.4936009645462036, "lr": 8.343943243094008e-06, "epoch": 2.1881188118811883, "percentage": 72.94, "elapsed_time": "7:03:20", "remaining_time": "2:37:04"} +{"current_steps": 664, "total_steps": 909, "loss": 0.44817712903022766, "lr": 8.281611245274123e-06, "epoch": 2.1914191419141913, "percentage": 73.05, "elapsed_time": "7:03:55", "remaining_time": "2:36:25"} +{"current_steps": 665, "total_steps": 909, "loss": 0.482817143201828, "lr": 8.219452093920763e-06, "epoch": 2.1947194719471947, "percentage": 73.16, "elapsed_time": "7:04:30", "remaining_time": "2:35:45"} +{"current_steps": 666, "total_steps": 909, "loss": 0.4643383026123047, "lr": 8.157466705882645e-06, "epoch": 2.198019801980198, "percentage": 73.27, "elapsed_time": "7:05:07", "remaining_time": "2:35:06"} +{"current_steps": 359, "total_steps": 1302, "loss": 0.3848022222518921, "lr": 3.6404384765872645e-05, "epoch": 0.8281430219146482, "percentage": 27.57, "elapsed_time": "20:45:23", "remaining_time": "2 days, 6:31:19"} +{"current_steps": 667, "total_steps": 909, "loss": 0.4797602593898773, "lr": 8.095655995445472e-06, 
"epoch": 2.201320132013201, "percentage": 73.38, "elapsed_time": "7:05:59", "remaining_time": "2:34:33"} +{"current_steps": 668, "total_steps": 909, "loss": 0.44109994173049927, "lr": 8.03402087431844e-06, "epoch": 2.2046204620462047, "percentage": 73.49, "elapsed_time": "7:06:48", "remaining_time": "2:33:59"} +{"current_steps": 669, "total_steps": 909, "loss": 0.46359869837760925, "lr": 7.972562251620817e-06, "epoch": 2.207920792079208, "percentage": 73.6, "elapsed_time": "7:07:32", "remaining_time": "2:33:22"} +{"current_steps": 670, "total_steps": 909, "loss": 0.4576035141944885, "lr": 7.9112810338685e-06, "epoch": 2.211221122112211, "percentage": 73.71, "elapsed_time": "7:08:06", "remaining_time": "2:32:42"} +{"current_steps": 671, "total_steps": 909, "loss": 0.40902045369148254, "lr": 7.850178124960678e-06, "epoch": 2.2145214521452146, "percentage": 73.82, "elapsed_time": "7:08:42", "remaining_time": "2:32:03"} +{"current_steps": 672, "total_steps": 909, "loss": 0.45797932147979736, "lr": 7.789254426166454e-06, "epoch": 2.217821782178218, "percentage": 73.93, "elapsed_time": "7:09:25", "remaining_time": "2:31:26"} +{"current_steps": 360, "total_steps": 1302, "loss": 0.4336538314819336, "lr": 3.637363155889078e-05, "epoch": 0.8304498269896193, "percentage": 27.65, "elapsed_time": "20:48:50", "remaining_time": "2 days, 6:27:48"} +{"current_steps": 673, "total_steps": 909, "loss": 0.43204474449157715, "lr": 7.728510836111602e-06, "epoch": 2.221122112211221, "percentage": 74.04, "elapsed_time": "7:09:52", "remaining_time": "2:30:44"} +{"current_steps": 674, "total_steps": 909, "loss": 0.46007901430130005, "lr": 7.667948250765278e-06, "epoch": 2.2244224422442245, "percentage": 74.15, "elapsed_time": "7:10:24", "remaining_time": "2:30:04"} +{"current_steps": 675, "total_steps": 909, "loss": 0.46342402696609497, "lr": 7.607567563426823e-06, "epoch": 2.227722772277228, "percentage": 74.26, "elapsed_time": "7:11:05", "remaining_time": "2:29:26"} +{"current_steps": 676, 
"total_steps": 909, "loss": 0.48953354358673096, "lr": 7.5473696647125605e-06, "epoch": 2.231023102310231, "percentage": 74.37, "elapsed_time": "7:11:44", "remaining_time": "2:28:48"} +{"current_steps": 677, "total_steps": 909, "loss": 0.5022163391113281, "lr": 7.487355442542696e-06, "epoch": 2.2343234323432344, "percentage": 74.48, "elapsed_time": "7:12:16", "remaining_time": "2:28:08"} +{"current_steps": 678, "total_steps": 909, "loss": 0.5144001245498657, "lr": 7.4275257821281995e-06, "epoch": 2.237623762376238, "percentage": 74.59, "elapsed_time": "7:12:43", "remaining_time": "2:27:26"} +{"current_steps": 361, "total_steps": 1302, "loss": 0.41845884919166565, "lr": 3.634276050160459e-05, "epoch": 0.8327566320645905, "percentage": 27.73, "elapsed_time": "20:52:16", "remaining_time": "2 days, 6:24:13"} +{"current_steps": 679, "total_steps": 909, "loss": 0.489937961101532, "lr": 7.3678815659577505e-06, "epoch": 2.240924092409241, "percentage": 74.7, "elapsed_time": "7:13:17", "remaining_time": "2:26:46"} +{"current_steps": 680, "total_steps": 909, "loss": 0.48842746019363403, "lr": 7.3084236737847125e-06, "epoch": 2.2442244224422443, "percentage": 74.81, "elapsed_time": "7:13:48", "remaining_time": "2:26:05"} +{"current_steps": 681, "total_steps": 909, "loss": 0.5024458765983582, "lr": 7.249152982614176e-06, "epoch": 2.2475247524752477, "percentage": 74.92, "elapsed_time": "7:14:25", "remaining_time": "2:25:26"} +{"current_steps": 682, "total_steps": 909, "loss": 0.46162086725234985, "lr": 7.190070366690014e-06, "epoch": 2.2508250825082508, "percentage": 75.03, "elapsed_time": "7:14:58", "remaining_time": "2:24:46"} +{"current_steps": 683, "total_steps": 909, "loss": 0.44991785287857056, "lr": 7.13117669748199e-06, "epoch": 2.254125412541254, "percentage": 75.14, "elapsed_time": "7:15:29", "remaining_time": "2:24:06"} +{"current_steps": 684, "total_steps": 909, "loss": 0.43738633394241333, "lr": 7.072472843672877e-06, "epoch": 2.2574257425742577, "percentage": 
75.25, "elapsed_time": "7:16:06", "remaining_time": "2:23:27"} +{"current_steps": 362, "total_steps": 1302, "loss": 0.40771448612213135, "lr": 3.6311771816210544e-05, "epoch": 0.8350634371395617, "percentage": 27.8, "elapsed_time": "20:55:45", "remaining_time": "2 days, 6:20:48"} +{"current_steps": 685, "total_steps": 909, "loss": 0.46122169494628906, "lr": 7.013959671145691e-06, "epoch": 2.2607260726072607, "percentage": 75.36, "elapsed_time": "7:16:44", "remaining_time": "2:22:48"} +{"current_steps": 686, "total_steps": 909, "loss": 0.4504377841949463, "lr": 6.955638042970896e-06, "epoch": 2.264026402640264, "percentage": 75.47, "elapsed_time": "7:17:34", "remaining_time": "2:22:14"} +{"current_steps": 687, "total_steps": 909, "loss": 0.4620972275733948, "lr": 6.897508819393645e-06, "epoch": 2.2673267326732676, "percentage": 75.58, "elapsed_time": "7:18:07", "remaining_time": "2:21:34"} +{"current_steps": 688, "total_steps": 909, "loss": 0.5271490216255188, "lr": 6.8395728578211525e-06, "epoch": 2.2706270627062706, "percentage": 75.69, "elapsed_time": "7:18:39", "remaining_time": "2:20:54"} +{"current_steps": 689, "total_steps": 909, "loss": 0.4448450803756714, "lr": 6.781831012810001e-06, "epoch": 2.273927392739274, "percentage": 75.8, "elapsed_time": "7:19:12", "remaining_time": "2:20:14"} +{"current_steps": 363, "total_steps": 1302, "loss": 0.41530346870422363, "lr": 3.628066572575179e-05, "epoch": 0.8373702422145328, "percentage": 27.88, "elapsed_time": "20:59:23", "remaining_time": "2 days, 6:17:45"} +{"current_steps": 690, "total_steps": 909, "loss": 0.4602925181388855, "lr": 6.72428413605354e-06, "epoch": 2.2772277227722775, "percentage": 75.91, "elapsed_time": "7:20:10", "remaining_time": "2:19:42"} +{"current_steps": 691, "total_steps": 909, "loss": 0.4722862243652344, "lr": 6.6669330763693485e-06, "epoch": 2.2805280528052805, "percentage": 76.02, "elapsed_time": "7:20:53", "remaining_time": "2:19:05"} +{"current_steps": 692, "total_steps": 909, "loss": 
0.47454553842544556, "lr": 6.609778679686694e-06, "epoch": 2.283828382838284, "percentage": 76.13, "elapsed_time": "7:21:24", "remaining_time": "2:18:25"} +{"current_steps": 693, "total_steps": 909, "loss": 0.4750802516937256, "lr": 6.552821789034067e-06, "epoch": 2.287128712871287, "percentage": 76.24, "elapsed_time": "7:22:07", "remaining_time": "2:17:48"} +{"current_steps": 694, "total_steps": 909, "loss": 0.4640570282936096, "lr": 6.496063244526723e-06, "epoch": 2.2904290429042904, "percentage": 76.35, "elapsed_time": "7:22:49", "remaining_time": "2:17:11"} +{"current_steps": 695, "total_steps": 909, "loss": 0.47181540727615356, "lr": 6.439503883354323e-06, "epoch": 2.293729372937294, "percentage": 76.46, "elapsed_time": "7:23:33", "remaining_time": "2:16:34"} +{"current_steps": 364, "total_steps": 1302, "loss": 0.39871683716773987, "lr": 3.6249442454116474e-05, "epoch": 0.839677047289504, "percentage": 27.96, "elapsed_time": "21:02:58", "remaining_time": "2 days, 6:14:35"} +{"current_steps": 696, "total_steps": 909, "loss": 0.4335097372531891, "lr": 6.3831445397685755e-06, "epoch": 2.297029702970297, "percentage": 76.57, "elapsed_time": "7:24:09", "remaining_time": "2:15:55"} +{"current_steps": 697, "total_steps": 909, "loss": 0.5158364772796631, "lr": 6.3269860450709016e-06, "epoch": 2.3003300330033003, "percentage": 76.68, "elapsed_time": "7:24:53", "remaining_time": "2:15:19"} +{"current_steps": 698, "total_steps": 909, "loss": 0.497075617313385, "lr": 6.271029227600216e-06, "epoch": 2.3036303630363038, "percentage": 76.79, "elapsed_time": "7:25:20", "remaining_time": "2:14:37"} +{"current_steps": 699, "total_steps": 909, "loss": 0.4946526288986206, "lr": 6.215274912720697e-06, "epoch": 2.3069306930693068, "percentage": 76.9, "elapsed_time": "7:25:49", "remaining_time": "2:13:56"} +{"current_steps": 700, "total_steps": 909, "loss": 0.4632418155670166, "lr": 6.159723922809577e-06, "epoch": 2.31023102310231, "percentage": 77.01, "elapsed_time": "7:26:31", 
"remaining_time": "2:13:19"} +{"current_steps": 701, "total_steps": 909, "loss": 0.4936927258968353, "lr": 6.10437707724507e-06, "epoch": 2.3135313531353137, "percentage": 77.12, "elapsed_time": "7:26:57", "remaining_time": "2:12:37"} +{"current_steps": 365, "total_steps": 1302, "loss": 0.4190472960472107, "lr": 3.621810222603619e-05, "epoch": 0.8419838523644751, "percentage": 28.03, "elapsed_time": "21:06:24", "remaining_time": "2 days, 6:11:02"} +{"current_steps": 702, "total_steps": 909, "loss": 0.4373137056827545, "lr": 6.049235192394242e-06, "epoch": 2.3168316831683167, "percentage": 77.23, "elapsed_time": "7:27:24", "remaining_time": "2:11:55"} +{"current_steps": 703, "total_steps": 909, "loss": 0.49224400520324707, "lr": 5.994299081600996e-06, "epoch": 2.32013201320132, "percentage": 77.34, "elapsed_time": "7:27:56", "remaining_time": "2:11:15"} +{"current_steps": 704, "total_steps": 909, "loss": 0.453000545501709, "lr": 5.939569555174045e-06, "epoch": 2.3234323432343236, "percentage": 77.45, "elapsed_time": "7:28:33", "remaining_time": "2:10:37"} +{"current_steps": 705, "total_steps": 909, "loss": 0.4201410114765167, "lr": 5.885047420374992e-06, "epoch": 2.3267326732673266, "percentage": 77.56, "elapsed_time": "7:29:11", "remaining_time": "2:09:58"} +{"current_steps": 706, "total_steps": 909, "loss": 0.4817071557044983, "lr": 5.830733481406415e-06, "epoch": 2.33003300330033, "percentage": 77.67, "elapsed_time": "7:29:46", "remaining_time": "2:09:19"} +{"current_steps": 707, "total_steps": 909, "loss": 0.42609190940856934, "lr": 5.776628539399975e-06, "epoch": 2.3333333333333335, "percentage": 77.78, "elapsed_time": "7:30:27", "remaining_time": "2:08:42"} +{"current_steps": 366, "total_steps": 1302, "loss": 0.43681979179382324, "lr": 3.61866452670843e-05, "epoch": 0.8442906574394463, "percentage": 28.11, "elapsed_time": "21:09:59", "remaining_time": "2 days, 6:07:49"} +{"current_steps": 708, "total_steps": 909, "loss": 0.46225881576538086, "lr": 
5.722733392404652e-06, "epoch": 2.3366336633663365, "percentage": 77.89, "elapsed_time": "7:31:06", "remaining_time": "2:08:04"} +{"current_steps": 709, "total_steps": 909, "loss": 0.49061962962150574, "lr": 5.669048835374933e-06, "epoch": 2.33993399339934, "percentage": 78.0, "elapsed_time": "7:31:36", "remaining_time": "2:07:23"} +{"current_steps": 710, "total_steps": 909, "loss": 0.4506024122238159, "lr": 5.615575660159089e-06, "epoch": 2.3432343234323434, "percentage": 78.11, "elapsed_time": "7:32:14", "remaining_time": "2:06:45"} +{"current_steps": 711, "total_steps": 909, "loss": 0.4433022141456604, "lr": 5.562314655487522e-06, "epoch": 2.3465346534653464, "percentage": 78.22, "elapsed_time": "7:32:56", "remaining_time": "2:06:08"} +{"current_steps": 712, "total_steps": 909, "loss": 0.45988917350769043, "lr": 5.5092666069611055e-06, "epoch": 2.34983498349835, "percentage": 78.33, "elapsed_time": "7:33:32", "remaining_time": "2:05:29"} +{"current_steps": 367, "total_steps": 1302, "loss": 0.3929348289966583, "lr": 3.615507180367437e-05, "epoch": 0.8465974625144176, "percentage": 28.19, "elapsed_time": "21:13:19", "remaining_time": "2 days, 6:04:02"} +{"current_steps": 713, "total_steps": 909, "loss": 0.44675180315971375, "lr": 5.4564322970396154e-06, "epoch": 2.3531353135313533, "percentage": 78.44, "elapsed_time": "7:34:11", "remaining_time": "2:04:51"} +{"current_steps": 714, "total_steps": 909, "loss": 0.46991807222366333, "lr": 5.403812505030157e-06, "epoch": 2.3564356435643563, "percentage": 78.55, "elapsed_time": "7:34:44", "remaining_time": "2:04:11"} +{"current_steps": 715, "total_steps": 909, "loss": 0.49208664894104004, "lr": 5.351408007075714e-06, "epoch": 2.3597359735973598, "percentage": 78.66, "elapsed_time": "7:35:35", "remaining_time": "2:03:36"} +{"current_steps": 716, "total_steps": 909, "loss": 0.48280128836631775, "lr": 5.299219576143673e-06, "epoch": 2.363036303630363, "percentage": 78.77, "elapsed_time": "7:36:06", "remaining_time": 
"2:02:56"} +{"current_steps": 717, "total_steps": 909, "loss": 0.4491961896419525, "lr": 5.247247982014414e-06, "epoch": 2.366336633663366, "percentage": 78.88, "elapsed_time": "7:36:41", "remaining_time": "2:02:17"} +{"current_steps": 368, "total_steps": 1302, "loss": 0.41809314489364624, "lr": 3.612338206305851e-05, "epoch": 0.8489042675893888, "percentage": 28.26, "elapsed_time": "21:16:50", "remaining_time": "2 days, 6:00:41"} +{"current_steps": 718, "total_steps": 909, "loss": 0.4943190813064575, "lr": 5.195493991269991e-06, "epoch": 2.3696369636963697, "percentage": 78.99, "elapsed_time": "7:37:28", "remaining_time": "2:01:41"} +{"current_steps": 719, "total_steps": 909, "loss": 0.4586840867996216, "lr": 5.143958367282795e-06, "epoch": 2.372937293729373, "percentage": 79.1, "elapsed_time": "7:38:00", "remaining_time": "2:01:01"} +{"current_steps": 720, "total_steps": 909, "loss": 0.46227943897247314, "lr": 5.0926418702042914e-06, "epoch": 2.376237623762376, "percentage": 79.21, "elapsed_time": "7:38:39", "remaining_time": "2:00:23"} +{"current_steps": 721, "total_steps": 909, "loss": 0.45386868715286255, "lr": 5.041545256953839e-06, "epoch": 2.3795379537953796, "percentage": 79.32, "elapsed_time": "7:39:22", "remaining_time": "1:59:46"} +{"current_steps": 722, "total_steps": 909, "loss": 0.5026980042457581, "lr": 4.990669281207492e-06, "epoch": 2.382838283828383, "percentage": 79.43, "elapsed_time": "7:39:58", "remaining_time": "1:59:08"} +{"current_steps": 723, "total_steps": 909, "loss": 0.4834757447242737, "lr": 4.940014693386909e-06, "epoch": 2.386138613861386, "percentage": 79.54, "elapsed_time": "7:40:38", "remaining_time": "1:58:30"} +{"current_steps": 369, "total_steps": 1302, "loss": 0.4286714792251587, "lr": 3.609157627332574e-05, "epoch": 0.8512110726643599, "percentage": 28.34, "elapsed_time": "21:20:08", "remaining_time": "2 days, 5:56:47"} +{"current_steps": 724, "total_steps": 909, "loss": 0.44382545351982117, "lr": 4.889582240648254e-06, 
"epoch": 2.3894389438943895, "percentage": 79.65, "elapsed_time": "7:41:14", "remaining_time": "1:57:51"} +{"current_steps": 725, "total_steps": 909, "loss": 0.45313894748687744, "lr": 4.839372666871212e-06, "epoch": 2.3927392739273925, "percentage": 79.76, "elapsed_time": "7:42:06", "remaining_time": "1:57:16"} +{"current_steps": 726, "total_steps": 909, "loss": 0.4597586393356323, "lr": 4.789386712647994e-06, "epoch": 2.396039603960396, "percentage": 79.87, "elapsed_time": "7:42:44", "remaining_time": "1:56:38"} +{"current_steps": 727, "total_steps": 909, "loss": 0.4427994191646576, "lr": 4.739625115272408e-06, "epoch": 2.3993399339933994, "percentage": 79.98, "elapsed_time": "7:43:16", "remaining_time": "1:55:58"} +{"current_steps": 728, "total_steps": 909, "loss": 0.4459637403488159, "lr": 4.690088608729007e-06, "epoch": 2.4026402640264024, "percentage": 80.09, "elapsed_time": "7:43:55", "remaining_time": "1:55:20"} +{"current_steps": 370, "total_steps": 1302, "loss": 0.40744441747665405, "lr": 3.605965466340035e-05, "epoch": 0.8535178777393311, "percentage": 28.42, "elapsed_time": "21:23:29", "remaining_time": "2 days, 5:53:00"} +{"current_steps": 729, "total_steps": 909, "loss": 0.5043150186538696, "lr": 4.640777923682247e-06, "epoch": 2.405940594059406, "percentage": 80.2, "elapsed_time": "7:44:33", "remaining_time": "1:54:42"} +{"current_steps": 730, "total_steps": 909, "loss": 0.4942860007286072, "lr": 4.5916937874657055e-06, "epoch": 2.4092409240924093, "percentage": 80.31, "elapsed_time": "7:45:06", "remaining_time": "1:54:02"} +{"current_steps": 731, "total_steps": 909, "loss": 0.4572402834892273, "lr": 4.5428369240713655e-06, "epoch": 2.4125412541254123, "percentage": 80.42, "elapsed_time": "7:45:40", "remaining_time": "1:53:23"} +{"current_steps": 732, "total_steps": 909, "loss": 0.44927412271499634, "lr": 4.494208054138934e-06, "epoch": 2.4158415841584158, "percentage": 80.53, "elapsed_time": "7:46:26", "remaining_time": "1:52:47"} +{"current_steps": 
733, "total_steps": 909, "loss": 0.461928129196167, "lr": 4.445807894945211e-06, "epoch": 2.419141914191419, "percentage": 80.64, "elapsed_time": "7:47:11", "remaining_time": "1:52:10"} +{"current_steps": 371, "total_steps": 1302, "loss": 0.38964733481407166, "lr": 3.602761746304025e-05, "epoch": 0.8558246828143022, "percentage": 28.49, "elapsed_time": "21:27:03", "remaining_time": "2 days, 5:49:48"} +{"current_steps": 734, "total_steps": 909, "loss": 0.46279191970825195, "lr": 4.397637160393493e-06, "epoch": 2.4224422442244222, "percentage": 80.75, "elapsed_time": "7:47:51", "remaining_time": "1:51:32"} +{"current_steps": 735, "total_steps": 909, "loss": 0.48653045296669006, "lr": 4.349696561003076e-06, "epoch": 2.4257425742574257, "percentage": 80.86, "elapsed_time": "7:48:28", "remaining_time": "1:50:54"} +{"current_steps": 736, "total_steps": 909, "loss": 0.4587661027908325, "lr": 4.301986803898752e-06, "epoch": 2.429042904290429, "percentage": 80.97, "elapsed_time": "7:48:59", "remaining_time": "1:50:14"} +{"current_steps": 737, "total_steps": 909, "loss": 0.4946083426475525, "lr": 4.2545085928003906e-06, "epoch": 2.432343234323432, "percentage": 81.08, "elapsed_time": "7:49:58", "remaining_time": "1:49:41"} +{"current_steps": 738, "total_steps": 909, "loss": 0.4614926278591156, "lr": 4.207262628012534e-06, "epoch": 2.4356435643564356, "percentage": 81.19, "elapsed_time": "7:50:43", "remaining_time": "1:49:04"} +{"current_steps": 372, "total_steps": 1302, "loss": 0.3991929292678833, "lr": 3.5995464902835335e-05, "epoch": 0.8581314878892734, "percentage": 28.57, "elapsed_time": "21:30:38", "remaining_time": "2 days, 5:46:36"} +{"current_steps": 739, "total_steps": 909, "loss": 0.46377992630004883, "lr": 4.160249606414109e-06, "epoch": 2.438943894389439, "percentage": 81.3, "elapsed_time": "7:51:14", "remaining_time": "1:48:24"} +{"current_steps": 740, "total_steps": 909, "loss": 0.4217844009399414, "lr": 4.1134702214481126e-06, "epoch": 2.442244224422442, 
"percentage": 81.41, "elapsed_time": "7:51:44", "remaining_time": "1:47:44"} +{"current_steps": 741, "total_steps": 909, "loss": 0.4616321325302124, "lr": 4.066925163111406e-06, "epoch": 2.4455445544554455, "percentage": 81.52, "elapsed_time": "7:52:14", "remaining_time": "1:47:04"} +{"current_steps": 742, "total_steps": 909, "loss": 0.48755043745040894, "lr": 4.020615117944515e-06, "epoch": 2.448844884488449, "percentage": 81.63, "elapsed_time": "7:52:58", "remaining_time": "1:46:27"} +{"current_steps": 743, "total_steps": 909, "loss": 0.47338151931762695, "lr": 3.974540769021529e-06, "epoch": 2.452145214521452, "percentage": 81.74, "elapsed_time": "7:53:28", "remaining_time": "1:45:46"} +{"current_steps": 744, "total_steps": 909, "loss": 0.47220849990844727, "lr": 3.928702795940007e-06, "epoch": 2.4554455445544554, "percentage": 81.85, "elapsed_time": "7:54:07", "remaining_time": "1:45:08"} +{"current_steps": 373, "total_steps": 1302, "loss": 0.40249741077423096, "lr": 3.596319721420579e-05, "epoch": 0.8604382929642446, "percentage": 28.65, "elapsed_time": "21:34:11", "remaining_time": "2 days, 5:43:20"} +{"current_steps": 745, "total_steps": 909, "loss": 0.4117845296859741, "lr": 3.883101874810966e-06, "epoch": 2.458745874587459, "percentage": 81.96, "elapsed_time": "7:54:53", "remaining_time": "1:44:32"} +{"current_steps": 746, "total_steps": 909, "loss": 0.44338276982307434, "lr": 3.8377386782488875e-06, "epoch": 2.462046204620462, "percentage": 82.07, "elapsed_time": "7:55:36", "remaining_time": "1:43:55"} +{"current_steps": 747, "total_steps": 909, "loss": 0.470272958278656, "lr": 3.7926138753618257e-06, "epoch": 2.4653465346534653, "percentage": 82.18, "elapsed_time": "7:56:19", "remaining_time": "1:43:17"} +{"current_steps": 748, "total_steps": 909, "loss": 0.4825139045715332, "lr": 3.747728131741517e-06, "epoch": 2.4686468646864688, "percentage": 82.29, "elapsed_time": "7:56:56", "remaining_time": "1:42:39"} +{"current_steps": 749, "total_steps": 909, 
"loss": 0.43612140417099, "lr": 3.703082109453575e-06, "epoch": 2.4719471947194718, "percentage": 82.4, "elapsed_time": "7:57:31", "remaining_time": "1:42:00"} +{"current_steps": 750, "total_steps": 909, "loss": 0.4573146402835846, "lr": 3.6586764670277065e-06, "epoch": 2.4752475247524752, "percentage": 82.51, "elapsed_time": "7:58:09", "remaining_time": "1:41:22"} +{"current_steps": 374, "total_steps": 1302, "loss": 0.40938979387283325, "lr": 3.593081462940045e-05, "epoch": 0.8627450980392157, "percentage": 28.73, "elapsed_time": "21:37:42", "remaining_time": "2 days, 5:39:57"} +{"current_steps": 751, "total_steps": 909, "loss": 0.4419093430042267, "lr": 3.61451185944802e-06, "epoch": 2.4785478547854787, "percentage": 82.62, "elapsed_time": "7:58:50", "remaining_time": "1:40:44"} +{"current_steps": 752, "total_steps": 909, "loss": 0.440906822681427, "lr": 3.570588938143353e-06, "epoch": 2.4818481848184817, "percentage": 82.73, "elapsed_time": "7:59:28", "remaining_time": "1:40:06"} +{"current_steps": 753, "total_steps": 909, "loss": 0.432383269071579, "lr": 3.5269083509776735e-06, "epoch": 2.485148514851485, "percentage": 82.84, "elapsed_time": "8:00:05", "remaining_time": "1:39:27"} +{"current_steps": 754, "total_steps": 909, "loss": 0.4615401029586792, "lr": 3.4834707422404957e-06, "epoch": 2.4884488448844886, "percentage": 82.95, "elapsed_time": "8:00:41", "remaining_time": "1:38:49"} +{"current_steps": 755, "total_steps": 909, "loss": 0.43933019042015076, "lr": 3.440276752637417e-06, "epoch": 2.4917491749174916, "percentage": 83.06, "elapsed_time": "8:01:16", "remaining_time": "1:38:10"} +{"current_steps": 375, "total_steps": 1302, "loss": 0.4158065915107727, "lr": 3.589831738149514e-05, "epoch": 0.8650519031141869, "percentage": 28.8, "elapsed_time": "21:41:20", "remaining_time": "2 days, 5:36:55"} +{"current_steps": 756, "total_steps": 909, "loss": 0.4651945233345032, "lr": 3.3973270192806427e-06, "epoch": 2.495049504950495, "percentage": 83.17, 
"elapsed_time": "8:01:56", "remaining_time": "1:37:32"} +{"current_steps": 757, "total_steps": 909, "loss": 0.4423069953918457, "lr": 3.3546221756795874e-06, "epoch": 2.4983498349834985, "percentage": 83.28, "elapsed_time": "8:02:50", "remaining_time": "1:36:57"} +{"current_steps": 758, "total_steps": 909, "loss": 0.4905679225921631, "lr": 3.3121628517315373e-06, "epoch": 2.5016501650165015, "percentage": 83.39, "elapsed_time": "8:03:30", "remaining_time": "1:36:19"} +{"current_steps": 759, "total_steps": 909, "loss": 0.46989548206329346, "lr": 3.2699496737123758e-06, "epoch": 2.504950495049505, "percentage": 83.5, "elapsed_time": "8:04:10", "remaining_time": "1:35:41"} +{"current_steps": 760, "total_steps": 909, "loss": 0.5168344378471375, "lr": 3.2279832642673025e-06, "epoch": 2.5082508250825084, "percentage": 83.61, "elapsed_time": "8:05:03", "remaining_time": "1:35:05"} +{"current_steps": 376, "total_steps": 1302, "loss": 0.42090845108032227, "lr": 3.586570570439097e-05, "epoch": 0.8673587081891581, "percentage": 28.88, "elapsed_time": "21:44:52", "remaining_time": "2 days, 5:33:35"} +{"current_steps": 761, "total_steps": 909, "loss": 0.46055924892425537, "lr": 3.186264242401693e-06, "epoch": 2.5115511551155114, "percentage": 83.72, "elapsed_time": "8:05:40", "remaining_time": "1:34:27"} +{"current_steps": 762, "total_steps": 909, "loss": 0.5135318040847778, "lr": 3.144793223471949e-06, "epoch": 2.514851485148515, "percentage": 83.83, "elapsed_time": "8:06:07", "remaining_time": "1:33:46"} +{"current_steps": 763, "total_steps": 909, "loss": 0.5026534199714661, "lr": 3.1035708191764246e-06, "epoch": 2.5181518151815183, "percentage": 83.94, "elapsed_time": "8:06:47", "remaining_time": "1:33:08"} +{"current_steps": 764, "total_steps": 909, "loss": 0.43348389863967896, "lr": 3.0625976375463938e-06, "epoch": 2.5214521452145213, "percentage": 84.05, "elapsed_time": "8:07:18", "remaining_time": "1:32:29"} +{"current_steps": 765, "total_steps": 909, "loss": 
0.4620594382286072, "lr": 3.021874282937103e-06, "epoch": 2.5247524752475248, "percentage": 84.16, "elapsed_time": "8:08:09", "remaining_time": "1:31:53"} +{"current_steps": 766, "total_steps": 909, "loss": 0.4646865725517273, "lr": 2.9814013560188425e-06, "epoch": 2.5280528052805282, "percentage": 84.27, "elapsed_time": "8:08:41", "remaining_time": "1:31:13"} +{"current_steps": 377, "total_steps": 1302, "loss": 0.39470359683036804, "lr": 3.583297983281265e-05, "epoch": 0.8696655132641292, "percentage": 28.96, "elapsed_time": "21:48:18", "remaining_time": "2 days, 5:30:03"} +{"current_steps": 767, "total_steps": 909, "loss": 0.46846333146095276, "lr": 2.9411794537680795e-06, "epoch": 2.5313531353135312, "percentage": 84.38, "elapsed_time": "8:09:25", "remaining_time": "1:30:36"} +{"current_steps": 768, "total_steps": 909, "loss": 0.487953782081604, "lr": 2.901209169458672e-06, "epoch": 2.5346534653465347, "percentage": 84.49, "elapsed_time": "8:09:51", "remaining_time": "1:29:56"} +{"current_steps": 769, "total_steps": 909, "loss": 0.4543481469154358, "lr": 2.861491092653115e-06, "epoch": 2.537953795379538, "percentage": 84.6, "elapsed_time": "8:10:39", "remaining_time": "1:29:19"} +{"current_steps": 770, "total_steps": 909, "loss": 0.4961584806442261, "lr": 2.822025809193818e-06, "epoch": 2.541254125412541, "percentage": 84.71, "elapsed_time": "8:11:24", "remaining_time": "1:28:42"} +{"current_steps": 378, "total_steps": 1302, "loss": 0.4191187620162964, "lr": 3.580014000230682e-05, "epoch": 0.8719723183391004, "percentage": 29.03, "elapsed_time": "21:51:31", "remaining_time": "2 days, 5:25:56"} +{"current_steps": 771, "total_steps": 909, "loss": 0.44123750925064087, "lr": 2.7828139011944967e-06, "epoch": 2.5445544554455446, "percentage": 84.82, "elapsed_time": "8:12:09", "remaining_time": "1:28:05"} +{"current_steps": 772, "total_steps": 909, "loss": 0.43014320731163025, "lr": 2.743855947031575e-06, "epoch": 2.547854785478548, "percentage": 84.93, "elapsed_time": 
"8:12:44", "remaining_time": "1:27:26"} +{"current_steps": 773, "total_steps": 909, "loss": 0.4774499535560608, "lr": 2.7051525213356546e-06, "epoch": 2.551155115511551, "percentage": 85.04, "elapsed_time": "8:13:21", "remaining_time": "1:26:47"} +{"current_steps": 774, "total_steps": 909, "loss": 0.44963133335113525, "lr": 2.6667041949830186e-06, "epoch": 2.5544554455445545, "percentage": 85.15, "elapsed_time": "8:14:05", "remaining_time": "1:26:10"} +{"current_steps": 775, "total_steps": 909, "loss": 0.4840245842933655, "lr": 2.6285115350872524e-06, "epoch": 2.557755775577558, "percentage": 85.26, "elapsed_time": "8:14:34", "remaining_time": "1:25:30"} +{"current_steps": 776, "total_steps": 909, "loss": 0.5490096807479858, "lr": 2.5905751049908466e-06, "epoch": 2.561056105610561, "percentage": 85.37, "elapsed_time": "8:15:01", "remaining_time": "1:24:50"} +{"current_steps": 777, "total_steps": 909, "loss": 0.4965711832046509, "lr": 2.5528954642568947e-06, "epoch": 2.5643564356435644, "percentage": 85.48, "elapsed_time": "8:15:38", "remaining_time": "1:24:12"} +{"current_steps": 379, "total_steps": 1302, "loss": 0.419559508562088, "lr": 3.5767186449240356e-05, "epoch": 0.8742791234140715, "percentage": 29.11, "elapsed_time": "21:55:06", "remaining_time": "2 days, 5:22:46"} +{"current_steps": 778, "total_steps": 909, "loss": 0.4518459439277649, "lr": 2.5154731686608424e-06, "epoch": 2.567656765676568, "percentage": 85.59, "elapsed_time": "8:16:34", "remaining_time": "1:23:36"} +{"current_steps": 779, "total_steps": 909, "loss": 0.5022287964820862, "lr": 2.4783087701823026e-06, "epoch": 2.570957095709571, "percentage": 85.7, "elapsed_time": "8:17:19", "remaining_time": "1:22:59"} +{"current_steps": 780, "total_steps": 909, "loss": 0.47195330262184143, "lr": 2.441402816996876e-06, "epoch": 2.5742574257425743, "percentage": 85.81, "elapsed_time": "8:17:53", "remaining_time": "1:22:20"} +{"current_steps": 781, "total_steps": 909, "loss": 0.5155715346336365, "lr": 
2.4047558534681124e-06, "epoch": 2.5775577557755778, "percentage": 85.92, "elapsed_time": "8:18:36", "remaining_time": "1:21:43"} +{"current_steps": 782, "total_steps": 909, "loss": 0.46963661909103394, "lr": 2.3683684201394507e-06, "epoch": 2.580858085808581, "percentage": 86.03, "elapsed_time": "8:19:04", "remaining_time": "1:21:03"} +{"current_steps": 380, "total_steps": 1302, "loss": 0.4139449894428253, "lr": 3.5734119410798646e-05, "epoch": 0.8765859284890427, "percentage": 29.19, "elapsed_time": "21:58:39", "remaining_time": "2 days, 5:19:28"} +{"current_steps": 783, "total_steps": 909, "loss": 0.4279938340187073, "lr": 2.3322410537262495e-06, "epoch": 2.5841584158415842, "percentage": 86.14, "elapsed_time": "8:19:40", "remaining_time": "1:20:24"} +{"current_steps": 784, "total_steps": 909, "loss": 0.47818487882614136, "lr": 2.296374287107883e-06, "epoch": 2.5874587458745877, "percentage": 86.25, "elapsed_time": "8:20:17", "remaining_time": "1:19:45"} +{"current_steps": 785, "total_steps": 909, "loss": 0.4445609152317047, "lr": 2.260768649319869e-06, "epoch": 2.5907590759075907, "percentage": 86.36, "elapsed_time": "8:20:56", "remaining_time": "1:19:07"} +{"current_steps": 786, "total_steps": 909, "loss": 0.4838835895061493, "lr": 2.2254246655460765e-06, "epoch": 2.594059405940594, "percentage": 86.47, "elapsed_time": "8:21:29", "remaining_time": "1:18:28"} +{"current_steps": 787, "total_steps": 909, "loss": 0.4454101324081421, "lr": 2.1903428571109566e-06, "epoch": 2.5973597359735976, "percentage": 86.58, "elapsed_time": "8:22:03", "remaining_time": "1:17:49"} +{"current_steps": 381, "total_steps": 1302, "loss": 0.4354771673679352, "lr": 3.570093912498391e-05, "epoch": 0.8788927335640139, "percentage": 29.26, "elapsed_time": "22:01:59", "remaining_time": "2 days, 5:15:40"} +{"current_steps": 788, "total_steps": 909, "loss": 0.46468472480773926, "lr": 2.1555237414718854e-06, "epoch": 2.6006600660066006, "percentage": 86.69, "elapsed_time": "8:22:43", 
"remaining_time": "1:17:11"} +{"current_steps": 789, "total_steps": 909, "loss": 0.508684515953064, "lr": 2.1209678322115133e-06, "epoch": 2.603960396039604, "percentage": 86.8, "elapsed_time": "8:23:13", "remaining_time": "1:16:32"} +{"current_steps": 790, "total_steps": 909, "loss": 0.46998751163482666, "lr": 2.0866756390301778e-06, "epoch": 2.6072607260726075, "percentage": 86.91, "elapsed_time": "8:23:45", "remaining_time": "1:15:52"} +{"current_steps": 791, "total_steps": 909, "loss": 0.41589513421058655, "lr": 2.0526476677384123e-06, "epoch": 2.6105610561056105, "percentage": 87.02, "elapsed_time": "8:24:23", "remaining_time": "1:15:14"} +{"current_steps": 792, "total_steps": 909, "loss": 0.4948643445968628, "lr": 2.018884420249474e-06, "epoch": 2.613861386138614, "percentage": 87.13, "elapsed_time": "8:25:02", "remaining_time": "1:14:36"} +{"current_steps": 793, "total_steps": 909, "loss": 0.4494874179363251, "lr": 1.9853863945719243e-06, "epoch": 2.6171617161716174, "percentage": 87.24, "elapsed_time": "8:25:51", "remaining_time": "1:13:59"} +{"current_steps": 382, "total_steps": 1302, "loss": 0.4134560823440552, "lr": 3.566764583061345e-05, "epoch": 0.881199538638985, "percentage": 29.34, "elapsed_time": "22:05:26", "remaining_time": "2 days, 5:12:08"} +{"current_steps": 794, "total_steps": 909, "loss": 0.42173343896865845, "lr": 1.9521540848023113e-06, "epoch": 2.6204620462046204, "percentage": 87.35, "elapsed_time": "8:26:23", "remaining_time": "1:13:20"} +{"current_steps": 795, "total_steps": 909, "loss": 0.4319555461406708, "lr": 1.9191879811178605e-06, "epoch": 2.623762376237624, "percentage": 87.46, "elapsed_time": "8:27:09", "remaining_time": "1:12:43"} +{"current_steps": 796, "total_steps": 909, "loss": 0.40467706322669983, "lr": 1.8864885697692582e-06, "epoch": 2.6270627062706273, "percentage": 87.57, "elapsed_time": "8:27:52", "remaining_time": "1:12:05"} +{"current_steps": 797, "total_steps": 909, "loss": 0.5141273736953735, "lr": 
1.8540563330734662e-06, "epoch": 2.6303630363036303, "percentage": 87.68, "elapsed_time": "8:28:29", "remaining_time": "1:11:27"} +{"current_steps": 798, "total_steps": 909, "loss": 0.44990289211273193, "lr": 1.8218917494066212e-06, "epoch": 2.633663366336634, "percentage": 87.79, "elapsed_time": "8:29:11", "remaining_time": "1:10:49"} +{"current_steps": 383, "total_steps": 1302, "loss": 0.4183395802974701, "lr": 3.563423976731799e-05, "epoch": 0.8835063437139562, "percentage": 29.42, "elapsed_time": "22:09:03", "remaining_time": "2 days, 5:09:03"} +{"current_steps": 799, "total_steps": 909, "loss": 0.4878673553466797, "lr": 1.7899952931969756e-06, "epoch": 2.6369636963696372, "percentage": 87.9, "elapsed_time": "8:29:46", "remaining_time": "1:10:10"} +{"current_steps": 800, "total_steps": 909, "loss": 0.46406376361846924, "lr": 1.7583674349178803e-06, "epoch": 2.6402640264026402, "percentage": 88.01, "elapsed_time": "8:30:25", "remaining_time": "1:09:32"} +{"current_steps": 801, "total_steps": 909, "loss": 0.44470641016960144, "lr": 1.7270086410808762e-06, "epoch": 2.6435643564356437, "percentage": 88.12, "elapsed_time": "8:31:37", "remaining_time": "1:08:59"} +{"current_steps": 802, "total_steps": 909, "loss": 0.5306479930877686, "lr": 1.695919374228796e-06, "epoch": 2.6468646864686467, "percentage": 88.23, "elapsed_time": "8:32:17", "remaining_time": "1:08:20"} +{"current_steps": 803, "total_steps": 909, "loss": 0.4570600390434265, "lr": 1.6651000929289462e-06, "epoch": 2.65016501650165, "percentage": 88.34, "elapsed_time": "8:32:59", "remaining_time": "1:07:43"} +{"current_steps": 384, "total_steps": 1302, "loss": 0.39182937145233154, "lr": 3.560072117553989e-05, "epoch": 0.8858131487889274, "percentage": 29.49, "elapsed_time": "22:12:33", "remaining_time": "2 days, 5:05:38"} +{"current_steps": 804, "total_steps": 909, "loss": 0.48561781644821167, "lr": 1.6345512517663275e-06, "epoch": 2.6534653465346536, "percentage": 88.45, "elapsed_time": "8:33:31", 
"remaining_time": "1:07:03"} +{"current_steps": 805, "total_steps": 909, "loss": 0.4666748642921448, "lr": 1.6042733013369604e-06, "epoch": 2.6567656765676566, "percentage": 88.56, "elapsed_time": "8:34:02", "remaining_time": "1:06:24"} +{"current_steps": 806, "total_steps": 909, "loss": 0.4761434495449066, "lr": 1.5742666882412106e-06, "epoch": 2.66006600660066, "percentage": 88.67, "elapsed_time": "8:34:36", "remaining_time": "1:05:45"} +{"current_steps": 807, "total_steps": 909, "loss": 0.4475252628326416, "lr": 1.5445318550772204e-06, "epoch": 2.6633663366336635, "percentage": 88.78, "elapsed_time": "8:35:13", "remaining_time": "1:05:07"} +{"current_steps": 808, "total_steps": 909, "loss": 0.5299564599990845, "lr": 1.5150692404343637e-06, "epoch": 2.6666666666666665, "percentage": 88.89, "elapsed_time": "8:35:44", "remaining_time": "1:04:28"} +{"current_steps": 809, "total_steps": 909, "loss": 0.518581748008728, "lr": 1.4858792788867904e-06, "epoch": 2.66996699669967, "percentage": 89.0, "elapsed_time": "8:36:20", "remaining_time": "1:03:49"} +{"current_steps": 385, "total_steps": 1302, "loss": 0.41256874799728394, "lr": 3.5567090296531455e-05, "epoch": 0.8881199538638985, "percentage": 29.57, "elapsed_time": "22:16:02", "remaining_time": "2 days, 5:02:12"} +{"current_steps": 810, "total_steps": 909, "loss": 0.5162506103515625, "lr": 1.4569624009870165e-06, "epoch": 2.6732673267326734, "percentage": 89.11, "elapsed_time": "8:36:59", "remaining_time": "1:03:11"} +{"current_steps": 811, "total_steps": 909, "loss": 0.4762595593929291, "lr": 1.4283190332595665e-06, "epoch": 2.6765676567656764, "percentage": 89.22, "elapsed_time": "8:37:32", "remaining_time": "1:02:32"} +{"current_steps": 812, "total_steps": 909, "loss": 0.44347697496414185, "lr": 1.3999495981946764e-06, "epoch": 2.67986798679868, "percentage": 89.33, "elapsed_time": "8:38:04", "remaining_time": "1:01:53"} +{"current_steps": 813, "total_steps": 909, "loss": 0.4344146251678467, "lr": 
1.3718545142420768e-06, "epoch": 2.6831683168316833, "percentage": 89.44, "elapsed_time": "8:38:48", "remaining_time": "1:01:15"} +{"current_steps": 814, "total_steps": 909, "loss": 0.4936307668685913, "lr": 1.344034195804813e-06, "epoch": 2.6864686468646863, "percentage": 89.55, "elapsed_time": "8:39:18", "remaining_time": "1:00:36"} +{"current_steps": 815, "total_steps": 909, "loss": 0.43635520339012146, "lr": 1.3164890532331386e-06, "epoch": 2.68976897689769, "percentage": 89.66, "elapsed_time": "8:39:55", "remaining_time": "0:59:58"} +{"current_steps": 386, "total_steps": 1302, "loss": 0.41440385580062866, "lr": 3.553334737235316e-05, "epoch": 0.8904267589388697, "percentage": 29.65, "elapsed_time": "22:19:40", "remaining_time": "2 days, 4:59:07"} +{"current_steps": 816, "total_steps": 909, "loss": 0.48006054759025574, "lr": 1.2892194928184499e-06, "epoch": 2.693069306930693, "percentage": 89.77, "elapsed_time": "8:40:29", "remaining_time": "0:59:19"} +{"current_steps": 817, "total_steps": 909, "loss": 0.4296647906303406, "lr": 1.2622259167873008e-06, "epoch": 2.6963696369636962, "percentage": 89.88, "elapsed_time": "8:41:12", "remaining_time": "0:58:41"} +{"current_steps": 818, "total_steps": 909, "loss": 0.47840994596481323, "lr": 1.2355087232954754e-06, "epoch": 2.6996699669966997, "percentage": 89.99, "elapsed_time": "8:41:52", "remaining_time": "0:58:03"} +{"current_steps": 819, "total_steps": 909, "loss": 0.41459953784942627, "lr": 1.209068306422112e-06, "epoch": 2.7029702970297027, "percentage": 90.1, "elapsed_time": "8:42:33", "remaining_time": "0:57:25"} +{"current_steps": 820, "total_steps": 909, "loss": 0.4278629422187805, "lr": 1.1829050561638766e-06, "epoch": 2.706270627062706, "percentage": 90.21, "elapsed_time": "8:43:26", "remaining_time": "0:56:48"} +{"current_steps": 387, "total_steps": 1302, "loss": 0.45127183198928833, "lr": 3.549949264587196e-05, "epoch": 0.8927335640138409, "percentage": 29.72, "elapsed_time": "22:22:56", "remaining_time": 
"2 days, 4:55:10"} +{"current_steps": 821, "total_steps": 909, "loss": 0.44538602232933044, "lr": 1.1570193584292323e-06, "epoch": 2.7095709570957096, "percentage": 90.32, "elapsed_time": "8:43:57", "remaining_time": "0:56:09"} +{"current_steps": 822, "total_steps": 909, "loss": 0.4757949709892273, "lr": 1.1314115950327365e-06, "epoch": 2.7128712871287126, "percentage": 90.43, "elapsed_time": "8:44:42", "remaining_time": "0:55:32"} +{"current_steps": 823, "total_steps": 909, "loss": 0.49131542444229126, "lr": 1.106082143689402e-06, "epoch": 2.716171617161716, "percentage": 90.54, "elapsed_time": "8:45:19", "remaining_time": "0:54:53"} +{"current_steps": 824, "total_steps": 909, "loss": 0.4917967915534973, "lr": 1.0810313780091408e-06, "epoch": 2.7194719471947195, "percentage": 90.65, "elapsed_time": "8:45:52", "remaining_time": "0:54:14"} +{"current_steps": 825, "total_steps": 909, "loss": 0.4949303865432739, "lr": 1.056259667491244e-06, "epoch": 2.7227722772277225, "percentage": 90.76, "elapsed_time": "8:46:26", "remaining_time": "0:53:36"} +{"current_steps": 826, "total_steps": 909, "loss": 0.4287925958633423, "lr": 1.0317673775189374e-06, "epoch": 2.726072607260726, "percentage": 90.87, "elapsed_time": "8:47:10", "remaining_time": "0:52:58"} +{"current_steps": 388, "total_steps": 1302, "loss": 0.39976680278778076, "lr": 3.546552636075952e-05, "epoch": 0.895040369088812, "percentage": 29.8, "elapsed_time": "22:26:34", "remaining_time": "2 days, 4:52:05"} +{"current_steps": 827, "total_steps": 909, "loss": 0.5059949159622192, "lr": 1.007554869353975e-06, "epoch": 2.7293729372937294, "percentage": 90.98, "elapsed_time": "8:47:43", "remaining_time": "0:52:19"} +{"current_steps": 828, "total_steps": 909, "loss": 0.47914958000183105, "lr": 9.83622500131336e-07, "epoch": 2.7326732673267324, "percentage": 91.09, "elapsed_time": "8:48:14", "remaining_time": "0:51:40"} +{"current_steps": 829, "total_steps": 909, "loss": 0.5237720608711243, "lr": 9.599706228539452e-07, 
"epoch": 2.735973597359736, "percentage": 91.2, "elapsed_time": "8:48:47", "remaining_time": "0:51:01"} +{"current_steps": 830, "total_steps": 909, "loss": 0.4628916382789612, "lr": 9.365995863874566e-07, "epoch": 2.7392739273927393, "percentage": 91.31, "elapsed_time": "8:49:23", "remaining_time": "0:50:23"} +{"current_steps": 831, "total_steps": 909, "loss": 0.49988898634910583, "lr": 9.135097354551203e-07, "epoch": 2.7425742574257423, "percentage": 91.42, "elapsed_time": "8:50:11", "remaining_time": "0:49:45"} +{"current_steps": 389, "total_steps": 1302, "loss": 0.41348305344581604, "lr": 3.543144876149041e-05, "epoch": 0.8973471741637832, "percentage": 29.88, "elapsed_time": "22:30:10", "remaining_time": "2 days, 4:48:56"} +{"current_steps": 832, "total_steps": 909, "loss": 0.4631851315498352, "lr": 8.907014106327039e-07, "epoch": 2.745874587458746, "percentage": 91.53, "elapsed_time": "8:51:13", "remaining_time": "0:49:09"} +{"current_steps": 833, "total_steps": 909, "loss": 0.47001713514328003, "lr": 8.681749483434387e-07, "epoch": 2.7491749174917492, "percentage": 91.64, "elapsed_time": "8:51:51", "remaining_time": "0:48:31"} +{"current_steps": 834, "total_steps": 909, "loss": 0.4437292218208313, "lr": 8.459306808530999e-07, "epoch": 2.7524752475247523, "percentage": 91.75, "elapsed_time": "8:52:30", "remaining_time": "0:47:53"} +{"current_steps": 835, "total_steps": 909, "loss": 0.5006406903266907, "lr": 8.239689362650694e-07, "epoch": 2.7557755775577557, "percentage": 91.86, "elapsed_time": "8:53:02", "remaining_time": "0:47:14"} +{"current_steps": 836, "total_steps": 909, "loss": 0.45732003450393677, "lr": 8.022900385155185e-07, "epoch": 2.759075907590759, "percentage": 91.97, "elapsed_time": "8:53:41", "remaining_time": "0:46:36"} +{"current_steps": 390, "total_steps": 1302, "loss": 0.3971322476863861, "lr": 3.539726009334045e-05, "epoch": 0.8996539792387543, "percentage": 29.95, "elapsed_time": "22:33:33", "remaining_time": "2 days, 4:45:14"} 
+{"current_steps": 837, "total_steps": 909, "loss": 0.5012909173965454, "lr": 7.808943073686159e-07, "epoch": 2.762376237623762, "percentage": 92.08, "elapsed_time": "8:54:27", "remaining_time": "0:45:58"} +{"current_steps": 838, "total_steps": 909, "loss": 0.5104090571403503, "lr": 7.597820584118221e-07, "epoch": 2.7656765676567656, "percentage": 92.19, "elapsed_time": "8:55:01", "remaining_time": "0:45:19"} +{"current_steps": 839, "total_steps": 909, "loss": 0.44415900111198425, "lr": 7.38953603051229e-07, "epoch": 2.768976897689769, "percentage": 92.3, "elapsed_time": "8:55:54", "remaining_time": "0:44:42"} +{"current_steps": 840, "total_steps": 909, "loss": 0.46958473324775696, "lr": 7.184092485069638e-07, "epoch": 2.772277227722772, "percentage": 92.41, "elapsed_time": "8:56:27", "remaining_time": "0:44:03"} +{"current_steps": 841, "total_steps": 909, "loss": 0.4305083155632019, "lr": 6.981492978086634e-07, "epoch": 2.7755775577557755, "percentage": 92.52, "elapsed_time": "8:57:17", "remaining_time": "0:43:26"} +{"current_steps": 391, "total_steps": 1302, "loss": 0.42948639392852783, "lr": 3.536296060238484e-05, "epoch": 0.9019607843137255, "percentage": 30.03, "elapsed_time": "22:37:17", "remaining_time": "2 days, 4:42:23"} +{"current_steps": 842, "total_steps": 909, "loss": 0.4812752604484558, "lr": 6.78174049791005e-07, "epoch": 2.778877887788779, "percentage": 92.63, "elapsed_time": "8:57:55", "remaining_time": "0:42:48"} +{"current_steps": 843, "total_steps": 909, "loss": 0.522142231464386, "lr": 6.584837990892889e-07, "epoch": 2.782178217821782, "percentage": 92.74, "elapsed_time": "8:58:26", "remaining_time": "0:42:09"} +{"current_steps": 844, "total_steps": 909, "loss": 0.4789726138114929, "lr": 6.390788361351053e-07, "epoch": 2.7854785478547854, "percentage": 92.85, "elapsed_time": "8:59:05", "remaining_time": "0:41:31"} +{"current_steps": 845, "total_steps": 909, "loss": 0.44507476687431335, "lr": 6.199594471520453e-07, "epoch": 2.788778877887789, 
"percentage": 92.96, "elapsed_time": "8:59:44", "remaining_time": "0:40:52"} +{"current_steps": 846, "total_steps": 909, "loss": 0.47613948583602905, "lr": 6.011259141514747e-07, "epoch": 2.792079207920792, "percentage": 93.07, "elapsed_time": "9:00:23", "remaining_time": "0:40:14"} +{"current_steps": 847, "total_steps": 909, "loss": 0.44828763604164124, "lr": 5.825785149283758e-07, "epoch": 2.7953795379537953, "percentage": 93.18, "elapsed_time": "9:01:07", "remaining_time": "0:39:36"} +{"current_steps": 392, "total_steps": 1302, "loss": 0.4134788513183594, "lr": 3.532855053549646e-05, "epoch": 0.9042675893886967, "percentage": 30.11, "elapsed_time": "22:40:55", "remaining_time": "2 days, 4:39:16"} +{"current_steps": 848, "total_steps": 909, "loss": 0.4695909321308136, "lr": 5.64317523057254e-07, "epoch": 2.798679867986799, "percentage": 93.29, "elapsed_time": "9:01:45", "remaining_time": "0:38:58"} +{"current_steps": 849, "total_steps": 909, "loss": 0.48341453075408936, "lr": 5.463432078881093e-07, "epoch": 2.801980198019802, "percentage": 93.4, "elapsed_time": "9:02:29", "remaining_time": "0:38:20"} +{"current_steps": 850, "total_steps": 909, "loss": 0.47008436918258667, "lr": 5.286558345424397e-07, "epoch": 2.8052805280528053, "percentage": 93.51, "elapsed_time": "9:03:03", "remaining_time": "0:37:41"} +{"current_steps": 851, "total_steps": 909, "loss": 0.5081039071083069, "lr": 5.112556639093536e-07, "epoch": 2.8085808580858087, "percentage": 93.62, "elapsed_time": "9:03:38", "remaining_time": "0:37:03"} +{"current_steps": 852, "total_steps": 909, "loss": 0.49790090322494507, "lr": 4.941429526417163e-07, "epoch": 2.8118811881188117, "percentage": 93.73, "elapsed_time": "9:04:22", "remaining_time": "0:36:25"} +{"current_steps": 853, "total_steps": 909, "loss": 0.476767897605896, "lr": 4.773179531523542e-07, "epoch": 2.815181518151815, "percentage": 93.84, "elapsed_time": "9:04:58", "remaining_time": "0:35:46"} +{"current_steps": 393, "total_steps": 1302, 
"loss": 0.41317641735076904, "lr": 3.529403014034406e-05, "epoch": 0.9065743944636678, "percentage": 30.18, "elapsed_time": "22:44:27", "remaining_time": "2 days, 4:35:57"} +{"current_steps": 854, "total_steps": 909, "loss": 0.5067446231842041, "lr": 4.6078091361034585e-07, "epoch": 2.8184818481848186, "percentage": 93.95, "elapsed_time": "9:05:31", "remaining_time": "0:35:08"} +{"current_steps": 855, "total_steps": 909, "loss": 0.45703452825546265, "lr": 4.4453207793735185e-07, "epoch": 2.8217821782178216, "percentage": 94.06, "elapsed_time": "9:06:07", "remaining_time": "0:34:29"} +{"current_steps": 856, "total_steps": 909, "loss": 0.4193270206451416, "lr": 4.285716858040223e-07, "epoch": 2.825082508250825, "percentage": 94.17, "elapsed_time": "9:06:47", "remaining_time": "0:33:51"} +{"current_steps": 857, "total_steps": 909, "loss": 0.4367069602012634, "lr": 4.128999726264549e-07, "epoch": 2.8283828382838285, "percentage": 94.28, "elapsed_time": "9:07:20", "remaining_time": "0:33:12"} +{"current_steps": 858, "total_steps": 909, "loss": 0.46601590514183044, "lr": 3.9751716956273113e-07, "epoch": 2.8316831683168315, "percentage": 94.39, "elapsed_time": "9:07:58", "remaining_time": "0:32:34"} +{"current_steps": 859, "total_steps": 909, "loss": 0.4801405072212219, "lr": 3.824235035095036e-07, "epoch": 2.834983498349835, "percentage": 94.5, "elapsed_time": "9:08:37", "remaining_time": "0:31:56"} +{"current_steps": 394, "total_steps": 1302, "loss": 0.44126808643341064, "lr": 3.5259399665390486e-05, "epoch": 0.908881199538639, "percentage": 30.26, "elapsed_time": "22:48:02", "remaining_time": "2 days, 4:32:44"} +{"current_steps": 860, "total_steps": 909, "loss": 0.4729960262775421, "lr": 3.676191970986409e-07, "epoch": 2.8382838283828384, "percentage": 94.61, "elapsed_time": "9:09:11", "remaining_time": "0:31:17"} +{"current_steps": 861, "total_steps": 909, "loss": 0.453819215297699, "lr": 3.531044686939611e-07, "epoch": 2.8415841584158414, "percentage": 94.72, 
"elapsed_time": "9:09:44", "remaining_time": "0:30:38"} +{"current_steps": 862, "total_steps": 909, "loss": 0.4655516743659973, "lr": 3.388795323879923e-07, "epoch": 2.844884488448845, "percentage": 94.83, "elapsed_time": "9:10:14", "remaining_time": "0:30:00"} +{"current_steps": 863, "total_steps": 909, "loss": 0.4915505647659302, "lr": 3.249445979988286e-07, "epoch": 2.8481848184818483, "percentage": 94.94, "elapsed_time": "9:10:44", "remaining_time": "0:29:21"} +{"current_steps": 864, "total_steps": 909, "loss": 0.46072205901145935, "lr": 3.112998710670279e-07, "epoch": 2.8514851485148514, "percentage": 95.05, "elapsed_time": "9:11:32", "remaining_time": "0:28:43"} +{"current_steps": 865, "total_steps": 909, "loss": 0.47496911883354187, "lr": 2.979455528525854e-07, "epoch": 2.854785478547855, "percentage": 95.16, "elapsed_time": "9:12:03", "remaining_time": "0:28:04"} +{"current_steps": 395, "total_steps": 1302, "loss": 0.38224154710769653, "lr": 3.522465935989088e-05, "epoch": 0.9111880046136102, "percentage": 30.34, "elapsed_time": "22:51:38", "remaining_time": "2 days, 4:29:35"} +{"current_steps": 866, "total_steps": 909, "loss": 0.4863288402557373, "lr": 2.8488184033195867e-07, "epoch": 2.8580858085808583, "percentage": 95.27, "elapsed_time": "9:12:37", "remaining_time": "0:27:26"} +{"current_steps": 867, "total_steps": 909, "loss": 0.4543803930282593, "lr": 2.721089261951626e-07, "epoch": 2.8613861386138613, "percentage": 95.38, "elapsed_time": "9:13:09", "remaining_time": "0:26:47"} +{"current_steps": 868, "total_steps": 909, "loss": 0.4589266777038574, "lr": 2.5962699884293894e-07, "epoch": 2.8646864686468647, "percentage": 95.49, "elapsed_time": "9:13:43", "remaining_time": "0:26:09"} +{"current_steps": 869, "total_steps": 909, "loss": 0.45603302121162415, "lr": 2.474362423839627e-07, "epoch": 2.867986798679868, "percentage": 95.6, "elapsed_time": "9:14:23", "remaining_time": "0:25:31"} +{"current_steps": 870, "total_steps": 909, "loss": 
0.4547184109687805, "lr": 2.3553683663213088e-07, "epoch": 2.871287128712871, "percentage": 95.71, "elapsed_time": "9:14:55", "remaining_time": "0:24:52"} +{"current_steps": 871, "total_steps": 909, "loss": 0.4900602102279663, "lr": 2.2392895710391604e-07, "epoch": 2.8745874587458746, "percentage": 95.82, "elapsed_time": "9:15:32", "remaining_time": "0:24:14"} +{"current_steps": 396, "total_steps": 1302, "loss": 0.42479267716407776, "lr": 3.518980947389091e-05, "epoch": 0.9134948096885813, "percentage": 30.41, "elapsed_time": "22:55:13", "remaining_time": "2 days, 4:26:21"} +{"current_steps": 872, "total_steps": 909, "loss": 0.48706525564193726, "lr": 2.126127750157725e-07, "epoch": 2.877887788778878, "percentage": 95.93, "elapsed_time": "9:16:19", "remaining_time": "0:23:36"} +{"current_steps": 873, "total_steps": 909, "loss": 0.4726618230342865, "lr": 2.0158845728160958e-07, "epoch": 2.881188118811881, "percentage": 96.04, "elapsed_time": "9:17:05", "remaining_time": "0:22:58"} +{"current_steps": 874, "total_steps": 909, "loss": 0.45884019136428833, "lr": 1.9085616651033147e-07, "epoch": 2.8844884488448845, "percentage": 96.15, "elapsed_time": "9:17:58", "remaining_time": "0:22:20"} +{"current_steps": 875, "total_steps": 909, "loss": 0.4787840247154236, "lr": 1.804160610034411e-07, "epoch": 2.887788778877888, "percentage": 96.26, "elapsed_time": "9:18:28", "remaining_time": "0:21:42"} +{"current_steps": 397, "total_steps": 1302, "loss": 0.4111449122428894, "lr": 3.5154850258224954e-05, "epoch": 0.9158016147635525, "percentage": 30.49, "elapsed_time": "22:58:31", "remaining_time": "2 days, 4:22:28"} +{"current_steps": 876, "total_steps": 909, "loss": 0.4758448004722595, "lr": 1.702682947527001e-07, "epoch": 2.891089108910891, "percentage": 96.37, "elapsed_time": "9:19:08", "remaining_time": "0:21:03"} +{"current_steps": 877, "total_steps": 909, "loss": 0.47089093923568726, "lr": 1.6041301743786596e-07, "epoch": 2.8943894389438944, "percentage": 96.48, 
"elapsed_time": "9:19:36", "remaining_time": "0:20:25"} +{"current_steps": 878, "total_steps": 909, "loss": 0.46921056509017944, "lr": 1.5085037442446937e-07, "epoch": 2.897689768976898, "percentage": 96.59, "elapsed_time": "9:20:36", "remaining_time": "0:19:47"} +{"current_steps": 879, "total_steps": 909, "loss": 0.5218731164932251, "lr": 1.415805067616871e-07, "epoch": 2.900990099009901, "percentage": 96.7, "elapsed_time": "9:21:09", "remaining_time": "0:19:09"} +{"current_steps": 880, "total_steps": 909, "loss": 0.43099671602249146, "lr": 1.3260355118025036e-07, "epoch": 2.9042904290429044, "percentage": 96.81, "elapsed_time": "9:21:44", "remaining_time": "0:18:30"} +{"current_steps": 881, "total_steps": 909, "loss": 0.48290592432022095, "lr": 1.2391964009043078e-07, "epoch": 2.907590759075908, "percentage": 96.92, "elapsed_time": "9:22:23", "remaining_time": "0:17:52"} +{"current_steps": 398, "total_steps": 1302, "loss": 0.41729629039764404, "lr": 3.5119781964514306e-05, "epoch": 0.9181084198385236, "percentage": 30.57, "elapsed_time": "23:02:08", "remaining_time": "2 days, 4:19:20"} +{"current_steps": 882, "total_steps": 909, "loss": 0.4634360074996948, "lr": 1.1552890158009311e-07, "epoch": 2.910891089108911, "percentage": 97.03, "elapsed_time": "9:23:10", "remaining_time": "0:17:14"} +{"current_steps": 883, "total_steps": 909, "loss": 0.5041622519493103, "lr": 1.0743145941279453e-07, "epoch": 2.9141914191419143, "percentage": 97.14, "elapsed_time": "9:23:41", "remaining_time": "0:16:35"} +{"current_steps": 884, "total_steps": 909, "loss": 0.480410099029541, "lr": 9.962743302596612e-08, "epoch": 2.9174917491749177, "percentage": 97.25, "elapsed_time": "9:24:14", "remaining_time": "0:15:57"} +{"current_steps": 885, "total_steps": 909, "loss": 0.49919891357421875, "lr": 9.211693752915419e-08, "epoch": 2.9207920792079207, "percentage": 97.36, "elapsed_time": "9:24:46", "remaining_time": "0:15:18"} +{"current_steps": 886, "total_steps": 909, "loss": 
0.508806586265564, "lr": 8.490008370231506e-08, "epoch": 2.924092409240924, "percentage": 97.47, "elapsed_time": "9:25:22", "remaining_time": "0:14:40"} +{"current_steps": 399, "total_steps": 1302, "loss": 0.4183010458946228, "lr": 3.508460484516533e-05, "epoch": 0.9204152249134948, "percentage": 30.65, "elapsed_time": "23:05:39", "remaining_time": "2 days, 4:15:57"} +{"current_steps": 887, "total_steps": 909, "loss": 0.4233350157737732, "lr": 7.797697799418525e-08, "epoch": 2.9273927392739276, "percentage": 97.58, "elapsed_time": "9:26:16", "remaining_time": "0:14:02"} +{"current_steps": 888, "total_steps": 909, "loss": 0.4577901363372803, "lr": 7.134772252071154e-08, "epoch": 2.9306930693069306, "percentage": 97.69, "elapsed_time": "9:26:45", "remaining_time": "0:13:24"} +{"current_steps": 889, "total_steps": 909, "loss": 0.4028077721595764, "lr": 6.501241506354561e-08, "epoch": 2.933993399339934, "percentage": 97.8, "elapsed_time": "9:27:32", "remaining_time": "0:12:46"} +{"current_steps": 890, "total_steps": 909, "loss": 0.48321446776390076, "lr": 5.897114906859402e-08, "epoch": 2.9372937293729375, "percentage": 97.91, "elapsed_time": "9:28:13", "remaining_time": "0:12:07"} +{"current_steps": 891, "total_steps": 909, "loss": 0.48732608556747437, "lr": 5.322401364465491e-08, "epoch": 2.9405940594059405, "percentage": 98.02, "elapsed_time": "9:28:48", "remaining_time": "0:11:29"} +{"current_steps": 892, "total_steps": 909, "loss": 0.46879494190216064, "lr": 4.777109356208565e-08, "epoch": 2.943894389438944, "percentage": 98.13, "elapsed_time": "9:29:24", "remaining_time": "0:10:51"} +{"current_steps": 400, "total_steps": 1302, "loss": 0.3952270746231079, "lr": 3.5049319153367704e-05, "epoch": 0.922722029988466, "percentage": 30.72, "elapsed_time": "23:09:11", "remaining_time": "2 days, 4:12:36"} +{"current_steps": 893, "total_steps": 909, "loss": 0.4858628511428833, "lr": 4.261246925156837e-08, "epoch": 2.9471947194719474, "percentage": 98.24, "elapsed_time": 
"9:30:17", "remaining_time": "0:10:13"} +{"current_steps": 894, "total_steps": 909, "loss": 0.48119616508483887, "lr": 3.7748216802913077e-08, "epoch": 2.9504950495049505, "percentage": 98.35, "elapsed_time": "9:31:02", "remaining_time": "0:09:34"} +{"current_steps": 895, "total_steps": 909, "loss": 0.4994167983531952, "lr": 3.3178407963938564e-08, "epoch": 2.953795379537954, "percentage": 98.46, "elapsed_time": "9:31:43", "remaining_time": "0:08:56"} +{"current_steps": 896, "total_steps": 909, "loss": 0.46394845843315125, "lr": 2.8903110139417712e-08, "epoch": 2.9570957095709574, "percentage": 98.57, "elapsed_time": "9:32:33", "remaining_time": "0:08:18"} +{"current_steps": 897, "total_steps": 909, "loss": 0.42504560947418213, "lr": 2.4922386390076047e-08, "epoch": 2.9603960396039604, "percentage": 98.68, "elapsed_time": "9:33:07", "remaining_time": "0:07:40"} +{"current_steps": 401, "total_steps": 1302, "loss": 0.422443687915802, "lr": 3.501392514309253e-05, "epoch": 0.9250288350634371, "percentage": 30.8, "elapsed_time": "23:12:39", "remaining_time": "2 days, 4:09:08"} +{"current_steps": 898, "total_steps": 909, "loss": 0.4186960756778717, "lr": 2.1236295431670275e-08, "epoch": 2.963696369636964, "percentage": 98.79, "elapsed_time": "9:33:49", "remaining_time": "0:07:01"} +{"current_steps": 899, "total_steps": 909, "loss": 0.4529160261154175, "lr": 1.7844891634113402e-08, "epoch": 2.9669966996699673, "percentage": 98.9, "elapsed_time": "9:34:36", "remaining_time": "0:06:23"} +{"current_steps": 900, "total_steps": 909, "loss": 0.44012153148651123, "lr": 1.4748225020679851e-08, "epoch": 2.9702970297029703, "percentage": 99.01, "elapsed_time": "9:35:21", "remaining_time": "0:05:45"} +{"current_steps": 901, "total_steps": 909, "loss": 0.4775368571281433, "lr": 1.1946341267263794e-08, "epoch": 2.9735973597359737, "percentage": 99.12, "elapsed_time": "9:35:58", "remaining_time": "0:05:06"} +{"current_steps": 402, "total_steps": 1302, "loss": 0.41572409868240356, "lr": 
3.4978423069090567e-05, "epoch": 0.9273356401384083, "percentage": 30.88, "elapsed_time": "23:15:53", "remaining_time": "2 days, 4:05:07"} +{"current_steps": 902, "total_steps": 909, "loss": 0.4465276002883911, "lr": 9.439281701704162e-09, "epoch": 2.976897689768977, "percentage": 99.23, "elapsed_time": "9:36:32", "remaining_time": "0:04:28"} +{"current_steps": 903, "total_steps": 909, "loss": 0.4674132168292999, "lr": 7.227083303180671e-09, "epoch": 2.98019801980198, "percentage": 99.34, "elapsed_time": "9:37:07", "remaining_time": "0:03:50"} +{"current_steps": 904, "total_steps": 909, "loss": 0.4751841127872467, "lr": 5.30977870166316e-09, "epoch": 2.9834983498349836, "percentage": 99.45, "elapsed_time": "9:37:49", "remaining_time": "0:03:11"} +{"current_steps": 905, "total_steps": 909, "loss": 0.4587743580341339, "lr": 3.687396177434188e-09, "epoch": 2.9867986798679866, "percentage": 99.56, "elapsed_time": "9:38:24", "remaining_time": "0:02:33"} +{"current_steps": 906, "total_steps": 909, "loss": 0.4815826416015625, "lr": 2.359959660667155e-09, "epoch": 2.99009900990099, "percentage": 99.67, "elapsed_time": "9:39:04", "remaining_time": "0:01:55"} +{"current_steps": 907, "total_steps": 909, "loss": 0.45863479375839233, "lr": 1.3274887310732454e-09, "epoch": 2.9933993399339935, "percentage": 99.78, "elapsed_time": "9:39:48", "remaining_time": "0:01:16"} +{"current_steps": 403, "total_steps": 1302, "loss": 0.41415733098983765, "lr": 3.4942813186890355e-05, "epoch": 0.9296424452133795, "percentage": 30.95, "elapsed_time": "23:19:20", "remaining_time": "2 days, 4:01:36"} +{"current_steps": 908, "total_steps": 909, "loss": 0.4888804256916046, "lr": 5.899986176260974e-10, "epoch": 2.9966996699669965, "percentage": 99.89, "elapsed_time": "9:40:39", "remaining_time": "0:00:38"} +{"current_steps": 909, "total_steps": 909, "loss": 0.46804267168045044, "lr": 1.475001983131108e-10, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "9:41:26", "remaining_time": "0:00:00"} 
+{"current_steps": 909, "total_steps": 909, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "9:41:50", "remaining_time": "0:00:00"} +{"current_steps": 404, "total_steps": 1302, "loss": 0.43313664197921753, "lr": 3.4907095752796395e-05, "epoch": 0.9319492502883506, "percentage": 31.03, "elapsed_time": "23:22:42", "remaining_time": "2 days, 3:57:53"} +{"current_steps": 405, "total_steps": 1302, "loss": 0.4016340970993042, "lr": 3.487127102388729e-05, "epoch": 0.9342560553633218, "percentage": 31.11, "elapsed_time": "23:26:18", "remaining_time": "2 days, 3:54:43"} +{"current_steps": 406, "total_steps": 1302, "loss": 0.4060002863407135, "lr": 3.483533925801393e-05, "epoch": 0.936562860438293, "percentage": 31.18, "elapsed_time": "23:29:51", "remaining_time": "2 days, 3:51:24"} +{"current_steps": 407, "total_steps": 1302, "loss": 0.4150099754333496, "lr": 3.4799300713797576e-05, "epoch": 0.9388696655132641, "percentage": 31.26, "elapsed_time": "23:32:55", "remaining_time": "2 days, 3:47:03"} +{"current_steps": 408, "total_steps": 1302, "loss": 0.419131338596344, "lr": 3.476315565062808e-05, "epoch": 0.9411764705882353, "percentage": 31.34, "elapsed_time": "23:35:59", "remaining_time": "2 days, 3:42:42"} +{"current_steps": 409, "total_steps": 1302, "loss": 0.4076481759548187, "lr": 3.4726904328661915e-05, "epoch": 0.9434832756632064, "percentage": 31.41, "elapsed_time": "23:39:32", "remaining_time": "2 days, 3:39:22"} +{"current_steps": 410, "total_steps": 1302, "loss": 0.4006500244140625, "lr": 3.4690547008820396e-05, "epoch": 0.9457900807381776, "percentage": 31.49, "elapsed_time": "23:42:59", "remaining_time": "2 days, 3:35:53"} +{"current_steps": 411, "total_steps": 1302, "loss": 0.4099448323249817, "lr": 3.465408395278776e-05, "epoch": 0.9480968858131488, "percentage": 31.57, "elapsed_time": "23:46:37", "remaining_time": "2 days, 3:32:44"} +{"current_steps": 412, "total_steps": 1302, "loss": 0.4313267469406128, "lr": 3.4617515423009277e-05, "epoch": 
0.9504036908881199, "percentage": 31.64, "elapsed_time": "23:50:02", "remaining_time": "2 days, 3:29:10"} +{"current_steps": 413, "total_steps": 1302, "loss": 0.3978089690208435, "lr": 3.4580841682689384e-05, "epoch": 0.9527104959630911, "percentage": 31.72, "elapsed_time": "23:53:35", "remaining_time": "2 days, 3:25:52"} +{"current_steps": 414, "total_steps": 1302, "loss": 0.38555943965911865, "lr": 3.4544062995789764e-05, "epoch": 0.9550173010380623, "percentage": 31.8, "elapsed_time": "23:56:54", "remaining_time": "2 days, 3:22:03"} +{"current_steps": 415, "total_steps": 1302, "loss": 0.4067246615886688, "lr": 3.4507179627027466e-05, "epoch": 0.9573241061130334, "percentage": 31.87, "elapsed_time": "1 day, 0:00:21", "remaining_time": "2 days, 3:18:32"} +{"current_steps": 416, "total_steps": 1302, "loss": 0.4124700129032135, "lr": 3.4470191841873e-05, "epoch": 0.9596309111880046, "percentage": 31.95, "elapsed_time": "1 day, 0:03:57", "remaining_time": "2 days, 3:15:20"} +{"current_steps": 417, "total_steps": 1302, "loss": 0.42649126052856445, "lr": 3.443309990654842e-05, "epoch": 0.9619377162629758, "percentage": 32.03, "elapsed_time": "1 day, 0:07:32", "remaining_time": "2 days, 3:12:07"} +{"current_steps": 418, "total_steps": 1302, "loss": 0.4056153893470764, "lr": 3.439590408802539e-05, "epoch": 0.9642445213379469, "percentage": 32.1, "elapsed_time": "1 day, 0:11:06", "remaining_time": "2 days, 3:08:50"} +{"current_steps": 419, "total_steps": 1302, "loss": 0.39001065492630005, "lr": 3.4358604654023294e-05, "epoch": 0.9665513264129181, "percentage": 32.18, "elapsed_time": "1 day, 0:14:40", "remaining_time": "2 days, 3:05:35"} +{"current_steps": 420, "total_steps": 1302, "loss": 0.42024141550064087, "lr": 3.43212018730073e-05, "epoch": 0.9688581314878892, "percentage": 32.26, "elapsed_time": "1 day, 0:18:17", "remaining_time": "2 days, 3:02:24"} +{"current_steps": 421, "total_steps": 1302, "loss": 0.405081570148468, "lr": 3.428369601418642e-05, "epoch": 
0.9711649365628604, "percentage": 32.33, "elapsed_time": "1 day, 0:21:55", "remaining_time": "2 days, 2:59:16"} +{"current_steps": 422, "total_steps": 1302, "loss": 0.38926345109939575, "lr": 3.4246087347511565e-05, "epoch": 0.9734717416378316, "percentage": 32.41, "elapsed_time": "1 day, 0:25:26", "remaining_time": "2 days, 2:55:53"} +{"current_steps": 423, "total_steps": 1302, "loss": 0.40160441398620605, "lr": 3.420837614367362e-05, "epoch": 0.9757785467128027, "percentage": 32.49, "elapsed_time": "1 day, 0:29:06", "remaining_time": "2 days, 2:52:49"} +{"current_steps": 424, "total_steps": 1302, "loss": 0.41054728627204895, "lr": 3.417056267410149e-05, "epoch": 0.9780853517877739, "percentage": 32.57, "elapsed_time": "1 day, 0:32:40", "remaining_time": "2 days, 2:49:32"} +{"current_steps": 425, "total_steps": 1302, "loss": 0.416596382856369, "lr": 3.413264721096014e-05, "epoch": 0.9803921568627451, "percentage": 32.64, "elapsed_time": "1 day, 0:36:12", "remaining_time": "2 days, 2:46:11"} +{"current_steps": 426, "total_steps": 1302, "loss": 0.4033527076244354, "lr": 3.409463002714865e-05, "epoch": 0.9826989619377162, "percentage": 32.72, "elapsed_time": "1 day, 0:39:49", "remaining_time": "2 days, 2:43:00"} +{"current_steps": 427, "total_steps": 1302, "loss": 0.43167567253112793, "lr": 3.405651139629823e-05, "epoch": 0.9850057670126874, "percentage": 32.8, "elapsed_time": "1 day, 0:43:20", "remaining_time": "2 days, 2:39:38"} +{"current_steps": 428, "total_steps": 1302, "loss": 0.42502743005752563, "lr": 3.401829159277026e-05, "epoch": 0.9873125720876585, "percentage": 32.87, "elapsed_time": "1 day, 0:46:37", "remaining_time": "2 days, 2:35:46"} +{"current_steps": 429, "total_steps": 1302, "loss": 0.417066752910614, "lr": 3.3979970891654326e-05, "epoch": 0.9896193771626297, "percentage": 32.95, "elapsed_time": "1 day, 0:50:18", "remaining_time": "2 days, 2:32:42"} +{"current_steps": 430, "total_steps": 1302, "loss": 0.40265771746635437, "lr": 
3.394154956876624e-05, "epoch": 0.9919261822376009, "percentage": 33.03, "elapsed_time": "1 day, 0:53:42", "remaining_time": "2 days, 2:29:06"} +{"current_steps": 431, "total_steps": 1302, "loss": 0.42258793115615845, "lr": 3.390302790064602e-05, "epoch": 0.994232987312572, "percentage": 33.1, "elapsed_time": "1 day, 0:57:15", "remaining_time": "2 days, 2:25:45"} +{"current_steps": 432, "total_steps": 1302, "loss": 0.415671706199646, "lr": 3.386440616455594e-05, "epoch": 0.9965397923875432, "percentage": 33.18, "elapsed_time": "1 day, 1:00:14", "remaining_time": "2 days, 2:21:20"} +{"current_steps": 433, "total_steps": 1302, "loss": 0.395920068025589, "lr": 3.382568463847852e-05, "epoch": 0.9988465974625144, "percentage": 33.26, "elapsed_time": "1 day, 1:03:48", "remaining_time": "2 days, 2:18:02"} +{"current_steps": 434, "total_steps": 1302, "loss": 0.42002686858177185, "lr": 3.378686360111453e-05, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "1 day, 1:05:38", "remaining_time": "2 days, 2:11:17"} +{"current_steps": 435, "total_steps": 1302, "loss": 0.37021559476852417, "lr": 3.374794333188097e-05, "epoch": 1.0023068050749713, "percentage": 33.41, "elapsed_time": "1 day, 1:09:02", "remaining_time": "2 days, 2:07:40"} +{"current_steps": 436, "total_steps": 1302, "loss": 0.37579652667045593, "lr": 3.3708924110909075e-05, "epoch": 1.0046136101499423, "percentage": 33.49, "elapsed_time": "1 day, 1:12:20", "remaining_time": "2 days, 2:03:53"} +{"current_steps": 437, "total_steps": 1302, "loss": 0.3478539288043976, "lr": 3.3669806219042284e-05, "epoch": 1.0069204152249136, "percentage": 33.56, "elapsed_time": "1 day, 1:15:51", "remaining_time": "2 days, 2:00:29"} +{"current_steps": 438, "total_steps": 1302, "loss": 0.39271557331085205, "lr": 3.363058993783424e-05, "epoch": 1.0092272202998847, "percentage": 33.64, "elapsed_time": "1 day, 1:19:18", "remaining_time": "2 days, 1:56:59"} +{"current_steps": 439, "total_steps": 1302, "loss": 0.37586796283721924, "lr": 
3.3591275549546726e-05, "epoch": 1.011534025374856, "percentage": 33.72, "elapsed_time": "1 day, 1:22:41", "remaining_time": "2 days, 1:53:21"} +{"current_steps": 440, "total_steps": 1302, "loss": 0.3719305396080017, "lr": 3.355186333714768e-05, "epoch": 1.013840830449827, "percentage": 33.79, "elapsed_time": "1 day, 1:26:14", "remaining_time": "2 days, 1:50:03"} +{"current_steps": 441, "total_steps": 1302, "loss": 0.3985127806663513, "lr": 3.3512353584309124e-05, "epoch": 1.0161476355247983, "percentage": 33.87, "elapsed_time": "1 day, 1:29:27", "remaining_time": "2 days, 1:46:04"} +{"current_steps": 442, "total_steps": 1302, "loss": 0.38374030590057373, "lr": 3.347274657540514e-05, "epoch": 1.0184544405997693, "percentage": 33.95, "elapsed_time": "1 day, 1:33:05", "remaining_time": "2 days, 1:42:56"} +{"current_steps": 443, "total_steps": 1302, "loss": 0.3638855218887329, "lr": 3.343304259550982e-05, "epoch": 1.0207612456747406, "percentage": 34.02, "elapsed_time": "1 day, 1:36:37", "remaining_time": "2 days, 1:39:35"} +{"current_steps": 444, "total_steps": 1302, "loss": 0.3874225616455078, "lr": 3.339324193039518e-05, "epoch": 1.0230680507497116, "percentage": 34.1, "elapsed_time": "1 day, 1:40:13", "remaining_time": "2 days, 1:36:22"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..57d472c --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,6406 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 909, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0033003300330033004, + "grad_norm": 10.81499361768409, + "learning_rate": 0.0, + "loss": 1.2079360485076904, + "step": 1 + }, + { + "epoch": 0.006600660066006601, + "grad_norm": 10.226770877445293, + "learning_rate": 4.395604395604396e-07, + "loss": 1.123347520828247, + "step": 2 + }, + { + "epoch": 
0.009900990099009901, + "grad_norm": 11.292644267807786, + "learning_rate": 8.791208791208792e-07, + "loss": 1.261695384979248, + "step": 3 + }, + { + "epoch": 0.013201320132013201, + "grad_norm": 10.504638106263508, + "learning_rate": 1.3186813186813187e-06, + "loss": 1.1276888847351074, + "step": 4 + }, + { + "epoch": 0.0165016501650165, + "grad_norm": 10.822100601159539, + "learning_rate": 1.7582417582417585e-06, + "loss": 1.2254480123519897, + "step": 5 + }, + { + "epoch": 0.019801980198019802, + "grad_norm": 9.905516433474448, + "learning_rate": 2.197802197802198e-06, + "loss": 1.1809396743774414, + "step": 6 + }, + { + "epoch": 0.0231023102310231, + "grad_norm": 9.323364829402967, + "learning_rate": 2.6373626373626375e-06, + "loss": 1.2000095844268799, + "step": 7 + }, + { + "epoch": 0.026402640264026403, + "grad_norm": 6.706098746162178, + "learning_rate": 3.0769230769230774e-06, + "loss": 1.0248074531555176, + "step": 8 + }, + { + "epoch": 0.0297029702970297, + "grad_norm": 5.761138380327878, + "learning_rate": 3.516483516483517e-06, + "loss": 1.0840561389923096, + "step": 9 + }, + { + "epoch": 0.033003300330033, + "grad_norm": 2.7364343552329315, + "learning_rate": 3.9560439560439565e-06, + "loss": 0.955639123916626, + "step": 10 + }, + { + "epoch": 0.036303630363036306, + "grad_norm": 2.113810438625661, + "learning_rate": 4.395604395604396e-06, + "loss": 0.9281604290008545, + "step": 11 + }, + { + "epoch": 0.039603960396039604, + "grad_norm": 1.849238684536393, + "learning_rate": 4.8351648351648355e-06, + "loss": 0.9079018831253052, + "step": 12 + }, + { + "epoch": 0.0429042904290429, + "grad_norm": 1.6747171029255208, + "learning_rate": 5.274725274725275e-06, + "loss": 0.9039217233657837, + "step": 13 + }, + { + "epoch": 0.0462046204620462, + "grad_norm": 2.0121666555693416, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.8910936117172241, + "step": 14 + }, + { + "epoch": 0.04950495049504951, + "grad_norm": 2.0600124028897526, + "learning_rate": 
6.153846153846155e-06, + "loss": 0.895532488822937, + "step": 15 + }, + { + "epoch": 0.052805280528052806, + "grad_norm": 2.0613449368510044, + "learning_rate": 6.5934065934065935e-06, + "loss": 0.8889240622520447, + "step": 16 + }, + { + "epoch": 0.056105610561056105, + "grad_norm": 1.785450637059245, + "learning_rate": 7.032967032967034e-06, + "loss": 0.8499570488929749, + "step": 17 + }, + { + "epoch": 0.0594059405940594, + "grad_norm": 1.5894161631201256, + "learning_rate": 7.472527472527473e-06, + "loss": 0.839992105960846, + "step": 18 + }, + { + "epoch": 0.0627062706270627, + "grad_norm": 1.1904834264503976, + "learning_rate": 7.912087912087913e-06, + "loss": 0.7718420028686523, + "step": 19 + }, + { + "epoch": 0.066006600660066, + "grad_norm": 1.0397335564670163, + "learning_rate": 8.351648351648353e-06, + "loss": 0.7865867614746094, + "step": 20 + }, + { + "epoch": 0.06930693069306931, + "grad_norm": 0.8314739102256958, + "learning_rate": 8.791208791208792e-06, + "loss": 0.7982739806175232, + "step": 21 + }, + { + "epoch": 0.07260726072607261, + "grad_norm": 0.6542597896181986, + "learning_rate": 9.230769230769232e-06, + "loss": 0.7846421599388123, + "step": 22 + }, + { + "epoch": 0.07590759075907591, + "grad_norm": 0.6269389928815381, + "learning_rate": 9.670329670329671e-06, + "loss": 0.7005743980407715, + "step": 23 + }, + { + "epoch": 0.07920792079207921, + "grad_norm": 0.6603922634859757, + "learning_rate": 1.010989010989011e-05, + "loss": 0.7084314227104187, + "step": 24 + }, + { + "epoch": 0.08250825082508251, + "grad_norm": 0.6856248928818359, + "learning_rate": 1.054945054945055e-05, + "loss": 0.7310304641723633, + "step": 25 + }, + { + "epoch": 0.0858085808580858, + "grad_norm": 0.5728331825854258, + "learning_rate": 1.098901098901099e-05, + "loss": 0.7056888341903687, + "step": 26 + }, + { + "epoch": 0.0891089108910891, + "grad_norm": 0.47956485465857923, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.6987950205802917, + "step": 27 + }, 
+ { + "epoch": 0.0924092409240924, + "grad_norm": 0.47407141179043555, + "learning_rate": 1.186813186813187e-05, + "loss": 0.7319807410240173, + "step": 28 + }, + { + "epoch": 0.09570957095709572, + "grad_norm": 0.4856924244101555, + "learning_rate": 1.230769230769231e-05, + "loss": 0.6983063220977783, + "step": 29 + }, + { + "epoch": 0.09900990099009901, + "grad_norm": 0.49122925908544063, + "learning_rate": 1.2747252747252747e-05, + "loss": 0.70492023229599, + "step": 30 + }, + { + "epoch": 0.10231023102310231, + "grad_norm": 0.4556788168903923, + "learning_rate": 1.3186813186813187e-05, + "loss": 0.7376629114151001, + "step": 31 + }, + { + "epoch": 0.10561056105610561, + "grad_norm": 0.4272838300827657, + "learning_rate": 1.3626373626373627e-05, + "loss": 0.6623936295509338, + "step": 32 + }, + { + "epoch": 0.10891089108910891, + "grad_norm": 0.40886227927218277, + "learning_rate": 1.4065934065934068e-05, + "loss": 0.7136330604553223, + "step": 33 + }, + { + "epoch": 0.11221122112211221, + "grad_norm": 0.37821179606418975, + "learning_rate": 1.4505494505494506e-05, + "loss": 0.7113747596740723, + "step": 34 + }, + { + "epoch": 0.11551155115511551, + "grad_norm": 0.4538557716923258, + "learning_rate": 1.4945054945054947e-05, + "loss": 0.8252867460250854, + "step": 35 + }, + { + "epoch": 0.1188118811881188, + "grad_norm": 0.3875808052898815, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.7406599521636963, + "step": 36 + }, + { + "epoch": 0.12211221122112212, + "grad_norm": 0.3503240143986989, + "learning_rate": 1.5824175824175826e-05, + "loss": 0.6572297811508179, + "step": 37 + }, + { + "epoch": 0.1254125412541254, + "grad_norm": 0.3779655372487014, + "learning_rate": 1.6263736263736265e-05, + "loss": 0.7520949840545654, + "step": 38 + }, + { + "epoch": 0.12871287128712872, + "grad_norm": 0.36968690038350466, + "learning_rate": 1.6703296703296707e-05, + "loss": 0.6861323118209839, + "step": 39 + }, + { + "epoch": 0.132013201320132, + "grad_norm": 
0.3724328241107235, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.6818518042564392, + "step": 40 + }, + { + "epoch": 0.1353135313531353, + "grad_norm": 0.35542054984937593, + "learning_rate": 1.7582417582417584e-05, + "loss": 0.6663186550140381, + "step": 41 + }, + { + "epoch": 0.13861386138613863, + "grad_norm": 0.3441266617586836, + "learning_rate": 1.8021978021978023e-05, + "loss": 0.6492191553115845, + "step": 42 + }, + { + "epoch": 0.1419141914191419, + "grad_norm": 0.3478448092762331, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.6444741487503052, + "step": 43 + }, + { + "epoch": 0.14521452145214522, + "grad_norm": 0.34951148057960574, + "learning_rate": 1.8901098901098903e-05, + "loss": 0.6476814150810242, + "step": 44 + }, + { + "epoch": 0.1485148514851485, + "grad_norm": 0.3356672452160599, + "learning_rate": 1.9340659340659342e-05, + "loss": 0.6660827994346619, + "step": 45 + }, + { + "epoch": 0.15181518151815182, + "grad_norm": 0.30809956365723695, + "learning_rate": 1.9780219780219784e-05, + "loss": 0.6924091577529907, + "step": 46 + }, + { + "epoch": 0.1551155115511551, + "grad_norm": 0.9030699054312887, + "learning_rate": 2.021978021978022e-05, + "loss": 0.6899605989456177, + "step": 47 + }, + { + "epoch": 0.15841584158415842, + "grad_norm": 0.35784060194946976, + "learning_rate": 2.0659340659340665e-05, + "loss": 0.7242028713226318, + "step": 48 + }, + { + "epoch": 0.1617161716171617, + "grad_norm": 0.3093966721093651, + "learning_rate": 2.10989010989011e-05, + "loss": 0.6203902959823608, + "step": 49 + }, + { + "epoch": 0.16501650165016502, + "grad_norm": 0.4242705872636108, + "learning_rate": 2.153846153846154e-05, + "loss": 0.6420010328292847, + "step": 50 + }, + { + "epoch": 0.16831683168316833, + "grad_norm": 0.35079960590346965, + "learning_rate": 2.197802197802198e-05, + "loss": 0.7517598867416382, + "step": 51 + }, + { + "epoch": 0.1716171617161716, + "grad_norm": 0.3078803790362521, + "learning_rate": 2.241758241758242e-05, 
+ "loss": 0.6568161249160767, + "step": 52 + }, + { + "epoch": 0.17491749174917492, + "grad_norm": 0.34666662805484005, + "learning_rate": 2.2857142857142858e-05, + "loss": 0.7348504662513733, + "step": 53 + }, + { + "epoch": 0.1782178217821782, + "grad_norm": 0.302791415801781, + "learning_rate": 2.32967032967033e-05, + "loss": 0.6164949536323547, + "step": 54 + }, + { + "epoch": 0.18151815181518152, + "grad_norm": 0.33732756727763136, + "learning_rate": 2.373626373626374e-05, + "loss": 0.6505363583564758, + "step": 55 + }, + { + "epoch": 0.1848184818481848, + "grad_norm": 0.34780152362496847, + "learning_rate": 2.4175824175824177e-05, + "loss": 0.7562520503997803, + "step": 56 + }, + { + "epoch": 0.18811881188118812, + "grad_norm": 0.3310895358869482, + "learning_rate": 2.461538461538462e-05, + "loss": 0.6943148374557495, + "step": 57 + }, + { + "epoch": 0.19141914191419143, + "grad_norm": 0.3367877938063833, + "learning_rate": 2.5054945054945058e-05, + "loss": 0.6571655869483948, + "step": 58 + }, + { + "epoch": 0.19471947194719472, + "grad_norm": 0.32103256018771714, + "learning_rate": 2.5494505494505493e-05, + "loss": 0.7229321002960205, + "step": 59 + }, + { + "epoch": 0.19801980198019803, + "grad_norm": 0.30468399230672144, + "learning_rate": 2.593406593406594e-05, + "loss": 0.6307672262191772, + "step": 60 + }, + { + "epoch": 0.20132013201320131, + "grad_norm": 0.3282635121595526, + "learning_rate": 2.6373626373626374e-05, + "loss": 0.6336506009101868, + "step": 61 + }, + { + "epoch": 0.20462046204620463, + "grad_norm": 0.3280360563022675, + "learning_rate": 2.6813186813186813e-05, + "loss": 0.6492213010787964, + "step": 62 + }, + { + "epoch": 0.2079207920792079, + "grad_norm": 0.3292430577817229, + "learning_rate": 2.7252747252747255e-05, + "loss": 0.6763280034065247, + "step": 63 + }, + { + "epoch": 0.21122112211221122, + "grad_norm": 0.47832355846700536, + "learning_rate": 2.7692307692307694e-05, + "loss": 0.7322396039962769, + "step": 64 + }, + { + 
"epoch": 0.2145214521452145, + "grad_norm": 0.31915340164178446, + "learning_rate": 2.8131868131868136e-05, + "loss": 0.7080870270729065, + "step": 65 + }, + { + "epoch": 0.21782178217821782, + "grad_norm": 0.3227571040968621, + "learning_rate": 2.8571428571428574e-05, + "loss": 0.6054466962814331, + "step": 66 + }, + { + "epoch": 0.22112211221122113, + "grad_norm": 0.33375713186655664, + "learning_rate": 2.9010989010989013e-05, + "loss": 0.6782290935516357, + "step": 67 + }, + { + "epoch": 0.22442244224422442, + "grad_norm": 0.3437770801965916, + "learning_rate": 2.9450549450549455e-05, + "loss": 0.6804753541946411, + "step": 68 + }, + { + "epoch": 0.22772277227722773, + "grad_norm": 0.3228427319313703, + "learning_rate": 2.9890109890109894e-05, + "loss": 0.6493992805480957, + "step": 69 + }, + { + "epoch": 0.23102310231023102, + "grad_norm": 0.3540211756840673, + "learning_rate": 3.0329670329670332e-05, + "loss": 0.6263789534568787, + "step": 70 + }, + { + "epoch": 0.23432343234323433, + "grad_norm": 0.34989089824503405, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.6960322856903076, + "step": 71 + }, + { + "epoch": 0.2376237623762376, + "grad_norm": 0.33624443163866324, + "learning_rate": 3.120879120879121e-05, + "loss": 0.6146604418754578, + "step": 72 + }, + { + "epoch": 0.24092409240924093, + "grad_norm": 0.39618402867027047, + "learning_rate": 3.164835164835165e-05, + "loss": 0.6361377239227295, + "step": 73 + }, + { + "epoch": 0.24422442244224424, + "grad_norm": 0.361603087273114, + "learning_rate": 3.2087912087912094e-05, + "loss": 0.636134147644043, + "step": 74 + }, + { + "epoch": 0.24752475247524752, + "grad_norm": 0.37985663132790304, + "learning_rate": 3.252747252747253e-05, + "loss": 0.5936564803123474, + "step": 75 + }, + { + "epoch": 0.2508250825082508, + "grad_norm": 0.35883234873646996, + "learning_rate": 3.296703296703297e-05, + "loss": 0.6001103520393372, + "step": 76 + }, + { + "epoch": 0.25412541254125415, + "grad_norm": 
0.35227803701073973, + "learning_rate": 3.340659340659341e-05, + "loss": 0.6254594326019287, + "step": 77 + }, + { + "epoch": 0.25742574257425743, + "grad_norm": 0.3563257650896171, + "learning_rate": 3.384615384615385e-05, + "loss": 0.6457959413528442, + "step": 78 + }, + { + "epoch": 0.2607260726072607, + "grad_norm": 0.37234316340556584, + "learning_rate": 3.4285714285714284e-05, + "loss": 0.6186954975128174, + "step": 79 + }, + { + "epoch": 0.264026402640264, + "grad_norm": 0.35352748449766547, + "learning_rate": 3.4725274725274726e-05, + "loss": 0.6175529956817627, + "step": 80 + }, + { + "epoch": 0.26732673267326734, + "grad_norm": 0.35441369709658355, + "learning_rate": 3.516483516483517e-05, + "loss": 0.6694468259811401, + "step": 81 + }, + { + "epoch": 0.2706270627062706, + "grad_norm": 0.39955400784840756, + "learning_rate": 3.56043956043956e-05, + "loss": 0.627490222454071, + "step": 82 + }, + { + "epoch": 0.2739273927392739, + "grad_norm": 0.38314031523497477, + "learning_rate": 3.6043956043956045e-05, + "loss": 0.6410495638847351, + "step": 83 + }, + { + "epoch": 0.27722772277227725, + "grad_norm": 0.36926215386141575, + "learning_rate": 3.648351648351649e-05, + "loss": 0.6305102109909058, + "step": 84 + }, + { + "epoch": 0.28052805280528054, + "grad_norm": 0.38364118080284076, + "learning_rate": 3.692307692307693e-05, + "loss": 0.6558895111083984, + "step": 85 + }, + { + "epoch": 0.2838283828382838, + "grad_norm": 0.3370292682974053, + "learning_rate": 3.7362637362637365e-05, + "loss": 0.6029388308525085, + "step": 86 + }, + { + "epoch": 0.2871287128712871, + "grad_norm": 0.39541874871701704, + "learning_rate": 3.7802197802197807e-05, + "loss": 0.6551017761230469, + "step": 87 + }, + { + "epoch": 0.29042904290429045, + "grad_norm": 0.3629036550044273, + "learning_rate": 3.824175824175825e-05, + "loss": 0.6588809490203857, + "step": 88 + }, + { + "epoch": 0.29372937293729373, + "grad_norm": 0.37786447228212183, + "learning_rate": 
3.8681318681318684e-05, + "loss": 0.614648699760437, + "step": 89 + }, + { + "epoch": 0.297029702970297, + "grad_norm": 0.42911861803278684, + "learning_rate": 3.9120879120879126e-05, + "loss": 0.7034356594085693, + "step": 90 + }, + { + "epoch": 0.30033003300330036, + "grad_norm": 0.3707184094312094, + "learning_rate": 3.956043956043957e-05, + "loss": 0.6908263564109802, + "step": 91 + }, + { + "epoch": 0.30363036303630364, + "grad_norm": 0.38262186656216063, + "learning_rate": 4e-05, + "loss": 0.6882215738296509, + "step": 92 + }, + { + "epoch": 0.3069306930693069, + "grad_norm": 0.3709464296309744, + "learning_rate": 3.999985249980169e-05, + "loss": 0.6377270221710205, + "step": 93 + }, + { + "epoch": 0.3102310231023102, + "grad_norm": 0.3412837406106036, + "learning_rate": 3.999941000138238e-05, + "loss": 0.6735270619392395, + "step": 94 + }, + { + "epoch": 0.31353135313531355, + "grad_norm": 0.40165192879996064, + "learning_rate": 3.999867251126893e-05, + "loss": 0.6934541463851929, + "step": 95 + }, + { + "epoch": 0.31683168316831684, + "grad_norm": 0.34707128601816045, + "learning_rate": 3.9997640040339335e-05, + "loss": 0.6367039084434509, + "step": 96 + }, + { + "epoch": 0.3201320132013201, + "grad_norm": 0.4268828113970776, + "learning_rate": 3.999631260382257e-05, + "loss": 0.6274522542953491, + "step": 97 + }, + { + "epoch": 0.3234323432343234, + "grad_norm": 0.454428833020686, + "learning_rate": 3.999469022129834e-05, + "loss": 0.5874066352844238, + "step": 98 + }, + { + "epoch": 0.32673267326732675, + "grad_norm": 0.4200675840489775, + "learning_rate": 3.9992772916696824e-05, + "loss": 0.6175942420959473, + "step": 99 + }, + { + "epoch": 0.33003300330033003, + "grad_norm": 0.3796321080056305, + "learning_rate": 3.99905607182983e-05, + "loss": 0.5625832080841064, + "step": 100 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.39108856096759403, + "learning_rate": 3.998805365873274e-05, + "loss": 0.6153020262718201, + "step": 101 + }, + { + 
"epoch": 0.33663366336633666, + "grad_norm": 0.3873560194436071, + "learning_rate": 3.998525177497932e-05, + "loss": 0.5585426092147827, + "step": 102 + }, + { + "epoch": 0.33993399339933994, + "grad_norm": 0.4084712106325698, + "learning_rate": 3.998215510836589e-05, + "loss": 0.6586359739303589, + "step": 103 + }, + { + "epoch": 0.3432343234323432, + "grad_norm": 0.4383246876899704, + "learning_rate": 3.997876370456833e-05, + "loss": 0.62096107006073, + "step": 104 + }, + { + "epoch": 0.3465346534653465, + "grad_norm": 0.4026893562706946, + "learning_rate": 3.997507761360993e-05, + "loss": 0.6059336066246033, + "step": 105 + }, + { + "epoch": 0.34983498349834985, + "grad_norm": 0.46586240044914223, + "learning_rate": 3.997109688986059e-05, + "loss": 0.617970883846283, + "step": 106 + }, + { + "epoch": 0.35313531353135313, + "grad_norm": 0.44949199032710474, + "learning_rate": 3.9966821592036066e-05, + "loss": 0.6453397274017334, + "step": 107 + }, + { + "epoch": 0.3564356435643564, + "grad_norm": 0.4794978158156406, + "learning_rate": 3.996225178319709e-05, + "loss": 0.6371763348579407, + "step": 108 + }, + { + "epoch": 0.35973597359735976, + "grad_norm": 0.4463512391721941, + "learning_rate": 3.9957387530748435e-05, + "loss": 0.5971124172210693, + "step": 109 + }, + { + "epoch": 0.36303630363036304, + "grad_norm": 0.368079413354641, + "learning_rate": 3.995222890643792e-05, + "loss": 0.5679532289505005, + "step": 110 + }, + { + "epoch": 0.36633663366336633, + "grad_norm": 0.43733705586285254, + "learning_rate": 3.9946775986355346e-05, + "loss": 0.5988069772720337, + "step": 111 + }, + { + "epoch": 0.3696369636963696, + "grad_norm": 0.38235582844960775, + "learning_rate": 3.994102885093141e-05, + "loss": 0.6352983713150024, + "step": 112 + }, + { + "epoch": 0.37293729372937295, + "grad_norm": 0.389837871286893, + "learning_rate": 3.993498758493646e-05, + "loss": 0.58957839012146, + "step": 113 + }, + { + "epoch": 0.37623762376237624, + "grad_norm": 
0.40399856168911097, + "learning_rate": 3.992865227747929e-05, + "loss": 0.6396822929382324, + "step": 114 + }, + { + "epoch": 0.3795379537953795, + "grad_norm": 0.38891668976227123, + "learning_rate": 3.992202302200582e-05, + "loss": 0.6314754486083984, + "step": 115 + }, + { + "epoch": 0.38283828382838286, + "grad_norm": 0.4087528543828922, + "learning_rate": 3.991509991629769e-05, + "loss": 0.673650860786438, + "step": 116 + }, + { + "epoch": 0.38613861386138615, + "grad_norm": 0.36330054292020786, + "learning_rate": 3.990788306247085e-05, + "loss": 0.5813701152801514, + "step": 117 + }, + { + "epoch": 0.38943894389438943, + "grad_norm": 0.4247110332678589, + "learning_rate": 3.990037256697404e-05, + "loss": 0.6419334411621094, + "step": 118 + }, + { + "epoch": 0.3927392739273927, + "grad_norm": 0.4244126002071751, + "learning_rate": 3.989256854058721e-05, + "loss": 0.6319208145141602, + "step": 119 + }, + { + "epoch": 0.39603960396039606, + "grad_norm": 0.3651632933942853, + "learning_rate": 3.988447109841991e-05, + "loss": 0.5989845991134644, + "step": 120 + }, + { + "epoch": 0.39933993399339934, + "grad_norm": 0.393158353074077, + "learning_rate": 3.987608035990957e-05, + "loss": 0.5853303670883179, + "step": 121 + }, + { + "epoch": 0.40264026402640263, + "grad_norm": 0.35965233332276103, + "learning_rate": 3.986739644881975e-05, + "loss": 0.6115257143974304, + "step": 122 + }, + { + "epoch": 0.40594059405940597, + "grad_norm": 0.4252711474203845, + "learning_rate": 3.985841949323831e-05, + "loss": 0.6440504789352417, + "step": 123 + }, + { + "epoch": 0.40924092409240925, + "grad_norm": 0.5578797297271848, + "learning_rate": 3.984914962557553e-05, + "loss": 0.5765030384063721, + "step": 124 + }, + { + "epoch": 0.41254125412541254, + "grad_norm": 0.4362455029468141, + "learning_rate": 3.983958698256214e-05, + "loss": 0.6387556791305542, + "step": 125 + }, + { + "epoch": 0.4158415841584158, + "grad_norm": 0.39274811063076087, + "learning_rate": 
3.98297317052473e-05, + "loss": 0.6263147592544556, + "step": 126 + }, + { + "epoch": 0.41914191419141916, + "grad_norm": 0.42682589637163704, + "learning_rate": 3.981958393899656e-05, + "loss": 0.6091845035552979, + "step": 127 + }, + { + "epoch": 0.42244224422442245, + "grad_norm": 0.4033131171538041, + "learning_rate": 3.980914383348967e-05, + "loss": 0.6458015441894531, + "step": 128 + }, + { + "epoch": 0.42574257425742573, + "grad_norm": 0.3881606915462862, + "learning_rate": 3.9798411542718395e-05, + "loss": 0.6115552186965942, + "step": 129 + }, + { + "epoch": 0.429042904290429, + "grad_norm": 0.38910317938225847, + "learning_rate": 3.978738722498423e-05, + "loss": 0.6427993774414062, + "step": 130 + }, + { + "epoch": 0.43234323432343236, + "grad_norm": 0.36836380096259913, + "learning_rate": 3.977607104289609e-05, + "loss": 0.6121467351913452, + "step": 131 + }, + { + "epoch": 0.43564356435643564, + "grad_norm": 0.3743062201629088, + "learning_rate": 3.9764463163367875e-05, + "loss": 0.5951442718505859, + "step": 132 + }, + { + "epoch": 0.4389438943894389, + "grad_norm": 0.3699746655092952, + "learning_rate": 3.9752563757616045e-05, + "loss": 0.6639472842216492, + "step": 133 + }, + { + "epoch": 0.44224422442244227, + "grad_norm": 0.37398919831188604, + "learning_rate": 3.974037300115706e-05, + "loss": 0.6084764003753662, + "step": 134 + }, + { + "epoch": 0.44554455445544555, + "grad_norm": 0.37043195153646374, + "learning_rate": 3.972789107380484e-05, + "loss": 0.6211085915565491, + "step": 135 + }, + { + "epoch": 0.44884488448844884, + "grad_norm": 0.3509837417375981, + "learning_rate": 3.9715118159668046e-05, + "loss": 0.6098147034645081, + "step": 136 + }, + { + "epoch": 0.4521452145214521, + "grad_norm": 0.3350785925775803, + "learning_rate": 3.970205444714742e-05, + "loss": 0.6155884861946106, + "step": 137 + }, + { + "epoch": 0.45544554455445546, + "grad_norm": 0.38529379761335925, + "learning_rate": 3.9688700128932975e-05, + "loss": 
0.5984665155410767, + "step": 138 + }, + { + "epoch": 0.45874587458745875, + "grad_norm": 0.45130397769476205, + "learning_rate": 3.967505540200117e-05, + "loss": 0.6656880378723145, + "step": 139 + }, + { + "epoch": 0.46204620462046203, + "grad_norm": 0.3277874952439621, + "learning_rate": 3.966112046761201e-05, + "loss": 0.6607398390769958, + "step": 140 + }, + { + "epoch": 0.46534653465346537, + "grad_norm": 2.6727599644732267, + "learning_rate": 3.9646895531306046e-05, + "loss": 0.6578342914581299, + "step": 141 + }, + { + "epoch": 0.46864686468646866, + "grad_norm": 0.47429126269764676, + "learning_rate": 3.963238080290136e-05, + "loss": 0.6103699803352356, + "step": 142 + }, + { + "epoch": 0.47194719471947194, + "grad_norm": 0.32652590291724093, + "learning_rate": 3.96175764964905e-05, + "loss": 0.5484676957130432, + "step": 143 + }, + { + "epoch": 0.4752475247524752, + "grad_norm": 0.4531372955951849, + "learning_rate": 3.960248283043727e-05, + "loss": 0.578776478767395, + "step": 144 + }, + { + "epoch": 0.47854785478547857, + "grad_norm": 0.3685580706465372, + "learning_rate": 3.958710002737355e-05, + "loss": 0.6184446811676025, + "step": 145 + }, + { + "epoch": 0.48184818481848185, + "grad_norm": 0.3584005630962511, + "learning_rate": 3.9571428314195984e-05, + "loss": 0.6307916045188904, + "step": 146 + }, + { + "epoch": 0.48514851485148514, + "grad_norm": 0.4049679254542765, + "learning_rate": 3.955546792206265e-05, + "loss": 0.6064697504043579, + "step": 147 + }, + { + "epoch": 0.4884488448844885, + "grad_norm": 0.3846258995775384, + "learning_rate": 3.953921908638966e-05, + "loss": 0.6055655479431152, + "step": 148 + }, + { + "epoch": 0.49174917491749176, + "grad_norm": 0.3643318343315678, + "learning_rate": 3.952268204684765e-05, + "loss": 0.5856431126594543, + "step": 149 + }, + { + "epoch": 0.49504950495049505, + "grad_norm": 0.3854715521866927, + "learning_rate": 3.950585704735829e-05, + "loss": 0.6634635925292969, + "step": 150 + }, + { + "epoch": 
0.49834983498349833, + "grad_norm": 0.34338835592304534, + "learning_rate": 3.948874433609065e-05, + "loss": 0.5880753397941589, + "step": 151 + }, + { + "epoch": 0.5016501650165016, + "grad_norm": 0.3481018111538647, + "learning_rate": 3.947134416545757e-05, + "loss": 0.5594221949577332, + "step": 152 + }, + { + "epoch": 0.504950495049505, + "grad_norm": 0.6570220882473125, + "learning_rate": 3.94536567921119e-05, + "loss": 0.664652407169342, + "step": 153 + }, + { + "epoch": 0.5082508250825083, + "grad_norm": 0.340048306266198, + "learning_rate": 3.9435682476942755e-05, + "loss": 0.6002815961837769, + "step": 154 + }, + { + "epoch": 0.5115511551155115, + "grad_norm": 0.3488682381523364, + "learning_rate": 3.941742148507163e-05, + "loss": 0.5905177593231201, + "step": 155 + }, + { + "epoch": 0.5148514851485149, + "grad_norm": 0.33062666453941425, + "learning_rate": 3.939887408584853e-05, + "loss": 0.5636795163154602, + "step": 156 + }, + { + "epoch": 0.5181518151815182, + "grad_norm": 0.35862086331061066, + "learning_rate": 3.938004055284796e-05, + "loss": 0.5639582276344299, + "step": 157 + }, + { + "epoch": 0.5214521452145214, + "grad_norm": 0.31769111173717246, + "learning_rate": 3.9360921163864895e-05, + "loss": 0.6515591144561768, + "step": 158 + }, + { + "epoch": 0.5247524752475248, + "grad_norm": 0.38401455820073427, + "learning_rate": 3.934151620091071e-05, + "loss": 0.5721683502197266, + "step": 159 + }, + { + "epoch": 0.528052805280528, + "grad_norm": 0.3284331200684813, + "learning_rate": 3.9321825950209e-05, + "loss": 0.5801802277565002, + "step": 160 + }, + { + "epoch": 0.5313531353135313, + "grad_norm": 0.3493998878359796, + "learning_rate": 3.9301850702191344e-05, + "loss": 0.603084921836853, + "step": 161 + }, + { + "epoch": 0.5346534653465347, + "grad_norm": 0.32233519110844616, + "learning_rate": 3.928159075149304e-05, + "loss": 0.6376925110816956, + "step": 162 + }, + { + "epoch": 0.5379537953795379, + "grad_norm": 0.35833134197704153, + 
"learning_rate": 3.926104639694877e-05, + "loss": 0.5764102935791016, + "step": 163 + }, + { + "epoch": 0.5412541254125413, + "grad_norm": 0.3523567199445224, + "learning_rate": 3.924021794158818e-05, + "loss": 0.6102188229560852, + "step": 164 + }, + { + "epoch": 0.5445544554455446, + "grad_norm": 0.36694222553878597, + "learning_rate": 3.921910569263139e-05, + "loss": 0.5833287835121155, + "step": 165 + }, + { + "epoch": 0.5478547854785478, + "grad_norm": 0.37179813198977807, + "learning_rate": 3.919770996148448e-05, + "loss": 0.5891385078430176, + "step": 166 + }, + { + "epoch": 0.5511551155115512, + "grad_norm": 0.3507301680001106, + "learning_rate": 3.917603106373493e-05, + "loss": 0.5838547348976135, + "step": 167 + }, + { + "epoch": 0.5544554455445545, + "grad_norm": 0.3134001311174479, + "learning_rate": 3.9154069319146904e-05, + "loss": 0.5727800726890564, + "step": 168 + }, + { + "epoch": 0.5577557755775577, + "grad_norm": 0.33531781904204605, + "learning_rate": 3.913182505165656e-05, + "loss": 0.6102641224861145, + "step": 169 + }, + { + "epoch": 0.5610561056105611, + "grad_norm": 0.35178976522027133, + "learning_rate": 3.91092985893673e-05, + "loss": 0.5718260407447815, + "step": 170 + }, + { + "epoch": 0.5643564356435643, + "grad_norm": 0.47006108726602863, + "learning_rate": 3.908649026454488e-05, + "loss": 0.6308504939079285, + "step": 171 + }, + { + "epoch": 0.5676567656765676, + "grad_norm": 0.3687514240026255, + "learning_rate": 3.906340041361255e-05, + "loss": 0.6089432835578918, + "step": 172 + }, + { + "epoch": 0.570957095709571, + "grad_norm": 0.3586674884704593, + "learning_rate": 3.904002937714606e-05, + "loss": 0.6583501696586609, + "step": 173 + }, + { + "epoch": 0.5742574257425742, + "grad_norm": 0.3399808047240735, + "learning_rate": 3.9016377499868666e-05, + "loss": 0.6108609437942505, + "step": 174 + }, + { + "epoch": 0.5775577557755776, + "grad_norm": 0.3840880337988826, + "learning_rate": 3.899244513064603e-05, + "loss": 
0.63509202003479, + "step": 175 + }, + { + "epoch": 0.5808580858085809, + "grad_norm": 0.3725541644477348, + "learning_rate": 3.896823262248107e-05, + "loss": 0.5759241580963135, + "step": 176 + }, + { + "epoch": 0.5841584158415841, + "grad_norm": 0.30755721985114126, + "learning_rate": 3.8943740332508754e-05, + "loss": 0.6148169040679932, + "step": 177 + }, + { + "epoch": 0.5874587458745875, + "grad_norm": 0.3916756097057637, + "learning_rate": 3.891896862199086e-05, + "loss": 0.5266364216804504, + "step": 178 + }, + { + "epoch": 0.5907590759075908, + "grad_norm": 0.3417854779376455, + "learning_rate": 3.88939178563106e-05, + "loss": 0.5626640319824219, + "step": 179 + }, + { + "epoch": 0.594059405940594, + "grad_norm": 0.33526488525207704, + "learning_rate": 3.886858840496727e-05, + "loss": 0.6063880920410156, + "step": 180 + }, + { + "epoch": 0.5973597359735974, + "grad_norm": 0.37344333250119977, + "learning_rate": 3.884298064157077e-05, + "loss": 0.5979235768318176, + "step": 181 + }, + { + "epoch": 0.6006600660066007, + "grad_norm": 0.3835133271197793, + "learning_rate": 3.881709494383612e-05, + "loss": 0.6628611087799072, + "step": 182 + }, + { + "epoch": 0.6039603960396039, + "grad_norm": 0.4344526004756121, + "learning_rate": 3.879093169357789e-05, + "loss": 0.6215270757675171, + "step": 183 + }, + { + "epoch": 0.6072607260726073, + "grad_norm": 0.3644174435488244, + "learning_rate": 3.876449127670452e-05, + "loss": 0.6148592233657837, + "step": 184 + }, + { + "epoch": 0.6105610561056105, + "grad_norm": 0.3619226265536735, + "learning_rate": 3.87377740832127e-05, + "loss": 0.6254778504371643, + "step": 185 + }, + { + "epoch": 0.6138613861386139, + "grad_norm": 0.3492162593840536, + "learning_rate": 3.871078050718155e-05, + "loss": 0.6025378704071045, + "step": 186 + }, + { + "epoch": 0.6171617161716172, + "grad_norm": 0.3866924759539626, + "learning_rate": 3.8683510946766866e-05, + "loss": 0.5887518525123596, + "step": 187 + }, + { + "epoch": 
0.6204620462046204, + "grad_norm": 0.3357229513721586, + "learning_rate": 3.865596580419519e-05, + "loss": 0.6180317401885986, + "step": 188 + }, + { + "epoch": 0.6237623762376238, + "grad_norm": 0.3594949077768003, + "learning_rate": 3.8628145485757925e-05, + "loss": 0.5970651507377625, + "step": 189 + }, + { + "epoch": 0.6270627062706271, + "grad_norm": 0.3496234009951303, + "learning_rate": 3.860005040180533e-05, + "loss": 0.6027296781539917, + "step": 190 + }, + { + "epoch": 0.6303630363036303, + "grad_norm": 0.3830042583584045, + "learning_rate": 3.857168096674044e-05, + "loss": 0.6326305270195007, + "step": 191 + }, + { + "epoch": 0.6336633663366337, + "grad_norm": 0.333508477943962, + "learning_rate": 3.854303759901299e-05, + "loss": 0.6508482694625854, + "step": 192 + }, + { + "epoch": 0.636963696369637, + "grad_norm": 0.352327105927571, + "learning_rate": 3.851412072111322e-05, + "loss": 0.6088548302650452, + "step": 193 + }, + { + "epoch": 0.6402640264026402, + "grad_norm": 0.36196379228138037, + "learning_rate": 3.8484930759565645e-05, + "loss": 0.5975607633590698, + "step": 194 + }, + { + "epoch": 0.6435643564356436, + "grad_norm": 0.3231664855297077, + "learning_rate": 3.845546814492279e-05, + "loss": 0.5467930436134338, + "step": 195 + }, + { + "epoch": 0.6468646864686468, + "grad_norm": 0.35556526722817444, + "learning_rate": 3.8425733311758795e-05, + "loss": 0.583969235420227, + "step": 196 + }, + { + "epoch": 0.6501650165016502, + "grad_norm": 0.331073543443887, + "learning_rate": 3.8395726698663045e-05, + "loss": 0.6007376909255981, + "step": 197 + }, + { + "epoch": 0.6534653465346535, + "grad_norm": 0.34786293006180385, + "learning_rate": 3.836544874823368e-05, + "loss": 0.5971908569335938, + "step": 198 + }, + { + "epoch": 0.6567656765676567, + "grad_norm": 0.3128647628132879, + "learning_rate": 3.8334899907071064e-05, + "loss": 0.592069685459137, + "step": 199 + }, + { + "epoch": 0.6600660066006601, + "grad_norm": 0.3308125796746202, + 
"learning_rate": 3.830408062577121e-05, + "loss": 0.6188071966171265, + "step": 200 + }, + { + "epoch": 0.6633663366336634, + "grad_norm": 0.34889077565364124, + "learning_rate": 3.827299135891913e-05, + "loss": 0.5976923704147339, + "step": 201 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.33443153994631497, + "learning_rate": 3.8241632565082124e-05, + "loss": 0.6120954155921936, + "step": 202 + }, + { + "epoch": 0.66996699669967, + "grad_norm": 0.3573334503206899, + "learning_rate": 3.821000470680303e-05, + "loss": 0.6661979556083679, + "step": 203 + }, + { + "epoch": 0.6732673267326733, + "grad_norm": 0.34662331225184934, + "learning_rate": 3.8178108250593384e-05, + "loss": 0.5853559970855713, + "step": 204 + }, + { + "epoch": 0.6765676567656765, + "grad_norm": 0.33823171869993424, + "learning_rate": 3.814594366692654e-05, + "loss": 0.6648768186569214, + "step": 205 + }, + { + "epoch": 0.6798679867986799, + "grad_norm": 0.4178878629038068, + "learning_rate": 3.8113511430230745e-05, + "loss": 0.5893838405609131, + "step": 206 + }, + { + "epoch": 0.6831683168316832, + "grad_norm": 0.36858896529016355, + "learning_rate": 3.808081201888214e-05, + "loss": 0.6177140474319458, + "step": 207 + }, + { + "epoch": 0.6864686468646864, + "grad_norm": 0.38061402245158527, + "learning_rate": 3.8047845915197695e-05, + "loss": 0.5793695449829102, + "step": 208 + }, + { + "epoch": 0.6897689768976898, + "grad_norm": 0.3591315376932048, + "learning_rate": 3.8014613605428084e-05, + "loss": 0.5571605563163757, + "step": 209 + }, + { + "epoch": 0.693069306930693, + "grad_norm": 0.33319862057164595, + "learning_rate": 3.798111557975053e-05, + "loss": 0.5945760011672974, + "step": 210 + }, + { + "epoch": 0.6963696369636964, + "grad_norm": 0.3495679574237745, + "learning_rate": 3.7947352332261586e-05, + "loss": 0.600873589515686, + "step": 211 + }, + { + "epoch": 0.6996699669966997, + "grad_norm": 0.37390147639764304, + "learning_rate": 3.791332436096983e-05, + "loss": 
0.6234852075576782, + "step": 212 + }, + { + "epoch": 0.7029702970297029, + "grad_norm": 0.3571653694610809, + "learning_rate": 3.7879032167788494e-05, + "loss": 0.6129578948020935, + "step": 213 + }, + { + "epoch": 0.7062706270627063, + "grad_norm": 0.48971881906384135, + "learning_rate": 3.784447625852812e-05, + "loss": 0.6204475164413452, + "step": 214 + }, + { + "epoch": 0.7095709570957096, + "grad_norm": 0.3610294548812676, + "learning_rate": 3.780965714288905e-05, + "loss": 0.6734122037887573, + "step": 215 + }, + { + "epoch": 0.7128712871287128, + "grad_norm": 0.35396639697907356, + "learning_rate": 3.777457533445393e-05, + "loss": 0.5678560137748718, + "step": 216 + }, + { + "epoch": 0.7161716171617162, + "grad_norm": 0.3232076597831296, + "learning_rate": 3.7739231350680135e-05, + "loss": 0.5784683227539062, + "step": 217 + }, + { + "epoch": 0.7194719471947195, + "grad_norm": 0.3540897506756201, + "learning_rate": 3.7703625712892125e-05, + "loss": 0.6060354113578796, + "step": 218 + }, + { + "epoch": 0.7227722772277227, + "grad_norm": 0.35008278157890194, + "learning_rate": 3.766775894627376e-05, + "loss": 0.6248741745948792, + "step": 219 + }, + { + "epoch": 0.7260726072607261, + "grad_norm": 0.32018676747331787, + "learning_rate": 3.7631631579860553e-05, + "loss": 0.6014479398727417, + "step": 220 + }, + { + "epoch": 0.7293729372937293, + "grad_norm": 0.32068744744726313, + "learning_rate": 3.759524414653189e-05, + "loss": 0.6283233761787415, + "step": 221 + }, + { + "epoch": 0.7326732673267327, + "grad_norm": 0.3047460979670785, + "learning_rate": 3.755859718300313e-05, + "loss": 0.5710185766220093, + "step": 222 + }, + { + "epoch": 0.735973597359736, + "grad_norm": 0.34698489216212486, + "learning_rate": 3.75216912298177e-05, + "loss": 0.6007407903671265, + "step": 223 + }, + { + "epoch": 0.7392739273927392, + "grad_norm": 0.4952362221345831, + "learning_rate": 3.748452683133916e-05, + "loss": 0.6852575540542603, + "step": 224 + }, + { + "epoch": 
0.7425742574257426, + "grad_norm": 0.32106680253004655, + "learning_rate": 3.7447104535743115e-05, + "loss": 0.6270833611488342, + "step": 225 + }, + { + "epoch": 0.7458745874587459, + "grad_norm": 0.30214814189665545, + "learning_rate": 3.740942489500916e-05, + "loss": 0.5925471782684326, + "step": 226 + }, + { + "epoch": 0.7491749174917491, + "grad_norm": 0.3171932777170319, + "learning_rate": 3.737148846491275e-05, + "loss": 0.573570728302002, + "step": 227 + }, + { + "epoch": 0.7524752475247525, + "grad_norm": 0.31480815810804524, + "learning_rate": 3.7333295805016986e-05, + "loss": 0.6088368892669678, + "step": 228 + }, + { + "epoch": 0.7557755775577558, + "grad_norm": 0.3103068539492526, + "learning_rate": 3.729484747866435e-05, + "loss": 0.5496470332145691, + "step": 229 + }, + { + "epoch": 0.759075907590759, + "grad_norm": 0.3007603199811456, + "learning_rate": 3.725614405296843e-05, + "loss": 0.6008220314979553, + "step": 230 + }, + { + "epoch": 0.7623762376237624, + "grad_norm": 0.3007492168191884, + "learning_rate": 3.721718609880551e-05, + "loss": 0.5982120037078857, + "step": 231 + }, + { + "epoch": 0.7656765676567657, + "grad_norm": 0.3010002181490163, + "learning_rate": 3.717797419080618e-05, + "loss": 0.6404559016227722, + "step": 232 + }, + { + "epoch": 0.768976897689769, + "grad_norm": 0.35604106645956024, + "learning_rate": 3.713850890734689e-05, + "loss": 0.5875239372253418, + "step": 233 + }, + { + "epoch": 0.7722772277227723, + "grad_norm": 0.33191901009333297, + "learning_rate": 3.709879083054133e-05, + "loss": 0.5962772369384766, + "step": 234 + }, + { + "epoch": 0.7755775577557755, + "grad_norm": 0.29418628627284477, + "learning_rate": 3.705882054623192e-05, + "loss": 0.5764110684394836, + "step": 235 + }, + { + "epoch": 0.7788778877887789, + "grad_norm": 0.30409612807603364, + "learning_rate": 3.7018598643981165e-05, + "loss": 0.5635858178138733, + "step": 236 + }, + { + "epoch": 0.7821782178217822, + "grad_norm": 0.3039645238556037, + 
"learning_rate": 3.69781257170629e-05, + "loss": 0.5880881547927856, + "step": 237 + }, + { + "epoch": 0.7854785478547854, + "grad_norm": 0.30606246597511416, + "learning_rate": 3.6937402362453606e-05, + "loss": 0.5644733905792236, + "step": 238 + }, + { + "epoch": 0.7887788778877888, + "grad_norm": 0.328325214152846, + "learning_rate": 3.689642918082358e-05, + "loss": 0.6431151032447815, + "step": 239 + }, + { + "epoch": 0.7920792079207921, + "grad_norm": 0.2863869456911102, + "learning_rate": 3.6855206776528055e-05, + "loss": 0.5848085880279541, + "step": 240 + }, + { + "epoch": 0.7953795379537953, + "grad_norm": 0.3169795193025283, + "learning_rate": 3.681373575759831e-05, + "loss": 0.590021550655365, + "step": 241 + }, + { + "epoch": 0.7986798679867987, + "grad_norm": 0.3630216059086489, + "learning_rate": 3.67720167357327e-05, + "loss": 0.6217919588088989, + "step": 242 + }, + { + "epoch": 0.801980198019802, + "grad_norm": 0.2999270957223198, + "learning_rate": 3.673005032628763e-05, + "loss": 0.6075180172920227, + "step": 243 + }, + { + "epoch": 0.8052805280528053, + "grad_norm": 0.35145967135780704, + "learning_rate": 3.668783714826846e-05, + "loss": 0.6078404188156128, + "step": 244 + }, + { + "epoch": 0.8085808580858086, + "grad_norm": 0.32650805345047657, + "learning_rate": 3.664537782432042e-05, + "loss": 0.6297526955604553, + "step": 245 + }, + { + "epoch": 0.8118811881188119, + "grad_norm": 0.32461322862254094, + "learning_rate": 3.660267298071936e-05, + "loss": 0.5684514045715332, + "step": 246 + }, + { + "epoch": 0.8151815181518152, + "grad_norm": 0.32171296221654416, + "learning_rate": 3.655972324736259e-05, + "loss": 0.6192148327827454, + "step": 247 + }, + { + "epoch": 0.8184818481848185, + "grad_norm": 0.3322336621503604, + "learning_rate": 3.6516529257759506e-05, + "loss": 0.5900243520736694, + "step": 248 + }, + { + "epoch": 0.8217821782178217, + "grad_norm": 0.35183312055445004, + "learning_rate": 3.6473091649022337e-05, + "loss": 
0.5941751599311829, + "step": 249 + }, + { + "epoch": 0.8250825082508251, + "grad_norm": 0.31255833045908565, + "learning_rate": 3.6429411061856645e-05, + "loss": 0.5744310021400452, + "step": 250 + }, + { + "epoch": 0.8283828382838284, + "grad_norm": 0.3266269251233177, + "learning_rate": 3.6385488140551985e-05, + "loss": 0.5985124707221985, + "step": 251 + }, + { + "epoch": 0.8316831683168316, + "grad_norm": 0.30426711611593643, + "learning_rate": 3.6341323532972294e-05, + "loss": 0.581912636756897, + "step": 252 + }, + { + "epoch": 0.834983498349835, + "grad_norm": 0.3297819735063718, + "learning_rate": 3.629691789054643e-05, + "loss": 0.586786150932312, + "step": 253 + }, + { + "epoch": 0.8382838283828383, + "grad_norm": 0.3074133078124695, + "learning_rate": 3.625227186825848e-05, + "loss": 0.6312603950500488, + "step": 254 + }, + { + "epoch": 0.8415841584158416, + "grad_norm": 0.33007753969064285, + "learning_rate": 3.620738612463818e-05, + "loss": 0.5886626243591309, + "step": 255 + }, + { + "epoch": 0.8448844884488449, + "grad_norm": 0.31334340596765187, + "learning_rate": 3.6162261321751114e-05, + "loss": 0.5892266035079956, + "step": 256 + }, + { + "epoch": 0.8481848184818482, + "grad_norm": 0.31784442826893616, + "learning_rate": 3.6116898125189045e-05, + "loss": 0.5472115278244019, + "step": 257 + }, + { + "epoch": 0.8514851485148515, + "grad_norm": 0.3456330158902343, + "learning_rate": 3.6071297204059995e-05, + "loss": 0.5981796383857727, + "step": 258 + }, + { + "epoch": 0.8547854785478548, + "grad_norm": 0.3377124553034101, + "learning_rate": 3.6025459230978475e-05, + "loss": 0.6708342432975769, + "step": 259 + }, + { + "epoch": 0.858085808580858, + "grad_norm": 0.3081391395426973, + "learning_rate": 3.597938488205549e-05, + "loss": 0.6306079626083374, + "step": 260 + }, + { + "epoch": 0.8613861386138614, + "grad_norm": 0.3398583824115319, + "learning_rate": 3.59330748368886e-05, + "loss": 0.6098329424858093, + "step": 261 + }, + { + "epoch": 
0.8646864686468647, + "grad_norm": 0.32878067719138626, + "learning_rate": 3.588652977855189e-05, + "loss": 0.5617724061012268, + "step": 262 + }, + { + "epoch": 0.8679867986798679, + "grad_norm": 0.34962664282188816, + "learning_rate": 3.58397503935859e-05, + "loss": 0.5780894756317139, + "step": 263 + }, + { + "epoch": 0.8712871287128713, + "grad_norm": 0.32665214019362204, + "learning_rate": 3.5792737371987477e-05, + "loss": 0.578921377658844, + "step": 264 + }, + { + "epoch": 0.8745874587458746, + "grad_norm": 0.36673188949709323, + "learning_rate": 3.574549140719962e-05, + "loss": 0.614944577217102, + "step": 265 + }, + { + "epoch": 0.8778877887788779, + "grad_norm": 0.3248666143164946, + "learning_rate": 3.569801319610125e-05, + "loss": 0.6269869208335876, + "step": 266 + }, + { + "epoch": 0.8811881188118812, + "grad_norm": 0.3338123662452596, + "learning_rate": 3.565030343899693e-05, + "loss": 0.6045581102371216, + "step": 267 + }, + { + "epoch": 0.8844884488448845, + "grad_norm": 0.31011600887091817, + "learning_rate": 3.5602362839606514e-05, + "loss": 0.5872907638549805, + "step": 268 + }, + { + "epoch": 0.8877887788778878, + "grad_norm": 0.31857062779594814, + "learning_rate": 3.55541921050548e-05, + "loss": 0.6283375024795532, + "step": 269 + }, + { + "epoch": 0.8910891089108911, + "grad_norm": 0.32445751859048455, + "learning_rate": 3.5505791945861076e-05, + "loss": 0.5747002363204956, + "step": 270 + }, + { + "epoch": 0.8943894389438944, + "grad_norm": 0.2923309334474062, + "learning_rate": 3.545716307592864e-05, + "loss": 0.6205827593803406, + "step": 271 + }, + { + "epoch": 0.8976897689768977, + "grad_norm": 0.43972579907455317, + "learning_rate": 3.54083062125343e-05, + "loss": 0.5987251400947571, + "step": 272 + }, + { + "epoch": 0.900990099009901, + "grad_norm": 0.33194286352506225, + "learning_rate": 3.535922207631776e-05, + "loss": 0.6275356411933899, + "step": 273 + }, + { + "epoch": 0.9042904290429042, + "grad_norm": 0.3408278730793354, + 
"learning_rate": 3.5309911391270996e-05, + "loss": 0.6097655892372131, + "step": 274 + }, + { + "epoch": 0.9075907590759076, + "grad_norm": 0.3441995699777348, + "learning_rate": 3.52603748847276e-05, + "loss": 0.544170618057251, + "step": 275 + }, + { + "epoch": 0.9108910891089109, + "grad_norm": 0.3034867763949278, + "learning_rate": 3.521061328735202e-05, + "loss": 0.5723366141319275, + "step": 276 + }, + { + "epoch": 0.9141914191419142, + "grad_norm": 0.3091145609625042, + "learning_rate": 3.516062733312879e-05, + "loss": 0.5801889896392822, + "step": 277 + }, + { + "epoch": 0.9174917491749175, + "grad_norm": 0.3532845546992122, + "learning_rate": 3.511041775935175e-05, + "loss": 0.5942766666412354, + "step": 278 + }, + { + "epoch": 0.9207920792079208, + "grad_norm": 0.3192035342587887, + "learning_rate": 3.50599853066131e-05, + "loss": 0.5604017972946167, + "step": 279 + }, + { + "epoch": 0.9240924092409241, + "grad_norm": 0.4475571406552253, + "learning_rate": 3.500933071879251e-05, + "loss": 0.6151460409164429, + "step": 280 + }, + { + "epoch": 0.9273927392739274, + "grad_norm": 0.30946498453996385, + "learning_rate": 3.495845474304616e-05, + "loss": 0.5854936838150024, + "step": 281 + }, + { + "epoch": 0.9306930693069307, + "grad_norm": 0.3188531409769719, + "learning_rate": 3.490735812979572e-05, + "loss": 0.5586672425270081, + "step": 282 + }, + { + "epoch": 0.933993399339934, + "grad_norm": 0.3250546549981712, + "learning_rate": 3.485604163271721e-05, + "loss": 0.578475832939148, + "step": 283 + }, + { + "epoch": 0.9372937293729373, + "grad_norm": 0.45030229248281484, + "learning_rate": 3.4804506008730015e-05, + "loss": 0.5236382484436035, + "step": 284 + }, + { + "epoch": 0.9405940594059405, + "grad_norm": 0.31677157675280776, + "learning_rate": 3.475275201798559e-05, + "loss": 0.5964822769165039, + "step": 285 + }, + { + "epoch": 0.9438943894389439, + "grad_norm": 0.3221519247617692, + "learning_rate": 3.4700780423856334e-05, + "loss": 
0.5551598072052002, + "step": 286 + }, + { + "epoch": 0.9471947194719472, + "grad_norm": 0.31322506983838, + "learning_rate": 3.464859199292429e-05, + "loss": 0.6095103621482849, + "step": 287 + }, + { + "epoch": 0.9504950495049505, + "grad_norm": 0.33333701342858213, + "learning_rate": 3.4596187494969846e-05, + "loss": 0.5893416404724121, + "step": 288 + }, + { + "epoch": 0.9537953795379538, + "grad_norm": 0.31167002926986764, + "learning_rate": 3.454356770296039e-05, + "loss": 0.5992231965065002, + "step": 289 + }, + { + "epoch": 0.9570957095709571, + "grad_norm": 0.3407826991036566, + "learning_rate": 3.4490733393038895e-05, + "loss": 0.6071972250938416, + "step": 290 + }, + { + "epoch": 0.9603960396039604, + "grad_norm": 0.321397588262469, + "learning_rate": 3.443768534451248e-05, + "loss": 0.5836942195892334, + "step": 291 + }, + { + "epoch": 0.9636963696369637, + "grad_norm": 0.3596023570145339, + "learning_rate": 3.4384424339840916e-05, + "loss": 0.5707553625106812, + "step": 292 + }, + { + "epoch": 0.966996699669967, + "grad_norm": 0.326365753033755, + "learning_rate": 3.4330951164625075e-05, + "loss": 0.5883970260620117, + "step": 293 + }, + { + "epoch": 0.9702970297029703, + "grad_norm": 0.3276030981345682, + "learning_rate": 3.427726660759535e-05, + "loss": 0.6281589269638062, + "step": 294 + }, + { + "epoch": 0.9735973597359736, + "grad_norm": 0.3559560269123216, + "learning_rate": 3.422337146060003e-05, + "loss": 0.6641702651977539, + "step": 295 + }, + { + "epoch": 0.976897689768977, + "grad_norm": 0.34661891319338206, + "learning_rate": 3.4169266518593596e-05, + "loss": 0.6398966312408447, + "step": 296 + }, + { + "epoch": 0.9801980198019802, + "grad_norm": 0.3392015122860613, + "learning_rate": 3.411495257962501e-05, + "loss": 0.6376276016235352, + "step": 297 + }, + { + "epoch": 0.9834983498349835, + "grad_norm": 0.3454832175281825, + "learning_rate": 3.406043044482596e-05, + "loss": 0.648975133895874, + "step": 298 + }, + { + "epoch": 
0.9867986798679867, + "grad_norm": 0.3284679145456545, + "learning_rate": 3.4005700918399016e-05, + "loss": 0.6201390624046326, + "step": 299 + }, + { + "epoch": 0.9900990099009901, + "grad_norm": 0.33000362479964457, + "learning_rate": 3.395076480760576e-05, + "loss": 0.6103875637054443, + "step": 300 + }, + { + "epoch": 0.9933993399339934, + "grad_norm": 0.31707924192462417, + "learning_rate": 3.3895622922754936e-05, + "loss": 0.5486876368522644, + "step": 301 + }, + { + "epoch": 0.9966996699669967, + "grad_norm": 0.3094164003933957, + "learning_rate": 3.384027607719043e-05, + "loss": 0.5980846285820007, + "step": 302 + }, + { + "epoch": 1.0, + "grad_norm": 0.33089398879681, + "learning_rate": 3.378472508727931e-05, + "loss": 0.5986801385879517, + "step": 303 + }, + { + "epoch": 1.0033003300330032, + "grad_norm": 0.4690060258405477, + "learning_rate": 3.372897077239979e-05, + "loss": 0.5586727857589722, + "step": 304 + }, + { + "epoch": 1.0066006600660067, + "grad_norm": 0.34686786747213394, + "learning_rate": 3.36730139549291e-05, + "loss": 0.5393255949020386, + "step": 305 + }, + { + "epoch": 1.00990099009901, + "grad_norm": 0.4023568892604613, + "learning_rate": 3.361685546023143e-05, + "loss": 0.5377227067947388, + "step": 306 + }, + { + "epoch": 1.0132013201320131, + "grad_norm": 0.39915820884177944, + "learning_rate": 3.356049611664568e-05, + "loss": 0.5223784446716309, + "step": 307 + }, + { + "epoch": 1.0165016501650166, + "grad_norm": 0.3654265250846575, + "learning_rate": 3.350393675547328e-05, + "loss": 0.5502469539642334, + "step": 308 + }, + { + "epoch": 1.0198019801980198, + "grad_norm": 0.42079557297663883, + "learning_rate": 3.3447178210965936e-05, + "loss": 0.5626603960990906, + "step": 309 + }, + { + "epoch": 1.023102310231023, + "grad_norm": 0.3684084639129366, + "learning_rate": 3.3390221320313303e-05, + "loss": 0.48262274265289307, + "step": 310 + }, + { + "epoch": 1.0264026402640265, + "grad_norm": 0.39908786063309193, + "learning_rate": 
3.333306692363065e-05, + "loss": 0.5850967168807983, + "step": 311 + }, + { + "epoch": 1.0297029702970297, + "grad_norm": 0.44262876970078274, + "learning_rate": 3.3275715863946466e-05, + "loss": 0.5444281697273254, + "step": 312 + }, + { + "epoch": 1.033003300330033, + "grad_norm": 0.35239079669120155, + "learning_rate": 3.3218168987190004e-05, + "loss": 0.5329654216766357, + "step": 313 + }, + { + "epoch": 1.0363036303630364, + "grad_norm": 0.38499730860339404, + "learning_rate": 3.316042714217885e-05, + "loss": 0.5276832580566406, + "step": 314 + }, + { + "epoch": 1.0396039603960396, + "grad_norm": 0.3928937531164494, + "learning_rate": 3.310249118060636e-05, + "loss": 0.5344791412353516, + "step": 315 + }, + { + "epoch": 1.0429042904290429, + "grad_norm": 0.3466589226743573, + "learning_rate": 3.304436195702911e-05, + "loss": 0.5479785203933716, + "step": 316 + }, + { + "epoch": 1.046204620462046, + "grad_norm": 0.370325309360066, + "learning_rate": 3.298604032885431e-05, + "loss": 0.5223082900047302, + "step": 317 + }, + { + "epoch": 1.0495049504950495, + "grad_norm": 0.4271803134046634, + "learning_rate": 3.292752715632713e-05, + "loss": 0.5667799711227417, + "step": 318 + }, + { + "epoch": 1.0528052805280528, + "grad_norm": 0.33752277032768196, + "learning_rate": 3.2868823302518016e-05, + "loss": 0.5194317698478699, + "step": 319 + }, + { + "epoch": 1.056105610561056, + "grad_norm": 0.35801795115870316, + "learning_rate": 3.2809929633309985e-05, + "loss": 0.4911007285118103, + "step": 320 + }, + { + "epoch": 1.0594059405940595, + "grad_norm": 0.33819516112787196, + "learning_rate": 3.2750847017385826e-05, + "loss": 0.5269002914428711, + "step": 321 + }, + { + "epoch": 1.0627062706270627, + "grad_norm": 0.3280280196094967, + "learning_rate": 3.269157632621529e-05, + "loss": 0.5124789476394653, + "step": 322 + }, + { + "epoch": 1.066006600660066, + "grad_norm": 0.3841029677303286, + "learning_rate": 3.263211843404225e-05, + "loss": 0.5483890771865845, + 
"step": 323 + }, + { + "epoch": 1.0693069306930694, + "grad_norm": 0.348752311292252, + "learning_rate": 3.25724742178718e-05, + "loss": 0.5582579374313354, + "step": 324 + }, + { + "epoch": 1.0726072607260726, + "grad_norm": 0.3672218653955236, + "learning_rate": 3.2512644557457304e-05, + "loss": 0.5662975907325745, + "step": 325 + }, + { + "epoch": 1.0759075907590758, + "grad_norm": 0.339133227284404, + "learning_rate": 3.2452630335287445e-05, + "loss": 0.5502511858940125, + "step": 326 + }, + { + "epoch": 1.0792079207920793, + "grad_norm": 0.3607463939055526, + "learning_rate": 3.239243243657318e-05, + "loss": 0.5614978075027466, + "step": 327 + }, + { + "epoch": 1.0825082508250825, + "grad_norm": 0.3354690532522152, + "learning_rate": 3.233205174923472e-05, + "loss": 0.4828110635280609, + "step": 328 + }, + { + "epoch": 1.0858085808580857, + "grad_norm": 0.3296040603044689, + "learning_rate": 3.22714891638884e-05, + "loss": 0.5437847971916199, + "step": 329 + }, + { + "epoch": 1.0891089108910892, + "grad_norm": 0.3295415767468974, + "learning_rate": 3.221074557383355e-05, + "loss": 0.6240063309669495, + "step": 330 + }, + { + "epoch": 1.0924092409240924, + "grad_norm": 0.3032628226796708, + "learning_rate": 3.2149821875039325e-05, + "loss": 0.5435442328453064, + "step": 331 + }, + { + "epoch": 1.0957095709570956, + "grad_norm": 0.30875440813945676, + "learning_rate": 3.20887189661315e-05, + "loss": 0.5240401029586792, + "step": 332 + }, + { + "epoch": 1.099009900990099, + "grad_norm": 0.3043121620505056, + "learning_rate": 3.202743774837919e-05, + "loss": 0.5227692127227783, + "step": 333 + }, + { + "epoch": 1.1023102310231023, + "grad_norm": 0.3439754692795775, + "learning_rate": 3.196597912568157e-05, + "loss": 0.5607417821884155, + "step": 334 + }, + { + "epoch": 1.1056105610561056, + "grad_norm": 0.29691798670137787, + "learning_rate": 3.1904344004554536e-05, + "loss": 0.5607600808143616, + "step": 335 + }, + { + "epoch": 1.108910891089109, + "grad_norm": 
0.32493088910689055, + "learning_rate": 3.184253329411737e-05, + "loss": 0.47135430574417114, + "step": 336 + }, + { + "epoch": 1.1122112211221122, + "grad_norm": 0.3202945703052858, + "learning_rate": 3.178054790607924e-05, + "loss": 0.5708764791488647, + "step": 337 + }, + { + "epoch": 1.1155115511551155, + "grad_norm": 0.3164605548495645, + "learning_rate": 3.1718388754725883e-05, + "loss": 0.5522497296333313, + "step": 338 + }, + { + "epoch": 1.118811881188119, + "grad_norm": 0.3449586600316318, + "learning_rate": 3.1656056756906e-05, + "loss": 0.5556532144546509, + "step": 339 + }, + { + "epoch": 1.1221122112211221, + "grad_norm": 0.3130025484639745, + "learning_rate": 3.1593552832017795e-05, + "loss": 0.5727676153182983, + "step": 340 + }, + { + "epoch": 1.1254125412541254, + "grad_norm": 0.3195703179740936, + "learning_rate": 3.153087790199541e-05, + "loss": 0.5131651759147644, + "step": 341 + }, + { + "epoch": 1.1287128712871288, + "grad_norm": 0.3191177264656739, + "learning_rate": 3.146803289129528e-05, + "loss": 0.5143063068389893, + "step": 342 + }, + { + "epoch": 1.132013201320132, + "grad_norm": 0.33398757419035885, + "learning_rate": 3.1405018726882595e-05, + "loss": 0.509161114692688, + "step": 343 + }, + { + "epoch": 1.1353135313531353, + "grad_norm": 0.33058725446313514, + "learning_rate": 3.13418363382175e-05, + "loss": 0.5213526487350464, + "step": 344 + }, + { + "epoch": 1.1386138613861387, + "grad_norm": 0.3226863318187914, + "learning_rate": 3.127848665724149e-05, + "loss": 0.5465434789657593, + "step": 345 + }, + { + "epoch": 1.141914191419142, + "grad_norm": 0.6179658385179007, + "learning_rate": 3.1214970618363626e-05, + "loss": 0.5342190265655518, + "step": 346 + }, + { + "epoch": 1.1452145214521452, + "grad_norm": 0.47777163001134637, + "learning_rate": 3.115128915844672e-05, + "loss": 0.541754424571991, + "step": 347 + }, + { + "epoch": 1.1485148514851484, + "grad_norm": 0.33931974771490697, + "learning_rate": 3.10874432167936e-05, + 
"loss": 0.5318331122398376, + "step": 348 + }, + { + "epoch": 1.1518151815181519, + "grad_norm": 0.32111740987941506, + "learning_rate": 3.1023433735133134e-05, + "loss": 0.4972509741783142, + "step": 349 + }, + { + "epoch": 1.155115511551155, + "grad_norm": 0.30074948382432587, + "learning_rate": 3.095926165760647e-05, + "loss": 0.5417294502258301, + "step": 350 + }, + { + "epoch": 1.1584158415841583, + "grad_norm": 0.3410522798436207, + "learning_rate": 3.089492793075302e-05, + "loss": 0.554945707321167, + "step": 351 + }, + { + "epoch": 1.1617161716171618, + "grad_norm": 0.3254774061643724, + "learning_rate": 3.083043350349653e-05, + "loss": 0.5204564929008484, + "step": 352 + }, + { + "epoch": 1.165016501650165, + "grad_norm": 0.3088402728006412, + "learning_rate": 3.076577932713108e-05, + "loss": 0.4856947064399719, + "step": 353 + }, + { + "epoch": 1.1683168316831682, + "grad_norm": 0.2896918095760776, + "learning_rate": 3.0700966355307055e-05, + "loss": 0.5269368886947632, + "step": 354 + }, + { + "epoch": 1.1716171617161717, + "grad_norm": 0.32747543865706225, + "learning_rate": 3.063599554401708e-05, + "loss": 0.5811939239501953, + "step": 355 + }, + { + "epoch": 1.174917491749175, + "grad_norm": 0.29324577597304957, + "learning_rate": 3.057086785158189e-05, + "loss": 0.5636904239654541, + "step": 356 + }, + { + "epoch": 1.1782178217821782, + "grad_norm": 0.31779620334412045, + "learning_rate": 3.050558423863626e-05, + "loss": 0.546089768409729, + "step": 357 + }, + { + "epoch": 1.1815181518151816, + "grad_norm": 0.3093045991582328, + "learning_rate": 3.0440145668114774e-05, + "loss": 0.5239901542663574, + "step": 358 + }, + { + "epoch": 1.1848184818481848, + "grad_norm": 0.31848934088179354, + "learning_rate": 3.0374553105237637e-05, + "loss": 0.5833466053009033, + "step": 359 + }, + { + "epoch": 1.188118811881188, + "grad_norm": 0.33803859097620154, + "learning_rate": 3.0308807517496456e-05, + "loss": 0.5060774087905884, + "step": 360 + }, + { + "epoch": 
1.1914191419141915, + "grad_norm": 0.31145081064149094, + "learning_rate": 3.0242909874639953e-05, + "loss": 0.5164307355880737, + "step": 361 + }, + { + "epoch": 1.1947194719471947, + "grad_norm": 0.29765085452905116, + "learning_rate": 3.0176861148659672e-05, + "loss": 0.49949395656585693, + "step": 362 + }, + { + "epoch": 1.198019801980198, + "grad_norm": 0.3296486034239661, + "learning_rate": 3.0110662313775623e-05, + "loss": 0.5581181049346924, + "step": 363 + }, + { + "epoch": 1.2013201320132012, + "grad_norm": 0.3116631729941006, + "learning_rate": 3.0044314346421938e-05, + "loss": 0.5657376646995544, + "step": 364 + }, + { + "epoch": 1.2046204620462047, + "grad_norm": 0.33012695180790946, + "learning_rate": 2.9977818225232443e-05, + "loss": 0.5269935131072998, + "step": 365 + }, + { + "epoch": 1.2079207920792079, + "grad_norm": 0.31869984664933465, + "learning_rate": 2.991117493102626e-05, + "loss": 0.5385931730270386, + "step": 366 + }, + { + "epoch": 1.2112211221122111, + "grad_norm": 0.30491226427581125, + "learning_rate": 2.984438544679329e-05, + "loss": 0.5615143179893494, + "step": 367 + }, + { + "epoch": 1.2145214521452146, + "grad_norm": 0.32195999076013593, + "learning_rate": 2.9777450757679754e-05, + "loss": 0.5175333023071289, + "step": 368 + }, + { + "epoch": 1.2178217821782178, + "grad_norm": 0.30930257180361886, + "learning_rate": 2.971037185097364e-05, + "loss": 0.565494179725647, + "step": 369 + }, + { + "epoch": 1.221122112211221, + "grad_norm": 0.34237830645177886, + "learning_rate": 2.9643149716090146e-05, + "loss": 0.5519120693206787, + "step": 370 + }, + { + "epoch": 1.2244224422442245, + "grad_norm": 0.30959351563618437, + "learning_rate": 2.9575785344557114e-05, + "loss": 0.49374374747276306, + "step": 371 + }, + { + "epoch": 1.2277227722772277, + "grad_norm": 0.31310768619122714, + "learning_rate": 2.950827973000034e-05, + "loss": 0.5608875751495361, + "step": 372 + }, + { + "epoch": 1.231023102310231, + "grad_norm": 
0.31986895424613543, + "learning_rate": 2.944063386812899e-05, + "loss": 0.5866271257400513, + "step": 373 + }, + { + "epoch": 1.2343234323432344, + "grad_norm": 0.3359900469491975, + "learning_rate": 2.9372848756720867e-05, + "loss": 0.5342913269996643, + "step": 374 + }, + { + "epoch": 1.2376237623762376, + "grad_norm": 0.2956484140793021, + "learning_rate": 2.9304925395607696e-05, + "loss": 0.5539537668228149, + "step": 375 + }, + { + "epoch": 1.2409240924092408, + "grad_norm": 0.3239136306261367, + "learning_rate": 2.9236864786660423e-05, + "loss": 0.5614147186279297, + "step": 376 + }, + { + "epoch": 1.2442244224422443, + "grad_norm": 0.3311932744032855, + "learning_rate": 2.9168667933774356e-05, + "loss": 0.46689367294311523, + "step": 377 + }, + { + "epoch": 1.2475247524752475, + "grad_norm": 0.3291299090174619, + "learning_rate": 2.910033584285444e-05, + "loss": 0.5383083820343018, + "step": 378 + }, + { + "epoch": 1.2508250825082508, + "grad_norm": 0.3013900588246958, + "learning_rate": 2.903186952180037e-05, + "loss": 0.5349752902984619, + "step": 379 + }, + { + "epoch": 1.2541254125412542, + "grad_norm": 0.3219145450840317, + "learning_rate": 2.8963269980491743e-05, + "loss": 0.5792303681373596, + "step": 380 + }, + { + "epoch": 1.2574257425742574, + "grad_norm": 0.2840550960191948, + "learning_rate": 2.8894538230773147e-05, + "loss": 0.524924099445343, + "step": 381 + }, + { + "epoch": 1.2607260726072607, + "grad_norm": 0.3172399675943548, + "learning_rate": 2.882567528643925e-05, + "loss": 0.5137406587600708, + "step": 382 + }, + { + "epoch": 1.2640264026402641, + "grad_norm": 0.2893676822687234, + "learning_rate": 2.8756682163219857e-05, + "loss": 0.5196574926376343, + "step": 383 + }, + { + "epoch": 1.2673267326732673, + "grad_norm": 0.31363904787626334, + "learning_rate": 2.8687559878764903e-05, + "loss": 0.585644006729126, + "step": 384 + }, + { + "epoch": 1.2706270627062706, + "grad_norm": 0.3310272877884813, + "learning_rate": 
2.8618309452629445e-05, + "loss": 0.5973786115646362, + "step": 385 + }, + { + "epoch": 1.273927392739274, + "grad_norm": 0.3201222210217655, + "learning_rate": 2.854893190625865e-05, + "loss": 0.5909825563430786, + "step": 386 + }, + { + "epoch": 1.2772277227722773, + "grad_norm": 0.3507731714316878, + "learning_rate": 2.84794282629727e-05, + "loss": 0.5903690457344055, + "step": 387 + }, + { + "epoch": 1.2805280528052805, + "grad_norm": 0.31011243056320775, + "learning_rate": 2.840979954795171e-05, + "loss": 0.5316457152366638, + "step": 388 + }, + { + "epoch": 1.283828382838284, + "grad_norm": 0.32950464198309637, + "learning_rate": 2.8340046788220613e-05, + "loss": 0.5080389976501465, + "step": 389 + }, + { + "epoch": 1.2871287128712872, + "grad_norm": 0.37769184930606736, + "learning_rate": 2.8270171012633994e-05, + "loss": 0.6137889623641968, + "step": 390 + }, + { + "epoch": 1.2904290429042904, + "grad_norm": 0.34430823745531935, + "learning_rate": 2.8200173251860928e-05, + "loss": 0.5433805584907532, + "step": 391 + }, + { + "epoch": 1.2937293729372938, + "grad_norm": 0.356563736773021, + "learning_rate": 2.8130054538369775e-05, + "loss": 0.4965590834617615, + "step": 392 + }, + { + "epoch": 1.297029702970297, + "grad_norm": 0.29380923244218154, + "learning_rate": 2.805981590641295e-05, + "loss": 0.5361340045928955, + "step": 393 + }, + { + "epoch": 1.3003300330033003, + "grad_norm": 0.31403525376793245, + "learning_rate": 2.7989458392011678e-05, + "loss": 0.47011327743530273, + "step": 394 + }, + { + "epoch": 1.3036303630363038, + "grad_norm": 0.30710914438533876, + "learning_rate": 2.7918983032940666e-05, + "loss": 0.5893687605857849, + "step": 395 + }, + { + "epoch": 1.306930693069307, + "grad_norm": 0.3126943781985397, + "learning_rate": 2.7848390868712886e-05, + "loss": 0.5219327211380005, + "step": 396 + }, + { + "epoch": 1.3102310231023102, + "grad_norm": 0.35585146532127665, + "learning_rate": 2.7777682940564142e-05, + "loss": 0.5652155876159668, + 
"step": 397 + }, + { + "epoch": 1.3135313531353137, + "grad_norm": 0.41906023992763497, + "learning_rate": 2.7706860291437784e-05, + "loss": 0.5361950397491455, + "step": 398 + }, + { + "epoch": 1.316831683168317, + "grad_norm": 0.29071400108766793, + "learning_rate": 2.763592396596929e-05, + "loss": 0.5355206727981567, + "step": 399 + }, + { + "epoch": 1.3201320132013201, + "grad_norm": 0.298123677847084, + "learning_rate": 2.756487501047086e-05, + "loss": 0.5082858800888062, + "step": 400 + }, + { + "epoch": 1.3234323432343233, + "grad_norm": 0.3144050740212562, + "learning_rate": 2.7493714472916013e-05, + "loss": 0.5282934904098511, + "step": 401 + }, + { + "epoch": 1.3267326732673268, + "grad_norm": 0.29396121691648713, + "learning_rate": 2.7422443402924074e-05, + "loss": 0.5502887964248657, + "step": 402 + }, + { + "epoch": 1.33003300330033, + "grad_norm": 0.2854429234726643, + "learning_rate": 2.7351062851744747e-05, + "loss": 0.5374204516410828, + "step": 403 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.30308752538818784, + "learning_rate": 2.7279573872242574e-05, + "loss": 0.5602293014526367, + "step": 404 + }, + { + "epoch": 1.3366336633663367, + "grad_norm": 0.30975657746221447, + "learning_rate": 2.7207977518881418e-05, + "loss": 0.5321286916732788, + "step": 405 + }, + { + "epoch": 1.33993399339934, + "grad_norm": 0.28965457921713383, + "learning_rate": 2.713627484770892e-05, + "loss": 0.5523560047149658, + "step": 406 + }, + { + "epoch": 1.3432343234323432, + "grad_norm": 0.30598816879566076, + "learning_rate": 2.706446691634089e-05, + "loss": 0.47019705176353455, + "step": 407 + }, + { + "epoch": 1.3465346534653464, + "grad_norm": 0.2977261513860205, + "learning_rate": 2.6992554783945748e-05, + "loss": 0.540359616279602, + "step": 408 + }, + { + "epoch": 1.3498349834983498, + "grad_norm": 0.2845048826043699, + "learning_rate": 2.6920539511228874e-05, + "loss": 0.561464786529541, + "step": 409 + }, + { + "epoch": 1.353135313531353, + 
"grad_norm": 0.2939741197740927, + "learning_rate": 2.6848422160416956e-05, + "loss": 0.5429259538650513, + "step": 410 + }, + { + "epoch": 1.3564356435643563, + "grad_norm": 0.2968609589915083, + "learning_rate": 2.677620379524237e-05, + "loss": 0.5452640652656555, + "step": 411 + }, + { + "epoch": 1.3597359735973598, + "grad_norm": 0.28949363661635646, + "learning_rate": 2.670388548092741e-05, + "loss": 0.49627864360809326, + "step": 412 + }, + { + "epoch": 1.363036303630363, + "grad_norm": 0.328169978832012, + "learning_rate": 2.663146828416867e-05, + "loss": 0.5331633687019348, + "step": 413 + }, + { + "epoch": 1.3663366336633662, + "grad_norm": 0.2926434963884909, + "learning_rate": 2.6558953273121216e-05, + "loss": 0.5447151064872742, + "step": 414 + }, + { + "epoch": 1.3696369636963697, + "grad_norm": 0.2863360845432002, + "learning_rate": 2.648634151738292e-05, + "loss": 0.5467007160186768, + "step": 415 + }, + { + "epoch": 1.372937293729373, + "grad_norm": 0.33044933855099695, + "learning_rate": 2.6413634087978602e-05, + "loss": 0.5804279446601868, + "step": 416 + }, + { + "epoch": 1.3762376237623761, + "grad_norm": 0.29168904019746145, + "learning_rate": 2.63408320573443e-05, + "loss": 0.5323517322540283, + "step": 417 + }, + { + "epoch": 1.3795379537953796, + "grad_norm": 0.3046417110987717, + "learning_rate": 2.6267936499311402e-05, + "loss": 0.5452409982681274, + "step": 418 + }, + { + "epoch": 1.3828382838283828, + "grad_norm": 0.2878853361033164, + "learning_rate": 2.619494848909084e-05, + "loss": 0.4622665047645569, + "step": 419 + }, + { + "epoch": 1.386138613861386, + "grad_norm": 0.3129938954769346, + "learning_rate": 2.6121869103257206e-05, + "loss": 0.531772255897522, + "step": 420 + }, + { + "epoch": 1.3894389438943895, + "grad_norm": 0.3044320552061303, + "learning_rate": 2.6048699419732897e-05, + "loss": 0.519554853439331, + "step": 421 + }, + { + "epoch": 1.3927392739273927, + "grad_norm": 0.32616258357306027, + "learning_rate": 
2.5975440517772187e-05, + "loss": 0.545585572719574, + "step": 422 + }, + { + "epoch": 1.396039603960396, + "grad_norm": 0.297995845019565, + "learning_rate": 2.5902093477945345e-05, + "loss": 0.5641547441482544, + "step": 423 + }, + { + "epoch": 1.3993399339933994, + "grad_norm": 0.28406971495281874, + "learning_rate": 2.5828659382122655e-05, + "loss": 0.5578028559684753, + "step": 424 + }, + { + "epoch": 1.4026402640264026, + "grad_norm": 0.35618435421860006, + "learning_rate": 2.5755139313458484e-05, + "loss": 0.5931404232978821, + "step": 425 + }, + { + "epoch": 1.4059405940594059, + "grad_norm": 0.3227282264542969, + "learning_rate": 2.5681534356375314e-05, + "loss": 0.5486891865730286, + "step": 426 + }, + { + "epoch": 1.4092409240924093, + "grad_norm": 0.31220449886262164, + "learning_rate": 2.5607845596547706e-05, + "loss": 0.5007671117782593, + "step": 427 + }, + { + "epoch": 1.4125412541254125, + "grad_norm": 0.2970377848116104, + "learning_rate": 2.5534074120886346e-05, + "loss": 0.5044519901275635, + "step": 428 + }, + { + "epoch": 1.4158415841584158, + "grad_norm": 0.30667327850480125, + "learning_rate": 2.5460221017521952e-05, + "loss": 0.5227789878845215, + "step": 429 + }, + { + "epoch": 1.4191419141914192, + "grad_norm": 0.2902458759439887, + "learning_rate": 2.538628737578926e-05, + "loss": 0.5530189871788025, + "step": 430 + }, + { + "epoch": 1.4224422442244224, + "grad_norm": 0.3114416510328153, + "learning_rate": 2.5312274286210966e-05, + "loss": 0.508142352104187, + "step": 431 + }, + { + "epoch": 1.4257425742574257, + "grad_norm": 0.30284970816559353, + "learning_rate": 2.523818284048159e-05, + "loss": 0.5497263669967651, + "step": 432 + }, + { + "epoch": 1.4290429042904291, + "grad_norm": 0.3619418905679721, + "learning_rate": 2.5164014131451443e-05, + "loss": 0.5477034449577332, + "step": 433 + }, + { + "epoch": 1.4323432343234324, + "grad_norm": 0.28668741491270383, + "learning_rate": 2.508976925311045e-05, + "loss": 0.5091728568077087, + 
"step": 434 + }, + { + "epoch": 1.4356435643564356, + "grad_norm": 0.2922234358135184, + "learning_rate": 2.501544930057203e-05, + "loss": 0.5022713541984558, + "step": 435 + }, + { + "epoch": 1.438943894389439, + "grad_norm": 0.29994035273286174, + "learning_rate": 2.494105537005697e-05, + "loss": 0.5401599407196045, + "step": 436 + }, + { + "epoch": 1.4422442244224423, + "grad_norm": 0.27863085551634303, + "learning_rate": 2.4866588558877208e-05, + "loss": 0.5632063150405884, + "step": 437 + }, + { + "epoch": 1.4455445544554455, + "grad_norm": 0.2968792338733857, + "learning_rate": 2.479204996541969e-05, + "loss": 0.552355170249939, + "step": 438 + }, + { + "epoch": 1.448844884488449, + "grad_norm": 0.3222205976590156, + "learning_rate": 2.4717440689130154e-05, + "loss": 0.5604996681213379, + "step": 439 + }, + { + "epoch": 1.4521452145214522, + "grad_norm": 0.2781451863798608, + "learning_rate": 2.4642761830496893e-05, + "loss": 0.4961245656013489, + "step": 440 + }, + { + "epoch": 1.4554455445544554, + "grad_norm": 0.3327533816855903, + "learning_rate": 2.4568014491034565e-05, + "loss": 0.5403590202331543, + "step": 441 + }, + { + "epoch": 1.4587458745874589, + "grad_norm": 0.2944499869326328, + "learning_rate": 2.4493199773267902e-05, + "loss": 0.4753378629684448, + "step": 442 + }, + { + "epoch": 1.462046204620462, + "grad_norm": 0.30936599048377306, + "learning_rate": 2.4418318780715477e-05, + "loss": 0.5125438570976257, + "step": 443 + }, + { + "epoch": 1.4653465346534653, + "grad_norm": 0.3047486735791836, + "learning_rate": 2.434337261787342e-05, + "loss": 0.5670269727706909, + "step": 444 + }, + { + "epoch": 1.4686468646864688, + "grad_norm": 0.3348418102837006, + "learning_rate": 2.426836239019911e-05, + "loss": 0.5538198947906494, + "step": 445 + }, + { + "epoch": 1.471947194719472, + "grad_norm": 0.2790312641462961, + "learning_rate": 2.4193289204094893e-05, + "loss": 0.5012328028678894, + "step": 446 + }, + { + "epoch": 1.4752475247524752, + 
"grad_norm": 0.30485310749783334, + "learning_rate": 2.4118154166891762e-05, + "loss": 0.538119912147522, + "step": 447 + }, + { + "epoch": 1.4785478547854787, + "grad_norm": 0.32398781026753815, + "learning_rate": 2.4042958386833003e-05, + "loss": 0.5252339839935303, + "step": 448 + }, + { + "epoch": 1.481848184818482, + "grad_norm": 0.326928536480608, + "learning_rate": 2.3967702973057853e-05, + "loss": 0.5367081761360168, + "step": 449 + }, + { + "epoch": 1.4851485148514851, + "grad_norm": 0.3044938562463835, + "learning_rate": 2.3892389035585167e-05, + "loss": 0.5091884136199951, + "step": 450 + }, + { + "epoch": 1.4884488448844886, + "grad_norm": 0.2897824690201277, + "learning_rate": 2.3817017685297016e-05, + "loss": 0.5079891681671143, + "step": 451 + }, + { + "epoch": 1.4917491749174918, + "grad_norm": 0.2966882318097961, + "learning_rate": 2.3741590033922313e-05, + "loss": 0.511939287185669, + "step": 452 + }, + { + "epoch": 1.495049504950495, + "grad_norm": 0.28797637565211376, + "learning_rate": 2.3666107194020404e-05, + "loss": 0.5070478916168213, + "step": 453 + }, + { + "epoch": 1.4983498349834983, + "grad_norm": 0.29050652670321586, + "learning_rate": 2.3590570278964682e-05, + "loss": 0.547492504119873, + "step": 454 + }, + { + "epoch": 1.5016501650165015, + "grad_norm": 0.311874965448668, + "learning_rate": 2.3514980402926132e-05, + "loss": 0.5386558771133423, + "step": 455 + }, + { + "epoch": 1.504950495049505, + "grad_norm": 0.26980126113979913, + "learning_rate": 2.3439338680856943e-05, + "loss": 0.48668172955513, + "step": 456 + }, + { + "epoch": 1.5082508250825084, + "grad_norm": 0.31689121328788056, + "learning_rate": 2.3363646228474002e-05, + "loss": 0.5497942566871643, + "step": 457 + }, + { + "epoch": 1.5115511551155114, + "grad_norm": 0.3648919358675907, + "learning_rate": 2.328790416224248e-05, + "loss": 0.5267748832702637, + "step": 458 + }, + { + "epoch": 1.5148514851485149, + "grad_norm": 0.3191029117024018, + "learning_rate": 
2.3212113599359368e-05, + "loss": 0.5578982830047607, + "step": 459 + }, + { + "epoch": 1.5181518151815183, + "grad_norm": 0.30610891906133464, + "learning_rate": 2.3136275657736956e-05, + "loss": 0.5136545896530151, + "step": 460 + }, + { + "epoch": 1.5214521452145213, + "grad_norm": 0.28466532575384307, + "learning_rate": 2.3060391455986403e-05, + "loss": 0.5718669891357422, + "step": 461 + }, + { + "epoch": 1.5247524752475248, + "grad_norm": 0.3064265170567389, + "learning_rate": 2.2984462113401184e-05, + "loss": 0.5427108407020569, + "step": 462 + }, + { + "epoch": 1.528052805280528, + "grad_norm": 0.28495826208338726, + "learning_rate": 2.2908488749940596e-05, + "loss": 0.5293564200401306, + "step": 463 + }, + { + "epoch": 1.5313531353135312, + "grad_norm": 0.3073240786964915, + "learning_rate": 2.2832472486213275e-05, + "loss": 0.550743579864502, + "step": 464 + }, + { + "epoch": 1.5346534653465347, + "grad_norm": 0.30789089349395116, + "learning_rate": 2.2756414443460602e-05, + "loss": 0.5957387685775757, + "step": 465 + }, + { + "epoch": 1.537953795379538, + "grad_norm": 0.2840660845057486, + "learning_rate": 2.2680315743540234e-05, + "loss": 0.4994407892227173, + "step": 466 + }, + { + "epoch": 1.5412541254125411, + "grad_norm": 0.2912314912557071, + "learning_rate": 2.260417750890949e-05, + "loss": 0.5120857954025269, + "step": 467 + }, + { + "epoch": 1.5445544554455446, + "grad_norm": 0.3024618438133355, + "learning_rate": 2.2528000862608845e-05, + "loss": 0.5727359056472778, + "step": 468 + }, + { + "epoch": 1.5478547854785478, + "grad_norm": 0.30379584493476613, + "learning_rate": 2.2451786928245344e-05, + "loss": 0.584964394569397, + "step": 469 + }, + { + "epoch": 1.551155115511551, + "grad_norm": 0.2782374360382863, + "learning_rate": 2.237553682997603e-05, + "loss": 0.5507112741470337, + "step": 470 + }, + { + "epoch": 1.5544554455445545, + "grad_norm": 0.26333814455393634, + "learning_rate": 2.2299251692491364e-05, + "loss": 0.49136701226234436, + 
"step": 471 + }, + { + "epoch": 1.5577557755775577, + "grad_norm": 0.31673569076077385, + "learning_rate": 2.2222932640998635e-05, + "loss": 0.5374805927276611, + "step": 472 + }, + { + "epoch": 1.561056105610561, + "grad_norm": 0.29370656251116817, + "learning_rate": 2.2146580801205362e-05, + "loss": 0.523996114730835, + "step": 473 + }, + { + "epoch": 1.5643564356435644, + "grad_norm": 0.27277397989040114, + "learning_rate": 2.207019729930271e-05, + "loss": 0.48198428750038147, + "step": 474 + }, + { + "epoch": 1.5676567656765676, + "grad_norm": 0.2861287068823064, + "learning_rate": 2.199378326194883e-05, + "loss": 0.5148699879646301, + "step": 475 + }, + { + "epoch": 1.5709570957095709, + "grad_norm": 0.2981231032466442, + "learning_rate": 2.1917339816252303e-05, + "loss": 0.5297671556472778, + "step": 476 + }, + { + "epoch": 1.5742574257425743, + "grad_norm": 0.2775943923870632, + "learning_rate": 2.1840868089755465e-05, + "loss": 0.5082278847694397, + "step": 477 + }, + { + "epoch": 1.5775577557755776, + "grad_norm": 0.2988631140370514, + "learning_rate": 2.176436921041779e-05, + "loss": 0.4755392372608185, + "step": 478 + }, + { + "epoch": 1.5808580858085808, + "grad_norm": 0.28707182004966697, + "learning_rate": 2.1687844306599275e-05, + "loss": 0.5249454975128174, + "step": 479 + }, + { + "epoch": 1.5841584158415842, + "grad_norm": 0.3023499942723386, + "learning_rate": 2.161129450704376e-05, + "loss": 0.5626166462898254, + "step": 480 + }, + { + "epoch": 1.5874587458745875, + "grad_norm": 0.28182475866947054, + "learning_rate": 2.1534720940862318e-05, + "loss": 0.5590533018112183, + "step": 481 + }, + { + "epoch": 1.5907590759075907, + "grad_norm": 0.2724331542693392, + "learning_rate": 2.1458124737516557e-05, + "loss": 0.5146170854568481, + "step": 482 + }, + { + "epoch": 1.5940594059405941, + "grad_norm": 0.28834268248771533, + "learning_rate": 2.1381507026802007e-05, + "loss": 0.5633066296577454, + "step": 483 + }, + { + "epoch": 1.5973597359735974, + 
"grad_norm": 0.29376551657635425, + "learning_rate": 2.130486893883141e-05, + "loss": 0.5273865461349487, + "step": 484 + }, + { + "epoch": 1.6006600660066006, + "grad_norm": 0.277893471974935, + "learning_rate": 2.1228211604018088e-05, + "loss": 0.5040723085403442, + "step": 485 + }, + { + "epoch": 1.603960396039604, + "grad_norm": 0.2901419412347278, + "learning_rate": 2.1151536153059254e-05, + "loss": 0.5254411697387695, + "step": 486 + }, + { + "epoch": 1.6072607260726073, + "grad_norm": 0.29340041503520936, + "learning_rate": 2.1074843716919323e-05, + "loss": 0.5789728760719299, + "step": 487 + }, + { + "epoch": 1.6105610561056105, + "grad_norm": 0.2858502686555999, + "learning_rate": 2.0998135426813245e-05, + "loss": 0.5521235466003418, + "step": 488 + }, + { + "epoch": 1.613861386138614, + "grad_norm": 0.2770947277408911, + "learning_rate": 2.092141241418984e-05, + "loss": 0.4702959954738617, + "step": 489 + }, + { + "epoch": 1.6171617161716172, + "grad_norm": 0.29713285242144816, + "learning_rate": 2.0844675810715046e-05, + "loss": 0.4960707128047943, + "step": 490 + }, + { + "epoch": 1.6204620462046204, + "grad_norm": 0.2800759957297699, + "learning_rate": 2.076792674825529e-05, + "loss": 0.5334826111793518, + "step": 491 + }, + { + "epoch": 1.6237623762376239, + "grad_norm": 0.4465546145157964, + "learning_rate": 2.0691166358860775e-05, + "loss": 0.5604894161224365, + "step": 492 + }, + { + "epoch": 1.627062706270627, + "grad_norm": 0.2895889767199155, + "learning_rate": 2.061439577474875e-05, + "loss": 0.5565654635429382, + "step": 493 + }, + { + "epoch": 1.6303630363036303, + "grad_norm": 0.2663082120203026, + "learning_rate": 2.0537616128286875e-05, + "loss": 0.541640043258667, + "step": 494 + }, + { + "epoch": 1.6336633663366338, + "grad_norm": 0.27975047407467746, + "learning_rate": 2.0460828551976436e-05, + "loss": 0.5247132182121277, + "step": 495 + }, + { + "epoch": 1.636963696369637, + "grad_norm": 0.30554958978585, + "learning_rate": 
2.0384034178435727e-05, + "loss": 0.533937394618988, + "step": 496 + }, + { + "epoch": 1.6402640264026402, + "grad_norm": 0.29094539458240765, + "learning_rate": 2.0307234140383264e-05, + "loss": 0.5857927799224854, + "step": 497 + }, + { + "epoch": 1.6435643564356437, + "grad_norm": 0.2718482098386275, + "learning_rate": 2.0230429570621134e-05, + "loss": 0.5191807746887207, + "step": 498 + }, + { + "epoch": 1.6468646864686467, + "grad_norm": 0.28523897670587156, + "learning_rate": 2.0153621602018276e-05, + "loss": 0.5255881547927856, + "step": 499 + }, + { + "epoch": 1.6501650165016502, + "grad_norm": 0.27057309315143646, + "learning_rate": 2.0076811367493736e-05, + "loss": 0.5134017467498779, + "step": 500 + }, + { + "epoch": 1.6534653465346536, + "grad_norm": 0.2603322919481828, + "learning_rate": 2e-05, + "loss": 0.4548872113227844, + "step": 501 + }, + { + "epoch": 1.6567656765676566, + "grad_norm": 0.2841830282558966, + "learning_rate": 1.9923188632506268e-05, + "loss": 0.4879235625267029, + "step": 502 + }, + { + "epoch": 1.66006600660066, + "grad_norm": 0.2718072353452213, + "learning_rate": 1.9846378397981737e-05, + "loss": 0.5488070249557495, + "step": 503 + }, + { + "epoch": 1.6633663366336635, + "grad_norm": 0.26980717544426264, + "learning_rate": 1.976957042937887e-05, + "loss": 0.474858820438385, + "step": 504 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.2729038695715346, + "learning_rate": 1.969276585961674e-05, + "loss": 0.573983907699585, + "step": 505 + }, + { + "epoch": 1.66996699669967, + "grad_norm": 0.2754435399081945, + "learning_rate": 1.9615965821564284e-05, + "loss": 0.5299487709999084, + "step": 506 + }, + { + "epoch": 1.6732673267326734, + "grad_norm": 0.28078214205826996, + "learning_rate": 1.9539171448023568e-05, + "loss": 0.580963134765625, + "step": 507 + }, + { + "epoch": 1.6765676567656764, + "grad_norm": 0.28056872169008745, + "learning_rate": 1.946238387171313e-05, + "loss": 0.5240850448608398, + "step": 508 + }, + { 
+ "epoch": 1.6798679867986799, + "grad_norm": 0.27579932032687055, + "learning_rate": 1.9385604225251245e-05, + "loss": 0.5397930145263672, + "step": 509 + }, + { + "epoch": 1.6831683168316833, + "grad_norm": 0.2649239844230271, + "learning_rate": 1.9308833641139235e-05, + "loss": 0.4949077367782593, + "step": 510 + }, + { + "epoch": 1.6864686468646863, + "grad_norm": 0.26821293718742795, + "learning_rate": 1.9232073251744715e-05, + "loss": 0.4906027913093567, + "step": 511 + }, + { + "epoch": 1.6897689768976898, + "grad_norm": 0.30180544906142204, + "learning_rate": 1.9155324189284957e-05, + "loss": 0.562363862991333, + "step": 512 + }, + { + "epoch": 1.693069306930693, + "grad_norm": 0.26560887539548794, + "learning_rate": 1.9078587585810167e-05, + "loss": 0.5347090363502502, + "step": 513 + }, + { + "epoch": 1.6963696369636962, + "grad_norm": 0.28206984650870465, + "learning_rate": 1.900186457318676e-05, + "loss": 0.5554836988449097, + "step": 514 + }, + { + "epoch": 1.6996699669966997, + "grad_norm": 0.2667791650009087, + "learning_rate": 1.8925156283080684e-05, + "loss": 0.5179104208946228, + "step": 515 + }, + { + "epoch": 1.702970297029703, + "grad_norm": 0.2759730227945326, + "learning_rate": 1.8848463846940756e-05, + "loss": 0.552240252494812, + "step": 516 + }, + { + "epoch": 1.7062706270627062, + "grad_norm": 0.34634391778922186, + "learning_rate": 1.8771788395981915e-05, + "loss": 0.534430980682373, + "step": 517 + }, + { + "epoch": 1.7095709570957096, + "grad_norm": 0.26711110641337843, + "learning_rate": 1.8695131061168598e-05, + "loss": 0.5601803064346313, + "step": 518 + }, + { + "epoch": 1.7128712871287128, + "grad_norm": 0.3479876576460715, + "learning_rate": 1.8618492973198e-05, + "loss": 0.5119711756706238, + "step": 519 + }, + { + "epoch": 1.716171617161716, + "grad_norm": 0.32608510378908223, + "learning_rate": 1.8541875262483446e-05, + "loss": 0.5632577538490295, + "step": 520 + }, + { + "epoch": 1.7194719471947195, + "grad_norm": 
0.2744236737297373, + "learning_rate": 1.8465279059137686e-05, + "loss": 0.5499478578567505, + "step": 521 + }, + { + "epoch": 1.7227722772277227, + "grad_norm": 0.2835433030263243, + "learning_rate": 1.8388705492956244e-05, + "loss": 0.5176683664321899, + "step": 522 + }, + { + "epoch": 1.726072607260726, + "grad_norm": 0.30494439216544983, + "learning_rate": 1.8312155693400735e-05, + "loss": 0.49528205394744873, + "step": 523 + }, + { + "epoch": 1.7293729372937294, + "grad_norm": 0.26710805184601655, + "learning_rate": 1.8235630789582213e-05, + "loss": 0.5684216022491455, + "step": 524 + }, + { + "epoch": 1.7326732673267327, + "grad_norm": 0.3852411183060649, + "learning_rate": 1.815913191024454e-05, + "loss": 0.5375942587852478, + "step": 525 + }, + { + "epoch": 1.7359735973597359, + "grad_norm": 0.33262500157086355, + "learning_rate": 1.8082660183747704e-05, + "loss": 0.5541956424713135, + "step": 526 + }, + { + "epoch": 1.7392739273927393, + "grad_norm": 0.28642691265671333, + "learning_rate": 1.8006216738051175e-05, + "loss": 0.5304872393608093, + "step": 527 + }, + { + "epoch": 1.7425742574257426, + "grad_norm": 0.2734388390360432, + "learning_rate": 1.7929802700697297e-05, + "loss": 0.48648735880851746, + "step": 528 + }, + { + "epoch": 1.7458745874587458, + "grad_norm": 0.28617564742207474, + "learning_rate": 1.7853419198794638e-05, + "loss": 0.49221059679985046, + "step": 529 + }, + { + "epoch": 1.7491749174917492, + "grad_norm": 0.2790947673251484, + "learning_rate": 1.7777067359001375e-05, + "loss": 0.5652948021888733, + "step": 530 + }, + { + "epoch": 1.7524752475247525, + "grad_norm": 0.2853703561489374, + "learning_rate": 1.7700748307508643e-05, + "loss": 0.5187686681747437, + "step": 531 + }, + { + "epoch": 1.7557755775577557, + "grad_norm": 0.2853976224574607, + "learning_rate": 1.7624463170023974e-05, + "loss": 0.5013114809989929, + "step": 532 + }, + { + "epoch": 1.7590759075907592, + "grad_norm": 0.2619757068753479, + "learning_rate": 
1.7548213071754663e-05, + "loss": 0.47477245330810547, + "step": 533 + }, + { + "epoch": 1.7623762376237624, + "grad_norm": 0.29220608585061886, + "learning_rate": 1.7471999137391162e-05, + "loss": 0.5600515007972717, + "step": 534 + }, + { + "epoch": 1.7656765676567656, + "grad_norm": 0.2680464906367101, + "learning_rate": 1.7395822491090513e-05, + "loss": 0.5017521381378174, + "step": 535 + }, + { + "epoch": 1.768976897689769, + "grad_norm": 0.3164936697237469, + "learning_rate": 1.7319684256459773e-05, + "loss": 0.48718830943107605, + "step": 536 + }, + { + "epoch": 1.7722772277227723, + "grad_norm": 0.26576630911317906, + "learning_rate": 1.72435855565394e-05, + "loss": 0.5348131060600281, + "step": 537 + }, + { + "epoch": 1.7755775577557755, + "grad_norm": 0.3785718389935733, + "learning_rate": 1.716752751378673e-05, + "loss": 0.5132070183753967, + "step": 538 + }, + { + "epoch": 1.778877887788779, + "grad_norm": 0.2912227396538846, + "learning_rate": 1.7091511250059407e-05, + "loss": 0.5194598436355591, + "step": 539 + }, + { + "epoch": 1.7821782178217822, + "grad_norm": 0.25340183641995817, + "learning_rate": 1.701553788659883e-05, + "loss": 0.4950656294822693, + "step": 540 + }, + { + "epoch": 1.7854785478547854, + "grad_norm": 0.32993048381725726, + "learning_rate": 1.6939608544013603e-05, + "loss": 0.5465744137763977, + "step": 541 + }, + { + "epoch": 1.7887788778877889, + "grad_norm": 0.33326548174687204, + "learning_rate": 1.6863724342263047e-05, + "loss": 0.5328625440597534, + "step": 542 + }, + { + "epoch": 1.7920792079207921, + "grad_norm": 0.2747817812302539, + "learning_rate": 1.6787886400640645e-05, + "loss": 0.483689546585083, + "step": 543 + }, + { + "epoch": 1.7953795379537953, + "grad_norm": 0.2619017709081145, + "learning_rate": 1.6712095837757525e-05, + "loss": 0.5225390195846558, + "step": 544 + }, + { + "epoch": 1.7986798679867988, + "grad_norm": 0.2718453161830156, + "learning_rate": 1.6636353771526005e-05, + "loss": 0.5168595314025879, + 
"step": 545 + }, + { + "epoch": 1.801980198019802, + "grad_norm": 0.2915579523683445, + "learning_rate": 1.6560661319143064e-05, + "loss": 0.5257725119590759, + "step": 546 + }, + { + "epoch": 1.8052805280528053, + "grad_norm": 0.2767711815305055, + "learning_rate": 1.648501959707387e-05, + "loss": 0.5023485422134399, + "step": 547 + }, + { + "epoch": 1.8085808580858087, + "grad_norm": 0.267570701584644, + "learning_rate": 1.6409429721035324e-05, + "loss": 0.48897239565849304, + "step": 548 + }, + { + "epoch": 1.811881188118812, + "grad_norm": 0.28714006005114934, + "learning_rate": 1.63338928059796e-05, + "loss": 0.5318676829338074, + "step": 549 + }, + { + "epoch": 1.8151815181518152, + "grad_norm": 0.2802563301473015, + "learning_rate": 1.6258409966077693e-05, + "loss": 0.4996787905693054, + "step": 550 + }, + { + "epoch": 1.8184818481848186, + "grad_norm": 0.28354713397276166, + "learning_rate": 1.6182982314702987e-05, + "loss": 0.4833434820175171, + "step": 551 + }, + { + "epoch": 1.8217821782178216, + "grad_norm": 0.2904168234412241, + "learning_rate": 1.6107610964414836e-05, + "loss": 0.5050291419029236, + "step": 552 + }, + { + "epoch": 1.825082508250825, + "grad_norm": 0.2859100119195952, + "learning_rate": 1.6032297026942154e-05, + "loss": 0.5423529148101807, + "step": 553 + }, + { + "epoch": 1.8283828382838285, + "grad_norm": 0.2700093369793658, + "learning_rate": 1.5957041613167007e-05, + "loss": 0.5670536756515503, + "step": 554 + }, + { + "epoch": 1.8316831683168315, + "grad_norm": 0.2784484594925466, + "learning_rate": 1.5881845833108245e-05, + "loss": 0.5148528814315796, + "step": 555 + }, + { + "epoch": 1.834983498349835, + "grad_norm": 0.2795083034807244, + "learning_rate": 1.5806710795905113e-05, + "loss": 0.5441350340843201, + "step": 556 + }, + { + "epoch": 1.8382838283828384, + "grad_norm": 0.27706485047893287, + "learning_rate": 1.5731637609800897e-05, + "loss": 0.5338016748428345, + "step": 557 + }, + { + "epoch": 1.8415841584158414, + 
"grad_norm": 0.281671337152691, + "learning_rate": 1.5656627382126587e-05, + "loss": 0.522803783416748, + "step": 558 + }, + { + "epoch": 1.844884488448845, + "grad_norm": 0.2867314215651197, + "learning_rate": 1.5581681219284523e-05, + "loss": 0.5079183578491211, + "step": 559 + }, + { + "epoch": 1.8481848184818483, + "grad_norm": 0.2880604655799914, + "learning_rate": 1.5506800226732104e-05, + "loss": 0.5360547304153442, + "step": 560 + }, + { + "epoch": 1.8514851485148514, + "grad_norm": 0.276328956502413, + "learning_rate": 1.5431985508965438e-05, + "loss": 0.5137909650802612, + "step": 561 + }, + { + "epoch": 1.8547854785478548, + "grad_norm": 0.26198432963654783, + "learning_rate": 1.5357238169503107e-05, + "loss": 0.513020396232605, + "step": 562 + }, + { + "epoch": 1.858085808580858, + "grad_norm": 0.3155751914603546, + "learning_rate": 1.5282559310869856e-05, + "loss": 0.5015939474105835, + "step": 563 + }, + { + "epoch": 1.8613861386138613, + "grad_norm": 0.2654102353913447, + "learning_rate": 1.5207950034580317e-05, + "loss": 0.5012743473052979, + "step": 564 + }, + { + "epoch": 1.8646864686468647, + "grad_norm": 0.27309132142690246, + "learning_rate": 1.5133411441122799e-05, + "loss": 0.48864254355430603, + "step": 565 + }, + { + "epoch": 1.867986798679868, + "grad_norm": 0.6058665885379618, + "learning_rate": 1.5058944629943044e-05, + "loss": 0.437102347612381, + "step": 566 + }, + { + "epoch": 1.8712871287128712, + "grad_norm": 0.2718164602566872, + "learning_rate": 1.4984550699427978e-05, + "loss": 0.5518525838851929, + "step": 567 + }, + { + "epoch": 1.8745874587458746, + "grad_norm": 0.2832474093938169, + "learning_rate": 1.4910230746889559e-05, + "loss": 0.5618141889572144, + "step": 568 + }, + { + "epoch": 1.8778877887788779, + "grad_norm": 0.2790138686096534, + "learning_rate": 1.4835985868548557e-05, + "loss": 0.4990406632423401, + "step": 569 + }, + { + "epoch": 1.881188118811881, + "grad_norm": 0.26198363334655667, + "learning_rate": 
1.4761817159518415e-05, + "loss": 0.5004926919937134, + "step": 570 + }, + { + "epoch": 1.8844884488448845, + "grad_norm": 0.28233065536105734, + "learning_rate": 1.4687725713789042e-05, + "loss": 0.5166051983833313, + "step": 571 + }, + { + "epoch": 1.8877887788778878, + "grad_norm": 0.2654807250852616, + "learning_rate": 1.461371262421074e-05, + "loss": 0.5510391592979431, + "step": 572 + }, + { + "epoch": 1.891089108910891, + "grad_norm": 0.2766439695892797, + "learning_rate": 1.4539778982478061e-05, + "loss": 0.5305938720703125, + "step": 573 + }, + { + "epoch": 1.8943894389438944, + "grad_norm": 0.35617765802983586, + "learning_rate": 1.4465925879113663e-05, + "loss": 0.562718391418457, + "step": 574 + }, + { + "epoch": 1.8976897689768977, + "grad_norm": 0.26373250902859363, + "learning_rate": 1.4392154403452294e-05, + "loss": 0.541257381439209, + "step": 575 + }, + { + "epoch": 1.900990099009901, + "grad_norm": 0.2584596806712207, + "learning_rate": 1.4318465643624696e-05, + "loss": 0.556663990020752, + "step": 576 + }, + { + "epoch": 1.9042904290429044, + "grad_norm": 0.2655751613308258, + "learning_rate": 1.4244860686541522e-05, + "loss": 0.5691581964492798, + "step": 577 + }, + { + "epoch": 1.9075907590759076, + "grad_norm": 0.3146864569567829, + "learning_rate": 1.4171340617877349e-05, + "loss": 0.513170063495636, + "step": 578 + }, + { + "epoch": 1.9108910891089108, + "grad_norm": 0.288458498752148, + "learning_rate": 1.4097906522054656e-05, + "loss": 0.5679588317871094, + "step": 579 + }, + { + "epoch": 1.9141914191419143, + "grad_norm": 0.2858005511149637, + "learning_rate": 1.4024559482227818e-05, + "loss": 0.513796329498291, + "step": 580 + }, + { + "epoch": 1.9174917491749175, + "grad_norm": 0.25543101337641916, + "learning_rate": 1.3951300580267108e-05, + "loss": 0.4618416428565979, + "step": 581 + }, + { + "epoch": 1.9207920792079207, + "grad_norm": 0.2670194314216259, + "learning_rate": 1.3878130896742796e-05, + "loss": 0.5491312742233276, + 
"step": 582 + }, + { + "epoch": 1.9240924092409242, + "grad_norm": 0.24204031552297342, + "learning_rate": 1.3805051510909164e-05, + "loss": 0.5524745583534241, + "step": 583 + }, + { + "epoch": 1.9273927392739274, + "grad_norm": 0.25091865473771396, + "learning_rate": 1.3732063500688604e-05, + "loss": 0.5232075452804565, + "step": 584 + }, + { + "epoch": 1.9306930693069306, + "grad_norm": 0.26059464209400784, + "learning_rate": 1.3659167942655702e-05, + "loss": 0.5257346034049988, + "step": 585 + }, + { + "epoch": 1.933993399339934, + "grad_norm": 0.2814401591736557, + "learning_rate": 1.35863659120214e-05, + "loss": 0.5196455717086792, + "step": 586 + }, + { + "epoch": 1.9372937293729373, + "grad_norm": 0.2624714306516865, + "learning_rate": 1.3513658482617085e-05, + "loss": 0.5122568011283875, + "step": 587 + }, + { + "epoch": 1.9405940594059405, + "grad_norm": 0.2644911414307543, + "learning_rate": 1.3441046726878786e-05, + "loss": 0.5236790180206299, + "step": 588 + }, + { + "epoch": 1.943894389438944, + "grad_norm": 0.2699458396883844, + "learning_rate": 1.3368531715831337e-05, + "loss": 0.5508555173873901, + "step": 589 + }, + { + "epoch": 1.9471947194719472, + "grad_norm": 0.26005129022694123, + "learning_rate": 1.3296114519072594e-05, + "loss": 0.4742932617664337, + "step": 590 + }, + { + "epoch": 1.9504950495049505, + "grad_norm": 0.2530711129220065, + "learning_rate": 1.3223796204757638e-05, + "loss": 0.5406354665756226, + "step": 591 + }, + { + "epoch": 1.953795379537954, + "grad_norm": 0.26847075280504556, + "learning_rate": 1.3151577839583043e-05, + "loss": 0.508262038230896, + "step": 592 + }, + { + "epoch": 1.9570957095709571, + "grad_norm": 0.2601716190776577, + "learning_rate": 1.3079460488771136e-05, + "loss": 0.5260204672813416, + "step": 593 + }, + { + "epoch": 1.9603960396039604, + "grad_norm": 0.2597900374740898, + "learning_rate": 1.3007445216054257e-05, + "loss": 0.522408127784729, + "step": 594 + }, + { + "epoch": 1.9636963696369638, + 
"grad_norm": 0.23858694591096777, + "learning_rate": 1.2935533083659114e-05, + "loss": 0.4849371910095215, + "step": 595 + }, + { + "epoch": 1.966996699669967, + "grad_norm": 0.26399518807159883, + "learning_rate": 1.2863725152291091e-05, + "loss": 0.5319019556045532, + "step": 596 + }, + { + "epoch": 1.9702970297029703, + "grad_norm": 0.2797422170192374, + "learning_rate": 1.2792022481118587e-05, + "loss": 0.5562412738800049, + "step": 597 + }, + { + "epoch": 1.9735973597359737, + "grad_norm": 0.2537907416959109, + "learning_rate": 1.2720426127757431e-05, + "loss": 0.49608999490737915, + "step": 598 + }, + { + "epoch": 1.976897689768977, + "grad_norm": 0.2521690484869479, + "learning_rate": 1.2648937148255253e-05, + "loss": 0.5082768201828003, + "step": 599 + }, + { + "epoch": 1.9801980198019802, + "grad_norm": 0.2572245668654862, + "learning_rate": 1.2577556597075933e-05, + "loss": 0.5706614255905151, + "step": 600 + }, + { + "epoch": 1.9834983498349836, + "grad_norm": 0.2697883750179181, + "learning_rate": 1.2506285527083991e-05, + "loss": 0.5366507768630981, + "step": 601 + }, + { + "epoch": 1.9867986798679866, + "grad_norm": 0.26402819852563175, + "learning_rate": 1.2435124989529139e-05, + "loss": 0.5462816953659058, + "step": 602 + }, + { + "epoch": 1.99009900990099, + "grad_norm": 0.246894878071046, + "learning_rate": 1.236407603403072e-05, + "loss": 0.5050650238990784, + "step": 603 + }, + { + "epoch": 1.9933993399339935, + "grad_norm": 0.477370357077484, + "learning_rate": 1.2293139708562221e-05, + "loss": 0.4915675222873688, + "step": 604 + }, + { + "epoch": 1.9966996699669965, + "grad_norm": 0.2657795870076786, + "learning_rate": 1.2222317059435863e-05, + "loss": 0.5807889103889465, + "step": 605 + }, + { + "epoch": 2.0, + "grad_norm": 0.2770967943671612, + "learning_rate": 1.2151609131287124e-05, + "loss": 0.49173152446746826, + "step": 606 + }, + { + "epoch": 2.0033003300330035, + "grad_norm": 0.7014931959992592, + "learning_rate": 
1.2081016967059336e-05, + "loss": 0.4426806569099426, + "step": 607 + }, + { + "epoch": 2.0066006600660065, + "grad_norm": 0.3040348249510974, + "learning_rate": 1.201054160798833e-05, + "loss": 0.45669305324554443, + "step": 608 + }, + { + "epoch": 2.00990099009901, + "grad_norm": 0.31030490189011145, + "learning_rate": 1.1940184093587047e-05, + "loss": 0.4638911783695221, + "step": 609 + }, + { + "epoch": 2.0132013201320134, + "grad_norm": 0.36234285165121427, + "learning_rate": 1.186994546163023e-05, + "loss": 0.4541138708591461, + "step": 610 + }, + { + "epoch": 2.0165016501650164, + "grad_norm": 0.38564024677228226, + "learning_rate": 1.1799826748139079e-05, + "loss": 0.49081191420555115, + "step": 611 + }, + { + "epoch": 2.01980198019802, + "grad_norm": 0.3266656962672454, + "learning_rate": 1.1729828987366009e-05, + "loss": 0.4794033169746399, + "step": 612 + }, + { + "epoch": 2.0231023102310233, + "grad_norm": 0.291304204290645, + "learning_rate": 1.165995321177939e-05, + "loss": 0.4142993688583374, + "step": 613 + }, + { + "epoch": 2.0264026402640263, + "grad_norm": 0.33294658416576944, + "learning_rate": 1.159020045204829e-05, + "loss": 0.47322210669517517, + "step": 614 + }, + { + "epoch": 2.0297029702970297, + "grad_norm": 0.3539618583487969, + "learning_rate": 1.15205717370273e-05, + "loss": 0.4899124503135681, + "step": 615 + }, + { + "epoch": 2.033003300330033, + "grad_norm": 0.2952110750729378, + "learning_rate": 1.1451068093741355e-05, + "loss": 0.4857853055000305, + "step": 616 + }, + { + "epoch": 2.036303630363036, + "grad_norm": 0.28290377247578213, + "learning_rate": 1.1381690547370559e-05, + "loss": 0.4790021479129791, + "step": 617 + }, + { + "epoch": 2.0396039603960396, + "grad_norm": 0.2902876717109542, + "learning_rate": 1.13124401212351e-05, + "loss": 0.4519282281398773, + "step": 618 + }, + { + "epoch": 2.042904290429043, + "grad_norm": 0.32584221310071065, + "learning_rate": 1.1243317836780138e-05, + "loss": 0.4738570749759674, + 
"step": 619 + }, + { + "epoch": 2.046204620462046, + "grad_norm": 0.3093985088780693, + "learning_rate": 1.1174324713560751e-05, + "loss": 0.5111795663833618, + "step": 620 + }, + { + "epoch": 2.0495049504950495, + "grad_norm": 0.2707360386310654, + "learning_rate": 1.1105461769226858e-05, + "loss": 0.4750926196575165, + "step": 621 + }, + { + "epoch": 2.052805280528053, + "grad_norm": 0.3107814822051771, + "learning_rate": 1.1036730019508259e-05, + "loss": 0.4580341577529907, + "step": 622 + }, + { + "epoch": 2.056105610561056, + "grad_norm": 0.28803288143665157, + "learning_rate": 1.0968130478199635e-05, + "loss": 0.43322116136550903, + "step": 623 + }, + { + "epoch": 2.0594059405940595, + "grad_norm": 0.2810686637672446, + "learning_rate": 1.0899664157145562e-05, + "loss": 0.5015532374382019, + "step": 624 + }, + { + "epoch": 2.062706270627063, + "grad_norm": 0.28464578766110366, + "learning_rate": 1.0831332066225645e-05, + "loss": 0.4508541226387024, + "step": 625 + }, + { + "epoch": 2.066006600660066, + "grad_norm": 0.2904901154874499, + "learning_rate": 1.0763135213339589e-05, + "loss": 0.49554720520973206, + "step": 626 + }, + { + "epoch": 2.0693069306930694, + "grad_norm": 0.27820378239401394, + "learning_rate": 1.0695074604392305e-05, + "loss": 0.4523652493953705, + "step": 627 + }, + { + "epoch": 2.072607260726073, + "grad_norm": 0.2794675014886217, + "learning_rate": 1.0627151243279136e-05, + "loss": 0.44413498044013977, + "step": 628 + }, + { + "epoch": 2.075907590759076, + "grad_norm": 0.30159300158430347, + "learning_rate": 1.055936613187101e-05, + "loss": 0.4645534157752991, + "step": 629 + }, + { + "epoch": 2.0792079207920793, + "grad_norm": 0.26698861915138783, + "learning_rate": 1.0491720269999663e-05, + "loss": 0.44823265075683594, + "step": 630 + }, + { + "epoch": 2.0825082508250823, + "grad_norm": 0.2813791646704669, + "learning_rate": 1.0424214655442891e-05, + "loss": 0.45181727409362793, + "step": 631 + }, + { + "epoch": 2.0858085808580857, + 
"grad_norm": 0.28721240697359884, + "learning_rate": 1.0356850283909852e-05, + "loss": 0.5371145009994507, + "step": 632 + }, + { + "epoch": 2.089108910891089, + "grad_norm": 0.26030729348418064, + "learning_rate": 1.0289628149026369e-05, + "loss": 0.4564274847507477, + "step": 633 + }, + { + "epoch": 2.092409240924092, + "grad_norm": 0.3008427259435641, + "learning_rate": 1.0222549242320254e-05, + "loss": 0.4490276873111725, + "step": 634 + }, + { + "epoch": 2.0957095709570956, + "grad_norm": 0.27241405218961473, + "learning_rate": 1.0155614553206715e-05, + "loss": 0.4663650095462799, + "step": 635 + }, + { + "epoch": 2.099009900990099, + "grad_norm": 0.2814271376941218, + "learning_rate": 1.0088825068973746e-05, + "loss": 0.46265488862991333, + "step": 636 + }, + { + "epoch": 2.102310231023102, + "grad_norm": 0.27083223857822414, + "learning_rate": 1.002218177476756e-05, + "loss": 0.45717963576316833, + "step": 637 + }, + { + "epoch": 2.1056105610561056, + "grad_norm": 0.27321625989679976, + "learning_rate": 9.955685653578068e-06, + "loss": 0.47119495272636414, + "step": 638 + }, + { + "epoch": 2.108910891089109, + "grad_norm": 0.2756031623165562, + "learning_rate": 9.88933768622439e-06, + "loss": 0.46565738320350647, + "step": 639 + }, + { + "epoch": 2.112211221122112, + "grad_norm": 0.26745369116167694, + "learning_rate": 9.823138851340337e-06, + "loss": 0.45610398054122925, + "step": 640 + }, + { + "epoch": 2.1155115511551155, + "grad_norm": 0.2722722292829376, + "learning_rate": 9.75709012536005e-06, + "loss": 0.4907280206680298, + "step": 641 + }, + { + "epoch": 2.118811881188119, + "grad_norm": 0.3111977337695957, + "learning_rate": 9.691192482503546e-06, + "loss": 0.500091552734375, + "step": 642 + }, + { + "epoch": 2.122112211221122, + "grad_norm": 0.2648612882642695, + "learning_rate": 9.625446894762371e-06, + "loss": 0.4330231547355652, + "step": 643 + }, + { + "epoch": 2.1254125412541254, + "grad_norm": 0.2809597353379975, + "learning_rate": 
9.559854331885233e-06, + "loss": 0.4750261902809143, + "step": 644 + }, + { + "epoch": 2.128712871287129, + "grad_norm": 0.28201431758911444, + "learning_rate": 9.49441576136374e-06, + "loss": 0.4567373991012573, + "step": 645 + }, + { + "epoch": 2.132013201320132, + "grad_norm": 0.2901654659031683, + "learning_rate": 9.429132148418116e-06, + "loss": 0.4601932168006897, + "step": 646 + }, + { + "epoch": 2.1353135313531353, + "grad_norm": 0.2792782648133288, + "learning_rate": 9.364004455982931e-06, + "loss": 0.4909035265445709, + "step": 647 + }, + { + "epoch": 2.1386138613861387, + "grad_norm": 0.2531215125004539, + "learning_rate": 9.299033644692948e-06, + "loss": 0.4443170428276062, + "step": 648 + }, + { + "epoch": 2.1419141914191417, + "grad_norm": 0.2676386529649011, + "learning_rate": 9.234220672868928e-06, + "loss": 0.46534985303878784, + "step": 649 + }, + { + "epoch": 2.145214521452145, + "grad_norm": 0.2667778492620529, + "learning_rate": 9.169566496503476e-06, + "loss": 0.4351472854614258, + "step": 650 + }, + { + "epoch": 2.1485148514851486, + "grad_norm": 0.26819623679400084, + "learning_rate": 9.105072069246983e-06, + "loss": 0.41445475816726685, + "step": 651 + }, + { + "epoch": 2.1518151815181517, + "grad_norm": 0.2627848025641513, + "learning_rate": 9.040738342393532e-06, + "loss": 0.475847989320755, + "step": 652 + }, + { + "epoch": 2.155115511551155, + "grad_norm": 0.26883146792086515, + "learning_rate": 8.976566264866876e-06, + "loss": 0.48487618565559387, + "step": 653 + }, + { + "epoch": 2.1584158415841586, + "grad_norm": 0.2373773636564882, + "learning_rate": 8.912556783206414e-06, + "loss": 0.4661785364151001, + "step": 654 + }, + { + "epoch": 2.1617161716171616, + "grad_norm": 0.25939800378632233, + "learning_rate": 8.84871084155328e-06, + "loss": 0.48009538650512695, + "step": 655 + }, + { + "epoch": 2.165016501650165, + "grad_norm": 0.26858346089342566, + "learning_rate": 8.785029381636387e-06, + "loss": 0.45644935965538025, + "step": 
656 + }, + { + "epoch": 2.1683168316831685, + "grad_norm": 0.25509808532967904, + "learning_rate": 8.721513342758516e-06, + "loss": 0.4896699786186218, + "step": 657 + }, + { + "epoch": 2.1716171617161715, + "grad_norm": 0.2678040151014407, + "learning_rate": 8.658163661782507e-06, + "loss": 0.4286258816719055, + "step": 658 + }, + { + "epoch": 2.174917491749175, + "grad_norm": 0.25541690613787077, + "learning_rate": 8.59498127311742e-06, + "loss": 0.42029869556427, + "step": 659 + }, + { + "epoch": 2.1782178217821784, + "grad_norm": 0.2748486648157056, + "learning_rate": 8.531967108704722e-06, + "loss": 0.48522356152534485, + "step": 660 + }, + { + "epoch": 2.1815181518151814, + "grad_norm": 0.37918495336042346, + "learning_rate": 8.4691220980046e-06, + "loss": 0.461814284324646, + "step": 661 + }, + { + "epoch": 2.184818481848185, + "grad_norm": 0.2581277433441387, + "learning_rate": 8.406447167982205e-06, + "loss": 0.49913299083709717, + "step": 662 + }, + { + "epoch": 2.1881188118811883, + "grad_norm": 0.2804949954645611, + "learning_rate": 8.343943243094008e-06, + "loss": 0.4936009645462036, + "step": 663 + }, + { + "epoch": 2.1914191419141913, + "grad_norm": 0.2621319196989517, + "learning_rate": 8.281611245274123e-06, + "loss": 0.44817712903022766, + "step": 664 + }, + { + "epoch": 2.1947194719471947, + "grad_norm": 0.26441078845804705, + "learning_rate": 8.219452093920763e-06, + "loss": 0.482817143201828, + "step": 665 + }, + { + "epoch": 2.198019801980198, + "grad_norm": 0.25954690482303255, + "learning_rate": 8.157466705882645e-06, + "loss": 0.4643383026123047, + "step": 666 + }, + { + "epoch": 2.201320132013201, + "grad_norm": 0.26531559844936237, + "learning_rate": 8.095655995445472e-06, + "loss": 0.4797602593898773, + "step": 667 + }, + { + "epoch": 2.2046204620462047, + "grad_norm": 0.26505896756203806, + "learning_rate": 8.03402087431844e-06, + "loss": 0.44109994173049927, + "step": 668 + }, + { + "epoch": 2.207920792079208, + "grad_norm": 
0.24679836702691405, + "learning_rate": 7.972562251620817e-06, + "loss": 0.46359869837760925, + "step": 669 + }, + { + "epoch": 2.211221122112211, + "grad_norm": 0.23925371744802634, + "learning_rate": 7.9112810338685e-06, + "loss": 0.4576035141944885, + "step": 670 + }, + { + "epoch": 2.2145214521452146, + "grad_norm": 0.2854541383231889, + "learning_rate": 7.850178124960678e-06, + "loss": 0.40902045369148254, + "step": 671 + }, + { + "epoch": 2.217821782178218, + "grad_norm": 0.2726752140080075, + "learning_rate": 7.789254426166454e-06, + "loss": 0.45797932147979736, + "step": 672 + }, + { + "epoch": 2.221122112211221, + "grad_norm": 0.2463208855251595, + "learning_rate": 7.728510836111602e-06, + "loss": 0.43204474449157715, + "step": 673 + }, + { + "epoch": 2.2244224422442245, + "grad_norm": 0.2632084235311744, + "learning_rate": 7.667948250765278e-06, + "loss": 0.46007901430130005, + "step": 674 + }, + { + "epoch": 2.227722772277228, + "grad_norm": 0.2508043419515415, + "learning_rate": 7.607567563426823e-06, + "loss": 0.46342402696609497, + "step": 675 + }, + { + "epoch": 2.231023102310231, + "grad_norm": 0.25728063807342477, + "learning_rate": 7.5473696647125605e-06, + "loss": 0.48953354358673096, + "step": 676 + }, + { + "epoch": 2.2343234323432344, + "grad_norm": 0.2667124077929822, + "learning_rate": 7.487355442542696e-06, + "loss": 0.5022163391113281, + "step": 677 + }, + { + "epoch": 2.237623762376238, + "grad_norm": 0.2666199657154719, + "learning_rate": 7.4275257821281995e-06, + "loss": 0.5144001245498657, + "step": 678 + }, + { + "epoch": 2.240924092409241, + "grad_norm": 0.2598091753134079, + "learning_rate": 7.3678815659577505e-06, + "loss": 0.489937961101532, + "step": 679 + }, + { + "epoch": 2.2442244224422443, + "grad_norm": 0.25000738365352393, + "learning_rate": 7.3084236737847125e-06, + "loss": 0.48842746019363403, + "step": 680 + }, + { + "epoch": 2.2475247524752477, + "grad_norm": 0.2672754249714767, + "learning_rate": 7.249152982614176e-06, 
+ "loss": 0.5024458765983582, + "step": 681 + }, + { + "epoch": 2.2508250825082508, + "grad_norm": 0.25558161311007577, + "learning_rate": 7.190070366690014e-06, + "loss": 0.46162086725234985, + "step": 682 + }, + { + "epoch": 2.254125412541254, + "grad_norm": 0.24807827286497117, + "learning_rate": 7.13117669748199e-06, + "loss": 0.44991785287857056, + "step": 683 + }, + { + "epoch": 2.2574257425742577, + "grad_norm": 0.24635539567650763, + "learning_rate": 7.072472843672877e-06, + "loss": 0.43738633394241333, + "step": 684 + }, + { + "epoch": 2.2607260726072607, + "grad_norm": 0.25605350464823584, + "learning_rate": 7.013959671145691e-06, + "loss": 0.46122169494628906, + "step": 685 + }, + { + "epoch": 2.264026402640264, + "grad_norm": 0.24205320356251103, + "learning_rate": 6.955638042970896e-06, + "loss": 0.4504377841949463, + "step": 686 + }, + { + "epoch": 2.2673267326732676, + "grad_norm": 0.2570116198268661, + "learning_rate": 6.897508819393645e-06, + "loss": 0.4620972275733948, + "step": 687 + }, + { + "epoch": 2.2706270627062706, + "grad_norm": 0.2629731642768507, + "learning_rate": 6.8395728578211525e-06, + "loss": 0.5271490216255188, + "step": 688 + }, + { + "epoch": 2.273927392739274, + "grad_norm": 1.9898738742816064, + "learning_rate": 6.781831012810001e-06, + "loss": 0.4448450803756714, + "step": 689 + }, + { + "epoch": 2.2772277227722775, + "grad_norm": 0.3213733503923664, + "learning_rate": 6.72428413605354e-06, + "loss": 0.4602925181388855, + "step": 690 + }, + { + "epoch": 2.2805280528052805, + "grad_norm": 0.26788259096559774, + "learning_rate": 6.6669330763693485e-06, + "loss": 0.4722862243652344, + "step": 691 + }, + { + "epoch": 2.283828382838284, + "grad_norm": 0.25272077157298134, + "learning_rate": 6.609778679686694e-06, + "loss": 0.47454553842544556, + "step": 692 + }, + { + "epoch": 2.287128712871287, + "grad_norm": 0.24015565864939845, + "learning_rate": 6.552821789034067e-06, + "loss": 0.4750802516937256, + "step": 693 + }, + { + 
"epoch": 2.2904290429042904, + "grad_norm": 0.2559036200154721, + "learning_rate": 6.496063244526723e-06, + "loss": 0.4640570282936096, + "step": 694 + }, + { + "epoch": 2.293729372937294, + "grad_norm": 0.25061879602537984, + "learning_rate": 6.439503883354323e-06, + "loss": 0.47181540727615356, + "step": 695 + }, + { + "epoch": 2.297029702970297, + "grad_norm": 0.24588968301020392, + "learning_rate": 6.3831445397685755e-06, + "loss": 0.4335097372531891, + "step": 696 + }, + { + "epoch": 2.3003300330033003, + "grad_norm": 0.26057507812572134, + "learning_rate": 6.3269860450709016e-06, + "loss": 0.5158364772796631, + "step": 697 + }, + { + "epoch": 2.3036303630363038, + "grad_norm": 0.24767301357183136, + "learning_rate": 6.271029227600216e-06, + "loss": 0.497075617313385, + "step": 698 + }, + { + "epoch": 2.3069306930693068, + "grad_norm": 0.2612680212099097, + "learning_rate": 6.215274912720697e-06, + "loss": 0.4946526288986206, + "step": 699 + }, + { + "epoch": 2.31023102310231, + "grad_norm": 0.25694731286364175, + "learning_rate": 6.159723922809577e-06, + "loss": 0.4632418155670166, + "step": 700 + }, + { + "epoch": 2.3135313531353137, + "grad_norm": 0.26826842519558464, + "learning_rate": 6.10437707724507e-06, + "loss": 0.4936927258968353, + "step": 701 + }, + { + "epoch": 2.3168316831683167, + "grad_norm": 0.3039451981089408, + "learning_rate": 6.049235192394242e-06, + "loss": 0.4373137056827545, + "step": 702 + }, + { + "epoch": 2.32013201320132, + "grad_norm": 0.2502753739217944, + "learning_rate": 5.994299081600996e-06, + "loss": 0.49224400520324707, + "step": 703 + }, + { + "epoch": 2.3234323432343236, + "grad_norm": 0.25232784831466315, + "learning_rate": 5.939569555174045e-06, + "loss": 0.453000545501709, + "step": 704 + }, + { + "epoch": 2.3267326732673266, + "grad_norm": 0.2443845287083898, + "learning_rate": 5.885047420374992e-06, + "loss": 0.4201410114765167, + "step": 705 + }, + { + "epoch": 2.33003300330033, + "grad_norm": 0.2757856931959748, + 
"learning_rate": 5.830733481406415e-06, + "loss": 0.4817071557044983, + "step": 706 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.23548633980687703, + "learning_rate": 5.776628539399975e-06, + "loss": 0.42609190940856934, + "step": 707 + }, + { + "epoch": 2.3366336633663365, + "grad_norm": 0.2484780532867763, + "learning_rate": 5.722733392404652e-06, + "loss": 0.46225881576538086, + "step": 708 + }, + { + "epoch": 2.33993399339934, + "grad_norm": 0.28677279656296756, + "learning_rate": 5.669048835374933e-06, + "loss": 0.49061962962150574, + "step": 709 + }, + { + "epoch": 2.3432343234323434, + "grad_norm": 0.25600200089074804, + "learning_rate": 5.615575660159089e-06, + "loss": 0.4506024122238159, + "step": 710 + }, + { + "epoch": 2.3465346534653464, + "grad_norm": 0.23921559671813297, + "learning_rate": 5.562314655487522e-06, + "loss": 0.4433022141456604, + "step": 711 + }, + { + "epoch": 2.34983498349835, + "grad_norm": 0.26708565402858225, + "learning_rate": 5.5092666069611055e-06, + "loss": 0.45988917350769043, + "step": 712 + }, + { + "epoch": 2.3531353135313533, + "grad_norm": 0.2294068192725238, + "learning_rate": 5.4564322970396154e-06, + "loss": 0.44675180315971375, + "step": 713 + }, + { + "epoch": 2.3564356435643563, + "grad_norm": 0.2431380886271115, + "learning_rate": 5.403812505030157e-06, + "loss": 0.46991807222366333, + "step": 714 + }, + { + "epoch": 2.3597359735973598, + "grad_norm": 0.2412850801003648, + "learning_rate": 5.351408007075714e-06, + "loss": 0.49208664894104004, + "step": 715 + }, + { + "epoch": 2.363036303630363, + "grad_norm": 2.760535806072788, + "learning_rate": 5.299219576143673e-06, + "loss": 0.48280128836631775, + "step": 716 + }, + { + "epoch": 2.366336633663366, + "grad_norm": 0.24609236023763137, + "learning_rate": 5.247247982014414e-06, + "loss": 0.4491961896419525, + "step": 717 + }, + { + "epoch": 2.3696369636963697, + "grad_norm": 0.24672380739006747, + "learning_rate": 5.195493991269991e-06, + "loss": 
0.4943190813064575, + "step": 718 + }, + { + "epoch": 2.372937293729373, + "grad_norm": 0.27378763646010795, + "learning_rate": 5.143958367282795e-06, + "loss": 0.4586840867996216, + "step": 719 + }, + { + "epoch": 2.376237623762376, + "grad_norm": 0.2422334792581867, + "learning_rate": 5.0926418702042914e-06, + "loss": 0.46227943897247314, + "step": 720 + }, + { + "epoch": 2.3795379537953796, + "grad_norm": 0.23796137337817433, + "learning_rate": 5.041545256953839e-06, + "loss": 0.45386868715286255, + "step": 721 + }, + { + "epoch": 2.382838283828383, + "grad_norm": 0.24415832537414764, + "learning_rate": 4.990669281207492e-06, + "loss": 0.5026980042457581, + "step": 722 + }, + { + "epoch": 2.386138613861386, + "grad_norm": 0.247792875546048, + "learning_rate": 4.940014693386909e-06, + "loss": 0.4834757447242737, + "step": 723 + }, + { + "epoch": 2.3894389438943895, + "grad_norm": 0.43027345510854853, + "learning_rate": 4.889582240648254e-06, + "loss": 0.44382545351982117, + "step": 724 + }, + { + "epoch": 2.3927392739273925, + "grad_norm": 0.2519737312346543, + "learning_rate": 4.839372666871212e-06, + "loss": 0.45313894748687744, + "step": 725 + }, + { + "epoch": 2.396039603960396, + "grad_norm": 0.23932824454201898, + "learning_rate": 4.789386712647994e-06, + "loss": 0.4597586393356323, + "step": 726 + }, + { + "epoch": 2.3993399339933994, + "grad_norm": 0.23075224453442636, + "learning_rate": 4.739625115272408e-06, + "loss": 0.4427994191646576, + "step": 727 + }, + { + "epoch": 2.4026402640264024, + "grad_norm": 0.24450312969705348, + "learning_rate": 4.690088608729007e-06, + "loss": 0.4459637403488159, + "step": 728 + }, + { + "epoch": 2.405940594059406, + "grad_norm": 0.2516039358654293, + "learning_rate": 4.640777923682247e-06, + "loss": 0.5043150186538696, + "step": 729 + }, + { + "epoch": 2.4092409240924093, + "grad_norm": 0.26743057517217783, + "learning_rate": 4.5916937874657055e-06, + "loss": 0.4942860007286072, + "step": 730 + }, + { + "epoch": 
2.4125412541254123, + "grad_norm": 0.25489023032736696, + "learning_rate": 4.5428369240713655e-06, + "loss": 0.4572402834892273, + "step": 731 + }, + { + "epoch": 2.4158415841584158, + "grad_norm": 0.24954926782274506, + "learning_rate": 4.494208054138934e-06, + "loss": 0.44927412271499634, + "step": 732 + }, + { + "epoch": 2.419141914191419, + "grad_norm": 0.24684795220524788, + "learning_rate": 4.445807894945211e-06, + "loss": 0.461928129196167, + "step": 733 + }, + { + "epoch": 2.4224422442244222, + "grad_norm": 0.2375757440633774, + "learning_rate": 4.397637160393493e-06, + "loss": 0.46279191970825195, + "step": 734 + }, + { + "epoch": 2.4257425742574257, + "grad_norm": 0.24407488686385456, + "learning_rate": 4.349696561003076e-06, + "loss": 0.48653045296669006, + "step": 735 + }, + { + "epoch": 2.429042904290429, + "grad_norm": 0.2443771510662661, + "learning_rate": 4.301986803898752e-06, + "loss": 0.4587661027908325, + "step": 736 + }, + { + "epoch": 2.432343234323432, + "grad_norm": 0.25142970699984885, + "learning_rate": 4.2545085928003906e-06, + "loss": 0.4946083426475525, + "step": 737 + }, + { + "epoch": 2.4356435643564356, + "grad_norm": 0.2446760243354809, + "learning_rate": 4.207262628012534e-06, + "loss": 0.4614926278591156, + "step": 738 + }, + { + "epoch": 2.438943894389439, + "grad_norm": 0.24323846273380414, + "learning_rate": 4.160249606414109e-06, + "loss": 0.46377992630004883, + "step": 739 + }, + { + "epoch": 2.442244224422442, + "grad_norm": 0.2554844227936452, + "learning_rate": 4.1134702214481126e-06, + "loss": 0.4217844009399414, + "step": 740 + }, + { + "epoch": 2.4455445544554455, + "grad_norm": 0.40365970056175393, + "learning_rate": 4.066925163111406e-06, + "loss": 0.4616321325302124, + "step": 741 + }, + { + "epoch": 2.448844884488449, + "grad_norm": 0.23727547629912737, + "learning_rate": 4.020615117944515e-06, + "loss": 0.48755043745040894, + "step": 742 + }, + { + "epoch": 2.452145214521452, + "grad_norm": 0.2636488971277773, + 
"learning_rate": 3.974540769021529e-06, + "loss": 0.47338151931762695, + "step": 743 + }, + { + "epoch": 2.4554455445544554, + "grad_norm": 0.26687939105998304, + "learning_rate": 3.928702795940007e-06, + "loss": 0.47220849990844727, + "step": 744 + }, + { + "epoch": 2.458745874587459, + "grad_norm": 0.23440870124340746, + "learning_rate": 3.883101874810966e-06, + "loss": 0.4117845296859741, + "step": 745 + }, + { + "epoch": 2.462046204620462, + "grad_norm": 0.2389531188545627, + "learning_rate": 3.8377386782488875e-06, + "loss": 0.44338276982307434, + "step": 746 + }, + { + "epoch": 2.4653465346534653, + "grad_norm": 0.28253943840492757, + "learning_rate": 3.7926138753618257e-06, + "loss": 0.470272958278656, + "step": 747 + }, + { + "epoch": 2.4686468646864688, + "grad_norm": 0.2533414456878978, + "learning_rate": 3.747728131741517e-06, + "loss": 0.4825139045715332, + "step": 748 + }, + { + "epoch": 2.4719471947194718, + "grad_norm": 0.22813621303002277, + "learning_rate": 3.703082109453575e-06, + "loss": 0.43612140417099, + "step": 749 + }, + { + "epoch": 2.4752475247524752, + "grad_norm": 0.22709733679425215, + "learning_rate": 3.6586764670277065e-06, + "loss": 0.4573146402835846, + "step": 750 + }, + { + "epoch": 2.4785478547854787, + "grad_norm": 0.24807030489347143, + "learning_rate": 3.61451185944802e-06, + "loss": 0.4419093430042267, + "step": 751 + }, + { + "epoch": 2.4818481848184817, + "grad_norm": 0.23735191741997233, + "learning_rate": 3.570588938143353e-06, + "loss": 0.440906822681427, + "step": 752 + }, + { + "epoch": 2.485148514851485, + "grad_norm": 0.24792760735437452, + "learning_rate": 3.5269083509776735e-06, + "loss": 0.432383269071579, + "step": 753 + }, + { + "epoch": 2.4884488448844886, + "grad_norm": 0.24788857238042053, + "learning_rate": 3.4834707422404957e-06, + "loss": 0.4615401029586792, + "step": 754 + }, + { + "epoch": 2.4917491749174916, + "grad_norm": 0.29288725170403773, + "learning_rate": 3.440276752637417e-06, + "loss": 
0.43933019042015076, + "step": 755 + }, + { + "epoch": 2.495049504950495, + "grad_norm": 0.24422605775888084, + "learning_rate": 3.3973270192806427e-06, + "loss": 0.4651945233345032, + "step": 756 + }, + { + "epoch": 2.4983498349834985, + "grad_norm": 0.3408455968625333, + "learning_rate": 3.3546221756795874e-06, + "loss": 0.4423069953918457, + "step": 757 + }, + { + "epoch": 2.5016501650165015, + "grad_norm": 0.32517130275625505, + "learning_rate": 3.3121628517315373e-06, + "loss": 0.4905679225921631, + "step": 758 + }, + { + "epoch": 2.504950495049505, + "grad_norm": 0.24015956320352147, + "learning_rate": 3.2699496737123758e-06, + "loss": 0.46989548206329346, + "step": 759 + }, + { + "epoch": 2.5082508250825084, + "grad_norm": 0.24393784259324253, + "learning_rate": 3.2279832642673025e-06, + "loss": 0.5168344378471375, + "step": 760 + }, + { + "epoch": 2.5115511551155114, + "grad_norm": 0.2446798962745333, + "learning_rate": 3.186264242401693e-06, + "loss": 0.46055924892425537, + "step": 761 + }, + { + "epoch": 2.514851485148515, + "grad_norm": 0.2561165095643357, + "learning_rate": 3.144793223471949e-06, + "loss": 0.5135318040847778, + "step": 762 + }, + { + "epoch": 2.5181518151815183, + "grad_norm": 1.1234233736547772, + "learning_rate": 3.1035708191764246e-06, + "loss": 0.5026534199714661, + "step": 763 + }, + { + "epoch": 2.5214521452145213, + "grad_norm": 0.23866674349332329, + "learning_rate": 3.0625976375463938e-06, + "loss": 0.43348389863967896, + "step": 764 + }, + { + "epoch": 2.5247524752475248, + "grad_norm": 0.2295043927466033, + "learning_rate": 3.021874282937103e-06, + "loss": 0.4620594382286072, + "step": 765 + }, + { + "epoch": 2.5280528052805282, + "grad_norm": 0.25250691113798673, + "learning_rate": 2.9814013560188425e-06, + "loss": 0.4646865725517273, + "step": 766 + }, + { + "epoch": 2.5313531353135312, + "grad_norm": 0.2396511266141401, + "learning_rate": 2.9411794537680795e-06, + "loss": 0.46846333146095276, + "step": 767 + }, + { + 
"epoch": 2.5346534653465347, + "grad_norm": 0.24818691561244743, + "learning_rate": 2.901209169458672e-06, + "loss": 0.487953782081604, + "step": 768 + }, + { + "epoch": 2.537953795379538, + "grad_norm": 0.24296952409375147, + "learning_rate": 2.861491092653115e-06, + "loss": 0.4543481469154358, + "step": 769 + }, + { + "epoch": 2.541254125412541, + "grad_norm": 0.24368208278529027, + "learning_rate": 2.822025809193818e-06, + "loss": 0.4961584806442261, + "step": 770 + }, + { + "epoch": 2.5445544554455446, + "grad_norm": 0.2377375055697493, + "learning_rate": 2.7828139011944967e-06, + "loss": 0.44123750925064087, + "step": 771 + }, + { + "epoch": 2.547854785478548, + "grad_norm": 0.2301227484744363, + "learning_rate": 2.743855947031575e-06, + "loss": 0.43014320731163025, + "step": 772 + }, + { + "epoch": 2.551155115511551, + "grad_norm": 0.2250422650499226, + "learning_rate": 2.7051525213356546e-06, + "loss": 0.4774499535560608, + "step": 773 + }, + { + "epoch": 2.5544554455445545, + "grad_norm": 0.23823454905644054, + "learning_rate": 2.6667041949830186e-06, + "loss": 0.44963133335113525, + "step": 774 + }, + { + "epoch": 2.557755775577558, + "grad_norm": 0.2554981481850554, + "learning_rate": 2.6285115350872524e-06, + "loss": 0.4840245842933655, + "step": 775 + }, + { + "epoch": 2.561056105610561, + "grad_norm": 0.2589754738757413, + "learning_rate": 2.5905751049908466e-06, + "loss": 0.5490096807479858, + "step": 776 + }, + { + "epoch": 2.5643564356435644, + "grad_norm": 0.30754095371590884, + "learning_rate": 2.5528954642568947e-06, + "loss": 0.4965711832046509, + "step": 777 + }, + { + "epoch": 2.567656765676568, + "grad_norm": 0.2261872478084121, + "learning_rate": 2.5154731686608424e-06, + "loss": 0.4518459439277649, + "step": 778 + }, + { + "epoch": 2.570957095709571, + "grad_norm": 0.24374764034742216, + "learning_rate": 2.4783087701823026e-06, + "loss": 0.5022287964820862, + "step": 779 + }, + { + "epoch": 2.5742574257425743, + "grad_norm": 
0.2531412256958666, + "learning_rate": 2.441402816996876e-06, + "loss": 0.47195330262184143, + "step": 780 + }, + { + "epoch": 2.5775577557755778, + "grad_norm": 0.25588546327446415, + "learning_rate": 2.4047558534681124e-06, + "loss": 0.5155715346336365, + "step": 781 + }, + { + "epoch": 2.580858085808581, + "grad_norm": 0.26863032492519423, + "learning_rate": 2.3683684201394507e-06, + "loss": 0.46963661909103394, + "step": 782 + }, + { + "epoch": 2.5841584158415842, + "grad_norm": 0.2303264290466175, + "learning_rate": 2.3322410537262495e-06, + "loss": 0.4279938340187073, + "step": 783 + }, + { + "epoch": 2.5874587458745877, + "grad_norm": 0.24160002325917174, + "learning_rate": 2.296374287107883e-06, + "loss": 0.47818487882614136, + "step": 784 + }, + { + "epoch": 2.5907590759075907, + "grad_norm": 0.23493031875502465, + "learning_rate": 2.260768649319869e-06, + "loss": 0.4445609152317047, + "step": 785 + }, + { + "epoch": 2.594059405940594, + "grad_norm": 0.2545526596288379, + "learning_rate": 2.2254246655460765e-06, + "loss": 0.4838835895061493, + "step": 786 + }, + { + "epoch": 2.5973597359735976, + "grad_norm": 0.24631479441885146, + "learning_rate": 2.1903428571109566e-06, + "loss": 0.4454101324081421, + "step": 787 + }, + { + "epoch": 2.6006600660066006, + "grad_norm": 0.2399303225290425, + "learning_rate": 2.1555237414718854e-06, + "loss": 0.46468472480773926, + "step": 788 + }, + { + "epoch": 2.603960396039604, + "grad_norm": 0.24533578787784271, + "learning_rate": 2.1209678322115133e-06, + "loss": 0.508684515953064, + "step": 789 + }, + { + "epoch": 2.6072607260726075, + "grad_norm": 0.23699012050293838, + "learning_rate": 2.0866756390301778e-06, + "loss": 0.46998751163482666, + "step": 790 + }, + { + "epoch": 2.6105610561056105, + "grad_norm": 0.22442653448303418, + "learning_rate": 2.0526476677384123e-06, + "loss": 0.41589513421058655, + "step": 791 + }, + { + "epoch": 2.613861386138614, + "grad_norm": 0.23870429201603713, + "learning_rate": 
2.018884420249474e-06, + "loss": 0.4948643445968628, + "step": 792 + }, + { + "epoch": 2.6171617161716174, + "grad_norm": 0.23103305184303033, + "learning_rate": 1.9853863945719243e-06, + "loss": 0.4494874179363251, + "step": 793 + }, + { + "epoch": 2.6204620462046204, + "grad_norm": 0.23980252076908543, + "learning_rate": 1.9521540848023113e-06, + "loss": 0.42173343896865845, + "step": 794 + }, + { + "epoch": 2.623762376237624, + "grad_norm": 0.24254851053091633, + "learning_rate": 1.9191879811178605e-06, + "loss": 0.4319555461406708, + "step": 795 + }, + { + "epoch": 2.6270627062706273, + "grad_norm": 0.21769714480169441, + "learning_rate": 1.8864885697692582e-06, + "loss": 0.40467706322669983, + "step": 796 + }, + { + "epoch": 2.6303630363036303, + "grad_norm": 0.23815188307796767, + "learning_rate": 1.8540563330734662e-06, + "loss": 0.5141273736953735, + "step": 797 + }, + { + "epoch": 2.633663366336634, + "grad_norm": 0.23237959155910853, + "learning_rate": 1.8218917494066212e-06, + "loss": 0.44990289211273193, + "step": 798 + }, + { + "epoch": 2.6369636963696372, + "grad_norm": 0.2393948822814923, + "learning_rate": 1.7899952931969756e-06, + "loss": 0.4878673553466797, + "step": 799 + }, + { + "epoch": 2.6402640264026402, + "grad_norm": 0.22595932266177446, + "learning_rate": 1.7583674349178803e-06, + "loss": 0.46406376361846924, + "step": 800 + }, + { + "epoch": 2.6435643564356437, + "grad_norm": 0.22163499847677615, + "learning_rate": 1.7270086410808762e-06, + "loss": 0.44470641016960144, + "step": 801 + }, + { + "epoch": 2.6468646864686467, + "grad_norm": 0.23461158504190754, + "learning_rate": 1.695919374228796e-06, + "loss": 0.5306479930877686, + "step": 802 + }, + { + "epoch": 2.65016501650165, + "grad_norm": 0.23844670077139818, + "learning_rate": 1.6651000929289462e-06, + "loss": 0.4570600390434265, + "step": 803 + }, + { + "epoch": 2.6534653465346536, + "grad_norm": 0.24202990025785212, + "learning_rate": 1.6345512517663275e-06, + "loss": 
0.48561781644821167, + "step": 804 + }, + { + "epoch": 2.6567656765676566, + "grad_norm": 0.23785932147050265, + "learning_rate": 1.6042733013369604e-06, + "loss": 0.4666748642921448, + "step": 805 + }, + { + "epoch": 2.66006600660066, + "grad_norm": 0.2420529385568233, + "learning_rate": 1.5742666882412106e-06, + "loss": 0.4761434495449066, + "step": 806 + }, + { + "epoch": 2.6633663366336635, + "grad_norm": 0.23716960917200494, + "learning_rate": 1.5445318550772204e-06, + "loss": 0.4475252628326416, + "step": 807 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.2477540352529907, + "learning_rate": 1.5150692404343637e-06, + "loss": 0.5299564599990845, + "step": 808 + }, + { + "epoch": 2.66996699669967, + "grad_norm": 0.23933028255710986, + "learning_rate": 1.4858792788867904e-06, + "loss": 0.518581748008728, + "step": 809 + }, + { + "epoch": 2.6732673267326734, + "grad_norm": 0.2332077440459636, + "learning_rate": 1.4569624009870165e-06, + "loss": 0.5162506103515625, + "step": 810 + }, + { + "epoch": 2.6765676567656764, + "grad_norm": 0.23396257763770162, + "learning_rate": 1.4283190332595665e-06, + "loss": 0.4762595593929291, + "step": 811 + }, + { + "epoch": 2.67986798679868, + "grad_norm": 0.24891326451914347, + "learning_rate": 1.3999495981946764e-06, + "loss": 0.44347697496414185, + "step": 812 + }, + { + "epoch": 2.6831683168316833, + "grad_norm": 0.22951918904681498, + "learning_rate": 1.3718545142420768e-06, + "loss": 0.4344146251678467, + "step": 813 + }, + { + "epoch": 2.6864686468646863, + "grad_norm": 0.23863686607461265, + "learning_rate": 1.344034195804813e-06, + "loss": 0.4936307668685913, + "step": 814 + }, + { + "epoch": 2.68976897689769, + "grad_norm": 0.23758007083024585, + "learning_rate": 1.3164890532331386e-06, + "loss": 0.43635520339012146, + "step": 815 + }, + { + "epoch": 2.693069306930693, + "grad_norm": 0.24550816708533926, + "learning_rate": 1.2892194928184499e-06, + "loss": 0.48006054759025574, + "step": 816 + }, + { + "epoch": 
2.6963696369636962, + "grad_norm": 0.22610358677951214, + "learning_rate": 1.2622259167873008e-06, + "loss": 0.4296647906303406, + "step": 817 + }, + { + "epoch": 2.6996699669966997, + "grad_norm": 0.3871947383123805, + "learning_rate": 1.2355087232954754e-06, + "loss": 0.47840994596481323, + "step": 818 + }, + { + "epoch": 2.7029702970297027, + "grad_norm": 0.21432181977841594, + "learning_rate": 1.209068306422112e-06, + "loss": 0.41459953784942627, + "step": 819 + }, + { + "epoch": 2.706270627062706, + "grad_norm": 0.24313471794627498, + "learning_rate": 1.1829050561638766e-06, + "loss": 0.4278629422187805, + "step": 820 + }, + { + "epoch": 2.7095709570957096, + "grad_norm": 0.24379358416226346, + "learning_rate": 1.1570193584292323e-06, + "loss": 0.44538602232933044, + "step": 821 + }, + { + "epoch": 2.7128712871287126, + "grad_norm": 0.23094639733408046, + "learning_rate": 1.1314115950327365e-06, + "loss": 0.4757949709892273, + "step": 822 + }, + { + "epoch": 2.716171617161716, + "grad_norm": 0.22182336808333136, + "learning_rate": 1.106082143689402e-06, + "loss": 0.49131542444229126, + "step": 823 + }, + { + "epoch": 2.7194719471947195, + "grad_norm": 0.2534124798335607, + "learning_rate": 1.0810313780091408e-06, + "loss": 0.4917967915534973, + "step": 824 + }, + { + "epoch": 2.7227722772277225, + "grad_norm": 0.23670068032674005, + "learning_rate": 1.056259667491244e-06, + "loss": 0.4949303865432739, + "step": 825 + }, + { + "epoch": 2.726072607260726, + "grad_norm": 0.23770304320813665, + "learning_rate": 1.0317673775189374e-06, + "loss": 0.4287925958633423, + "step": 826 + }, + { + "epoch": 2.7293729372937294, + "grad_norm": 0.2425418928573913, + "learning_rate": 1.007554869353975e-06, + "loss": 0.5059949159622192, + "step": 827 + }, + { + "epoch": 2.7326732673267324, + "grad_norm": 0.25049371554006, + "learning_rate": 9.83622500131336e-07, + "loss": 0.47914958000183105, + "step": 828 + }, + { + "epoch": 2.735973597359736, + "grad_norm": 
0.24168515794090734, + "learning_rate": 9.599706228539452e-07, + "loss": 0.5237720608711243, + "step": 829 + }, + { + "epoch": 2.7392739273927393, + "grad_norm": 0.23836969767457952, + "learning_rate": 9.365995863874566e-07, + "loss": 0.4628916382789612, + "step": 830 + }, + { + "epoch": 2.7425742574257423, + "grad_norm": 0.22835633263617844, + "learning_rate": 9.135097354551203e-07, + "loss": 0.49988898634910583, + "step": 831 + }, + { + "epoch": 2.745874587458746, + "grad_norm": 0.2229937423966958, + "learning_rate": 8.907014106327039e-07, + "loss": 0.4631851315498352, + "step": 832 + }, + { + "epoch": 2.7491749174917492, + "grad_norm": 0.24485133529173167, + "learning_rate": 8.681749483434387e-07, + "loss": 0.47001713514328003, + "step": 833 + }, + { + "epoch": 2.7524752475247523, + "grad_norm": 0.23400965677751775, + "learning_rate": 8.459306808530999e-07, + "loss": 0.4437292218208313, + "step": 834 + }, + { + "epoch": 2.7557755775577557, + "grad_norm": 0.26632452732629835, + "learning_rate": 8.239689362650694e-07, + "loss": 0.5006406903266907, + "step": 835 + }, + { + "epoch": 2.759075907590759, + "grad_norm": 0.23471614589516374, + "learning_rate": 8.022900385155185e-07, + "loss": 0.45732003450393677, + "step": 836 + }, + { + "epoch": 2.762376237623762, + "grad_norm": 0.47225644675751677, + "learning_rate": 7.808943073686159e-07, + "loss": 0.5012909173965454, + "step": 837 + }, + { + "epoch": 2.7656765676567656, + "grad_norm": 0.25510766784506034, + "learning_rate": 7.597820584118221e-07, + "loss": 0.5104090571403503, + "step": 838 + }, + { + "epoch": 2.768976897689769, + "grad_norm": 0.22536004830501363, + "learning_rate": 7.38953603051229e-07, + "loss": 0.44415900111198425, + "step": 839 + }, + { + "epoch": 2.772277227722772, + "grad_norm": 0.23868123290562657, + "learning_rate": 7.184092485069638e-07, + "loss": 0.46958473324775696, + "step": 840 + }, + { + "epoch": 2.7755775577557755, + "grad_norm": 0.22685199851447227, + "learning_rate": 
6.981492978086634e-07, + "loss": 0.4305083155632019, + "step": 841 + }, + { + "epoch": 2.778877887788779, + "grad_norm": 0.2363937135429503, + "learning_rate": 6.78174049791005e-07, + "loss": 0.4812752604484558, + "step": 842 + }, + { + "epoch": 2.782178217821782, + "grad_norm": 0.23536493344498524, + "learning_rate": 6.584837990892889e-07, + "loss": 0.522142231464386, + "step": 843 + }, + { + "epoch": 2.7854785478547854, + "grad_norm": 0.2629089101886439, + "learning_rate": 6.390788361351053e-07, + "loss": 0.4789726138114929, + "step": 844 + }, + { + "epoch": 2.788778877887789, + "grad_norm": 0.221963892758326, + "learning_rate": 6.199594471520453e-07, + "loss": 0.44507476687431335, + "step": 845 + }, + { + "epoch": 2.792079207920792, + "grad_norm": 0.23452674626378717, + "learning_rate": 6.011259141514747e-07, + "loss": 0.47613948583602905, + "step": 846 + }, + { + "epoch": 2.7953795379537953, + "grad_norm": 0.22167932095355114, + "learning_rate": 5.825785149283758e-07, + "loss": 0.44828763604164124, + "step": 847 + }, + { + "epoch": 2.798679867986799, + "grad_norm": 0.3027768768548174, + "learning_rate": 5.64317523057254e-07, + "loss": 0.4695909321308136, + "step": 848 + }, + { + "epoch": 2.801980198019802, + "grad_norm": 0.2349539472452322, + "learning_rate": 5.463432078881093e-07, + "loss": 0.48341453075408936, + "step": 849 + }, + { + "epoch": 2.8052805280528053, + "grad_norm": 0.21333400051209225, + "learning_rate": 5.286558345424397e-07, + "loss": 0.47008436918258667, + "step": 850 + }, + { + "epoch": 2.8085808580858087, + "grad_norm": 0.2369125413431687, + "learning_rate": 5.112556639093536e-07, + "loss": 0.5081039071083069, + "step": 851 + }, + { + "epoch": 2.8118811881188117, + "grad_norm": 0.23230496066562498, + "learning_rate": 4.941429526417163e-07, + "loss": 0.49790090322494507, + "step": 852 + }, + { + "epoch": 2.815181518151815, + "grad_norm": 0.2314377157636827, + "learning_rate": 4.773179531523542e-07, + "loss": 0.476767897605896, + "step": 853 + 
}, + { + "epoch": 2.8184818481848186, + "grad_norm": 0.234974793768271, + "learning_rate": 4.6078091361034585e-07, + "loss": 0.5067446231842041, + "step": 854 + }, + { + "epoch": 2.8217821782178216, + "grad_norm": 0.2229121342330284, + "learning_rate": 4.4453207793735185e-07, + "loss": 0.45703452825546265, + "step": 855 + }, + { + "epoch": 2.825082508250825, + "grad_norm": 0.25006675020075053, + "learning_rate": 4.285716858040223e-07, + "loss": 0.4193270206451416, + "step": 856 + }, + { + "epoch": 2.8283828382838285, + "grad_norm": 0.2214334357956483, + "learning_rate": 4.128999726264549e-07, + "loss": 0.4367069602012634, + "step": 857 + }, + { + "epoch": 2.8316831683168315, + "grad_norm": 0.23745672544685706, + "learning_rate": 3.9751716956273113e-07, + "loss": 0.46601590514183044, + "step": 858 + }, + { + "epoch": 2.834983498349835, + "grad_norm": 0.23728948504727357, + "learning_rate": 3.824235035095036e-07, + "loss": 0.4801405072212219, + "step": 859 + }, + { + "epoch": 2.8382838283828384, + "grad_norm": 0.2305722834125333, + "learning_rate": 3.676191970986409e-07, + "loss": 0.4729960262775421, + "step": 860 + }, + { + "epoch": 2.8415841584158414, + "grad_norm": 0.2565962552578653, + "learning_rate": 3.531044686939611e-07, + "loss": 0.453819215297699, + "step": 861 + }, + { + "epoch": 2.844884488448845, + "grad_norm": 0.2345568934684747, + "learning_rate": 3.388795323879923e-07, + "loss": 0.4655516743659973, + "step": 862 + }, + { + "epoch": 2.8481848184818483, + "grad_norm": 0.2602122051468819, + "learning_rate": 3.249445979988286e-07, + "loss": 0.4915505647659302, + "step": 863 + }, + { + "epoch": 2.8514851485148514, + "grad_norm": 0.227534967530927, + "learning_rate": 3.112998710670279e-07, + "loss": 0.46072205901145935, + "step": 864 + }, + { + "epoch": 2.854785478547855, + "grad_norm": 0.2372527927247435, + "learning_rate": 2.979455528525854e-07, + "loss": 0.47496911883354187, + "step": 865 + }, + { + "epoch": 2.8580858085808583, + "grad_norm": 
0.2396587074165527, + "learning_rate": 2.8488184033195867e-07, + "loss": 0.4863288402557373, + "step": 866 + }, + { + "epoch": 2.8613861386138613, + "grad_norm": 0.23166629272471134, + "learning_rate": 2.721089261951626e-07, + "loss": 0.4543803930282593, + "step": 867 + }, + { + "epoch": 2.8646864686468647, + "grad_norm": 0.2431611152190322, + "learning_rate": 2.5962699884293894e-07, + "loss": 0.4589266777038574, + "step": 868 + }, + { + "epoch": 2.867986798679868, + "grad_norm": 0.2225895431580723, + "learning_rate": 2.474362423839627e-07, + "loss": 0.45603302121162415, + "step": 869 + }, + { + "epoch": 2.871287128712871, + "grad_norm": 0.2221408751585563, + "learning_rate": 2.3553683663213088e-07, + "loss": 0.4547184109687805, + "step": 870 + }, + { + "epoch": 2.8745874587458746, + "grad_norm": 0.24123343867414457, + "learning_rate": 2.2392895710391604e-07, + "loss": 0.4900602102279663, + "step": 871 + }, + { + "epoch": 2.877887788778878, + "grad_norm": 0.2412441535157341, + "learning_rate": 2.126127750157725e-07, + "loss": 0.48706525564193726, + "step": 872 + }, + { + "epoch": 2.881188118811881, + "grad_norm": 0.24173675884162568, + "learning_rate": 2.0158845728160958e-07, + "loss": 0.4726618230342865, + "step": 873 + }, + { + "epoch": 2.8844884488448845, + "grad_norm": 0.25907893004745514, + "learning_rate": 1.9085616651033147e-07, + "loss": 0.45884019136428833, + "step": 874 + }, + { + "epoch": 2.887788778877888, + "grad_norm": 0.2641670850826395, + "learning_rate": 1.804160610034411e-07, + "loss": 0.4787840247154236, + "step": 875 + }, + { + "epoch": 2.891089108910891, + "grad_norm": 0.24253910042279672, + "learning_rate": 1.702682947527001e-07, + "loss": 0.4758448004722595, + "step": 876 + }, + { + "epoch": 2.8943894389438944, + "grad_norm": 0.2279011748861112, + "learning_rate": 1.6041301743786596e-07, + "loss": 0.47089093923568726, + "step": 877 + }, + { + "epoch": 2.897689768976898, + "grad_norm": 0.29849498701163135, + "learning_rate": 
1.5085037442446937e-07, + "loss": 0.46921056509017944, + "step": 878 + }, + { + "epoch": 2.900990099009901, + "grad_norm": 0.2344970489799305, + "learning_rate": 1.415805067616871e-07, + "loss": 0.5218731164932251, + "step": 879 + }, + { + "epoch": 2.9042904290429044, + "grad_norm": 0.2254215991599414, + "learning_rate": 1.3260355118025036e-07, + "loss": 0.43099671602249146, + "step": 880 + }, + { + "epoch": 2.907590759075908, + "grad_norm": 0.23874830724823604, + "learning_rate": 1.2391964009043078e-07, + "loss": 0.48290592432022095, + "step": 881 + }, + { + "epoch": 2.910891089108911, + "grad_norm": 0.23943766068140404, + "learning_rate": 1.1552890158009311e-07, + "loss": 0.4634360074996948, + "step": 882 + }, + { + "epoch": 2.9141914191419143, + "grad_norm": 0.2453653346062948, + "learning_rate": 1.0743145941279453e-07, + "loss": 0.5041622519493103, + "step": 883 + }, + { + "epoch": 2.9174917491749177, + "grad_norm": 0.21518547033713775, + "learning_rate": 9.962743302596612e-08, + "loss": 0.480410099029541, + "step": 884 + }, + { + "epoch": 2.9207920792079207, + "grad_norm": 0.24487326504708118, + "learning_rate": 9.211693752915419e-08, + "loss": 0.49919891357421875, + "step": 885 + }, + { + "epoch": 2.924092409240924, + "grad_norm": 0.23373083594094138, + "learning_rate": 8.490008370231506e-08, + "loss": 0.508806586265564, + "step": 886 + }, + { + "epoch": 2.9273927392739276, + "grad_norm": 0.23076843849897602, + "learning_rate": 7.797697799418525e-08, + "loss": 0.4233350157737732, + "step": 887 + }, + { + "epoch": 2.9306930693069306, + "grad_norm": 0.2406032429252954, + "learning_rate": 7.134772252071154e-08, + "loss": 0.4577901363372803, + "step": 888 + }, + { + "epoch": 2.933993399339934, + "grad_norm": 0.22213331512067527, + "learning_rate": 6.501241506354561e-08, + "loss": 0.4028077721595764, + "step": 889 + }, + { + "epoch": 2.9372937293729375, + "grad_norm": 0.23681508976522572, + "learning_rate": 5.897114906859402e-08, + "loss": 0.48321446776390076, + 
"step": 890 + }, + { + "epoch": 2.9405940594059405, + "grad_norm": 0.27558742916404966, + "learning_rate": 5.322401364465491e-08, + "loss": 0.48732608556747437, + "step": 891 + }, + { + "epoch": 2.943894389438944, + "grad_norm": 0.22725537704850798, + "learning_rate": 4.777109356208565e-08, + "loss": 0.46879494190216064, + "step": 892 + }, + { + "epoch": 2.9471947194719474, + "grad_norm": 0.23495776431163154, + "learning_rate": 4.261246925156837e-08, + "loss": 0.4858628511428833, + "step": 893 + }, + { + "epoch": 2.9504950495049505, + "grad_norm": 0.22802725333151694, + "learning_rate": 3.7748216802913077e-08, + "loss": 0.48119616508483887, + "step": 894 + }, + { + "epoch": 2.953795379537954, + "grad_norm": 0.22512889420077337, + "learning_rate": 3.3178407963938564e-08, + "loss": 0.4994167983531952, + "step": 895 + }, + { + "epoch": 2.9570957095709574, + "grad_norm": 0.23739211802797258, + "learning_rate": 2.8903110139417712e-08, + "loss": 0.46394845843315125, + "step": 896 + }, + { + "epoch": 2.9603960396039604, + "grad_norm": 0.2476698533912655, + "learning_rate": 2.4922386390076047e-08, + "loss": 0.42504560947418213, + "step": 897 + }, + { + "epoch": 2.963696369636964, + "grad_norm": 0.24523827629331452, + "learning_rate": 2.1236295431670275e-08, + "loss": 0.4186960756778717, + "step": 898 + }, + { + "epoch": 2.9669966996699673, + "grad_norm": 0.22738870735932892, + "learning_rate": 1.7844891634113402e-08, + "loss": 0.4529160261154175, + "step": 899 + }, + { + "epoch": 2.9702970297029703, + "grad_norm": 0.23734524364327658, + "learning_rate": 1.4748225020679851e-08, + "loss": 0.44012153148651123, + "step": 900 + }, + { + "epoch": 2.9735973597359737, + "grad_norm": 0.23103066951863727, + "learning_rate": 1.1946341267263794e-08, + "loss": 0.4775368571281433, + "step": 901 + }, + { + "epoch": 2.976897689768977, + "grad_norm": 0.22618868632744704, + "learning_rate": 9.439281701704162e-09, + "loss": 0.4465276002883911, + "step": 902 + }, + { + "epoch": 
2.98019801980198, + "grad_norm": 0.24271367480309458, + "learning_rate": 7.227083303180671e-09, + "loss": 0.4674132168292999, + "step": 903 + }, + { + "epoch": 2.9834983498349836, + "grad_norm": 0.23142674174926925, + "learning_rate": 5.30977870166316e-09, + "loss": 0.4751841127872467, + "step": 904 + }, + { + "epoch": 2.9867986798679866, + "grad_norm": 0.24061959007170008, + "learning_rate": 3.687396177434188e-09, + "loss": 0.4587743580341339, + "step": 905 + }, + { + "epoch": 2.99009900990099, + "grad_norm": 0.22301171950064888, + "learning_rate": 2.359959660667155e-09, + "loss": 0.4815826416015625, + "step": 906 + }, + { + "epoch": 2.9933993399339935, + "grad_norm": 0.22240717058445192, + "learning_rate": 1.3274887310732454e-09, + "loss": 0.45863479375839233, + "step": 907 + }, + { + "epoch": 2.9966996699669965, + "grad_norm": 0.23321307876392341, + "learning_rate": 5.899986176260974e-10, + "loss": 0.4888804256916046, + "step": 908 + }, + { + "epoch": 3.0, + "grad_norm": 0.2343821134475686, + "learning_rate": 1.475001983131108e-10, + "loss": 0.46804267168045044, + "step": 909 + }, + { + "epoch": 3.0, + "step": 909, + "total_flos": 1274755977576448.0, + "train_loss": 0.5495063810720958, + "train_runtime": 34913.8374, + "train_samples_per_second": 3.33, + "train_steps_per_second": 0.026 + } + ], + "logging_steps": 1, + "max_steps": 909, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1274755977576448.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..f7e3756 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2299c356bc8becdedc07e7f99b268be8ab4a91e2c4e99a06f13c8e908a3188e4 +size 7313 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..414f491951500795448ff7a1e2c1f212ad665698 GIT binary patch literal 40959 zcmd?Rg}Z}xQ_mL7>E!AA-#T0@fLz$1t17!i~tWl z5$PV9g8xanDCxP}cCc`9H+^^?Q8RUMv~_T?wKijOyZ_MH+QI%3zmOom_ysmA7Z*oo zNdW=7|6alG@X%7A!NB!9j6&#m?T#~okei}^Fj;b$)(Dbfb6xSWmdESA!|omi)5EeW z`&KFm_dS>6#l4r~l(2BH0*k&_y$xM6ucqeVvp$!>_mR&9xhs$Fpt4t*A^(!^9ebYO zm3FfJ!5HRcnR^E72S;(PJ~1wzq$gbc3wY^DeOz45NX&lU_Do*pj7Z}Nihv(w(hd`2 zgc5$<+#v~2gr9U0Y&Q63H%}9C4gM+oMG!d!|Bk-+|D#_EH&fHlj4v1;dlfEKnY9q* z=@q9L7jo8@OxyL+KM$`(EVwFXqs7Z~#&k+&!lsnl2YkS07;Kq#`c<*jax=6n5wo2_=n5-@Q zI*ogLvHRy-Cicfjtq_B2IN+N~{yIZ@7EHrfAZ z7nAK#Ic&eL<5-Q>4p3Gw4pfj-hL>cGH^s^Q^ietZTvnKC-Npm6a2fvE5dG zwRf-&8XN>0M;&!_b@7p+!No(rrI?tQ@|eDb&EQi@W0NYW^$Q!MD7mJgarMd-tb+0hqI2aqIGvcGPu5g8m+QX@ z)ICl)+Q@AG`Zf6Ty++TOH{p|QvEkBNmpY|`i_O~_ZAHx+f^c`Ho@(p~7p_cwepL8flJIcbnCBgL0{SlI-5Sdu##%0`oSf z#~j61-Yomz@9ypjGDIheJ8>j?Oc1u?Kf~(F*SSxHgCUnY5DeHkQdfH|PRaF@(0S&2 zOB<^{)2b^my2b1SK@-KyWcWvWeRVftMPAPooZ3vVFA@l_60cbM!t1m#t9h`Va@<_# zx5@i1BLjo{`1v!#Ty|#+yZT`5YxzP!Ra`;>moLxZAF;ZIj2A|c3x#zo{4aInS2}0a>d&EoFAOHNz|281Zt=^YvuO)J5#%@R)&}T z==k{fc3K4d0@1My%Iu%V>>e)r$5t&>5`RtLYm(U>3jXoqmZ9J7^fh($7+xlvo}M0) z)rpqb+1dJG_tEH{9<77TLjM+YI=V)lxVnZ;xX4(=OmY+Y+uVXmqS3uyWnI0!p*7yy zW(&Xa%aPYT?YX2`YgZsNuJL<|fJD|4AC6J2i=5-(Y*|F#i#977WL4DgaV z!j^rPy}K4RcP3enBWQSa#?oVD8yiE3ou;4bHrkg>CVMY6=+>^$< zXoYJc@mYN+%7)z-(P0q1YL8+URRslX{>oVnpWUyHPphxE`tJ{p(qbY~eAXVh7#J8NiCcW5FV4!kw&7Ki#uGVl z|HWM{^*53t6BSRNJZY9%uD&!cf>Q-;p=!(TAyV0faW4rrE9Yf*% z_c!~n+kDTt?t2#>!N~}9F<4okUFlLFf7HP0N=Qf;IjWu39idRKcxVd-hJ$dUebM6M zoxRHJY(hAgdZo5^RGShJ$3eTYb73D)M=tD(x}B-6RI{umHvBG;kv_}tjkM>f0Dn^j z@NN5=O`Ta68Eou6V}F?^PoJ7s)|xEz6}AQ85*NA5-I@~xM@Ky+BO^oHV}%VluCDr- zsgX-&iT8Wa&33qw=K1sILUOfz3oWhDj841LFUJ+BJ}r6kPE%;G*fjy>x}-isxX+Z;PGwzjs-a1H}(3M!N( 
z{e699;Gh>h=snLQh7l8Vu0lT+TF@{bpG}mt22Xfx^B&6u%9b!^K{}qpSe@Uey9kv^joGxTc-HZ!frvd^nlY$6UEFysGCh` zK0DR(VxR2O=cnA0?eUx*3;AV5rcHR&kk!$!vN{$_J`sPF1%ntmcZh(`oi_46bhRBS zr>r}e7FdOh#8g+8tf6p3gRc?-)iM(A-{_2{M@1}O5 zDi<0CBWYPX<&J&)jr0p~bsAXY{Fr85JIAqZ8aL~@2hJWP#avdwA-vIyFtr)4LdS>^bz<~k%Z!a(5%N@>X zG~v~QbLd@2DC&bG7?+a5y1hQ%v0rEXlS^1wc&wz)s0nPzH*9q4)~W4^MnV$jg-x$K z!-@~1-x>3YiYNf6u?!3hP;1wZZHwG|OJcv2r^Ehu>lKTX+l8sp^sBJ|B(Fs&!THl> zyuSTsH9{Gi3`5m&4kjx&sJ9nEcl4HnTtlG$-ox)mK z8Sml4hhr|1O|rE*xh=SKh5=}7dvFeGN0++}@|a$w%Q;(7gQ3pcvSqo{yz)vze(C+c zAD!g<#800-tygS@d$iLgmzI-rero1WM61$k^MOZ=k@Vrl&&Wal&0OQl6gcPN25m(F zoP>U|(e(b_ZS<4Rpp@s&mxLEDCQ%#)&t$791&^?@wr&`y@y?jc$;nySTPVzxWpQ77 z+Z{tApC*pWm43VuF4z2`?%@6`nD)*`L7X}3)6j~>e&?RNT4q($*J zqVVI{Ginr^C|$o!hry8=SR~b-lFa|@n{K!yoYGZbn&-^(a>wm!cTAkZ( zHOhEnzT`YVf7q~n8|%>u{uwSV4%l>JI{wGvDfR%G07A8*%%*C&mJzVMga5uk9eIP? z`nOBy&b-%1i7{}fp)qfZQBqSQEw$|rg6KRMB~UxNZVpDmgTp}Px6{O$_URM#CCl&4 z5TsZCBsoj~+AdAY_`+8<6qIuK*Rp&%!9dVqMB3+IwN2b>!>U~({qW5zQ9A~meEke7 zY;5^g90+23wD!Vj)OUM4lDFUl$i>8vq4BaUgpleAt$xQFDLT~8lfCB8O-)UiK!nvR zcQj2gW=u1^ze3AKMQV)tY0J=%p=NqF&wwsMFjz3v>3{T5-0#4n+e$d{DVN$QNXeeP zv^3)b^|qt6cbyDVTfPF)=NYCgApKT%v?jmtRXbzuy76sa(SaP^LL4>2k=WR<5<0c`iQhT+^=xP6am(FFuxEmW+~0+fMn1DBuGU#slyBq@lMUM zuZ9nrjz*ds?M!+KNG~sHDRv*f8g**90~;Hv z<%$^=A#U!gJqw!?JFAl|=$1r^Dy7Xj_&Tq5h(%q$e%%Z*fJcW&Uj&?moId8o<(|>w zBlo%G&cFR1X$tSug9o%&|9t-lFZl9nai_E$z@T?Gmw%5pg|~d{lr?}kav-H4Z#5rz zW2iLsWK+oJ=H|+QYOD@9cam;Rck!Gdz*>UH8^>lRo?Y|K(_|K$U7bA~&RInzbM z{2&XWDnQChVvaOS3p`RWq@aM`XKyZ(GWN8TTb`ML3b6Y?7$piRwy$}{*Sp0TH=#M;^#-u^{A zTx=6o9MLO4@0NR$(?G#-`BPF_${{CbyvM{EO^@ue3I4VD*njfKu$SG^zBVpoK2)f} zWA^`5c+lwz&p%2dh;73z?34Qa;>w|XZts=MpV9LYe%+`p)SdO`VZ}{t?NHDG8fvNM zuhK^0Lbe|IDouhX4=X8wRe)xZ+WR2>=~L|BhWeatLGDNnLw-6y88>g;3Iae9J7Ccn z|8KDAhZSS%0C`i$qgpQhexY#KY%8iz*f$B_!Lh;+QgX2GbpXB!qH%`A9GKgIDTA2gw?a1CHJ~02fy_sr{Nh$ior&O ztf{lWT@j+LMjfk(u=i)V3F9--acC0HaZD+q$) zQtiGy@aXz`7ft7WWc;r6l|oE_%v0cCj)86p0k#8Hj&3x&LLo~mzWb=J*k3m(faoVN z3S^^Zpj7|NX7L!q 
zln)B}Qp4yzxn4DTNVYxdFNX?0Nc~R@xoCjaSA7q9b9E;H4uO8O1vIm+ z5khgqBPwnz#BU>=^?DmEhUG_4AJX{ zoj}dTDL){AfVHWt92~;iCIdSxqV_$1)SIFYnScNDfn!H?^=0qBM+tFpUCT2#6d%s( z0pbw0@Be^{G8z`oDat(k5+VqU9sm|rL7*aQFS)twNCp~g{A2ap)6e8q0SklyG=eal zF$l=0)L}H{-0kiIFo6nT#&&>cUr|9A*pbjJ&OxJCwd9w%ORij4v<>}%p^Ney+!(HXOdpb zo{3BCRmkY!lDpM|2SIEEHMOO1Y}51dNKl#PPiIOjXdjG)bw^%>oma7gG3ZgF-|O%6 z!%OK5FZ5lsLB$3z>k2RrHVFxdKh8sN8nOxsT1mx*kbIpel)ypC3ew44+;qL`HJ4Ei zXvneeu6x6r%LL4(6`Nc-0QR=LZ|2<*H%QlHTS_Fl`T6<4M6LiEs?)IdHMM!UvFS=o zytg@|s5sRJ>J0Gd$EPphaXqyz60q#r^^sLgmOkBQ=uGMd-cG(Xy>b2Bwi}yVT{y80 z8h(|tQQ9=w8m(9BBLU|-*>BJK*|TS3l?w%oe^t^ktx9aDI{r*Ky#bt7(ErHi!krH| zpb|7eIt~XSLcW#JjhN$}1e!0Mow|8v-ggJ17UxLQ3`30q0mu|#!ZT!9!dHc%4#P2y z^gB~}!zRX9X!vC#322g6%XKai2t<Q3O3$z|+DzESi9c zqC4~YWmNP;i`{(>*BNUc%@JudrYI`(@D^4FfqKwnE6NFw9+Z@jrt!EiLssva%j#&;>PuuF)b_m(Xz0n{*_@5jwl8f;QhjGzaX1BV)StI9}`c%$yKT{Ws? zZ_j_~)TtuJ@hi3ipRPL{gDu`dyo9`i>U6-x_vW56y-`+0T*R3YC7ijEJ^#E+j5*$! zjCBJ3Xnwf2`MbiI`@6&p6N*1Uy=58Wjb4gcv-o?#rcp)ei)LRdO}(O`Wh77JtfMF1 zbSzJ#kg=te0NADw=~2{?;q0^0fS;9@*9@ZQcK9YJ-PcgR7`qBksE=j2v6e=4j`}y?*_8ajIKS`&EYyOdEF z@ArrM#Aw+2J6uHvvYiNj^~#mGAy7kGfYUriwNUS^zYP*IuY*$#rnqaopAsT$5vvES zq*Q?7H~WlZy?@L>)^%De>6<`xzPTOH56_%E`xXMDXi`3%ZqCvidCd5u=&KmyGp)?h zT@k`Qz6-Nu>r|BS#=5$6kn{yX4g(hKQZAjiHa7MckJ-nC<^tYb)wZR95@OJim4E;f z5fjtM?Spt6qn0E(4!V4o)q!jB^`~6yoSfLK{s%&6WaBt@j_`QD?l?-ofA_1$QpJoT zq{X+@)l48aApwvH%sXC*z#b@CSa3abc6I{jIsrO*;D#QmY8>pYuYSAKnFlzDLr5>691pY-{Vu~#+)fpDmSltg^^aPcv-6z%={_d%*V`t)gRv1~NK?`T@U|1=aX*Onb~%{Xm&opcY1xd3ve2Q4!{ai z8G7#ulO%_D9RA9`7?g_8g>$d)8L+$4i06#y-o_kShd`n0+wAP?d%Ixqj1_$3<;$0e zvc8oYH9EOUfJp&aW66rGsiSF9Pywjqc1VY0rzl@(u9Ao7lB6 zVh$rD(aOO95gIK%vU79eCy6<DWX?V!d__W8jWEn=V8{~OZvi!Y3 z95NUHN{)0v8+(SP9Ej#o0uu=a_6=z1={c{ivh>!`{xV2&Q5%`n)jskC*EtFFgoLn1 zB^{*y&~RZRk`g!&^oL9Bha*v}C*}56{?47#pel;=^!A#rO?|GB9EEd7{`7MQh_RIW z+skbznnA(p&Ol#n*tc)0NIe>3Q;rX2JRmMOPPJ2>pgAZVuH-xT5+iZ(@tBYOk4YIt zQ{TVG1GXq6vjyDO@o=+mGK|r_-lni743eVlU@2*=+)=sk%rfj)D}>+`h_`52(*w{d 
z4R72zzjR6D2rb)yA{*ile>*9`us8@an11Z1_QwnAIviC^BAmQ-vI(4s-$ka_M}VNs0k|N@8};x}sFt2oN`U&HN~w_C5sFk9?aMKvyWiafohB5~ zyoe%jBBBFgwob`~3(}Byc0g7MetF5#408HTcdTDe?#vB8P74}GZ*Qp>RyneIV|Bmz z>`X_<1#JINQ2FuEK3cOtgAMi=>sHNbE8}X?=m8#-fihfJz_#z+y-Q?qPSv4gF0^|Q z<`dgnI=puFIO_4Y|uKaO|_36 zO3)l;RBE10*AaH+*6>r&7clxHM>Lwu>H>cRPd$)3QNL0|VmvQHqztv|(bv zYP-8pE%W!{R@`Ce=U9v*!4I6n&bFSKQ|0+lH2}@_40Lp8k!$R6%6L`7{556Nk#G&s zQCa5oW!7wmMKge0S#@y!|NgZ_3EASGW6P^pHAbCZjHyWi6mG7pV4uuiBe(aWa937? z5*vKBLG|?evD`ZyR3P9;^Q%Y1ahy=hO-XtNOu{q7RA0(ZzRv(3Xr`d+9X}tyUr4SV z>mc}F!@cFn!m;YIe&-ozNQmM&-xe4IS1jlk0oJ#@`YROQvC*MxPYf9zfVqY#iatSu z^PH&B?dUk70(k>r7qa{X;Q?3LB~9vwm=Ot-sYdLfOT1Nx?1|Z#qKNX}2a&?~aaDm7 zFehVlP7i40m+%0jDISKRSLY&I7qI?wbr|}=Iizu@hS`u8UAL`qaGK{fV%W&}Bq>aLc)*<#Q`L@B ztWePDk(>QdNAYlv22fE#Yl6i8T^fH8pJ6JcOVv;86b+_tDkz9GjlnoZ5VK&>Dy0s$ zn3}*x^D=n#rdKk%>$_?=64z1uoh1MxmCIk1r{u7;I8L+s2ahcISTfo;y{uf{jkDMzln&U6RD>B#D;vng&X(pGpUo$uz2LhCY`5t1q^2xgr**&GLSH@+yb1$`n2$qhjFwG8I*@{DK@7py=QUBR>5fWWDcWTI4_ z$Y5tpu$XWjdQ*cz1MVNBvWA0+fB9t{8yfny;8t;VE1HNqg`t;c{0Cn#y~ankYSkQu znQE#IqR=DwF-%uHHx3=sf-HFS?t_r5k zCF9Z4lUfL%ENh+^OwHCsA}PptGY66UKlfd^iQDxZ)xi{d@Hx2=6|1a!2F zcuYoTZTCYD%Aap9v7vzIG}&qj;yt$Cn;J?h98R}B1Uon2KMp8lhLzehUJn=!g`>?b zJ|N-q*Y23T28H(bM@#bB!$pTl6j7j+;(FZyVpHA#CM?uwJ9BvZSbJZg%OoUo1%>kucVZesc4eI67(C5yGUj% zqfqcbGO+6~AcNh+9``ETSR?p=H=t$}=oRO=bJzEtrlj~QvxpV3I%=@W`{Ndse8rCK zb5|}nVYD%tjL63iA8y{honD>~2t_nWtz-`$L4_TCef@WG)6>(I6Sov}FoWN`<}4T0 zJ5N{Op9!dthVs-(M`71lqAPvRxaF zHqJnqtObR1&_`e?$S=oXO;hyS*&yw6%CJFIIR$!LCoY*`jm0oap@bF9?MRW^qCWZ; za@id=BsC}J22n7Aa-V9TejZSJ6>|QgD!n`PbY~P4X!brVJbAE9znbXnZE5)e9CU-l zqZ8T3fcajtyVaAmxK<2JTp+o5BZWEsIAwttuRoYlw}08G#j;rox%weliA_cv#gmFP zkaS20uFTR4hX41l1&)adA|{vic?0bJIgmJd0vha05H_Kkn5mOv-GZLuxPkh1`-Rh3 z7ANsxq6vn|(nh?){uAnVAVOXW)DaYgS|5a60dy2N=UumPpAFPI5}-DoXht7Ddp#gf zb1Yf8>tB5Kfm4gQk9$)B=bNY?%#m6EPM3VbvDf*4_b_u=;~l;gxHlpQ9gyJ;Vb>t@ z){_fIM5!A0XwX1)G7&3btWU$sT5Je|tG-_J(RmZ|aGN6jX!vy)`$_6ptg=Z@QuTvJ%XAxcp@2Oh>&>>3LAIaxc~FXWkyN- 
zIJ=XSC58^}>yEg1f|WiGV~Tz*Dgj~S=~IH4H<3fYqO`)Ed_sybAeAIIO6vYKefsfN zI2>uF5%564*(laI>xnI3LsfD9AYbSLk${eO`1L0VzXft};>}zH zOFk{T3hVdhe;)#1NbCyFMP41!{Aam&Fr!EE$j06?_J5h-WE&e(f3h`X;UZdPF4xNj z|M{R31a;-LCml$@fao1W_@A{`Au}lFDf|CkaM>ims6@%1NbnJ6us@FzH;>rIKD3f~ zRF#DgfBPSg1js`#blPjA;E;w7L6Z87|L-pdwV(}G{ys0l*4_NisaVnMXx4Sz7=H_~ zV5A3IMACto(s~K>B+g@$N3zH`pg<1?QG^|ixyHp_X?;dINp8Q{|h|Dmd;Ms;zrpmy7ZiI(U zRO^3YJua-1rwgx@>LPyJiQt&VxPk1TWdP`JEZsQVuHBY_wivY4GE@BxLo+X9W#bm+ z-pMJTC-Fjj{uZ~Hh$dkWgHqcDv~|)Ldc&@%sGvP_m)dp3gmJM!~-#L}?#nQo|d!J7MZ!vQeu#X_Zk$=&bf;{CrLS|PV zSuL*dY~e%f;~)~9rGcVSWLOv;f)@W!l@1$$#zjaz`0GEy@^FIX%G}GP5#*^+6izTL zGJ`=epi8h3M*8?*-RUb|nF&O7s0#Dw6dI8tXqO%IY2s~9gk|BR%l(yj%`#U?or{TS zRTq|gP5(4OQ168@YV`V4T_nZx&*=;Lp9oxCUD1k3{Hs?)2r3VvS|7<;FR7YNx?EYA zRLu{&TLSfNC$Ng=!{tI`V9^b2Az&|)TWjYGb`)s}uJA7Jn_g{LpnBU&@D~>iH$sdX zL}MtqhFbiMQM4kSC}Km4%1FW@BBT?~N$&j@v;obTm1cdlWe#0GzNK8!2+2J-FN205m$#7PcC(UG=Y;Q=ZjwcY71S}$@)59~jegIK;0+wz=YEMG&pQky)L>@uq+YmPCW6p-gN>|!rZ^r%%#R>I zZLdAQOhm(jg?B#S&h;aUf!_SP!LHZg24PhaFI-2ca-lpu;liCDP$S2oJrWCHlJTPU z^z;n&aaDMsj8t=s5$7N}B|F$AN?=4Dht{;>>}yNOtf)P3NCJt*pmakDlpbxXghr_4 z_@pFaWM|p`IEq-H+A*%se)d$QNFUHTlc)~c>m*8T?*8SLnUuKpct zjkMo^IQUm|WF(O)biyJlDB^*>DL7c7kx^0jyF}sd<$38-E{xqD{03^@vp=YtQf8mt zkO)9ae{hYakx>j3=S`tb!^6wllqh8G2tdB6+9hlKW3i=t>hT^i37Y9${yq^PouTCmk8`Gt`ep-o>wivqb! 
z2)aT*!vI8SK)2M6^@k466_9gSQDK~nSuzBzuA}We==rO?SakLEbZN?K7ZlGIY;(d~RBlI%>AU?o= z$^-(ju~zIc^gE!j$IyM)mFN8VMzn3H&}W)gGdD~jRGBD1rVIj^64ePWrXES{#C#fD z^nm^Zr-L1*Y|?Ll2O*Y~Cw7iMtSEAxQ6KU@KKxJR~mXhY(fa#6~L(jlT2I!n7%j^haYHunnb_<*Ragl<=NHtltKwmX;Qqj0`i3)d2F}&jv3dr`6Ex zTOUrw>L!?Qq+e!A(>E}Phd$?o1lr%WrI+@8Gm>ki*AAccJ)Yj(1y6dNXVX@p;fE8f z6CD0aHIR*<+}^a|bd4xD)LUCYC}~Dg3gJ5nKlJj6T*jtp7`k`Kg#iQl72g{sJd|+U z1lXYEh>$#Uik9SSy|0HsGDu@C`DmljjtQ{L?E*m>c_i`GQyj#@v(1_*%T^-&1c@rJ zVviB9r+k(W$+VPhk}FLhZ9FmU%~pV#e9SNK-q=b}cp|T#Wej1O7qxYWA%s}D0HAqK zbc?V2fqn9)v#)&!Jaht?-J*>4Y706%-wVW|m|hT|i)uRMyRfb`BU##DvQY_tjwj!a zDD--NND%0vOjCgdP!ip5iZkVJ(sekmY1gkQ|0{li-13cZn6NAJ$9JiIGXyzU&gAZ# zI*WQD1wgQG){TH2O}c+;A@xU*zlzk}`pQx+v*t96{OrJXeS|K#8J6rUqfq}=?yE*| z{fG?tik_N5d*@viR&_DgwD-M_4r%hSJzt`ihM<=+SQF^^s?2}{jyRs-L6*+jH1bMm zOuF1|t2aU;7^wtF0B4BT0ro{ZWu(6Q!F@#9aQe9RQ{YKy{1YJKKVAkW`bl>b;BJ7x zB)VA7uuNQ&pv?%Dt5&wIRH-M46XYA(lhqcCe>qt`vCrWwZ zK`=}t)nmyJkru*^^?eR9ioIE`)~ zCq6ICn?eQoTY&s3lj{C47uLoosY-$vrSRmRN8K_$il*LFJ%YXH^rrqj7hTM@uJGg= zY>}KcJL#Lj{O%g!8xQY*Ceb$wuxzE9hOH9``Q%>iP#q>x)&!U>Rt=4q8pcp zOzxO7(a5u&W@kJ3Mzwd5WT_My$5MfBo%Km0sL=_&Dv)%3 z*e&$~t{Rzch<8f@f=#T#z&xZJ16v_(0)CbdqY>Oo?R&LOUPL34Mv#A)2=Q>%IO|uj zN)Xdqnmuxr8uOwJ&{9FfjOg5_)w{x|@y^3xlBG;?7C~`_WC1v zohR2W{KS#ZXHk2pI8J zKPDTk@92rjs%_4s@ifj*Ipw`)z9V`cv2`q8*^gBpYja@L$|Ku)#4%7cL!TVkC3ln8 zMtLViAg5?mUacqRWJ_7_>Ot?(Tqaqh3jD=?%vAHaU-Wk)38TNyC-pu3+H2ys>a0_~ zJeYVztAhbo41-lra;Y8#Yfxt-2U-Nvj#fFI@+(Hnk&7_G8Ky2*z=;Tr}xayJKpRp;@Mb3S8_df3Q&RCeGxbV8Kg2Pas&m0snV8`^h7js{iM`O!BKl7&JBpuN_QE6OyO5M{PTYRUk`%Isr*nKauEV0Gav zdgN-N;4Zl=<*duqkzI3(TA8U*Ie&eu6rL~{0VLwQLVdcgu6Riw00OHl@Do#E$|tXy zPD>!_U#~X;Xit3hoJSj*k%`~Ov0QZG(C54jOOFoT*NPfhIBu6!+Ezwn-hCQ*TbV?J z9unbaLP(5Y{QD7!*YVq=zaHS^y?hzW2wk#U(=B@9T)4x5R%R_lz@dgYkND`2Kt=bdH7bc6lRBVX2&S1=BiCtuh zcjZ@~2c`o0$m3~!GM!Shh0hQitMF}#=B~Xl(|-73FbalU-CH86t|>gf)HS`eceD4$ z>5_3*Z58e<-A{16aoo7r$aC(@jB9@lqp}%%?Qws`dd?d{>9A$S%AVqxS1#aLG?0<- zkuwG)^e?)pZcLsoc&u)x#mpO@(j2xYI z9Ug^S?^4VUu21fsTJHSux>FpX-N#FXt=+HO{yxJUaC{+oGv9KLx77Nc!~T0tLjs5` 
zGO!j*)Mbg@+k6XiCGq|=?=1$|2##)~_>8b3pWG@mM{2|;_vzH0G)|2Cl))6e9S(i! zD&u^rDKwM6jBfA-LwDE6&!w~CMY!Rg^%Qe>a;y{bw-!4%;%c>#eFriV`TNdv?zEtY z5Yh!c#=^UUE7<++HuH#9F433!TrVBxzA;r-WS*+}@~S8|kC0Bi0X1e?{4B$JEpFh*(<30SEs&KLI;O z8lr<-ee{w09sAjafcwL|A;EKLfTYqQ3I+I|{K3Mi55w-=+5dIhWBDd2!Y0>3g0$bb z_*Sq5D^{a9yOpfPv&XICxBicaCtmpaQZlz}XeB%u>Dl%~opbsGysPz=1jw5kD$Y^7 z3v$rSeE>MAxGoJ>HYI|y*G^ooKXRK)nI-0VTj_|F6n2oa`N{&w8|k2X^2EQeZ3zAO z!K92{i}$jZoreBR#z1N%f9-`d>wZ&K&f$TNac2bjS@D9m4_w7-c*u!Dxfhry3x}zm z{Wc?1x|%ac{rtN}ctFX(igsCj&2!%8Epa(qh%ErGnoI-7yi#VnyY|gjj8>yE^iRi7 z2q(oEd<7W}2@kl+Uc!HVO(KA+2Ja33^yNUjZxwEr{K~y9K9(en=_{Q)mX*Ur+BHNW z3Eh>H{rQJ=;@R>923b^(?O_zuX=pe*@XHP76R_rAx2>$D=76g?8Vm zndl5Go;niBFY@$0p?q(OL+i#CW>pXpvdV_bub@d%08bt(H4kb8191q?3zLoG0o5<1 zgks?b8tBVZj&;%|FxHK&dQ;IMVxuHfjo=iHKSn)G9y*$<&){X>Y25d zR*iex`N@?S_s>s?eaKA8;U)Nrz%=@b^_03ChCxHw-Jc6zDIBjMC)E?zNeV#>^65h| zd+AaUEpgHv^1IuyV(4$r|I$3c;sy>O* z*H$-u3U!p8;mc1u7OaWS?WYo@N;@ijwS$)fU2cb`UEcbBJ9=lE?)eW?N0%$GoNf=V9K`+ zZsVTF6Ut7!oygSZStCnnUr65R=fT8mF95>Y!@qwLmzN#Au7t;HpL*4oE4#rk5K-w( z8Sz&%R=4@;)#)gyXWNAQNJ5X)%I;@ndKU&m5ixT4&R@QZcV!m!hm0rzLIE@tHyjRI zJ+0$XS<|i}s-oTNh}GL9uhjRI9rVfhR^%EIfRj#ZWAR5mvpY3UXf5rrBOuqy6edEm zUB9LSTe?0PJ6H$H-8`zwKlO9{$rzPVwR;iXrS--3PcJ#P*j@I0ej9fOl`hG;8K?`7 zsSLl(E(?k>1$T{m-9?o(rBL`*El)ug;~;)BgI<(ewvLS_mV86a#gilaVp(5!pS$k0 zQGD87z#a{~>ua1NVmD<-gt6SW9}@gt+`P<55NYa!LUpKPK&z25R1h;~Pt&V;r9Ip) zm#-%bsffz7Y|869ALHcd13JoBO|ZyspEOLTy=0(qwKI+y*^9r@9O{q+YZOG#1SR~b z%CRWNGBf^FX}duU?(Ha<2JMa%Nw{ZWr;Z1l7#!NK00SfShax~gYHQnv*G1*xG%2_pDa6;V zckvEX<1EpN>&6T}#^-+t$Sw^sFyzCwh8!Iqw=|Z`4{v0d)X9%3GpN6EzvJw)Z!H{^ zd++i3?)@(kArKiOhI?DDVQE~qE7!G)NtU+#-1OvUK8a}>kD0gilxOr?=WQH?3VADn>p(A2`rB35J&vO(G8R=n|LLdG;qN!}4krs)kB@3~M zJ=K0us*=v%3I`LKO-TP@e3X64N==I4K8+lbW#>F$ETs`1QfI=@z^ZICdN=T=8uQM| zHM`$OUV%PYrE!hCk?he=#8{UJD+6cNlsrEQ`!L_U#p)X|G)YCne({#hu!rD5P=d!m zTl;6$!FStr?A>8Wj{vg(k_}CWQE9@xeMv|{3KCyr3#lnB*}Z`)P3CMAcw4EYWK5SH zKF`;+m^wQ?E^<3Q?Z?wj`(Fy22xO8cI8XLCak8obF?~c@voA*A 
zOOk>;xpb*Ak~eJUEou1H0BPkMI_kjA__`=F8GBt(Sl^MrgZhWH$hD6QIU)%~|33GYD4T)z z+Onw$rJBPdv#A>v{tv9&CxFbitBIQLhHP9~Pld!q0Ez2;HvtgCwQw7w54h`*Pl^>p zXY5q36?Q!#NJV-Q$Yo{WFcMClX`_L80uSu`2lU)mb2%Ou~<_7Grk z-P_B3{&9Vg=Z0DUPo&4%aC+oAfNUbb%0CVvSn)C2x0FZ?8xaH{!RqOf`TKZ-rE%jnL|&O9lONt?#@_I9gS*wQ|Jr# zgr)}@;S*NQv`yN?oHX@QzQk2SM&&iKuOB$F_1VYk4_Jv{9jz;!P5$jc-S0-FqwzJe za@kp2Rx^aF)#8`0>$?Ht`}wm*d{Bxxv{59*prs*Dse|B)OW->oilMd=(IzG)hQ6n* z=;kK+SeX;;sDw7>l()IevRwDL&Yzd_dhYv>`+onsfb_-tFL};3u!Yt+;#`)$5rhGT zXn#jkdG%w0y4c9j?YHz^e%{8Xl9vpde);(jC9pph*{!h@n(2;*OGD3DLdhE zG0rz@p{LUb{z~?S*qv4L@O8(H`L%SFA1&iPv)-5``12Bl2CRB zFbH8*plpQd-K+480ca%zZPAC9I76WUuy^t~F;Iy`OA-f2Y`O8b|fZ4MpYcpaka3S1Uu@+^h57e_C0+ z$iGReR9%!$Vt1DX*t=itxS&s3#IcWNXs5cciN6x@$nV>4J^t*U*GG`Eq;D0z1A8f6 zc2ViE@tGe^-|6=!p9B@KS(Qq<@5cg_Q%=L%-!wQYy5ptKg&{(kU;8*8`6u~7|0Bs` z5!Sp1+Dvo=B{h_uAFGbe3PS0JGz(t7NP$;Phv3@)%IDrEp(Q%#hxQL>F>8R=BI$Z3 zkKx-mQj3cz;iU{bw5XPr#&N;8mI+>im2T3N-I(GwK;&JWlpXrN6?cjfT-b|H(odrl zyKw&2H>zB#rRMJ+xTJTNG?{WYbsHZHz81feN%D}DIlpP^M{Arj|2cJB#OGlii~r!Q z9IIxZ)eT#uzVwH(Fi!qGG9J}plj$iE3#E!$9ebz;?PV4LyQa*NrRH(z{KjzdVQ65~AjPPuhE-^BjG-1AX0>1N3ls z+lLu_+1{!|zjt5!jk@?78SRW;zedP2N$(RXGZ(x{%9QNk6_dS02^=#1MczvIy`sb`~y4M-#udXqa1_$Y7qfT3AcX{~F9aA`t}qkw2CkH1_;A_DoBDLl8kg z-uYWdkT1H#=MXqNK|W`c^1Ei|eWuy$+KywE_uKDwma#4V+uCodrLGx?@|NBxJII%Q zidj+R6J_UM$X8ZG@(K&X`xcZeEZ$FSLH{?CtdHKNEVoAT$@fp7-)@7}oZwX~^tJ8! 
zMK|=Tac{P`Q#Z<=O?e!Ml*-SHizWjkbFc$EcO16?Dw8GA!194>iA)k!mh#W!huy z_;W%C=nG^5hOR#wV>XsFX11^DV#gQ1Mxji_ndoV}xfpMO-K_-w9m&smZ5=9YV_L!3 z7Qtl4yJ@GAVt$$G-l5VDekJ9tL^Hh`qVs;oTG>_x}ic>!2$4@LzP%B_PtR zpn@PJ-I9VJh;(6vgS>GWqdcBL-Bp1a1J-&YWmHeEilzb_yvR-Badrm`XQ z!^&ILiuZl6giK97sB;xl7YocrEV{Bvt8`j+Zw}5o$uB4u5jmX|(`elg>XIZS5}j_$ zy-$&2y!tR1#|3xm`?H$IrKEdXpNT!Lo!H_ZrR^Xtt8#EJKx+3@!Gm zD{+lBcMP!+ta)b4t3%f4ceZ+$?^fQ~aC7zdTRB*%Opy9g^x4AL!5H_x<+RggMLA%8 zH=SD&R3C8(&qc%YFf@D-l)E&{b)OBzvxiJ^(}4MKzg+0zbUUz6z5SEMwQF#Jp!^F7 zHcq|kuJzUYaeU^Yua>T)PU4=|&)JY)UYVn&4R37qZ|nMgX2}oYnwR{*3UV>9_+tj5 zAd{`&sEPsgAB}%Y`w6g%$b2c-P(Zpv(ga8A+M&eoY~9YE!R+t9eucCR_`Ri#aUsBN#`#CVo4hr-O5e zo(SKaF$)zNf#mdE`b9$_*q~Iv_0LVD|F?C)X&y*-RKyJU-6LcekVevhiHR^r-%VYl z21JjA z{wBBmeJCk2NYS+-Mb~bjOQ@h!>HgIcWz;=tq`d{7cmm z**?~%-_%_WoVp`SWaQMmmB47zs5WQ|d;&l~r_w~%Ga4cEP344e!$B!b{HSE;^56w% zxtg&3F9p$OW)|AxHuz-jl?<1N^&ehJRZn==-&@@C1REyYI@$P}Ao<$Sc&#*>i2keA?yC_WP z1-L}{<#7WJx_cD(IxL1le-mAK`jqdS;4z23A1;wqt9C7*<7NtBVMXNVMAeyg2>?Wo zxl3EvHTF*SRN8zq(OQsC>xQV`PnT^AY}DtIL&2b@6VTpmZRdIbO|=?*$9QOAVn0*X z&-J((?0DUlQY!yzcZRYb3YPIr5!3aH)4fV`RsXU}GerLSLInYrFIl8~_}LXLJK^sM zeVj^(?4(a{$WnmE65Su1gsfY!yPuyWOFHO+SVIgnSt!kggG`KrK|Dn$N81}N4^VE4 za{0Vsr8&D<0mQ{iuYi#~0Wuq@;Wxf-^VW{G?iAD59EY}h%JfXQ*JFsO#FJTD@dogq zQ_I)P1{GSEPb8-$=1?IzwE4vF(`i%JKf85uRG*~aCEUgxb2+m{0N*s0Kmh#hkcj{U_b2K*mM&W(HfO8g+bcf zM`Q|&HZkK&yex^kr89G+t*zetEzE+e#_O%VLi)XHlYgArOa|I>Vwws?I(Z5?3$qa|@@FRMaJt+(5M2rHR7 zo}ebqr@r|2l+9nZ5RqPNo49HmGQ8|$+;L^oSi>5 z>LZ@|6m=wD4S}}Ykewg=gtrY(quo5tsbdjJpmz^9+Bk1>Oa_36qU-u0hwM|ye$U8% z18-AO>^tgiunNr@*a=SF+;0RwgQ%f*W3vHAQ0y=^s!qdV$`49er(h)_#Q7Z*FZDFX zI$WbC@n?}Jp+%IR!mOM)5ks+@jQ!EUrmAH&gjj*a8@{-RHH^`}+-;GSi=eQG+Gr4lS=&6#S6H|&Uu_4fNbmC9k zBs*QZzzDJKP$~8a#s(T@$P3G0(o@K+sG%&NqFy92c(XkJzRvuOtMW!6@_rLqUhm%I za!vCU*E-yc3wi?4m=ckitgqZ=V)IUmPG0j)-)cGG-A5BuV+V!Q$k89^MENhe+6FN) zXs!l4>nWFj+T>S`tB*&WQvFW{u2#DNIr_#z7X%{#N|7ewCnoJwL#SBZ%2bZATx&~h zMM`(k4OHUFVv5AH3H=qkCOTjD%~S=8<<_M`W6P80mI4Q7iEkpzorDt~FRcfvmsrK@ 
z-PysjLBH%N7b<9Tt=glwc@epsDPKf%`&-f-Xb|Q-UmpzQX*9i6hqf}%IE23gL*cA6 zVR|!AgIEoNOvbg7A`L(;D%vk)c-H?ELVpBu^yLTgGTcbCdg; zQ~grWhb|%XnLkd(-qwtO{-{qcNbC60)KoBs&=HHI&}~LcLhFi?{%7u6+3haWyl!$A z`aU?=^I^Y7gj1?q4_2LdplwX0Amv^{O{|LythG}?atu@&$nX78KlVmmcj!yXI9A_< zP9gMSLflZQs0^)Fo2pe<;kOWhujyzA$jS2-wd{pg$#dZ~7k=5nr;{a6%l?%b|7NO& zebjzFzW>RYUN(NP4LZvEigriUKZoJWu^6rJ8iAN#c($7U&c5S3)CSfRUkux)DLy8a zWZTSf+%nx85xtqpB&KN!S}%56tMrm>@-ZYs{Rw-Hy$OC<12E&^BL-L5>0b}gsRj{a z_~P}A^N2sk{|5dzWfo-TH9gB%ETRY@ml-YFZJE6eR7xa=*R?)Xq*ISPJOIkn6JW!8 zfHUa+5VyFG4V>00L37+O<9Qy2N+W^{n62DpY&=BxoS2WPZrY4uR!Y2exo`gWUrL0` zZK6WT!Bs5T^Q{*IhnS)AE9ElQIED1Zagxt6@D()P;b_bEM!cV@#3k?_4B^fmO%M?y z0y=ZQwM(^Zr}9=j^pCmQD@hZyRK8jHR97P!#l2_>6WyBxj~z-W9@#avWbrPxryJ!_ z330OV;}GOwkp+^ON6}h2)o}{X(Y+{mugMx1I0Y;?;#4!MA=~Z&ML}@n6Ac{U7qcO$ zd6J=P4I%~}w7o8;odahv-pc$-a<8b_lRjZ2%OjC(^jWw)g;mB0iU#SXe~B2UbzDV| z@wnHOo9VG!Rq{74=wUw2T8Kf3#YDn@Pd7{*hiKkfxggd^@157WzkM`9TC#Xo8vE1! zL!H>o9-}?_1oH#rHd~nc7&Nth3Wl`W~AnGJ}4Msv+V=lm3&q@>ZPL zSoYW9sF>(~JPIl9dB~fSbM;T?)za!j@*vQUIq zw}EC?+y;)Vnnb>Dujm=5ytGhqP`J6ciM&euBRS9ezA1>&bKHCcib@9Yu3O`LI{WB3 z^R1fkpZw7h*uGa3)Y`2pdzIWbJ0*XP`39KCB_ngz<8j<6XdS!d*P5hi)ScjRkQ2+m;1jHRRgWI3 z71JLIcs#tiV03U*1uC67;?WK5MBEBlLE%ihy@JP$HQ0F*K&}{$5gj692`>It*VN3f)#xOP)8$qltoV@bdS0OX!f$e5I2Z$m1h5qd$`31tg zT@B~PnzhCQPbcR#%5S73Goa&HTFO3}aLv(Bp+t-CxM*t|+>E*?4c0mi&=~fU%3tVb z^h;|aWJ(8tK*Z_P7hDh+1P(7b1`aPAgvy49XT^aqF9iQAAo9(OtAW^Gibm|aW8FL3 zdTm*>i3mg-2uv-&{^m zs?Pl9KlE>4T*OG(lNboX*0h7wFsX%+mO|Znkj_w59SyS!O4|TS)$wjrz^SGFGS2$G z`MpK8Kt`LliALpiFUeB`Nu-vHL;Siebr^T5H;*G=jS~R=8xFj2l5SEYvJM7h*LZJ@ znm{U#2xMIZ00848%g5YXx@aQwCKdq(Th#m~!1D^1CrTqB21_8LNf8yJz2C&QWfHvc zfnA1|eTaaxt?9;!#Dg-#>T`8gw?y9Vzci?eXH5@mx3CLd9EULfTN8inTuiX^sF;G) z_A|Sq09|7l(+fwbuXigAsJtvzcKdKH80|pj0B82ajvUe~E7#FmSnzT0v@=O_=os`7 z@*CRYy7!yN5`XqaoxgU$dq22S{C1+7qi!QdK4CwbgF@&E_2Kd>rq0Jk9ckYgOIb@1< zH4gPWK)H-UdIlo@#w6}S%gN3CC_1i8EH+<)D0K8R?Djgb`IhYcv2tuQsNsNl~8D+0~}mNgGFC)VGfLa~ZIZ zpA&p(EfXGmY=p=$zQ-G)-t3z@Zc|TAcjHtl7@(z>J&`G4-N=?UG<1`bZJa)6xe2EO 
zU(}Wt6~H4R-9xCxp)*vO+fl>!vw>s7#JL7PDtcgJAnoA3o#^Zeq=W+&2ynQyl8I_4^>ytEiGx7VO?sMwF3>jW ziSiRx;$5(|O{c<{@l9_-W}{fQ!^ccJ7>a9-p?2Pa3u*m~`1+ET$W(IkLuj*=Dq$266&8GB_D>&zTYO*RB4Ta%v$H1Yzr%mPfP=`- zZ^4nmr>j`6u{jSlv}n2v;6`Xp58nTL5xnkUFKqC8fl&VXw`zFhx6ekygF-1b&(j^p zpoSP+m=k6EVe!sux#WXtZz*IL__EZ&z^d_k3O0gH&j~)C4a^6!y)4cM$dEyGpbSit4?bT#^~D@dadEw;Yjry(0GY!Yo==j7EnXNUcQ0S+8jYP z@zbl1{(XRGUa|^QRd~}EnR0nV$TEZ5`8~;0 zorx6t_fiY1C9|eGydW$!y!^q}=-oeg$GD(ly)Et&U*GpisUo81lQR#RG$!TmTQP2F zEjfAZwZ-Yc()5owc^0uF+~Tuy`=Jube=^riI9P(+Q8Mug68Y3fIWk29=NA5!;Vl zB6gL1st;2UkBYD9)uh(?CSIP!f}gnPgKFx1z~3J@Mi)dyDlyJad7j-|pqJfo_I_D6 zMSsh3^+?3R?H#4Ko+t)pDkii&x`|OT;99a+o8Eta6xyCPC(iWCuuWIszD)UIO?@?9 z>F)ZZ#WoQA0<$A1;6*E1U2R+V8=^L9qthT+=*N0L#}pPFO{x788J9dBy$xa+K%rVL z=P7g=L;%PJ!P*ORQbgK}2Z?(+zd?0yoHEINXpB-uW&?xH+;8rdm+-gY4Y8~LgepFw zMgR!38$$RZ28MFCkHW7+U%R~j3BWaZUUK>VV_prVU!7Y4X3|#oCYauweCAj08N55@ zTM_OB>1$R=?==zUhli5%<}%K&BQQ?Bd_vDw3T*LvjoTfdvG=>G4*Q6J{AyU#?xIPZ z*tnJgL>G0UViXM>%0{`azRAQZ&uEj>jWi#s#c`a*?KU+6BtbA00%9DKk$~d9 zZ2Ur-mn^jZ7pogEE*vO}_wq7K#RWB(@Ex&u-0S~0f3W}SjSV;sx?l~0_SVDti6?kS zy?i|8e_qy|&+OkhmqYUhcWmHzs5?QB-xqy{^bMAISl3Ir+gIIJxNS;}C%k z^JID|*9DI5S&f4&nl_tNy#?5ck1K8tAxh6sVyFtn*n= zQ(AfPA&dsK{HWn+la2TN=%DsySvWY|tCk6dRpQU=d^cH+dO>=r-X64v| zsePKPnREl1!QI-=)p*^u)B8PK*SH(=X@SsT0D#j8w34xj27@$_fYyqkNF_8YGwusH zv1#!JDxGhJOTrY@E<@Wt5cl@(&DEv_sN}Wf6iZjRc{*J!D?ac+yV@85ohRrtBoR(+ zBYn&j5H-mk$FftbDtjcVrFU*+8q|Df1v= z8hJ};OJPNiR~Lndf7SgE-8SLM3U?%!xh*H|3~>DH>6nqc-dLslDi7S=#Op$NT>lb% zx{KpL(>hxQYc z6XH||h{)O7P9SCwJ2ol+nDNPzjo5b?-Jra~I(If1f9uy@?OHcDweCpolJyH}8)GiS zP=2MW9+le%UXz}>iIYZJKCk^ZTwFg2>!Xsz79)%w+gCkTtC!z-mRVIZha8Y#XqNDn)cx$5HR_w~7zr{;E7(DDUl#rA!*p5x_FTi(-u z?-(xrZc7c79Y!|Z)f3>wgBnOo*dRt~)E?w_3R1Hl!>_u<*{1`3<`2tF56r&$C9pi< z9kO$)zNi>4;>Em^S6N98(Y6wyjZW%>M;WFq;5IA!!S=7W|9X^XSK&aTy*%|ZDK3%Z zQLUY>JQ_5^XFV1`(3O2{pz>H4d1xo_oQ?KmpoK7B3=>iH%5(m&hiTz_FUD?*$W2uI zZ=+MBQD$%EddkGzJ@>B`s7Ug3L8MdFV&U6BEN|hfn2iz#&balHq!5B;MxG6l9G&Mk zO{t?77zS$D1w(xO?JZK(?jSB)&On^N{BHrpU#y))J?eHV^&R`dqrv1PP^>Tc@9A!I 
zy+FC)uDFM#{!sn5;H)p?u^!gDy%u}jvW`Agk2bn&NBzZ63lWvk{7r`uoB3(={PPo+ z20IIOXmPj5&;ULLId_OmSHbu4WTZ{uYtM&_bF^9xj&G1y|7Ov(h~c8gnimyL;2^~O zrAMEh}CW<2h0Y+@tK@o6d2KT=#irc{Y+_OPzn*o=?exTV1y=1mHFpj`vF%UyFJ*ry{d7Q_lZ-TPQD~Ak zJs)q*N+jj#x3bR9=ei245ezz}6Kl1l9M7CsGmPTAG}DTA(+jOLXonXH2Q;4Z)Ip*P z6$%d($|q(~1RARG-LkoSgr9P={A75W6kev8e*&dTOxieGu`F@6d?(A@8-v+`p~w5 zykx#n{-azCMcIlp+LzSy7^PZ{)iN7CtNo)cQzuZNf=cg+b>Jgcj6!H{-lGUExdkD^nORUDOy4juX|mr(TL0i)?j9##z>W$@yFtlUmq)qiYY-j$;mB_ zqv|r^o{ux(SxCh9Ud+zC`p0(gow|b(qMSX56vAyNG0#a9(@`GO!tXp+xI~r|B=7uw zVTt_>B=`M3-HIJ>Aa+m~)s=ykVdnyQ3wLzaNpo~K``;JWdq^pj;JECWhMe2IGhABU z>NHSCFm9}bYTq^@$-OrZD|TaAAH1oNf*3u(fxN6QutfQG5WX&;zWdlZNnaHem|sr8 z>5zYJejKgf_9sZx+~BXP?7J4Syi1ikI%GOr406^;kW}Uv`1oJ*UXS(ed%Uug`GeL2 zEz;?fcII2JiuZ~Ws&u}DD~Pk{@pRU4l#KdZJbIch>NoTYkB0H|Hhb!i>q1M;SvW}- z-yV?VFu_D8im4&2z0kbY3%zPvR{>F^Maph9EykIM;h}5^&(G|y9kgPIM=4P@Aj72! zUY2-pb0nbST_ejak?UtyZ?bAiORi4`;`t{7heQ-nOtlH5&5Rn#t`2j(HBm@0Ur zCgy#AdU8r?zJM$!Vs}^Ge?2lDO48wse?Q0Cmfw-!;>@UYE75N+m|E<^Tp`gvN9DDm zwN*xOfT4vC#wZ~E6Sif@40}toQ^&aT(ohMrl5&_=$|qQuK5G{^ay%PGL$*%m2PQJ= z>IU|oJ|+eUYmh9hn#&;kIew};{HVznAhjO*gZI{X%i>Qp>L=luh>-bbwCsRos!SH! 
zJ!ACaIQJ&V(6#sU>RiI&kL-LOJYztAbt!X?kG>QPiB8Gsa;2qH{m>A!?oYYNaJwNO zx^-`1K_?~O_KAY!?i$9IInAir6Ll_h0u%^2&yLMFi1NV};F!Ecf=D23Xq61HYrv`O z2;#F$NgzGDs6;ol;f5av6QZR4iIwn+l=dStH4W7Sablu+ zT#am^GABEm(lyS{K*yf6hoAl~l)vhtttd?Q$vuV3=HMM0=lZ2oSeyD|=o$or2Q?jR zcu{d*TofhOCKR4X{rw*BQN;bPJH|lvtv?K+K}^3|DL@)6>k`WP@69F1Dq)il6XcQS z)#qUaogaO`+H%4Z71E>Y8bP~#V7o8liC3soUk1`kFhvE|P>_|QgE~xFA+Mpqd;qb{ zEyo3c*Hpm)9kdxVsAD!kv7Q0Fg_(Y06N1FtBt-^j(qZ2fqMJsQ_OK?qhAXAW0?qDz za#~5zd&r`MuZuFm{6(Ac`->9H6yXO_D+W&ilZk7$D4W+1v>BnY`hk`)CHdBgtD1vU z5!BP~4e&~2AHVcM1&liUC8Y-FAVK`t-q>ztjUt)z`)!=Y;~z!e-snr#B;;eYLaC)c zd(9jh+I;l|;blfmv0G{tu45fhjXJS#Fb|TOp_QO`cSv29swprYyL=);BH zik6Tm{7oQ5wEy0zCP!Vx)Bv5|>E$0e{l9^KBL7>_YUFRei}eMf89ChyA0xxqJK0F4 zx-Tb_J#0f#VuPxvvf7WKiC6(asaYpyEqhs^`m?HHhKR>rSwu3{WL;&poe)hHQnLKF zn}MuJ2~FZ=tOf7Ww;13}`VT7Ij37ZUHVyYwe9bVeUqfjIy?V91Xo)rI*ly7`+Xj|; z$3#Mu6PijOWpj@Pb$fV(^j;+0Sbc|44^E1Uq8!FX$8rKnM^@|39w98gtx}B{gSnj> zE-_atN8xvNWlnL8`o|Ym5twt;jpb71w8nc)0oR%2Kd#XjaQe7hy?vQ&|Acw}N&6R# zJeO|;6ngqULTvCzcHWt{y51%9LEH<~_N8zZW#M9cF@YRbr#cGmvU<`VohuEdkIZi~F@7b=a;sl9yAN$aLL8Z9xrkHqN zzxFA9oP5USb7>_jYGO64YcWxSm~CU0ZXPGdwU*7C&wTON99!#UryIM5ssv7_hT(ERYp7?nOZA}q1%H8=B0=g?>sO;9g&n&w} zQtp}+R1t?)jdLOt-wxc?e0>R`w74{@&We5Oh_ws~0uA}dA55}ejtk^@*0~yP3vNBH z;!s4yhuT(0=+tG9PlyZXId*EHwaLRCW907}b3s{XcXOzqwAL_BN=Zp&E}>#C zaA_^N^*nmCinT~#_Bx?o9IajR%kmp`PI?Z|Rt2j?ztXv5qCi|q=wYvk;g%0-aEJAF z>IT-bGmk(MTdA|$SM^OdWr*l$3p(<;H@$h82ACoyGzU=;6I7pXt>Uq$rRBTKLn!yC z=}KuEoav4Am8kd?nHw=NUHs(z*|!khbPCeXY&B=3>h#b>PLT`aj`{P0YfZX$0h;}o zp&#aNm~#E$WY4x7kCeN1G*sI%oMoYBP%pYq;R?<%qm|6NZ5bVBs*mE*Z`C^szJt=r zLTdMloWj9MgN|nzMGgE_cM%bf>(!g)450Ml<*qEZvH7Fni!$tNUs&|HBQA7Yt59Ge z#6MA4E>w^iJ8?uf*6JX1)SXZxdv;x=p~!|h39N$UNY)NG!ick1pjU@FK16|xCxF%c zz$IT)BWF5#tSY-Oml#}e<*yA%VM+pUHbwP1>MCu2!z0S5^b&4p49-wGCtvt1d=~TQ z8n{X3@H$;bOZdf7=_#)e)TK5*h@+HAq%<_KP}egH`5OK5$NcGJ0oXw^OJe~ye7$^D7Ib=~d?K%UV&lFye`G7uA?-E{9i_=9#uR)2sN z@rz>kiSl>%)eP}6xSE;SsyzR75Z9d~aHxf9uC(2|sj*C&b4hwdT6mBu183j2s|bxP 
z;aP@Rvc^X>3io_GeKRvz^p1KK{?2>YH`we-u3VZ+6=&t=OY^zc?+p&t$8MPUPd%MD z_%`sILeZW=BPxvZ8nM}2&M$h=D8{v1O`!LqT*t4h*x_YX?r@ZpdI_`bR_ix3UZVbW zCr?t=Z-Lh;D*(!XvwUxr?&xdU2xM_eXQ%ko%niW?$(%t%-JCHKbao2kza1hh_8}o; zfh@1n89z?!IT{|#JGPCE(!&Tz!#t;-g-5_2XRbZv+}O(%^aP;*+0m!KTufdYgGcOD zMAa!0Opfxzmlx2=8>~0w3#jX;`(#v*#x%dW3Cgt3*Z~nXFHRC|G8O&(D2}Z0jea0lWAKv{JsySU z1qSEdzffBS+ALKtq{1!_!8N;ew=%0xGwGB6%<|shzkqW=S;m$by9U2}ug2nk zl#MF}vROEDUyT?5!oN3LQ<{ZAZT{x_HeRZ79@#exl+$@9`w0xxX@z7GmmO45#PF@C zq4=Qr!-y=38dQ&}&rGpF$${9+)B&NkR$E7Bp}OKCdek*k6frR|W1&0_(K(=oM9SR- z!^{6x+|Zbu37+D+j{VZTrdq5)@Oql2!q}twELPv4)J9#vgU-(0R(Zl4K|`@HenmVO zcCR${PsiU+3N?Q+i6qbcP!>WpnZR3de7;>Npb-9&@YxeN|G0lBm^nK55g~O4Wq16d zT;o;|-;3uK#=)zxEon4qQI&YIH{-}P?u6`R)q+Vk3wHYY=`3)G4V#_;J zsMRDJt8?VxWAgnM;{r>|!NQ>FGkdH#?>dnOM?de6Z_&Y~ zl)ineO+dm3>H-zHY$B)88Ig5cUdY6QCV11OV>~IyGc1Wkt)g_PX{>oivfM{f2oi+% z&*ZOf8r1P|dUdfFtq3kQp;vEjM+&k1N*houPy`BWKhkdWy!)IAqouirE1oO3a&SfTAsLZBJ{;HWi5JoEONAwTn>^p~EmOmqc zd!2Qv2$~M?(3ZV6eD%GZ2>|S$OdBdG4l>cRL6&@HbF5I{+Ut^|V+**yz(hy!IPO@= zx7*iT9kXZe0vcVCPFKwJbxu94$q7vlhz{Vuk1NcCrk<0pWQ}(m_o{nZe~jiJMlNq0 zFXM!DL7o<1Xl7Cc-aA8!f!udK1k>~`jkXZvm8LQ)da-@`tecBqE;acc7}AJJvFn2S zxn@*PWEo@5gVlclitCx*1DWcn8i7EgK^Bv#=Es(-;gw@#A`AE(W!`CrnlD{u%Ccb# zt7#k+;@s%L4EX>gFV^pwm`@r^B4YF!id*YaXP}^Vt!vy1XSh0f|6X8}HXd>=_Nyx+ za!xph45|;VycaYUdwW=S4q|DPAQ$HP+&X-*=IFeTlWC5?{UnvK8dK=j$kHzPE=`*+ zxsv2N?b4)d&AHW1FdJbY8u3=Lq7vQX6%4Uh0^aOl-L`9KpT}JSx%`Fgnzds8v-Zrw zwCy#e5g|^#pAoPGJ-i)Be&2S!sUB^wnwqZx$p^1tKSS_xtT*s&Fjz-*hh~ZGG(6+Pd$! 
zTO#?a$nTd3uA`ROtqBtn?Oq+xYz`!ph?$UhgsdpPSMSzU)2f1h9ak`@b z%|XhsX3cG~&_bR4m=be@|K#j21K=fp{unGT9RG8w`iu{CB5(@x`)30KJJR;H) zFjYk4xu@1|pGLZ8KzdJlQUMZq+CBDYluzt&|03OnA=pP%6a&1QQ*nH?bE(+XdLa$) z3Z2jyN#Bn;aB=Q zoKoc*)AdYKo0hyAWneV?l@=Rq*y66nb@$(_=CO&&OBNq7qH|W=@|FtHpS0|!((@JC zf`!7Kr_$x#p?LJbljUgeIK&Ww3H!pHaPUORlHK)fyp0MBS@%=@RG)NUve% z2dlqaLQT=?^3txZvb^~9Qa95c$nMb)NGS#oAwx@$`!nQp2#qX&FWguhj}c`yHuu4= zt}?yasXRuHEBG0TjgHY0W6g(t;LtoBW?~v#I9N=ij$t-@?}hcbkjHhyJP8XU=yZgWz-Vyz`jMC?@ zGuwBZW1w;E|EFU);3?{j-(u`ZQ;pKR8rcN=Y?OhPVj^Gad=&<+THfrvtg9yl&=m1j zA{1y{nnIhd$)(|aux;ACSMR9`@VEXd|DQFU`TQc~q}8VL(pP1rtY-bM5$+Je;pFWL!;5@ROdg=%&$>+4 zB=9Y!gi|7g=3n1Gf{vNxGeRp;rvR1mCbplypNc|;$wdMN2IO+oc{$L%>+|w)RhyLB z3%1X5+AWkv_@5At&--c%Qz0l4jk{tvl0(uPeanO&3csu?#0R-w@to$pCO1RTqMML~ z0NTRO%M$}53zFt8O!v*F%j|rp#01pvN|{X@j32F%d^YMXxG%FwE$E3sj(Bt{%4)|o z8*Dd~-*Uj6dLvn$qlrl;%DNYDCCXzx2FCGev{%M625@TEWmn?igQ1d1;WYQ$gV#452wXYmy*5|U` zy+G-WB~Gfne|=RwONzK4sR$vca`PHM0=X;H1Kg#YdofrNH(|w+{h~^ac;^+$ zBVG!DVvuWMXVJt%$eY1A%JB2>^0*i0^em(U;s5?<8$9=iUVZFk3Ni1;X#c(kCT>*! 
z0lNR&&*$kKfD);UO}y#qvPOo>%sXW^{lKr)?Cj#c5cjTaD6E6u0|{0p4l0jW%(iV^ zbx;6yP?^`!{305m;vsH#SIyG0x zTP=zMhBrIzaaUvxTRHt+RdW*lHt5B7(m zoqfpE?DfU&9E&C155x?ZH@v9w^7_Ma2sScU+yHfQm2y%B+`i4!lY{L~C4uCbf#}rY zypMs`{dwN%6*cU6h!PU_G4=cI((K{&M?5Eg7g}bkfuz4vV)i(B_uKM=JJw}nlYE1) zl1&=wB(VJhvQR1h9RfcBOCh2W`)5ud$dezEi;$~`kv|e03>zJ;Bux-Ym3{t@0jV@2 ziLI-S>0tVC3fwPBy zkY4=4uburq6Z%K){jYh2hb0P`p@rdF%ZkN2683(`U zFVdwlSDy>v<>#)@G4f;qw1gI;99d+&^KoSScc5>sda)eAC1?7F_dySnAAGqC#xh}^ z&KNeb(?Nw5k7Tv@+1cBb=fto>f8aG}$9xUGnzeA%!9P%a2$F3%sSt9d9|2t%SNj$t zXGL0XWa!qzWk+6>Hb1MRinHKd1o6dbRwE_IlO9qhV(LGy{QCnW0+JdG-(Hn|CuoeO zxdrRz3Gcbfog)2Y_R_`KB;=ckV zJ7P@i4Xt_=6+#RJaPBVi%8LZ;2ALQLL-@QB@&)(v6&KHSxS}+VBx#P85uep_9y#J^ zQu_JJ`1kt0Tf=TWeRiXl+1z&b0bHa#DFxP=XLJ^XcgRst?8ym%>3-Ij z5yK_Wa2HC=$fjz%Y?x3E_i?UaTdt9Q?-#sB$}-*RdpPq_gY(-oNW2GM0Wg z@1Z!l6q5<^DuaAcS4>KXaS>y6Z=LFwX1p%jF10bsK~VhoMIJ6oYX|(N3e}W1yHj+4 z8lKIxr1uF1@Y5i4iC=w<8-UP1A-Lx(n~JHwb82`_=_xPpVwUV>Xl&Uytj=iCE|0ax zydbxCYKvDS$-U3a67u6)UhqQ^l#7J2)r^F1D6CAD$oe3{ApBs~8(rd3bj%w4`#4`W zH@UjloGL^U)^bt`%3CDi>*(1XpX8AQrDd9&?Ivq5=3lY@smk1aZy@YibjpYUH^d+% zpVk85a}B3|tcVcX5pQ5CAZMi1br+LJscKvbIIK+DR1Iw5NLG3(zCS!p-U$tc5RCh+ z2x@>&W9*}tL#ob_MSg9-%+z_VYeC?aZ5bwSjDZ*zEu2o!G1}n`KLRRj03G9=rxWh@ zKpuw+UvkJX*E}mE5012-IDWvlbA&eKtjpnSe5oGybirgl3U<@ic3rs#c*(YbX?F1<;~;Fw~y>B`A)^1 zL0Sg2f9yLW>;j*H)F+HV1owWiDQf87Vexxydd?(9Pm=z;q17$6GV`em zJf(nI%9MBZQlK5c-&TTT4w7T%JIbGbQrt>g9<6d(ScBU}KzlA)KkpN1E%njH;nSa( zY%{5u!o~U9&l(}{5)^yitCNwbb-W)4)UHsM$7z3vh?-v!tNE=}eHxGv<=KPl6DEdU zeL_ou<=@!zQ6=W^pY&*iAtwszEi-#P9?3s?k6K$wW-hYK>!Un9@7|Zwt zpnWm9Hv8JFh6sdM)Req6+qV)$THhtnLQ5YJ4Kl07TR`NQloEZQVq+s$v>;x9^Qp#7 zbQyKUv{$%+A?7O@}h=4#_MzlyZ15k zEPtP=c02lfGmpHc1|mEC3HHCKyb1$30~9^Z`?=%evM|@-E(4W0kVW#-{;4=PvVJ#v zB3^2S0)p;!7Hc)YGCzOj?|k40kCZ*-L)8AqUK;}=EB-ReolhTon=H-`%_)=9{O>Kj z$lZ;q*h@%Qrf+9g(hQksWxv;kF7Z5doSeN>sbAp*zV|msk!CZ){2scPor?ANxf6L7 zJ1S-5W%(0U==Oqj#H%zijBZV%jw`E57OSIa55%y&lSoPy@D`rp)(7SiU4E;=-B^M-k2j>MCsU}^{ z;xUy4+pljaV3dio+8)0BSF4$>RMeKZx{CT|j&y>A5;=n>e-0d;1j*W=F84!r+=xnH 
zsY3y0^ubPj%PFJAR^G4tBUsx-@ic$+BsjdyMHPU;gr)(5Tt!y9Q?pKMuiioj;4V!vG4nOVnP+>X z9u2r4IrQ(-tO5rfSjg27M>_aO6W0U^g;?H#HKdmR--G)Zf0x3*ROHO(a=Xb2Zr5B$ zV@-!j9ch`+arkQxXxY2?(i)!oWu%BkvF;X8PgD}(bVOBo0p>DvA?HTIi0XNez@a@P z!-LF?mY}AX@y#yeWLd~2K{xG=77f%?fCMCSh^%8zPsMv{?5bp&cp`T1(+Qh<{;q7NIQ7Qn=E9Lz+{cdvb)wwd#G!=r8cqGkv02D@1TcR~ zxi97rW;!gzJ)v9wkqi0RM{gsgi67kxH4M{%PU>sVs#x_*W-~PrX{!ee1zoT;Jf}j= zEYHc!?eb}X;0SscxH*)5aBzUq2%%_MaZmD&cHh_yMD=1nGK&nuHPX?cf^oApHZ~-+ z<0B(5L!5Q_l$c2gIhPi);Nu()U%AVVue%C3FJr^72&o-Qn6}AtJVzG?Daz7;#?qaR zfo=X@UDg&MyCl)!@XW{K`tqdQYA`Eyi3!GY`<@>g!;q_?bOnwklC=~U0_3PG2<+$f zJVX8uEJwof5vZ+@yz~-AV3_=F^n}^wD6cO!4Lkmyfezn){_KMM7y_6TVLkl$!H#3? zG|KH|H^{TjinI9tiMhA>=dDbK{7GZEn=)~kUR`Fx3ac=^OMh&Di$89ipRwT z-JSnt*Mu>n&q2^4FwOG2$2o z1xpwh{Ps_sv#8wXIK0_*Vq&#$&gHt-L?Lu)&}mtvafK>txdoFQ7^u3CCkkX#Qo^{$ z?^ZmEkj@h@XhpN7KzVd(aD&Z99CWVa@VoFf@Yzuz@Dy&<{|3*&zyHtQL=LC>e@zb$ zfO*!GB&4KmW{-?vT*CwmpfEBV3Oj(MYkGBYCa0(fzHQAK(%PmXLt0RZ5=1!j) zq-c|plAdm8wnV@PlBAy&is@psFuOMPHG2dDozI>Gj4g%H;HD7mB;dA{v`7;b6;*fj zZxiOk8r4p}gqa%IO-+}I;t9q-H;)$IV9~Y0*b^8rq@t%6Nq>FX9noZaX7D^cj3=iN z#?EK88QnytVufK5(A;$VU!C1~G?aTE0Px2WszH>Uv7|7zAxGAP8AHmFwP>vGWZ#;i ziBW`+rI2`Yk!9%0lI@O2mL{fDij+bmk@wzdxN#Gz_xtpo_dWOT*O}v=Ip>+@`Tc(1 zWzOOI>0b4G9>vOnp){#Up}Plw!)AkrLZ0kN?&&x=jEw~wC(zpZ0tjzQ5!HWkIRa5%~GVGXY)av@|0 z&WE9*!eVsRNMhn$MKh}X^^%egK&%(Rj>yT%x&%t=`Dy~Dp1;0M12H`h0@r~~r<>HQ zjN`Pm3oc$1G>w~Mw6R!2_mVc)!G;fI{1PkJx228bBalIuzcz0l8{7P6QgAQFRG#c${af1)* z((8z8eSLk;oK*N%R#zR4AHVMI>G=r8$J82~uP25ykh_ng7W+o7aH@6l1ouyn_Ov!O z={*DzX$Z9E-D+wL4GsJUL2$oX4CNuyskQ*-&scWO?S2gQ-`tp&p9gnnXs941gOI7Zm+$d#OVo`eFDF z>a|17`2&uXTy%6a$=UgG9d>znx!63uu?`FBj9!sr=0g|>L~hC$Uf4brj6LOS_06cR zt}bSImWTinz7Hszyn+IO^X)gD@4INuw?3Goupe(|Sh=n$fRsE*_oIK^bNCqXH-9|C z-wcd2R`|Kl=f)QQlC|sr(r^YQT7`s!bcAQOi!|Kw zY6=AZJpo9ZC77aCSy92K?OA{Rc?cmOpd#)7@AN%hJt;1(C2Z0k-y7E6f)x({Gaa5C zT@`otU3PYME}ow0u(pW5Zpe#qB)>*VM&>=tBfJR-G#+;yCf}q%1XZ@QNE#RzymKOb zPU%ze)ssPKZG^+&^a3q(J95H!m$Y=csd?nrzb2y5aa&Ve@qn(&g@lFmL;T_c`UPL6 
ze3wcTv`AOqyH^c|`&Z+B#U{TVv92j>O}&~IkeIoOrYUZzOX4?(XioWgk}5J79DCEwMI$(7&udmWp;lPPIGrdo~T< zCVoCr@7IGx!mX!U@;MyvFg-j9Tl8fym34L7qFCnH|0GIFSi}8_VJ#TIWYMbJEy0Lyvy`t`3b`d}y<`15sH;ZSrQQ$C-tx)!t3qHMS^+e5YP;vtCvef`t>z(LShcY24BcsuEhI=&7IAUByI*TZ#d_V`9 zqWqGQbHhxMp`jsr;j=?zWCOHJv5R>b?%v*-klx&{T`-NAIS=RI5y+=i|4mFcpx?8H zc$Lku1H(J_#j95t=1d0vOqtKwvv(x-;qfXkH!(e1*@`?nI~%+_`v83!Gba)$V{UHF zJ|dzXz2HQ2kM8~Z_f^Y|%zfnvU4cO1_52`Hp8lc*=yZNrncf5|+A9YV(pg^j7dEBU z0oE+R0vj40?)G3427u+4m+S8>aCl5<)j84WzP`S0W12HaR@rd{Lg5hH9GlJN03`Mg z?wj^HW{&rEdC^ji($FQ)G9_Hu?~{Mlf1`9!)P%_!xQ&W0Y9q47Kf50J`$DL7X&*HH zv)c*9D4?}oEhj(#ZGx%|B9Kh`QGo2w$zvDIMb#>=1pe)voqzGB#cM}1$H%?Z)zwwZ z%@vqT=Gyx2O(+%hk!l97gl;l&5XwI}ahV8v54mt|e!jA*%GUHP*E~ZxX+Hh74vp!W0_5oeuuNz}4Vj>l$=m|}`;WMIc%&5<%fhWtsZ@Ue{~O{!#~a z6_UQru@*t-UV6)uzn`66o1drn-b<8owpbSYk1|!ZHrs%bip+rIwbKD}{w=(0M4svkHr2!MS2{I1JnwXx7KO;`Q?j3IN>YoqG(o zhQVM|jE$uc7e7C7XvZHGXjqNh*&&f5t<1O6l@0yBH!Di%X_;l#)2B{~CNT(LU^>jo@;WMm{1Z6PRqt}ZU;*=z#3`+)9pxmKX7} zRiw+asp1C?LRS0ngG30z^572#*!xPJn4Fr&z+kUag8Y1dbw#{s+iGfR8gs`NE(E_n z@_2*4y4B*N-&*`)dP)dNksXBv>aN4`Y@p}$m6;`#fUn>R)1J<1tkO55Uba1?WfwIDti{|lcb36!JG8(8|uwErLB c^!JNx3I_xgZI#)U(SX2@1JQ}lXyc#qZ&k8>0ssI2 literal 0 HcmV?d00001