From 96d83f4617e5ded16c2f450f87ce7dcacee18d8f Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 10 Jun 2026 00:28:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: laion/Qwen3-8B_exp_tas_top_k_32_traces_save-strategy_steps Source: Original Platform --- .gitattributes | 56 + README.md | 60 + added_tokens.json | 28 + all_results.json | 16 + chat_template.jinja | 89 + config.json | 68 + configuration.json | 1 + generation_config.json | 13 + merges.txt | 3 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 407 +++ run_summary.json | 12 + special_tokens_map.json | 31 + tokenizer.json | 3 + tokenizer_config.json | 240 ++ train_results.json | 16 + trainer_log.jsonl | 549 +++ trainer_state.json | 5624 ++++++++++++++++++++++++++++++ training_args.bin | 3 + training_loss.png | Bin 0 -> 39827 bytes vocab.json | 3 + 24 files changed, 7234 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 added_tokens.json create mode 100644 all_results.json create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 merges.txt create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 run_summary.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_loss.png create mode 100644 vocab.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3bddac0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +training_args.bin filter=lfs diff=lfs merge=lfs -text +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..9ad20bf --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen3-8B +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: Qwen3-8B_exp_tas_top_k_32_traces_save-strategy_steps + results: [] +--- + + + +# Qwen3-8B_exp_tas_top_k_32_traces_save-strategy_steps + +This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the DCAgent/exp_tas_top_k_32_traces dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0001 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 32 +- total_train_batch_size: 32 +- total_eval_batch_size: 256 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.87,0.99) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.005 +- num_epochs: 8.0 + +### Training results + + + +### Framework versions + +- Transformers 4.55.0 +- Pytorch 2.7.1+cu128 +- Datasets 3.6.0 +- Tokenizers 0.21.1 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "": 151668, + "": 151658, + "": 151666, + "": 151667, + "": 151657, + "": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..ffb3d4b --- /dev/null +++ b/all_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 0.0009416172291339583, + "achieved_tflops_per_gpu_theoretical": 186.33475095527425, + "epoch": 8.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.017221365123987198, + "mfu_percent": 0.00030180039395319175, + "mfu_percent_theoretical": 59.72267658822893, + "total_flos": 1379676432629760.0, + "train_loss": 0.20488507787429666, + "train_runtime": 45788.1262, + "train_samples_per_second": 1.769, + "train_steps_per_second": 0.055, + "valid_targets_mean": 3641.4, + "valid_targets_min": 1073 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..01be9b3 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,89 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..cac9b4a --- /dev/null +++ b/config.json @@ -0,0 +1,68 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..159097f --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "others", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..4b23077 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.55.0" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..97578bd --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85757a3a87286023a367f8ff0b8c9bb903ce9005ea2d0eeed97c71205b6f9993 +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..7cb9a1a --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875efe37c8a1dba2e1d263b926e3917d7f1da9d9a4c998e4504d0e3274e0b0c3 +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..fc60230 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc69e9f82cd23e03f21f0842e39bbf92fffb871a2aa763ae1f4c939bd3cd569 +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..76b7bb2 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b4aace9b133e5970603a69d09cf531fe7e6a4107911ca74cd5e83cbdd2980f +size 1580230264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ba886c0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,407 @@ +{ + "metadata": { + "total_parameters": 308224, + "total_size": 16381470720 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..c3a6427 --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": null, + "training_start": null, + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "Qwen/Qwen3-8B", + "dataset_name": "DCAgent/exp_tas_top_k_32_traces", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/laion/Qwen3-8B_exp_tas_top_k_32_traces_save-strategy_steps/blob/main/config.json", + "wandb_link": "https://wandb.ai/dogml/dc-agent/runs/Qwen3-8B_exp_tas_top_k_32_traces_save-strategy_steps", + "traces_location_s3": null +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e9dc937 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..ffb3d4b --- /dev/null +++ b/train_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 0.0009416172291339583, + "achieved_tflops_per_gpu_theoretical": 186.33475095527425, + "epoch": 8.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.017221365123987198, + "mfu_percent": 0.00030180039395319175, + "mfu_percent_theoretical": 59.72267658822893, + "total_flos": 1379676432629760.0, + "train_loss": 0.20488507787429666, + "train_runtime": 45788.1262, + "train_samples_per_second": 1.769, + "train_steps_per_second": 0.055, + "valid_targets_mean": 3641.4, + "valid_targets_min": 1073 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..b1b4d3c --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,549 @@ +{"current_steps": 5, "total_steps": 2536, "loss": 0.8936, "lr": 3.0769230769230774e-05, "epoch": 0.01579778830963665, "percentage": 0.2, "elapsed_time": "0:02:46", "remaining_time": "23:27:32"} +{"current_steps": 5, "total_steps": 2536, "loss": 0.8936, "lr": 3.0769230769230774e-05, "epoch": 0.015772870662460567, "percentage": 0.2, "elapsed_time": "1:16:50", "remaining_time": "27 days, 0:16:57"} +{"current_steps": 10, "total_steps": 2536, "loss": 0.7352, "lr": 6.923076923076924e-05, "epoch": 0.031545741324921134, "percentage": 0.39, "elapsed_time": "1:18:11", "remaining_time": "13 days, 17:11:45"} +{"current_steps": 15, "total_steps": 2536, "loss": 0.6431, "lr": 9.99999612380875e-05, "epoch": 0.0473186119873817, "percentage": 0.59, "elapsed_time": "1:19:40", "remaining_time": "9 days, 7:09:37"} +{"current_steps": 20, "total_steps": 2536, "loss": 0.6226, "lr": 9.999860457746025e-05, "epoch": 0.06309148264984227, "percentage": 0.79, "elapsed_time": "1:20:58", "remaining_time": "7 days, 1:46:10"} +{"current_steps": 25, "total_steps": 2536, "loss": 0.6059, "lr": 9.999530988130677e-05, "epoch": 0.07886435331230283, "percentage": 0.99, "elapsed_time": "1:22:20", "remaining_time": "5 days, 17:50:23"} +{"current_steps": 30, "total_steps": 2536, "loss": 0.5852, "lr": 9.999007727733537e-05, "epoch": 0.0946372239747634, "percentage": 1.18, "elapsed_time": "1:23:39", "remaining_time": "4 days, 20:28:35"} +{"current_steps": 35, "total_steps": 2536, "loss": 0.5905, "lr": 9.998290696837115e-05, "epoch": 0.11041009463722397, "percentage": 1.38, "elapsed_time": "1:24:53", "remaining_time": "4 days, 5:06:33"} +{"current_steps": 40, "total_steps": 2536, "loss": 0.5804, "lr": 9.997379923234816e-05, "epoch": 0.12618296529968454, "percentage": 1.58, "elapsed_time": "1:26:04", "remaining_time": "3 days, 17:31:12"} +{"current_steps": 45, "total_steps": 2536, "loss": 0.5623, "lr": 9.996275442229857e-05, "epoch": 0.14195583596214512, "percentage": 1.77, "elapsed_time": "1:27:13", "remaining_time": "3 days, 8:28:15"} +{"current_steps": 50, "total_steps": 2536, "loss": 0.5505, "lr": 9.994977296633902e-05, "epoch": 0.15772870662460567, "percentage": 1.97, "elapsed_time": "1:28:30", "remaining_time": "3 days, 1:20:25"} +{"current_steps": 55, "total_steps": 2536, "loss": 0.5522, "lr": 9.993485536765398e-05, "epoch": 0.17350157728706625, "percentage": 2.17, "elapsed_time": "1:29:40", "remaining_time": "2 days, 19:25:24"} +{"current_steps": 60, "total_steps": 2536, "loss": 0.5903, "lr": 9.991800220447634e-05, "epoch": 0.1892744479495268, "percentage": 2.37, "elapsed_time": "1:30:51", "remaining_time": "2 days, 14:29:07"} +{"current_steps": 65, "total_steps": 2536, "loss": 0.5609, "lr": 9.989921413006489e-05, "epoch": 0.20504731861198738, "percentage": 2.56, "elapsed_time": "1:32:06", "remaining_time": "2 days, 10:21:32"} +{"current_steps": 70, "total_steps": 2536, "loss": 0.5704, "lr": 9.987849187267908e-05, "epoch": 0.22082018927444794, "percentage": 2.76, "elapsed_time": "1:33:17", "remaining_time": "2 days, 6:46:28"} +{"current_steps": 75, "total_steps": 2536, "loss": 0.5421, "lr": 9.985583623555076e-05, "epoch": 0.23659305993690852, "percentage": 2.96, "elapsed_time": "1:34:23", "remaining_time": "2 days, 3:37:34"} +{"current_steps": 80, "total_steps": 2536, "loss": 0.5597, "lr": 9.9831248096853e-05, "epoch": 0.25236593059936907, "percentage": 3.15, "elapsed_time": "1:35:31", "remaining_time": "2 days, 0:52:47"} +{"current_steps": 85, "total_steps": 2536, "loss": 0.507, "lr": 9.980472840966614e-05, "epoch": 0.26813880126182965, "percentage": 3.35, "elapsed_time": "1:36:40", "remaining_time": "1 day, 22:27:31"} +{"current_steps": 90, "total_steps": 2536, "loss": 0.5632, "lr": 9.977627820194082e-05, "epoch": 0.28391167192429023, "percentage": 3.55, "elapsed_time": "1:37:50", "remaining_time": "1 day, 20:19:17"} +{"current_steps": 95, "total_steps": 2536, "loss": 0.5778, "lr": 9.974589857645802e-05, "epoch": 0.2996845425867508, "percentage": 3.75, "elapsed_time": "1:39:01", "remaining_time": "1 day, 18:24:20"} +{"current_steps": 100, "total_steps": 2536, "loss": 0.5465, "lr": 9.97135907107865e-05, "epoch": 0.31545741324921134, "percentage": 3.94, "elapsed_time": "1:40:14", "remaining_time": "1 day, 16:41:56"} +{"current_steps": 105, "total_steps": 2536, "loss": 0.5122, "lr": 9.967935585723706e-05, "epoch": 0.3312302839116719, "percentage": 4.14, "elapsed_time": "2:04:50", "remaining_time": "2 days, 0:10:11"} +{"current_steps": 110, "total_steps": 2536, "loss": 0.5501, "lr": 9.964319534281397e-05, "epoch": 0.3470031545741325, "percentage": 4.34, "elapsed_time": "2:06:02", "remaining_time": "1 day, 22:19:42"} +{"current_steps": 115, "total_steps": 2536, "loss": 0.5505, "lr": 9.960511056916357e-05, "epoch": 0.3627760252365931, "percentage": 4.53, "elapsed_time": "2:07:04", "remaining_time": "1 day, 20:35:08"} +{"current_steps": 120, "total_steps": 2536, "loss": 0.5504, "lr": 9.956510301251995e-05, "epoch": 0.3785488958990536, "percentage": 4.73, "elapsed_time": "2:08:15", "remaining_time": "1 day, 19:02:09"} +{"current_steps": 125, "total_steps": 2536, "loss": 0.5212, "lr": 9.952317422364772e-05, "epoch": 0.3943217665615142, "percentage": 4.93, "elapsed_time": "2:09:24", "remaining_time": "1 day, 17:36:00"} +{"current_steps": 130, "total_steps": 2536, "loss": 0.5148, "lr": 9.947932582778188e-05, "epoch": 0.41009463722397477, "percentage": 5.13, "elapsed_time": "2:10:35", "remaining_time": "1 day, 16:16:51"} +{"current_steps": 135, "total_steps": 2536, "loss": 0.543, "lr": 9.943355952456483e-05, "epoch": 0.42586750788643535, "percentage": 5.32, "elapsed_time": "2:11:50", "remaining_time": "1 day, 15:04:54"} +{"current_steps": 140, "total_steps": 2536, "loss": 0.5367, "lr": 9.938587708798053e-05, "epoch": 0.4416403785488959, "percentage": 5.52, "elapsed_time": "2:13:03", "remaining_time": "1 day, 13:57:18"} +{"current_steps": 145, "total_steps": 2536, "loss": 0.5431, "lr": 9.933628036628569e-05, "epoch": 0.45741324921135645, "percentage": 5.72, "elapsed_time": "2:14:15", "remaining_time": "1 day, 12:53:58"} +{"current_steps": 150, "total_steps": 2536, "loss": 0.5111, "lr": 9.92847712819381e-05, "epoch": 0.47318611987381703, "percentage": 5.91, "elapsed_time": "2:15:28", "remaining_time": "1 day, 11:54:56"} +{"current_steps": 155, "total_steps": 2536, "loss": 0.5423, "lr": 9.923135183152224e-05, "epoch": 0.4889589905362776, "percentage": 6.11, "elapsed_time": "2:16:40", "remaining_time": "1 day, 10:59:36"} +{"current_steps": 160, "total_steps": 2536, "loss": 0.5404, "lr": 9.91760240856717e-05, "epoch": 0.5047318611987381, "percentage": 6.31, "elapsed_time": "2:17:55", "remaining_time": "1 day, 10:08:15"} +{"current_steps": 165, "total_steps": 2536, "loss": 0.5203, "lr": 9.91187901889891e-05, "epoch": 0.5205047318611987, "percentage": 6.51, "elapsed_time": "2:19:09", "remaining_time": "1 day, 9:19:40"} +{"current_steps": 170, "total_steps": 2536, "loss": 0.5464, "lr": 9.905965235996286e-05, "epoch": 0.5362776025236593, "percentage": 6.7, "elapsed_time": "2:20:19", "remaining_time": "1 day, 8:33:06"} +{"current_steps": 175, "total_steps": 2536, "loss": 0.52, "lr": 9.899861289088121e-05, "epoch": 0.5520504731861199, "percentage": 6.9, "elapsed_time": "2:21:30", "remaining_time": "1 day, 7:49:07"} +{"current_steps": 180, "total_steps": 2536, "loss": 0.4798, "lr": 9.893567414774341e-05, "epoch": 0.5678233438485805, "percentage": 7.1, "elapsed_time": "2:22:40", "remaining_time": "1 day, 7:07:29"} +{"current_steps": 185, "total_steps": 2536, "loss": 0.5247, "lr": 9.88708385701679e-05, "epoch": 0.583596214511041, "percentage": 7.29, "elapsed_time": "2:23:56", "remaining_time": "1 day, 6:29:07"} +{"current_steps": 190, "total_steps": 2536, "loss": 0.5404, "lr": 9.88041086712979e-05, "epoch": 0.5993690851735016, "percentage": 7.49, "elapsed_time": "2:25:02", "remaining_time": "1 day, 5:50:51"} +{"current_steps": 195, "total_steps": 2536, "loss": 0.5269, "lr": 9.873548703770388e-05, "epoch": 0.6151419558359621, "percentage": 7.69, "elapsed_time": "2:26:55", "remaining_time": "1 day, 5:23:56"} +{"current_steps": 200, "total_steps": 2536, "loss": 0.5047, "lr": 9.866497632928336e-05, "epoch": 0.6309148264984227, "percentage": 7.89, "elapsed_time": "2:28:05", "remaining_time": "1 day, 4:49:40"} +{"current_steps": 105, "total_steps": 2536, "loss": 0.5122, "lr": 9.967935585723706e-05, "epoch": 0.3312302839116719, "percentage": 4.14, "elapsed_time": "0:02:03", "remaining_time": "0:47:49"} +{"current_steps": 110, "total_steps": 2536, "loss": 0.5501, "lr": 9.964319534281397e-05, "epoch": 0.3470031545741325, "percentage": 4.34, "elapsed_time": "0:03:19", "remaining_time": "1:13:14"} +{"current_steps": 115, "total_steps": 2536, "loss": 0.5506, "lr": 9.960511056916357e-05, "epoch": 0.3627760252365931, "percentage": 4.53, "elapsed_time": "0:04:22", "remaining_time": "1:31:56"} +{"current_steps": 120, "total_steps": 2536, "loss": 0.5503, "lr": 9.956510301251995e-05, "epoch": 0.3785488958990536, "percentage": 4.73, "elapsed_time": "0:05:33", "remaining_time": "1:51:59"} +{"current_steps": 125, "total_steps": 2536, "loss": 0.521, "lr": 9.952317422364772e-05, "epoch": 0.3943217665615142, "percentage": 4.93, "elapsed_time": "0:06:48", "remaining_time": "2:11:10"} +{"current_steps": 130, "total_steps": 2536, "loss": 0.5149, "lr": 9.947932582778188e-05, "epoch": 0.41009463722397477, "percentage": 5.13, "elapsed_time": "0:07:59", "remaining_time": "2:27:51"} +{"current_steps": 135, "total_steps": 2536, "loss": 0.5427, "lr": 9.943355952456483e-05, "epoch": 0.42586750788643535, "percentage": 5.32, "elapsed_time": "0:09:14", "remaining_time": "2:44:27"} +{"current_steps": 140, "total_steps": 2536, "loss": 0.5366, "lr": 9.938587708798053e-05, "epoch": 0.4416403785488959, "percentage": 5.52, "elapsed_time": "0:10:29", "remaining_time": "2:59:26"} +{"current_steps": 145, "total_steps": 2536, "loss": 0.5452, "lr": 9.933628036628569e-05, "epoch": 0.45741324921135645, "percentage": 5.72, "elapsed_time": "0:11:39", "remaining_time": "3:12:11"} +{"current_steps": 150, "total_steps": 2536, "loss": 0.5104, "lr": 9.92847712819381e-05, "epoch": 0.47318611987381703, "percentage": 5.91, "elapsed_time": "0:12:50", "remaining_time": "3:24:21"} +{"current_steps": 155, "total_steps": 2536, "loss": 0.5411, "lr": 9.923135183152224e-05, "epoch": 0.4889589905362776, "percentage": 6.11, "elapsed_time": "0:14:01", "remaining_time": "3:35:26"} +{"current_steps": 160, "total_steps": 2536, "loss": 0.5391, "lr": 9.91760240856717e-05, "epoch": 0.5047318611987381, "percentage": 6.31, "elapsed_time": "0:15:15", "remaining_time": "3:46:28"} +{"current_steps": 165, "total_steps": 2536, "loss": 0.5194, "lr": 9.91187901889891e-05, "epoch": 0.5205047318611987, "percentage": 6.51, "elapsed_time": "0:16:26", "remaining_time": "3:56:19"} +{"current_steps": 170, "total_steps": 2536, "loss": 0.5463, "lr": 9.905965235996286e-05, "epoch": 0.5362776025236593, "percentage": 6.7, "elapsed_time": "0:17:35", "remaining_time": "4:04:54"} +{"current_steps": 175, "total_steps": 2536, "loss": 0.52, "lr": 9.899861289088121e-05, "epoch": 0.5520504731861199, "percentage": 6.9, "elapsed_time": "0:18:43", "remaining_time": "4:12:39"} +{"current_steps": 180, "total_steps": 2536, "loss": 0.4807, "lr": 9.893567414774341e-05, "epoch": 0.5678233438485805, "percentage": 7.1, "elapsed_time": "0:19:52", "remaining_time": "4:20:08"} +{"current_steps": 185, "total_steps": 2536, "loss": 0.5249, "lr": 9.88708385701679e-05, "epoch": 0.583596214511041, "percentage": 7.29, "elapsed_time": "0:21:06", "remaining_time": "4:28:14"} +{"current_steps": 190, "total_steps": 2536, "loss": 0.5409, "lr": 9.88041086712979e-05, "epoch": 0.5993690851735016, "percentage": 7.49, "elapsed_time": "0:22:11", "remaining_time": "4:34:03"} +{"current_steps": 195, "total_steps": 2536, "loss": 0.5271, "lr": 9.873548703770388e-05, "epoch": 0.6151419558359621, "percentage": 7.69, "elapsed_time": "0:23:19", "remaining_time": "4:40:02"} +{"current_steps": 200, "total_steps": 2536, "loss": 0.5044, "lr": 9.866497632928336e-05, "epoch": 0.6309148264984227, "percentage": 7.89, "elapsed_time": "0:24:28", "remaining_time": "4:45:50"} +{"current_steps": 105, "total_steps": 2536, "loss": 0.5122, "lr": 9.967935585723706e-05, "epoch": 0.3312302839116719, "percentage": 4.14, "elapsed_time": "0:02:02", "remaining_time": "0:47:25"} +{"current_steps": 110, "total_steps": 2536, "loss": 0.5502, "lr": 9.964319534281397e-05, "epoch": 0.3470031545741325, "percentage": 4.34, "elapsed_time": "0:03:18", "remaining_time": "1:12:49"} +{"current_steps": 115, "total_steps": 2536, "loss": 0.5506, "lr": 9.960511056916357e-05, "epoch": 0.3627760252365931, "percentage": 4.53, "elapsed_time": "0:04:20", "remaining_time": "1:31:27"} +{"current_steps": 120, "total_steps": 2536, "loss": 0.5505, "lr": 9.956510301251995e-05, "epoch": 0.3785488958990536, "percentage": 4.73, "elapsed_time": "0:05:32", "remaining_time": "1:51:32"} +{"current_steps": 125, "total_steps": 2536, "loss": 0.5214, "lr": 9.952317422364772e-05, "epoch": 0.3943217665615142, "percentage": 4.93, "elapsed_time": "0:06:42", "remaining_time": "2:09:24"} +{"current_steps": 130, "total_steps": 2536, "loss": 0.515, "lr": 9.947932582778188e-05, "epoch": 0.41009463722397477, "percentage": 5.13, "elapsed_time": "0:07:53", "remaining_time": "2:26:06"} +{"current_steps": 135, "total_steps": 2536, "loss": 0.5429, "lr": 9.943355952456483e-05, "epoch": 0.42586750788643535, "percentage": 5.32, "elapsed_time": "0:09:09", "remaining_time": "2:42:55"} +{"current_steps": 140, "total_steps": 2536, "loss": 0.5364, "lr": 9.938587708798053e-05, "epoch": 0.4416403785488959, "percentage": 5.52, "elapsed_time": "0:10:21", "remaining_time": "2:57:21"} +{"current_steps": 145, "total_steps": 2536, "loss": 0.5434, "lr": 9.933628036628569e-05, "epoch": 0.45741324921135645, "percentage": 5.72, "elapsed_time": "0:11:32", "remaining_time": "3:10:21"} +{"current_steps": 150, "total_steps": 2536, "loss": 0.5111, "lr": 9.92847712819381e-05, "epoch": 0.47318611987381703, "percentage": 5.91, "elapsed_time": "0:12:44", "remaining_time": "3:22:33"} +{"current_steps": 155, "total_steps": 2536, "loss": 0.542, "lr": 9.923135183152224e-05, "epoch": 0.4889589905362776, "percentage": 6.11, "elapsed_time": "0:13:55", "remaining_time": "3:33:47"} +{"current_steps": 160, "total_steps": 2536, "loss": 0.5408, "lr": 9.91760240856717e-05, "epoch": 0.5047318611987381, "percentage": 6.31, "elapsed_time": "0:15:08", "remaining_time": "3:44:51"} +{"current_steps": 165, "total_steps": 2536, "loss": 0.5203, "lr": 9.91187901889891e-05, "epoch": 0.5205047318611987, "percentage": 6.51, "elapsed_time": "0:16:20", "remaining_time": "3:54:54"} +{"current_steps": 170, "total_steps": 2536, "loss": 0.5465, "lr": 9.905965235996286e-05, "epoch": 0.5362776025236593, "percentage": 6.7, "elapsed_time": "0:17:29", "remaining_time": "4:03:30"} +{"current_steps": 175, "total_steps": 2536, "loss": 0.5197, "lr": 9.899861289088121e-05, "epoch": 0.5520504731861199, "percentage": 6.9, "elapsed_time": "0:18:37", "remaining_time": "4:11:13"} +{"current_steps": 180, "total_steps": 2536, "loss": 0.4799, "lr": 9.893567414774341e-05, "epoch": 0.5678233438485805, "percentage": 7.1, "elapsed_time": "0:19:45", "remaining_time": "4:18:42"} +{"current_steps": 185, "total_steps": 2536, "loss": 0.5248, "lr": 9.88708385701679e-05, "epoch": 0.583596214511041, "percentage": 7.29, "elapsed_time": "0:20:59", "remaining_time": "4:26:45"} +{"current_steps": 190, "total_steps": 2536, "loss": 0.5407, "lr": 9.88041086712979e-05, "epoch": 0.5993690851735016, "percentage": 7.49, "elapsed_time": "0:22:04", "remaining_time": "4:32:35"} +{"current_steps": 195, "total_steps": 2536, "loss": 0.5267, "lr": 9.873548703770388e-05, "epoch": 0.6151419558359621, "percentage": 7.69, "elapsed_time": "0:23:13", "remaining_time": "4:38:46"} +{"current_steps": 200, "total_steps": 2536, "loss": 0.504, "lr": 9.866497632928336e-05, "epoch": 0.6309148264984227, "percentage": 7.89, "elapsed_time": "0:24:21", "remaining_time": "4:44:29"} +{"current_steps": 205, "total_steps": 2536, "loss": 0.5286, "lr": 9.859257927915774e-05, "epoch": 0.6466876971608833, "percentage": 8.08, "elapsed_time": "0:29:22", "remaining_time": "5:33:58"} +{"current_steps": 210, "total_steps": 2536, "loss": 0.5219, "lr": 9.851829869356651e-05, "epoch": 0.6624605678233438, "percentage": 8.28, "elapsed_time": "0:30:31", "remaining_time": "5:38:02"} +{"current_steps": 215, "total_steps": 2536, "loss": 0.5191, "lr": 9.844213745175826e-05, "epoch": 0.6782334384858044, "percentage": 8.48, "elapsed_time": "0:31:42", "remaining_time": "5:42:16"} +{"current_steps": 220, "total_steps": 2536, "loss": 0.5118, "lr": 9.83640985058792e-05, "epoch": 0.694006309148265, "percentage": 8.68, "elapsed_time": "0:32:52", "remaining_time": "5:46:03"} +{"current_steps": 225, "total_steps": 2536, "loss": 0.5221, "lr": 9.828418488085877e-05, "epoch": 0.7097791798107256, "percentage": 8.87, "elapsed_time": "0:34:04", "remaining_time": "5:49:57"} +{"current_steps": 230, "total_steps": 2536, "loss": 0.5113, "lr": 9.820239967429233e-05, "epoch": 0.7255520504731862, "percentage": 9.07, "elapsed_time": "0:35:13", "remaining_time": "5:53:11"} +{"current_steps": 235, "total_steps": 2536, "loss": 0.5304, "lr": 9.811874605632104e-05, "epoch": 0.7413249211356467, "percentage": 9.27, "elapsed_time": "0:36:24", "remaining_time": "5:56:30"} +{"current_steps": 240, "total_steps": 2536, "loss": 0.5083, "lr": 9.803322726950905e-05, "epoch": 0.7570977917981072, "percentage": 9.46, "elapsed_time": "0:37:28", "remaining_time": "5:58:27"} +{"current_steps": 245, "total_steps": 2536, "loss": 0.52, "lr": 9.794584662871787e-05, "epoch": 0.7728706624605678, "percentage": 9.66, "elapsed_time": "0:38:34", "remaining_time": "6:00:46"} +{"current_steps": 250, "total_steps": 2536, "loss": 0.4882, "lr": 9.785660752097768e-05, "epoch": 0.7886435331230284, "percentage": 9.86, "elapsed_time": "0:39:44", "remaining_time": "6:03:27"} +{"current_steps": 255, "total_steps": 2536, "loss": 0.5434, "lr": 9.77655134053563e-05, "epoch": 0.804416403785489, "percentage": 10.06, "elapsed_time": "0:40:54", "remaining_time": "6:05:54"} +{"current_steps": 260, "total_steps": 2536, "loss": 0.5356, "lr": 9.767256781282486e-05, "epoch": 0.8201892744479495, "percentage": 10.25, "elapsed_time": "0:42:04", "remaining_time": "6:08:16"} +{"current_steps": 265, "total_steps": 2536, "loss": 0.5247, "lr": 9.757777434612116e-05, "epoch": 0.8359621451104101, "percentage": 10.45, "elapsed_time": "0:43:13", "remaining_time": "6:10:25"} +{"current_steps": 270, "total_steps": 2536, "loss": 0.5046, "lr": 9.748113667960987e-05, "epoch": 0.8517350157728707, "percentage": 10.65, "elapsed_time": "0:44:15", "remaining_time": "6:11:25"} +{"current_steps": 275, "total_steps": 2536, "loss": 0.5147, "lr": 9.738265855914013e-05, "epoch": 0.8675078864353313, "percentage": 10.84, "elapsed_time": "0:45:22", "remaining_time": "6:13:06"} +{"current_steps": 280, "total_steps": 2536, "loss": 0.5112, "lr": 9.728234380190038e-05, "epoch": 0.8832807570977917, "percentage": 11.04, "elapsed_time": "0:46:32", "remaining_time": "6:15:00"} +{"current_steps": 285, "total_steps": 2536, "loss": 0.5077, "lr": 9.718019629627045e-05, "epoch": 0.8990536277602523, "percentage": 11.24, "elapsed_time": "0:47:41", "remaining_time": "6:16:37"} +{"current_steps": 290, "total_steps": 2536, "loss": 0.5297, "lr": 9.70762200016707e-05, "epoch": 0.9148264984227129, "percentage": 11.44, "elapsed_time": "0:48:49", "remaining_time": "6:18:05"} +{"current_steps": 295, "total_steps": 2536, "loss": 0.5364, "lr": 9.697041894840865e-05, "epoch": 0.9305993690851735, "percentage": 11.63, "elapsed_time": "0:49:54", "remaining_time": "6:19:11"} +{"current_steps": 300, "total_steps": 2536, "loss": 0.5107, "lr": 9.68627972375228e-05, "epoch": 0.9463722397476341, "percentage": 11.83, "elapsed_time": "0:51:05", "remaining_time": "6:20:50"} +{"current_steps": 305, "total_steps": 2536, "loss": 0.4958, "lr": 9.675335904062353e-05, "epoch": 0.9621451104100947, "percentage": 12.03, "elapsed_time": "1:03:54", "remaining_time": "7:47:30"} +{"current_steps": 310, "total_steps": 2536, "loss": 0.5171, "lr": 9.66421085997315e-05, "epoch": 0.9779179810725552, "percentage": 12.22, "elapsed_time": "1:05:05", "remaining_time": "7:47:23"} +{"current_steps": 315, "total_steps": 2536, "loss": 0.5318, "lr": 9.65290502271132e-05, "epoch": 0.9936908517350158, "percentage": 12.42, "elapsed_time": "1:06:23", "remaining_time": "7:48:04"} +{"current_steps": 320, "total_steps": 2536, "loss": 0.4546, "lr": 9.641418830511377e-05, "epoch": 1.0094637223974763, "percentage": 12.62, "elapsed_time": "1:07:40", "remaining_time": "7:48:36"} +{"current_steps": 325, "total_steps": 2536, "loss": 0.4463, "lr": 9.62975272859872e-05, "epoch": 1.025236593059937, "percentage": 12.82, "elapsed_time": "1:08:46", "remaining_time": "7:47:55"} +{"current_steps": 330, "total_steps": 2536, "loss": 0.4202, "lr": 9.617907169172367e-05, "epoch": 1.0410094637223974, "percentage": 13.01, "elapsed_time": "1:09:52", "remaining_time": "7:47:04"} +{"current_steps": 335, "total_steps": 2536, "loss": 0.4191, "lr": 9.605882611387432e-05, "epoch": 1.0567823343848581, "percentage": 13.21, "elapsed_time": "1:10:53", "remaining_time": "7:45:46"} +{"current_steps": 340, "total_steps": 2536, "loss": 0.4242, "lr": 9.593679521337327e-05, "epoch": 1.0725552050473186, "percentage": 13.41, "elapsed_time": "1:12:05", "remaining_time": "7:45:36"} +{"current_steps": 345, "total_steps": 2536, "loss": 0.4375, "lr": 9.581298372035695e-05, "epoch": 1.088328075709779, "percentage": 13.6, "elapsed_time": "1:13:12", "remaining_time": "7:44:57"} +{"current_steps": 350, "total_steps": 2536, "loss": 0.4139, "lr": 9.56873964339807e-05, "epoch": 1.1041009463722398, "percentage": 13.8, "elapsed_time": "1:14:20", "remaining_time": "7:44:19"} +{"current_steps": 355, "total_steps": 2536, "loss": 0.4362, "lr": 9.556003822223287e-05, "epoch": 1.1198738170347002, "percentage": 14.0, "elapsed_time": "1:15:28", "remaining_time": "7:43:40"} +{"current_steps": 360, "total_steps": 2536, "loss": 0.4258, "lr": 9.5430914021746e-05, "epoch": 1.135646687697161, "percentage": 14.2, "elapsed_time": "1:16:41", "remaining_time": "7:43:32"} +{"current_steps": 365, "total_steps": 2536, "loss": 0.4447, "lr": 9.530002883760552e-05, "epoch": 1.1514195583596214, "percentage": 14.39, "elapsed_time": "1:17:46", "remaining_time": "7:42:38"} +{"current_steps": 370, "total_steps": 2536, "loss": 0.4143, "lr": 9.516738774315577e-05, "epoch": 1.167192429022082, "percentage": 14.59, "elapsed_time": "1:18:59", "remaining_time": "7:42:27"} +{"current_steps": 375, "total_steps": 2536, "loss": 0.4281, "lr": 9.503299587980331e-05, "epoch": 1.1829652996845426, "percentage": 14.79, "elapsed_time": "1:20:10", "remaining_time": "7:42:01"} +{"current_steps": 380, "total_steps": 2536, "loss": 0.4347, "lr": 9.489685845681762e-05, "epoch": 1.1987381703470033, "percentage": 14.98, "elapsed_time": "1:21:20", "remaining_time": "7:41:32"} +{"current_steps": 385, "total_steps": 2536, "loss": 0.4368, "lr": 9.47589807511292e-05, "epoch": 1.2145110410094637, "percentage": 15.18, "elapsed_time": "1:22:29", "remaining_time": "7:40:52"} +{"current_steps": 390, "total_steps": 2536, "loss": 0.4168, "lr": 9.461936810712507e-05, "epoch": 1.2302839116719242, "percentage": 15.38, "elapsed_time": "1:23:33", "remaining_time": "7:39:45"} +{"current_steps": 395, "total_steps": 2536, "loss": 0.4415, "lr": 9.447802593644152e-05, "epoch": 1.2460567823343849, "percentage": 15.58, "elapsed_time": "1:24:40", "remaining_time": "7:38:59"} +{"current_steps": 400, "total_steps": 2536, "loss": 0.419, "lr": 9.433495971775444e-05, "epoch": 1.2618296529968454, "percentage": 15.77, "elapsed_time": "1:25:52", "remaining_time": "7:38:32"} +{"current_steps": 405, "total_steps": 2536, "loss": 0.4336, "lr": 9.419017499656686e-05, "epoch": 1.277602523659306, "percentage": 15.97, "elapsed_time": "1:37:20", "remaining_time": "8:32:13"} +{"current_steps": 410, "total_steps": 2536, "loss": 0.4441, "lr": 9.404367738499409e-05, "epoch": 1.2933753943217665, "percentage": 16.17, "elapsed_time": "1:38:31", "remaining_time": "8:30:51"} +{"current_steps": 415, "total_steps": 2536, "loss": 0.4359, "lr": 9.38954725615461e-05, "epoch": 1.3091482649842272, "percentage": 16.36, "elapsed_time": "1:39:37", "remaining_time": "8:29:12"} +{"current_steps": 420, "total_steps": 2536, "loss": 0.4434, "lr": 9.374556627090749e-05, "epoch": 1.3249211356466877, "percentage": 16.56, "elapsed_time": "1:40:44", "remaining_time": "8:27:32"} +{"current_steps": 425, "total_steps": 2536, "loss": 0.4405, "lr": 9.359396432371476e-05, "epoch": 1.3406940063091484, "percentage": 16.76, "elapsed_time": "1:41:54", "remaining_time": "8:26:09"} +{"current_steps": 430, "total_steps": 2536, "loss": 0.4582, "lr": 9.344067259633112e-05, "epoch": 1.3564668769716088, "percentage": 16.96, "elapsed_time": "1:43:09", "remaining_time": "8:25:13"} +{"current_steps": 435, "total_steps": 2536, "loss": 0.4309, "lr": 9.328569703061862e-05, "epoch": 1.3722397476340693, "percentage": 17.15, "elapsed_time": "1:44:14", "remaining_time": "8:23:27"} +{"current_steps": 440, "total_steps": 2536, "loss": 0.4341, "lr": 9.3129043633708e-05, "epoch": 1.38801261829653, "percentage": 17.35, "elapsed_time": "1:45:17", "remaining_time": "8:21:33"} +{"current_steps": 445, "total_steps": 2536, "loss": 0.4132, "lr": 9.297071847776568e-05, "epoch": 1.4037854889589905, "percentage": 17.55, "elapsed_time": "1:46:29", "remaining_time": "8:20:21"} +{"current_steps": 450, "total_steps": 2536, "loss": 0.4408, "lr": 9.281072769975847e-05, "epoch": 1.4195583596214512, "percentage": 17.74, "elapsed_time": "1:47:40", "remaining_time": "8:19:07"} +{"current_steps": 455, "total_steps": 2536, "loss": 0.4422, "lr": 9.264907750121568e-05, "epoch": 1.4353312302839116, "percentage": 17.94, "elapsed_time": "1:48:49", "remaining_time": "8:17:44"} +{"current_steps": 460, "total_steps": 2536, "loss": 0.4453, "lr": 9.248577414798871e-05, "epoch": 1.4511041009463723, "percentage": 18.14, "elapsed_time": "1:49:51", "remaining_time": "8:15:48"} +{"current_steps": 465, "total_steps": 2536, "loss": 0.4358, "lr": 9.232082397000826e-05, "epoch": 1.4668769716088328, "percentage": 18.34, "elapsed_time": "1:51:00", "remaining_time": "8:14:26"} +{"current_steps": 470, "total_steps": 2536, "loss": 0.4281, "lr": 9.215423336103884e-05, "epoch": 1.4826498422712935, "percentage": 18.53, "elapsed_time": "1:52:10", "remaining_time": "8:13:04"} +{"current_steps": 475, "total_steps": 2536, "loss": 0.4424, "lr": 9.198600877843105e-05, "epoch": 1.498422712933754, "percentage": 18.73, "elapsed_time": "1:53:11", "remaining_time": "8:11:06"} +{"current_steps": 480, "total_steps": 2536, "loss": 0.4507, "lr": 9.181615674287121e-05, "epoch": 1.5141955835962144, "percentage": 18.93, "elapsed_time": "1:54:24", "remaining_time": "8:10:02"} +{"current_steps": 485, "total_steps": 2536, "loss": 0.4405, "lr": 9.164468383812864e-05, "epoch": 1.5299684542586751, "percentage": 19.12, "elapsed_time": "1:55:27", "remaining_time": "8:08:15"} +{"current_steps": 490, "total_steps": 2536, "loss": 0.4294, "lr": 9.147159671080049e-05, "epoch": 1.5457413249211358, "percentage": 19.32, "elapsed_time": "1:56:37", "remaining_time": "8:06:57"} +{"current_steps": 495, "total_steps": 2536, "loss": 0.4239, "lr": 9.129690207005402e-05, "epoch": 1.5615141955835963, "percentage": 19.52, "elapsed_time": "1:57:50", "remaining_time": "8:05:53"} +{"current_steps": 500, "total_steps": 2536, "loss": 0.4347, "lr": 9.11206066873666e-05, "epoch": 1.5772870662460567, "percentage": 19.72, "elapsed_time": "1:58:53", "remaining_time": "8:04:09"} +{"current_steps": 505, "total_steps": 2536, "loss": 0.4593, "lr": 9.094271739626326e-05, "epoch": 1.5930599369085172, "percentage": 19.91, "elapsed_time": "2:11:33", "remaining_time": "8:49:06"} +{"current_steps": 510, "total_steps": 2536, "loss": 0.4157, "lr": 9.076324109205174e-05, "epoch": 1.608832807570978, "percentage": 20.11, "elapsed_time": "2:12:45", "remaining_time": "8:47:23"} +{"current_steps": 515, "total_steps": 2536, "loss": 0.4525, "lr": 9.058218473155528e-05, "epoch": 1.6246056782334386, "percentage": 20.31, "elapsed_time": "2:13:55", "remaining_time": "8:45:33"} +{"current_steps": 520, "total_steps": 2536, "loss": 0.4214, "lr": 9.039955533284292e-05, "epoch": 1.640378548895899, "percentage": 20.5, "elapsed_time": "2:15:04", "remaining_time": "8:43:41"} +{"current_steps": 525, "total_steps": 2536, "loss": 0.4461, "lr": 9.021535997495749e-05, "epoch": 1.6561514195583595, "percentage": 20.7, "elapsed_time": "2:16:10", "remaining_time": "8:41:35"} +{"current_steps": 530, "total_steps": 2536, "loss": 0.4407, "lr": 9.002960579764116e-05, "epoch": 1.6719242902208202, "percentage": 20.9, "elapsed_time": "2:17:22", "remaining_time": "8:39:56"} +{"current_steps": 535, "total_steps": 2536, "loss": 0.4314, "lr": 8.984230000105882e-05, "epoch": 1.687697160883281, "percentage": 21.1, "elapsed_time": "2:18:27", "remaining_time": "8:37:52"} +{"current_steps": 540, "total_steps": 2536, "loss": 0.4398, "lr": 8.965344984551882e-05, "epoch": 1.7034700315457414, "percentage": 21.29, "elapsed_time": "2:19:29", "remaining_time": "8:35:37"} +{"current_steps": 545, "total_steps": 2536, "loss": 0.4389, "lr": 8.946306265119167e-05, "epoch": 1.7192429022082019, "percentage": 21.49, "elapsed_time": "2:20:40", "remaining_time": "8:33:53"} +{"current_steps": 550, "total_steps": 2536, "loss": 0.4288, "lr": 8.927114579782625e-05, "epoch": 1.7350157728706623, "percentage": 21.69, "elapsed_time": "2:21:51", "remaining_time": "8:32:14"} +{"current_steps": 555, "total_steps": 2536, "loss": 0.4424, "lr": 8.907770672446381e-05, "epoch": 1.750788643533123, "percentage": 21.88, "elapsed_time": "2:23:02", "remaining_time": "8:30:35"} +{"current_steps": 560, "total_steps": 2536, "loss": 0.4189, "lr": 8.888275292914948e-05, "epoch": 1.7665615141955837, "percentage": 22.08, "elapsed_time": "2:24:11", "remaining_time": "8:28:48"} +{"current_steps": 565, "total_steps": 2536, "loss": 0.4083, "lr": 8.868629196864182e-05, "epoch": 1.7823343848580442, "percentage": 22.28, "elapsed_time": "2:25:14", "remaining_time": "8:26:40"} +{"current_steps": 570, "total_steps": 2536, "loss": 0.4458, "lr": 8.848833145811976e-05, "epoch": 1.7981072555205047, "percentage": 22.48, "elapsed_time": "2:26:24", "remaining_time": "8:25:00"} +{"current_steps": 575, "total_steps": 2536, "loss": 0.4215, "lr": 8.828887907088753e-05, "epoch": 1.8138801261829653, "percentage": 22.67, "elapsed_time": "2:27:34", "remaining_time": "8:23:18"} +{"current_steps": 580, "total_steps": 2536, "loss": 0.439, "lr": 8.808794253807707e-05, "epoch": 1.8296529968454258, "percentage": 22.87, "elapsed_time": "2:28:37", "remaining_time": "8:21:14"} +{"current_steps": 585, "total_steps": 2536, "loss": 0.4216, "lr": 8.788552964834859e-05, "epoch": 1.8454258675078865, "percentage": 23.07, "elapsed_time": "2:29:45", "remaining_time": "8:19:28"} +{"current_steps": 590, "total_steps": 2536, "loss": 0.4411, "lr": 8.768164824758846e-05, "epoch": 1.861198738170347, "percentage": 23.26, "elapsed_time": "2:30:50", "remaining_time": "8:17:30"} +{"current_steps": 595, "total_steps": 2536, "loss": 0.4492, "lr": 8.747630623860521e-05, "epoch": 1.8769716088328074, "percentage": 23.46, "elapsed_time": "2:32:04", "remaining_time": "8:16:05"} +{"current_steps": 600, "total_steps": 2536, "loss": 0.4475, "lr": 8.726951158082311e-05, "epoch": 1.8927444794952681, "percentage": 23.66, "elapsed_time": "2:33:16", "remaining_time": "8:14:33"} +{"current_steps": 605, "total_steps": 2536, "loss": 0.4272, "lr": 8.706127228997376e-05, "epoch": 1.9085173501577288, "percentage": 23.86, "elapsed_time": "2:45:02", "remaining_time": "8:46:46"} +{"current_steps": 610, "total_steps": 2536, "loss": 0.4211, "lr": 8.685159643778528e-05, "epoch": 1.9242902208201893, "percentage": 24.05, "elapsed_time": "2:46:12", "remaining_time": "8:44:47"} +{"current_steps": 615, "total_steps": 2536, "loss": 0.4143, "lr": 8.664049215166955e-05, "epoch": 1.9400630914826498, "percentage": 24.25, "elapsed_time": "2:47:22", "remaining_time": "8:42:49"} +{"current_steps": 620, "total_steps": 2536, "loss": 0.4392, "lr": 8.6427967614407e-05, "epoch": 1.9558359621451105, "percentage": 24.45, "elapsed_time": "2:48:27", "remaining_time": "8:40:36"} +{"current_steps": 625, "total_steps": 2536, "loss": 0.4323, "lr": 8.621403106382968e-05, "epoch": 1.971608832807571, "percentage": 24.65, "elapsed_time": "2:49:36", "remaining_time": "8:38:34"} +{"current_steps": 630, "total_steps": 2536, "loss": 0.4511, "lr": 8.599869079250165e-05, "epoch": 1.9873817034700316, "percentage": 24.84, "elapsed_time": "2:50:44", "remaining_time": "8:36:33"} +{"current_steps": 635, "total_steps": 2536, "loss": 0.4148, "lr": 8.578195514739784e-05, "epoch": 2.003154574132492, "percentage": 25.04, "elapsed_time": "2:51:48", "remaining_time": "8:34:19"} +{"current_steps": 640, "total_steps": 2536, "loss": 0.3381, "lr": 8.556383252958026e-05, "epoch": 2.0189274447949526, "percentage": 25.24, "elapsed_time": "2:52:57", "remaining_time": "8:32:23"} +{"current_steps": 645, "total_steps": 2536, "loss": 0.3389, "lr": 8.534433139387259e-05, "epoch": 2.034700315457413, "percentage": 25.43, "elapsed_time": "2:54:04", "remaining_time": "8:30:21"} +{"current_steps": 650, "total_steps": 2536, "loss": 0.3169, "lr": 8.512346024853219e-05, "epoch": 2.050473186119874, "percentage": 25.63, "elapsed_time": "2:55:12", "remaining_time": "8:28:23"} +{"current_steps": 655, "total_steps": 2536, "loss": 0.3269, "lr": 8.490122765492057e-05, "epoch": 2.0662460567823344, "percentage": 25.83, "elapsed_time": "2:56:21", "remaining_time": "8:26:28"} +{"current_steps": 660, "total_steps": 2536, "loss": 0.325, "lr": 8.467764222717136e-05, "epoch": 2.082018927444795, "percentage": 26.03, "elapsed_time": "2:57:29", "remaining_time": "8:24:29"} +{"current_steps": 665, "total_steps": 2536, "loss": 0.333, "lr": 8.445271263185646e-05, "epoch": 2.0977917981072554, "percentage": 26.22, "elapsed_time": "2:58:34", "remaining_time": "8:22:26"} +{"current_steps": 670, "total_steps": 2536, "loss": 0.3071, "lr": 8.422644758765012e-05, "epoch": 2.1135646687697163, "percentage": 26.42, "elapsed_time": "2:59:41", "remaining_time": "8:20:26"} +{"current_steps": 675, "total_steps": 2536, "loss": 0.3367, "lr": 8.399885586499101e-05, "epoch": 2.1293375394321767, "percentage": 26.62, "elapsed_time": "3:00:51", "remaining_time": "8:18:38"} +{"current_steps": 680, "total_steps": 2536, "loss": 0.3413, "lr": 8.376994628574219e-05, "epoch": 2.145110410094637, "percentage": 26.81, "elapsed_time": "3:02:00", "remaining_time": "8:16:47"} +{"current_steps": 685, "total_steps": 2536, "loss": 0.3347, "lr": 8.353972772284927e-05, "epoch": 2.1608832807570977, "percentage": 27.01, "elapsed_time": "3:03:09", "remaining_time": "8:14:56"} +{"current_steps": 690, "total_steps": 2536, "loss": 0.3309, "lr": 8.330820909999633e-05, "epoch": 2.176656151419558, "percentage": 27.21, "elapsed_time": "3:04:20", "remaining_time": "8:13:11"} +{"current_steps": 695, "total_steps": 2536, "loss": 0.3455, "lr": 8.307539939126016e-05, "epoch": 2.192429022082019, "percentage": 27.41, "elapsed_time": "3:05:25", "remaining_time": "8:11:09"} +{"current_steps": 700, "total_steps": 2536, "loss": 0.3309, "lr": 8.284130762076235e-05, "epoch": 2.2082018927444795, "percentage": 27.6, "elapsed_time": "3:06:33", "remaining_time": "8:09:18"} +{"current_steps": 705, "total_steps": 2536, "loss": 0.3183, "lr": 8.260594286231947e-05, "epoch": 2.22397476340694, "percentage": 27.8, "elapsed_time": "3:15:23", "remaining_time": "8:27:28"} +{"current_steps": 710, "total_steps": 2536, "loss": 0.349, "lr": 8.236931423909138e-05, "epoch": 2.2397476340694005, "percentage": 28.0, "elapsed_time": "3:16:37", "remaining_time": "8:25:42"} +{"current_steps": 715, "total_steps": 2536, "loss": 0.3271, "lr": 8.213143092322769e-05, "epoch": 2.2555205047318614, "percentage": 28.19, "elapsed_time": "3:17:43", "remaining_time": "8:23:35"} +{"current_steps": 720, "total_steps": 2536, "loss": 0.3097, "lr": 8.189230213551202e-05, "epoch": 2.271293375394322, "percentage": 28.39, "elapsed_time": "3:18:50", "remaining_time": "8:21:32"} +{"current_steps": 725, "total_steps": 2536, "loss": 0.3464, "lr": 8.165193714500481e-05, "epoch": 2.2870662460567823, "percentage": 28.59, "elapsed_time": "3:19:54", "remaining_time": "8:19:20"} +{"current_steps": 730, "total_steps": 2536, "loss": 0.3422, "lr": 8.141034526868389e-05, "epoch": 2.302839116719243, "percentage": 28.79, "elapsed_time": "3:21:05", "remaining_time": "8:17:29"} +{"current_steps": 735, "total_steps": 2536, "loss": 0.3341, "lr": 8.116753587108339e-05, "epoch": 2.3186119873817033, "percentage": 28.98, "elapsed_time": "3:22:13", "remaining_time": "8:15:31"} +{"current_steps": 740, "total_steps": 2536, "loss": 0.3144, "lr": 8.092351836393076e-05, "epoch": 2.334384858044164, "percentage": 29.18, "elapsed_time": "3:23:24", "remaining_time": "8:13:41"} +{"current_steps": 745, "total_steps": 2536, "loss": 0.344, "lr": 8.067830220578191e-05, "epoch": 2.3501577287066246, "percentage": 29.38, "elapsed_time": "3:24:35", "remaining_time": "8:11:50"} +{"current_steps": 750, "total_steps": 2536, "loss": 0.336, "lr": 8.043189690165467e-05, "epoch": 2.365930599369085, "percentage": 29.57, "elapsed_time": "3:25:46", "remaining_time": "8:10:01"} +{"current_steps": 755, "total_steps": 2536, "loss": 0.3277, "lr": 8.018431200266023e-05, "epoch": 2.3817034700315456, "percentage": 29.77, "elapsed_time": "3:26:56", "remaining_time": "8:08:10"} +{"current_steps": 760, "total_steps": 2536, "loss": 0.3289, "lr": 7.993555710563303e-05, "epoch": 2.3974763406940065, "percentage": 29.97, "elapsed_time": "3:28:02", "remaining_time": "8:06:08"} +{"current_steps": 765, "total_steps": 2536, "loss": 0.3337, "lr": 7.968564185275873e-05, "epoch": 2.413249211356467, "percentage": 30.17, "elapsed_time": "3:29:13", "remaining_time": "8:04:21"} +{"current_steps": 770, "total_steps": 2536, "loss": 0.3592, "lr": 7.943457593120045e-05, "epoch": 2.4290220820189274, "percentage": 30.36, "elapsed_time": "3:30:22", "remaining_time": "8:02:29"} +{"current_steps": 775, "total_steps": 2536, "loss": 0.33, "lr": 7.918236907272327e-05, "epoch": 2.444794952681388, "percentage": 30.56, "elapsed_time": "3:31:34", "remaining_time": "8:00:44"} +{"current_steps": 780, "total_steps": 2536, "loss": 0.3429, "lr": 7.892903105331712e-05, "epoch": 2.4605678233438484, "percentage": 30.76, "elapsed_time": "3:32:40", "remaining_time": "7:58:47"} +{"current_steps": 785, "total_steps": 2536, "loss": 0.354, "lr": 7.867457169281765e-05, "epoch": 2.4763406940063093, "percentage": 30.95, "elapsed_time": "3:33:46", "remaining_time": "7:56:50"} +{"current_steps": 790, "total_steps": 2536, "loss": 0.3407, "lr": 7.841900085452574e-05, "epoch": 2.4921135646687698, "percentage": 31.15, "elapsed_time": "3:34:55", "remaining_time": "7:55:00"} +{"current_steps": 795, "total_steps": 2536, "loss": 0.3323, "lr": 7.816232844482516e-05, "epoch": 2.5078864353312302, "percentage": 31.35, "elapsed_time": "3:35:59", "remaining_time": "7:53:00"} +{"current_steps": 800, "total_steps": 2536, "loss": 0.3453, "lr": 7.790456441279853e-05, "epoch": 2.5236593059936907, "percentage": 31.55, "elapsed_time": "3:37:09", "remaining_time": "7:51:12"} +{"current_steps": 805, "total_steps": 2536, "loss": 0.3405, "lr": 7.764571874984174e-05, "epoch": 2.5394321766561516, "percentage": 31.74, "elapsed_time": "3:50:33", "remaining_time": "8:15:45"} +{"current_steps": 810, "total_steps": 2536, "loss": 0.3383, "lr": 7.73858014892766e-05, "epoch": 2.555205047318612, "percentage": 31.94, "elapsed_time": "3:51:42", "remaining_time": "8:13:44"} +{"current_steps": 815, "total_steps": 2536, "loss": 0.3684, "lr": 7.712482270596199e-05, "epoch": 2.5709779179810726, "percentage": 32.14, "elapsed_time": "3:52:50", "remaining_time": "8:11:41"} +{"current_steps": 820, "total_steps": 2536, "loss": 0.3348, "lr": 7.686279251590331e-05, "epoch": 2.586750788643533, "percentage": 32.33, "elapsed_time": "3:53:46", "remaining_time": "8:09:12"} +{"current_steps": 825, "total_steps": 2536, "loss": 0.3327, "lr": 7.659972107586035e-05, "epoch": 2.6025236593059935, "percentage": 32.53, "elapsed_time": "3:54:52", "remaining_time": "8:07:07"} +{"current_steps": 830, "total_steps": 2536, "loss": 0.3234, "lr": 7.633561858295364e-05, "epoch": 2.6182965299684544, "percentage": 32.73, "elapsed_time": "3:56:04", "remaining_time": "8:05:14"} +{"current_steps": 835, "total_steps": 2536, "loss": 0.3435, "lr": 7.607049527426916e-05, "epoch": 2.634069400630915, "percentage": 32.93, "elapsed_time": "3:57:13", "remaining_time": "8:03:14"} +{"current_steps": 840, "total_steps": 2536, "loss": 0.3437, "lr": 7.580436142646155e-05, "epoch": 2.6498422712933754, "percentage": 33.12, "elapsed_time": "3:58:17", "remaining_time": "8:01:06"} +{"current_steps": 845, "total_steps": 2536, "loss": 0.3291, "lr": 7.55372273553557e-05, "epoch": 2.665615141955836, "percentage": 33.32, "elapsed_time": "3:59:24", "remaining_time": "7:59:05"} +{"current_steps": 850, "total_steps": 2536, "loss": 0.345, "lr": 7.526910341554703e-05, "epoch": 2.6813880126182967, "percentage": 33.52, "elapsed_time": "4:00:29", "remaining_time": "7:57:00"} +{"current_steps": 855, "total_steps": 2536, "loss": 0.3484, "lr": 7.500000000000001e-05, "epoch": 2.697160883280757, "percentage": 33.71, "elapsed_time": "4:01:41", "remaining_time": "7:55:10"} +{"current_steps": 860, "total_steps": 2536, "loss": 0.3473, "lr": 7.472992753964532e-05, "epoch": 2.7129337539432177, "percentage": 33.91, "elapsed_time": "4:02:49", "remaining_time": "7:53:12"} +{"current_steps": 865, "total_steps": 2536, "loss": 0.3463, "lr": 7.445889650297559e-05, "epoch": 2.728706624605678, "percentage": 34.11, "elapsed_time": "4:03:55", "remaining_time": "7:51:13"} +{"current_steps": 870, "total_steps": 2536, "loss": 0.3287, "lr": 7.418691739563957e-05, "epoch": 2.7444794952681386, "percentage": 34.31, "elapsed_time": "4:05:04", "remaining_time": "7:49:18"} +{"current_steps": 875, "total_steps": 2536, "loss": 0.3552, "lr": 7.391400076003492e-05, "epoch": 2.7602523659305995, "percentage": 34.5, "elapsed_time": "4:06:14", "remaining_time": "7:47:26"} +{"current_steps": 880, "total_steps": 2536, "loss": 0.3412, "lr": 7.36401571748996e-05, "epoch": 2.77602523659306, "percentage": 34.7, "elapsed_time": "4:07:22", "remaining_time": "7:45:29"} +{"current_steps": 885, "total_steps": 2536, "loss": 0.3486, "lr": 7.336539725490178e-05, "epoch": 2.7917981072555205, "percentage": 34.9, "elapsed_time": "4:08:24", "remaining_time": "7:43:24"} +{"current_steps": 890, "total_steps": 2536, "loss": 0.3511, "lr": 7.30897316502284e-05, "epoch": 2.807570977917981, "percentage": 35.09, "elapsed_time": "4:09:29", "remaining_time": "7:41:24"} +{"current_steps": 895, "total_steps": 2536, "loss": 0.3477, "lr": 7.281317104617239e-05, "epoch": 2.823343848580442, "percentage": 35.29, "elapsed_time": "4:10:37", "remaining_time": "7:39:30"} +{"current_steps": 900, "total_steps": 2536, "loss": 0.3471, "lr": 7.253572616271844e-05, "epoch": 2.8391167192429023, "percentage": 35.49, "elapsed_time": "4:11:47", "remaining_time": "7:37:42"} +{"current_steps": 905, "total_steps": 2536, "loss": 0.3324, "lr": 7.225740775412751e-05, "epoch": 2.854889589905363, "percentage": 35.69, "elapsed_time": "4:23:31", "remaining_time": "7:54:55"} +{"current_steps": 910, "total_steps": 2536, "loss": 0.3266, "lr": 7.197822660851991e-05, "epoch": 2.8706624605678233, "percentage": 35.88, "elapsed_time": "4:24:38", "remaining_time": "7:52:52"} +{"current_steps": 915, "total_steps": 2536, "loss": 0.3351, "lr": 7.169819354745725e-05, "epoch": 2.8864353312302837, "percentage": 36.08, "elapsed_time": "4:25:50", "remaining_time": "7:50:56"} +{"current_steps": 920, "total_steps": 2536, "loss": 0.3322, "lr": 7.141731942552288e-05, "epoch": 2.9022082018927446, "percentage": 36.28, "elapsed_time": "4:26:58", "remaining_time": "7:48:56"} +{"current_steps": 925, "total_steps": 2536, "loss": 0.3419, "lr": 7.113561512990119e-05, "epoch": 2.917981072555205, "percentage": 36.47, "elapsed_time": "4:28:05", "remaining_time": "7:46:55"} +{"current_steps": 930, "total_steps": 2536, "loss": 0.3696, "lr": 7.085309157995557e-05, "epoch": 2.9337539432176656, "percentage": 36.67, "elapsed_time": "4:29:15", "remaining_time": "7:44:57"} +{"current_steps": 935, "total_steps": 2536, "loss": 0.3571, "lr": 7.056975972680517e-05, "epoch": 2.949526813880126, "percentage": 36.87, "elapsed_time": "4:30:26", "remaining_time": "7:43:04"} +{"current_steps": 940, "total_steps": 2536, "loss": 0.3197, "lr": 7.028563055290044e-05, "epoch": 2.965299684542587, "percentage": 37.07, "elapsed_time": "4:31:36", "remaining_time": "7:41:09"} +{"current_steps": 945, "total_steps": 2536, "loss": 0.355, "lr": 7.000071507159744e-05, "epoch": 2.9810725552050474, "percentage": 37.26, "elapsed_time": "4:32:37", "remaining_time": "7:38:59"} +{"current_steps": 950, "total_steps": 2536, "loss": 0.3426, "lr": 6.971502432673085e-05, "epoch": 2.996845425867508, "percentage": 37.46, "elapsed_time": "4:33:44", "remaining_time": "7:37:00"} +{"current_steps": 955, "total_steps": 2536, "loss": 0.2364, "lr": 6.942856939218599e-05, "epoch": 3.0126182965299684, "percentage": 37.66, "elapsed_time": "4:34:51", "remaining_time": "7:35:02"} +{"current_steps": 960, "total_steps": 2536, "loss": 0.233, "lr": 6.914136137146951e-05, "epoch": 3.028391167192429, "percentage": 37.85, "elapsed_time": "4:35:53", "remaining_time": "7:32:55"} +{"current_steps": 965, "total_steps": 2536, "loss": 0.2151, "lr": 6.885341139727912e-05, "epoch": 3.0441640378548898, "percentage": 38.05, "elapsed_time": "4:37:07", "remaining_time": "7:31:09"} +{"current_steps": 970, "total_steps": 2536, "loss": 0.214, "lr": 6.856473063107187e-05, "epoch": 3.0599369085173502, "percentage": 38.25, "elapsed_time": "4:38:17", "remaining_time": "7:29:17"} +{"current_steps": 975, "total_steps": 2536, "loss": 0.2324, "lr": 6.827533026263169e-05, "epoch": 3.0757097791798107, "percentage": 38.45, "elapsed_time": "4:39:21", "remaining_time": "7:27:15"} +{"current_steps": 980, "total_steps": 2536, "loss": 0.2165, "lr": 6.798522150963552e-05, "epoch": 3.091482649842271, "percentage": 38.64, "elapsed_time": "4:40:32", "remaining_time": "7:25:25"} +{"current_steps": 985, "total_steps": 2536, "loss": 0.2348, "lr": 6.769441561721863e-05, "epoch": 3.107255520504732, "percentage": 38.84, "elapsed_time": "4:41:41", "remaining_time": "7:23:33"} +{"current_steps": 990, "total_steps": 2536, "loss": 0.2303, "lr": 6.740292385753858e-05, "epoch": 3.1230283911671926, "percentage": 39.04, "elapsed_time": "4:42:55", "remaining_time": "7:21:48"} +{"current_steps": 995, "total_steps": 2536, "loss": 0.2271, "lr": 6.711075752933847e-05, "epoch": 3.138801261829653, "percentage": 39.24, "elapsed_time": "4:44:00", "remaining_time": "7:19:51"} +{"current_steps": 1000, "total_steps": 2536, "loss": 0.2236, "lr": 6.681792795750875e-05, "epoch": 3.1545741324921135, "percentage": 39.43, "elapsed_time": "4:45:10", "remaining_time": "7:18:02"} +{"current_steps": 1005, "total_steps": 2536, "loss": 0.2405, "lr": 6.652444649264856e-05, "epoch": 3.170347003154574, "percentage": 39.63, "elapsed_time": "4:50:52", "remaining_time": "7:23:06"} +{"current_steps": 1010, "total_steps": 2536, "loss": 0.2386, "lr": 6.623032451062542e-05, "epoch": 3.186119873817035, "percentage": 39.83, "elapsed_time": "4:52:00", "remaining_time": "7:21:11"} +{"current_steps": 1015, "total_steps": 2536, "loss": 0.2375, "lr": 6.593557341213457e-05, "epoch": 3.2018927444794953, "percentage": 40.02, "elapsed_time": "4:53:02", "remaining_time": "7:19:07"} +{"current_steps": 1020, "total_steps": 2536, "loss": 0.2403, "lr": 6.564020462225679e-05, "epoch": 3.217665615141956, "percentage": 40.22, "elapsed_time": "4:54:04", "remaining_time": "7:17:03"} +{"current_steps": 1025, "total_steps": 2536, "loss": 0.2277, "lr": 6.534422959001585e-05, "epoch": 3.2334384858044163, "percentage": 40.42, "elapsed_time": "4:55:11", "remaining_time": "7:15:09"} +{"current_steps": 1030, "total_steps": 2536, "loss": 0.23, "lr": 6.504765978793443e-05, "epoch": 3.249211356466877, "percentage": 40.62, "elapsed_time": "4:56:17", "remaining_time": "7:13:13"} +{"current_steps": 1035, "total_steps": 2536, "loss": 0.2298, "lr": 6.475050671158961e-05, "epoch": 3.2649842271293377, "percentage": 40.81, "elapsed_time": "4:57:24", "remaining_time": "7:11:18"} +{"current_steps": 1040, "total_steps": 2536, "loss": 0.2231, "lr": 6.445278187916722e-05, "epoch": 3.280757097791798, "percentage": 41.01, "elapsed_time": "4:58:29", "remaining_time": "7:09:22"} +{"current_steps": 1045, "total_steps": 2536, "loss": 0.2406, "lr": 6.415449683101537e-05, "epoch": 3.2965299684542586, "percentage": 41.21, "elapsed_time": "4:59:39", "remaining_time": "7:07:32"} +{"current_steps": 1050, "total_steps": 2536, "loss": 0.2343, "lr": 6.385566312919716e-05, "epoch": 3.312302839116719, "percentage": 41.4, "elapsed_time": "5:00:48", "remaining_time": "7:05:42"} +{"current_steps": 1055, "total_steps": 2536, "loss": 0.221, "lr": 6.355629235704248e-05, "epoch": 3.32807570977918, "percentage": 41.6, "elapsed_time": "5:01:58", "remaining_time": "7:03:55"} +{"current_steps": 1060, "total_steps": 2536, "loss": 0.2393, "lr": 6.3256396118699e-05, "epoch": 3.3438485804416405, "percentage": 41.8, "elapsed_time": "5:03:05", "remaining_time": "7:02:02"} +{"current_steps": 1065, "total_steps": 2536, "loss": 0.2323, "lr": 6.295598603868246e-05, "epoch": 3.359621451104101, "percentage": 42.0, "elapsed_time": "5:04:09", "remaining_time": "7:00:07"} +{"current_steps": 1070, "total_steps": 2536, "loss": 0.2252, "lr": 6.265507376142594e-05, "epoch": 3.3753943217665614, "percentage": 42.19, "elapsed_time": "5:05:16", "remaining_time": "6:58:15"} +{"current_steps": 1075, "total_steps": 2536, "loss": 0.2266, "lr": 6.235367095082867e-05, "epoch": 3.3911671924290223, "percentage": 42.39, "elapsed_time": "5:06:23", "remaining_time": "6:56:24"} +{"current_steps": 1080, "total_steps": 2536, "loss": 0.239, "lr": 6.205178928980377e-05, "epoch": 3.406940063091483, "percentage": 42.59, "elapsed_time": "5:07:32", "remaining_time": "6:54:36"} +{"current_steps": 1085, "total_steps": 2536, "loss": 0.2496, "lr": 6.174944047982549e-05, "epoch": 3.4227129337539433, "percentage": 42.78, "elapsed_time": "5:08:42", "remaining_time": "6:52:50"} +{"current_steps": 1090, "total_steps": 2536, "loss": 0.2247, "lr": 6.144663624047564e-05, "epoch": 3.4384858044164037, "percentage": 42.98, "elapsed_time": "5:09:53", "remaining_time": "6:51:06"} +{"current_steps": 1095, "total_steps": 2536, "loss": 0.2368, "lr": 6.114338830898922e-05, "epoch": 3.454258675078864, "percentage": 43.18, "elapsed_time": "5:11:02", "remaining_time": "6:49:19"} +{"current_steps": 1100, "total_steps": 2536, "loss": 0.2427, "lr": 6.083970843979957e-05, "epoch": 3.470031545741325, "percentage": 43.38, "elapsed_time": "5:12:14", "remaining_time": "6:47:36"} +{"current_steps": 1105, "total_steps": 2536, "loss": 0.2328, "lr": 6.0535608404082724e-05, "epoch": 3.4858044164037856, "percentage": 43.57, "elapsed_time": "5:20:38", "remaining_time": "6:55:14"} +{"current_steps": 1110, "total_steps": 2536, "loss": 0.2434, "lr": 6.0231099989301086e-05, "epoch": 3.501577287066246, "percentage": 43.77, "elapsed_time": "5:21:47", "remaining_time": "6:53:24"} +{"current_steps": 1115, "total_steps": 2536, "loss": 0.2365, "lr": 5.9926194998746624e-05, "epoch": 3.5173501577287065, "percentage": 43.97, "elapsed_time": "5:22:57", "remaining_time": "6:51:36"} +{"current_steps": 1120, "total_steps": 2536, "loss": 0.226, "lr": 5.9620905251083196e-05, "epoch": 3.5331230283911674, "percentage": 44.16, "elapsed_time": "5:24:05", "remaining_time": "6:49:44"} +{"current_steps": 1125, "total_steps": 2536, "loss": 0.2384, "lr": 5.931524257988864e-05, "epoch": 3.548895899053628, "percentage": 44.36, "elapsed_time": "5:25:14", "remaining_time": "6:47:55"} +{"current_steps": 1130, "total_steps": 2536, "loss": 0.2344, "lr": 5.900921883319591e-05, "epoch": 3.5646687697160884, "percentage": 44.56, "elapsed_time": "5:26:24", "remaining_time": "6:46:08"} +{"current_steps": 1135, "total_steps": 2536, "loss": 0.2393, "lr": 5.870284587303394e-05, "epoch": 3.580441640378549, "percentage": 44.76, "elapsed_time": "5:27:28", "remaining_time": "6:44:13"} +{"current_steps": 1140, "total_steps": 2536, "loss": 0.2234, "lr": 5.839613557496776e-05, "epoch": 3.5962145110410093, "percentage": 44.95, "elapsed_time": "5:28:38", "remaining_time": "6:42:26"} +{"current_steps": 1145, "total_steps": 2536, "loss": 0.2269, "lr": 5.808909982763825e-05, "epoch": 3.61198738170347, "percentage": 45.15, "elapsed_time": "5:29:48", "remaining_time": "6:40:40"} +{"current_steps": 1150, "total_steps": 2536, "loss": 0.2335, "lr": 5.778175053230126e-05, "epoch": 3.6277602523659307, "percentage": 45.35, "elapsed_time": "5:30:55", "remaining_time": "6:38:50"} +{"current_steps": 1155, "total_steps": 2536, "loss": 0.2214, "lr": 5.747409960236637e-05, "epoch": 3.643533123028391, "percentage": 45.54, "elapsed_time": "5:32:02", "remaining_time": "6:37:00"} +{"current_steps": 1160, "total_steps": 2536, "loss": 0.2414, "lr": 5.716615896293501e-05, "epoch": 3.6593059936908516, "percentage": 45.74, "elapsed_time": "5:33:10", "remaining_time": "6:35:12"} +{"current_steps": 1165, "total_steps": 2536, "loss": 0.2357, "lr": 5.68579405503383e-05, "epoch": 3.6750788643533125, "percentage": 45.94, "elapsed_time": "5:34:17", "remaining_time": "6:33:24"} +{"current_steps": 1170, "total_steps": 2536, "loss": 0.2354, "lr": 5.654945631167433e-05, "epoch": 3.690851735015773, "percentage": 46.14, "elapsed_time": "5:35:30", "remaining_time": "6:31:42"} +{"current_steps": 1175, "total_steps": 2536, "loss": 0.2393, "lr": 5.624071820434508e-05, "epoch": 3.7066246056782335, "percentage": 46.33, "elapsed_time": "5:36:36", "remaining_time": "6:29:53"} +{"current_steps": 1180, "total_steps": 2536, "loss": 0.2374, "lr": 5.593173819559294e-05, "epoch": 3.722397476340694, "percentage": 46.53, "elapsed_time": "5:37:42", "remaining_time": "6:28:04"} +{"current_steps": 1185, "total_steps": 2536, "loss": 0.2301, "lr": 5.562252826203687e-05, "epoch": 3.7381703470031544, "percentage": 46.73, "elapsed_time": "5:38:45", "remaining_time": "6:26:12"} +{"current_steps": 1190, "total_steps": 2536, "loss": 0.2463, "lr": 5.531310038920805e-05, "epoch": 3.753943217665615, "percentage": 46.92, "elapsed_time": "5:39:53", "remaining_time": "6:24:26"} +{"current_steps": 1195, "total_steps": 2536, "loss": 0.2305, "lr": 5.500346657108545e-05, "epoch": 3.769716088328076, "percentage": 47.12, "elapsed_time": "5:41:03", "remaining_time": "6:22:44"} +{"current_steps": 1200, "total_steps": 2536, "loss": 0.2337, "lr": 5.469363880963082e-05, "epoch": 3.7854889589905363, "percentage": 47.32, "elapsed_time": "5:42:11", "remaining_time": "6:20:58"} +{"current_steps": 1205, "total_steps": 2536, "loss": 0.2386, "lr": 5.438362911432347e-05, "epoch": 3.8012618296529967, "percentage": 47.52, "elapsed_time": "5:54:05", "remaining_time": "6:31:06"} +{"current_steps": 1210, "total_steps": 2536, "loss": 0.2424, "lr": 5.407344950169486e-05, "epoch": 3.8170347003154577, "percentage": 47.71, "elapsed_time": "5:55:10", "remaining_time": "6:29:13"} +{"current_steps": 1215, "total_steps": 2536, "loss": 0.2444, "lr": 5.376311199486268e-05, "epoch": 3.832807570977918, "percentage": 47.91, "elapsed_time": "5:56:13", "remaining_time": "6:27:18"} +{"current_steps": 1220, "total_steps": 2536, "loss": 0.2337, "lr": 5.3452628623064934e-05, "epoch": 3.8485804416403786, "percentage": 48.11, "elapsed_time": "5:57:19", "remaining_time": "6:25:27"} +{"current_steps": 1225, "total_steps": 2536, "loss": 0.2377, "lr": 5.31420114211936e-05, "epoch": 3.864353312302839, "percentage": 48.3, "elapsed_time": "5:58:31", "remaining_time": "6:23:41"} +{"current_steps": 1230, "total_steps": 2536, "loss": 0.2263, "lr": 5.2831272429328116e-05, "epoch": 3.8801261829652995, "percentage": 48.5, "elapsed_time": "5:59:40", "remaining_time": "6:21:54"} +{"current_steps": 1235, "total_steps": 2536, "loss": 0.2235, "lr": 5.2520423692268775e-05, "epoch": 3.89589905362776, "percentage": 48.7, "elapsed_time": "6:00:53", "remaining_time": "6:20:10"} +{"current_steps": 1240, "total_steps": 2536, "loss": 0.2333, "lr": 5.220947725906975e-05, "epoch": 3.911671924290221, "percentage": 48.9, "elapsed_time": "6:02:04", "remaining_time": "6:18:25"} +{"current_steps": 1245, "total_steps": 2536, "loss": 0.2501, "lr": 5.18984451825721e-05, "epoch": 3.9274447949526814, "percentage": 49.09, "elapsed_time": "6:03:04", "remaining_time": "6:16:29"} +{"current_steps": 1250, "total_steps": 2536, "loss": 0.2365, "lr": 5.1587339518936585e-05, "epoch": 3.943217665615142, "percentage": 49.29, "elapsed_time": "6:04:10", "remaining_time": "6:14:39"} +{"current_steps": 1255, "total_steps": 2536, "loss": 0.2332, "lr": 5.127617232717631e-05, "epoch": 3.958990536277603, "percentage": 49.49, "elapsed_time": "6:05:20", "remaining_time": "6:12:54"} +{"current_steps": 1260, "total_steps": 2536, "loss": 0.2336, "lr": 5.096495566868935e-05, "epoch": 3.9747634069400632, "percentage": 49.68, "elapsed_time": "6:06:27", "remaining_time": "6:11:07"} +{"current_steps": 1265, "total_steps": 2536, "loss": 0.2438, "lr": 5.065370160679115e-05, "epoch": 3.9905362776025237, "percentage": 49.88, "elapsed_time": "6:07:36", "remaining_time": "6:09:21"} +{"current_steps": 1270, "total_steps": 2536, "loss": 0.1921, "lr": 5.034242220624706e-05, "epoch": 4.006309148264984, "percentage": 50.08, "elapsed_time": "6:08:40", "remaining_time": "6:07:31"} +{"current_steps": 1275, "total_steps": 2536, "loss": 0.1417, "lr": 5.003112953280452e-05, "epoch": 4.022082018927445, "percentage": 50.28, "elapsed_time": "6:09:50", "remaining_time": "6:05:46"} +{"current_steps": 1280, "total_steps": 2536, "loss": 0.1399, "lr": 4.971983565272553e-05, "epoch": 4.037854889589905, "percentage": 50.47, "elapsed_time": "6:11:01", "remaining_time": "6:04:04"} +{"current_steps": 1285, "total_steps": 2536, "loss": 0.1313, "lr": 4.940855263231873e-05, "epoch": 4.053627760252366, "percentage": 50.67, "elapsed_time": "6:12:07", "remaining_time": "6:02:16"} +{"current_steps": 1290, "total_steps": 2536, "loss": 0.141, "lr": 4.909729253747197e-05, "epoch": 4.069400630914826, "percentage": 50.87, "elapsed_time": "6:13:14", "remaining_time": "6:00:31"} +{"current_steps": 1295, "total_steps": 2536, "loss": 0.1351, "lr": 4.878606743318439e-05, "epoch": 4.085173501577287, "percentage": 51.06, "elapsed_time": "6:14:20", "remaining_time": "5:58:43"} +{"current_steps": 1300, "total_steps": 2536, "loss": 0.1441, "lr": 4.8474889383098855e-05, "epoch": 4.100946372239748, "percentage": 51.26, "elapsed_time": "6:15:25", "remaining_time": "5:56:57"} +{"current_steps": 1305, "total_steps": 2536, "loss": 0.1386, "lr": 4.816377044903428e-05, "epoch": 4.116719242902208, "percentage": 51.46, "elapsed_time": "6:30:14", "remaining_time": "6:08:06"} +{"current_steps": 1310, "total_steps": 2536, "loss": 0.1345, "lr": 4.7852722690518196e-05, "epoch": 4.132492113564669, "percentage": 51.66, "elapsed_time": "6:31:21", "remaining_time": "6:06:15"} +{"current_steps": 1315, "total_steps": 2536, "loss": 0.1315, "lr": 4.75417581643192e-05, "epoch": 4.148264984227129, "percentage": 51.85, "elapsed_time": "6:32:32", "remaining_time": "6:04:28"} +{"current_steps": 1320, "total_steps": 2536, "loss": 0.15, "lr": 4.723088892397968e-05, "epoch": 4.16403785488959, "percentage": 52.05, "elapsed_time": "6:33:36", "remaining_time": "6:02:35"} +{"current_steps": 1325, "total_steps": 2536, "loss": 0.1272, "lr": 4.6920127019348556e-05, "epoch": 4.17981072555205, "percentage": 52.25, "elapsed_time": "6:34:45", "remaining_time": "6:00:47"} +{"current_steps": 1330, "total_steps": 2536, "loss": 0.1358, "lr": 4.6609484496114256e-05, "epoch": 4.195583596214511, "percentage": 52.44, "elapsed_time": "6:35:53", "remaining_time": "5:58:59"} +{"current_steps": 1335, "total_steps": 2536, "loss": 0.1522, "lr": 4.629897339533771e-05, "epoch": 4.211356466876971, "percentage": 52.64, "elapsed_time": "6:37:06", "remaining_time": "5:57:14"} +{"current_steps": 1340, "total_steps": 2536, "loss": 0.1386, "lr": 4.598860575298575e-05, "epoch": 4.2271293375394325, "percentage": 52.84, "elapsed_time": "6:38:19", "remaining_time": "5:55:31"} +{"current_steps": 1345, "total_steps": 2536, "loss": 0.1443, "lr": 4.5678393599464435e-05, "epoch": 4.242902208201893, "percentage": 53.04, "elapsed_time": "6:39:28", "remaining_time": "5:53:44"} +{"current_steps": 1350, "total_steps": 2536, "loss": 0.1485, "lr": 4.5368348959152864e-05, "epoch": 4.2586750788643535, "percentage": 53.23, "elapsed_time": "6:40:39", "remaining_time": "5:51:58"} +{"current_steps": 1355, "total_steps": 2536, "loss": 0.1396, "lr": 4.505848384993696e-05, "epoch": 4.274447949526814, "percentage": 53.43, "elapsed_time": "6:41:50", "remaining_time": "5:50:14"} +{"current_steps": 1360, "total_steps": 2536, "loss": 0.1456, "lr": 4.474881028274375e-05, "epoch": 4.290220820189274, "percentage": 53.63, "elapsed_time": "6:42:50", "remaining_time": "5:48:20"} +{"current_steps": 1365, "total_steps": 2536, "loss": 0.1363, "lr": 4.4439340261075716e-05, "epoch": 4.305993690851735, "percentage": 53.82, "elapsed_time": "6:43:59", "remaining_time": "5:46:34"} +{"current_steps": 1370, "total_steps": 2536, "loss": 0.1435, "lr": 4.413008578054558e-05, "epoch": 4.321766561514195, "percentage": 54.02, "elapsed_time": "6:45:08", "remaining_time": "5:44:48"} +{"current_steps": 1375, "total_steps": 2536, "loss": 0.1398, "lr": 4.3821058828411244e-05, "epoch": 4.337539432176656, "percentage": 54.22, "elapsed_time": "6:46:14", "remaining_time": "5:43:01"} +{"current_steps": 1380, "total_steps": 2536, "loss": 0.1375, "lr": 4.35122713831113e-05, "epoch": 4.353312302839116, "percentage": 54.42, "elapsed_time": "6:47:20", "remaining_time": "5:41:13"} +{"current_steps": 1385, "total_steps": 2536, "loss": 0.1387, "lr": 4.320373541380054e-05, "epoch": 4.369085173501578, "percentage": 54.61, "elapsed_time": "6:48:26", "remaining_time": "5:39:26"} +{"current_steps": 1390, "total_steps": 2536, "loss": 0.1442, "lr": 4.289546287988614e-05, "epoch": 4.384858044164038, "percentage": 54.81, "elapsed_time": "6:49:31", "remaining_time": "5:37:38"} +{"current_steps": 1395, "total_steps": 2536, "loss": 0.1442, "lr": 4.258746573056401e-05, "epoch": 4.400630914826499, "percentage": 55.01, "elapsed_time": "6:50:40", "remaining_time": "5:35:54"} +{"current_steps": 1400, "total_steps": 2536, "loss": 0.1509, "lr": 4.2279755904355704e-05, "epoch": 4.416403785488959, "percentage": 55.21, "elapsed_time": "6:51:44", "remaining_time": "5:34:06"} +{"current_steps": 1405, "total_steps": 2536, "loss": 0.1363, "lr": 4.197234532864558e-05, "epoch": 4.4321766561514195, "percentage": 55.4, "elapsed_time": "7:03:26", "remaining_time": "5:40:52"} +{"current_steps": 1410, "total_steps": 2536, "loss": 0.138, "lr": 4.1665245919218544e-05, "epoch": 4.44794952681388, "percentage": 55.6, "elapsed_time": "7:04:33", "remaining_time": "5:39:02"} +{"current_steps": 1415, "total_steps": 2536, "loss": 0.1423, "lr": 4.135846957979811e-05, "epoch": 4.4637223974763405, "percentage": 55.8, "elapsed_time": "7:05:36", "remaining_time": "5:37:10"} +{"current_steps": 1420, "total_steps": 2536, "loss": 0.1401, "lr": 4.105202820158503e-05, "epoch": 4.479495268138801, "percentage": 55.99, "elapsed_time": "7:06:44", "remaining_time": "5:35:22"} +{"current_steps": 1425, "total_steps": 2536, "loss": 0.1341, "lr": 4.074593366279636e-05, "epoch": 4.495268138801261, "percentage": 56.19, "elapsed_time": "7:07:48", "remaining_time": "5:33:32"} +{"current_steps": 1430, "total_steps": 2536, "loss": 0.1333, "lr": 4.044019782820505e-05, "epoch": 4.511041009463723, "percentage": 56.39, "elapsed_time": "7:08:58", "remaining_time": "5:31:46"} +{"current_steps": 1435, "total_steps": 2536, "loss": 0.1374, "lr": 4.0134832548680006e-05, "epoch": 4.526813880126183, "percentage": 56.59, "elapsed_time": "7:10:05", "remaining_time": "5:29:59"} +{"current_steps": 1440, "total_steps": 2536, "loss": 0.143, "lr": 3.982984966072677e-05, "epoch": 4.542586750788644, "percentage": 56.78, "elapsed_time": "7:11:18", "remaining_time": "5:28:16"} +{"current_steps": 1445, "total_steps": 2536, "loss": 0.1371, "lr": 3.952526098602873e-05, "epoch": 4.558359621451104, "percentage": 56.98, "elapsed_time": "7:12:27", "remaining_time": "5:26:31"} +{"current_steps": 1450, "total_steps": 2536, "loss": 0.1469, "lr": 3.9221078330988806e-05, "epoch": 4.574132492113565, "percentage": 57.18, "elapsed_time": "7:13:30", "remaining_time": "5:24:40"} +{"current_steps": 1455, "total_steps": 2536, "loss": 0.1431, "lr": 3.89173134862719e-05, "epoch": 4.589905362776025, "percentage": 57.37, "elapsed_time": "7:14:38", "remaining_time": "5:22:55"} +{"current_steps": 1460, "total_steps": 2536, "loss": 0.1411, "lr": 3.861397822634784e-05, "epoch": 4.605678233438486, "percentage": 57.57, "elapsed_time": "7:15:46", "remaining_time": "5:21:09"} +{"current_steps": 1465, "total_steps": 2536, "loss": 0.1399, "lr": 3.831108430903494e-05, "epoch": 4.621451104100946, "percentage": 57.77, "elapsed_time": "7:16:52", "remaining_time": "5:19:22"} +{"current_steps": 1470, "total_steps": 2536, "loss": 0.134, "lr": 3.800864347504437e-05, "epoch": 4.6372239747634065, "percentage": 57.97, "elapsed_time": "7:18:04", "remaining_time": "5:17:40"} +{"current_steps": 1475, "total_steps": 2536, "loss": 0.1411, "lr": 3.7706667447524876e-05, "epoch": 4.652996845425868, "percentage": 58.16, "elapsed_time": "7:19:09", "remaining_time": "5:15:54"} +{"current_steps": 1480, "total_steps": 2536, "loss": 0.1468, "lr": 3.740516793160855e-05, "epoch": 4.668769716088328, "percentage": 58.36, "elapsed_time": "7:20:12", "remaining_time": "5:14:05"} +{"current_steps": 1485, "total_steps": 2536, "loss": 0.1372, "lr": 3.710415661395699e-05, "epoch": 4.684542586750789, "percentage": 58.56, "elapsed_time": "7:21:17", "remaining_time": "5:12:19"} +{"current_steps": 1490, "total_steps": 2536, "loss": 0.1461, "lr": 3.6803645162308376e-05, "epoch": 4.700315457413249, "percentage": 58.75, "elapsed_time": "7:22:25", "remaining_time": "5:10:34"} +{"current_steps": 1495, "total_steps": 2536, "loss": 0.151, "lr": 3.6503645225025175e-05, "epoch": 4.71608832807571, "percentage": 58.95, "elapsed_time": "7:23:33", "remaining_time": "5:08:51"} +{"current_steps": 1500, "total_steps": 2536, "loss": 0.1392, "lr": 3.620416843064266e-05, "epoch": 4.73186119873817, "percentage": 59.15, "elapsed_time": "7:24:41", "remaining_time": "5:07:07"} +{"current_steps": 1505, "total_steps": 2536, "loss": 0.1513, "lr": 3.5905226387418126e-05, "epoch": 4.747634069400631, "percentage": 59.35, "elapsed_time": "7:37:23", "remaining_time": "5:13:19"} +{"current_steps": 1510, "total_steps": 2536, "loss": 0.1457, "lr": 3.5606830682880965e-05, "epoch": 4.763406940063091, "percentage": 59.54, "elapsed_time": "7:38:35", "remaining_time": "5:11:35"} +{"current_steps": 1515, "total_steps": 2536, "loss": 0.1396, "lr": 3.530899288338352e-05, "epoch": 4.779179810725552, "percentage": 59.74, "elapsed_time": "7:39:44", "remaining_time": "5:09:50"} +{"current_steps": 1520, "total_steps": 2536, "loss": 0.1465, "lr": 3.501172453365268e-05, "epoch": 4.794952681388013, "percentage": 59.94, "elapsed_time": "7:40:51", "remaining_time": "5:08:03"} +{"current_steps": 1525, "total_steps": 2536, "loss": 0.1377, "lr": 3.471503715634252e-05, "epoch": 4.8107255520504735, "percentage": 60.13, "elapsed_time": "7:41:58", "remaining_time": "5:06:15"} +{"current_steps": 1530, "total_steps": 2536, "loss": 0.1487, "lr": 3.44189422515875e-05, "epoch": 4.826498422712934, "percentage": 60.33, "elapsed_time": "7:43:08", "remaining_time": "5:04:31"} +{"current_steps": 1535, "total_steps": 2536, "loss": 0.1459, "lr": 3.4123451296556845e-05, "epoch": 4.842271293375394, "percentage": 60.53, "elapsed_time": "7:44:12", "remaining_time": "5:02:43"} +{"current_steps": 1540, "total_steps": 2536, "loss": 0.1479, "lr": 3.382857574500957e-05, "epoch": 4.858044164037855, "percentage": 60.73, "elapsed_time": "7:45:23", "remaining_time": "5:00:59"} +{"current_steps": 1545, "total_steps": 2536, "loss": 0.1522, "lr": 3.3534327026850574e-05, "epoch": 4.873817034700315, "percentage": 60.92, "elapsed_time": "7:46:33", "remaining_time": "4:59:15"} +{"current_steps": 1550, "total_steps": 2536, "loss": 0.1431, "lr": 3.324071654768754e-05, "epoch": 4.889589905362776, "percentage": 61.12, "elapsed_time": "7:47:43", "remaining_time": "4:57:31"} +{"current_steps": 1555, "total_steps": 2536, "loss": 0.1401, "lr": 3.2947755688388874e-05, "epoch": 4.905362776025236, "percentage": 61.32, "elapsed_time": "7:48:52", "remaining_time": "4:55:47"} +{"current_steps": 1560, "total_steps": 2536, "loss": 0.1463, "lr": 3.26554558046426e-05, "epoch": 4.921135646687697, "percentage": 61.51, "elapsed_time": "7:49:59", "remaining_time": "4:54:02"} +{"current_steps": 1565, "total_steps": 2536, "loss": 0.1308, "lr": 3.236382822651606e-05, "epoch": 4.936908517350158, "percentage": 61.71, "elapsed_time": "7:51:05", "remaining_time": "4:52:17"} +{"current_steps": 1570, "total_steps": 2536, "loss": 0.1426, "lr": 3.207288425801689e-05, "epoch": 4.952681388012619, "percentage": 61.91, "elapsed_time": "7:52:20", "remaining_time": "4:50:37"} +{"current_steps": 1575, "total_steps": 2536, "loss": 0.1435, "lr": 3.1782635176654764e-05, "epoch": 4.968454258675079, "percentage": 62.11, "elapsed_time": "7:53:28", "remaining_time": "4:48:53"} +{"current_steps": 1580, "total_steps": 2536, "loss": 0.1455, "lr": 3.149309223300428e-05, "epoch": 4.9842271293375395, "percentage": 62.3, "elapsed_time": "7:54:33", "remaining_time": "4:47:08"} +{"current_steps": 1585, "total_steps": 2536, "loss": 0.1392, "lr": 3.120426665026891e-05, "epoch": 5.0, "percentage": 62.5, "elapsed_time": "7:55:44", "remaining_time": "4:45:26"} +{"current_steps": 1590, "total_steps": 2536, "loss": 0.0784, "lr": 3.091616962384587e-05, "epoch": 5.0157728706624605, "percentage": 62.7, "elapsed_time": "7:56:54", "remaining_time": "4:43:44"} +{"current_steps": 1595, "total_steps": 2536, "loss": 0.079, "lr": 3.06288123208923e-05, "epoch": 5.031545741324921, "percentage": 62.89, "elapsed_time": "7:58:07", "remaining_time": "4:42:04"} +{"current_steps": 1600, "total_steps": 2536, "loss": 0.0682, "lr": 3.034220587989226e-05, "epoch": 5.047318611987381, "percentage": 63.09, "elapsed_time": "7:59:19", "remaining_time": "4:40:24"} +{"current_steps": 1605, "total_steps": 2536, "loss": 0.0742, "lr": 3.005636141022512e-05, "epoch": 5.063091482649842, "percentage": 63.29, "elapsed_time": "8:06:31", "remaining_time": "4:42:12"} +{"current_steps": 1610, "total_steps": 2536, "loss": 0.0725, "lr": 2.977128999173482e-05, "epoch": 5.078864353312303, "percentage": 63.49, "elapsed_time": "8:07:37", "remaining_time": "4:40:27"} +{"current_steps": 1615, "total_steps": 2536, "loss": 0.075, "lr": 2.948700267430049e-05, "epoch": 5.094637223974764, "percentage": 63.68, "elapsed_time": "8:08:38", "remaining_time": "4:38:39"} +{"current_steps": 1620, "total_steps": 2536, "loss": 0.0771, "lr": 2.920351047740808e-05, "epoch": 5.110410094637224, "percentage": 63.88, "elapsed_time": "8:09:44", "remaining_time": "4:36:55"} +{"current_steps": 1625, "total_steps": 2536, "loss": 0.0785, "lr": 2.892082438972325e-05, "epoch": 5.126182965299685, "percentage": 64.08, "elapsed_time": "8:10:55", "remaining_time": "4:35:13"} +{"current_steps": 1630, "total_steps": 2536, "loss": 0.0776, "lr": 2.863895536866541e-05, "epoch": 5.141955835962145, "percentage": 64.27, "elapsed_time": "8:12:04", "remaining_time": "4:33:30"} +{"current_steps": 1635, "total_steps": 2536, "loss": 0.0736, "lr": 2.835791433998301e-05, "epoch": 5.157728706624606, "percentage": 64.47, "elapsed_time": "8:13:13", "remaining_time": "4:31:47"} +{"current_steps": 1640, "total_steps": 2536, "loss": 0.077, "lr": 2.807771219733004e-05, "epoch": 5.173501577287066, "percentage": 64.67, "elapsed_time": "8:14:16", "remaining_time": "4:30:02"} +{"current_steps": 1645, "total_steps": 2536, "loss": 0.0807, "lr": 2.7798359801843766e-05, "epoch": 5.1892744479495265, "percentage": 64.87, "elapsed_time": "8:15:21", "remaining_time": "4:28:18"} +{"current_steps": 1650, "total_steps": 2536, "loss": 0.0753, "lr": 2.7519867981723712e-05, "epoch": 5.205047318611987, "percentage": 65.06, "elapsed_time": "8:16:32", "remaining_time": "4:26:37"} +{"current_steps": 1655, "total_steps": 2536, "loss": 0.0787, "lr": 2.724224753181197e-05, "epoch": 5.220820189274448, "percentage": 65.26, "elapsed_time": "8:17:43", "remaining_time": "4:24:57"} +{"current_steps": 1660, "total_steps": 2536, "loss": 0.0817, "lr": 2.6965509213174777e-05, "epoch": 5.236593059936909, "percentage": 65.46, "elapsed_time": "8:18:53", "remaining_time": "4:23:16"} +{"current_steps": 1665, "total_steps": 2536, "loss": 0.0762, "lr": 2.6689663752685334e-05, "epoch": 5.252365930599369, "percentage": 65.65, "elapsed_time": "8:20:01", "remaining_time": "4:21:34"} +{"current_steps": 1670, "total_steps": 2536, "loss": 0.0721, "lr": 2.641472184260809e-05, "epoch": 5.26813880126183, "percentage": 65.85, "elapsed_time": "8:21:13", "remaining_time": "4:19:55"} +{"current_steps": 1675, "total_steps": 2536, "loss": 0.0765, "lr": 2.614069414018428e-05, "epoch": 5.28391167192429, "percentage": 66.05, "elapsed_time": "8:22:22", "remaining_time": "4:18:13"} +{"current_steps": 1680, "total_steps": 2536, "loss": 0.0729, "lr": 2.5867591267218805e-05, "epoch": 5.299684542586751, "percentage": 66.25, "elapsed_time": "8:23:28", "remaining_time": "4:16:31"} +{"current_steps": 1685, "total_steps": 2536, "loss": 0.0753, "lr": 2.5595423809668452e-05, "epoch": 5.315457413249211, "percentage": 66.44, "elapsed_time": "8:24:38", "remaining_time": "4:14:52"} +{"current_steps": 1690, "total_steps": 2536, "loss": 0.0776, "lr": 2.532420231723172e-05, "epoch": 5.331230283911672, "percentage": 66.64, "elapsed_time": "8:25:50", "remaining_time": "4:13:13"} +{"current_steps": 1695, "total_steps": 2536, "loss": 0.0753, "lr": 2.5053937302939767e-05, "epoch": 5.347003154574132, "percentage": 66.84, "elapsed_time": "8:26:59", "remaining_time": "4:11:33"} +{"current_steps": 1700, "total_steps": 2536, "loss": 0.074, "lr": 2.4784639242748953e-05, "epoch": 5.3627760252365935, "percentage": 67.03, "elapsed_time": "8:28:06", "remaining_time": "4:09:52"} +{"current_steps": 1705, "total_steps": 2536, "loss": 0.0766, "lr": 2.451631857513472e-05, "epoch": 5.378548895899054, "percentage": 67.23, "elapsed_time": "8:41:24", "remaining_time": "4:14:07"} +{"current_steps": 1710, "total_steps": 2536, "loss": 0.0751, "lr": 2.4248985700687084e-05, "epoch": 5.394321766561514, "percentage": 67.43, "elapsed_time": "8:42:28", "remaining_time": "4:12:22"} +{"current_steps": 1715, "total_steps": 2536, "loss": 0.0762, "lr": 2.39826509817074e-05, "epoch": 5.410094637223975, "percentage": 67.63, "elapsed_time": "8:43:35", "remaining_time": "4:10:38"} +{"current_steps": 1720, "total_steps": 2536, "loss": 0.0802, "lr": 2.3717324741806718e-05, "epoch": 5.425867507886435, "percentage": 67.82, "elapsed_time": "8:44:44", "remaining_time": "4:08:56"} +{"current_steps": 1725, "total_steps": 2536, "loss": 0.0775, "lr": 2.3453017265505673e-05, "epoch": 5.441640378548896, "percentage": 68.02, "elapsed_time": "8:45:56", "remaining_time": "4:07:15"} +{"current_steps": 1730, "total_steps": 2536, "loss": 0.0759, "lr": 2.3189738797835708e-05, "epoch": 5.457413249211356, "percentage": 68.22, "elapsed_time": "8:46:57", "remaining_time": "4:05:30"} +{"current_steps": 1735, "total_steps": 2536, "loss": 0.0756, "lr": 2.292749954394216e-05, "epoch": 5.473186119873817, "percentage": 68.41, "elapsed_time": "8:48:03", "remaining_time": "4:03:47"} +{"current_steps": 1740, "total_steps": 2536, "loss": 0.0752, "lr": 2.266630966868852e-05, "epoch": 5.488958990536277, "percentage": 68.61, "elapsed_time": "8:49:11", "remaining_time": "4:02:05"} +{"current_steps": 1745, "total_steps": 2536, "loss": 0.0774, "lr": 2.2406179296262453e-05, "epoch": 5.504731861198739, "percentage": 68.81, "elapsed_time": "8:50:19", "remaining_time": "4:00:23"} +{"current_steps": 1750, "total_steps": 2536, "loss": 0.0722, "lr": 2.2147118509783445e-05, "epoch": 5.520504731861199, "percentage": 69.01, "elapsed_time": "8:51:28", "remaining_time": "3:58:42"} +{"current_steps": 1755, "total_steps": 2536, "loss": 0.0747, "lr": 2.1889137350911894e-05, "epoch": 5.5362776025236595, "percentage": 69.2, "elapsed_time": "8:52:32", "remaining_time": "3:56:59"} +{"current_steps": 1760, "total_steps": 2536, "loss": 0.0747, "lr": 2.1632245819459913e-05, "epoch": 5.55205047318612, "percentage": 69.4, "elapsed_time": "8:53:37", "remaining_time": "3:55:16"} +{"current_steps": 1765, "total_steps": 2536, "loss": 0.0788, "lr": 2.1376453873003664e-05, "epoch": 5.5678233438485805, "percentage": 69.6, "elapsed_time": "8:54:46", "remaining_time": "3:53:36"} +{"current_steps": 1770, "total_steps": 2536, "loss": 0.0817, "lr": 2.112177142649746e-05, "epoch": 5.583596214511041, "percentage": 69.79, "elapsed_time": "8:55:56", "remaining_time": "3:51:56"} +{"current_steps": 1775, "total_steps": 2536, "loss": 0.0799, "lr": 2.0868208351889402e-05, "epoch": 5.599369085173501, "percentage": 69.99, "elapsed_time": "8:57:07", "remaining_time": "3:50:16"} +{"current_steps": 1780, "total_steps": 2536, "loss": 0.0725, "lr": 2.0615774477738738e-05, "epoch": 5.615141955835962, "percentage": 70.19, "elapsed_time": "8:58:10", "remaining_time": "3:48:34"} +{"current_steps": 1785, "total_steps": 2536, "loss": 0.0791, "lr": 2.0364479588834835e-05, "epoch": 5.630914826498422, "percentage": 70.39, "elapsed_time": "8:59:19", "remaining_time": "3:46:54"} +{"current_steps": 1790, "total_steps": 2536, "loss": 0.0793, "lr": 2.0114333425817993e-05, "epoch": 5.646687697160884, "percentage": 70.58, "elapsed_time": "9:00:25", "remaining_time": "3:45:13"} +{"current_steps": 1795, "total_steps": 2536, "loss": 0.0734, "lr": 1.9865345684801846e-05, "epoch": 5.662460567823344, "percentage": 70.78, "elapsed_time": "9:01:32", "remaining_time": "3:43:33"} +{"current_steps": 1800, "total_steps": 2536, "loss": 0.0779, "lr": 1.9617526016997486e-05, "epoch": 5.678233438485805, "percentage": 70.98, "elapsed_time": "9:02:44", "remaining_time": "3:41:55"} +{"current_steps": 1805, "total_steps": 2536, "loss": 0.0785, "lr": 1.937088402833943e-05, "epoch": 5.694006309148265, "percentage": 71.18, "elapsed_time": "9:13:32", "remaining_time": "3:44:10"} +{"current_steps": 1810, "total_steps": 2536, "loss": 0.0784, "lr": 1.9125429279113173e-05, "epoch": 5.709779179810726, "percentage": 71.37, "elapsed_time": "9:14:43", "remaining_time": "3:42:30"} +{"current_steps": 1815, "total_steps": 2536, "loss": 0.0803, "lr": 1.8881171283584752e-05, "epoch": 5.725552050473186, "percentage": 71.57, "elapsed_time": "9:15:51", "remaining_time": "3:40:48"} +{"current_steps": 1820, "total_steps": 2536, "loss": 0.0797, "lr": 1.8638119509631853e-05, "epoch": 5.7413249211356465, "percentage": 71.77, "elapsed_time": "9:17:00", "remaining_time": "3:39:07"} +{"current_steps": 1825, "total_steps": 2536, "loss": 0.0799, "lr": 1.839628337837686e-05, "epoch": 5.757097791798107, "percentage": 71.96, "elapsed_time": "9:18:12", "remaining_time": "3:37:28"} +{"current_steps": 1830, "total_steps": 2536, "loss": 0.078, "lr": 1.8155672263821666e-05, "epoch": 5.7728706624605675, "percentage": 72.16, "elapsed_time": "9:19:21", "remaining_time": "3:35:47"} +{"current_steps": 1835, "total_steps": 2536, "loss": 0.0786, "lr": 1.7916295492484315e-05, "epoch": 5.788643533123029, "percentage": 72.36, "elapsed_time": "9:20:26", "remaining_time": "3:34:05"} +{"current_steps": 1840, "total_steps": 2536, "loss": 0.0739, "lr": 1.7678162343037524e-05, "epoch": 5.804416403785489, "percentage": 72.56, "elapsed_time": "9:21:36", "remaining_time": "3:32:25"} +{"current_steps": 1845, "total_steps": 2536, "loss": 0.0744, "lr": 1.744128204594893e-05, "epoch": 5.82018927444795, "percentage": 72.75, "elapsed_time": "9:22:45", "remaining_time": "3:30:45"} +{"current_steps": 1850, "total_steps": 2536, "loss": 0.078, "lr": 1.7205663783123436e-05, "epoch": 5.83596214511041, "percentage": 72.95, "elapsed_time": "9:23:52", "remaining_time": "3:29:05"} +{"current_steps": 1855, "total_steps": 2536, "loss": 0.0772, "lr": 1.6971316687547213e-05, "epoch": 5.851735015772871, "percentage": 73.15, "elapsed_time": "9:25:00", "remaining_time": "3:27:25"} +{"current_steps": 1860, "total_steps": 2536, "loss": 0.074, "lr": 1.6738249842933697e-05, "epoch": 5.867507886435331, "percentage": 73.34, "elapsed_time": "9:26:08", "remaining_time": "3:25:45"} +{"current_steps": 1865, "total_steps": 2536, "loss": 0.0787, "lr": 1.6506472283371527e-05, "epoch": 5.883280757097792, "percentage": 73.54, "elapsed_time": "9:27:15", "remaining_time": "3:24:05"} +{"current_steps": 1870, "total_steps": 2536, "loss": 0.0752, "lr": 1.6275992992974308e-05, "epoch": 5.899053627760252, "percentage": 73.74, "elapsed_time": "9:28:24", "remaining_time": "3:22:26"} +{"current_steps": 1875, "total_steps": 2536, "loss": 0.0749, "lr": 1.604682090553243e-05, "epoch": 5.914826498422713, "percentage": 73.94, "elapsed_time": "9:29:35", "remaining_time": "3:20:47"} +{"current_steps": 1880, "total_steps": 2536, "loss": 0.0753, "lr": 1.5818964904166756e-05, "epoch": 5.930599369085174, "percentage": 74.13, "elapsed_time": "9:30:43", "remaining_time": "3:19:08"} +{"current_steps": 1885, "total_steps": 2536, "loss": 0.0734, "lr": 1.55924338209843e-05, "epoch": 5.946372239747634, "percentage": 74.33, "elapsed_time": "9:31:51", "remaining_time": "3:17:29"} +{"current_steps": 1890, "total_steps": 2536, "loss": 0.0759, "lr": 1.536723643673582e-05, "epoch": 5.962145110410095, "percentage": 74.53, "elapsed_time": "9:33:01", "remaining_time": "3:15:51"} +{"current_steps": 1895, "total_steps": 2536, "loss": 0.0752, "lr": 1.5143381480475583e-05, "epoch": 5.977917981072555, "percentage": 74.72, "elapsed_time": "9:34:09", "remaining_time": "3:14:12"} +{"current_steps": 1900, "total_steps": 2536, "loss": 0.0704, "lr": 1.49208776292229e-05, "epoch": 5.993690851735016, "percentage": 74.92, "elapsed_time": "9:35:09", "remaining_time": "3:12:31"} +{"current_steps": 1905, "total_steps": 2536, "loss": 0.0483, "lr": 1.4699733507625862e-05, "epoch": 6.009463722397476, "percentage": 75.12, "elapsed_time": "9:41:59", "remaining_time": "3:12:46"} +{"current_steps": 1910, "total_steps": 2536, "loss": 0.0406, "lr": 1.4479957687626933e-05, "epoch": 6.025236593059937, "percentage": 75.32, "elapsed_time": "9:43:05", "remaining_time": "3:11:06"} +{"current_steps": 1915, "total_steps": 2536, "loss": 0.0401, "lr": 1.4261558688130838e-05, "epoch": 6.041009463722397, "percentage": 75.51, "elapsed_time": "9:44:14", "remaining_time": "3:09:27"} +{"current_steps": 1920, "total_steps": 2536, "loss": 0.0369, "lr": 1.4044544974674246e-05, "epoch": 6.056782334384858, "percentage": 75.71, "elapsed_time": "9:45:20", "remaining_time": "3:07:47"} +{"current_steps": 1925, "total_steps": 2536, "loss": 0.0374, "lr": 1.3828924959097612e-05, "epoch": 6.072555205047319, "percentage": 75.91, "elapsed_time": "9:46:33", "remaining_time": "3:06:10"} +{"current_steps": 1930, "total_steps": 2536, "loss": 0.0351, "lr": 1.3614706999219213e-05, "epoch": 6.0883280757097795, "percentage": 76.1, "elapsed_time": "9:47:43", "remaining_time": "3:04:32"} +{"current_steps": 1935, "total_steps": 2536, "loss": 0.0385, "lr": 1.340189939851112e-05, "epoch": 6.10410094637224, "percentage": 76.3, "elapsed_time": "9:48:50", "remaining_time": "3:02:53"} +{"current_steps": 1940, "total_steps": 2536, "loss": 0.0369, "lr": 1.3190510405777345e-05, "epoch": 6.1198738170347005, "percentage": 76.5, "elapsed_time": "9:50:04", "remaining_time": "3:01:16"} +{"current_steps": 1945, "total_steps": 2536, "loss": 0.037, "lr": 1.2980548214834142e-05, "epoch": 6.135646687697161, "percentage": 76.7, "elapsed_time": "9:51:11", "remaining_time": "2:59:38"} +{"current_steps": 1950, "total_steps": 2536, "loss": 0.0363, "lr": 1.2772020964192316e-05, "epoch": 6.151419558359621, "percentage": 76.89, "elapsed_time": "9:52:14", "remaining_time": "2:57:58"} +{"current_steps": 1955, "total_steps": 2536, "loss": 0.0392, "lr": 1.2564936736741867e-05, "epoch": 6.167192429022082, "percentage": 77.09, "elapsed_time": "9:53:19", "remaining_time": "2:56:19"} +{"current_steps": 1960, "total_steps": 2536, "loss": 0.0384, "lr": 1.23593035594386e-05, "epoch": 6.182965299684542, "percentage": 77.29, "elapsed_time": "9:54:22", "remaining_time": "2:54:40"} +{"current_steps": 1965, "total_steps": 2536, "loss": 0.0382, "lr": 1.215512940299305e-05, "epoch": 6.198738170347003, "percentage": 77.48, "elapsed_time": "9:55:20", "remaining_time": "2:52:59"} +{"current_steps": 1970, "total_steps": 2536, "loss": 0.0376, "lr": 1.1952422181561424e-05, "epoch": 6.214511041009464, "percentage": 77.68, "elapsed_time": "9:56:33", "remaining_time": "2:51:23"} +{"current_steps": 1975, "total_steps": 2536, "loss": 0.0374, "lr": 1.1751189752438957e-05, "epoch": 6.230283911671925, "percentage": 77.88, "elapsed_time": "9:57:43", "remaining_time": "2:49:47"} +{"current_steps": 1980, "total_steps": 2536, "loss": 0.0378, "lr": 1.1551439915755274e-05, "epoch": 6.246056782334385, "percentage": 78.08, "elapsed_time": "9:58:52", "remaining_time": "2:48:10"} +{"current_steps": 1985, "total_steps": 2536, "loss": 0.0374, "lr": 1.135318041417207e-05, "epoch": 6.261829652996846, "percentage": 78.27, "elapsed_time": "10:00:00", "remaining_time": "2:46:33"} +{"current_steps": 1990, "total_steps": 2536, "loss": 0.0365, "lr": 1.1156418932582941e-05, "epoch": 6.277602523659306, "percentage": 78.47, "elapsed_time": "10:01:05", "remaining_time": "2:44:55"} +{"current_steps": 1995, "total_steps": 2536, "loss": 0.0411, "lr": 1.096116309781558e-05, "epoch": 6.2933753943217665, "percentage": 78.67, "elapsed_time": "10:02:16", "remaining_time": "2:43:19"} +{"current_steps": 2000, "total_steps": 2536, "loss": 0.0378, "lr": 1.0767420478336093e-05, "epoch": 6.309148264984227, "percentage": 78.86, "elapsed_time": "10:03:28", "remaining_time": "2:41:43"} +{"current_steps": 2005, "total_steps": 2536, "loss": 0.0384, "lr": 1.0575198583955698e-05, "epoch": 6.3249211356466875, "percentage": 79.06, "elapsed_time": "10:11:55", "remaining_time": "2:42:03"} +{"current_steps": 2010, "total_steps": 2536, "loss": 0.0352, "lr": 1.0384504865539497e-05, "epoch": 6.340694006309148, "percentage": 79.26, "elapsed_time": "10:13:04", "remaining_time": "2:40:26"} +{"current_steps": 2015, "total_steps": 2536, "loss": 0.0387, "lr": 1.0195346714717813e-05, "epoch": 6.356466876971609, "percentage": 79.46, "elapsed_time": "10:14:14", "remaining_time": "2:38:49"} +{"current_steps": 2020, "total_steps": 2536, "loss": 0.0396, "lr": 1.0007731463599601e-05, "epoch": 6.37223974763407, "percentage": 79.65, "elapsed_time": "10:15:21", "remaining_time": "2:37:11"} +{"current_steps": 2025, "total_steps": 2536, "loss": 0.0367, "lr": 9.82166638448827e-06, "epoch": 6.38801261829653, "percentage": 79.85, "elapsed_time": "10:16:27", "remaining_time": "2:35:33"} +{"current_steps": 2030, "total_steps": 2536, "loss": 0.0365, "lr": 9.637158689599746e-06, "epoch": 6.403785488958991, "percentage": 80.05, "elapsed_time": "10:17:39", "remaining_time": "2:33:57"} +{"current_steps": 2035, "total_steps": 2536, "loss": 0.0422, "lr": 9.454215530782994e-06, "epoch": 6.419558359621451, "percentage": 80.24, "elapsed_time": "10:18:47", "remaining_time": "2:32:20"} +{"current_steps": 2040, "total_steps": 2536, "loss": 0.0409, "lr": 9.272843999242736e-06, "epoch": 6.435331230283912, "percentage": 80.44, "elapsed_time": "10:19:58", "remaining_time": "2:30:44"} +{"current_steps": 2045, "total_steps": 2536, "loss": 0.0382, "lr": 9.093051125264623e-06, "epoch": 6.451104100946372, "percentage": 80.64, "elapsed_time": "10:21:01", "remaining_time": "2:29:06"} +{"current_steps": 2050, "total_steps": 2536, "loss": 0.0399, "lr": 8.91484387794267e-06, "epoch": 6.466876971608833, "percentage": 80.84, "elapsed_time": "10:22:08", "remaining_time": "2:27:29"} +{"current_steps": 2055, "total_steps": 2536, "loss": 0.0357, "lr": 8.73822916490919e-06, "epoch": 6.482649842271293, "percentage": 81.03, "elapsed_time": "10:23:18", "remaining_time": "2:25:53"} +{"current_steps": 2060, "total_steps": 2536, "loss": 0.0371, "lr": 8.563213832067014e-06, "epoch": 6.498422712933754, "percentage": 81.23, "elapsed_time": "10:24:25", "remaining_time": "2:24:17"} +{"current_steps": 2065, "total_steps": 2536, "loss": 0.0394, "lr": 8.389804663324142e-06, "epoch": 6.514195583596215, "percentage": 81.43, "elapsed_time": "10:25:32", "remaining_time": "2:22:40"} +{"current_steps": 2070, "total_steps": 2536, "loss": 0.037, "lr": 8.218008380330723e-06, "epoch": 6.529968454258675, "percentage": 81.62, "elapsed_time": "10:26:34", "remaining_time": "2:21:03"} +{"current_steps": 2075, "total_steps": 2536, "loss": 0.0393, "lr": 8.047831642218611e-06, "epoch": 6.545741324921136, "percentage": 81.82, "elapsed_time": "10:27:40", "remaining_time": "2:19:27"} +{"current_steps": 2080, "total_steps": 2536, "loss": 0.037, "lr": 7.879281045343184e-06, "epoch": 6.561514195583596, "percentage": 82.02, "elapsed_time": "10:28:48", "remaining_time": "2:17:51"} +{"current_steps": 2085, "total_steps": 2536, "loss": 0.0374, "lr": 7.712363123027678e-06, "epoch": 6.577287066246057, "percentage": 82.22, "elapsed_time": "10:29:53", "remaining_time": "2:16:14"} +{"current_steps": 2090, "total_steps": 2536, "loss": 0.036, "lr": 7.547084345309924e-06, "epoch": 6.593059936908517, "percentage": 82.41, "elapsed_time": "10:31:00", "remaining_time": "2:14:39"} +{"current_steps": 2095, "total_steps": 2536, "loss": 0.0382, "lr": 7.383451118691576e-06, "epoch": 6.608832807570978, "percentage": 82.61, "elapsed_time": "10:32:08", "remaining_time": "2:13:04"} +{"current_steps": 2100, "total_steps": 2536, "loss": 0.036, "lr": 7.221469785889784e-06, "epoch": 6.624605678233438, "percentage": 82.81, "elapsed_time": "10:33:17", "remaining_time": "2:11:28"} +{"current_steps": 2105, "total_steps": 2536, "loss": 0.0369, "lr": 7.061146625591331e-06, "epoch": 6.6403785488958995, "percentage": 83.0, "elapsed_time": "10:38:55", "remaining_time": "2:10:49"} +{"current_steps": 2110, "total_steps": 2536, "loss": 0.0376, "lr": 6.902487852209238e-06, "epoch": 6.65615141955836, "percentage": 83.2, "elapsed_time": "10:40:00", "remaining_time": "2:09:12"} +{"current_steps": 2115, "total_steps": 2536, "loss": 0.0399, "lr": 6.7454996156419485e-06, "epoch": 6.6719242902208205, "percentage": 83.4, "elapsed_time": "10:41:12", "remaining_time": "2:07:38"} +{"current_steps": 2120, "total_steps": 2536, "loss": 0.0377, "lr": 6.590188001034864e-06, "epoch": 6.687697160883281, "percentage": 83.6, "elapsed_time": "10:42:23", "remaining_time": "2:06:03"} +{"current_steps": 2125, "total_steps": 2536, "loss": 0.0353, "lr": 6.436559028544559e-06, "epoch": 6.703470031545741, "percentage": 83.79, "elapsed_time": "10:43:32", "remaining_time": "2:04:28"} +{"current_steps": 2130, "total_steps": 2536, "loss": 0.0375, "lr": 6.284618653105328e-06, "epoch": 6.719242902208202, "percentage": 83.99, "elapsed_time": "10:44:40", "remaining_time": "2:02:52"} +{"current_steps": 2135, "total_steps": 2536, "loss": 0.0353, "lr": 6.134372764198465e-06, "epoch": 6.735015772870662, "percentage": 84.19, "elapsed_time": "10:45:47", "remaining_time": "2:01:17"} +{"current_steps": 2140, "total_steps": 2536, "loss": 0.038, "lr": 5.985827185623899e-06, "epoch": 6.750788643533123, "percentage": 84.38, "elapsed_time": "10:46:51", "remaining_time": "1:59:41"} +{"current_steps": 2145, "total_steps": 2536, "loss": 0.0382, "lr": 5.8389876752745045e-06, "epoch": 6.766561514195583, "percentage": 84.58, "elapsed_time": "10:48:01", "remaining_time": "1:58:07"} +{"current_steps": 2150, "total_steps": 2536, "loss": 0.0355, "lr": 5.693859924912892e-06, "epoch": 6.782334384858045, "percentage": 84.78, "elapsed_time": "10:49:12", "remaining_time": "1:56:33"} +{"current_steps": 2155, "total_steps": 2536, "loss": 0.039, "lr": 5.550449559950755e-06, "epoch": 6.798107255520505, "percentage": 84.98, "elapsed_time": "10:51:22", "remaining_time": "1:55:09"} +{"current_steps": 2160, "total_steps": 2536, "loss": 0.0391, "lr": 5.408762139230888e-06, "epoch": 6.813880126182966, "percentage": 85.17, "elapsed_time": "10:52:33", "remaining_time": "1:53:35"} +{"current_steps": 2165, "total_steps": 2536, "loss": 0.0365, "lr": 5.268803154811669e-06, "epoch": 6.829652996845426, "percentage": 85.37, "elapsed_time": "10:53:43", "remaining_time": "1:52:01"} +{"current_steps": 2170, "total_steps": 2536, "loss": 0.0381, "lr": 5.1305780317541855e-06, "epoch": 6.8454258675078865, "percentage": 85.57, "elapsed_time": "10:54:49", "remaining_time": "1:50:26"} +{"current_steps": 2175, "total_steps": 2536, "loss": 0.0388, "lr": 4.99409212791192e-06, "epoch": 6.861198738170347, "percentage": 85.76, "elapsed_time": "10:56:00", "remaining_time": "1:48:52"} +{"current_steps": 2180, "total_steps": 2536, "loss": 0.0365, "lr": 4.8593507337231666e-06, "epoch": 6.8769716088328074, "percentage": 85.96, "elapsed_time": "10:57:11", "remaining_time": "1:47:19"} +{"current_steps": 2185, "total_steps": 2536, "loss": 0.0375, "lr": 4.726359072005859e-06, "epoch": 6.892744479495268, "percentage": 86.16, "elapsed_time": "10:58:20", "remaining_time": "1:45:45"} +{"current_steps": 2190, "total_steps": 2536, "loss": 0.0347, "lr": 4.5951222977551444e-06, "epoch": 6.908517350157728, "percentage": 86.36, "elapsed_time": "10:59:27", "remaining_time": "1:44:11"} +{"current_steps": 2195, "total_steps": 2536, "loss": 0.0382, "lr": 4.465645497943621e-06, "epoch": 6.92429022082019, "percentage": 86.55, "elapsed_time": "11:00:37", "remaining_time": "1:42:37"} +{"current_steps": 2200, "total_steps": 2536, "loss": 0.0359, "lr": 4.337933691324109e-06, "epoch": 6.94006309148265, "percentage": 86.75, "elapsed_time": "11:01:43", "remaining_time": "1:41:03"} +{"current_steps": 2205, "total_steps": 2536, "loss": 0.0378, "lr": 4.21199182823514e-06, "epoch": 6.955835962145111, "percentage": 86.95, "elapsed_time": "11:10:14", "remaining_time": "1:40:36"} +{"current_steps": 2210, "total_steps": 2536, "loss": 0.039, "lr": 4.08782479040905e-06, "epoch": 6.971608832807571, "percentage": 87.15, "elapsed_time": "11:11:23", "remaining_time": "1:39:02"} +{"current_steps": 2215, "total_steps": 2536, "loss": 0.0362, "lr": 3.9654373907827665e-06, "epoch": 6.987381703470032, "percentage": 87.34, "elapsed_time": "11:12:35", "remaining_time": "1:37:28"} +{"current_steps": 2220, "total_steps": 2536, "loss": 0.0306, "lr": 3.844834373311257e-06, "epoch": 7.003154574132492, "percentage": 87.54, "elapsed_time": "11:13:39", "remaining_time": "1:35:53"} +{"current_steps": 2225, "total_steps": 2536, "loss": 0.0226, "lr": 3.7260204127836316e-06, "epoch": 7.018927444794953, "percentage": 87.74, "elapsed_time": "11:14:43", "remaining_time": "1:34:18"} +{"current_steps": 2230, "total_steps": 2536, "loss": 0.0214, "lr": 3.609000114641964e-06, "epoch": 7.034700315457413, "percentage": 87.93, "elapsed_time": "11:15:52", "remaining_time": "1:32:44"} +{"current_steps": 2235, "total_steps": 2536, "loss": 0.0202, "lr": 3.4937780148027344e-06, "epoch": 7.0504731861198735, "percentage": 88.13, "elapsed_time": "11:17:01", "remaining_time": "1:31:10"} +{"current_steps": 2240, "total_steps": 2536, "loss": 0.0204, "lr": 3.3803585794810466e-06, "epoch": 7.066246056782334, "percentage": 88.33, "elapsed_time": "11:18:05", "remaining_time": "1:29:36"} +{"current_steps": 2245, "total_steps": 2536, "loss": 0.0213, "lr": 3.2687462050175034e-06, "epoch": 7.082018927444795, "percentage": 88.53, "elapsed_time": "11:19:13", "remaining_time": "1:28:02"} +{"current_steps": 2250, "total_steps": 2536, "loss": 0.0197, "lr": 3.1589452177077815e-06, "epoch": 7.097791798107256, "percentage": 88.72, "elapsed_time": "11:20:23", "remaining_time": "1:26:29"} +{"current_steps": 2255, "total_steps": 2536, "loss": 0.0213, "lr": 3.0509598736349343e-06, "epoch": 7.113564668769716, "percentage": 88.92, "elapsed_time": "11:21:32", "remaining_time": "1:24:55"} +{"current_steps": 2260, "total_steps": 2536, "loss": 0.0218, "lr": 2.9447943585044545e-06, "epoch": 7.129337539432177, "percentage": 89.12, "elapsed_time": "11:22:44", "remaining_time": "1:23:22"} +{"current_steps": 2265, "total_steps": 2536, "loss": 0.021, "lr": 2.840452787481979e-06, "epoch": 7.145110410094637, "percentage": 89.31, "elapsed_time": "11:23:50", "remaining_time": "1:21:49"} +{"current_steps": 2270, "total_steps": 2536, "loss": 0.0211, "lr": 2.7379392050338236e-06, "epoch": 7.160883280757098, "percentage": 89.51, "elapsed_time": "11:25:00", "remaining_time": "1:20:16"} +{"current_steps": 2275, "total_steps": 2536, "loss": 0.0202, "lr": 2.63725758477017e-06, "epoch": 7.176656151419558, "percentage": 89.71, "elapsed_time": "11:26:04", "remaining_time": "1:18:42"} +{"current_steps": 2280, "total_steps": 2536, "loss": 0.0214, "lr": 2.5384118292910818e-06, "epoch": 7.192429022082019, "percentage": 89.91, "elapsed_time": "11:27:08", "remaining_time": "1:17:09"} +{"current_steps": 2285, "total_steps": 2536, "loss": 0.0211, "lr": 2.4414057700351934e-06, "epoch": 7.208201892744479, "percentage": 90.1, "elapsed_time": "11:28:12", "remaining_time": "1:15:35"} +{"current_steps": 2290, "total_steps": 2536, "loss": 0.0232, "lr": 2.34624316713124e-06, "epoch": 7.2239747634069404, "percentage": 90.3, "elapsed_time": "11:29:23", "remaining_time": "1:14:03"} +{"current_steps": 2295, "total_steps": 2536, "loss": 0.0203, "lr": 2.2529277092522503e-06, "epoch": 7.239747634069401, "percentage": 90.5, "elapsed_time": "11:30:32", "remaining_time": "1:12:30"} +{"current_steps": 2300, "total_steps": 2536, "loss": 0.0209, "lr": 2.1614630134726367e-06, "epoch": 7.255520504731861, "percentage": 90.69, "elapsed_time": "11:31:37", "remaining_time": "1:10:58"} +{"current_steps": 2305, "total_steps": 2536, "loss": 0.0193, "lr": 2.0718526251279346e-06, "epoch": 7.271293375394322, "percentage": 90.89, "elapsed_time": "11:36:32", "remaining_time": "1:09:48"} +{"current_steps": 2310, "total_steps": 2536, "loss": 0.0191, "lr": 1.9841000176774148e-06, "epoch": 7.287066246056782, "percentage": 91.09, "elapsed_time": "11:37:38", "remaining_time": "1:08:15"} +{"current_steps": 2315, "total_steps": 2536, "loss": 0.0196, "lr": 1.898208592569406e-06, "epoch": 7.302839116719243, "percentage": 91.29, "elapsed_time": "11:39:29", "remaining_time": "1:06:46"} +{"current_steps": 2320, "total_steps": 2536, "loss": 0.0206, "lr": 1.8141816791095e-06, "epoch": 7.318611987381703, "percentage": 91.48, "elapsed_time": "11:40:34", "remaining_time": "1:05:13"} +{"current_steps": 2325, "total_steps": 2536, "loss": 0.0225, "lr": 1.7320225343314566e-06, "epoch": 7.334384858044164, "percentage": 91.68, "elapsed_time": "11:41:41", "remaining_time": "1:03:40"} +{"current_steps": 2330, "total_steps": 2536, "loss": 0.0216, "lr": 1.6517343428709975e-06, "epoch": 7.350157728706624, "percentage": 91.88, "elapsed_time": "11:42:51", "remaining_time": "1:02:08"} +{"current_steps": 2335, "total_steps": 2536, "loss": 0.02, "lr": 1.5733202168423055e-06, "epoch": 7.365930599369086, "percentage": 92.07, "elapsed_time": "11:44:03", "remaining_time": "1:00:36"} +{"current_steps": 2340, "total_steps": 2536, "loss": 0.022, "lr": 1.4967831957174606e-06, "epoch": 7.381703470031546, "percentage": 92.27, "elapsed_time": "11:45:10", "remaining_time": "0:59:03"} +{"current_steps": 2345, "total_steps": 2536, "loss": 0.0201, "lr": 1.4221262462085715e-06, "epoch": 7.3974763406940065, "percentage": 92.47, "elapsed_time": "11:46:16", "remaining_time": "0:57:31"} +{"current_steps": 2350, "total_steps": 2536, "loss": 0.0213, "lr": 1.3493522621528088e-06, "epoch": 7.413249211356467, "percentage": 92.67, "elapsed_time": "11:47:22", "remaining_time": "0:55:59"} +{"current_steps": 2355, "total_steps": 2536, "loss": 0.0232, "lr": 1.2784640644002366e-06, "epoch": 7.429022082018927, "percentage": 92.86, "elapsed_time": "11:48:34", "remaining_time": "0:54:27"} +{"current_steps": 2360, "total_steps": 2536, "loss": 0.0215, "lr": 1.209464400704452e-06, "epoch": 7.444794952681388, "percentage": 93.06, "elapsed_time": "11:49:40", "remaining_time": "0:52:55"} +{"current_steps": 2365, "total_steps": 2536, "loss": 0.0202, "lr": 1.1423559456160803e-06, "epoch": 7.460567823343848, "percentage": 93.26, "elapsed_time": "11:50:49", "remaining_time": "0:51:23"} +{"current_steps": 2370, "total_steps": 2536, "loss": 0.0203, "lr": 1.0771413003791253e-06, "epoch": 7.476340694006309, "percentage": 93.45, "elapsed_time": "11:51:56", "remaining_time": "0:49:51"} +{"current_steps": 2375, "total_steps": 2536, "loss": 0.019, "lr": 1.0138229928301212e-06, "epoch": 7.492113564668769, "percentage": 93.65, "elapsed_time": "11:53:06", "remaining_time": "0:48:20"} +{"current_steps": 2380, "total_steps": 2536, "loss": 0.0209, "lr": 9.524034773001511e-07, "epoch": 7.50788643533123, "percentage": 93.85, "elapsed_time": "11:54:12", "remaining_time": "0:46:48"} +{"current_steps": 2385, "total_steps": 2536, "loss": 0.0212, "lr": 8.928851345197165e-07, "epoch": 7.523659305993691, "percentage": 94.05, "elapsed_time": "11:55:17", "remaining_time": "0:45:17"} +{"current_steps": 2390, "total_steps": 2536, "loss": 0.0211, "lr": 8.352702715264726e-07, "epoch": 7.539432176656152, "percentage": 94.24, "elapsed_time": "11:56:21", "remaining_time": "0:43:45"} +{"current_steps": 2395, "total_steps": 2536, "loss": 0.0207, "lr": 7.795611215757615e-07, "epoch": 7.555205047318612, "percentage": 94.44, "elapsed_time": "11:57:28", "remaining_time": "0:42:14"} +{"current_steps": 2400, "total_steps": 2536, "loss": 0.019, "lr": 7.257598440540802e-07, "epoch": 7.570977917981073, "percentage": 94.64, "elapsed_time": "11:58:34", "remaining_time": "0:40:43"} +{"current_steps": 2405, "total_steps": 2536, "loss": 0.022, "lr": 6.738685243953769e-07, "epoch": 7.586750788643533, "percentage": 94.83, "elapsed_time": "12:06:03", "remaining_time": "0:39:32"} +{"current_steps": 2410, "total_steps": 2536, "loss": 0.0181, "lr": 6.238891740002195e-07, "epoch": 7.6025236593059935, "percentage": 95.03, "elapsed_time": "12:07:12", "remaining_time": "0:38:01"} +{"current_steps": 2415, "total_steps": 2536, "loss": 0.0224, "lr": 5.758237301577874e-07, "epoch": 7.618296529968454, "percentage": 95.23, "elapsed_time": "12:08:22", "remaining_time": "0:36:29"} +{"current_steps": 2420, "total_steps": 2536, "loss": 0.0198, "lr": 5.296740559708413e-07, "epoch": 7.634069400630915, "percentage": 95.43, "elapsed_time": "12:09:32", "remaining_time": "0:34:58"} +{"current_steps": 2425, "total_steps": 2536, "loss": 0.0188, "lr": 4.854419402834709e-07, "epoch": 7.649842271293375, "percentage": 95.62, "elapsed_time": "12:10:42", "remaining_time": "0:33:26"} +{"current_steps": 2430, "total_steps": 2536, "loss": 0.0211, "lr": 4.431290976117497e-07, "epoch": 7.665615141955836, "percentage": 95.82, "elapsed_time": "12:11:46", "remaining_time": "0:31:55"} +{"current_steps": 2435, "total_steps": 2536, "loss": 0.0212, "lr": 4.0273716807731067e-07, "epoch": 7.681388012618297, "percentage": 96.02, "elapsed_time": "12:12:58", "remaining_time": "0:30:24"} +{"current_steps": 2440, "total_steps": 2536, "loss": 0.0221, "lr": 3.642677173437137e-07, "epoch": 7.697160883280757, "percentage": 96.21, "elapsed_time": "12:14:12", "remaining_time": "0:28:53"} +{"current_steps": 2445, "total_steps": 2536, "loss": 0.0201, "lr": 3.2772223655583857e-07, "epoch": 7.712933753943218, "percentage": 96.41, "elapsed_time": "12:15:15", "remaining_time": "0:27:21"} +{"current_steps": 2450, "total_steps": 2536, "loss": 0.0198, "lr": 2.9310214228202013e-07, "epoch": 7.728706624605678, "percentage": 96.61, "elapsed_time": "12:16:24", "remaining_time": "0:25:50"} +{"current_steps": 2455, "total_steps": 2536, "loss": 0.0208, "lr": 2.604087764591534e-07, "epoch": 7.744479495268139, "percentage": 96.81, "elapsed_time": "12:17:29", "remaining_time": "0:24:19"} +{"current_steps": 2460, "total_steps": 2536, "loss": 0.0203, "lr": 2.2964340634069603e-07, "epoch": 7.760252365930599, "percentage": 97.0, "elapsed_time": "12:18:34", "remaining_time": "0:22:49"} +{"current_steps": 2465, "total_steps": 2536, "loss": 0.0195, "lr": 2.0080722444754118e-07, "epoch": 7.7760252365930596, "percentage": 97.2, "elapsed_time": "12:19:44", "remaining_time": "0:21:18"} +{"current_steps": 2470, "total_steps": 2536, "loss": 0.0209, "lr": 1.7390134852177664e-07, "epoch": 7.79179810725552, "percentage": 97.4, "elapsed_time": "12:20:51", "remaining_time": "0:19:47"} +{"current_steps": 2475, "total_steps": 2536, "loss": 0.0222, "lr": 1.48926821483375e-07, "epoch": 7.807570977917981, "percentage": 97.59, "elapsed_time": "12:22:02", "remaining_time": "0:18:17"} +{"current_steps": 2480, "total_steps": 2536, "loss": 0.0194, "lr": 1.2588461138977604e-07, "epoch": 7.823343848580442, "percentage": 97.79, "elapsed_time": "12:23:11", "remaining_time": "0:16:46"} +{"current_steps": 2485, "total_steps": 2536, "loss": 0.0216, "lr": 1.0477561139832781e-07, "epoch": 7.839116719242902, "percentage": 97.99, "elapsed_time": "12:24:17", "remaining_time": "0:15:16"} +{"current_steps": 2490, "total_steps": 2536, "loss": 0.02, "lr": 8.560063973171439e-08, "epoch": 7.854889589905363, "percentage": 98.19, "elapsed_time": "12:25:22", "remaining_time": "0:13:46"} +{"current_steps": 2495, "total_steps": 2536, "loss": 0.0211, "lr": 6.836043964620342e-08, "epoch": 7.870662460567823, "percentage": 98.38, "elapsed_time": "12:26:28", "remaining_time": "0:12:16"} +{"current_steps": 2500, "total_steps": 2536, "loss": 0.0191, "lr": 5.3055679402846946e-08, "epoch": 7.886435331230284, "percentage": 98.58, "elapsed_time": "12:27:29", "remaining_time": "0:10:45"} +{"current_steps": 2505, "total_steps": 2536, "loss": 0.0216, "lr": 3.968695224158547e-08, "epoch": 7.902208201892744, "percentage": 98.78, "elapsed_time": "12:32:11", "remaining_time": "0:09:18"} +{"current_steps": 2510, "total_steps": 2536, "loss": 0.0198, "lr": 2.8254776358238588e-08, "epoch": 7.917981072555205, "percentage": 98.97, "elapsed_time": "12:33:19", "remaining_time": "0:07:48"} +{"current_steps": 2515, "total_steps": 2536, "loss": 0.0225, "lr": 1.8759594884443233e-08, "epoch": 7.933753943217665, "percentage": 99.17, "elapsed_time": "12:34:27", "remaining_time": "0:06:17"} +{"current_steps": 2520, "total_steps": 2536, "loss": 0.0215, "lr": 1.1201775870445242e-08, "epoch": 7.9495268138801265, "percentage": 99.37, "elapsed_time": "12:35:35", "remaining_time": "0:04:47"} +{"current_steps": 2525, "total_steps": 2536, "loss": 0.0197, "lr": 5.581612270855186e-09, "epoch": 7.965299684542587, "percentage": 99.57, "elapsed_time": "12:36:45", "remaining_time": "0:03:17"} +{"current_steps": 2530, "total_steps": 2536, "loss": 0.022, "lr": 1.8993219332907877e-09, "epoch": 7.981072555205047, "percentage": 99.76, "elapsed_time": "12:37:56", "remaining_time": "0:01:47"} +{"current_steps": 2535, "total_steps": 2536, "loss": 0.0198, "lr": 1.5504758992257451e-10, "epoch": 7.996845425867508, "percentage": 99.96, "elapsed_time": "12:39:03", "remaining_time": "0:00:17"} +{"current_steps": 2536, "total_steps": 2536, "epoch": 8.0, "percentage": 100.0, "elapsed_time": "12:43:06", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..4bb7c8a --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,5624 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 2536, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015772870662460567, + "grad_norm": 2.10712415764294, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.8936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8192116022109985, + "step": 5, + "valid_targets_mean": 3687.0, + "valid_targets_min": 817 + }, + { + "epoch": 0.031545741324921134, + "grad_norm": 0.9774661363114595, + "learning_rate": 6.923076923076924e-05, + "loss": 0.7352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7449174523353577, + "step": 10, + "valid_targets_mean": 3634.9, + "valid_targets_min": 365 + }, + { + "epoch": 0.0473186119873817, + "grad_norm": 0.6675599503431875, + "learning_rate": 9.99999612380875e-05, + "loss": 0.6431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5866308212280273, + "step": 15, + "valid_targets_mean": 4114.3, + "valid_targets_min": 654 + }, + { + "epoch": 0.06309148264984227, + "grad_norm": 0.5597970310360976, + "learning_rate": 9.999860457746025e-05, + "loss": 0.6226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5945974588394165, + "step": 20, + "valid_targets_mean": 3899.3, + "valid_targets_min": 742 + }, + { + "epoch": 0.07886435331230283, + "grad_norm": 0.5130882597109143, + "learning_rate": 9.999530988130677e-05, + "loss": 0.6059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6057475805282593, + "step": 25, + "valid_targets_mean": 3869.3, + "valid_targets_min": 1010 + }, + { + "epoch": 0.0946372239747634, + "grad_norm": 0.5029138300383583, + "learning_rate": 9.999007727733537e-05, + "loss": 0.5852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6053914427757263, + "step": 30, + "valid_targets_mean": 4300.8, + "valid_targets_min": 446 + }, + { + "epoch": 0.11041009463722397, + "grad_norm": 0.5009233906967834, + "learning_rate": 9.998290696837115e-05, + "loss": 0.5905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5980535745620728, + "step": 35, + "valid_targets_mean": 4093.8, + "valid_targets_min": 816 + }, + { + "epoch": 0.12618296529968454, + "grad_norm": 0.47286420993476336, + "learning_rate": 9.997379923234816e-05, + "loss": 0.5804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5337224006652832, + "step": 40, + "valid_targets_mean": 3785.5, + "valid_targets_min": 910 + }, + { + "epoch": 0.14195583596214512, + "grad_norm": 0.49763693295108935, + "learning_rate": 9.996275442229857e-05, + "loss": 0.5623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5855312347412109, + "step": 45, + "valid_targets_mean": 3848.8, + "valid_targets_min": 754 + }, + { + "epoch": 0.15772870662460567, + "grad_norm": 0.40793992020527864, + "learning_rate": 9.994977296633902e-05, + "loss": 0.5505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5050753355026245, + "step": 50, + "valid_targets_mean": 4168.1, + "valid_targets_min": 1222 + }, + { + "epoch": 0.17350157728706625, + "grad_norm": 0.4312126166212651, + "learning_rate": 9.993485536765398e-05, + "loss": 0.5522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5569537878036499, + "step": 55, + "valid_targets_mean": 3675.2, + "valid_targets_min": 1277 + }, + { + "epoch": 0.1892744479495268, + "grad_norm": 0.4778967389940008, + "learning_rate": 9.991800220447634e-05, + "loss": 0.5903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.629382312297821, + "step": 60, + "valid_targets_mean": 3896.7, + "valid_targets_min": 642 + }, + { + "epoch": 0.20504731861198738, + "grad_norm": 0.4447384345869354, + "learning_rate": 9.989921413006489e-05, + "loss": 0.5609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.490943044424057, + "step": 65, + "valid_targets_mean": 4355.4, + "valid_targets_min": 945 + }, + { + "epoch": 0.22082018927444794, + "grad_norm": 0.4939315288398232, + "learning_rate": 9.987849187267908e-05, + "loss": 0.5704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5496993064880371, + "step": 70, + "valid_targets_mean": 3723.2, + "valid_targets_min": 943 + }, + { + "epoch": 0.23659305993690852, + "grad_norm": 0.43230126284224585, + "learning_rate": 9.985583623555076e-05, + "loss": 0.5421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5004186630249023, + "step": 75, + "valid_targets_mean": 4126.2, + "valid_targets_min": 1074 + }, + { + "epoch": 0.25236593059936907, + "grad_norm": 0.47213570082284406, + "learning_rate": 9.9831248096853e-05, + "loss": 0.5597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5496848821640015, + "step": 80, + "valid_targets_mean": 3556.9, + "valid_targets_min": 569 + }, + { + "epoch": 0.26813880126182965, + "grad_norm": 0.4284241766097971, + "learning_rate": 9.980472840966614e-05, + "loss": 0.507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.500241219997406, + "step": 85, + "valid_targets_mean": 3878.6, + "valid_targets_min": 474 + }, + { + "epoch": 0.28391167192429023, + "grad_norm": 0.4383271570844627, + "learning_rate": 9.977627820194082e-05, + "loss": 0.5632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5637352466583252, + "step": 90, + "valid_targets_mean": 3731.9, + "valid_targets_min": 1202 + }, + { + "epoch": 0.2996845425867508, + "grad_norm": 0.4834512693361993, + "learning_rate": 9.974589857645802e-05, + "loss": 0.5778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6166688203811646, + "step": 95, + "valid_targets_mean": 4275.2, + "valid_targets_min": 1037 + }, + { + "epoch": 0.31545741324921134, + "grad_norm": 0.41160421671893854, + "learning_rate": 9.97135907107865e-05, + "loss": 0.5465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5364589691162109, + "step": 100, + "valid_targets_mean": 4273.6, + "valid_targets_min": 885 + }, + { + "epoch": 0.3312302839116719, + "grad_norm": 0.45494806684450917, + "learning_rate": 9.967935585723706e-05, + "loss": 0.5122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5046274065971375, + "step": 105, + "valid_targets_mean": 3732.6, + "valid_targets_min": 1278 + }, + { + "epoch": 0.3470031545741325, + "grad_norm": 0.41775156093151494, + "learning_rate": 9.964319534281397e-05, + "loss": 0.5502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5412352085113525, + "step": 110, + "valid_targets_mean": 4857.0, + "valid_targets_min": 780 + }, + { + "epoch": 0.3627760252365931, + "grad_norm": 0.48940094875997525, + "learning_rate": 9.960511056916357e-05, + "loss": 0.5506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5636942982673645, + "step": 115, + "valid_targets_mean": 3158.1, + "valid_targets_min": 1193 + }, + { + "epoch": 0.3785488958990536, + "grad_norm": 0.44934540558801356, + "learning_rate": 9.956510301251995e-05, + "loss": 0.5505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5873898267745972, + "step": 120, + "valid_targets_mean": 4639.4, + "valid_targets_min": 697 + }, + { + "epoch": 0.3943217665615142, + "grad_norm": 0.39099147631389863, + "learning_rate": 9.952317422364772e-05, + "loss": 0.5214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48694920539855957, + "step": 125, + "valid_targets_mean": 3651.2, + "valid_targets_min": 956 + }, + { + "epoch": 0.41009463722397477, + "grad_norm": 0.40895988402030836, + "learning_rate": 9.947932582778188e-05, + "loss": 0.515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5333250164985657, + "step": 130, + "valid_targets_mean": 4110.1, + "valid_targets_min": 983 + }, + { + "epoch": 0.42586750788643535, + "grad_norm": 0.3679432961583188, + "learning_rate": 9.943355952456483e-05, + "loss": 0.5429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4961763918399811, + "step": 135, + "valid_targets_mean": 4566.8, + "valid_targets_min": 1013 + }, + { + "epoch": 0.4416403785488959, + "grad_norm": 0.4329914192953498, + "learning_rate": 9.938587708798053e-05, + "loss": 0.5364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5525034666061401, + "step": 140, + "valid_targets_mean": 3714.8, + "valid_targets_min": 691 + }, + { + "epoch": 0.45741324921135645, + "grad_norm": 0.4098928363179154, + "learning_rate": 9.933628036628569e-05, + "loss": 0.5434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5565780997276306, + "step": 145, + "valid_targets_mean": 3991.0, + "valid_targets_min": 702 + }, + { + "epoch": 0.47318611987381703, + "grad_norm": 0.4400989861855397, + "learning_rate": 9.92847712819381e-05, + "loss": 0.5111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5241206884384155, + "step": 150, + "valid_targets_mean": 4279.6, + "valid_targets_min": 842 + }, + { + "epoch": 0.4889589905362776, + "grad_norm": 0.4549202385498493, + "learning_rate": 9.923135183152224e-05, + "loss": 0.542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5465066432952881, + "step": 155, + "valid_targets_mean": 3741.0, + "valid_targets_min": 585 + }, + { + "epoch": 0.5047318611987381, + "grad_norm": 0.3762893700885342, + "learning_rate": 9.91760240856717e-05, + "loss": 0.5408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5361104011535645, + "step": 160, + "valid_targets_mean": 4606.3, + "valid_targets_min": 1134 + }, + { + "epoch": 0.5205047318611987, + "grad_norm": 0.41459406484762945, + "learning_rate": 9.91187901889891e-05, + "loss": 0.5203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5669844150543213, + "step": 165, + "valid_targets_mean": 4350.3, + "valid_targets_min": 1150 + }, + { + "epoch": 0.5362776025236593, + "grad_norm": 0.41961355518752774, + "learning_rate": 9.905965235996286e-05, + "loss": 0.5465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5698909759521484, + "step": 170, + "valid_targets_mean": 3405.5, + "valid_targets_min": 651 + }, + { + "epoch": 0.5520504731861199, + "grad_norm": 0.36473330194719544, + "learning_rate": 9.899861289088121e-05, + "loss": 0.5197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4982893764972687, + "step": 175, + "valid_targets_mean": 4402.5, + "valid_targets_min": 768 + }, + { + "epoch": 0.5678233438485805, + "grad_norm": 0.37885650021008505, + "learning_rate": 9.893567414774341e-05, + "loss": 0.4799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4758290946483612, + "step": 180, + "valid_targets_mean": 4281.1, + "valid_targets_min": 1364 + }, + { + "epoch": 0.583596214511041, + "grad_norm": 0.40725591482776213, + "learning_rate": 9.88708385701679e-05, + "loss": 0.5248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5616986155509949, + "step": 185, + "valid_targets_mean": 3954.9, + "valid_targets_min": 1225 + }, + { + "epoch": 0.5993690851735016, + "grad_norm": 0.4686325891209418, + "learning_rate": 9.88041086712979e-05, + "loss": 0.5407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.540499210357666, + "step": 190, + "valid_targets_mean": 3217.9, + "valid_targets_min": 522 + }, + { + "epoch": 0.6151419558359621, + "grad_norm": 0.40781269981110413, + "learning_rate": 9.873548703770388e-05, + "loss": 0.5267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4986717104911804, + "step": 195, + "valid_targets_mean": 3731.8, + "valid_targets_min": 1026 + }, + { + "epoch": 0.6309148264984227, + "grad_norm": 0.436761071558544, + "learning_rate": 9.866497632928336e-05, + "loss": 0.504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5034062266349792, + "step": 200, + "valid_targets_mean": 3127.6, + "valid_targets_min": 593 + }, + { + "epoch": 0.6466876971608833, + "grad_norm": 0.39585150555509635, + "learning_rate": 9.859257927915774e-05, + "loss": 0.5286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.525206446647644, + "step": 205, + "valid_targets_mean": 3862.1, + "valid_targets_min": 740 + }, + { + "epoch": 0.6624605678233438, + "grad_norm": 0.3793446014838136, + "learning_rate": 9.851829869356651e-05, + "loss": 0.5219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4797798991203308, + "step": 210, + "valid_targets_mean": 4016.3, + "valid_targets_min": 1433 + }, + { + "epoch": 0.6782334384858044, + "grad_norm": 0.40979650110290383, + "learning_rate": 9.844213745175826e-05, + "loss": 0.5191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4772030711174011, + "step": 215, + "valid_targets_mean": 3141.0, + "valid_targets_min": 858 + }, + { + "epoch": 0.694006309148265, + "grad_norm": 0.872268733672252, + "learning_rate": 9.83640985058792e-05, + "loss": 0.5118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5154582858085632, + "step": 220, + "valid_targets_mean": 3880.9, + "valid_targets_min": 671 + }, + { + "epoch": 0.7097791798107256, + "grad_norm": 0.35932663295935796, + "learning_rate": 9.828418488085877e-05, + "loss": 0.5221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5506904125213623, + "step": 225, + "valid_targets_mean": 4473.6, + "valid_targets_min": 1008 + }, + { + "epoch": 0.7255520504731862, + "grad_norm": 0.37213420146124015, + "learning_rate": 9.820239967429233e-05, + "loss": 0.5113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4812445640563965, + "step": 230, + "valid_targets_mean": 3646.1, + "valid_targets_min": 1102 + }, + { + "epoch": 0.7413249211356467, + "grad_norm": 0.37628347817834695, + "learning_rate": 9.811874605632104e-05, + "loss": 0.5304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4709596633911133, + "step": 235, + "valid_targets_mean": 4000.4, + "valid_targets_min": 675 + }, + { + "epoch": 0.7570977917981072, + "grad_norm": 0.3787365584846596, + "learning_rate": 9.803322726950905e-05, + "loss": 0.5083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5106516480445862, + "step": 240, + "valid_targets_mean": 3729.7, + "valid_targets_min": 1204 + }, + { + "epoch": 0.7728706624605678, + "grad_norm": 0.4257137668466712, + "learning_rate": 9.794584662871787e-05, + "loss": 0.52, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5036972761154175, + "step": 245, + "valid_targets_mean": 3983.1, + "valid_targets_min": 651 + }, + { + "epoch": 0.7886435331230284, + "grad_norm": 0.4538360065619815, + "learning_rate": 9.785660752097768e-05, + "loss": 0.4882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5045570135116577, + "step": 250, + "valid_targets_mean": 3592.4, + "valid_targets_min": 1115 + }, + { + "epoch": 0.804416403785489, + "grad_norm": 0.3906114583577622, + "learning_rate": 9.77655134053563e-05, + "loss": 0.5434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5284218192100525, + "step": 255, + "valid_targets_mean": 3916.8, + "valid_targets_min": 784 + }, + { + "epoch": 0.8201892744479495, + "grad_norm": 0.4003334881648508, + "learning_rate": 9.767256781282486e-05, + "loss": 0.5356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5656174421310425, + "step": 260, + "valid_targets_mean": 4382.1, + "valid_targets_min": 1023 + }, + { + "epoch": 0.8359621451104101, + "grad_norm": 0.418836486436425, + "learning_rate": 9.757777434612116e-05, + "loss": 0.5247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5147979855537415, + "step": 265, + "valid_targets_mean": 3287.1, + "valid_targets_min": 915 + }, + { + "epoch": 0.8517350157728707, + "grad_norm": 0.43725050822235556, + "learning_rate": 9.748113667960987e-05, + "loss": 0.5046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5037941932678223, + "step": 270, + "valid_targets_mean": 3135.5, + "valid_targets_min": 685 + }, + { + "epoch": 0.8675078864353313, + "grad_norm": 0.4099832470454359, + "learning_rate": 9.738265855914013e-05, + "loss": 0.5147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5275985598564148, + "step": 275, + "valid_targets_mean": 3285.8, + "valid_targets_min": 848 + }, + { + "epoch": 0.8832807570977917, + "grad_norm": 0.38697933436639453, + "learning_rate": 9.728234380190038e-05, + "loss": 0.5112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5363134741783142, + "step": 280, + "valid_targets_mean": 3554.3, + "valid_targets_min": 1004 + }, + { + "epoch": 0.8990536277602523, + "grad_norm": 0.4144345499072098, + "learning_rate": 9.718019629627045e-05, + "loss": 0.5077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5390485525131226, + "step": 285, + "valid_targets_mean": 3550.5, + "valid_targets_min": 830 + }, + { + "epoch": 0.9148264984227129, + "grad_norm": 0.40779021442321706, + "learning_rate": 9.70762200016707e-05, + "loss": 0.5297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5196043252944946, + "step": 290, + "valid_targets_mean": 3194.9, + "valid_targets_min": 705 + }, + { + "epoch": 0.9305993690851735, + "grad_norm": 0.41913428246000933, + "learning_rate": 9.697041894840865e-05, + "loss": 0.5364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5191986560821533, + "step": 295, + "valid_targets_mean": 3030.6, + "valid_targets_min": 926 + }, + { + "epoch": 0.9463722397476341, + "grad_norm": 0.37846562168385284, + "learning_rate": 9.68627972375228e-05, + "loss": 0.5107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5184040665626526, + "step": 300, + "valid_targets_mean": 3679.3, + "valid_targets_min": 703 + }, + { + "epoch": 0.9621451104100947, + "grad_norm": 0.38807743355601315, + "learning_rate": 9.675335904062353e-05, + "loss": 0.4958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4978182315826416, + "step": 305, + "valid_targets_mean": 4090.1, + "valid_targets_min": 719 + }, + { + "epoch": 0.9779179810725552, + "grad_norm": 0.39285110507395976, + "learning_rate": 9.66421085997315e-05, + "loss": 0.5171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46750667691230774, + "step": 310, + "valid_targets_mean": 4286.0, + "valid_targets_min": 989 + }, + { + "epoch": 0.9936908517350158, + "grad_norm": 0.40270559083571517, + "learning_rate": 9.65290502271132e-05, + "loss": 0.5318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4890715777873993, + "step": 315, + "valid_targets_mean": 3354.5, + "valid_targets_min": 391 + }, + { + "epoch": 1.0094637223974763, + "grad_norm": 0.3709916964455041, + "learning_rate": 9.641418830511377e-05, + "loss": 0.4546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4400157928466797, + "step": 320, + "valid_targets_mean": 4205.9, + "valid_targets_min": 915 + }, + { + "epoch": 1.025236593059937, + "grad_norm": 0.4453308989523916, + "learning_rate": 9.62975272859872e-05, + "loss": 0.4463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48303818702697754, + "step": 325, + "valid_targets_mean": 3650.5, + "valid_targets_min": 1388 + }, + { + "epoch": 1.0410094637223974, + "grad_norm": 0.34756060161330476, + "learning_rate": 9.617907169172367e-05, + "loss": 0.4202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42825114727020264, + "step": 330, + "valid_targets_mean": 4366.3, + "valid_targets_min": 1115 + }, + { + "epoch": 1.0567823343848581, + "grad_norm": 0.38452531417431646, + "learning_rate": 9.605882611387432e-05, + "loss": 0.4191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4215315878391266, + "step": 335, + "valid_targets_mean": 3585.4, + "valid_targets_min": 812 + }, + { + "epoch": 1.0725552050473186, + "grad_norm": 0.4197099264114279, + "learning_rate": 9.593679521337327e-05, + "loss": 0.4242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45517975091934204, + "step": 340, + "valid_targets_mean": 2850.7, + "valid_targets_min": 725 + }, + { + "epoch": 1.088328075709779, + "grad_norm": 0.4608025595294115, + "learning_rate": 9.581298372035695e-05, + "loss": 0.4375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4695129692554474, + "step": 345, + "valid_targets_mean": 3005.2, + "valid_targets_min": 342 + }, + { + "epoch": 1.1041009463722398, + "grad_norm": 0.34967584895053533, + "learning_rate": 9.56873964339807e-05, + "loss": 0.4139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4164508879184723, + "step": 350, + "valid_targets_mean": 4542.8, + "valid_targets_min": 1330 + }, + { + "epoch": 1.1198738170347002, + "grad_norm": 0.35503463540821656, + "learning_rate": 9.556003822223287e-05, + "loss": 0.4362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43841874599456787, + "step": 355, + "valid_targets_mean": 4594.4, + "valid_targets_min": 1093 + }, + { + "epoch": 1.135646687697161, + "grad_norm": 0.3547341341046614, + "learning_rate": 9.5430914021746e-05, + "loss": 0.4258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40891870856285095, + "step": 360, + "valid_targets_mean": 4080.0, + "valid_targets_min": 908 + }, + { + "epoch": 1.1514195583596214, + "grad_norm": 0.4252609654588314, + "learning_rate": 9.530002883760552e-05, + "loss": 0.4447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.48524558544158936, + "step": 365, + "valid_targets_mean": 3646.4, + "valid_targets_min": 522 + }, + { + "epoch": 1.167192429022082, + "grad_norm": 0.35287486751046293, + "learning_rate": 9.516738774315577e-05, + "loss": 0.4143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43369510769844055, + "step": 370, + "valid_targets_mean": 4042.0, + "valid_targets_min": 1049 + }, + { + "epoch": 1.1829652996845426, + "grad_norm": 0.38302840718587533, + "learning_rate": 9.503299587980331e-05, + "loss": 0.4281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44888949394226074, + "step": 375, + "valid_targets_mean": 3829.6, + "valid_targets_min": 829 + }, + { + "epoch": 1.1987381703470033, + "grad_norm": 0.36310365542476164, + "learning_rate": 9.489685845681762e-05, + "loss": 0.4347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43655747175216675, + "step": 380, + "valid_targets_mean": 4141.0, + "valid_targets_min": 1150 + }, + { + "epoch": 1.2145110410094637, + "grad_norm": 0.37590745843069956, + "learning_rate": 9.47589807511292e-05, + "loss": 0.4368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43766650557518005, + "step": 385, + "valid_targets_mean": 3409.5, + "valid_targets_min": 703 + }, + { + "epoch": 1.2302839116719242, + "grad_norm": 0.3376482168093794, + "learning_rate": 9.461936810712507e-05, + "loss": 0.4168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3962699770927429, + "step": 390, + "valid_targets_mean": 4696.7, + "valid_targets_min": 1198 + }, + { + "epoch": 1.2460567823343849, + "grad_norm": 0.3480246618851481, + "learning_rate": 9.447802593644152e-05, + "loss": 0.4415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3955923616886139, + "step": 395, + "valid_targets_mean": 3463.5, + "valid_targets_min": 560 + }, + { + "epoch": 1.2618296529968454, + "grad_norm": 0.35180890961734984, + "learning_rate": 9.433495971775444e-05, + "loss": 0.419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40084391832351685, + "step": 400, + "valid_targets_mean": 4159.3, + "valid_targets_min": 719 + }, + { + "epoch": 1.277602523659306, + "grad_norm": 0.4096835641332222, + "learning_rate": 9.419017499656686e-05, + "loss": 0.4336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4419606328010559, + "step": 405, + "valid_targets_mean": 3472.3, + "valid_targets_min": 758 + }, + { + "epoch": 1.2933753943217665, + "grad_norm": 0.36227272561580437, + "learning_rate": 9.404367738499409e-05, + "loss": 0.4441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44684967398643494, + "step": 410, + "valid_targets_mean": 3887.8, + "valid_targets_min": 1040 + }, + { + "epoch": 1.3091482649842272, + "grad_norm": 0.3449680032240986, + "learning_rate": 9.38954725615461e-05, + "loss": 0.4359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39267343282699585, + "step": 415, + "valid_targets_mean": 3567.4, + "valid_targets_min": 1021 + }, + { + "epoch": 1.3249211356466877, + "grad_norm": 0.3699446767121469, + "learning_rate": 9.374556627090749e-05, + "loss": 0.4434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4599018692970276, + "step": 420, + "valid_targets_mean": 4210.3, + "valid_targets_min": 669 + }, + { + "epoch": 1.3406940063091484, + "grad_norm": 0.3625079489176437, + "learning_rate": 9.359396432371476e-05, + "loss": 0.4405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43227165937423706, + "step": 425, + "valid_targets_mean": 3604.4, + "valid_targets_min": 642 + }, + { + "epoch": 1.3564668769716088, + "grad_norm": 0.38755290007795334, + "learning_rate": 9.344067259633112e-05, + "loss": 0.4582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44076699018478394, + "step": 430, + "valid_targets_mean": 3779.0, + "valid_targets_min": 1163 + }, + { + "epoch": 1.3722397476340693, + "grad_norm": 0.3550196025820148, + "learning_rate": 9.328569703061862e-05, + "loss": 0.4309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4183352589607239, + "step": 435, + "valid_targets_mean": 3847.6, + "valid_targets_min": 631 + }, + { + "epoch": 1.38801261829653, + "grad_norm": 0.39219100707944626, + "learning_rate": 9.3129043633708e-05, + "loss": 0.4341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.426186203956604, + "step": 440, + "valid_targets_mean": 3420.6, + "valid_targets_min": 1036 + }, + { + "epoch": 1.4037854889589905, + "grad_norm": 0.360700574937905, + "learning_rate": 9.297071847776568e-05, + "loss": 0.4132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44991418719291687, + "step": 445, + "valid_targets_mean": 3767.9, + "valid_targets_min": 828 + }, + { + "epoch": 1.4195583596214512, + "grad_norm": 0.37641964237057424, + "learning_rate": 9.281072769975847e-05, + "loss": 0.4408, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4608301818370819, + "step": 450, + "valid_targets_mean": 3779.9, + "valid_targets_min": 354 + }, + { + "epoch": 1.4353312302839116, + "grad_norm": 0.3370616755969605, + "learning_rate": 9.264907750121568e-05, + "loss": 0.4422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42679429054260254, + "step": 455, + "valid_targets_mean": 4083.6, + "valid_targets_min": 731 + }, + { + "epoch": 1.4511041009463723, + "grad_norm": 0.34995421177522484, + "learning_rate": 9.248577414798871e-05, + "loss": 0.4453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4372273087501526, + "step": 460, + "valid_targets_mean": 3869.3, + "valid_targets_min": 653 + }, + { + "epoch": 1.4668769716088328, + "grad_norm": 0.37055018885128926, + "learning_rate": 9.232082397000826e-05, + "loss": 0.4358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4264824688434601, + "step": 465, + "valid_targets_mean": 3745.2, + "valid_targets_min": 500 + }, + { + "epoch": 1.4826498422712935, + "grad_norm": 0.363197861815723, + "learning_rate": 9.215423336103884e-05, + "loss": 0.4281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40679222345352173, + "step": 470, + "valid_targets_mean": 3635.5, + "valid_targets_min": 987 + }, + { + "epoch": 1.498422712933754, + "grad_norm": 0.3442484040779993, + "learning_rate": 9.198600877843105e-05, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4230436682701111, + "step": 475, + "valid_targets_mean": 3334.7, + "valid_targets_min": 814 + }, + { + "epoch": 1.5141955835962144, + "grad_norm": 0.3445106797069375, + "learning_rate": 9.181615674287121e-05, + "loss": 0.4507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43903082609176636, + "step": 480, + "valid_targets_mean": 4332.8, + "valid_targets_min": 1028 + }, + { + "epoch": 1.5299684542586751, + "grad_norm": 0.3522345901043456, + "learning_rate": 9.164468383812864e-05, + "loss": 0.4405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.460207462310791, + "step": 485, + "valid_targets_mean": 4291.9, + "valid_targets_min": 1243 + }, + { + "epoch": 1.5457413249211358, + "grad_norm": 0.3141130162512159, + "learning_rate": 9.147159671080049e-05, + "loss": 0.4294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38489389419555664, + "step": 490, + "valid_targets_mean": 3873.9, + "valid_targets_min": 1078 + }, + { + "epoch": 1.5615141955835963, + "grad_norm": 0.3321618402403598, + "learning_rate": 9.129690207005402e-05, + "loss": 0.4239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4298274517059326, + "step": 495, + "valid_targets_mean": 4232.3, + "valid_targets_min": 661 + }, + { + "epoch": 1.5772870662460567, + "grad_norm": 0.3772263086814816, + "learning_rate": 9.11206066873666e-05, + "loss": 0.4347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4175015687942505, + "step": 500, + "valid_targets_mean": 3304.5, + "valid_targets_min": 736 + }, + { + "epoch": 1.5930599369085172, + "grad_norm": 0.3791083420905323, + "learning_rate": 9.094271739626326e-05, + "loss": 0.4593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.43678396940231323, + "step": 505, + "valid_targets_mean": 3435.6, + "valid_targets_min": 764 + }, + { + "epoch": 1.608832807570978, + "grad_norm": 0.3540073477173805, + "learning_rate": 9.076324109205174e-05, + "loss": 0.4157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4308760166168213, + "step": 510, + "valid_targets_mean": 3936.9, + "valid_targets_min": 1311 + }, + { + "epoch": 1.6246056782334386, + "grad_norm": 0.41009175361690536, + "learning_rate": 9.058218473155528e-05, + "loss": 0.4525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47333914041519165, + "step": 515, + "valid_targets_mean": 3115.0, + "valid_targets_min": 568 + }, + { + "epoch": 1.640378548895899, + "grad_norm": 0.3581860575720006, + "learning_rate": 9.039955533284292e-05, + "loss": 0.4214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4739847481250763, + "step": 520, + "valid_targets_mean": 3708.1, + "valid_targets_min": 747 + }, + { + "epoch": 1.6561514195583595, + "grad_norm": 0.35281416810470595, + "learning_rate": 9.021535997495749e-05, + "loss": 0.4461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4247710704803467, + "step": 525, + "valid_targets_mean": 4113.6, + "valid_targets_min": 1120 + }, + { + "epoch": 1.6719242902208202, + "grad_norm": 0.39866157176828804, + "learning_rate": 9.002960579764116e-05, + "loss": 0.4407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4475296139717102, + "step": 530, + "valid_targets_mean": 3678.4, + "valid_targets_min": 758 + }, + { + "epoch": 1.687697160883281, + "grad_norm": 0.4652704918320769, + "learning_rate": 8.984230000105882e-05, + "loss": 0.4314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4161592125892639, + "step": 535, + "valid_targets_mean": 3779.4, + "valid_targets_min": 858 + }, + { + "epoch": 1.7034700315457414, + "grad_norm": 0.3562761183074548, + "learning_rate": 8.965344984551882e-05, + "loss": 0.4398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.47303807735443115, + "step": 540, + "valid_targets_mean": 4028.3, + "valid_targets_min": 972 + }, + { + "epoch": 1.7192429022082019, + "grad_norm": 0.3588165266557382, + "learning_rate": 8.946306265119167e-05, + "loss": 0.4389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4207335412502289, + "step": 545, + "valid_targets_mean": 3279.6, + "valid_targets_min": 1061 + }, + { + "epoch": 1.7350157728706623, + "grad_norm": 0.33519799683499096, + "learning_rate": 8.927114579782625e-05, + "loss": 0.4288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4520818293094635, + "step": 550, + "valid_targets_mean": 4337.9, + "valid_targets_min": 1212 + }, + { + "epoch": 1.750788643533123, + "grad_norm": 0.33728276483321373, + "learning_rate": 8.907770672446381e-05, + "loss": 0.4424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4397907555103302, + "step": 555, + "valid_targets_mean": 4163.6, + "valid_targets_min": 1286 + }, + { + "epoch": 1.7665615141955837, + "grad_norm": 0.3538051501164244, + "learning_rate": 8.888275292914948e-05, + "loss": 0.4189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.440610408782959, + "step": 560, + "valid_targets_mean": 3841.4, + "valid_targets_min": 792 + }, + { + "epoch": 1.7823343848580442, + "grad_norm": 0.3490339556278616, + "learning_rate": 8.868629196864182e-05, + "loss": 0.4083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4395935535430908, + "step": 565, + "valid_targets_mean": 3784.7, + "valid_targets_min": 957 + }, + { + "epoch": 1.7981072555205047, + "grad_norm": 0.34343195791539693, + "learning_rate": 8.848833145811976e-05, + "loss": 0.4458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4762798249721527, + "step": 570, + "valid_targets_mean": 4463.1, + "valid_targets_min": 474 + }, + { + "epoch": 1.8138801261829653, + "grad_norm": 0.33882505522639184, + "learning_rate": 8.828887907088753e-05, + "loss": 0.4215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4558441638946533, + "step": 575, + "valid_targets_mean": 4094.3, + "valid_targets_min": 808 + }, + { + "epoch": 1.8296529968454258, + "grad_norm": 0.34470869364399237, + "learning_rate": 8.808794253807707e-05, + "loss": 0.439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4418623149394989, + "step": 580, + "valid_targets_mean": 3988.2, + "valid_targets_min": 842 + }, + { + "epoch": 1.8454258675078865, + "grad_norm": 0.3600842094200383, + "learning_rate": 8.788552964834859e-05, + "loss": 0.4216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4537474513053894, + "step": 585, + "valid_targets_mean": 3940.2, + "valid_targets_min": 863 + }, + { + "epoch": 1.861198738170347, + "grad_norm": 0.3234868618958924, + "learning_rate": 8.768164824758846e-05, + "loss": 0.4411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40534308552742004, + "step": 590, + "valid_targets_mean": 4239.0, + "valid_targets_min": 976 + }, + { + "epoch": 1.8769716088328074, + "grad_norm": 0.32320981845573427, + "learning_rate": 8.747630623860521e-05, + "loss": 0.4492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44744548201560974, + "step": 595, + "valid_targets_mean": 3968.0, + "valid_targets_min": 719 + }, + { + "epoch": 1.8927444794952681, + "grad_norm": 0.31725359888592364, + "learning_rate": 8.726951158082311e-05, + "loss": 0.4475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3871520459651947, + "step": 600, + "valid_targets_mean": 3981.0, + "valid_targets_min": 349 + }, + { + "epoch": 1.9085173501577288, + "grad_norm": 0.3662531918465924, + "learning_rate": 8.706127228997376e-05, + "loss": 0.4272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4548760652542114, + "step": 605, + "valid_targets_mean": 3999.3, + "valid_targets_min": 754 + }, + { + "epoch": 1.9242902208201893, + "grad_norm": 0.31683366829417364, + "learning_rate": 8.685159643778528e-05, + "loss": 0.4211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4224669933319092, + "step": 610, + "valid_targets_mean": 4014.4, + "valid_targets_min": 1244 + }, + { + "epoch": 1.9400630914826498, + "grad_norm": 0.3057131358839659, + "learning_rate": 8.664049215166955e-05, + "loss": 0.4143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4011157155036926, + "step": 615, + "valid_targets_mean": 3895.8, + "valid_targets_min": 793 + }, + { + "epoch": 1.9558359621451105, + "grad_norm": 0.3257751873552026, + "learning_rate": 8.6427967614407e-05, + "loss": 0.4392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4327627718448639, + "step": 620, + "valid_targets_mean": 4740.3, + "valid_targets_min": 960 + }, + { + "epoch": 1.971608832807571, + "grad_norm": 0.35556965233198373, + "learning_rate": 8.621403106382968e-05, + "loss": 0.4323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42427748441696167, + "step": 625, + "valid_targets_mean": 3652.9, + "valid_targets_min": 1776 + }, + { + "epoch": 1.9873817034700316, + "grad_norm": 0.3644471959911388, + "learning_rate": 8.599869079250165e-05, + "loss": 0.4511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.485731303691864, + "step": 630, + "valid_targets_mean": 3775.1, + "valid_targets_min": 1036 + }, + { + "epoch": 2.003154574132492, + "grad_norm": 0.4559171531570724, + "learning_rate": 8.578195514739784e-05, + "loss": 0.4148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34623274207115173, + "step": 635, + "valid_targets_mean": 3056.2, + "valid_targets_min": 1239 + }, + { + "epoch": 2.0189274447949526, + "grad_norm": 0.40803925338454106, + "learning_rate": 8.556383252958026e-05, + "loss": 0.3381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3074378967285156, + "step": 640, + "valid_targets_mean": 3432.8, + "valid_targets_min": 1036 + }, + { + "epoch": 2.034700315457413, + "grad_norm": 0.38557128553930253, + "learning_rate": 8.534433139387259e-05, + "loss": 0.3389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3428223133087158, + "step": 645, + "valid_targets_mean": 3720.1, + "valid_targets_min": 1067 + }, + { + "epoch": 2.050473186119874, + "grad_norm": 0.42496386491574595, + "learning_rate": 8.512346024853219e-05, + "loss": 0.3169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.319104939699173, + "step": 650, + "valid_targets_mean": 3313.9, + "valid_targets_min": 959 + }, + { + "epoch": 2.0662460567823344, + "grad_norm": 0.3804707123087557, + "learning_rate": 8.490122765492057e-05, + "loss": 0.3269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3144036829471588, + "step": 655, + "valid_targets_mean": 3432.0, + "valid_targets_min": 675 + }, + { + "epoch": 2.082018927444795, + "grad_norm": 0.37065795614271224, + "learning_rate": 8.467764222717136e-05, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31387022137641907, + "step": 660, + "valid_targets_mean": 3739.8, + "valid_targets_min": 474 + }, + { + "epoch": 2.0977917981072554, + "grad_norm": 0.3400238081259839, + "learning_rate": 8.445271263185646e-05, + "loss": 0.333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31331780552864075, + "step": 665, + "valid_targets_mean": 4724.2, + "valid_targets_min": 1383 + }, + { + "epoch": 2.1135646687697163, + "grad_norm": 0.3628222919421193, + "learning_rate": 8.422644758765012e-05, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31967639923095703, + "step": 670, + "valid_targets_mean": 3822.9, + "valid_targets_min": 803 + }, + { + "epoch": 2.1293375394321767, + "grad_norm": 0.36831178305919104, + "learning_rate": 8.399885586499101e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32112836837768555, + "step": 675, + "valid_targets_mean": 3602.7, + "valid_targets_min": 391 + }, + { + "epoch": 2.145110410094637, + "grad_norm": 0.3831623829696854, + "learning_rate": 8.376994628574219e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36429059505462646, + "step": 680, + "valid_targets_mean": 3788.7, + "valid_targets_min": 1044 + }, + { + "epoch": 2.1608832807570977, + "grad_norm": 0.3670709990709447, + "learning_rate": 8.353972772284927e-05, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35088732838630676, + "step": 685, + "valid_targets_mean": 4248.5, + "valid_targets_min": 1029 + }, + { + "epoch": 2.176656151419558, + "grad_norm": 0.364629689849054, + "learning_rate": 8.330820909999633e-05, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3233838975429535, + "step": 690, + "valid_targets_mean": 3572.8, + "valid_targets_min": 677 + }, + { + "epoch": 2.192429022082019, + "grad_norm": 0.40198074720693766, + "learning_rate": 8.307539939126016e-05, + "loss": 0.3455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3670358657836914, + "step": 695, + "valid_targets_mean": 3640.4, + "valid_targets_min": 999 + }, + { + "epoch": 2.2082018927444795, + "grad_norm": 0.3833761912733954, + "learning_rate": 8.284130762076235e-05, + "loss": 0.3309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3193661570549011, + "step": 700, + "valid_targets_mean": 3502.1, + "valid_targets_min": 758 + }, + { + "epoch": 2.22397476340694, + "grad_norm": 0.37358275865160673, + "learning_rate": 8.260594286231947e-05, + "loss": 0.3183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32544875144958496, + "step": 705, + "valid_targets_mean": 3447.4, + "valid_targets_min": 716 + }, + { + "epoch": 2.2397476340694005, + "grad_norm": 0.40281010784525473, + "learning_rate": 8.236931423909138e-05, + "loss": 0.349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.349687397480011, + "step": 710, + "valid_targets_mean": 3201.8, + "valid_targets_min": 629 + }, + { + "epoch": 2.2555205047318614, + "grad_norm": 0.372290714508795, + "learning_rate": 8.213143092322769e-05, + "loss": 0.3271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32800039649009705, + "step": 715, + "valid_targets_mean": 3917.9, + "valid_targets_min": 914 + }, + { + "epoch": 2.271293375394322, + "grad_norm": 0.3460102094154886, + "learning_rate": 8.189230213551202e-05, + "loss": 0.3097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3035333752632141, + "step": 720, + "valid_targets_mean": 3878.7, + "valid_targets_min": 1054 + }, + { + "epoch": 2.2870662460567823, + "grad_norm": 0.3620344634281311, + "learning_rate": 8.165193714500481e-05, + "loss": 0.3464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34915387630462646, + "step": 725, + "valid_targets_mean": 3865.6, + "valid_targets_min": 1181 + }, + { + "epoch": 2.302839116719243, + "grad_norm": 0.37246776518218855, + "learning_rate": 8.141034526868389e-05, + "loss": 0.3422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36882883310317993, + "step": 730, + "valid_targets_mean": 3895.3, + "valid_targets_min": 754 + }, + { + "epoch": 2.3186119873817033, + "grad_norm": 0.32928444752637037, + "learning_rate": 8.116753587108339e-05, + "loss": 0.3341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3063160181045532, + "step": 735, + "valid_targets_mean": 4141.0, + "valid_targets_min": 736 + }, + { + "epoch": 2.334384858044164, + "grad_norm": 0.3682340278296583, + "learning_rate": 8.092351836393076e-05, + "loss": 0.3144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31867101788520813, + "step": 740, + "valid_targets_mean": 3966.4, + "valid_targets_min": 970 + }, + { + "epoch": 2.3501577287066246, + "grad_norm": 0.35635977114846723, + "learning_rate": 8.067830220578191e-05, + "loss": 0.344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33521783351898193, + "step": 745, + "valid_targets_mean": 4157.5, + "valid_targets_min": 814 + }, + { + "epoch": 2.365930599369085, + "grad_norm": 0.36473851290128406, + "learning_rate": 8.043189690165467e-05, + "loss": 0.336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35809627175331116, + "step": 750, + "valid_targets_mean": 4096.0, + "valid_targets_min": 1158 + }, + { + "epoch": 2.3817034700315456, + "grad_norm": 0.37400084060251504, + "learning_rate": 8.018431200266023e-05, + "loss": 0.3277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35486137866973877, + "step": 755, + "valid_targets_mean": 3988.1, + "valid_targets_min": 562 + }, + { + "epoch": 2.3974763406940065, + "grad_norm": 0.3366207837520557, + "learning_rate": 7.993555710563303e-05, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.332172691822052, + "step": 760, + "valid_targets_mean": 4782.4, + "valid_targets_min": 1155 + }, + { + "epoch": 2.413249211356467, + "grad_norm": 0.3488722448988735, + "learning_rate": 7.968564185275873e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3320530354976654, + "step": 765, + "valid_targets_mean": 3837.2, + "valid_targets_min": 1672 + }, + { + "epoch": 2.4290220820189274, + "grad_norm": 0.3972434223513897, + "learning_rate": 7.943457593120045e-05, + "loss": 0.3592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3777347803115845, + "step": 770, + "valid_targets_mean": 3375.6, + "valid_targets_min": 740 + }, + { + "epoch": 2.444794952681388, + "grad_norm": 0.3729282593792925, + "learning_rate": 7.918236907272327e-05, + "loss": 0.33, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3274171054363251, + "step": 775, + "valid_targets_mean": 3582.3, + "valid_targets_min": 837 + }, + { + "epoch": 2.4605678233438484, + "grad_norm": 0.3883786155015376, + "learning_rate": 7.892903105331712e-05, + "loss": 0.3429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35154858231544495, + "step": 780, + "valid_targets_mean": 3429.6, + "valid_targets_min": 595 + }, + { + "epoch": 2.4763406940063093, + "grad_norm": 0.3785442572227149, + "learning_rate": 7.867457169281765e-05, + "loss": 0.354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3599446713924408, + "step": 785, + "valid_targets_mean": 3588.4, + "valid_targets_min": 653 + }, + { + "epoch": 2.4921135646687698, + "grad_norm": 0.3969027010396338, + "learning_rate": 7.841900085452574e-05, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3688129782676697, + "step": 790, + "valid_targets_mean": 3276.2, + "valid_targets_min": 768 + }, + { + "epoch": 2.5078864353312302, + "grad_norm": 0.35793841017431804, + "learning_rate": 7.816232844482516e-05, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3582887351512909, + "step": 795, + "valid_targets_mean": 3903.1, + "valid_targets_min": 848 + }, + { + "epoch": 2.5236593059936907, + "grad_norm": 0.31866040247414423, + "learning_rate": 7.790456441279853e-05, + "loss": 0.3453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3136487305164337, + "step": 800, + "valid_targets_mean": 4573.3, + "valid_targets_min": 968 + }, + { + "epoch": 2.5394321766561516, + "grad_norm": 0.3695839385218983, + "learning_rate": 7.764571874984174e-05, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38279229402542114, + "step": 805, + "valid_targets_mean": 4102.4, + "valid_targets_min": 809 + }, + { + "epoch": 2.555205047318612, + "grad_norm": 0.35522576716550336, + "learning_rate": 7.73858014892766e-05, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35253381729125977, + "step": 810, + "valid_targets_mean": 4143.5, + "valid_targets_min": 685 + }, + { + "epoch": 2.5709779179810726, + "grad_norm": 0.3484915600741624, + "learning_rate": 7.712482270596199e-05, + "loss": 0.3684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3393446207046509, + "step": 815, + "valid_targets_mean": 3947.6, + "valid_targets_min": 642 + }, + { + "epoch": 2.586750788643533, + "grad_norm": 0.3666547381975771, + "learning_rate": 7.686279251590331e-05, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33391496539115906, + "step": 820, + "valid_targets_mean": 3900.9, + "valid_targets_min": 965 + }, + { + "epoch": 2.6025236593059935, + "grad_norm": 0.36844544062401197, + "learning_rate": 7.659972107586035e-05, + "loss": 0.3327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3613488972187042, + "step": 825, + "valid_targets_mean": 3755.7, + "valid_targets_min": 1073 + }, + { + "epoch": 2.6182965299684544, + "grad_norm": 0.3537018070308048, + "learning_rate": 7.633561858295364e-05, + "loss": 0.3234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35583215951919556, + "step": 830, + "valid_targets_mean": 3956.0, + "valid_targets_min": 764 + }, + { + "epoch": 2.634069400630915, + "grad_norm": 0.34075110327660973, + "learning_rate": 7.607049527426916e-05, + "loss": 0.3435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3498722016811371, + "step": 835, + "valid_targets_mean": 4906.1, + "valid_targets_min": 1010 + }, + { + "epoch": 2.6498422712933754, + "grad_norm": 0.39311133022228784, + "learning_rate": 7.580436142646155e-05, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.350446879863739, + "step": 840, + "valid_targets_mean": 2970.5, + "valid_targets_min": 866 + }, + { + "epoch": 2.665615141955836, + "grad_norm": 0.3317613529209523, + "learning_rate": 7.55372273553557e-05, + "loss": 0.3291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33733218908309937, + "step": 845, + "valid_targets_mean": 4193.8, + "valid_targets_min": 830 + }, + { + "epoch": 2.6813880126182967, + "grad_norm": 0.32952833500411555, + "learning_rate": 7.526910341554703e-05, + "loss": 0.345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3293534815311432, + "step": 850, + "valid_targets_mean": 4271.3, + "valid_targets_min": 1399 + }, + { + "epoch": 2.697160883280757, + "grad_norm": 0.36855434381056507, + "learning_rate": 7.500000000000001e-05, + "loss": 0.3484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3580437898635864, + "step": 855, + "valid_targets_mean": 3858.8, + "valid_targets_min": 1387 + }, + { + "epoch": 2.7129337539432177, + "grad_norm": 0.3307915071742468, + "learning_rate": 7.472992753964532e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31549596786499023, + "step": 860, + "valid_targets_mean": 4362.1, + "valid_targets_min": 735 + }, + { + "epoch": 2.728706624605678, + "grad_norm": 0.34192067680044314, + "learning_rate": 7.445889650297559e-05, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.327921062707901, + "step": 865, + "valid_targets_mean": 3860.6, + "valid_targets_min": 915 + }, + { + "epoch": 2.7444794952681386, + "grad_norm": 0.35825582522669586, + "learning_rate": 7.418691739563957e-05, + "loss": 0.3287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3101162910461426, + "step": 870, + "valid_targets_mean": 3518.5, + "valid_targets_min": 1276 + }, + { + "epoch": 2.7602523659305995, + "grad_norm": 0.33762129670022484, + "learning_rate": 7.391400076003492e-05, + "loss": 0.3552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3207801580429077, + "step": 875, + "valid_targets_mean": 4098.6, + "valid_targets_min": 1129 + }, + { + "epoch": 2.77602523659306, + "grad_norm": 0.40233829114726466, + "learning_rate": 7.36401571748996e-05, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3593313694000244, + "step": 880, + "valid_targets_mean": 3392.5, + "valid_targets_min": 342 + }, + { + "epoch": 2.7917981072555205, + "grad_norm": 0.4023791936424401, + "learning_rate": 7.336539725490178e-05, + "loss": 0.3486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36353376507759094, + "step": 885, + "valid_targets_mean": 3129.3, + "valid_targets_min": 972 + }, + { + "epoch": 2.807570977917981, + "grad_norm": 0.32978487154903324, + "learning_rate": 7.30897316502284e-05, + "loss": 0.3511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3517262935638428, + "step": 890, + "valid_targets_mean": 4831.5, + "valid_targets_min": 1005 + }, + { + "epoch": 2.823343848580442, + "grad_norm": 0.36941821632032373, + "learning_rate": 7.281317104617239e-05, + "loss": 0.3477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3774658143520355, + "step": 895, + "valid_targets_mean": 4029.0, + "valid_targets_min": 840 + }, + { + "epoch": 2.8391167192429023, + "grad_norm": 0.3612788509739381, + "learning_rate": 7.253572616271844e-05, + "loss": 0.3471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.359031081199646, + "step": 900, + "valid_targets_mean": 3890.6, + "valid_targets_min": 811 + }, + { + "epoch": 2.854889589905363, + "grad_norm": 0.3432351388874631, + "learning_rate": 7.225740775412751e-05, + "loss": 0.3324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32447415590286255, + "step": 905, + "valid_targets_mean": 3857.4, + "valid_targets_min": 868 + }, + { + "epoch": 2.8706624605678233, + "grad_norm": 0.3716864210975034, + "learning_rate": 7.197822660851991e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32068637013435364, + "step": 910, + "valid_targets_mean": 3354.0, + "valid_targets_min": 703 + }, + { + "epoch": 2.8864353312302837, + "grad_norm": 0.35233671009289513, + "learning_rate": 7.169819354745725e-05, + "loss": 0.3351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3392893075942993, + "step": 915, + "valid_targets_mean": 4084.0, + "valid_targets_min": 932 + }, + { + "epoch": 2.9022082018927446, + "grad_norm": 0.3205935329399274, + "learning_rate": 7.141731942552288e-05, + "loss": 0.3322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3268066644668579, + "step": 920, + "valid_targets_mean": 4559.8, + "valid_targets_min": 746 + }, + { + "epoch": 2.917981072555205, + "grad_norm": 0.3486479847884881, + "learning_rate": 7.113561512990119e-05, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3648620843887329, + "step": 925, + "valid_targets_mean": 3818.8, + "valid_targets_min": 876 + }, + { + "epoch": 2.9337539432176656, + "grad_norm": 0.36527758089754836, + "learning_rate": 7.085309157995557e-05, + "loss": 0.3696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34798184037208557, + "step": 930, + "valid_targets_mean": 3527.9, + "valid_targets_min": 961 + }, + { + "epoch": 2.949526813880126, + "grad_norm": 0.36376704798091514, + "learning_rate": 7.056975972680517e-05, + "loss": 0.3571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3389028310775757, + "step": 935, + "valid_targets_mean": 3556.2, + "valid_targets_min": 661 + }, + { + "epoch": 2.965299684542587, + "grad_norm": 0.3427826212480962, + "learning_rate": 7.028563055290044e-05, + "loss": 0.3197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3057384490966797, + "step": 940, + "valid_targets_mean": 3776.8, + "valid_targets_min": 1288 + }, + { + "epoch": 2.9810725552050474, + "grad_norm": 0.38147759578333684, + "learning_rate": 7.000071507159744e-05, + "loss": 0.355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36406630277633667, + "step": 945, + "valid_targets_mean": 3465.0, + "valid_targets_min": 568 + }, + { + "epoch": 2.996845425867508, + "grad_norm": 0.37933496166116637, + "learning_rate": 6.971502432673085e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3924667239189148, + "step": 950, + "valid_targets_mean": 4021.2, + "valid_targets_min": 924 + }, + { + "epoch": 3.0126182965299684, + "grad_norm": 0.7687482700099385, + "learning_rate": 6.942856939218599e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22858980298042297, + "step": 955, + "valid_targets_mean": 4278.5, + "valid_targets_min": 1016 + }, + { + "epoch": 3.028391167192429, + "grad_norm": 0.43436948992902524, + "learning_rate": 6.914136137146951e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22961165010929108, + "step": 960, + "valid_targets_mean": 3152.1, + "valid_targets_min": 917 + }, + { + "epoch": 3.0441640378548898, + "grad_norm": 0.43226297800728486, + "learning_rate": 6.885341139727912e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23175911605358124, + "step": 965, + "valid_targets_mean": 4503.0, + "valid_targets_min": 1248 + }, + { + "epoch": 3.0599369085173502, + "grad_norm": 0.3586837830266431, + "learning_rate": 6.856473063107187e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2144646793603897, + "step": 970, + "valid_targets_mean": 4602.6, + "valid_targets_min": 708 + }, + { + "epoch": 3.0757097791798107, + "grad_norm": 0.39714316101409813, + "learning_rate": 6.827533026263169e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23752881586551666, + "step": 975, + "valid_targets_mean": 4112.2, + "valid_targets_min": 885 + }, + { + "epoch": 3.091482649842271, + "grad_norm": 0.4034188598172916, + "learning_rate": 6.798522150963552e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21137112379074097, + "step": 980, + "valid_targets_mean": 3846.7, + "valid_targets_min": 1020 + }, + { + "epoch": 3.107255520504732, + "grad_norm": 0.4243752600843904, + "learning_rate": 6.769441561721863e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23859837651252747, + "step": 985, + "valid_targets_mean": 3453.0, + "valid_targets_min": 789 + }, + { + "epoch": 3.1230283911671926, + "grad_norm": 0.4129713315777461, + "learning_rate": 6.740292385753858e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19675597548484802, + "step": 990, + "valid_targets_mean": 3582.6, + "valid_targets_min": 1137 + }, + { + "epoch": 3.138801261829653, + "grad_norm": 0.38020570852371904, + "learning_rate": 6.711075752933847e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21929940581321716, + "step": 995, + "valid_targets_mean": 4322.8, + "valid_targets_min": 938 + }, + { + "epoch": 3.1545741324921135, + "grad_norm": 0.388542102333, + "learning_rate": 6.681792795750875e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24065446853637695, + "step": 1000, + "valid_targets_mean": 4211.5, + "valid_targets_min": 938 + }, + { + "epoch": 3.170347003154574, + "grad_norm": 0.37062046006312427, + "learning_rate": 6.652444649264856e-05, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23021221160888672, + "step": 1005, + "valid_targets_mean": 4916.3, + "valid_targets_min": 1624 + }, + { + "epoch": 3.186119873817035, + "grad_norm": 0.4340392691381971, + "learning_rate": 6.623032451062542e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661784887313843, + "step": 1010, + "valid_targets_mean": 3899.9, + "valid_targets_min": 1083 + }, + { + "epoch": 3.2018927444794953, + "grad_norm": 0.4260266697363941, + "learning_rate": 6.593557341213457e-05, + "loss": 0.2375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24500736594200134, + "step": 1015, + "valid_targets_mean": 3857.8, + "valid_targets_min": 958 + }, + { + "epoch": 3.217665615141956, + "grad_norm": 0.4350503137533814, + "learning_rate": 6.564020462225679e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24792088568210602, + "step": 1020, + "valid_targets_mean": 3748.6, + "valid_targets_min": 654 + }, + { + "epoch": 3.2334384858044163, + "grad_norm": 0.39020228509903115, + "learning_rate": 6.534422959001585e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22573116421699524, + "step": 1025, + "valid_targets_mean": 4069.3, + "valid_targets_min": 731 + }, + { + "epoch": 3.249211356466877, + "grad_norm": 0.4119380724882183, + "learning_rate": 6.504765978793443e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22467444837093353, + "step": 1030, + "valid_targets_mean": 4076.1, + "valid_targets_min": 1297 + }, + { + "epoch": 3.2649842271293377, + "grad_norm": 0.4184873455063318, + "learning_rate": 6.475050671158961e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2501946687698364, + "step": 1035, + "valid_targets_mean": 3618.0, + "valid_targets_min": 922 + }, + { + "epoch": 3.280757097791798, + "grad_norm": 0.41146183333825054, + "learning_rate": 6.445278187916722e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22016121447086334, + "step": 1040, + "valid_targets_mean": 3798.8, + "valid_targets_min": 1069 + }, + { + "epoch": 3.2965299684542586, + "grad_norm": 0.38970074428462953, + "learning_rate": 6.415449683101537e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23840738832950592, + "step": 1045, + "valid_targets_mean": 3985.5, + "valid_targets_min": 1023 + }, + { + "epoch": 3.312302839116719, + "grad_norm": 0.42514461171505874, + "learning_rate": 6.385566312919716e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21439124643802643, + "step": 1050, + "valid_targets_mean": 3354.7, + "valid_targets_min": 1369 + }, + { + "epoch": 3.32807570977918, + "grad_norm": 0.4244276185537002, + "learning_rate": 6.355629235704248e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2335590124130249, + "step": 1055, + "valid_targets_mean": 3675.7, + "valid_targets_min": 1329 + }, + { + "epoch": 3.3438485804416405, + "grad_norm": 0.4215367440869733, + "learning_rate": 6.3256396118699e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21470558643341064, + "step": 1060, + "valid_targets_mean": 3397.9, + "valid_targets_min": 987 + }, + { + "epoch": 3.359621451104101, + "grad_norm": 0.44050673596004053, + "learning_rate": 6.295598603868246e-05, + "loss": 0.2323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2552182078361511, + "step": 1065, + "valid_targets_mean": 3271.8, + "valid_targets_min": 872 + }, + { + "epoch": 3.3753943217665614, + "grad_norm": 0.414073575423661, + "learning_rate": 6.265507376142594e-05, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24249374866485596, + "step": 1070, + "valid_targets_mean": 3873.2, + "valid_targets_min": 1058 + }, + { + "epoch": 3.3911671924290223, + "grad_norm": 0.38016468873162756, + "learning_rate": 6.235367095082867e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20872919261455536, + "step": 1075, + "valid_targets_mean": 4038.8, + "valid_targets_min": 1127 + }, + { + "epoch": 3.406940063091483, + "grad_norm": 0.3928656169287651, + "learning_rate": 6.205178928980377e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22502362728118896, + "step": 1080, + "valid_targets_mean": 3922.1, + "valid_targets_min": 666 + }, + { + "epoch": 3.4227129337539433, + "grad_norm": 0.4255777640996923, + "learning_rate": 6.174944047982549e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22144320607185364, + "step": 1085, + "valid_targets_mean": 3059.8, + "valid_targets_min": 825 + }, + { + "epoch": 3.4384858044164037, + "grad_norm": 0.4452758552171811, + "learning_rate": 6.144663624047564e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23618116974830627, + "step": 1090, + "valid_targets_mean": 3132.2, + "valid_targets_min": 834 + }, + { + "epoch": 3.454258675078864, + "grad_norm": 0.4147082956357252, + "learning_rate": 6.114338830898922e-05, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2344067096710205, + "step": 1095, + "valid_targets_mean": 3651.3, + "valid_targets_min": 1256 + }, + { + "epoch": 3.470031545741325, + "grad_norm": 0.44296538705101896, + "learning_rate": 6.083970843979957e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2572486996650696, + "step": 1100, + "valid_targets_mean": 3208.3, + "valid_targets_min": 675 + }, + { + "epoch": 3.4858044164037856, + "grad_norm": 0.4335697262123166, + "learning_rate": 6.0535608404082724e-05, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24830931425094604, + "step": 1105, + "valid_targets_mean": 3336.1, + "valid_targets_min": 849 + }, + { + "epoch": 3.501577287066246, + "grad_norm": 0.4001268713135724, + "learning_rate": 6.0231099989301086e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24537333846092224, + "step": 1110, + "valid_targets_mean": 4053.1, + "valid_targets_min": 828 + }, + { + "epoch": 3.5173501577287065, + "grad_norm": 0.38032531600886793, + "learning_rate": 5.9926194998746624e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2191232293844223, + "step": 1115, + "valid_targets_mean": 3963.7, + "valid_targets_min": 474 + }, + { + "epoch": 3.5331230283911674, + "grad_norm": 0.4413111113181332, + "learning_rate": 5.9620905251083196e-05, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25337281823158264, + "step": 1120, + "valid_targets_mean": 3356.7, + "valid_targets_min": 534 + }, + { + "epoch": 3.548895899053628, + "grad_norm": 0.3907243447394178, + "learning_rate": 5.931524257988864e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22448721528053284, + "step": 1125, + "valid_targets_mean": 4019.4, + "valid_targets_min": 1130 + }, + { + "epoch": 3.5646687697160884, + "grad_norm": 0.37968688458970584, + "learning_rate": 5.900921883319591e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24035479128360748, + "step": 1130, + "valid_targets_mean": 4474.2, + "valid_targets_min": 1402 + }, + { + "epoch": 3.580441640378549, + "grad_norm": 0.4579142841218182, + "learning_rate": 5.870284587303394e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24654285609722137, + "step": 1135, + "valid_targets_mean": 3167.7, + "valid_targets_min": 772 + }, + { + "epoch": 3.5962145110410093, + "grad_norm": 0.3865017231040676, + "learning_rate": 5.839613557496776e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23607927560806274, + "step": 1140, + "valid_targets_mean": 4443.2, + "valid_targets_min": 782 + }, + { + "epoch": 3.61198738170347, + "grad_norm": 0.40786084317752475, + "learning_rate": 5.808909982763825e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23828022181987762, + "step": 1145, + "valid_targets_mean": 4382.2, + "valid_targets_min": 833 + }, + { + "epoch": 3.6277602523659307, + "grad_norm": 0.41683246256171275, + "learning_rate": 5.778175053230126e-05, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2074422836303711, + "step": 1150, + "valid_targets_mean": 3207.0, + "valid_targets_min": 816 + }, + { + "epoch": 3.643533123028391, + "grad_norm": 0.4127754651476796, + "learning_rate": 5.747409960236637e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2237945795059204, + "step": 1155, + "valid_targets_mean": 3538.0, + "valid_targets_min": 1163 + }, + { + "epoch": 3.6593059936908516, + "grad_norm": 0.3939385020204566, + "learning_rate": 5.716615896293501e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22520986199378967, + "step": 1160, + "valid_targets_mean": 4009.8, + "valid_targets_min": 836 + }, + { + "epoch": 3.6750788643533125, + "grad_norm": 0.40230964374737266, + "learning_rate": 5.68579405503383e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2363431453704834, + "step": 1165, + "valid_targets_mean": 3878.6, + "valid_targets_min": 626 + }, + { + "epoch": 3.690851735015773, + "grad_norm": 0.34994582499941074, + "learning_rate": 5.654945631167433e-05, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.250026673078537, + "step": 1170, + "valid_targets_mean": 5134.4, + "valid_targets_min": 1468 + }, + { + "epoch": 3.7066246056782335, + "grad_norm": 0.40797908376310166, + "learning_rate": 5.624071820434508e-05, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25660961866378784, + "step": 1175, + "valid_targets_mean": 3818.5, + "valid_targets_min": 678 + }, + { + "epoch": 3.722397476340694, + "grad_norm": 0.387553152436449, + "learning_rate": 5.593173819559294e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2387440800666809, + "step": 1180, + "valid_targets_mean": 4164.5, + "valid_targets_min": 961 + }, + { + "epoch": 3.7381703470031544, + "grad_norm": 0.44744488912147923, + "learning_rate": 5.562252826203687e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23685699701309204, + "step": 1185, + "valid_targets_mean": 3143.5, + "valid_targets_min": 958 + }, + { + "epoch": 3.753943217665615, + "grad_norm": 0.38892932566117155, + "learning_rate": 5.531310038920805e-05, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24442225694656372, + "step": 1190, + "valid_targets_mean": 3944.6, + "valid_targets_min": 730 + }, + { + "epoch": 3.769716088328076, + "grad_norm": 0.3790093924419974, + "learning_rate": 5.500346657108545e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028028666973114, + "step": 1195, + "valid_targets_mean": 4588.8, + "valid_targets_min": 1308 + }, + { + "epoch": 3.7854889589905363, + "grad_norm": 0.4589841933590628, + "learning_rate": 5.469363880963082e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24983350932598114, + "step": 1200, + "valid_targets_mean": 3242.1, + "valid_targets_min": 1442 + }, + { + "epoch": 3.8012618296529967, + "grad_norm": 0.45996699804173213, + "learning_rate": 5.438362911432347e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26556554436683655, + "step": 1205, + "valid_targets_mean": 3398.3, + "valid_targets_min": 741 + }, + { + "epoch": 3.8170347003154577, + "grad_norm": 0.41234694688404394, + "learning_rate": 5.407344950169486e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418709397315979, + "step": 1210, + "valid_targets_mean": 3575.7, + "valid_targets_min": 978 + }, + { + "epoch": 3.832807570977918, + "grad_norm": 0.4238199028423431, + "learning_rate": 5.376311199486268e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25250011682510376, + "step": 1215, + "valid_targets_mean": 3547.4, + "valid_targets_min": 814 + }, + { + "epoch": 3.8485804416403786, + "grad_norm": 0.42252745143491205, + "learning_rate": 5.3452628623064934e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23513750731945038, + "step": 1220, + "valid_targets_mean": 3706.2, + "valid_targets_min": 860 + }, + { + "epoch": 3.864353312302839, + "grad_norm": 0.43277188709174047, + "learning_rate": 5.31420114211936e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2320515215396881, + "step": 1225, + "valid_targets_mean": 3372.7, + "valid_targets_min": 916 + }, + { + "epoch": 3.8801261829652995, + "grad_norm": 0.39821348246740956, + "learning_rate": 5.2831272429328116e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21725791692733765, + "step": 1230, + "valid_targets_mean": 4264.4, + "valid_targets_min": 841 + }, + { + "epoch": 3.89589905362776, + "grad_norm": 0.4007963343918992, + "learning_rate": 5.2520423692268775e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2254546582698822, + "step": 1235, + "valid_targets_mean": 3844.1, + "valid_targets_min": 568 + }, + { + "epoch": 3.911671924290221, + "grad_norm": 0.35934744702001803, + "learning_rate": 5.220947725906975e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21963903307914734, + "step": 1240, + "valid_targets_mean": 4466.3, + "valid_targets_min": 1140 + }, + { + "epoch": 3.9274447949526814, + "grad_norm": 0.4086674443050181, + "learning_rate": 5.18984451825721e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23662705719470978, + "step": 1245, + "valid_targets_mean": 4148.2, + "valid_targets_min": 1165 + }, + { + "epoch": 3.943217665615142, + "grad_norm": 0.38047162434691456, + "learning_rate": 5.1587339518936585e-05, + "loss": 0.2365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2408026158809662, + "step": 1250, + "valid_targets_mean": 4111.3, + "valid_targets_min": 970 + }, + { + "epoch": 3.958990536277603, + "grad_norm": 0.4205686296344959, + "learning_rate": 5.127617232717631e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25451695919036865, + "step": 1255, + "valid_targets_mean": 3483.0, + "valid_targets_min": 868 + }, + { + "epoch": 3.9747634069400632, + "grad_norm": 0.43266492423061176, + "learning_rate": 5.096495566868935e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25790631771087646, + "step": 1260, + "valid_targets_mean": 3801.1, + "valid_targets_min": 868 + }, + { + "epoch": 3.9905362776025237, + "grad_norm": 0.4219647080671064, + "learning_rate": 5.065370160679115e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22469377517700195, + "step": 1265, + "valid_targets_mean": 3419.8, + "valid_targets_min": 1298 + }, + { + "epoch": 4.006309148264984, + "grad_norm": 0.49226944511675513, + "learning_rate": 5.034242220624706e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13946948945522308, + "step": 1270, + "valid_targets_mean": 3082.3, + "valid_targets_min": 932 + }, + { + "epoch": 4.022082018927445, + "grad_norm": 0.6221728712902935, + "learning_rate": 5.003112953280452e-05, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15345337986946106, + "step": 1275, + "valid_targets_mean": 3482.0, + "valid_targets_min": 768 + }, + { + "epoch": 4.037854889589905, + "grad_norm": 0.3815629781365611, + "learning_rate": 4.971983565272553e-05, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1290070116519928, + "step": 1280, + "valid_targets_mean": 4213.8, + "valid_targets_min": 1064 + }, + { + "epoch": 4.053627760252366, + "grad_norm": 0.45240879430701475, + "learning_rate": 4.940855263231873e-05, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12267524003982544, + "step": 1285, + "valid_targets_mean": 3384.7, + "valid_targets_min": 685 + }, + { + "epoch": 4.069400630914826, + "grad_norm": 0.40926725045177215, + "learning_rate": 4.909729253747197e-05, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13643282651901245, + "step": 1290, + "valid_targets_mean": 3988.4, + "valid_targets_min": 915 + }, + { + "epoch": 4.085173501577287, + "grad_norm": 0.4347926355819619, + "learning_rate": 4.878606743318439e-05, + "loss": 0.1351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13089679181575775, + "step": 1295, + "valid_targets_mean": 3411.6, + "valid_targets_min": 917 + }, + { + "epoch": 4.100946372239748, + "grad_norm": 0.400630100691516, + "learning_rate": 4.8474889383098855e-05, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13528120517730713, + "step": 1300, + "valid_targets_mean": 4608.4, + "valid_targets_min": 1201 + }, + { + "epoch": 4.116719242902208, + "grad_norm": 0.4905201948395251, + "learning_rate": 4.816377044903428e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14674723148345947, + "step": 1305, + "valid_targets_mean": 2800.1, + "valid_targets_min": 798 + }, + { + "epoch": 4.132492113564669, + "grad_norm": 0.4149285083502539, + "learning_rate": 4.7852722690518196e-05, + "loss": 0.1345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1416550576686859, + "step": 1310, + "valid_targets_mean": 3899.2, + "valid_targets_min": 848 + }, + { + "epoch": 4.148264984227129, + "grad_norm": 0.4652902054882981, + "learning_rate": 4.75417581643192e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14979085326194763, + "step": 1315, + "valid_targets_mean": 3797.9, + "valid_targets_min": 742 + }, + { + "epoch": 4.16403785488959, + "grad_norm": 0.4464495393262253, + "learning_rate": 4.723088892397968e-05, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14508263766765594, + "step": 1320, + "valid_targets_mean": 3469.8, + "valid_targets_min": 829 + }, + { + "epoch": 4.17981072555205, + "grad_norm": 0.4207271698801715, + "learning_rate": 4.6920127019348556e-05, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12700632214546204, + "step": 1325, + "valid_targets_mean": 3914.0, + "valid_targets_min": 1416 + }, + { + "epoch": 4.195583596214511, + "grad_norm": 0.39530673997204535, + "learning_rate": 4.6609484496114256e-05, + "loss": 0.1358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1171342134475708, + "step": 1330, + "valid_targets_mean": 3599.9, + "valid_targets_min": 1036 + }, + { + "epoch": 4.211356466876971, + "grad_norm": 0.4359234636330998, + "learning_rate": 4.629897339533771e-05, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14114472270011902, + "step": 1335, + "valid_targets_mean": 3454.4, + "valid_targets_min": 675 + }, + { + "epoch": 4.2271293375394325, + "grad_norm": 0.38230985967596276, + "learning_rate": 4.598860575298575e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1264978051185608, + "step": 1340, + "valid_targets_mean": 4712.0, + "valid_targets_min": 1327 + }, + { + "epoch": 4.242902208201893, + "grad_norm": 0.4482098278603598, + "learning_rate": 4.5678393599464435e-05, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15272407233715057, + "step": 1345, + "valid_targets_mean": 3620.5, + "valid_targets_min": 802 + }, + { + "epoch": 4.2586750788643535, + "grad_norm": 0.4164117840232126, + "learning_rate": 4.5368348959152864e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15474383533000946, + "step": 1350, + "valid_targets_mean": 4407.8, + "valid_targets_min": 1581 + }, + { + "epoch": 4.274447949526814, + "grad_norm": 0.37499863585046184, + "learning_rate": 4.505848384993696e-05, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12045424431562424, + "step": 1355, + "valid_targets_mean": 4614.6, + "valid_targets_min": 780 + }, + { + "epoch": 4.290220820189274, + "grad_norm": 0.42783658900424754, + "learning_rate": 4.474881028274375e-05, + "loss": 0.1456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14601534605026245, + "step": 1360, + "valid_targets_mean": 3950.4, + "valid_targets_min": 349 + }, + { + "epoch": 4.305993690851735, + "grad_norm": 0.4290375086012617, + "learning_rate": 4.4439340261075716e-05, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13034974038600922, + "step": 1365, + "valid_targets_mean": 3599.8, + "valid_targets_min": 1338 + }, + { + "epoch": 4.321766561514195, + "grad_norm": 0.4018259137777466, + "learning_rate": 4.413008578054558e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1399686485528946, + "step": 1370, + "valid_targets_mean": 4561.3, + "valid_targets_min": 1524 + }, + { + "epoch": 4.337539432176656, + "grad_norm": 0.453887769739834, + "learning_rate": 4.3821058828411244e-05, + "loss": 0.1398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445344090461731, + "step": 1375, + "valid_targets_mean": 3351.2, + "valid_targets_min": 412 + }, + { + "epoch": 4.353312302839116, + "grad_norm": 0.37659645397595065, + "learning_rate": 4.35122713831113e-05, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1223079115152359, + "step": 1380, + "valid_targets_mean": 4772.9, + "valid_targets_min": 1073 + }, + { + "epoch": 4.369085173501578, + "grad_norm": 0.4290002370950025, + "learning_rate": 4.320373541380054e-05, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15943288803100586, + "step": 1385, + "valid_targets_mean": 4049.0, + "valid_targets_min": 873 + }, + { + "epoch": 4.384858044164038, + "grad_norm": 0.4192131149390768, + "learning_rate": 4.289546287988614e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14616958796977997, + "step": 1390, + "valid_targets_mean": 4097.8, + "valid_targets_min": 1280 + }, + { + "epoch": 4.400630914826499, + "grad_norm": 0.4310227821196779, + "learning_rate": 4.258746573056401e-05, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1378534734249115, + "step": 1395, + "valid_targets_mean": 3732.5, + "valid_targets_min": 975 + }, + { + "epoch": 4.416403785488959, + "grad_norm": 0.4602627297055569, + "learning_rate": 4.2279755904355704e-05, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1524263620376587, + "step": 1400, + "valid_targets_mean": 3250.9, + "valid_targets_min": 669 + }, + { + "epoch": 4.4321766561514195, + "grad_norm": 0.4103528741168458, + "learning_rate": 4.197234532864558e-05, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12123788148164749, + "step": 1405, + "valid_targets_mean": 3816.8, + "valid_targets_min": 836 + }, + { + "epoch": 4.44794952681388, + "grad_norm": 0.4287914927460044, + "learning_rate": 4.1665245919218544e-05, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15397454798221588, + "step": 1410, + "valid_targets_mean": 3597.8, + "valid_targets_min": 989 + }, + { + "epoch": 4.4637223974763405, + "grad_norm": 0.49713335377038753, + "learning_rate": 4.135846957979811e-05, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13250550627708435, + "step": 1415, + "valid_targets_mean": 2671.5, + "valid_targets_min": 595 + }, + { + "epoch": 4.479495268138801, + "grad_norm": 0.41810789903728096, + "learning_rate": 4.105202820158503e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13871216773986816, + "step": 1420, + "valid_targets_mean": 3532.3, + "valid_targets_min": 1167 + }, + { + "epoch": 4.495268138801261, + "grad_norm": 0.4908723102270641, + "learning_rate": 4.074593366279636e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14446024596691132, + "step": 1425, + "valid_targets_mean": 3162.2, + "valid_targets_min": 1073 + }, + { + "epoch": 4.511041009463723, + "grad_norm": 0.3864233145682553, + "learning_rate": 4.044019782820505e-05, + "loss": 0.1333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.131387859582901, + "step": 1430, + "valid_targets_mean": 4543.9, + "valid_targets_min": 1454 + }, + { + "epoch": 4.526813880126183, + "grad_norm": 0.4059100723836895, + "learning_rate": 4.0134832548680006e-05, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14086687564849854, + "step": 1435, + "valid_targets_mean": 4403.4, + "valid_targets_min": 1406 + }, + { + "epoch": 4.542586750788644, + "grad_norm": 0.3884860866095086, + "learning_rate": 3.982984966072677e-05, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13908298313617706, + "step": 1440, + "valid_targets_mean": 4406.6, + "valid_targets_min": 1514 + }, + { + "epoch": 4.558359621451104, + "grad_norm": 0.4293134592856557, + "learning_rate": 3.952526098602873e-05, + "loss": 0.1371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15750595927238464, + "step": 1445, + "valid_targets_mean": 4256.6, + "valid_targets_min": 782 + }, + { + "epoch": 4.574132492113565, + "grad_norm": 0.43968176363289035, + "learning_rate": 3.9221078330988806e-05, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1521768718957901, + "step": 1450, + "valid_targets_mean": 3652.6, + "valid_targets_min": 681 + }, + { + "epoch": 4.589905362776025, + "grad_norm": 0.48471267391387524, + "learning_rate": 3.89173134862719e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1417105495929718, + "step": 1455, + "valid_targets_mean": 3109.5, + "valid_targets_min": 678 + }, + { + "epoch": 4.605678233438486, + "grad_norm": 0.41654046912638915, + "learning_rate": 3.861397822634784e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14843851327896118, + "step": 1460, + "valid_targets_mean": 4048.7, + "valid_targets_min": 328 + }, + { + "epoch": 4.621451104100946, + "grad_norm": 0.4416300594817172, + "learning_rate": 3.831108430903494e-05, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14418508112430573, + "step": 1465, + "valid_targets_mean": 3894.4, + "valid_targets_min": 1312 + }, + { + "epoch": 4.6372239747634065, + "grad_norm": 0.4231618335639366, + "learning_rate": 3.800864347504437e-05, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14216049015522003, + "step": 1470, + "valid_targets_mean": 4004.8, + "valid_targets_min": 1163 + }, + { + "epoch": 4.652996845425868, + "grad_norm": 0.452051661219182, + "learning_rate": 3.7706667447524876e-05, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13259270787239075, + "step": 1475, + "valid_targets_mean": 3253.7, + "valid_targets_min": 642 + }, + { + "epoch": 4.668769716088328, + "grad_norm": 0.4757582718396925, + "learning_rate": 3.740516793160855e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14734065532684326, + "step": 1480, + "valid_targets_mean": 3495.1, + "valid_targets_min": 754 + }, + { + "epoch": 4.684542586750789, + "grad_norm": 0.38955482510214673, + "learning_rate": 3.710415661395699e-05, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13405834138393402, + "step": 1485, + "valid_targets_mean": 4140.7, + "valid_targets_min": 938 + }, + { + "epoch": 4.700315457413249, + "grad_norm": 0.43015283370193924, + "learning_rate": 3.6803645162308376e-05, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15300995111465454, + "step": 1490, + "valid_targets_mean": 4602.2, + "valid_targets_min": 898 + }, + { + "epoch": 4.71608832807571, + "grad_norm": 0.48935718080723706, + "learning_rate": 3.6503645225025175e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15365701913833618, + "step": 1495, + "valid_targets_mean": 3268.4, + "valid_targets_min": 1152 + }, + { + "epoch": 4.73186119873817, + "grad_norm": 0.4055835606886522, + "learning_rate": 3.620416843064266e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12138361483812332, + "step": 1500, + "valid_targets_mean": 4178.7, + "valid_targets_min": 815 + }, + { + "epoch": 4.747634069400631, + "grad_norm": 0.44394130488038175, + "learning_rate": 3.5905226387418126e-05, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13299168646335602, + "step": 1505, + "valid_targets_mean": 3148.7, + "valid_targets_min": 895 + }, + { + "epoch": 4.763406940063091, + "grad_norm": 0.412669241238801, + "learning_rate": 3.5606830682880965e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.145119309425354, + "step": 1510, + "valid_targets_mean": 4012.8, + "valid_targets_min": 1377 + }, + { + "epoch": 4.779179810725552, + "grad_norm": 0.4358244895626672, + "learning_rate": 3.530899288338352e-05, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16828608512878418, + "step": 1515, + "valid_targets_mean": 3939.1, + "valid_targets_min": 730 + }, + { + "epoch": 4.794952681388013, + "grad_norm": 0.4606066916198381, + "learning_rate": 3.501172453365268e-05, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1422199308872223, + "step": 1520, + "valid_targets_mean": 3348.8, + "valid_targets_min": 1047 + }, + { + "epoch": 4.8107255520504735, + "grad_norm": 0.4071505574057695, + "learning_rate": 3.471503715634252e-05, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12694065272808075, + "step": 1525, + "valid_targets_mean": 4031.6, + "valid_targets_min": 926 + }, + { + "epoch": 4.826498422712934, + "grad_norm": 0.4431581332196672, + "learning_rate": 3.44189422515875e-05, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15230685472488403, + "step": 1530, + "valid_targets_mean": 4062.1, + "valid_targets_min": 759 + }, + { + "epoch": 4.842271293375394, + "grad_norm": 0.4748778474958715, + "learning_rate": 3.4123451296556845e-05, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1604817807674408, + "step": 1535, + "valid_targets_mean": 3203.7, + "valid_targets_min": 1156 + }, + { + "epoch": 4.858044164037855, + "grad_norm": 0.42462775603297814, + "learning_rate": 3.382857574500957e-05, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14987151324748993, + "step": 1540, + "valid_targets_mean": 4112.8, + "valid_targets_min": 987 + }, + { + "epoch": 4.873817034700315, + "grad_norm": 0.41404233916053496, + "learning_rate": 3.3534327026850574e-05, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14622119069099426, + "step": 1545, + "valid_targets_mean": 3851.1, + "valid_targets_min": 1549 + }, + { + "epoch": 4.889589905362776, + "grad_norm": 0.42894617135237606, + "learning_rate": 3.324071654768754e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14016184210777283, + "step": 1550, + "valid_targets_mean": 4175.3, + "valid_targets_min": 354 + }, + { + "epoch": 4.905362776025236, + "grad_norm": 0.44548643111263175, + "learning_rate": 3.2947755688388874e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1478036642074585, + "step": 1555, + "valid_targets_mean": 3333.3, + "valid_targets_min": 731 + }, + { + "epoch": 4.921135646687697, + "grad_norm": 0.42131443259817314, + "learning_rate": 3.26554558046426e-05, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12622922658920288, + "step": 1560, + "valid_targets_mean": 3541.7, + "valid_targets_min": 675 + }, + { + "epoch": 4.936908517350158, + "grad_norm": 0.39506807788124915, + "learning_rate": 3.236382822651606e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14205527305603027, + "step": 1565, + "valid_targets_mean": 4171.3, + "valid_targets_min": 725 + }, + { + "epoch": 4.952681388012619, + "grad_norm": 0.4380088542278778, + "learning_rate": 3.207288425801689e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410386860370636, + "step": 1570, + "valid_targets_mean": 3663.4, + "valid_targets_min": 834 + }, + { + "epoch": 4.968454258675079, + "grad_norm": 0.432493466888825, + "learning_rate": 3.1782635176654764e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13422390818595886, + "step": 1575, + "valid_targets_mean": 3374.3, + "valid_targets_min": 1043 + }, + { + "epoch": 4.9842271293375395, + "grad_norm": 0.4080489328075729, + "learning_rate": 3.149309223300428e-05, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494569182395935, + "step": 1580, + "valid_targets_mean": 4281.2, + "valid_targets_min": 1288 + }, + { + "epoch": 5.0, + "grad_norm": 0.3854003237760334, + "learning_rate": 3.120426665026891e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11573436111211777, + "step": 1585, + "valid_targets_mean": 3961.9, + "valid_targets_min": 626 + }, + { + "epoch": 5.0157728706624605, + "grad_norm": 0.6098354724545925, + "learning_rate": 3.091616962384587e-05, + "loss": 0.0784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08880452066659927, + "step": 1590, + "valid_targets_mean": 3453.1, + "valid_targets_min": 730 + }, + { + "epoch": 5.031545741324921, + "grad_norm": 0.3494205951089116, + "learning_rate": 3.06288123208923e-05, + "loss": 0.079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06743300706148148, + "step": 1595, + "valid_targets_mean": 4131.9, + "valid_targets_min": 926 + }, + { + "epoch": 5.047318611987381, + "grad_norm": 0.3562901602719652, + "learning_rate": 3.034220587989226e-05, + "loss": 0.0682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06512755155563354, + "step": 1600, + "valid_targets_mean": 3931.4, + "valid_targets_min": 809 + }, + { + "epoch": 5.063091482649842, + "grad_norm": 0.4441376763123438, + "learning_rate": 3.005636141022512e-05, + "loss": 0.0742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08241255581378937, + "step": 1605, + "valid_targets_mean": 3501.9, + "valid_targets_min": 456 + }, + { + "epoch": 5.078864353312303, + "grad_norm": 0.4035371274358376, + "learning_rate": 2.977128999173482e-05, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07899090647697449, + "step": 1610, + "valid_targets_mean": 3742.7, + "valid_targets_min": 926 + }, + { + "epoch": 5.094637223974764, + "grad_norm": 0.39784769067935233, + "learning_rate": 2.948700267430049e-05, + "loss": 0.075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0730673223733902, + "step": 1615, + "valid_targets_mean": 3794.5, + "valid_targets_min": 899 + }, + { + "epoch": 5.110410094637224, + "grad_norm": 0.3896471949968342, + "learning_rate": 2.920351047740808e-05, + "loss": 0.0771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08270689845085144, + "step": 1620, + "valid_targets_mean": 4006.2, + "valid_targets_min": 1086 + }, + { + "epoch": 5.126182965299685, + "grad_norm": 0.413498748380209, + "learning_rate": 2.892082438972325e-05, + "loss": 0.0785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08353646844625473, + "step": 1625, + "valid_targets_mean": 3624.4, + "valid_targets_min": 915 + }, + { + "epoch": 5.141955835962145, + "grad_norm": 0.39899026705045726, + "learning_rate": 2.863895536866541e-05, + "loss": 0.0776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07658601552248001, + "step": 1630, + "valid_targets_mean": 3703.2, + "valid_targets_min": 725 + }, + { + "epoch": 5.157728706624606, + "grad_norm": 0.40585855723255365, + "learning_rate": 2.835791433998301e-05, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06763305515050888, + "step": 1635, + "valid_targets_mean": 3659.4, + "valid_targets_min": 560 + }, + { + "epoch": 5.173501577287066, + "grad_norm": 0.36447324475486664, + "learning_rate": 2.807771219733004e-05, + "loss": 0.077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06629404425621033, + "step": 1640, + "valid_targets_mean": 3927.2, + "valid_targets_min": 681 + }, + { + "epoch": 5.1892744479495265, + "grad_norm": 0.4157465928083423, + "learning_rate": 2.7798359801843766e-05, + "loss": 0.0807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08127208054065704, + "step": 1645, + "valid_targets_mean": 3681.9, + "valid_targets_min": 651 + }, + { + "epoch": 5.205047318611987, + "grad_norm": 0.39120812377799336, + "learning_rate": 2.7519867981723712e-05, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07300405949354172, + "step": 1650, + "valid_targets_mean": 3552.9, + "valid_targets_min": 832 + }, + { + "epoch": 5.220820189274448, + "grad_norm": 0.3646203530221762, + "learning_rate": 2.724224753181197e-05, + "loss": 0.0787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06782227009534836, + "step": 1655, + "valid_targets_mean": 4073.5, + "valid_targets_min": 1076 + }, + { + "epoch": 5.236593059936909, + "grad_norm": 0.42189063269624233, + "learning_rate": 2.6965509213174777e-05, + "loss": 0.0817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08319750428199768, + "step": 1660, + "valid_targets_mean": 3510.0, + "valid_targets_min": 1061 + }, + { + "epoch": 5.252365930599369, + "grad_norm": 0.42537317875481107, + "learning_rate": 2.6689663752685334e-05, + "loss": 0.0762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08212348818778992, + "step": 1665, + "valid_targets_mean": 3861.3, + "valid_targets_min": 1289 + }, + { + "epoch": 5.26813880126183, + "grad_norm": 0.4097129582116899, + "learning_rate": 2.641472184260809e-05, + "loss": 0.0721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07353250682353973, + "step": 1670, + "valid_targets_mean": 3304.8, + "valid_targets_min": 926 + }, + { + "epoch": 5.28391167192429, + "grad_norm": 0.3622831755547028, + "learning_rate": 2.614069414018428e-05, + "loss": 0.0765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08480450510978699, + "step": 1675, + "valid_targets_mean": 4963.7, + "valid_targets_min": 780 + }, + { + "epoch": 5.299684542586751, + "grad_norm": 0.37198536071840593, + "learning_rate": 2.5867591267218805e-05, + "loss": 0.0729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07586896419525146, + "step": 1680, + "valid_targets_mean": 3997.1, + "valid_targets_min": 1222 + }, + { + "epoch": 5.315457413249211, + "grad_norm": 0.4151958964240305, + "learning_rate": 2.5595423809668452e-05, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07574107497930527, + "step": 1685, + "valid_targets_mean": 3183.7, + "valid_targets_min": 1023 + }, + { + "epoch": 5.331230283911672, + "grad_norm": 0.4471401479805964, + "learning_rate": 2.532420231723172e-05, + "loss": 0.0776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0878693163394928, + "step": 1690, + "valid_targets_mean": 3672.7, + "valid_targets_min": 928 + }, + { + "epoch": 5.347003154574132, + "grad_norm": 0.3979607147767652, + "learning_rate": 2.5053937302939767e-05, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0795070230960846, + "step": 1695, + "valid_targets_mean": 3926.4, + "valid_targets_min": 500 + }, + { + "epoch": 5.3627760252365935, + "grad_norm": 0.39805310039516273, + "learning_rate": 2.4784639242748953e-05, + "loss": 0.074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06955453753471375, + "step": 1700, + "valid_targets_mean": 3630.8, + "valid_targets_min": 568 + }, + { + "epoch": 5.378548895899054, + "grad_norm": 0.39990488537939356, + "learning_rate": 2.451631857513472e-05, + "loss": 0.0766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07759714126586914, + "step": 1705, + "valid_targets_mean": 3671.8, + "valid_targets_min": 1192 + }, + { + "epoch": 5.394321766561514, + "grad_norm": 0.41128757960408396, + "learning_rate": 2.4248985700687084e-05, + "loss": 0.0751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07411588728427887, + "step": 1710, + "valid_targets_mean": 3248.8, + "valid_targets_min": 637 + }, + { + "epoch": 5.410094637223975, + "grad_norm": 0.36997877203420493, + "learning_rate": 2.39826509817074e-05, + "loss": 0.0762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07551965862512589, + "step": 1715, + "valid_targets_mean": 4677.7, + "valid_targets_min": 760 + }, + { + "epoch": 5.425867507886435, + "grad_norm": 0.42528899344663845, + "learning_rate": 2.3717324741806718e-05, + "loss": 0.0802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06979407370090485, + "step": 1720, + "valid_targets_mean": 3843.7, + "valid_targets_min": 1047 + }, + { + "epoch": 5.441640378548896, + "grad_norm": 0.40133076789249866, + "learning_rate": 2.3453017265505673e-05, + "loss": 0.0775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06930731236934662, + "step": 1725, + "valid_targets_mean": 3631.2, + "valid_targets_min": 767 + }, + { + "epoch": 5.457413249211356, + "grad_norm": 0.3939783645403133, + "learning_rate": 2.3189738797835708e-05, + "loss": 0.0759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07065024971961975, + "step": 1730, + "valid_targets_mean": 3818.6, + "valid_targets_min": 965 + }, + { + "epoch": 5.473186119873817, + "grad_norm": 0.39876633480946094, + "learning_rate": 2.292749954394216e-05, + "loss": 0.0756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07639945298433304, + "step": 1735, + "valid_targets_mean": 3225.0, + "valid_targets_min": 792 + }, + { + "epoch": 5.488958990536277, + "grad_norm": 0.38633309766219137, + "learning_rate": 2.266630966868852e-05, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07148244976997375, + "step": 1740, + "valid_targets_mean": 3731.3, + "valid_targets_min": 842 + }, + { + "epoch": 5.504731861198739, + "grad_norm": 0.4037059735230268, + "learning_rate": 2.2406179296262453e-05, + "loss": 0.0774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07692521810531616, + "step": 1745, + "valid_targets_mean": 4191.1, + "valid_targets_min": 815 + }, + { + "epoch": 5.520504731861199, + "grad_norm": 0.6910856549809169, + "learning_rate": 2.2147118509783445e-05, + "loss": 0.0722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.080791175365448, + "step": 1750, + "valid_targets_mean": 3713.2, + "valid_targets_min": 1244 + }, + { + "epoch": 5.5362776025236595, + "grad_norm": 0.3612966647126669, + "learning_rate": 2.1889137350911894e-05, + "loss": 0.0747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07808534801006317, + "step": 1755, + "valid_targets_mean": 4457.6, + "valid_targets_min": 1163 + }, + { + "epoch": 5.55205047318612, + "grad_norm": 0.4128504675429312, + "learning_rate": 2.1632245819459913e-05, + "loss": 0.0747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0801934003829956, + "step": 1760, + "valid_targets_mean": 3004.9, + "valid_targets_min": 1036 + }, + { + "epoch": 5.5678233438485805, + "grad_norm": 0.42681656815957886, + "learning_rate": 2.1376453873003664e-05, + "loss": 0.0788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0812867060303688, + "step": 1765, + "valid_targets_mean": 3769.7, + "valid_targets_min": 867 + }, + { + "epoch": 5.583596214511041, + "grad_norm": 0.3745977123418182, + "learning_rate": 2.112177142649746e-05, + "loss": 0.0817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07582206279039383, + "step": 1770, + "valid_targets_mean": 4242.5, + "valid_targets_min": 1290 + }, + { + "epoch": 5.599369085173501, + "grad_norm": 0.38423946133280185, + "learning_rate": 2.0868208351889402e-05, + "loss": 0.0799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07348137348890305, + "step": 1775, + "valid_targets_mean": 3734.2, + "valid_targets_min": 790 + }, + { + "epoch": 5.615141955835962, + "grad_norm": 0.38528845424332087, + "learning_rate": 2.0615774477738738e-05, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07144745439291, + "step": 1780, + "valid_targets_mean": 3808.6, + "valid_targets_min": 1308 + }, + { + "epoch": 5.630914826498422, + "grad_norm": 0.3897270444181894, + "learning_rate": 2.0364479588834835e-05, + "loss": 0.0791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07546620815992355, + "step": 1785, + "valid_targets_mean": 3786.6, + "valid_targets_min": 955 + }, + { + "epoch": 5.646687697160884, + "grad_norm": 0.3981154786766467, + "learning_rate": 2.0114333425817993e-05, + "loss": 0.0793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08071469515562057, + "step": 1790, + "valid_targets_mean": 3680.7, + "valid_targets_min": 693 + }, + { + "epoch": 5.662460567823344, + "grad_norm": 0.3939851521569102, + "learning_rate": 1.9865345684801846e-05, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07217645645141602, + "step": 1795, + "valid_targets_mean": 3861.2, + "valid_targets_min": 922 + }, + { + "epoch": 5.678233438485805, + "grad_norm": 0.3405630431769784, + "learning_rate": 1.9617526016997486e-05, + "loss": 0.0779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07168835401535034, + "step": 1800, + "valid_targets_mean": 4278.1, + "valid_targets_min": 366 + }, + { + "epoch": 5.694006309148265, + "grad_norm": 0.3997776006057144, + "learning_rate": 1.937088402833943e-05, + "loss": 0.0785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0725889801979065, + "step": 1805, + "valid_targets_mean": 3969.6, + "valid_targets_min": 879 + }, + { + "epoch": 5.709779179810726, + "grad_norm": 0.41257359466214927, + "learning_rate": 1.9125429279113173e-05, + "loss": 0.0784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08337903022766113, + "step": 1810, + "valid_targets_mean": 3554.9, + "valid_targets_min": 815 + }, + { + "epoch": 5.725552050473186, + "grad_norm": 0.404424902916859, + "learning_rate": 1.8881171283584752e-05, + "loss": 0.0803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08362133800983429, + "step": 1815, + "valid_targets_mean": 3983.2, + "valid_targets_min": 1082 + }, + { + "epoch": 5.7413249211356465, + "grad_norm": 0.4050132892679048, + "learning_rate": 1.8638119509631853e-05, + "loss": 0.0797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09016312658786774, + "step": 1820, + "valid_targets_mean": 3855.0, + "valid_targets_min": 562 + }, + { + "epoch": 5.757097791798107, + "grad_norm": 0.3766988903400006, + "learning_rate": 1.839628337837686e-05, + "loss": 0.0799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07612290978431702, + "step": 1825, + "valid_targets_mean": 4090.8, + "valid_targets_min": 926 + }, + { + "epoch": 5.7728706624605675, + "grad_norm": 0.3540967368640231, + "learning_rate": 1.8155672263821666e-05, + "loss": 0.078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07796187698841095, + "step": 1830, + "valid_targets_mean": 4261.0, + "valid_targets_min": 1188 + }, + { + "epoch": 5.788643533123029, + "grad_norm": 0.39126365491562437, + "learning_rate": 1.7916295492484315e-05, + "loss": 0.0786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07326526194810867, + "step": 1835, + "valid_targets_mean": 3482.5, + "valid_targets_min": 829 + }, + { + "epoch": 5.804416403785489, + "grad_norm": 0.40996769308462233, + "learning_rate": 1.7678162343037524e-05, + "loss": 0.0739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07413917779922485, + "step": 1840, + "valid_targets_mean": 3705.1, + "valid_targets_min": 1058 + }, + { + "epoch": 5.82018927444795, + "grad_norm": 0.3819872236638718, + "learning_rate": 1.744128204594893e-05, + "loss": 0.0744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08067099750041962, + "step": 1845, + "valid_targets_mean": 4434.2, + "valid_targets_min": 718 + }, + { + "epoch": 5.83596214511041, + "grad_norm": 0.4216948622143236, + "learning_rate": 1.7205663783123436e-05, + "loss": 0.078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0826166570186615, + "step": 1850, + "valid_targets_mean": 3717.3, + "valid_targets_min": 899 + }, + { + "epoch": 5.851735015772871, + "grad_norm": 0.4348462362368455, + "learning_rate": 1.6971316687547213e-05, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08362829685211182, + "step": 1855, + "valid_targets_mean": 3441.2, + "valid_targets_min": 700 + }, + { + "epoch": 5.867507886435331, + "grad_norm": 0.39866766411968957, + "learning_rate": 1.6738249842933697e-05, + "loss": 0.074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07909869402647018, + "step": 1860, + "valid_targets_mean": 3704.6, + "valid_targets_min": 1273 + }, + { + "epoch": 5.883280757097792, + "grad_norm": 0.4214360897352762, + "learning_rate": 1.6506472283371527e-05, + "loss": 0.0787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0726221352815628, + "step": 1865, + "valid_targets_mean": 3907.3, + "valid_targets_min": 814 + }, + { + "epoch": 5.899053627760252, + "grad_norm": 0.3998965529724625, + "learning_rate": 1.6275992992974308e-05, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08516493439674377, + "step": 1870, + "valid_targets_mean": 3982.6, + "valid_targets_min": 701 + }, + { + "epoch": 5.914826498422713, + "grad_norm": 0.43001176273586333, + "learning_rate": 1.604682090553243e-05, + "loss": 0.0749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07794927060604095, + "step": 1875, + "valid_targets_mean": 3605.0, + "valid_targets_min": 834 + }, + { + "epoch": 5.930599369085174, + "grad_norm": 0.4026345162726668, + "learning_rate": 1.5818964904166756e-05, + "loss": 0.0753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07847492396831512, + "step": 1880, + "valid_targets_mean": 3895.2, + "valid_targets_min": 1448 + }, + { + "epoch": 5.946372239747634, + "grad_norm": 0.39528285452476664, + "learning_rate": 1.55924338209843e-05, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08313728123903275, + "step": 1885, + "valid_targets_mean": 4903.1, + "valid_targets_min": 692 + }, + { + "epoch": 5.962145110410095, + "grad_norm": 0.4025913432365921, + "learning_rate": 1.536723643673582e-05, + "loss": 0.0759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08681906759738922, + "step": 1890, + "valid_targets_mean": 3818.5, + "valid_targets_min": 696 + }, + { + "epoch": 5.977917981072555, + "grad_norm": 0.3491540278425625, + "learning_rate": 1.5143381480475583e-05, + "loss": 0.0752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06825911998748779, + "step": 1895, + "valid_targets_mean": 4843.3, + "valid_targets_min": 923 + }, + { + "epoch": 5.993690851735016, + "grad_norm": 0.42181285277694436, + "learning_rate": 1.49208776292229e-05, + "loss": 0.0704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07266388833522797, + "step": 1900, + "valid_targets_mean": 3159.4, + "valid_targets_min": 736 + }, + { + "epoch": 6.009463722397476, + "grad_norm": 0.27912884832724244, + "learning_rate": 1.4699733507625862e-05, + "loss": 0.0483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.034440137445926666, + "step": 1905, + "valid_targets_mean": 3888.1, + "valid_targets_min": 740 + }, + { + "epoch": 6.025236593059937, + "grad_norm": 0.46601419274906697, + "learning_rate": 1.4479957687626933e-05, + "loss": 0.0406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03996382653713226, + "step": 1910, + "valid_targets_mean": 3467.9, + "valid_targets_min": 1037 + }, + { + "epoch": 6.041009463722397, + "grad_norm": 0.3265295712580043, + "learning_rate": 1.4261558688130838e-05, + "loss": 0.0401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04567781463265419, + "step": 1915, + "valid_targets_mean": 4330.2, + "valid_targets_min": 585 + }, + { + "epoch": 6.056782334384858, + "grad_norm": 0.33346381719290613, + "learning_rate": 1.4044544974674246e-05, + "loss": 0.0369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03933947533369064, + "step": 1920, + "valid_targets_mean": 3446.9, + "valid_targets_min": 595 + }, + { + "epoch": 6.072555205047319, + "grad_norm": 0.32630156779915975, + "learning_rate": 1.3828924959097612e-05, + "loss": 0.0374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037788327783346176, + "step": 1925, + "valid_targets_mean": 4264.8, + "valid_targets_min": 1297 + }, + { + "epoch": 6.0883280757097795, + "grad_norm": 0.30008599289436333, + "learning_rate": 1.3614706999219213e-05, + "loss": 0.0351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031689830124378204, + "step": 1930, + "valid_targets_mean": 3629.5, + "valid_targets_min": 858 + }, + { + "epoch": 6.10410094637224, + "grad_norm": 0.31562399531284435, + "learning_rate": 1.340189939851112e-05, + "loss": 0.0385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03501756489276886, + "step": 1935, + "valid_targets_mean": 3979.0, + "valid_targets_min": 1076 + }, + { + "epoch": 6.1198738170347005, + "grad_norm": 0.32592581177164376, + "learning_rate": 1.3190510405777345e-05, + "loss": 0.0369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03570239990949631, + "step": 1940, + "valid_targets_mean": 3464.8, + "valid_targets_min": 833 + }, + { + "epoch": 6.135646687697161, + "grad_norm": 0.3173327995566091, + "learning_rate": 1.2980548214834142e-05, + "loss": 0.037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03535917401313782, + "step": 1945, + "valid_targets_mean": 3663.6, + "valid_targets_min": 1075 + }, + { + "epoch": 6.151419558359621, + "grad_norm": 0.36781663136063414, + "learning_rate": 1.2772020964192316e-05, + "loss": 0.0363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0407160222530365, + "step": 1950, + "valid_targets_mean": 3060.1, + "valid_targets_min": 978 + }, + { + "epoch": 6.167192429022082, + "grad_norm": 0.351296650872976, + "learning_rate": 1.2564936736741867e-05, + "loss": 0.0392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041136451065540314, + "step": 1955, + "valid_targets_mean": 3816.0, + "valid_targets_min": 1021 + }, + { + "epoch": 6.182965299684542, + "grad_norm": 0.3352016396532454, + "learning_rate": 1.23593035594386e-05, + "loss": 0.0384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037065912038087845, + "step": 1960, + "valid_targets_mean": 3583.8, + "valid_targets_min": 1330 + }, + { + "epoch": 6.198738170347003, + "grad_norm": 0.3273693517781997, + "learning_rate": 1.215512940299305e-05, + "loss": 0.0382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03412678465247154, + "step": 1965, + "valid_targets_mean": 4017.6, + "valid_targets_min": 1329 + }, + { + "epoch": 6.214511041009464, + "grad_norm": 0.35936661627771804, + "learning_rate": 1.1952422181561424e-05, + "loss": 0.0376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037911586463451385, + "step": 1970, + "valid_targets_mean": 3032.6, + "valid_targets_min": 1078 + }, + { + "epoch": 6.230283911671925, + "grad_norm": 0.32112520008528334, + "learning_rate": 1.1751189752438957e-05, + "loss": 0.0374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038504790514707565, + "step": 1975, + "valid_targets_mean": 3933.5, + "valid_targets_min": 940 + }, + { + "epoch": 6.246056782334385, + "grad_norm": 0.2695618803895192, + "learning_rate": 1.1551439915755274e-05, + "loss": 0.0378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03634379431605339, + "step": 1980, + "valid_targets_mean": 4845.1, + "valid_targets_min": 725 + }, + { + "epoch": 6.261829652996846, + "grad_norm": 0.30893086560612887, + "learning_rate": 1.135318041417207e-05, + "loss": 0.0374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032728832215070724, + "step": 1985, + "valid_targets_mean": 4119.0, + "valid_targets_min": 1213 + }, + { + "epoch": 6.277602523659306, + "grad_norm": 0.33432394249630243, + "learning_rate": 1.1156418932582941e-05, + "loss": 0.0365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03936354070901871, + "step": 1990, + "valid_targets_mean": 3959.9, + "valid_targets_min": 1419 + }, + { + "epoch": 6.2933753943217665, + "grad_norm": 0.3351835539841756, + "learning_rate": 1.096116309781558e-05, + "loss": 0.0411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035421326756477356, + "step": 1995, + "valid_targets_mean": 3449.4, + "valid_targets_min": 748 + }, + { + "epoch": 6.309148264984227, + "grad_norm": 0.3374464303084839, + "learning_rate": 1.0767420478336093e-05, + "loss": 0.0378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.032976385205984116, + "step": 2000, + "valid_targets_mean": 3881.1, + "valid_targets_min": 593 + }, + { + "epoch": 6.3249211356466875, + "grad_norm": 0.31593847750352616, + "learning_rate": 1.0575198583955698e-05, + "loss": 0.0384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03508736565709114, + "step": 2005, + "valid_targets_mean": 4178.7, + "valid_targets_min": 1017 + }, + { + "epoch": 6.340694006309148, + "grad_norm": 0.3250153885770152, + "learning_rate": 1.0384504865539497e-05, + "loss": 0.0352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.035382404923439026, + "step": 2010, + "valid_targets_mean": 3737.5, + "valid_targets_min": 1222 + }, + { + "epoch": 6.356466876971609, + "grad_norm": 0.3080434595666743, + "learning_rate": 1.0195346714717813e-05, + "loss": 0.0387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03647909313440323, + "step": 2015, + "valid_targets_mean": 4430.7, + "valid_targets_min": 1452 + }, + { + "epoch": 6.37223974763407, + "grad_norm": 0.34842809907825134, + "learning_rate": 1.0007731463599601e-05, + "loss": 0.0396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03987966850399971, + "step": 2020, + "valid_targets_mean": 3268.3, + "valid_targets_min": 972 + }, + { + "epoch": 6.38801261829653, + "grad_norm": 0.3279930555964061, + "learning_rate": 9.82166638448827e-06, + "loss": 0.0367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03941555693745613, + "step": 2025, + "valid_targets_mean": 3709.3, + "valid_targets_min": 1524 + }, + { + "epoch": 6.403785488958991, + "grad_norm": 0.3875069540236957, + "learning_rate": 9.637158689599746e-06, + "loss": 0.0365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.038362614810466766, + "step": 2030, + "valid_targets_mean": 3151.0, + "valid_targets_min": 980 + }, + { + "epoch": 6.419558359621451, + "grad_norm": 0.37011350486728795, + "learning_rate": 9.454215530782994e-06, + "loss": 0.0422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04417167603969574, + "step": 2035, + "valid_targets_mean": 3226.8, + "valid_targets_min": 922 + }, + { + "epoch": 6.435331230283912, + "grad_norm": 0.3961829847854745, + "learning_rate": 9.272843999242736e-06, + "loss": 0.0409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051363505423069, + "step": 2040, + "valid_targets_mean": 3708.4, + "valid_targets_min": 474 + }, + { + "epoch": 6.451104100946372, + "grad_norm": 0.340814887208514, + "learning_rate": 9.093051125264623e-06, + "loss": 0.0382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03522820770740509, + "step": 2045, + "valid_targets_mean": 3267.7, + "valid_targets_min": 365 + }, + { + "epoch": 6.466876971608833, + "grad_norm": 0.33571940272092965, + "learning_rate": 8.91484387794267e-06, + "loss": 0.0399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03785194084048271, + "step": 2050, + "valid_targets_mean": 3861.7, + "valid_targets_min": 895 + }, + { + "epoch": 6.482649842271293, + "grad_norm": 0.34477996610324446, + "learning_rate": 8.73822916490919e-06, + "loss": 0.0357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03475986421108246, + "step": 2055, + "valid_targets_mean": 3546.6, + "valid_targets_min": 758 + }, + { + "epoch": 6.498422712933754, + "grad_norm": 0.2926217848087934, + "learning_rate": 8.563213832067014e-06, + "loss": 0.0371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03550596535205841, + "step": 2060, + "valid_targets_mean": 4516.4, + "valid_targets_min": 758 + }, + { + "epoch": 6.514195583596215, + "grad_norm": 0.30269459525807885, + "learning_rate": 8.389804663324142e-06, + "loss": 0.0394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03180967643857002, + "step": 2065, + "valid_targets_mean": 4539.7, + "valid_targets_min": 1259 + }, + { + "epoch": 6.529968454258675, + "grad_norm": 0.34600335174179586, + "learning_rate": 8.218008380330723e-06, + "loss": 0.037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036307066679000854, + "step": 2070, + "valid_targets_mean": 3892.9, + "valid_targets_min": 800 + }, + { + "epoch": 6.545741324921136, + "grad_norm": 0.31504837155444604, + "learning_rate": 8.047831642218611e-06, + "loss": 0.0393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04014365375041962, + "step": 2075, + "valid_targets_mean": 4452.8, + "valid_targets_min": 342 + }, + { + "epoch": 6.561514195583596, + "grad_norm": 0.3270000349535334, + "learning_rate": 7.879281045343184e-06, + "loss": 0.037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03856312483549118, + "step": 2080, + "valid_targets_mean": 3746.9, + "valid_targets_min": 674 + }, + { + "epoch": 6.577287066246057, + "grad_norm": 0.358812853398882, + "learning_rate": 7.712363123027678e-06, + "loss": 0.0374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03735462576150894, + "step": 2085, + "valid_targets_mean": 3186.0, + "valid_targets_min": 366 + }, + { + "epoch": 6.593059936908517, + "grad_norm": 0.3377631827118206, + "learning_rate": 7.547084345309924e-06, + "loss": 0.036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040012724697589874, + "step": 2090, + "valid_targets_mean": 3238.1, + "valid_targets_min": 705 + }, + { + "epoch": 6.608832807570978, + "grad_norm": 0.3230049956208926, + "learning_rate": 7.383451118691576e-06, + "loss": 0.0382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04298000782728195, + "step": 2095, + "valid_targets_mean": 3879.8, + "valid_targets_min": 1168 + }, + { + "epoch": 6.624605678233438, + "grad_norm": 0.2850486373091693, + "learning_rate": 7.221469785889784e-06, + "loss": 0.036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031017430126667023, + "step": 2100, + "valid_targets_mean": 4046.4, + "valid_targets_min": 836 + }, + { + "epoch": 6.6403785488958995, + "grad_norm": 0.3707435173993041, + "learning_rate": 7.061146625591331e-06, + "loss": 0.0369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04487421363592148, + "step": 2105, + "valid_targets_mean": 3517.4, + "valid_targets_min": 769 + }, + { + "epoch": 6.65615141955836, + "grad_norm": 0.3249192588004001, + "learning_rate": 6.902487852209238e-06, + "loss": 0.0376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041910625994205475, + "step": 2110, + "valid_targets_mean": 4240.6, + "valid_targets_min": 1370 + }, + { + "epoch": 6.6719242902208205, + "grad_norm": 0.3285855336330419, + "learning_rate": 6.7454996156419485e-06, + "loss": 0.0399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03908082842826843, + "step": 2115, + "valid_targets_mean": 4214.1, + "valid_targets_min": 914 + }, + { + "epoch": 6.687697160883281, + "grad_norm": 0.33224105758273714, + "learning_rate": 6.590188001034864e-06, + "loss": 0.0377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03900352865457535, + "step": 2120, + "valid_targets_mean": 4574.8, + "valid_targets_min": 1069 + }, + { + "epoch": 6.703470031545741, + "grad_norm": 0.3293994235143555, + "learning_rate": 6.436559028544559e-06, + "loss": 0.0353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.031630996614694595, + "step": 2125, + "valid_targets_mean": 3962.0, + "valid_targets_min": 863 + }, + { + "epoch": 6.719242902208202, + "grad_norm": 0.32004758793637844, + "learning_rate": 6.284618653105328e-06, + "loss": 0.0375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03920764848589897, + "step": 2130, + "valid_targets_mean": 3798.6, + "valid_targets_min": 987 + }, + { + "epoch": 6.735015772870662, + "grad_norm": 0.32244346398828605, + "learning_rate": 6.134372764198465e-06, + "loss": 0.0353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03570954129099846, + "step": 2135, + "valid_targets_mean": 3229.8, + "valid_targets_min": 983 + }, + { + "epoch": 6.750788643533123, + "grad_norm": 0.3662392512051875, + "learning_rate": 5.985827185623899e-06, + "loss": 0.038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04449474811553955, + "step": 2140, + "valid_targets_mean": 3676.3, + "valid_targets_min": 1055 + }, + { + "epoch": 6.766561514195583, + "grad_norm": 0.3332809806523487, + "learning_rate": 5.8389876752745045e-06, + "loss": 0.0382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0391073077917099, + "step": 2145, + "valid_targets_mean": 4164.5, + "valid_targets_min": 957 + }, + { + "epoch": 6.782334384858045, + "grad_norm": 0.3183611175657, + "learning_rate": 5.693859924912892e-06, + "loss": 0.0355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03349914401769638, + "step": 2150, + "valid_targets_mean": 3628.4, + "valid_targets_min": 500 + }, + { + "epoch": 6.798107255520505, + "grad_norm": 0.32383372932554505, + "learning_rate": 5.550449559950755e-06, + "loss": 0.039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.039054855704307556, + "step": 2155, + "valid_targets_mean": 3833.2, + "valid_targets_min": 681 + }, + { + "epoch": 6.813880126182966, + "grad_norm": 0.3561660647478923, + "learning_rate": 5.408762139230888e-06, + "loss": 0.0391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04274681583046913, + "step": 2160, + "valid_targets_mean": 3480.2, + "valid_targets_min": 772 + }, + { + "epoch": 6.829652996845426, + "grad_norm": 0.33621553716130176, + "learning_rate": 5.268803154811669e-06, + "loss": 0.0365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0365615040063858, + "step": 2165, + "valid_targets_mean": 3660.6, + "valid_targets_min": 1155 + }, + { + "epoch": 6.8454258675078865, + "grad_norm": 0.3038998626702167, + "learning_rate": 5.1305780317541855e-06, + "loss": 0.0381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03693147003650665, + "step": 2170, + "valid_targets_mean": 4215.9, + "valid_targets_min": 1404 + }, + { + "epoch": 6.861198738170347, + "grad_norm": 0.36897984120212113, + "learning_rate": 4.99409212791192e-06, + "loss": 0.0388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03764478117227554, + "step": 2175, + "valid_targets_mean": 3104.0, + "valid_targets_min": 919 + }, + { + "epoch": 6.8769716088328074, + "grad_norm": 0.33822079594737947, + "learning_rate": 4.8593507337231666e-06, + "loss": 0.0365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043035540729761124, + "step": 2180, + "valid_targets_mean": 3851.4, + "valid_targets_min": 626 + }, + { + "epoch": 6.892744479495268, + "grad_norm": 0.30145523508291866, + "learning_rate": 4.726359072005859e-06, + "loss": 0.0375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.033608630299568176, + "step": 2185, + "valid_targets_mean": 3856.0, + "valid_targets_min": 328 + }, + { + "epoch": 6.908517350157728, + "grad_norm": 0.3429428100666593, + "learning_rate": 4.5951222977551444e-06, + "loss": 0.0347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03756331652402878, + "step": 2190, + "valid_targets_mean": 3448.3, + "valid_targets_min": 802 + }, + { + "epoch": 6.92429022082019, + "grad_norm": 0.3429842165951299, + "learning_rate": 4.465645497943621e-06, + "loss": 0.0382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04174838215112686, + "step": 2195, + "valid_targets_mean": 3628.9, + "valid_targets_min": 1229 + }, + { + "epoch": 6.94006309148265, + "grad_norm": 0.34675008978132216, + "learning_rate": 4.337933691324109e-06, + "loss": 0.0359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.037052642554044724, + "step": 2200, + "valid_targets_mean": 3198.1, + "valid_targets_min": 568 + }, + { + "epoch": 6.955835962145111, + "grad_norm": 0.3149539259319967, + "learning_rate": 4.21199182823514e-06, + "loss": 0.0378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036268435418605804, + "step": 2205, + "valid_targets_mean": 4105.2, + "valid_targets_min": 759 + }, + { + "epoch": 6.971608832807571, + "grad_norm": 0.33929093507706914, + "learning_rate": 4.08782479040905e-06, + "loss": 0.039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.036416955292224884, + "step": 2210, + "valid_targets_mean": 3415.2, + "valid_targets_min": 1058 + }, + { + "epoch": 6.987381703470032, + "grad_norm": 0.30657013255630994, + "learning_rate": 3.9654373907827665e-06, + "loss": 0.0362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.03362428396940231, + "step": 2215, + "valid_targets_mean": 4120.6, + "valid_targets_min": 1327 + }, + { + "epoch": 7.003154574132492, + "grad_norm": 0.23671025217731062, + "learning_rate": 3.844834373311257e-06, + "loss": 0.0306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02152540534734726, + "step": 2220, + "valid_targets_mean": 3691.1, + "valid_targets_min": 1073 + }, + { + "epoch": 7.018927444794953, + "grad_norm": 0.2046904237861561, + "learning_rate": 3.7260204127836316e-06, + "loss": 0.0226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019526127725839615, + "step": 2225, + "valid_targets_mean": 3975.6, + "valid_targets_min": 1224 + }, + { + "epoch": 7.034700315457413, + "grad_norm": 0.2556520049864306, + "learning_rate": 3.609000114641964e-06, + "loss": 0.0214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021668246015906334, + "step": 2230, + "valid_targets_mean": 3995.0, + "valid_targets_min": 1352 + }, + { + "epoch": 7.0504731861198735, + "grad_norm": 0.3047451716970323, + "learning_rate": 3.4937780148027344e-06, + "loss": 0.0202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024157961830496788, + "step": 2235, + "valid_targets_mean": 3649.8, + "valid_targets_min": 955 + }, + { + "epoch": 7.066246056782334, + "grad_norm": 0.23516493801694757, + "learning_rate": 3.3803585794810466e-06, + "loss": 0.0204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01915784180164337, + "step": 2240, + "valid_targets_mean": 3932.5, + "valid_targets_min": 828 + }, + { + "epoch": 7.082018927444795, + "grad_norm": 0.28685397345416597, + "learning_rate": 3.2687462050175034e-06, + "loss": 0.0213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02069886401295662, + "step": 2245, + "valid_targets_mean": 3519.2, + "valid_targets_min": 725 + }, + { + "epoch": 7.097791798107256, + "grad_norm": 0.2377604870801302, + "learning_rate": 3.1589452177077815e-06, + "loss": 0.0197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021812913939356804, + "step": 2250, + "valid_targets_mean": 4209.9, + "valid_targets_min": 354 + }, + { + "epoch": 7.113564668769716, + "grad_norm": 0.25716830137062047, + "learning_rate": 3.0509598736349343e-06, + "loss": 0.0213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02272321656346321, + "step": 2255, + "valid_targets_mean": 3548.7, + "valid_targets_min": 568 + }, + { + "epoch": 7.129337539432177, + "grad_norm": 0.25299239836610615, + "learning_rate": 2.9447943585044545e-06, + "loss": 0.0218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022318579256534576, + "step": 2260, + "valid_targets_mean": 4138.4, + "valid_targets_min": 681 + }, + { + "epoch": 7.145110410094637, + "grad_norm": 0.27458386844024335, + "learning_rate": 2.840452787481979e-06, + "loss": 0.021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.023187793791294098, + "step": 2265, + "valid_targets_mean": 3561.2, + "valid_targets_min": 700 + }, + { + "epoch": 7.160883280757098, + "grad_norm": 0.23317759638203217, + "learning_rate": 2.7379392050338236e-06, + "loss": 0.0211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.018476787954568863, + "step": 2270, + "valid_targets_mean": 4330.3, + "valid_targets_min": 637 + }, + { + "epoch": 7.176656151419558, + "grad_norm": 0.24330856389812755, + "learning_rate": 2.63725758477017e-06, + "loss": 0.0202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01914399303495884, + "step": 2275, + "valid_targets_mean": 4080.5, + "valid_targets_min": 701 + }, + { + "epoch": 7.192429022082019, + "grad_norm": 0.25015226740136925, + "learning_rate": 2.5384118292910818e-06, + "loss": 0.0214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019158024340867996, + "step": 2280, + "valid_targets_mean": 3478.2, + "valid_targets_min": 585 + }, + { + "epoch": 7.208201892744479, + "grad_norm": 0.2881936201022915, + "learning_rate": 2.4414057700351934e-06, + "loss": 0.0211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021579192951321602, + "step": 2285, + "valid_targets_mean": 3220.7, + "valid_targets_min": 748 + }, + { + "epoch": 7.2239747634069404, + "grad_norm": 0.27887035969475066, + "learning_rate": 2.34624316713124e-06, + "loss": 0.0232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02623015269637108, + "step": 2290, + "valid_targets_mean": 3628.3, + "valid_targets_min": 1040 + }, + { + "epoch": 7.239747634069401, + "grad_norm": 0.2670202323043872, + "learning_rate": 2.2529277092522503e-06, + "loss": 0.0203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022541699931025505, + "step": 2295, + "valid_targets_mean": 3351.5, + "valid_targets_min": 1039 + }, + { + "epoch": 7.255520504731861, + "grad_norm": 0.23391708937047623, + "learning_rate": 2.1614630134726367e-06, + "loss": 0.0209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021500475704669952, + "step": 2300, + "valid_targets_mean": 4289.9, + "valid_targets_min": 562 + }, + { + "epoch": 7.271293375394322, + "grad_norm": 0.2482302782121816, + "learning_rate": 2.0718526251279346e-06, + "loss": 0.0193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.020055655390024185, + "step": 2305, + "valid_targets_mean": 3469.9, + "valid_targets_min": 562 + }, + { + "epoch": 7.287066246056782, + "grad_norm": 0.2453919519909187, + "learning_rate": 1.9841000176774148e-06, + "loss": 0.0191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02011318877339363, + "step": 2310, + "valid_targets_mean": 3563.4, + "valid_targets_min": 1151 + }, + { + "epoch": 7.302839116719243, + "grad_norm": 0.19042364673264342, + "learning_rate": 1.898208592569406e-06, + "loss": 0.0196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.015566266141831875, + "step": 2315, + "valid_targets_mean": 5366.5, + "valid_targets_min": 1283 + }, + { + "epoch": 7.318611987381703, + "grad_norm": 0.233944515106359, + "learning_rate": 1.8141816791095e-06, + "loss": 0.0206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.024694954976439476, + "step": 2320, + "valid_targets_mean": 4257.8, + "valid_targets_min": 742 + }, + { + "epoch": 7.334384858044164, + "grad_norm": 0.239714556753496, + "learning_rate": 1.7320225343314566e-06, + "loss": 0.0225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021245725452899933, + "step": 2325, + "valid_targets_mean": 3696.5, + "valid_targets_min": 792 + }, + { + "epoch": 7.350157728706624, + "grad_norm": 0.2792514650601985, + "learning_rate": 1.6517343428709975e-06, + "loss": 0.0216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026734858751296997, + "step": 2330, + "valid_targets_mean": 3603.2, + "valid_targets_min": 836 + }, + { + "epoch": 7.365930599369086, + "grad_norm": 0.23933074074692448, + "learning_rate": 1.5733202168423055e-06, + "loss": 0.02, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02033247984945774, + "step": 2335, + "valid_targets_mean": 4144.8, + "valid_targets_min": 1059 + }, + { + "epoch": 7.381703470031546, + "grad_norm": 0.21154284910043136, + "learning_rate": 1.4967831957174606e-06, + "loss": 0.022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.016088739037513733, + "step": 2340, + "valid_targets_mean": 3906.3, + "valid_targets_min": 821 + }, + { + "epoch": 7.3974763406940065, + "grad_norm": 0.21081085989439202, + "learning_rate": 1.4221262462085715e-06, + "loss": 0.0201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.016913168132305145, + "step": 2345, + "valid_targets_mean": 4614.7, + "valid_targets_min": 1200 + }, + { + "epoch": 7.413249211356467, + "grad_norm": 0.24129214952686506, + "learning_rate": 1.3493522621528088e-06, + "loss": 0.0213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0208077784627676, + "step": 2350, + "valid_targets_mean": 3780.1, + "valid_targets_min": 653 + }, + { + "epoch": 7.429022082018927, + "grad_norm": 0.3041449938491774, + "learning_rate": 1.2784640644002366e-06, + "loss": 0.0232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02505934238433838, + "step": 2355, + "valid_targets_mean": 3343.6, + "valid_targets_min": 678 + }, + { + "epoch": 7.444794952681388, + "grad_norm": 0.2559831913479079, + "learning_rate": 1.209464400704452e-06, + "loss": 0.0215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019966714084148407, + "step": 2360, + "valid_targets_mean": 3369.2, + "valid_targets_min": 954 + }, + { + "epoch": 7.460567823343848, + "grad_norm": 0.2680699751484153, + "learning_rate": 1.1423559456160803e-06, + "loss": 0.0202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02447362430393696, + "step": 2365, + "valid_targets_mean": 4021.5, + "valid_targets_min": 915 + }, + { + "epoch": 7.476340694006309, + "grad_norm": 0.26482797801514274, + "learning_rate": 1.0771413003791253e-06, + "loss": 0.0203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02077951282262802, + "step": 2370, + "valid_targets_mean": 3045.0, + "valid_targets_min": 342 + }, + { + "epoch": 7.492113564668769, + "grad_norm": 0.2606654472952182, + "learning_rate": 1.0138229928301212e-06, + "loss": 0.019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01940850168466568, + "step": 2375, + "valid_targets_mean": 3189.5, + "valid_targets_min": 391 + }, + { + "epoch": 7.50788643533123, + "grad_norm": 0.22673295613716715, + "learning_rate": 9.524034773001511e-07, + "loss": 0.0209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.017434414476156235, + "step": 2380, + "valid_targets_mean": 3524.6, + "valid_targets_min": 1194 + }, + { + "epoch": 7.523659305993691, + "grad_norm": 0.25779834696635223, + "learning_rate": 8.928851345197165e-07, + "loss": 0.0212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021370217204093933, + "step": 2385, + "valid_targets_mean": 3987.6, + "valid_targets_min": 1150 + }, + { + "epoch": 7.539432176656152, + "grad_norm": 0.2792364358047635, + "learning_rate": 8.352702715264726e-07, + "loss": 0.0211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022021610289812088, + "step": 2390, + "valid_targets_mean": 3877.8, + "valid_targets_min": 1115 + }, + { + "epoch": 7.555205047318612, + "grad_norm": 0.2527690780897385, + "learning_rate": 7.795611215757615e-07, + "loss": 0.0207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02001969888806343, + "step": 2395, + "valid_targets_mean": 3368.6, + "valid_targets_min": 1078 + }, + { + "epoch": 7.570977917981073, + "grad_norm": 0.24928074461557115, + "learning_rate": 7.257598440540802e-07, + "loss": 0.019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019445180892944336, + "step": 2400, + "valid_targets_mean": 3541.2, + "valid_targets_min": 677 + }, + { + "epoch": 7.586750788643533, + "grad_norm": 0.25689825680468487, + "learning_rate": 6.738685243953769e-07, + "loss": 0.022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02116306498646736, + "step": 2405, + "valid_targets_mean": 3546.9, + "valid_targets_min": 932 + }, + { + "epoch": 7.6025236593059935, + "grad_norm": 0.21350336910302817, + "learning_rate": 6.238891740002195e-07, + "loss": 0.0181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.017484672367572784, + "step": 2410, + "valid_targets_mean": 4645.9, + "valid_targets_min": 1158 + }, + { + "epoch": 7.618296529968454, + "grad_norm": 0.28795619467726996, + "learning_rate": 5.758237301577874e-07, + "loss": 0.0224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02376633509993553, + "step": 2415, + "valid_targets_mean": 3167.3, + "valid_targets_min": 808 + }, + { + "epoch": 7.634069400630915, + "grad_norm": 0.23445024223361485, + "learning_rate": 5.296740559708413e-07, + "loss": 0.0198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.018763840198516846, + "step": 2420, + "valid_targets_mean": 3582.3, + "valid_targets_min": 987 + }, + { + "epoch": 7.649842271293375, + "grad_norm": 0.22692248440924154, + "learning_rate": 4.854419402834709e-07, + "loss": 0.0188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019361719489097595, + "step": 2425, + "valid_targets_mean": 4443.3, + "valid_targets_min": 1076 + }, + { + "epoch": 7.665615141955836, + "grad_norm": 0.30609123299560087, + "learning_rate": 4.431290976117497e-07, + "loss": 0.0211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02600417658686638, + "step": 2430, + "valid_targets_mean": 3147.4, + "valid_targets_min": 987 + }, + { + "epoch": 7.681388012618297, + "grad_norm": 0.22313215378869547, + "learning_rate": 4.0273716807731067e-07, + "loss": 0.0212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019607003778219223, + "step": 2435, + "valid_targets_mean": 4538.7, + "valid_targets_min": 1278 + }, + { + "epoch": 7.697160883280757, + "grad_norm": 0.26992723719948397, + "learning_rate": 3.642677173437137e-07, + "loss": 0.0221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021701868623495102, + "step": 2440, + "valid_targets_mean": 3641.7, + "valid_targets_min": 989 + }, + { + "epoch": 7.712933753943218, + "grad_norm": 0.2771351860707734, + "learning_rate": 3.2772223655583857e-07, + "loss": 0.0201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02161252871155739, + "step": 2445, + "valid_targets_mean": 2992.8, + "valid_targets_min": 871 + }, + { + "epoch": 7.728706624605678, + "grad_norm": 0.20744452566292276, + "learning_rate": 2.9310214228202013e-07, + "loss": 0.0198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01766483671963215, + "step": 2450, + "valid_targets_mean": 5011.2, + "valid_targets_min": 1008 + }, + { + "epoch": 7.744479495268139, + "grad_norm": 0.24979822877491584, + "learning_rate": 2.604087764591534e-07, + "loss": 0.0208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019588006660342216, + "step": 2455, + "valid_targets_mean": 3160.4, + "valid_targets_min": 692 + }, + { + "epoch": 7.760252365930599, + "grad_norm": 0.2546093387479328, + "learning_rate": 2.2964340634069603e-07, + "loss": 0.0203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02087349444627762, + "step": 2460, + "valid_targets_mean": 3454.5, + "valid_targets_min": 780 + }, + { + "epoch": 7.7760252365930596, + "grad_norm": 0.2324203105453564, + "learning_rate": 2.0080722444754118e-07, + "loss": 0.0195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.018959470093250275, + "step": 2465, + "valid_targets_mean": 3975.2, + "valid_targets_min": 1383 + }, + { + "epoch": 7.79179810725552, + "grad_norm": 0.2559575732777742, + "learning_rate": 1.7390134852177664e-07, + "loss": 0.0209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.020496761426329613, + "step": 2470, + "valid_targets_mean": 3453.6, + "valid_targets_min": 1419 + }, + { + "epoch": 7.807570977917981, + "grad_norm": 0.2984929620825143, + "learning_rate": 1.48926821483375e-07, + "loss": 0.0222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.029172949492931366, + "step": 2475, + "valid_targets_mean": 3610.6, + "valid_targets_min": 321 + }, + { + "epoch": 7.823343848580442, + "grad_norm": 0.2509837594498463, + "learning_rate": 1.2588461138977604e-07, + "loss": 0.0194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02101830765604973, + "step": 2480, + "valid_targets_mean": 3958.2, + "valid_targets_min": 595 + }, + { + "epoch": 7.839116719242902, + "grad_norm": 0.2552271913213256, + "learning_rate": 1.0477561139832781e-07, + "loss": 0.0216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.022542642429471016, + "step": 2485, + "valid_targets_mean": 3606.5, + "valid_targets_min": 591 + }, + { + "epoch": 7.854889589905363, + "grad_norm": 0.209822662898504, + "learning_rate": 8.560063973171439e-08, + "loss": 0.02, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01608804240822792, + "step": 2490, + "valid_targets_mean": 4524.3, + "valid_targets_min": 1058 + }, + { + "epoch": 7.870662460567823, + "grad_norm": 0.2848340477745129, + "learning_rate": 6.836043964620342e-08, + "loss": 0.0211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.025099724531173706, + "step": 2495, + "valid_targets_mean": 3624.8, + "valid_targets_min": 1158 + }, + { + "epoch": 7.886435331230284, + "grad_norm": 0.22355477627255205, + "learning_rate": 5.3055679402846946e-08, + "loss": 0.0191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.01633409410715103, + "step": 2500, + "valid_targets_mean": 3583.2, + "valid_targets_min": 669 + }, + { + "epoch": 7.902208201892744, + "grad_norm": 0.21900480900696295, + "learning_rate": 3.968695224158547e-08, + "loss": 0.0216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.020641552284359932, + "step": 2505, + "valid_targets_mean": 4462.2, + "valid_targets_min": 616 + }, + { + "epoch": 7.917981072555205, + "grad_norm": 0.23441049279938414, + "learning_rate": 2.8254776358238588e-08, + "loss": 0.0198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.019191911444067955, + "step": 2510, + "valid_targets_mean": 3637.8, + "valid_targets_min": 669 + }, + { + "epoch": 7.933753943217665, + "grad_norm": 0.27852699726603725, + "learning_rate": 1.8759594884443233e-08, + "loss": 0.0225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02600184828042984, + "step": 2515, + "valid_targets_mean": 4147.2, + "valid_targets_min": 1178 + }, + { + "epoch": 7.9495268138801265, + "grad_norm": 0.26641292280344536, + "learning_rate": 1.1201775870445242e-08, + "loss": 0.0215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.020183980464935303, + "step": 2520, + "valid_targets_mean": 3687.0, + "valid_targets_min": 767 + }, + { + "epoch": 7.965299684542587, + "grad_norm": 0.25780650684309997, + "learning_rate": 5.581612270855186e-09, + "loss": 0.0197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.021703746169805527, + "step": 2525, + "valid_targets_mean": 3691.6, + "valid_targets_min": 910 + }, + { + "epoch": 7.981072555205047, + "grad_norm": 0.27488138468650514, + "learning_rate": 1.8993219332907877e-09, + "loss": 0.022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.026999393478035927, + "step": 2530, + "valid_targets_mean": 3892.4, + "valid_targets_min": 654 + }, + { + "epoch": 7.996845425867508, + "grad_norm": 0.2268045522461904, + "learning_rate": 1.5504758992257451e-10, + "loss": 0.0198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.02012499049305916, + "step": 2535, + "valid_targets_mean": 4176.0, + "valid_targets_min": 1069 + }, + { + "epoch": 8.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.017221365123987198, + "step": 2536, + "total_flos": 1379676432629760.0, + "train_loss": 0.20488507787429666, + "train_runtime": 45788.1262, + "train_samples_per_second": 1.769, + "train_steps_per_second": 0.055, + "valid_targets_mean": 3641.4, + "valid_targets_min": 1073 + } + ], + "logging_steps": 5, + "max_steps": 2536, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1379676432629760.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..ebd4da4 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e6ab08343afbf77e05fe0528ed063ee0c8ca8501080fcb2c048c8db69ef713 +size 8721 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..2a6b374f54df0f8e79f3f107d05c0e75aaeaa905 GIT binary patch literal 39827 zcmd?R_dnNd_&@wIL)kOQPD&YN@0F+!3CUIz*?TL5tWqRdk+R<;*?U!rRJ@fv%HDf_ zkF)FZ`QDGm{lona+}Go}9#`@9dY$7qkK;LxQ^ZxRi>$IlA4uYsu+x+r{;+qr-VY5n(}zGn{wa z+?-scg@o+??T?|V)&`?#<^Gco_@btU( zXOwht;?`T9sllwXq3@pGcBH*FRqo0`#ZJeoKs( zo11&uoCS-3ze}@_D#Bkgs2&nLMjs5e`2Web6VFox#qo|5R?XXfe)m4neWph#j@NH* z4Zk)o1IFXyE{NWFghRs}w=+I8`)$use!c~YU_W<$+gQp*EJ*7u{m96O z*HqT?j_wn7o4YHW#pVqUn5y)gOT7NH?^szNI z@i0aK{o$#%X%1cIM8hW10@+oicVr&6Q4tWBy3`u?=|OG<@< z-SdCHzQljOaJQ6>j_#_C4ne8Wz@$}sd%K9|yfG%3$J+a+{W*`A7^9Ak4rZJtz-;V9 z;Nt4)JQgisIb46A|Mclbn>7E2pPrr^8wulRJuFo1)QY#$3fx;`j*E*EcAwTGK6HrZ z>{-f4W}((th5c}oDi1C`zBe6XmoHxyxz{WFEIxiDMrK~o&rjB4?ssVDVP?y@-=)2^ z{#AGz2V>)8zrdRX`m=py)Q)v~;`#+fp)mZW&Q9D7j)jUM$07MIDKgAfZ836VJ-Kg+ zEL$kf7`@LP2(6#%E6WT#*f0N^WJklo5)MCPwKUpdSnDU{G2c%f7#N7v!-9ov3#%)9Rs6*ux zA6ED+poP&Oy{L$029Ild%%w8EH(#|NZw%|@wba7=Bq`~U0IvF7)+6IQi6Os|)(>}o zys?0F-W-3bz4(zQ_NVtQ3^{hJ4gTc_T&;9eQC433a=z0XUueQ2Y#vlSa7T3XV1MDD zy5q^SXCYPdm4{!7+c$bC95GZ4x*uJ}F?njmC0zec(T;VWX%NE{Lxs)WPyoI0hkf0bUD z6C&}ssn6Z$mBg>sEYWKH9)0#$?)HNJ)}QB_8+8ZK<>iu-Zqmk14J48-VP&wo|I`0!5a_YWr zz$Gio=2OEyQ0d-e6)UH|u(AjXZ}s=fi^=|q^Kfo6GBTWgXi9Y7dE~6{27X5Q?GG*~ zsiO40P`IWGI3^_SD-)7dx$jKYT@W$6NKUvTA7EcaTNvzNHg~6I|1OxwjOhQwh+9%?b_{*)8Bx&;D!ep6)**pQZ|0D@9UV9a#3L3S)Xfi$D4!= z51U7^NxjqQjIi;BoJ2~^9z}Uf{O7jhb0M>sR5@Rt8X;C6q4EVBOb#ut*H|>akPxoe zOr?DfLdSfxIf9Ie>S0}-!uI-HB;M4wpywS?eMgE6EjxQmu~l2f`}Yh%GjbB%%eV4P zYgpj`g=xK2=}eVd`YJK>Q?iz8;Q}o4z#c6`LNr7v7bho%GWYQPL&Hvz<716YO$3%Xn%OB<;292@`iblKN;>U&mii48To_c$$X z@ep5O>cN8ti$AYv6FB*Qco-UL(UB~*8~`OkNlEGI>3GvLa-rHyZoSj-yT*it zJ0p(-3KiAW$zXFxso267>JA2HDmnaDX?Zmo-)O!RhLjx=7S=R0#PQt3<3_agIkWx^ zGy62{)JKt#1Q;Yp)4t5iOoDVJvj_7@dhxl=Yh3WE!%Sxsai%q>TpT?b(dSS_85$b8 zEUzuM#=t;9(vyE<%zYNJetOrPB0Ibwc0FzBa>AL0y8SgF)e9FI;APj|YT4_ptX|~s z=RJguf>~{6{{U6O*_#hFp7KE|B!wOR^y!n=`Y)S5J3r?ZN1Do^meDdYlB8&b!67hq z{VFE>;tJ*Iwl6Qv7vJgP#)5EBI{Nw#cNXe6mX?;FR!#O495W5rx=qGa-7&^)xA@B@ z?bE<9-D}s11NOE_6;77YDu2WU<2r*|7u`GGym`|z`Ie29^^s1dn#IBX9;LjeKQ3qI zyhC^Hbrwk{PKc&K?}_7BeHceTxbs*m7dJN{j8V>az4@kN&2{(XgZ+)V#=TB^I1GeA z<>PUen;(!IZf+NSuM!mkYguHyiZD(E)3T5dViBv> zBU=;s`DbX49z8`!MAQNqfbVZ-n$Y&z%t;1{(9ORu9mn@rugPq|>g+Q=E?ocM< zO4fSIBIolS;KnG_G#bb1rH3pM4#|^!WjKsees@u2s~74Yd2j=#vWiMeW~}^aw9-r8 zRH!55{kMOXYq<;6?Y?6Xvwi0E_k~R}MC49c|Ld5Ff;>O1Fz`;uQ zJQPY$?p{H*dtP>umpA;`GX`1)240j|L<0Byy54GYyt>NYJl&o5XODH;)UB>3b>YR) z^VVc3S|@$hrca;>xYTpgF#;mm7^88l-Fic#9RB^gF8U%lHu|nz@aaMg+DEPs7J0vN zVbx7k~)Qj14Pu9f4+0R7K%lN<4~>T{_fW3lZ))M(kWW6U%&Pm_~9<) zvznbK=lcQU++V{5e%iR9ub%+R4kL@mE3CYsYgvP`>^GGN&1(xB{>Li_F*Gs=23jt47g2V`6!MSXLY+pAN` zdEGsmsSadsf1E;#WNrP%ur8nm#Q{_#OGu|{>@pDVPvYXf58HKbB=JE63Yt`gL%m)C z02f|16mT=&q)HzWSh`YhGvxG^JY4PRL9a1N=dKIP2MKZ_D3fajWysjc7mR(#k`s22*jdchEyYj3X5LJ1gMo%*r-p(%{|aQvk| zjM$GKKSFhL7*JKI>8Jpf_ewXX`a|r|^J*APn%{Cn^;gT{@a$}UXMlR`eGt<6V^KKDY^&F%d^@Llj~q2)IrC$hI@6gMe z4i1GFXR6QiDcl~#;lnkx$j$&57vxD*PZ`B(oSj$Ams zfoxw;R(^oWk{1B=P#R@PHY$|I5MS7m1s-;P^2gg)yz8u+bPIrn3%~&W*w15<0lQu} z{A}&cJUyg?{idH!_4639@r|w-T%GBH>&W_`??MD%!Ywb%vNh^xqjib*+{AuV_CD@n zEc+;cY|7eH8s&SvMM|*x{_8LB9-ieZ{k3RYy6!Zi(+~5waPM9|K!tRJt^Ty7;^N}v z8E)0

1}!nx5fz{%hR^VuS4tweo=uRdeDa6B8%v3g;EbYPYBJNX>pcmszMTY2{c| z*7N6(gOh98hbteY;L?aQc;i&37P!Clwt8-JX$}>fd?!+2tGO*c`y4y0&&(t^W?Ii9Ad}jUYRw$IQP}Tw=|Beu!5*Ah>`RAA@)T+Jg|m? zqO3>QWy^Io6x}H+3CT0He+`WYW5KYd`(Iy55IL>wv5DC-s714$@T5hA;?bi=TjJJiMuR#_v*^~J_9p<65yGjj(nR5@Q~Q?4iI0PA2TOMErAUu_zbw>}dp*xq)CAvVTRI9eYesquQl>H@ z1*J3FkQ@eL(ou!oMWR&izejNgO3+RKu%^uDaR&~q}`9Xaxhd(;)ZaM=YCbbZo8XHta~0`AMdZz`T7DB6cp7TLc_vTa`ZAFLeO?FoB7>OPG-aT_xoRTad81f>^@ZQ zpOT(y_F>c<4`~Pi_pRMXnC1}~8y&TkooHxi*n}mw^OtqV%E~fzG|$qFSX^8r9m-Q| zGEI9ZdodN(tvbMA(sgt{uAnx}s6cI^BgKvlof>1et_#c0bpiQmyUTe5H+&m*;Q;o~ zfERoD71TXC0ESC{o}2aj4$HgzTu^L25|AubSnCZ^Ty%ht{=~e%T+X(Qh5dV@v!;Ga z7y1Ja_QIp0nnCLj0oHycP2OZajkLV>F@cx=T@q5#9+^X#GMg7vRWYnXWbOH?D0g>TJkcfQz%1?X{_c;aSKpFH}^J%M4W~du|g$Y(qIC##F`QV zAs|qEZrj?~@pZ4Q14<#{GOm7vO_D~adXdo7`%k=d+T}7hnoYOPed^4?O*H-prj`!} zu1|Qg%%KI&*}`=`?d~CtINn#VFq-0fZG$zwOPgb{V|cT`_yb8-*T8+=6DN>d0Mt$% zozW;(F$y@yEeCr;2V1*i#l4&yLO1HWnZp5KZZQcl&a*@WOda{Go2nYK4iH+c@A*9uORA{xZ30SMEaSi~-HAhl7dK zq>*?7(*_VO1Qn$4#;%m8)cb)hu!*nR4@HU+u!4MAV^*VtH(s*c_Ps>}XiAe^nM**q zkg8%^$_f~WOa5SYh19Gn=W`j-E&2F*wh3H6h&v9R2{f#NDCGj_&A`N+5xx?2h{KNt zw4IBJideGNNx|ApOUaZDHM909H$clMi^y~Mu1R$L>L)_XRQAH5;iUPdfsr~Og(|qA z-id6Vlk2VWCn$97A-Iky+8Gg8T6ga@tO7Mnp&fJp3PpAcv zr>?yj2Kd+nvtDeb*ES!l_TGe2@kb|LKtI1eoR+7$HpF**HUcCAcm%!t0p}sk1A=j? zeVCuh!y^qq!=1-J(%!m#+X6Bo#C!;33CN!J0kWpU>37L?A9{&sWoHs;`dCQ~NTKzkyJj3$q{*mJ{SU=@cp%kJ);7`0U!&3M5%` zP)YtR0?su0SBZ@knk$?MT4rWE_a8?JdcG_5fK-+sJ6ixo&=k)I+*}+9kBLcmYr1@^ z)CkD%E)g7T36Q?KhZS;Cr2v#dnM;x%i+d940G>k&C;O|v_0d86BerV7VTBFZ3t)>O+K-j>gx3XW9;sE8nghz7Oj+Re=dX* zdE%(7ObDPR7`QS{==U_=I7wQ-_NfpW z!wQ!Z>|VnJIIA~TUOebjIA8;YLe3tzyTm$iMo_Tz-R*WKkaWDecVi&oAayce>kl(j z(pM0ck9o;!eAXHtQXVU^Xd=Z4o$FleHU44Xm7ATtXsemC|6(jv>x_&{pUwI7s;Y+r z4vyc}*r+bnXnLiBg2-j#IJu@3nbyN{CKO~mLz7k2oJD?dT--4fBusoLKSD24%Z&+`{ER+kVUDQEB^f}lh^AOWIX3* z*AmsDSP0UcH*Eb`x7oV0UUhz|zfXh&c0l*X^z*I{PJjlOyVPp{yZ}I*k7icaW8YUI ztVZ)-L{Y;LFf1}xigwwqEcB%aQwk=v`26gYyg}`!k00yd2?7Vk#>P?F@>|4^!Jog3 zP1>%RjP#8gnU2k?u4V(CYIhTY8R+;Q-+REDKYsd@vowgDS>(_l@lfo|lnd|z*mfOQ zbF1l8P=p&Sk>N_hBIWXWm*hM!mt8PpkQUT`K<%_QA`X5ha>+h^`I5iX%lad*?`U$5z;}NI0cTQ2pEH%7*4kruLs|%RhNJOE3%sR?XVlI(om$ zJ!kHG2Yw>9+qOW2*$P--^l(3@di)HJ)cpT;>&HaUpKKg)G6x~Ww9gELVf8q@v0?|{ z4S9j9;8Ygh`*jXsFB`oTd0@*Z2cmQBP(JcdtyC*MK7MPKxx#Hq7ZO!Zb_6i#s^VLp z=&HB=zLX~lROwzf?rFi#*q?h*efq5)XppUdg~y%TLHQedc+8<5h8Y8+Z`ZsAPGCLY zq@nD!z#reg7l9qgs(Bg8ln4@LjXi$dR@c%})6gJi5;8p!+;BB>-4E`(kd zta#lc&rFFd!4sDg%mK@QmrFxPNGN>!^Rx5vX;prp93b~C3J^0DU^Bz_cW5hN*p5Rb z7umgu4|awQoIag+ZVu)!@}R)jZ$;{Xk1WNIS;O!?Og90-TVIXM&4zfQ?SB`K-?Vq6c$F>Cz<%pf=w? zG3%Z4Kn5jZ*568Ow6pb2BVPEiqa)#9*re94qLVXMKd%_}1igvpV$7-$`8dGWwwU8G zi@3nOyw>NPPzus@bDY4o|B@tjc~BlQ^w!Gn^B7@$#)U{>z{NmM55a5%q&?@OW@qmK z{Uv#;l@d4c-Yy&JEY$w7={%eoSX9Fc3$e&g1MCsfbtk_R_v$V3e-R)=#R^W=F_hL} z?HB6~_NYN4w}=i%;d@GsN*iWyRmN+PbI@nz?Bv?E&&iZ^QILa?DVqpL7(DZcNWn@n zz+b?ET2Xw0V@QcO>iWV^?8#Iw!t*vA&+6>tw( zMxhy{5#VtI>=GHb z5WUt|4JpdEiL?`i3E$gZd{R4r8kq!17vzifY+gWC4Zo>4HqzKedW}2fv=w6CWm6Imr#W#g!N5 zNXiC0jscY@hV~LwRaLz`T4r{3L0*kz)^r0T zx-u_9e}XX=_^u&P$%7A37m5h!sI+^+SHmH#_ISmF&~Pg#nDs+pZZsDr%k1KZyh-X^ z9=LW&2R4mIuxj>P#T6jhcHfHD08xKxI~uKqaKa<#*h#x)241P!5f))ZSez%x$!B0= zNacTP%3;B6{cmypN@1ya)5R0RYhlLpK-@lKBu{zg>xAV!F zUco3vk}em*Yk?_W0uo?IHsaH%nIx(KQ2P>m${-+iJ83k73shiv`rPL&D-}z~hd}LhVkWx~3+hPmL)tDd{6Xkl|tMKI*q_-6E?R z5JV=>DCiNSHgu!!CbC*WJ5C130m?agK5`S# zq(TQc96h$Vx%uBCVNE~Vr9o>?1c;s*yeCgWh@6DH+x*X711~@=zQLg^64Khu&Ce_+ zPMjD;nzx9Eh;o5`WT8pbqXY1a#NL3&76sHrV^Uf#O3i)@gv$gRE)y=ciuT?%)%Bjl+MNfts8wG!%GHgM3`*dim$QA|n)3 ziN`oI*SJ(ZHz>5>&~LJw-{}fI;3JSGSsm}>T9*6@X)u3$@~}fBaz?wL{RSG;!j{1_ ze)RF#(-xU-p{7s#)S)^_)e6{pgeZi-jNqG zwY1FW-*{+r^696LlWBhM)bKHtsJgT#oNXFu3KOyaEs6_B2Ysl+fi-bE`RlDPr1V0D zT?BH@qo6R9XP;vGC_MZzp3(j=l%q4a2wza@s=*2bYgSWUiLZMuH?Pokiuy2f;fcZJi(b_)Og2GQ`Z-ZDoH#8)AK4#QxcUo_R-aRS^n7I?73@$TKb z^ln?!Ooq5nsTRW()t~)Zsj@d_f`rT7ruyVw*zYxccPpZ^!6>z-(4=p+fFE)=wfz3( znDhp%jqtVl7k576^R|tjc7KI7AZQuU;kjf`mk^+N!nx4&6f}V9f4L`a1zEv;{s%Y- zX?atkWnaErnS~AN@CTKyCL!}46*T6oBsI1_cNH<<{q`P*2fajhV!_JNGRg0LC_L-v ze=Vhn*2jsJVH&Ch+Ie$Cy-boRT5xzcg@qqy@3{IjLqp$@ARFKt zM9}AP*5nBEqDm8;)J|0e+<%!l^MWvzO0uEc17CqCyEF0~tY#e*24-d+d3n=)_Jwd3 ztn)$DFTA<1Fh)m9TPkwAVn@I2-XRV(~0B7y2$!qlF^rt7o;P!YYBlYOF? zyqs}+bz`jDVQIn3ci>5aAO)k24zy>u{RbuEbQj1}W0DYMN>nXT-NF#0$R3k3v zk%atvOu`|*Yt^=_PsDfq9`rhJiHb5%E9?rxLAVdT%%>eaM?$SDSDFD&{4;Z)#7zFo zp&#_$Fu}c(19b&t$opw&90=_pSS9|lKq`&OmY1|%|BjiOh&adBHjXKhvnCACmE1P( z^x6Ay< zIUGdVSLNj_L-6(*d;k<_VUTKf;eeHo#mG1TlpRBrcY%nAXsPQWhXvx{;2;+Pl0o8q zbU@Tkzh2^giWbrt0_??wzf6MUv^s5;o{~#{(%u@4|2CkE3cmEK$mhkocjr)+*aRkk zC<%rE<%vRG18AM{P&9}!)bHX1etnNms>GldQLOwnKSB~%ePifhg4 zW5%WaxZ3U0aFQb(1}X_KP+y$AhnkyJWf$v-P(Kc`Da0n1bXKged!d%W#`OBuNhsR0 z-+*uiUY-TWD9hrdM$w>QSph+C!Pq37%nK%<%L<@-D@^&1J+$;d`<|2ZOyQH&e9dRP z`2M@o)4X8j4=N zG5Mv4+FXDk{aS1F$C+K9(MDUuso(OEHvePN)Z`HO2=+I30( zC}cM^N<*t7unL%~e#&hWSTuCdlA|U*aC^BVCCeKokO%%GK0Xx8F>q@Tc|^v=vrB)D zKDV3xnNtq9#0gv~q+S4tMn>==h}UULQaMu^%4A*NL+|8)>TV&*3Vv+Td)JvK$-29~ zTLJlUiNVJn@(usGTk~?ZAJBQrJ#o~n4ARTLTufpccKGm6A4Dh4>E@ zwRL@TJ=h23IAWI7!smQdi9Giq!n|Q++42lp(uU<>C!iv_f1SN+CAmCan|}|y2Hs@9zWpjLdKgWtd~NAZWwo6DGm#}###5} z_xB6v5`#wwK~cQ!>0Da^DVh+-ph=7fkWoFa%emmWa6^}WIBqmaE?k;?&c(C~*o zhbCw)3okF1gbsQONFOZX_gJ7g<{`M3sB;eas3@)oFjL<-v1;g=@&YwS7}_QMKuS%N zc0av7KY(zuC0vt$E)!uSRIg5VABHv~!3C(&w8xJ}m)hY`qgpv6`>wjczq6NYcLlU{pB6`xxlbk<}hn~NZ9lFhfSvzcQTndKWE#$vxBgJ=JO|@?Z1A_ z3--XXn{$&}a~ABWdT0S36&Q@3wq8Lw+B#uW|Ey1;$qlZ!A1M{jC7&9O_9u5hR z8KJ*|1<1K9=or#))d02L?e9TjcrdhmWMpT5oN(3KD_#3(JTzoap7+d%Yr`yvj#rh~ zmcZ#W3paP}cbu65(wd;@hq|zO`4SpF>O@}fg_gxI1q@s;)iink@wE6$C!o3X$Z==)#1|Jo>Z20J?CCm|d38sJ0mqEcXbM*e-$AI&MtAVWqXaxN(0F4y) zHz041fXnEz1#ETHy#s9HZeNq!<1d45a(NyOx<;nT;`UMW1p>Go1T{tx{+(Cu6x$(v zz8NMQ1$LU%+H^NGpi34T_791QN1%X#Z4zkbvd4z0r2RKZY7xEhEcGOd^djVHwjU$Dj!6fwpTrDNQbvqb8us)L~5qAnsBEcb~MB(M>Z;I=aw zyL>uf<0cPz?0+mnBs3#60L2^XkFM^7=c6l6$P_~Xi0;P#bYq8R2oelb&uSPLN-q%X z!GIE=h2a2hU7?}=`w3`IM@=3Grhp=Z1-()UL-ZeE$!`$WNIQnHY#x=3|SwdGlOPYS?l#-#09{LT20YRw1aFMKts>WImYwBT& zSaEZ_o^bNv$536Im7~_faCZlOU%=iQ6~O%|nbueiA;)X{3@i!(vfzk~t|S7=S2pGdIVCxE`(q06~S5i#{ zLp&GOABrnz`J}6cm1-!?q%iLVkQc0Bs&D=2x`)%AdH%uOZY7w8}Nruy3|}k+KCK6(r0=F zq>nZ0(M$9I?4J5$#eFB;mPXy9n1rb+GwTubN2$(X0FD356HTUOi8PxLrTkuVxy|H! zfd)Op6+NT>P|-Q;Qd~1CN2))0(Q`I(-L+yvjdHgMTsXbMuK-jo-%R{X@9d zfcIqe2P?|jVPoSt|J^a3`~rzfSGQD&Q}H_e;wdc>6?me}eFW9@UXqe#8&QTJjIHV> zj$VRpTxr(dXtC}+-oeDrh}QoQ8c*Xzj`~IHtmq{YHm&g& z6yzdG(wH$_3@m&H8%!?iF`kggw9iV44y2Xv4nzg6`=6jGE9WdEMe3AI*gvu`okK8g zi{|L|5WW7wZOl!whA7>ackvS;3Rl{$7ynj&8}o70CUX}fNaO#X4LFKsf1{oAu@=`p z7Ifqq$r+C}DIsPwVt+K^Y9^=B)AdCd<*w)@vW9=_=w|(I9XV+(ma-~p#C<#U`rT2< zw6FiR%Im*X|JL@%aXuhuYU}pIJE&)>KebSteEw|k#)m@@Qzvs@Ba-c4RYdzlgnpum zw8)2EI4GCTJcbxluZ;8cSns6@qVz?8=M+J0zlg7(xdy>p;||^5oFqy#{6*~dt|+6G z-vXYIvHdR!jS?OZ1>MP{Ivq-!HxQ9;xl$p>iss>r-jI>tjS(nj>3zK1=*;_%9xPWD0~+fNCT&X|L?lf@gn6WiH`s27=%F3UW`dv7Gsz4h;1u~u-4I}FIHxt+ zen$TJ1s)i;cnsRTOYVnY8OM$4P41DR$*ZC8V^}*vNDyQyLWeh@-Fi4RUM%R)yG!O9rPkhA*B_TWkRUTOj( z#(a;HF@IhDoe*0V*!s_M6fD^_q*q@(9=9H@(`HPGwEQEtY!Pa6vX=6n91;F%qO9{0V5 z-N~f=e!8uEz1-1cnj~1!={6Xe*exl^bXyI{|5io?R={!|GcjXj{b{q0rry|&*( z6<R`|tEBOAjA&j6xF@;sJ>17Z>xR259t664SEDPJ43>6VHI-+_At;fNh#v83pf9qtWl+1qp{=zy$}`(R$QB1fUBw zh@d;9Xr=HzU!cp1;^`;e0~zn-aj8>zX17ZmLkpa*RrZT=lN-eoLejMuds6+E|6g8L zn&ZacvawQ*9yhdq*!e>50+_3Q?DQarK=T10NAH_(HT7|PJV{>n^a@LX{^8(;;k52mwAO<@0_u&1S{upKw4dO`a4{|i@M<&)t(sihW|zl$4eHFOJk79B=9{baobr zQsaS}1|=JZc6an>*ONWESKTAX>W5#``0ckr0?@&=zOLdXB9qXbnI5DOv4z zyUzNo0K|)(hhSaZ%{-4E#QlS7(T5weKPOe~>PF%hlDp4UH=TN%YCu|2U|aw3{eB^% zmj3*w<=|d+)u3Yl;Lwv+M;_9)wk;m=U zcTCx1Ar_@cN$bUV(_Q2ht5njz@DS@ewkRZCC&WN1`D0HJ8ghB34Z~`G2gF?i9hlbL z{78^}xGt9x&bj6m$?fZ|j0rc`Bp+Wtc;|K?DMUp0mQWYKBq=~cjx zp?^tieQP4IqrBXJA)o4`!-1R3axNu5-1W{_2h{)`&4U@uX)*Yop`5V$ZUY`yn!rwm zYq`QXP3!7++gDc@6JUx)Ct!+vC_mp$NV-mMjN=Z0KY>f-mIyG*OB_3e;#jHwO@I+-SQpu`84=^ zx_Myq?g2oV_qo5#TK$ipoc#ND?y$@XMzBceIaRqGd&p(-D zb_@ZL^W*mc<5N(CvX6sH6+HT2&#Aj!GHRNYDqdH|hSJ=fDOUb|7E=}5s%7wM;&%zs z)1e{3zfSa~-T0n;?#7%JNTF*B2m+riT<@^@nVa+M`YY|c+#5vv8bLNEQgsO=lNFU- zkl0a=*H1fjQ+QA;1z$SFbw-g$A)4aUll#5TZ$vpBNQZ`eWAGkq*xr4oaO}b#apT_# zZyMSl7)uGk(UbV zwn7Q3=fQ#yuKR-Rm}$~H|3y03>r$U1L32UMTo#w=?%=Y9c0~tEo$kletM4SS{Jyqe z$WwUiJ2@TpQe`;IHO0x{#lERf$zneOG;og{R(4cW9C z#{dIymo0lzMKf+7yn55B87p?2bpv{ zf0u$h3-fwZh_Z{=@y=bRWBXe}bNr>{q`rx#@6jlG6-b>4$!qOATlN};^2IW&=~U5_ zI{V2KIGwgiF!At9BUR*vnYmKB<(E|z3noOa_l_TM^qnO%8|MZolIjUzjB(Fo^(76m@cu|iX*y&qKozuK`x(CIANK;r# zPn{{->MztiT{QB_*>HJ6qq@ja;d4F}+h@U+(uKYIT3i7iPdRH!AHV%=O5ava_!Lz<(B!Ps$=}PS#m}1)1DTvw z#}#tJMOl@28y1Fask2cXnHs&?nX475$wGM>*6jxz$6mMPccmsv5P&3BR+EJh?+-nOCDdRYhb#q812Fgzf)i0t!GBFleEXf=}M0O2s^80Rh>`~t#LBZq0jzTWCBxrXlarca z>+!tgCOky6$MQF*f}~5?uqL9?Z@CPT*9~>Xyh?TNn=4R0qafK8Jq-$rdZ$p^`KA#VU*{=tMhh1%Pz1|@bbLj76&IkOpi)bSvMm z$LnqNp*xwi9a(tNlcDK0Q4Io~ft4&EghI6Gi+?e_V!y*Un%e~cK_Hg4!){ET#J?vx67uI1YVmD}?<3>2^Vh{vvakORZ_Hx=Sj4};6LMmL(D>faEXRLOdlepOvvlT7K}W6vM1YZy!V^;Z|)3$PU`B;)m3 zPC`W|HOF{`Y-9h7=f+a3x41?~-9Db@>m^Q5Ws`mM_pfffy_==vnP@G%MX)&L&f0)f zyTkZa*YxmfKWoi`^g}rn#E$sHS05Sp*6|7pq6~8#$(}JssYv$-jH-lAyCC~ueKFao zvXJJa2A$wMV`xN8=AXyE&Pvmercgj(nU%io)_NP^mR1WGN~(K6>PR6M@pNI!HGX4% z;`sQ9>xzxp4EXf;>%Jwd?q}iINN5``aDG9JeLH)aCV=%sToV!OO;vaA2m{nZR&24U zpk%7~tdOyVa6d{joashcx*lAe`AcoMp$rjYr{`by_o+kN_#cVn8DZ{!-5RO~LdoRonAoVN! z|7q+!z_I@SzTxj_lfCyQvruFtGop;sO0t!58YwILw3QL1C}br&D|=Iv66KUpLNc-v z$&BaybzRSOJ@XDD&opB6f~|uwZM%!!)i$ZH)vT>fBTrBJGukwI zV~`*KT^v$qL8AqovBJ<8pkF>00urh|3gNlBf<5|OoDK`^t*KuZAW!J?bw}S7DMkh) znt%v|;6WN^8n6~52+ULgi}&)%$HK>A!6fa;Hl6fb&vao{QobKln2+`|2m){$C|3kk z&OqDa9WcC+0VocFV(F99(nhMH@|>rjSNey2HOE9^b?K!of~Ql+pSJXHiA#YZzEJ4; zZ=@b{i>SF2sDY4c+2}2{5+&nsFm|l52j~yyE0rB^IUH7$GLj1&`2(T_|k=CaBVua=2qdx|l%y zJJhd^o}!5s#=P^p_FdiLX3N}(hhIEpN}gdQ@4%VA6>^a*@kd3`V@U0vveMz{&TR4D zjkB>^Yx4Wm4RGDVu^M1UKoikr>J!#$)tc%Y-OFRRa(KL<*1f`%QxlW!yR{j zYjV45w{@ru9x=w%I=NbDMBIba8+ROD^z;0D6wHI*0W69_dC{^Am3>0Vn9|nDINCK6 zPand_cqo=?PhNj7*#aN4y@?P&O1zPeAheP!5D@&HoNL+nHXmBL_Pvr_El5fBeqFc_;6z*%*C+pXojO_K z>arJ`^Am}RxtDsaPv>r;=jEo0X&#@hJLc~_r|qp>Gi+Wzye01!uu!;_ZV3b5@o+$c zQgxr{JwT8cS{_XREFQk*!ZtEoKH5gzs~U4%7X5R#GxP>;__+vHNGW9&+rHea%Kqw_ zWpaM#*~c21YmA!q%VE}g!M%}84|~4wd{g<=@Aj3CE9$C6P|H8z5Y;4_YF&57kRnoD zuXh!5-m=5;qJY&bc}+YCs4-9xL7NNH&^6J7f)klUIsHJF`P!MEHwzInR-y6H!8@tq zOC{v>PpIs021>Izi7Zf~S( z_)n`#W;In*b$?vR+g5-wtM(63{KA42`RIzLplTZ=9|3O4->u3--xwu)tJEoL$LMn#79r8l%chX&Ni86y| z09N<>R{F(x$rA2tM=!ikBc=AmCv6od(*Ys9{DdOt11_`5}1!^;v;4 zXLx~~ijw({q$WLyur=jCB-7nb7_i8gk0dcNP`K8(*h%@4Q}vdL)hXYcjurm#&?@P& zV+VXS1fu)G0tCAV_n)MSmn_3JPdyLhIhdY!Y^!!d@%|CD8mfkTRs^p}Ap+p`{M{bfj$;);o1vwVc|jM)TMSNWonPzhhQ`jJQq}iqHdU3TQGCg_tIvRJ4KVD_z8_LZXLM__I2z}g_-YQ)6Fiz-_{*ATjDr25)(;VRmcn5^~P zhM7l}K^}C!FHV2&9nuWSvb!_Fuj+g5?0{>^fU6nB#<048-aHRiELSN~4sS%XYvDzQH&ji>Acl6*$7N%6LA!4yOT4hi}-af`IarA-j zA}b3%;*$CKj-Lc7|4tXSunK&m?iKb8oHmCKy(f*KUcV&7gmwcVrsT_xVc3G`w{BXt zMJ)Q1^)6Q!4cW}IA#02lj81#zdCt8z4wef0HjwV7=VV+@JmW}f$G?mcM3Ek;L zlz|1rCgTztyAh!JLDnJ;^sNE(sLwYQ$2#~3mLzhjx65zbor|?cWae9l?QxnEy=Al+ zQl1+#gGDyM)Z`*$PH@a+42!b|Brr~ejaj*}i0F0VdpR|#`!G#flV%R2W>it9Z{%cI zh#a(G%XP3b*C)5=)vTR8H%Q|t2;ZSAfG$vJ-E|aMR5;}F8)0W=jzsBP&?Q|vDFYBm z_0(fFR`pj1U#$iDL^tzG3@#+K>L4dAj^H8Q7d2+qJY4_flR&LK{6^hB@K${rH&SwH zJD`@ZK+uQ4+0(1~SWK)$>7u~o!ZaIOX;0XD#)hn1+pt8)@Jp=@Qa?KE7Qk7JlU0t`>0Yo+W_@|m3|zFdOzoxh5AMvvd(P3jbCxJpQd_jUQnk0 zBj}eqJM@oLdtfoK6olwrI}C26{J!2hAYME8l>CiY0r5c^iDM44n#NOy0^!> zlAq9r!|KSITPxjgJ%@0Q%TqIiS;hZiz%HJ7DimX0b8?z<0~bl&`1i<~saC%C^00^? zg5XZdqm8JSzAOkEq~EVzX+z43LP_9wa38pdf|CL9@X}@QNgduyGD@>^#AAwmNS+#A z$haV}?DNZtP|1-`F09Y-24CiiFic2!gZ#NGtDawa@R@dP&_1aDod4rfcBjeH3G$X# zA?U&dg;gMB+{_QUN_H^H#YQqvz7fL53=r$8E0kz%1cVeSz~891h5VBNX(~XQ5&W$K z0+dl%0Z=Jf;C`5bV`6%i>bePI`$~;~>zu|9=3ML@DRy3y%U^nYI{k_2?mzPaQ^^_r zpJzlrWou4OJUP)fa$c5JY?ay!+d;ErK8A8ILeATVh`@#)6CyCo<^ZffP&Y)Erjtv8%lPES1&_vD6206y|fX7AUP0D&yQCuyI zUVvZ`bnLF4yO|}PHG)wq+kD8DcwX-kjM_bkgc>6;NQkPjpTS9V8xf8X+1uLkLVf{6 z+m>2gK=VpUzMGSU{2Z0FyZ1Vd?bMbsP%c13(Dop<*!1=VJ8z8{<<8EfRO(P1=5Y%qaH2y^lOMWRajg=!%9ci=JtyOC~}pD0fa>WL2v_u*Pw&& z!yTBEuAfUgV2FlWngFin(zg~afI+7m7(hY=l2K=XUwg^5@2N!zsF1Dh=>CWcWaW%8 zoy>ju;sqt73LS1(G$(~jB;`HEAgy+g|0jl!|G zKE3Iu@{{-+VXU4!Tv0L=zsS50lap!lK(qW?s?Fj}Xn@QS5%XhDc=nUduNK<#O>_ioD%&}Hm{K3vihj3;#Rpm32kIR*tU9+OmfseYK)SGlBCBzoJ)UQ=etFpMok zc|cPHKoX$zk7I>`yGGW+$nYnn9#QMR#gic}-K}`TYlDf)64fJ>YSrutG1dE-=;7l8 zzfMNQ2LXNH)G>;>_zOhH>E=H-V2t`?Z_GD_No_APtj3`J&)4X#hOI?y24C*uZdG24 z&`F1e2afC(Q^mbYk9gAtXvXu1lI@&dcbz}bG3|eqV(O^z_;uI?)4`1sQexE@ByskU z{3y&l3TJ?%Bu^aGW8yjFVy@rp+lO3Fq1p@JC;xThg6k*xd4JK8OoLirEHE8Dm|y@N~wjvxN$w!N{n>o1N|L@yc z(cu59tTQda%(%ii%yNp_072egs$vSwk|^H%n(y4ScVN-PL|%2*q1HU=Oq}j~$#G=% zp;d+Vt~mEw@iHI2l5xuZm`dkRTBzm5e+VVq0Sku1=T(e5-!U|k6W1hg+Bj}cPV)J& zuaP%=Gm5=J`>kF>eN-`hn?Zs5!-G2c=VYFcW>_v6x*15SHw+~Xo@J5$D;twJ#``C+g?;GTCc7H|I3+3=N=?1jnCH4iGWmq}`3p`ZIN&zV+4cXtBBgMTQ$5D- zHZi_aP7+Eh3W3Q{&6Zn6tRX6CanN0~-3|PFEIOYG{-nYV})tv=>%}L~8gcD>= zp%${fnJ05#8&m0Z{x~ChTY6IlfM37-u1@aSaMTAFpFXB`oHlqm9vwt|e)F&SIM;J3 zt_&i@6dNJ|e99Yqukp_vrNJxFf%N6qwyIS!6(N5%jbc%JVoKGbLm8St6`>N-?&t^o zWc!U)w~S*_x(9|G^qSf49ec80C5igCqFx#IwN3jqzk~tb@W0UfF986_qIBQ%ZkY>d zEX`-93;OjiRy7P$~#LRLDpW~$rX!x09+?X zEHTJ`1SO08@f=9oAKUKU#|0!n>U~juyfH$_(#E}SQoW2yGXyJM6|4s3Y!M9yzD&ex$7ppk)LpGhU{-<$kH|!WS>B z)CIK5I(qwf_7QuxI*LDc9X2{^Rv1 zdtR8H-E#3`CFoPKHZj2;sBoVDjYAXnJf(bi#M6AljmsP;AM@X9tWLXkG$Cx}D#Su~unpRZ`Ob6xL#? zBb0)zyQ@Q@FRT7~>Z_@)D5`OsuglWRHcWtoO^mrcEyWWW2gUC3l%I0EERm|aeTwk1 zp>3EnCe996lNImW&IR>;c4Eky(Byiajg16h&tyDCcT#qq0Ov(V2xt5}V>E+JCxXBd zcN*`o7Gu%(ruys@>MRTGmOukhNY`u+qf@?_z2%&7=~#Ng-Cm?V$j^EDLrxOzf^cwS z1raprTWZg8ZLA$E4#8jF(1IDn$I81_UThhEiThP&Yh6*oXY1x^9(p>V%H`WrotCj{ z(i4x-q<=h*c&gl7@ReNz^l#i~!&*3=iz=xBl$Q%#kJqoy8NVW3lx@|Gyh!sHxNrze z&!UU|IVG`M$!}?v0nR|_&y&x;EfVN{cll?A=a`*?1t08aWAH9CQw%p64v9|Q%fuTj zxxNx_j3PdYPf@H5s$bNtF4q(gP%l|qm?*$X$W*zqGj{EHhSbG+SH!}Hb^e9+jc*>9EiJcTJ?(f<=yUP^jF^7>oAr=qY?#!M|(XWvyVMz5EA zB~HoIeB(v4?<-c9Ch|$?s{kv6{>#x9V(jWe?w-=e`-Bzi|a+RKhgYD&&O8C$)z;G6Rgzw_fM3Coy z{Ycw4Vs2qAo!yH7UWz3KKsYwsIuNLFPFn^R(e94VT_dFG1BV|-=e3I?)bhVvkAD+N5@)@%1SWbhYLKvo%`IR#K zGBk>Uk-pj|D$n5qUxJk@RKIhy=(Sq8GimI~!AW)AgNqCfI+~dE)mK%j^0Rc1OnJqj zA5&KAkGKx1e2%^NVu3u01jTZlZgULH6p$$xiN9ca)|9zM0|xbQV&aR!4G7ubFBUH?0j6Rpp_LbKl{vVdMD*A% zgCnB$MyC=kXj*2oX8jA{4t>UA51;vaa7D^GnKR~q>k0m=NnG;{Yy%5~3J|qZw6P3> zQ*}U`&_CzDOna2-ojcli{Q`*-f*7(iK6zmDBlVrbfL)2Ub~Y`NogzFUN&K9i=-qxV zeq7wsA1C!v;_5V?Fp#Qy4qsx!9HunS;sfM!L*Jw#cU&1<2EKb5ibF?`|D<{xOlz|( z&dqZ&;QMQzwxbQqz}F#ro14=pe4k+pU}J$qwtE`ISy17-g0WLnx!PDo4qn+h!r$bai^xv8-P^PBbS0b~eed-f~)_+K18zDBI@Dqo?m(wR`g@V#6EX0$3s%mT9S z`9ibg%xNZ*5Xu)xBd0#Z!WIQkVqW3bjX=98W5}T#v#xu`$-Vi)ZHS2?8ObC9AX&4C zWa3Xr9$Ec-qX&IJHup)cT~T#{{tU<#fN+8gJM{*iIwA22TRK|L?bYb}|0z8hVH@?G zN+tSeOfE#A))C*e_!rRHLL1Z2fA}Qndf<&4CLv?7AO}<-ApWKRBas`gd|`HsML04DNh5RI%3 zY{TF}bu}_}Of)1E<$--j(kLK9QZBnjt07`9Jcrgu7tD;sgy-d{%q+PS%3u_aJA@Hb?gsH!{r69Hkj63P6{Ikj>_@SLVgRp-uz#!Mn%m*Ju z0}O5>R(13=Ri3HjO_zfat9N{?*}r#2y>{3inrOmFlOg75z|{qdWtrw^xM=Q)+48dO zQ&&XU z9SPLQH7aGW5hAiDl(cmJ93l#*C03EK%X>9=o-nY>3$@Ptbq{zEFNFGz5O{TDQ zJ6}_?oLwkBL_Xb%ODo?j+qBRwLer`1!(6fd63-$hPyKICTbm}xDU?bv!nr3{VWI$K zB@`tv;{?0Qw1QB41(>oD5 zaDLzY?)gMQ9@zN5<*8yo+Ud-uU7ZIH>XZS7B&?uDq5(KS&8HMif_DFVJ z`Ss)ruDXr2@4>g-XRF{R=z{_z#)QgA;d{YK)*#D^*R316WZC6=mH&Kb71BWw(Ia+{|ZY`Gj+8 zqb4SnpOO7<`ppp^KxL{E~El4d1D8EP0Lq89T$&U!k+RGG*};>wfvTS8^oFr(Hc zoP<8?`*-gmx~9}Y6=nD@un&OuC_m4Q^8Oti9h)_;`21Isx$qxW63BH#kS{;j(I!S7 zEni_{5XAApXPmwaa;Zcpu^$yX82E|H(gi&Re~5f*ZChJgUY7cT$``-Bdq2waf>&-u zWaiPY50Rs@fy++NCVFP`Kl4p>0CJPrgX*x}2Em((fDIf-$AOR(syhYfmhB0UDD7`t zg0W@&9Mek`(nxyt`^@2x0E#%PeCAzMt%#2;lm>n*AUja$1L!O#*D3u6w-of;OE&6J zkP!&0N}()mP?h@}$n&2R1#kf5d)T8+*94m($#WFBC_Nd-H8^0{{eb~X{`nX&z9+dCjJbI_{{vwXF$@{GaX0z^4##{t*IjWJKBo0sR0c{h0Vb~>VL#B0p^*xhpw z1rKk>xX~2}jwky23@eTj4Q@}5VFrp=APuGidbM1oYyPrqGn4cOKMdX)oUdPc= zz3eKzruzM7oL|mM`?{UJfAl~~Pu;D87ci!B1}s2CAbY48Ueq~6l6lctLpyuL?J2Dw z?%IDW2s)x7ce5KIk694=8kMQhw5q@A$B>K5lkpCB>H$Xh?MqVhJ%s`XfFLa?UlRVc zJNWIwg%zfVGdFeO7kR_d_*?dx?C7rK5x%#dRTauHmBb*rUgs;PZwu_zeB&F}L_(jl zjly!4yn)w!eHt_M#CBEX1sHTRi zWY~nmS;{^x-z>M)aC^V)OHn7_a7xu*Vq+vtoL~&D-CTEu23UZ(uN=`Nj`XU9)b_gY zqdk8{$STGEIm#%CvqO}EY+tNS(Nn5`S?ht;#}i}t3CKu0XVs&I!XUdB6RQ@iBZN0I z9(joqf%gOq;2)`%k}u-IMrUna03~?m6=_a3fhfk*-g=YbtDggv7C)9;nDQ_Eng1c* zl(yj(#A!m7UpkUWim-o?<_1)E(xo3(YtoR)z_fk;%+Tsj+wRSHAnEzvg63G*^!8Q7 z44bTa(|xmVyq$G+joY9 zCfNwk@Q}Rg_xZxqY;mIrYMBO+M>?6mQiGKcB-1L);cbp(AU~U)1Lj=u60xP3!)su- zEGc4RsY-}pCLplSsTirQ7sYdyMWLMGZJ06(oO*x;fL1Vn9f2xJ7^suFFR^hyyO@&M z7w!Rj$5X0~dd5P|*{?JEqC?+4H$17|uRlM4^pS`D@YPX8MILF?wIBUJoz>QV z@^P>SUj{S!=x#*6z2QFN8EQc?g*^aPQA^c7$~a18LrFa1B8oSkYWSgE^p@g1W0u=Ivtov3Et&hyJvSw@87S(jOR(1qHnUqZKCLFWt zH1kbr-b^3u0t55}g@f1L%GSM|&6>~3741uV*6KwG<;aRB&F6UzCv?F;e-Scus6b{y z0@(hyp6D1=3%cWLU+QZ-hNAsgqcC8hLa3f9uuZ;w9N=V8_%N#AO=d>_^$QGcK0j? z*Qx%e!g{yLo_;h+IKTVa(PUztbBYcD_EXxY<776#R60>{MoS_+l-&DPrX8S?`1nZj zg&pSJ3ZnD0>Fa{H8;3V%KTrgffhDxCQ|iPI(da$(E1-Fox4*PAAkO^hQdZtEfEMgc zH(Pp_2XRZ}M*>hj;(>KAZR(2RklU~*$}dZbou@v_;lrN698TFHc)3xGCKo^lR=QVb zO#ImXdO!SEqGp6nrY%SR(>PuE2n1+)&TRLejwhcK-zsVn&=ZKI`6N_M2rsX#W=$&0 zL(jYg{8v{VEh6WH6>8-6TwR{6WEMSk_cfF&1Wbb~!eba#LjGDvN<9JwQZ*j}p^7;I z3mr@T;{^tiA#a?B2k*~f(u}qpeu>hhBj*U;&@<3Sy|`C{e`N#BIkW)|$su~e_TP;0 zc{n6LVBjRmr9Jlo<6TdbZJYOM&@>Q(e6tCqcBPv|FTQa5KR!X?syUx}9aKDy-4!HA z`#-yt&Z5pWR&g!{dA0xe2mzqu2i)5U_I9KL5C~d&{N!^x>u{;h=iZ&dWvCe7=Ac&? z_^IgO?Hk?duh@OyD2V1#8;ibG(jo$HQBH5{XmnA4Qm9_Ef%qi-PUA#nn3PnwB-knq7=idN#Gp&&mrNi3&ddHe`BCC<=FSnsbY0lXlv;nlj0@g zuuX;MQ!NQD6u*Zjn+-dF-;UQQ0v?CD_ZD_0tfma1fuRi;V`!Z{6uJsP zj(VV#N8b+Q9+=X_%3RMe7Y}|E_9kIs0yJTxPC*;jT$3WGZv7+xO4}70>`uOvApOkq z{%nZLzmiZG5yl^URUQooTL? z@%vA0ozKLtfsdrbpceEN-2PI@*dWk_N7H#6`UqRN+!(~^0_jVw?;LHUlD)2^K z_Iz?F`j1cCYe@a;(S7ZH;j#nGP(@ ze2I(SxjExV((7E@`v^J*NcxZi)F%WcP|zU8v|ox++msj9(@e5W@W6)ERv>V(C6kLuc1x*IOmELI^Fo7;3ZvYD&l|AZG-A?=J(s zNSMN39;EVo*Av=lrqubkFb@q?xxPK)B-S223Cb(1EU|vi3pC3u9H)~{TQ^=80;Yx$ zzmGK}O4@{#EpxJVDpzx_U#mZ4q*{U%fm4eT=dQ*nIE{W7oV>zLU_j3RY!e7P_)^{8 z*oHPn0IrjrF&$Nry=U;j=lvmY$nux#FEhpbx~dnMQFE|W$UsiCl*HX)!4q(|Qi zp4fTK^-;2W-YW~b)88g;Q}f+8llZAOyU;=(Mv~mXKh>U-3H@_d{^`bEB%yJ2HyyKD zx=IEkq!!w1srHbl5qV2MeLxeXS?n_bPkiFn`}X_pM=KUn-Y)t9tbmgtHsrUCcLIer zG@wrEQ|jJ2&)`JJgQBJrScOFm-#jmO{J*dWAX}rjC%0>42tQB*eya)!=(WEMzXkJR zz(S9tJ80gF;+YELWZ&Q_Ljzi;g%h?Xz%yZK zLHK1TG#C0*81;HqJ0L{7lh}Kn>6&(I7MXGa<90qP%zZPY#A1e~w5=<6XDp6_@Eb6_ z;1PDtdI}ns=sX@u6l%zy=W^B7o}rQ;C_*-loR(}30~B8!Ft8m%XGaY-P26gyU;`Sm zJc4B7Zn)gAT8&A#upVp=hZ|uSVMdUe3BP#|CC>g$WGTb0O?rCs6!GMxDH>QCJ2NS1 zzZdtp36DNP0EJKpo4FHs+xaQ>e)YZ<=L$FKp%n>(gr(EiSd%zn2oF#K0NJx*EK}wS z4^PIOonIQv)GES^!gu$KA z1^ko2kEhwaC4lq^VHj&r9QD}&HG7=90;^{N=&j}IqGT@vB1%NpM(PA z4IFR?A6B*;a1?3YM;>a1!DB&DZt!)&(K|v-Bk@oIOZgs!(-Q4K3!{mx=HAdiaf)T_ z3F7p7Kc+STl$=2X00Y2!wFB5DiG6k=yx(`R=f9Rv><7}#i#2AK&Wc5%%;Q{wn{J9m z+~%o#hr#3^{{sL}cXJ!{WKZ!Fkh!LUek zN}-#>mn6NGQ$;%eLSCX%x7io(3q|rSB6{g*dPg(gTYeE;)>aujIV)%DEcdzS?5QVY zda`0?O+z$g=(~vSR=f*7-v5iPSI>|%@8LM<-`&Tf4E?(qUAt$hz5Y(A27ZgUdTB7( zDXwejhvj?2#Iq))Q_)T)SD@t6);>v@Z*`%5^$u5-_l6dU<>LqO50&0t|GCta^F;FY z?t!xLU=U9NPs{bCdGqC6(iYmEl(OfHVl{L$sqcTMu1L#GX1=DLVi;km#Z1;5;=LJ| z?4Ho|l3g|aqcVHm%h*rBJJgg6VZ+B)$w^w(k(=Gu^UM4OVvfgGg*g7ort7}WBD(w%Jo&v6MYcKpos{jef0Q}-&^d?Pg`*HllarV`10lB@v@9`hS z`uf|4*GzgO-I1+r8c!I?#9lY~bp52I+zE`i!SKNNFCL$kPJt}36Tw13`$*$X^Ze4T zw_f^(Zvua4wkJ2}`hWQUBgzWhvMX21ZY1{K+U~_r-zGk_*LqKRw>vnah;^`@4gZP= z{oGHzX}rX)&6w1&fPqs#Vswb>cUvl6bICecQGfksTTk9hk9pp+;Gh7ODtY9?yUm_H zn_YbDw!*s}u2Lq_q6C|plJ6!*-s`+ibMB0}7&y7WtL}A;?<6Np)dQ!gBP0U@-{K^P zcc*AbCfAP;39_$}We?))PjJEi4@_}H9_Nq!^NXVtya&YO5PyYwu^56niFnn{Al6{CR4`ru)#SgPZVvy*Do_^jY(w*367 z)U@TnY0k0^?TflE4Flix?77+LG>MB?P7wc?J9Q1R`&hyII(cC$3JWhkKFu#3!ol0s zX?EF~y?3zT|Hf%N`F*;2_%;2jpo!dq+Qxx)%G&z+Z!lgNKe-RS#qAkNUiM(dlAaz1 z0>jaT1+W>lr%qg@YDX=8zSP~&1!cx>p$EZW^O{ppLBG7boY&n3QZ7KJ=>SHG{0eX zIKRC7`tpsId^Z?AaG(Z3U>34j_eW)$hX_#TZ)371Os|l6UcXKOgh+5VHm-H4u>t_Q znT17SqtVr?`4YScSSs5~#Nhg8hI<>j$P-A8O1(c1fj|~^@ z)?2^$a+e$rmN%$Df`5KN!G!4*OJ`C5ZJSM3>@ zm_WyM;PdC_W8Xf1CQI*ASU97QzFhkq({3*Vf4=(1pugOCUH?6@un~Z21hcTP^wz5L zwQMH-9O`juRK&Ydu zOYXZoMwe*IQI96M`E|C-!R>Vd=mYcv($0V1`1qp)8;m04c zYYjYaC&HaDx5-cQ0k^WqMgdQ%zOKK&a;8|F2d=Y*%ICwX8L1bXT^1Bo{j z()~Lr^Qw3Q`|#BKYSK=vWKpQgop)Bv!1m0y0cN!Mie2$k8+qy>bNd z226ZVS{`^dv#lI6-~Pva3+w!i?FDPqky*;1u9c82tt_}jox}J zgQKH&z-a)yUclJWZR{nbs;X*n+Sr)YSGW$iBGg0Nt6!CEDaU|4D|H$?P`|FW9+QwT zc`eg(st%-_rNFWPO!B~Ot{Zf{v37RlcR9+}BL;uIg1MZ747&_iRRG6RZKNxm-b18E z^5To;sg~xe?^w;Ly|^_snYv+!1il>aU=Ab2cFZ~n1fAnvJ0{oG)_VH6n}8v8DmWPE zySf5~ZGPT(tPb24fByVwg2f694GmR+*&Yab1*Z6B^&bUIFQ%9-u*#6X1#Akv3l~yA zB|F+lk*O0zv}eGIG8L4NsSqXa`K021qt~(?Zf)IG3fij3Pz$wf{he^*4G5?{YiqN+ zBQ7CvA4F$Ce>Wb6l~&IEPo;qfq95qzS7%vfAucWbu(46uA7)Lj!9qChcq@CcL-f$x z%x&0hCT3>O0_^;JeTyB_rqd;*q*6eVO3%h7GegDCuytpsTp!F1|NQwQCOdsIWD8`B zMMOn)KpE{A+i}|z)xf{8h6NA6l_<))dCF5#sTG*~SKzjaH*djrfBL{3!+NBh^2*Oo z`BgrP7BdGSZFXP~a}}Ig9hQcE!07TA__gQN`uOK5e*kr&asvLD&E3x~hKl*uSNCaG*Y{A?Rlm>49 z&P9^INGwbLz1_ji#gErd-kRwJbG32 zZ)buknVg(l%ASRV1?mO^{+D;erKBRiEO$p8VbfGe(#tL?qCA#*{^20J_8)U|CAV;3 zp#TGLk?tu^^^&37;$rUjkCny`bp zndw;C^q1lBaT)}K;SrgTxtQKw{UKQ$N5S9R`x8pMzd>mZon6Aa>jK&BkHff$+s<-(O98|xhfZ0o>;@NSs8i|0*tNfni>{Va~$B}b-x zL)ay|mwofv+W15th_h=w@zi-I3(62p6K9>BMPb2VFJF!WY5eTgR$g*CUXph17|tW= zPvpQ3uWau$SO&w>6*_T(2{ke?qB`jkJf|{&4f)X5_Zk(WMtwAXdgo?m^T(^EXMjW6 zUD!;h>F3l+Z;HG{B45C~z|7p&uh!3=Jp-2+Bq1RIbsGTxso0+L?M&K^sI2xljbCO> zO-+Y$byU&C2kGFUfU2KIN0W`XDNxhUb1ZUVCuS$cT=Uiz^vSO3%iz zg3zAT!wFbSCr_P{+^+eujQVZo@5q2hq~d$$6Rm&pk4H5hFpWd-v9BIAQ}SsVFDxzw z$PT1CkeHY|8fGYlsq<%N>@EAM7-lMymJgS_LXAKGW5M9#vStws&OG(L%bZ|}g~;^O zX=+!ve^KC8_NF*$m{C7!Zt@unZm#ECqw&1_lOe-~g<}O&bG_-}-@pO|jI+ z*OQWy1>j4P78Y=jgE-cGdH7HF5x->(?jaiOK(<^I955JKsN;L4!KIE9EN>W)wMDxF zD;y5@15^_klc%oQOR>g*u!A89UY|K0da1a$_y>q&CW9yC5Tl5Vjg4cC z)lA^T?o-$hZ{fs%s}Oa3B-U^#QI+@J-d2y=BiMj0INW_yP`V0?QQm@0W>^?zhW-8T zKGgY$5xnON3ruTB^z7`i7*Fud#UzX9_xASwKyB9G0M>cWC<5mjECqiM*nHO5X!YdD z6R_bGy8rN@2pESOCwBZt^o7$v^*-D%lF70wbtxn}0~K;8C&ZSN?<4!c2pVR6wMec4Y^Qc}*jxs}4PJz9S=@Ahe4^Y`29 zb2E~5+jqf;K=1td2OwjvV5UXgls;2mUTy>ei6U@)j66Kjj~_p-0vaRNzSxz-8y+wB zh?3xl5|^F!2?6r>Gbzgob%=m~WGM>iG5hPU+AFZzsO zhG?xPB*@sftpkaSAl0c5^8Y!rK@->iL*nzl{8(@db+3jT#nUe$R_xh-=UUp#45&&w z*y(|wg2ZREFKa_aNrLRA(4SkpTcj1?tTG7LWs=mK#=s@Ub{MT zTj)d#8dAZl5j;OCA^U(R5p@mPcXa|1UwQZiscJuewt$C<%7b=+9}9A?Cjx{XrqM*% z%j?8fk5gcqNsdN){P5yg2L}N#hB3@KlK_t6duyp}+E^?Dtl*qs&>4sH9P%d6*{l!R zmQ(Qf8VjyM)8Ot&JLh>P_6!^3flt<~E?=Hn->v{J5pX;sLco#{gMhkuL_|czsGI&? z*1_I2IFR?i|2a}84}&aysiOlZ5!_&jMHvwS76PajL+qV99k7*A|C5~3QtGy?*T=Jm z)`*TU7+9ayV)&SnnwlCD9!>;L?fd149oGHT7_~_->~QMNQVZpUwGZ^@T{d~_oz8+Kswe8>XjgjhW zbf`U+nYnoa=fvP(1Vpar$&-=bQa1Jdb(|uQb3r!05@Kh_!M#|~(;pDVk15v_(r+#h z#8HVN&!%zo%28`I_-rR|n1EM|7FPiQ0iLjFOF;n$)K^fq2K2qcmIQC69N4q+LYBtH zcnD;uD>A%cw6bL~I?d1;#>g-(m=JreUPVLu