commit 085e6f4d9486e9da579b18fd070103875c6731ab Author: ModelHub XC Date: Wed Jun 17 04:38:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: laion/exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_jupiter_cleaned Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7a837d1 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +merges.txt filter=lfs diff=lfs merge=lfs -text +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6e64333 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen3-8B +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_jupiter_cleaned + results: [] +--- + + + +# exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_jupiter_cleaned + +This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the /data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-syh-r2egym-swesmith-mixed_glm_4.7_traces_jupiter_cleaned/snapshots/6bda9bf636a815d9ffd0a001e1a602b93c883472_thinking_preprocessed dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 16 +- total_eval_batch_size: 64 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 7.0 + +### Training results + + + +### Framework versions + +- Transformers 4.57.6 +- Pytorch 2.9.0+cu128 +- Datasets 4.4.1 +- Tokenizers 0.22.2 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "": 151668, + "": 151658, + "": 151666, + "": 151667, + "": 151657, + "": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..4bf3659 --- /dev/null +++ b/all_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 5.548911443773127, + "achieved_tflops_per_gpu_theoretical": 271.03236502585344, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810341417789459, + "mfu_percent": 0.561062835568567, + "mfu_percent_theoretical": 27.40468807137042, + "total_flos": 2.4258565470369874e+18, + "train_loss": 0.30312070208478775, + "train_runtime": 54647.127, + "train_samples_per_second": 1.285, + "train_steps_per_second": 0.08, + "valid_targets_mean": 4356.1, + "valid_targets_min": 3101 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..01be9b3 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,89 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..59b4193 --- /dev/null +++ b/config.json @@ -0,0 +1,68 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..159097f --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "others", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9adbb28 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..bc67dc2 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:774bf12e34c23220988983e32c081f29e0a15f40396b2db42817d19b068c0139 +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..9ba0873 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f8d9d2e94d7fdbbf986013a8cad3788216ba61e7220c4d02adea45f5b75b304 +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..1a83316 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0638b5cf303132de2439c257f7f394a6b168c0959457a73096b668baeb51cd0b +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..1348e9d --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10dfc46fe43ff51a4dd88a44c5ce1ca72e1237000799e4597b7a7c9c57fd3e4 +size 1580230264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ba886c0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,407 @@ +{ + "metadata": { + "total_parameters": 308224, + "total_size": 16381470720 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..51b1bca --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": "6bda9bf636a815d9ffd0a001e1a602b93c883472_thinking_preprocessed", + "training_start": null, + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "Qwen/Qwen3-8B", + "dataset_name": "/data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-syh-r2egym-swesmith-mixed_glm_4.7_traces_jupiter_cleaned/snapshots/6bda9bf636a815d9ffd0a001e1a602b93c883472_thinking_preprocessed", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/laion/exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_jupiter_cleaned/blob/main/config.json", + "wandb_link": "https://wandb.ai/dogml/OpenThoughts-Agent/runs/sft_exp-syh-r2egym-swesmith-mixed_glm_4-7_traces_jupiter_cleaned_Qwen3-8B", + "traces_location_s3": null +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e9dc937 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..4bf3659 --- /dev/null +++ b/train_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 5.548911443773127, + "achieved_tflops_per_gpu_theoretical": 271.03236502585344, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810341417789459, + "mfu_percent": 0.561062835568567, + "mfu_percent_theoretical": 27.40468807137042, + "total_flos": 2.4258565470369874e+18, + "train_loss": 0.30312070208478775, + "train_runtime": 54647.127, + "train_samples_per_second": 1.285, + "train_steps_per_second": 0.08, + "valid_targets_mean": 4356.1, + "valid_targets_min": 3101 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..1f5c465 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,880 @@ +{"current_steps": 5, "total_steps": 4396, "loss": 0.9065, "lr": 3.6363636363636366e-07, "epoch": 0.00796812749003984, "percentage": 0.11, "elapsed_time": "0:01:10", "remaining_time": "17:10:25"} +{"current_steps": 10, "total_steps": 4396, "loss": 0.8968, "lr": 8.181818181818182e-07, "epoch": 0.01593625498007968, "percentage": 0.23, "elapsed_time": "0:02:15", "remaining_time": "16:33:22"} +{"current_steps": 15, "total_steps": 4396, "loss": 0.862, "lr": 1.2727272727272728e-06, "epoch": 0.02390438247011952, "percentage": 0.34, "elapsed_time": "0:03:20", "remaining_time": "16:14:33"} +{"current_steps": 20, "total_steps": 4396, "loss": 0.814, "lr": 1.7272727272727275e-06, "epoch": 0.03187250996015936, "percentage": 0.45, "elapsed_time": "0:04:24", "remaining_time": "16:05:54"} +{"current_steps": 25, "total_steps": 4396, "loss": 0.7668, "lr": 2.181818181818182e-06, "epoch": 0.0398406374501992, "percentage": 0.57, "elapsed_time": "0:05:30", "remaining_time": "16:04:26"} +{"current_steps": 30, "total_steps": 4396, "loss": 0.7323, "lr": 2.6363636363636364e-06, "epoch": 0.04780876494023904, "percentage": 0.68, "elapsed_time": "0:06:35", "remaining_time": "16:00:12"} +{"current_steps": 35, "total_steps": 4396, "loss": 0.7072, "lr": 3.090909090909091e-06, "epoch": 0.055776892430278883, "percentage": 0.8, "elapsed_time": "0:07:43", "remaining_time": "16:02:11"} +{"current_steps": 40, "total_steps": 4396, "loss": 0.6397, "lr": 3.5454545454545458e-06, "epoch": 0.06374501992031872, "percentage": 0.91, "elapsed_time": "0:08:48", "remaining_time": "15:59:45"} +{"current_steps": 45, "total_steps": 4396, "loss": 0.6265, "lr": 4.000000000000001e-06, "epoch": 0.07171314741035857, "percentage": 1.02, "elapsed_time": "0:09:52", "remaining_time": "15:54:24"} +{"current_steps": 50, "total_steps": 4396, "loss": 0.5906, "lr": 4.454545454545455e-06, "epoch": 0.0796812749003984, "percentage": 1.14, "elapsed_time": "0:10:54", "remaining_time": "15:48:30"} +{"current_steps": 55, "total_steps": 4396, "loss": 0.5989, "lr": 4.90909090909091e-06, "epoch": 0.08764940239043825, "percentage": 1.25, "elapsed_time": "0:12:00", "remaining_time": "15:48:15"} +{"current_steps": 60, "total_steps": 4396, "loss": 0.5821, "lr": 5.3636363636363645e-06, "epoch": 0.09561752988047809, "percentage": 1.36, "elapsed_time": "0:13:02", "remaining_time": "15:42:45"} +{"current_steps": 65, "total_steps": 4396, "loss": 0.5673, "lr": 5.8181818181818185e-06, "epoch": 0.10358565737051793, "percentage": 1.48, "elapsed_time": "0:14:11", "remaining_time": "15:45:55"} +{"current_steps": 70, "total_steps": 4396, "loss": 0.5388, "lr": 6.2727272727272734e-06, "epoch": 0.11155378486055777, "percentage": 1.59, "elapsed_time": "0:15:14", "remaining_time": "15:41:37"} +{"current_steps": 75, "total_steps": 4396, "loss": 0.5336, "lr": 6.7272727272727275e-06, "epoch": 0.11952191235059761, "percentage": 1.71, "elapsed_time": "0:16:21", "remaining_time": "15:42:37"} +{"current_steps": 80, "total_steps": 4396, "loss": 0.5445, "lr": 7.181818181818182e-06, "epoch": 0.12749003984063745, "percentage": 1.82, "elapsed_time": "0:17:22", "remaining_time": "15:37:33"} +{"current_steps": 85, "total_steps": 4396, "loss": 0.5108, "lr": 7.636363636363638e-06, "epoch": 0.13545816733067728, "percentage": 1.93, "elapsed_time": "0:18:24", "remaining_time": "15:33:34"} +{"current_steps": 90, "total_steps": 4396, "loss": 0.4959, "lr": 8.090909090909092e-06, "epoch": 0.14342629482071714, "percentage": 2.05, "elapsed_time": "0:19:28", "remaining_time": "15:31:34"} +{"current_steps": 95, "total_steps": 4396, "loss": 0.4978, "lr": 8.545454545454546e-06, "epoch": 0.15139442231075698, "percentage": 2.16, "elapsed_time": "0:20:34", "remaining_time": "15:31:28"} +{"current_steps": 100, "total_steps": 4396, "loss": 0.4816, "lr": 9e-06, "epoch": 0.1593625498007968, "percentage": 2.27, "elapsed_time": "0:21:39", "remaining_time": "15:30:14"} +{"current_steps": 105, "total_steps": 4396, "loss": 0.4759, "lr": 9.454545454545456e-06, "epoch": 0.16733067729083664, "percentage": 2.39, "elapsed_time": "0:22:46", "remaining_time": "15:30:56"} +{"current_steps": 110, "total_steps": 4396, "loss": 0.4655, "lr": 9.90909090909091e-06, "epoch": 0.1752988047808765, "percentage": 2.5, "elapsed_time": "0:23:56", "remaining_time": "15:32:34"} +{"current_steps": 115, "total_steps": 4396, "loss": 0.4568, "lr": 1.0363636363636364e-05, "epoch": 0.18326693227091634, "percentage": 2.62, "elapsed_time": "0:24:59", "remaining_time": "15:30:19"} +{"current_steps": 120, "total_steps": 4396, "loss": 0.4528, "lr": 1.0818181818181818e-05, "epoch": 0.19123505976095617, "percentage": 2.73, "elapsed_time": "0:26:07", "remaining_time": "15:31:03"} +{"current_steps": 125, "total_steps": 4396, "loss": 0.4818, "lr": 1.1272727272727272e-05, "epoch": 0.199203187250996, "percentage": 2.84, "elapsed_time": "0:27:15", "remaining_time": "15:31:32"} +{"current_steps": 130, "total_steps": 4396, "loss": 0.4572, "lr": 1.1727272727272728e-05, "epoch": 0.20717131474103587, "percentage": 2.96, "elapsed_time": "0:28:20", "remaining_time": "15:29:59"} +{"current_steps": 135, "total_steps": 4396, "loss": 0.4459, "lr": 1.2181818181818184e-05, "epoch": 0.2151394422310757, "percentage": 3.07, "elapsed_time": "0:29:26", "remaining_time": "15:29:10"} +{"current_steps": 140, "total_steps": 4396, "loss": 0.4371, "lr": 1.2636363636363638e-05, "epoch": 0.22310756972111553, "percentage": 3.18, "elapsed_time": "0:30:31", "remaining_time": "15:28:09"} +{"current_steps": 145, "total_steps": 4396, "loss": 0.4506, "lr": 1.3090909090909092e-05, "epoch": 0.23107569721115537, "percentage": 3.3, "elapsed_time": "0:31:36", "remaining_time": "15:26:28"} +{"current_steps": 150, "total_steps": 4396, "loss": 0.4451, "lr": 1.3545454545454546e-05, "epoch": 0.23904382470119523, "percentage": 3.41, "elapsed_time": "0:32:38", "remaining_time": "15:24:11"} +{"current_steps": 155, "total_steps": 4396, "loss": 0.444, "lr": 1.4e-05, "epoch": 0.24701195219123506, "percentage": 3.53, "elapsed_time": "0:33:39", "remaining_time": "15:20:56"} +{"current_steps": 160, "total_steps": 4396, "loss": 0.4459, "lr": 1.4454545454545457e-05, "epoch": 0.2549800796812749, "percentage": 3.64, "elapsed_time": "0:34:46", "remaining_time": "15:20:27"} +{"current_steps": 165, "total_steps": 4396, "loss": 0.4336, "lr": 1.4909090909090911e-05, "epoch": 0.26294820717131473, "percentage": 3.75, "elapsed_time": "0:35:50", "remaining_time": "15:19:11"} +{"current_steps": 170, "total_steps": 4396, "loss": 0.4242, "lr": 1.5363636363636365e-05, "epoch": 0.27091633466135456, "percentage": 3.87, "elapsed_time": "0:36:48", "remaining_time": "15:15:03"} +{"current_steps": 175, "total_steps": 4396, "loss": 0.442, "lr": 1.5818181818181818e-05, "epoch": 0.2788844621513944, "percentage": 3.98, "elapsed_time": "0:37:49", "remaining_time": "15:12:27"} +{"current_steps": 180, "total_steps": 4396, "loss": 0.4318, "lr": 1.6272727272727273e-05, "epoch": 0.2868525896414343, "percentage": 4.09, "elapsed_time": "0:38:56", "remaining_time": "15:12:01"} +{"current_steps": 185, "total_steps": 4396, "loss": 0.4262, "lr": 1.672727272727273e-05, "epoch": 0.2948207171314741, "percentage": 4.21, "elapsed_time": "0:40:00", "remaining_time": "15:10:51"} +{"current_steps": 190, "total_steps": 4396, "loss": 0.411, "lr": 1.7181818181818185e-05, "epoch": 0.30278884462151395, "percentage": 4.32, "elapsed_time": "0:41:04", "remaining_time": "15:09:11"} +{"current_steps": 195, "total_steps": 4396, "loss": 0.4189, "lr": 1.7636363636363637e-05, "epoch": 0.3107569721115538, "percentage": 4.44, "elapsed_time": "0:41:58", "remaining_time": "15:04:24"} +{"current_steps": 200, "total_steps": 4396, "loss": 0.4154, "lr": 1.8090909090909093e-05, "epoch": 0.3187250996015936, "percentage": 4.55, "elapsed_time": "0:43:06", "remaining_time": "15:04:17"} +{"current_steps": 205, "total_steps": 4396, "loss": 0.4171, "lr": 1.8545454545454545e-05, "epoch": 0.32669322709163345, "percentage": 4.66, "elapsed_time": "0:44:11", "remaining_time": "15:03:24"} +{"current_steps": 210, "total_steps": 4396, "loss": 0.4273, "lr": 1.9e-05, "epoch": 0.3346613545816733, "percentage": 4.78, "elapsed_time": "0:45:15", "remaining_time": "15:02:01"} +{"current_steps": 215, "total_steps": 4396, "loss": 0.4045, "lr": 1.9454545454545457e-05, "epoch": 0.3426294820717131, "percentage": 4.89, "elapsed_time": "0:46:15", "remaining_time": "14:59:33"} +{"current_steps": 220, "total_steps": 4396, "loss": 0.412, "lr": 1.9909090909090913e-05, "epoch": 0.350597609561753, "percentage": 5.0, "elapsed_time": "0:47:15", "remaining_time": "14:56:57"} +{"current_steps": 225, "total_steps": 4396, "loss": 0.4096, "lr": 2.0363636363636365e-05, "epoch": 0.35856573705179284, "percentage": 5.12, "elapsed_time": "0:48:13", "remaining_time": "14:53:58"} +{"current_steps": 230, "total_steps": 4396, "loss": 0.3877, "lr": 2.081818181818182e-05, "epoch": 0.3665338645418327, "percentage": 5.23, "elapsed_time": "0:49:16", "remaining_time": "14:52:26"} +{"current_steps": 235, "total_steps": 4396, "loss": 0.391, "lr": 2.1272727272727276e-05, "epoch": 0.3745019920318725, "percentage": 5.35, "elapsed_time": "0:50:12", "remaining_time": "14:49:09"} +{"current_steps": 240, "total_steps": 4396, "loss": 0.404, "lr": 2.172727272727273e-05, "epoch": 0.38247011952191234, "percentage": 5.46, "elapsed_time": "0:51:17", "remaining_time": "14:48:11"} +{"current_steps": 245, "total_steps": 4396, "loss": 0.3992, "lr": 2.2181818181818184e-05, "epoch": 0.3904382470119522, "percentage": 5.57, "elapsed_time": "0:52:23", "remaining_time": "14:47:33"} +{"current_steps": 250, "total_steps": 4396, "loss": 0.3973, "lr": 2.263636363636364e-05, "epoch": 0.398406374501992, "percentage": 5.69, "elapsed_time": "0:53:18", "remaining_time": "14:44:02"} +{"current_steps": 255, "total_steps": 4396, "loss": 0.4015, "lr": 2.3090909090909093e-05, "epoch": 0.4063745019920319, "percentage": 5.8, "elapsed_time": "0:54:19", "remaining_time": "14:42:14"} +{"current_steps": 260, "total_steps": 4396, "loss": 0.3969, "lr": 2.3545454545454548e-05, "epoch": 0.41434262948207173, "percentage": 5.91, "elapsed_time": "0:55:22", "remaining_time": "14:40:55"} +{"current_steps": 265, "total_steps": 4396, "loss": 0.3869, "lr": 2.4e-05, "epoch": 0.42231075697211157, "percentage": 6.03, "elapsed_time": "0:56:26", "remaining_time": "14:39:54"} +{"current_steps": 270, "total_steps": 4396, "loss": 0.4012, "lr": 2.4454545454545456e-05, "epoch": 0.4302788844621514, "percentage": 6.14, "elapsed_time": "0:57:33", "remaining_time": "14:39:32"} +{"current_steps": 275, "total_steps": 4396, "loss": 0.3839, "lr": 2.490909090909091e-05, "epoch": 0.43824701195219123, "percentage": 6.26, "elapsed_time": "0:58:34", "remaining_time": "14:37:49"} +{"current_steps": 280, "total_steps": 4396, "loss": 0.3901, "lr": 2.5363636363636364e-05, "epoch": 0.44621513944223107, "percentage": 6.37, "elapsed_time": "0:59:35", "remaining_time": "14:35:55"} +{"current_steps": 285, "total_steps": 4396, "loss": 0.3953, "lr": 2.5818181818181824e-05, "epoch": 0.4541832669322709, "percentage": 6.48, "elapsed_time": "1:00:36", "remaining_time": "14:34:20"} +{"current_steps": 290, "total_steps": 4396, "loss": 0.3651, "lr": 2.6272727272727276e-05, "epoch": 0.46215139442231074, "percentage": 6.6, "elapsed_time": "1:01:37", "remaining_time": "14:32:26"} +{"current_steps": 295, "total_steps": 4396, "loss": 0.3881, "lr": 2.672727272727273e-05, "epoch": 0.4701195219123506, "percentage": 6.71, "elapsed_time": "1:02:43", "remaining_time": "14:31:52"} +{"current_steps": 300, "total_steps": 4396, "loss": 0.3817, "lr": 2.7181818181818184e-05, "epoch": 0.47808764940239046, "percentage": 6.82, "elapsed_time": "1:03:48", "remaining_time": "14:31:09"} +{"current_steps": 305, "total_steps": 4396, "loss": 0.3805, "lr": 2.763636363636364e-05, "epoch": 0.4860557768924303, "percentage": 6.94, "elapsed_time": "1:04:58", "remaining_time": "14:31:37"} +{"current_steps": 310, "total_steps": 4396, "loss": 0.3925, "lr": 2.8090909090909092e-05, "epoch": 0.4940239043824701, "percentage": 7.05, "elapsed_time": "1:05:56", "remaining_time": "14:29:09"} +{"current_steps": 315, "total_steps": 4396, "loss": 0.3891, "lr": 2.8545454545454548e-05, "epoch": 0.50199203187251, "percentage": 7.17, "elapsed_time": "1:07:02", "remaining_time": "14:28:29"} +{"current_steps": 320, "total_steps": 4396, "loss": 0.379, "lr": 2.9e-05, "epoch": 0.5099601593625498, "percentage": 7.28, "elapsed_time": "1:08:05", "remaining_time": "14:27:15"} +{"current_steps": 325, "total_steps": 4396, "loss": 0.3856, "lr": 2.9454545454545456e-05, "epoch": 0.5179282868525896, "percentage": 7.39, "elapsed_time": "1:09:14", "remaining_time": "14:27:23"} +{"current_steps": 330, "total_steps": 4396, "loss": 0.3615, "lr": 2.9909090909090908e-05, "epoch": 0.5258964143426295, "percentage": 7.51, "elapsed_time": "1:10:16", "remaining_time": "14:25:56"} +{"current_steps": 335, "total_steps": 4396, "loss": 0.3939, "lr": 3.0363636363636364e-05, "epoch": 0.5338645418326693, "percentage": 7.62, "elapsed_time": "1:11:22", "remaining_time": "14:25:11"} +{"current_steps": 340, "total_steps": 4396, "loss": 0.3795, "lr": 3.081818181818182e-05, "epoch": 0.5418326693227091, "percentage": 7.73, "elapsed_time": "1:12:27", "remaining_time": "14:24:18"} +{"current_steps": 345, "total_steps": 4396, "loss": 0.3721, "lr": 3.127272727272728e-05, "epoch": 0.549800796812749, "percentage": 7.85, "elapsed_time": "1:13:27", "remaining_time": "14:22:35"} +{"current_steps": 350, "total_steps": 4396, "loss": 0.3843, "lr": 3.172727272727273e-05, "epoch": 0.5577689243027888, "percentage": 7.96, "elapsed_time": "1:14:32", "remaining_time": "14:21:36"} +{"current_steps": 355, "total_steps": 4396, "loss": 0.3659, "lr": 3.2181818181818184e-05, "epoch": 0.5657370517928287, "percentage": 8.08, "elapsed_time": "1:15:32", "remaining_time": "14:19:50"} +{"current_steps": 360, "total_steps": 4396, "loss": 0.3748, "lr": 3.263636363636364e-05, "epoch": 0.5737051792828686, "percentage": 8.19, "elapsed_time": "1:16:39", "remaining_time": "14:19:24"} +{"current_steps": 365, "total_steps": 4396, "loss": 0.3643, "lr": 3.3090909090909095e-05, "epoch": 0.5816733067729084, "percentage": 8.3, "elapsed_time": "1:17:39", "remaining_time": "14:17:42"} +{"current_steps": 370, "total_steps": 4396, "loss": 0.3795, "lr": 3.354545454545455e-05, "epoch": 0.5896414342629482, "percentage": 8.42, "elapsed_time": "1:18:46", "remaining_time": "14:17:07"} +{"current_steps": 375, "total_steps": 4396, "loss": 0.3749, "lr": 3.4e-05, "epoch": 0.5976095617529881, "percentage": 8.53, "elapsed_time": "1:19:54", "remaining_time": "14:16:53"} +{"current_steps": 380, "total_steps": 4396, "loss": 0.3898, "lr": 3.4454545454545455e-05, "epoch": 0.6055776892430279, "percentage": 8.64, "elapsed_time": "1:20:54", "remaining_time": "14:15:04"} +{"current_steps": 385, "total_steps": 4396, "loss": 0.377, "lr": 3.490909090909091e-05, "epoch": 0.6135458167330677, "percentage": 8.76, "elapsed_time": "1:21:53", "remaining_time": "14:13:06"} +{"current_steps": 390, "total_steps": 4396, "loss": 0.3712, "lr": 3.536363636363637e-05, "epoch": 0.6215139442231076, "percentage": 8.87, "elapsed_time": "1:22:56", "remaining_time": "14:11:56"} +{"current_steps": 395, "total_steps": 4396, "loss": 0.3688, "lr": 3.581818181818182e-05, "epoch": 0.6294820717131474, "percentage": 8.99, "elapsed_time": "1:24:01", "remaining_time": "14:11:09"} +{"current_steps": 400, "total_steps": 4396, "loss": 0.3631, "lr": 3.627272727272728e-05, "epoch": 0.6374501992031872, "percentage": 9.1, "elapsed_time": "1:25:01", "remaining_time": "14:09:20"} +{"current_steps": 405, "total_steps": 4396, "loss": 0.362, "lr": 3.6727272727272734e-05, "epoch": 0.6454183266932271, "percentage": 9.21, "elapsed_time": "1:26:07", "remaining_time": "14:08:47"} +{"current_steps": 410, "total_steps": 4396, "loss": 0.3651, "lr": 3.718181818181818e-05, "epoch": 0.6533864541832669, "percentage": 9.33, "elapsed_time": "1:27:11", "remaining_time": "14:07:43"} +{"current_steps": 415, "total_steps": 4396, "loss": 0.3653, "lr": 3.763636363636364e-05, "epoch": 0.6613545816733067, "percentage": 9.44, "elapsed_time": "1:28:18", "remaining_time": "14:07:02"} +{"current_steps": 420, "total_steps": 4396, "loss": 0.3688, "lr": 3.8090909090909095e-05, "epoch": 0.6693227091633466, "percentage": 9.55, "elapsed_time": "1:29:17", "remaining_time": "14:05:17"} +{"current_steps": 425, "total_steps": 4396, "loss": 0.381, "lr": 3.854545454545455e-05, "epoch": 0.6772908366533864, "percentage": 9.67, "elapsed_time": "1:30:24", "remaining_time": "14:04:41"} +{"current_steps": 430, "total_steps": 4396, "loss": 0.3786, "lr": 3.9e-05, "epoch": 0.6852589641434262, "percentage": 9.78, "elapsed_time": "1:31:22", "remaining_time": "14:02:49"} +{"current_steps": 435, "total_steps": 4396, "loss": 0.3839, "lr": 3.9454545454545455e-05, "epoch": 0.6932270916334662, "percentage": 9.9, "elapsed_time": "1:32:23", "remaining_time": "14:01:13"} +{"current_steps": 440, "total_steps": 4396, "loss": 0.3629, "lr": 3.990909090909091e-05, "epoch": 0.701195219123506, "percentage": 10.01, "elapsed_time": "1:33:22", "remaining_time": "13:59:33"} +{"current_steps": 445, "total_steps": 4396, "loss": 0.3743, "lr": 3.999989909636843e-05, "epoch": 0.7091633466135459, "percentage": 10.12, "elapsed_time": "1:34:23", "remaining_time": "13:58:07"} +{"current_steps": 450, "total_steps": 4396, "loss": 0.3685, "lr": 3.999948917711013e-05, "epoch": 0.7171314741035857, "percentage": 10.24, "elapsed_time": "1:35:30", "remaining_time": "13:57:34"} +{"current_steps": 455, "total_steps": 4396, "loss": 0.3507, "lr": 3.999876394220603e-05, "epoch": 0.7250996015936255, "percentage": 10.35, "elapsed_time": "1:36:33", "remaining_time": "13:56:17"} +{"current_steps": 460, "total_steps": 4396, "loss": 0.3623, "lr": 3.999772340309031e-05, "epoch": 0.7330677290836654, "percentage": 10.46, "elapsed_time": "1:37:33", "remaining_time": "13:54:44"} +{"current_steps": 465, "total_steps": 4396, "loss": 0.3683, "lr": 3.999636757616831e-05, "epoch": 0.7410358565737052, "percentage": 10.58, "elapsed_time": "1:38:34", "remaining_time": "13:53:17"} +{"current_steps": 470, "total_steps": 4396, "loss": 0.3583, "lr": 3.999469648281624e-05, "epoch": 0.749003984063745, "percentage": 10.69, "elapsed_time": "1:39:33", "remaining_time": "13:51:40"} +{"current_steps": 475, "total_steps": 4396, "loss": 0.359, "lr": 3.9992710149380875e-05, "epoch": 0.7569721115537849, "percentage": 10.81, "elapsed_time": "1:40:32", "remaining_time": "13:49:54"} +{"current_steps": 480, "total_steps": 4396, "loss": 0.3519, "lr": 3.999040860717911e-05, "epoch": 0.7649402390438247, "percentage": 10.92, "elapsed_time": "1:41:38", "remaining_time": "13:49:11"} +{"current_steps": 485, "total_steps": 4396, "loss": 0.3414, "lr": 3.998779189249749e-05, "epoch": 0.7729083665338645, "percentage": 11.03, "elapsed_time": "1:42:38", "remaining_time": "13:47:40"} +{"current_steps": 490, "total_steps": 4396, "loss": 0.3545, "lr": 3.998486004659162e-05, "epoch": 0.7808764940239044, "percentage": 11.15, "elapsed_time": "1:43:41", "remaining_time": "13:46:34"} +{"current_steps": 495, "total_steps": 4396, "loss": 0.3568, "lr": 3.9981613115685516e-05, "epoch": 0.7888446215139442, "percentage": 11.26, "elapsed_time": "1:44:45", "remaining_time": "13:45:35"} +{"current_steps": 500, "total_steps": 4396, "loss": 0.3595, "lr": 3.9978051150970906e-05, "epoch": 0.796812749003984, "percentage": 11.37, "elapsed_time": "1:45:50", "remaining_time": "13:44:41"} +{"current_steps": 505, "total_steps": 4396, "loss": 0.3556, "lr": 3.9974174208606376e-05, "epoch": 0.8047808764940239, "percentage": 11.49, "elapsed_time": "1:46:48", "remaining_time": "13:42:53"} +{"current_steps": 510, "total_steps": 4396, "loss": 0.3581, "lr": 3.996998234971652e-05, "epoch": 0.8127490039840638, "percentage": 11.6, "elapsed_time": "1:47:48", "remaining_time": "13:41:30"} +{"current_steps": 515, "total_steps": 4396, "loss": 0.3566, "lr": 3.996547564039096e-05, "epoch": 0.8207171314741036, "percentage": 11.72, "elapsed_time": "1:48:47", "remaining_time": "13:39:52"} +{"current_steps": 520, "total_steps": 4396, "loss": 0.3563, "lr": 3.99606541516833e-05, "epoch": 0.8286852589641435, "percentage": 11.83, "elapsed_time": "1:49:45", "remaining_time": "13:38:06"} +{"current_steps": 525, "total_steps": 4396, "loss": 0.3571, "lr": 3.995551795961004e-05, "epoch": 0.8366533864541833, "percentage": 11.94, "elapsed_time": "1:50:48", "remaining_time": "13:37:05"} +{"current_steps": 530, "total_steps": 4396, "loss": 0.3663, "lr": 3.995006714514932e-05, "epoch": 0.8446215139442231, "percentage": 12.06, "elapsed_time": "1:51:55", "remaining_time": "13:36:23"} +{"current_steps": 535, "total_steps": 4396, "loss": 0.35, "lr": 3.99443017942397e-05, "epoch": 0.852589641434263, "percentage": 12.17, "elapsed_time": "1:52:56", "remaining_time": "13:35:07"} +{"current_steps": 540, "total_steps": 4396, "loss": 0.3734, "lr": 3.993822199777876e-05, "epoch": 0.8605577689243028, "percentage": 12.28, "elapsed_time": "1:54:01", "remaining_time": "13:34:10"} +{"current_steps": 545, "total_steps": 4396, "loss": 0.3531, "lr": 3.9931827851621694e-05, "epoch": 0.8685258964143426, "percentage": 12.4, "elapsed_time": "1:55:05", "remaining_time": "13:33:15"} +{"current_steps": 550, "total_steps": 4396, "loss": 0.3537, "lr": 3.99251194565798e-05, "epoch": 0.8764940239043825, "percentage": 12.51, "elapsed_time": "1:56:05", "remaining_time": "13:31:49"} +{"current_steps": 555, "total_steps": 4396, "loss": 0.3615, "lr": 3.991809691841888e-05, "epoch": 0.8844621513944223, "percentage": 12.63, "elapsed_time": "1:57:14", "remaining_time": "13:31:27"} +{"current_steps": 560, "total_steps": 4396, "loss": 0.3494, "lr": 3.9910760347857554e-05, "epoch": 0.8924302788844621, "percentage": 12.74, "elapsed_time": "1:58:09", "remaining_time": "13:29:20"} +{"current_steps": 565, "total_steps": 4396, "loss": 0.3542, "lr": 3.9903109860565565e-05, "epoch": 0.900398406374502, "percentage": 12.85, "elapsed_time": "1:59:19", "remaining_time": "13:29:05"} +{"current_steps": 570, "total_steps": 4396, "loss": 0.3596, "lr": 3.98951455771619e-05, "epoch": 0.9083665338645418, "percentage": 12.97, "elapsed_time": "2:00:23", "remaining_time": "13:28:05"} +{"current_steps": 575, "total_steps": 4396, "loss": 0.3469, "lr": 3.988686762321293e-05, "epoch": 0.9163346613545816, "percentage": 13.08, "elapsed_time": "2:01:29", "remaining_time": "13:27:20"} +{"current_steps": 580, "total_steps": 4396, "loss": 0.3489, "lr": 3.987827612923041e-05, "epoch": 0.9243027888446215, "percentage": 13.19, "elapsed_time": "2:02:27", "remaining_time": "13:25:43"} +{"current_steps": 585, "total_steps": 4396, "loss": 0.36, "lr": 3.986937123066942e-05, "epoch": 0.9322709163346613, "percentage": 13.31, "elapsed_time": "2:03:32", "remaining_time": "13:24:50"} +{"current_steps": 590, "total_steps": 4396, "loss": 0.3473, "lr": 3.9860153067926235e-05, "epoch": 0.9402390438247012, "percentage": 13.42, "elapsed_time": "2:04:33", "remaining_time": "13:23:32"} +{"current_steps": 595, "total_steps": 4396, "loss": 0.3475, "lr": 3.985062178633612e-05, "epoch": 0.9482071713147411, "percentage": 13.54, "elapsed_time": "2:05:30", "remaining_time": "13:21:44"} +{"current_steps": 600, "total_steps": 4396, "loss": 0.347, "lr": 3.9840777536171026e-05, "epoch": 0.9561752988047809, "percentage": 13.65, "elapsed_time": "2:06:30", "remaining_time": "13:20:19"} +{"current_steps": 605, "total_steps": 4396, "loss": 0.3479, "lr": 3.9830620472637214e-05, "epoch": 0.9641434262948207, "percentage": 13.76, "elapsed_time": "2:07:37", "remaining_time": "13:19:42"} +{"current_steps": 610, "total_steps": 4396, "loss": 0.3457, "lr": 3.982015075587283e-05, "epoch": 0.9721115537848606, "percentage": 13.88, "elapsed_time": "2:08:43", "remaining_time": "13:18:53"} +{"current_steps": 615, "total_steps": 4396, "loss": 0.3467, "lr": 3.980936855094537e-05, "epoch": 0.9800796812749004, "percentage": 13.99, "elapsed_time": "2:09:44", "remaining_time": "13:17:41"} +{"current_steps": 620, "total_steps": 4396, "loss": 0.3748, "lr": 3.979827402784906e-05, "epoch": 0.9880478087649402, "percentage": 14.1, "elapsed_time": "2:10:44", "remaining_time": "13:16:13"} +{"current_steps": 625, "total_steps": 4396, "loss": 0.34, "lr": 3.978686736150221e-05, "epoch": 0.9960159362549801, "percentage": 14.22, "elapsed_time": "2:11:48", "remaining_time": "13:15:15"} +{"current_steps": 630, "total_steps": 4396, "loss": 0.3361, "lr": 3.977514873174443e-05, "epoch": 1.003187250996016, "percentage": 14.33, "elapsed_time": "2:12:42", "remaining_time": "13:13:19"} +{"current_steps": 635, "total_steps": 4396, "loss": 0.3355, "lr": 3.976311832333381e-05, "epoch": 1.0111553784860559, "percentage": 14.44, "elapsed_time": "2:13:54", "remaining_time": "13:13:07"} +{"current_steps": 640, "total_steps": 4396, "loss": 0.3278, "lr": 3.9750776325943984e-05, "epoch": 1.0191235059760957, "percentage": 14.56, "elapsed_time": "2:14:54", "remaining_time": "13:11:44"} +{"current_steps": 645, "total_steps": 4396, "loss": 0.3331, "lr": 3.9738122934161174e-05, "epoch": 1.0270916334661355, "percentage": 14.67, "elapsed_time": "2:15:59", "remaining_time": "13:10:53"} +{"current_steps": 650, "total_steps": 4396, "loss": 0.3482, "lr": 3.97251583474811e-05, "epoch": 1.0350597609561754, "percentage": 14.79, "elapsed_time": "2:17:04", "remaining_time": "13:09:58"} +{"current_steps": 655, "total_steps": 4396, "loss": 0.3288, "lr": 3.971188277030582e-05, "epoch": 1.0430278884462152, "percentage": 14.9, "elapsed_time": "2:18:06", "remaining_time": "13:08:49"} +{"current_steps": 660, "total_steps": 4396, "loss": 0.3308, "lr": 3.969829641194055e-05, "epoch": 1.050996015936255, "percentage": 15.01, "elapsed_time": "2:19:04", "remaining_time": "13:07:17"} +{"current_steps": 665, "total_steps": 4396, "loss": 0.3308, "lr": 3.968439948659033e-05, "epoch": 1.0589641434262949, "percentage": 15.13, "elapsed_time": "2:20:08", "remaining_time": "13:06:13"} +{"current_steps": 670, "total_steps": 4396, "loss": 0.3448, "lr": 3.967019221335664e-05, "epoch": 1.0669322709163347, "percentage": 15.24, "elapsed_time": "2:21:13", "remaining_time": "13:05:22"} +{"current_steps": 675, "total_steps": 4396, "loss": 0.3406, "lr": 3.965567481623399e-05, "epoch": 1.0749003984063745, "percentage": 15.35, "elapsed_time": "2:22:10", "remaining_time": "13:03:47"} +{"current_steps": 680, "total_steps": 4396, "loss": 0.3406, "lr": 3.9640847524106356e-05, "epoch": 1.0828685258964144, "percentage": 15.47, "elapsed_time": "2:23:09", "remaining_time": "13:02:19"} +{"current_steps": 685, "total_steps": 4396, "loss": 0.3398, "lr": 3.9625710570743556e-05, "epoch": 1.0908366533864542, "percentage": 15.58, "elapsed_time": "2:24:12", "remaining_time": "13:01:12"} +{"current_steps": 690, "total_steps": 4396, "loss": 0.3196, "lr": 3.96102641947976e-05, "epoch": 1.098804780876494, "percentage": 15.7, "elapsed_time": "2:25:06", "remaining_time": "12:59:23"} +{"current_steps": 695, "total_steps": 4396, "loss": 0.3415, "lr": 3.959450863979891e-05, "epoch": 1.1067729083665339, "percentage": 15.81, "elapsed_time": "2:26:07", "remaining_time": "12:58:10"} +{"current_steps": 700, "total_steps": 4396, "loss": 0.3347, "lr": 3.957844415415248e-05, "epoch": 1.1147410358565737, "percentage": 15.92, "elapsed_time": "2:27:06", "remaining_time": "12:56:44"} +{"current_steps": 705, "total_steps": 4396, "loss": 0.3351, "lr": 3.956207099113396e-05, "epoch": 1.1227091633466135, "percentage": 16.04, "elapsed_time": "2:28:10", "remaining_time": "12:55:47"} +{"current_steps": 710, "total_steps": 4396, "loss": 0.3385, "lr": 3.954538940888567e-05, "epoch": 1.1306772908366534, "percentage": 16.15, "elapsed_time": "2:29:12", "remaining_time": "12:54:36"} +{"current_steps": 715, "total_steps": 4396, "loss": 0.3539, "lr": 3.952839967041252e-05, "epoch": 1.1386454183266932, "percentage": 16.26, "elapsed_time": "2:30:18", "remaining_time": "12:53:49"} +{"current_steps": 720, "total_steps": 4396, "loss": 0.3308, "lr": 3.951110204357787e-05, "epoch": 1.146613545816733, "percentage": 16.38, "elapsed_time": "2:31:23", "remaining_time": "12:52:53"} +{"current_steps": 725, "total_steps": 4396, "loss": 0.3379, "lr": 3.9493496801099306e-05, "epoch": 1.1545816733067729, "percentage": 16.49, "elapsed_time": "2:32:20", "remaining_time": "12:51:24"} +{"current_steps": 730, "total_steps": 4396, "loss": 0.3349, "lr": 3.9475584220544335e-05, "epoch": 1.1625498007968127, "percentage": 16.61, "elapsed_time": "2:33:26", "remaining_time": "12:50:35"} +{"current_steps": 735, "total_steps": 4396, "loss": 0.3348, "lr": 3.9457364584326005e-05, "epoch": 1.1705179282868525, "percentage": 16.72, "elapsed_time": "2:34:29", "remaining_time": "12:49:28"} +{"current_steps": 740, "total_steps": 4396, "loss": 0.3388, "lr": 3.94388381796985e-05, "epoch": 1.1784860557768924, "percentage": 16.83, "elapsed_time": "2:35:32", "remaining_time": "12:48:28"} +{"current_steps": 745, "total_steps": 4396, "loss": 0.3393, "lr": 3.942000529875251e-05, "epoch": 1.1864541832669322, "percentage": 16.95, "elapsed_time": "2:36:31", "remaining_time": "12:47:06"} +{"current_steps": 750, "total_steps": 4396, "loss": 0.3442, "lr": 3.9400866238410736e-05, "epoch": 1.194422310756972, "percentage": 17.06, "elapsed_time": "2:37:37", "remaining_time": "12:46:16"} +{"current_steps": 755, "total_steps": 4396, "loss": 0.3316, "lr": 3.9381421300423145e-05, "epoch": 1.2023904382470119, "percentage": 17.17, "elapsed_time": "2:38:38", "remaining_time": "12:45:04"} +{"current_steps": 760, "total_steps": 4396, "loss": 0.3459, "lr": 3.936167079136222e-05, "epoch": 1.2103585657370517, "percentage": 17.29, "elapsed_time": "2:39:38", "remaining_time": "12:43:45"} +{"current_steps": 765, "total_steps": 4396, "loss": 0.3378, "lr": 3.934161502261814e-05, "epoch": 1.2183266932270915, "percentage": 17.4, "elapsed_time": "2:40:44", "remaining_time": "12:42:58"} +{"current_steps": 770, "total_steps": 4396, "loss": 0.3412, "lr": 3.932125431039387e-05, "epoch": 1.2262948207171314, "percentage": 17.52, "elapsed_time": "2:41:41", "remaining_time": "12:41:24"} +{"current_steps": 775, "total_steps": 4396, "loss": 0.3331, "lr": 3.930058897570016e-05, "epoch": 1.2342629482071712, "percentage": 17.63, "elapsed_time": "2:42:38", "remaining_time": "12:39:56"} +{"current_steps": 780, "total_steps": 4396, "loss": 0.3337, "lr": 3.9279619344350505e-05, "epoch": 1.2422310756972113, "percentage": 17.74, "elapsed_time": "2:43:39", "remaining_time": "12:38:43"} +{"current_steps": 785, "total_steps": 4396, "loss": 0.3295, "lr": 3.925834574695599e-05, "epoch": 1.2501992031872509, "percentage": 17.86, "elapsed_time": "2:44:37", "remaining_time": "12:37:17"} +{"current_steps": 790, "total_steps": 4396, "loss": 0.3413, "lr": 3.923676851892008e-05, "epoch": 1.258167330677291, "percentage": 17.97, "elapsed_time": "2:45:43", "remaining_time": "12:36:28"} +{"current_steps": 795, "total_steps": 4396, "loss": 0.3236, "lr": 3.921488800043335e-05, "epoch": 1.2661354581673308, "percentage": 18.08, "elapsed_time": "2:46:48", "remaining_time": "12:35:33"} +{"current_steps": 800, "total_steps": 4396, "loss": 0.3446, "lr": 3.9192704536468106e-05, "epoch": 1.2741035856573706, "percentage": 18.2, "elapsed_time": "2:47:51", "remaining_time": "12:34:29"} +{"current_steps": 805, "total_steps": 4396, "loss": 0.3549, "lr": 3.917021847677294e-05, "epoch": 1.2820717131474104, "percentage": 18.31, "elapsed_time": "2:48:53", "remaining_time": "12:33:24"} +{"current_steps": 810, "total_steps": 4396, "loss": 0.3528, "lr": 3.914743017586722e-05, "epoch": 1.2900398406374503, "percentage": 18.43, "elapsed_time": "2:49:56", "remaining_time": "12:32:19"} +{"current_steps": 815, "total_steps": 4396, "loss": 0.3281, "lr": 3.912433999303552e-05, "epoch": 1.29800796812749, "percentage": 18.54, "elapsed_time": "2:50:55", "remaining_time": "12:31:02"} +{"current_steps": 820, "total_steps": 4396, "loss": 0.3313, "lr": 3.910094829232194e-05, "epoch": 1.30597609561753, "percentage": 18.65, "elapsed_time": "2:51:50", "remaining_time": "12:29:22"} +{"current_steps": 825, "total_steps": 4396, "loss": 0.3257, "lr": 3.907725544252436e-05, "epoch": 1.3139442231075698, "percentage": 18.77, "elapsed_time": "2:52:53", "remaining_time": "12:28:22"} +{"current_steps": 830, "total_steps": 4396, "loss": 0.3208, "lr": 3.905326181718862e-05, "epoch": 1.3219123505976096, "percentage": 18.88, "elapsed_time": "2:53:51", "remaining_time": "12:26:58"} +{"current_steps": 835, "total_steps": 4396, "loss": 0.3266, "lr": 3.902896779460266e-05, "epoch": 1.3298804780876494, "percentage": 18.99, "elapsed_time": "2:54:57", "remaining_time": "12:26:07"} +{"current_steps": 840, "total_steps": 4396, "loss": 0.3316, "lr": 3.900437375779055e-05, "epoch": 1.3378486055776893, "percentage": 19.11, "elapsed_time": "2:55:57", "remaining_time": "12:24:52"} +{"current_steps": 845, "total_steps": 4396, "loss": 0.3406, "lr": 3.8979480094506394e-05, "epoch": 1.345816733067729, "percentage": 19.22, "elapsed_time": "2:56:59", "remaining_time": "12:23:46"} +{"current_steps": 850, "total_steps": 4396, "loss": 0.3255, "lr": 3.895428719722832e-05, "epoch": 1.353784860557769, "percentage": 19.34, "elapsed_time": "2:58:08", "remaining_time": "12:23:08"} +{"current_steps": 855, "total_steps": 4396, "loss": 0.3476, "lr": 3.8928795463152186e-05, "epoch": 1.3617529880478088, "percentage": 19.45, "elapsed_time": "2:59:13", "remaining_time": "12:22:15"} +{"current_steps": 860, "total_steps": 4396, "loss": 0.3383, "lr": 3.89030052941854e-05, "epoch": 1.3697211155378486, "percentage": 19.56, "elapsed_time": "3:00:15", "remaining_time": "12:21:08"} +{"current_steps": 865, "total_steps": 4396, "loss": 0.3288, "lr": 3.8876917096940536e-05, "epoch": 1.3776892430278884, "percentage": 19.68, "elapsed_time": "3:01:17", "remaining_time": "12:20:02"} +{"current_steps": 870, "total_steps": 4396, "loss": 0.3441, "lr": 3.8850531282728934e-05, "epoch": 1.3856573705179283, "percentage": 19.79, "elapsed_time": "3:02:14", "remaining_time": "12:18:36"} +{"current_steps": 875, "total_steps": 4396, "loss": 0.3189, "lr": 3.882384826755422e-05, "epoch": 1.393625498007968, "percentage": 19.9, "elapsed_time": "3:03:18", "remaining_time": "12:17:39"} +{"current_steps": 880, "total_steps": 4396, "loss": 0.3197, "lr": 3.8796868472105746e-05, "epoch": 1.401593625498008, "percentage": 20.02, "elapsed_time": "3:04:24", "remaining_time": "12:16:48"} +{"current_steps": 885, "total_steps": 4396, "loss": 0.3194, "lr": 3.8769592321751964e-05, "epoch": 1.4095617529880478, "percentage": 20.13, "elapsed_time": "3:05:26", "remaining_time": "12:15:42"} +{"current_steps": 890, "total_steps": 4396, "loss": 0.3261, "lr": 3.87420202465337e-05, "epoch": 1.4175298804780876, "percentage": 20.25, "elapsed_time": "3:06:26", "remaining_time": "12:14:26"} +{"current_steps": 895, "total_steps": 4396, "loss": 0.3423, "lr": 3.871415268115739e-05, "epoch": 1.4254980079681274, "percentage": 20.36, "elapsed_time": "3:07:26", "remaining_time": "12:13:13"} +{"current_steps": 900, "total_steps": 4396, "loss": 0.3239, "lr": 3.868599006498823e-05, "epoch": 1.4334661354581673, "percentage": 20.47, "elapsed_time": "3:08:32", "remaining_time": "12:12:23"} +{"current_steps": 905, "total_steps": 4396, "loss": 0.3281, "lr": 3.865753284204324e-05, "epoch": 1.441434262948207, "percentage": 20.59, "elapsed_time": "3:09:35", "remaining_time": "12:11:19"} +{"current_steps": 910, "total_steps": 4396, "loss": 0.3301, "lr": 3.862878146098426e-05, "epoch": 1.4494023904382471, "percentage": 20.7, "elapsed_time": "3:10:28", "remaining_time": "12:09:38"} +{"current_steps": 915, "total_steps": 4396, "loss": 0.3258, "lr": 3.859973637511086e-05, "epoch": 1.457370517928287, "percentage": 20.81, "elapsed_time": "3:11:30", "remaining_time": "12:08:33"} +{"current_steps": 920, "total_steps": 4396, "loss": 0.3393, "lr": 3.857039804235327e-05, "epoch": 1.4653386454183268, "percentage": 20.93, "elapsed_time": "3:12:32", "remaining_time": "12:07:26"} +{"current_steps": 925, "total_steps": 4396, "loss": 0.3279, "lr": 3.854076692526505e-05, "epoch": 1.4733067729083666, "percentage": 21.04, "elapsed_time": "3:13:33", "remaining_time": "12:06:18"} +{"current_steps": 930, "total_steps": 4396, "loss": 0.337, "lr": 3.8510843491015874e-05, "epoch": 1.4812749003984065, "percentage": 21.16, "elapsed_time": "3:14:37", "remaining_time": "12:05:20"} +{"current_steps": 935, "total_steps": 4396, "loss": 0.3174, "lr": 3.848062821138415e-05, "epoch": 1.4892430278884463, "percentage": 21.27, "elapsed_time": "3:15:37", "remaining_time": "12:04:06"} +{"current_steps": 940, "total_steps": 4396, "loss": 0.3263, "lr": 3.8450121562749565e-05, "epoch": 1.4972111553784861, "percentage": 21.38, "elapsed_time": "3:16:38", "remaining_time": "12:02:57"} +{"current_steps": 945, "total_steps": 4396, "loss": 0.3337, "lr": 3.841932402608557e-05, "epoch": 1.505179282868526, "percentage": 21.5, "elapsed_time": "3:17:40", "remaining_time": "12:01:52"} +{"current_steps": 950, "total_steps": 4396, "loss": 0.3338, "lr": 3.838823608695185e-05, "epoch": 1.5131474103585658, "percentage": 21.61, "elapsed_time": "3:18:38", "remaining_time": "12:00:32"} +{"current_steps": 955, "total_steps": 4396, "loss": 0.3257, "lr": 3.835685823548659e-05, "epoch": 1.5211155378486056, "percentage": 21.72, "elapsed_time": "3:19:37", "remaining_time": "11:59:17"} +{"current_steps": 960, "total_steps": 4396, "loss": 0.321, "lr": 3.832519096639879e-05, "epoch": 1.5290836653386455, "percentage": 21.84, "elapsed_time": "3:20:43", "remaining_time": "11:58:26"} +{"current_steps": 965, "total_steps": 4396, "loss": 0.3384, "lr": 3.829323477896048e-05, "epoch": 1.5370517928286853, "percentage": 21.95, "elapsed_time": "3:21:46", "remaining_time": "11:57:23"} +{"current_steps": 970, "total_steps": 4396, "loss": 0.3268, "lr": 3.8260990176998835e-05, "epoch": 1.5450199203187251, "percentage": 22.07, "elapsed_time": "3:22:46", "remaining_time": "11:56:12"} +{"current_steps": 975, "total_steps": 4396, "loss": 0.3366, "lr": 3.82284576688882e-05, "epoch": 1.552988047808765, "percentage": 22.18, "elapsed_time": "3:23:47", "remaining_time": "11:55:01"} +{"current_steps": 980, "total_steps": 4396, "loss": 0.3312, "lr": 3.819563776754212e-05, "epoch": 1.5609561752988048, "percentage": 22.29, "elapsed_time": "3:24:51", "remaining_time": "11:54:04"} +{"current_steps": 985, "total_steps": 4396, "loss": 0.3297, "lr": 3.8162530990405235e-05, "epoch": 1.5689243027888446, "percentage": 22.41, "elapsed_time": "3:25:56", "remaining_time": "11:53:09"} +{"current_steps": 990, "total_steps": 4396, "loss": 0.3159, "lr": 3.8129137859445106e-05, "epoch": 1.5768924302788845, "percentage": 22.52, "elapsed_time": "3:26:52", "remaining_time": "11:51:44"} +{"current_steps": 995, "total_steps": 4396, "loss": 0.3195, "lr": 3.8095458901144014e-05, "epoch": 1.5848605577689243, "percentage": 22.63, "elapsed_time": "3:27:45", "remaining_time": "11:50:09"} +{"current_steps": 1000, "total_steps": 4396, "loss": 0.3337, "lr": 3.806149464649066e-05, "epoch": 1.5928286852589641, "percentage": 22.75, "elapsed_time": "3:28:40", "remaining_time": "11:48:41"} +{"current_steps": 1005, "total_steps": 4396, "loss": 0.3366, "lr": 3.802724563097175e-05, "epoch": 1.600796812749004, "percentage": 22.86, "elapsed_time": "3:29:39", "remaining_time": "11:47:25"} +{"current_steps": 1010, "total_steps": 4396, "loss": 0.3412, "lr": 3.7992712394563606e-05, "epoch": 1.6087649402390438, "percentage": 22.98, "elapsed_time": "3:30:42", "remaining_time": "11:46:22"} +{"current_steps": 1015, "total_steps": 4396, "loss": 0.3314, "lr": 3.795789548172362e-05, "epoch": 1.6167330677290837, "percentage": 23.09, "elapsed_time": "3:31:51", "remaining_time": "11:45:41"} +{"current_steps": 1020, "total_steps": 4396, "loss": 0.3316, "lr": 3.7922795441381674e-05, "epoch": 1.6247011952191235, "percentage": 23.2, "elapsed_time": "3:32:53", "remaining_time": "11:44:37"} +{"current_steps": 1025, "total_steps": 4396, "loss": 0.3335, "lr": 3.78874128269315e-05, "epoch": 1.6326693227091633, "percentage": 23.32, "elapsed_time": "3:33:53", "remaining_time": "11:43:25"} +{"current_steps": 1030, "total_steps": 4396, "loss": 0.3382, "lr": 3.785174819622195e-05, "epoch": 1.6406374501992032, "percentage": 23.43, "elapsed_time": "3:34:54", "remaining_time": "11:42:19"} +{"current_steps": 1035, "total_steps": 4396, "loss": 0.332, "lr": 3.7815802111548185e-05, "epoch": 1.648605577689243, "percentage": 23.54, "elapsed_time": "3:36:01", "remaining_time": "11:41:29"} +{"current_steps": 1040, "total_steps": 4396, "loss": 0.3392, "lr": 3.777957513964282e-05, "epoch": 1.6565737051792828, "percentage": 23.66, "elapsed_time": "3:37:03", "remaining_time": "11:40:24"} +{"current_steps": 1045, "total_steps": 4396, "loss": 0.3275, "lr": 3.7743067851666994e-05, "epoch": 1.6645418326693227, "percentage": 23.77, "elapsed_time": "3:38:06", "remaining_time": "11:39:24"} +{"current_steps": 1050, "total_steps": 4396, "loss": 0.326, "lr": 3.770628082320137e-05, "epoch": 1.6725099601593625, "percentage": 23.89, "elapsed_time": "3:39:13", "remaining_time": "11:38:35"} +{"current_steps": 1055, "total_steps": 4396, "loss": 0.3273, "lr": 3.766921463423704e-05, "epoch": 1.6804780876494023, "percentage": 24.0, "elapsed_time": "3:40:25", "remaining_time": "11:38:04"} +{"current_steps": 1060, "total_steps": 4396, "loss": 0.32, "lr": 3.7631869869166396e-05, "epoch": 1.6884462151394422, "percentage": 24.11, "elapsed_time": "3:41:28", "remaining_time": "11:37:02"} +{"current_steps": 1065, "total_steps": 4396, "loss": 0.325, "lr": 3.759424711677391e-05, "epoch": 1.696414342629482, "percentage": 24.23, "elapsed_time": "3:42:32", "remaining_time": "11:36:01"} +{"current_steps": 1070, "total_steps": 4396, "loss": 0.3268, "lr": 3.755634697022686e-05, "epoch": 1.7043824701195218, "percentage": 24.34, "elapsed_time": "3:43:36", "remaining_time": "11:35:02"} +{"current_steps": 1075, "total_steps": 4396, "loss": 0.3343, "lr": 3.751817002706596e-05, "epoch": 1.7123505976095617, "percentage": 24.45, "elapsed_time": "3:44:35", "remaining_time": "11:33:48"} +{"current_steps": 1080, "total_steps": 4396, "loss": 0.3208, "lr": 3.747971688919597e-05, "epoch": 1.7203187250996015, "percentage": 24.57, "elapsed_time": "3:45:29", "remaining_time": "11:32:19"} +{"current_steps": 1085, "total_steps": 4396, "loss": 0.3275, "lr": 3.744098816287616e-05, "epoch": 1.7282868525896413, "percentage": 24.68, "elapsed_time": "3:46:40", "remaining_time": "11:31:44"} +{"current_steps": 1090, "total_steps": 4396, "loss": 0.3316, "lr": 3.7401984458710796e-05, "epoch": 1.7362549800796812, "percentage": 24.8, "elapsed_time": "3:47:45", "remaining_time": "11:30:48"} +{"current_steps": 1095, "total_steps": 4396, "loss": 0.3268, "lr": 3.73627063916395e-05, "epoch": 1.744223107569721, "percentage": 24.91, "elapsed_time": "3:48:50", "remaining_time": "11:29:53"} +{"current_steps": 1100, "total_steps": 4396, "loss": 0.3236, "lr": 3.732315458092754e-05, "epoch": 1.7521912350597608, "percentage": 25.02, "elapsed_time": "3:49:53", "remaining_time": "11:28:51"} +{"current_steps": 1105, "total_steps": 4396, "loss": 0.3201, "lr": 3.728332965015608e-05, "epoch": 1.7601593625498007, "percentage": 25.14, "elapsed_time": "3:50:57", "remaining_time": "11:27:50"} +{"current_steps": 1110, "total_steps": 4396, "loss": 0.3285, "lr": 3.724323222721234e-05, "epoch": 1.7681274900398405, "percentage": 25.25, "elapsed_time": "3:51:56", "remaining_time": "11:26:39"} +{"current_steps": 1115, "total_steps": 4396, "loss": 0.3256, "lr": 3.720286294427972e-05, "epoch": 1.7760956175298803, "percentage": 25.36, "elapsed_time": "3:52:57", "remaining_time": "11:25:30"} +{"current_steps": 1120, "total_steps": 4396, "loss": 0.3173, "lr": 3.716222243782778e-05, "epoch": 1.7840637450199202, "percentage": 25.48, "elapsed_time": "3:53:58", "remaining_time": "11:24:22"} +{"current_steps": 1125, "total_steps": 4396, "loss": 0.3214, "lr": 3.712131134860229e-05, "epoch": 1.7920318725099602, "percentage": 25.59, "elapsed_time": "3:54:55", "remaining_time": "11:23:03"} +{"current_steps": 1130, "total_steps": 4396, "loss": 0.3344, "lr": 3.708013032161502e-05, "epoch": 1.8, "percentage": 25.71, "elapsed_time": "3:56:01", "remaining_time": "11:22:10"} +{"current_steps": 1135, "total_steps": 4396, "loss": 0.3229, "lr": 3.703868000613369e-05, "epoch": 1.8079681274900399, "percentage": 25.82, "elapsed_time": "3:57:02", "remaining_time": "11:21:03"} +{"current_steps": 1140, "total_steps": 4396, "loss": 0.3273, "lr": 3.6996961055671625e-05, "epoch": 1.8159362549800797, "percentage": 25.93, "elapsed_time": "3:58:06", "remaining_time": "11:20:02"} +{"current_steps": 1145, "total_steps": 4396, "loss": 0.3125, "lr": 3.695497412797751e-05, "epoch": 1.8239043824701195, "percentage": 26.05, "elapsed_time": "3:58:58", "remaining_time": "11:18:31"} +{"current_steps": 1150, "total_steps": 4396, "loss": 0.3268, "lr": 3.6912719885025026e-05, "epoch": 1.8318725099601594, "percentage": 26.16, "elapsed_time": "3:59:59", "remaining_time": "11:17:23"} +{"current_steps": 1155, "total_steps": 4396, "loss": 0.315, "lr": 3.687019899300238e-05, "epoch": 1.8398406374501992, "percentage": 26.27, "elapsed_time": "4:01:05", "remaining_time": "11:16:30"} +{"current_steps": 1160, "total_steps": 4396, "loss": 0.3357, "lr": 3.6827412122301805e-05, "epoch": 1.847808764940239, "percentage": 26.39, "elapsed_time": "4:02:04", "remaining_time": "11:15:19"} +{"current_steps": 1165, "total_steps": 4396, "loss": 0.3154, "lr": 3.678435994750905e-05, "epoch": 1.8557768924302789, "percentage": 26.5, "elapsed_time": "4:03:06", "remaining_time": "11:14:15"} +{"current_steps": 1170, "total_steps": 4396, "loss": 0.3124, "lr": 3.6741043147392634e-05, "epoch": 1.8637450199203187, "percentage": 26.62, "elapsed_time": "4:04:11", "remaining_time": "11:13:18"} +{"current_steps": 1175, "total_steps": 4396, "loss": 0.3229, "lr": 3.6697462404893257e-05, "epoch": 1.8717131474103585, "percentage": 26.73, "elapsed_time": "4:05:16", "remaining_time": "11:12:21"} +{"current_steps": 1180, "total_steps": 4396, "loss": 0.3231, "lr": 3.665361840711297e-05, "epoch": 1.8796812749003984, "percentage": 26.84, "elapsed_time": "4:06:20", "remaining_time": "11:11:24"} +{"current_steps": 1185, "total_steps": 4396, "loss": 0.3217, "lr": 3.660951184530434e-05, "epoch": 1.8876494023904382, "percentage": 26.96, "elapsed_time": "4:07:23", "remaining_time": "11:10:20"} +{"current_steps": 1190, "total_steps": 4396, "loss": 0.3111, "lr": 3.656514341485959e-05, "epoch": 1.895617529880478, "percentage": 27.07, "elapsed_time": "4:08:22", "remaining_time": "11:09:07"} +{"current_steps": 1195, "total_steps": 4396, "loss": 0.3153, "lr": 3.65205138152996e-05, "epoch": 1.9035856573705179, "percentage": 27.18, "elapsed_time": "4:09:27", "remaining_time": "11:08:12"} +{"current_steps": 1200, "total_steps": 4396, "loss": 0.3301, "lr": 3.647562375026289e-05, "epoch": 1.9115537848605577, "percentage": 27.3, "elapsed_time": "4:10:32", "remaining_time": "11:07:15"} +{"current_steps": 1205, "total_steps": 4396, "loss": 0.3288, "lr": 3.643047392749453e-05, "epoch": 1.9195219123505978, "percentage": 27.41, "elapsed_time": "4:11:34", "remaining_time": "11:06:13"} +{"current_steps": 1210, "total_steps": 4396, "loss": 0.333, "lr": 3.638506505883497e-05, "epoch": 1.9274900398406376, "percentage": 27.53, "elapsed_time": "4:12:44", "remaining_time": "11:05:28"} +{"current_steps": 1215, "total_steps": 4396, "loss": 0.3176, "lr": 3.633939786020884e-05, "epoch": 1.9354581673306774, "percentage": 27.64, "elapsed_time": "4:13:48", "remaining_time": "11:04:29"} +{"current_steps": 1220, "total_steps": 4396, "loss": 0.3162, "lr": 3.629347305161365e-05, "epoch": 1.9434262948207173, "percentage": 27.75, "elapsed_time": "4:14:48", "remaining_time": "11:03:19"} +{"current_steps": 1225, "total_steps": 4396, "loss": 0.3208, "lr": 3.62472913571084e-05, "epoch": 1.951394422310757, "percentage": 27.87, "elapsed_time": "4:15:48", "remaining_time": "11:02:10"} +{"current_steps": 1230, "total_steps": 4396, "loss": 0.3182, "lr": 3.620085350480226e-05, "epoch": 1.959362549800797, "percentage": 27.98, "elapsed_time": "4:16:48", "remaining_time": "11:01:01"} +{"current_steps": 1235, "total_steps": 4396, "loss": 0.319, "lr": 3.615416022684298e-05, "epoch": 1.9673306772908368, "percentage": 28.09, "elapsed_time": "4:17:49", "remaining_time": "10:59:53"} +{"current_steps": 1240, "total_steps": 4396, "loss": 0.3339, "lr": 3.610721225940542e-05, "epoch": 1.9752988047808766, "percentage": 28.21, "elapsed_time": "4:18:45", "remaining_time": "10:58:35"} +{"current_steps": 1245, "total_steps": 4396, "loss": 0.3208, "lr": 3.606001034267992e-05, "epoch": 1.9832669322709164, "percentage": 28.32, "elapsed_time": "4:19:46", "remaining_time": "10:57:29"} +{"current_steps": 1250, "total_steps": 4396, "loss": 0.3173, "lr": 3.6012555220860634e-05, "epoch": 1.9912350597609563, "percentage": 28.43, "elapsed_time": "4:20:44", "remaining_time": "10:56:15"} +{"current_steps": 1255, "total_steps": 4396, "loss": 0.3369, "lr": 3.596484764213379e-05, "epoch": 1.999203187250996, "percentage": 28.55, "elapsed_time": "4:21:54", "remaining_time": "10:55:30"} +{"current_steps": 1260, "total_steps": 4396, "loss": 0.2994, "lr": 3.591688835866589e-05, "epoch": 2.006374501992032, "percentage": 28.66, "elapsed_time": "4:22:54", "remaining_time": "10:54:21"} +{"current_steps": 1265, "total_steps": 4396, "loss": 0.2972, "lr": 3.5868678126591884e-05, "epoch": 2.014342629482072, "percentage": 28.78, "elapsed_time": "4:24:00", "remaining_time": "10:53:26"} +{"current_steps": 1270, "total_steps": 4396, "loss": 0.3038, "lr": 3.5820217706003194e-05, "epoch": 2.0223107569721117, "percentage": 28.89, "elapsed_time": "4:24:58", "remaining_time": "10:52:12"} +{"current_steps": 1275, "total_steps": 4396, "loss": 0.3117, "lr": 3.577150786093579e-05, "epoch": 2.0302788844621515, "percentage": 29.0, "elapsed_time": "4:25:58", "remaining_time": "10:51:04"} +{"current_steps": 1280, "total_steps": 4396, "loss": 0.3092, "lr": 3.572254935935808e-05, "epoch": 2.0382470119521914, "percentage": 29.12, "elapsed_time": "4:27:03", "remaining_time": "10:50:06"} +{"current_steps": 1285, "total_steps": 4396, "loss": 0.2966, "lr": 3.567334297315887e-05, "epoch": 2.046215139442231, "percentage": 29.23, "elapsed_time": "4:28:03", "remaining_time": "10:48:58"} +{"current_steps": 1290, "total_steps": 4396, "loss": 0.3171, "lr": 3.562388947813514e-05, "epoch": 2.054183266932271, "percentage": 29.34, "elapsed_time": "4:29:07", "remaining_time": "10:47:58"} +{"current_steps": 1295, "total_steps": 4396, "loss": 0.302, "lr": 3.557418965397985e-05, "epoch": 2.062151394422311, "percentage": 29.46, "elapsed_time": "4:30:05", "remaining_time": "10:46:46"} +{"current_steps": 1300, "total_steps": 4396, "loss": 0.3238, "lr": 3.552424428426962e-05, "epoch": 2.0701195219123507, "percentage": 29.57, "elapsed_time": "4:31:08", "remaining_time": "10:45:43"} +{"current_steps": 1305, "total_steps": 4396, "loss": 0.3265, "lr": 3.547405415645237e-05, "epoch": 2.0780876494023905, "percentage": 29.69, "elapsed_time": "4:32:07", "remaining_time": "10:44:33"} +{"current_steps": 1310, "total_steps": 4396, "loss": 0.3078, "lr": 3.542362006183496e-05, "epoch": 2.0860557768924304, "percentage": 29.8, "elapsed_time": "4:33:08", "remaining_time": "10:43:27"} +{"current_steps": 1315, "total_steps": 4396, "loss": 0.3137, "lr": 3.5372942795570644e-05, "epoch": 2.09402390438247, "percentage": 29.91, "elapsed_time": "4:34:05", "remaining_time": "10:42:12"} +{"current_steps": 1320, "total_steps": 4396, "loss": 0.2994, "lr": 3.532202315664658e-05, "epoch": 2.10199203187251, "percentage": 30.03, "elapsed_time": "4:35:04", "remaining_time": "10:40:59"} +{"current_steps": 1325, "total_steps": 4396, "loss": 0.3111, "lr": 3.527086194787121e-05, "epoch": 2.10996015936255, "percentage": 30.14, "elapsed_time": "4:36:03", "remaining_time": "10:39:50"} +{"current_steps": 1330, "total_steps": 4396, "loss": 0.3047, "lr": 3.521945997586162e-05, "epoch": 2.1179282868525897, "percentage": 30.25, "elapsed_time": "4:37:01", "remaining_time": "10:38:36"} +{"current_steps": 1335, "total_steps": 4396, "loss": 0.2978, "lr": 3.51678180510308e-05, "epoch": 2.1258964143426295, "percentage": 30.37, "elapsed_time": "4:37:59", "remaining_time": "10:37:24"} +{"current_steps": 1340, "total_steps": 4396, "loss": 0.3014, "lr": 3.511593698757491e-05, "epoch": 2.1338645418326694, "percentage": 30.48, "elapsed_time": "4:39:02", "remaining_time": "10:36:23"} +{"current_steps": 1345, "total_steps": 4396, "loss": 0.3102, "lr": 3.506381760346037e-05, "epoch": 2.141832669322709, "percentage": 30.6, "elapsed_time": "4:40:08", "remaining_time": "10:35:29"} +{"current_steps": 1350, "total_steps": 4396, "loss": 0.3101, "lr": 3.501146072041104e-05, "epoch": 2.149800796812749, "percentage": 30.71, "elapsed_time": "4:41:10", "remaining_time": "10:34:24"} +{"current_steps": 1355, "total_steps": 4396, "loss": 0.2966, "lr": 3.495886716389523e-05, "epoch": 2.157768924302789, "percentage": 30.82, "elapsed_time": "4:42:14", "remaining_time": "10:33:25"} +{"current_steps": 1360, "total_steps": 4396, "loss": 0.3191, "lr": 3.4906037763112665e-05, "epoch": 2.1657370517928287, "percentage": 30.94, "elapsed_time": "4:43:19", "remaining_time": "10:32:28"} +{"current_steps": 1365, "total_steps": 4396, "loss": 0.3037, "lr": 3.4852973350981464e-05, "epoch": 2.1737051792828685, "percentage": 31.05, "elapsed_time": "4:44:22", "remaining_time": "10:31:26"} +{"current_steps": 1370, "total_steps": 4396, "loss": 0.3007, "lr": 3.4799674764124956e-05, "epoch": 2.1816733067729084, "percentage": 31.16, "elapsed_time": "4:45:23", "remaining_time": "10:30:22"} +{"current_steps": 1375, "total_steps": 4396, "loss": 0.3022, "lr": 3.474614284285852e-05, "epoch": 2.189641434262948, "percentage": 31.28, "elapsed_time": "4:46:29", "remaining_time": "10:29:27"} +{"current_steps": 1380, "total_steps": 4396, "loss": 0.3068, "lr": 3.469237843117634e-05, "epoch": 2.197609561752988, "percentage": 31.39, "elapsed_time": "4:47:29", "remaining_time": "10:28:19"} +{"current_steps": 1385, "total_steps": 4396, "loss": 0.2876, "lr": 3.4638382376738064e-05, "epoch": 2.205577689243028, "percentage": 31.51, "elapsed_time": "4:48:32", "remaining_time": "10:27:18"} +{"current_steps": 1390, "total_steps": 4396, "loss": 0.3041, "lr": 3.458415553085548e-05, "epoch": 2.2135458167330677, "percentage": 31.62, "elapsed_time": "4:49:34", "remaining_time": "10:26:14"} +{"current_steps": 1395, "total_steps": 4396, "loss": 0.3073, "lr": 3.4529698748479075e-05, "epoch": 2.2215139442231076, "percentage": 31.73, "elapsed_time": "4:50:36", "remaining_time": "10:25:10"} +{"current_steps": 1400, "total_steps": 4396, "loss": 0.2922, "lr": 3.4475012888184536e-05, "epoch": 2.2294820717131474, "percentage": 31.85, "elapsed_time": "4:51:37", "remaining_time": "10:24:04"} +{"current_steps": 1405, "total_steps": 4396, "loss": 0.3156, "lr": 3.4420098812159266e-05, "epoch": 2.237450199203187, "percentage": 31.96, "elapsed_time": "4:52:40", "remaining_time": "10:23:03"} +{"current_steps": 1410, "total_steps": 4396, "loss": 0.2991, "lr": 3.4364957386188744e-05, "epoch": 2.245418326693227, "percentage": 32.07, "elapsed_time": "4:53:47", "remaining_time": "10:22:10"} +{"current_steps": 1415, "total_steps": 4396, "loss": 0.3093, "lr": 3.4309589479642894e-05, "epoch": 2.253386454183267, "percentage": 32.19, "elapsed_time": "4:54:56", "remaining_time": "10:21:22"} +{"current_steps": 1420, "total_steps": 4396, "loss": 0.3041, "lr": 3.425399596546237e-05, "epoch": 2.2613545816733067, "percentage": 32.3, "elapsed_time": "4:56:00", "remaining_time": "10:20:22"} +{"current_steps": 1425, "total_steps": 4396, "loss": 0.297, "lr": 3.4198177720144794e-05, "epoch": 2.2693227091633466, "percentage": 32.42, "elapsed_time": "4:56:59", "remaining_time": "10:19:12"} +{"current_steps": 1430, "total_steps": 4396, "loss": 0.2924, "lr": 3.4142135623730954e-05, "epoch": 2.2772908366533864, "percentage": 32.53, "elapsed_time": "4:58:01", "remaining_time": "10:18:08"} +{"current_steps": 1435, "total_steps": 4396, "loss": 0.3073, "lr": 3.4085870559790905e-05, "epoch": 2.285258964143426, "percentage": 32.64, "elapsed_time": "4:59:01", "remaining_time": "10:17:01"} +{"current_steps": 1440, "total_steps": 4396, "loss": 0.3064, "lr": 3.402938341541005e-05, "epoch": 2.293227091633466, "percentage": 32.76, "elapsed_time": "5:00:02", "remaining_time": "10:15:56"} +{"current_steps": 1445, "total_steps": 4396, "loss": 0.3058, "lr": 3.397267508117517e-05, "epoch": 2.301195219123506, "percentage": 32.87, "elapsed_time": "5:01:01", "remaining_time": "10:14:45"} +{"current_steps": 1450, "total_steps": 4396, "loss": 0.2977, "lr": 3.391574645116034e-05, "epoch": 2.3091633466135457, "percentage": 32.98, "elapsed_time": "5:02:07", "remaining_time": "10:13:49"} +{"current_steps": 1455, "total_steps": 4396, "loss": 0.3033, "lr": 3.385859842291287e-05, "epoch": 2.3171314741035856, "percentage": 33.1, "elapsed_time": "5:03:15", "remaining_time": "10:12:57"} +{"current_steps": 1460, "total_steps": 4396, "loss": 0.2997, "lr": 3.380123189743914e-05, "epoch": 2.3250996015936254, "percentage": 33.21, "elapsed_time": "5:04:17", "remaining_time": "10:11:54"} +{"current_steps": 1465, "total_steps": 4396, "loss": 0.3007, "lr": 3.374364777919042e-05, "epoch": 2.333067729083665, "percentage": 33.33, "elapsed_time": "5:05:22", "remaining_time": "10:10:58"} +{"current_steps": 1470, "total_steps": 4396, "loss": 0.3075, "lr": 3.368584697604856e-05, "epoch": 2.341035856573705, "percentage": 33.44, "elapsed_time": "5:06:19", "remaining_time": "10:09:44"} +{"current_steps": 1475, "total_steps": 4396, "loss": 0.3017, "lr": 3.362783039931172e-05, "epoch": 2.349003984063745, "percentage": 33.55, "elapsed_time": "5:07:19", "remaining_time": "10:08:37"} +{"current_steps": 1480, "total_steps": 4396, "loss": 0.3068, "lr": 3.356959896367997e-05, "epoch": 2.3569721115537847, "percentage": 33.67, "elapsed_time": "5:08:24", "remaining_time": "10:07:39"} +{"current_steps": 1485, "total_steps": 4396, "loss": 0.2925, "lr": 3.351115358724089e-05, "epoch": 2.3649402390438246, "percentage": 33.78, "elapsed_time": "5:09:23", "remaining_time": "10:06:28"} +{"current_steps": 1490, "total_steps": 4396, "loss": 0.3037, "lr": 3.345249519145512e-05, "epoch": 2.3729083665338644, "percentage": 33.89, "elapsed_time": "5:10:23", "remaining_time": "10:05:21"} +{"current_steps": 1495, "total_steps": 4396, "loss": 0.3101, "lr": 3.339362470114176e-05, "epoch": 2.380876494023904, "percentage": 34.01, "elapsed_time": "5:11:19", "remaining_time": "10:04:06"} +{"current_steps": 1500, "total_steps": 4396, "loss": 0.3039, "lr": 3.333454304446385e-05, "epoch": 2.388844621513944, "percentage": 34.12, "elapsed_time": "5:12:20", "remaining_time": "10:03:00"} +{"current_steps": 1505, "total_steps": 4396, "loss": 0.3155, "lr": 3.3275251152913735e-05, "epoch": 2.396812749003984, "percentage": 34.24, "elapsed_time": "5:13:58", "remaining_time": "10:03:07"} +{"current_steps": 1510, "total_steps": 4396, "loss": 0.3003, "lr": 3.3215749961298324e-05, "epoch": 2.4047808764940237, "percentage": 34.35, "elapsed_time": "5:14:54", "remaining_time": "10:01:53"} +{"current_steps": 1515, "total_steps": 4396, "loss": 0.3083, "lr": 3.315604040772442e-05, "epoch": 2.4127490039840636, "percentage": 34.46, "elapsed_time": "5:15:57", "remaining_time": "10:00:49"} +{"current_steps": 1520, "total_steps": 4396, "loss": 0.2996, "lr": 3.3096123433583886e-05, "epoch": 2.4207171314741034, "percentage": 34.58, "elapsed_time": "5:16:58", "remaining_time": "9:59:45"} +{"current_steps": 1525, "total_steps": 4396, "loss": 0.2932, "lr": 3.303599998353882e-05, "epoch": 2.4286852589641432, "percentage": 34.69, "elapsed_time": "5:17:59", "remaining_time": "9:58:38"} +{"current_steps": 1530, "total_steps": 4396, "loss": 0.313, "lr": 3.297567100550667e-05, "epoch": 2.436653386454183, "percentage": 34.8, "elapsed_time": "5:19:04", "remaining_time": "9:57:40"} +{"current_steps": 1535, "total_steps": 4396, "loss": 0.3021, "lr": 3.2915137450645245e-05, "epoch": 2.444621513944223, "percentage": 34.92, "elapsed_time": "5:20:02", "remaining_time": "9:56:29"} +{"current_steps": 1540, "total_steps": 4396, "loss": 0.3007, "lr": 3.285440027333777e-05, "epoch": 2.4525896414342627, "percentage": 35.03, "elapsed_time": "5:21:02", "remaining_time": "9:55:22"} +{"current_steps": 1545, "total_steps": 4396, "loss": 0.3068, "lr": 3.2793460431177827e-05, "epoch": 2.4605577689243026, "percentage": 35.15, "elapsed_time": "5:22:00", "remaining_time": "9:54:12"} +{"current_steps": 1550, "total_steps": 4396, "loss": 0.3018, "lr": 3.273231888495424e-05, "epoch": 2.4685258964143424, "percentage": 35.26, "elapsed_time": "5:23:01", "remaining_time": "9:53:07"} +{"current_steps": 1555, "total_steps": 4396, "loss": 0.2956, "lr": 3.267097659863592e-05, "epoch": 2.4764940239043822, "percentage": 35.37, "elapsed_time": "5:24:02", "remaining_time": "9:52:01"} +{"current_steps": 1560, "total_steps": 4396, "loss": 0.3013, "lr": 3.2609434539356726e-05, "epoch": 2.4844621513944225, "percentage": 35.49, "elapsed_time": "5:25:10", "remaining_time": "9:51:08"} +{"current_steps": 1565, "total_steps": 4396, "loss": 0.3049, "lr": 3.2547693677400126e-05, "epoch": 2.4924302788844623, "percentage": 35.6, "elapsed_time": "5:26:14", "remaining_time": "9:50:10"} +{"current_steps": 1570, "total_steps": 4396, "loss": 0.2984, "lr": 3.248575498618398e-05, "epoch": 2.5003984063745017, "percentage": 35.71, "elapsed_time": "5:27:16", "remaining_time": "9:49:06"} +{"current_steps": 1575, "total_steps": 4396, "loss": 0.2966, "lr": 3.242361944224515e-05, "epoch": 2.5083665338645416, "percentage": 35.83, "elapsed_time": "5:28:21", "remaining_time": "9:48:07"} +{"current_steps": 1580, "total_steps": 4396, "loss": 0.304, "lr": 3.236128802522411e-05, "epoch": 2.516334661354582, "percentage": 35.94, "elapsed_time": "5:29:25", "remaining_time": "9:47:07"} +{"current_steps": 1585, "total_steps": 4396, "loss": 0.3081, "lr": 3.229876171784952e-05, "epoch": 2.5243027888446217, "percentage": 36.06, "elapsed_time": "5:30:22", "remaining_time": "9:45:54"} +{"current_steps": 1590, "total_steps": 4396, "loss": 0.3038, "lr": 3.22360415059227e-05, "epoch": 2.5322709163346615, "percentage": 36.17, "elapsed_time": "5:31:28", "remaining_time": "9:44:58"} +{"current_steps": 1595, "total_steps": 4396, "loss": 0.2944, "lr": 3.217312837830212e-05, "epoch": 2.5402390438247013, "percentage": 36.28, "elapsed_time": "5:32:26", "remaining_time": "9:43:48"} +{"current_steps": 1600, "total_steps": 4396, "loss": 0.3056, "lr": 3.211002332688779e-05, "epoch": 2.548207171314741, "percentage": 36.4, "elapsed_time": "5:33:32", "remaining_time": "9:42:52"} +{"current_steps": 1605, "total_steps": 4396, "loss": 0.3076, "lr": 3.2046727346605604e-05, "epoch": 2.556175298804781, "percentage": 36.51, "elapsed_time": "5:34:38", "remaining_time": "9:41:55"} +{"current_steps": 1610, "total_steps": 4396, "loss": 0.3034, "lr": 3.198324143539172e-05, "epoch": 2.564143426294821, "percentage": 36.62, "elapsed_time": "5:35:47", "remaining_time": "9:41:04"} +{"current_steps": 1615, "total_steps": 4396, "loss": 0.3025, "lr": 3.191956659417674e-05, "epoch": 2.5721115537848607, "percentage": 36.74, "elapsed_time": "5:36:47", "remaining_time": "9:39:57"} +{"current_steps": 1620, "total_steps": 4396, "loss": 0.3071, "lr": 3.185570382687e-05, "epoch": 2.5800796812749005, "percentage": 36.85, "elapsed_time": "5:37:48", "remaining_time": "9:38:52"} +{"current_steps": 1625, "total_steps": 4396, "loss": 0.2994, "lr": 3.17916541403437e-05, "epoch": 2.5880478087649403, "percentage": 36.97, "elapsed_time": "5:38:49", "remaining_time": "9:37:45"} +{"current_steps": 1630, "total_steps": 4396, "loss": 0.2974, "lr": 3.172741854441704e-05, "epoch": 2.59601593625498, "percentage": 37.08, "elapsed_time": "5:39:51", "remaining_time": "9:36:42"} +{"current_steps": 1635, "total_steps": 4396, "loss": 0.2989, "lr": 3.1662998051840306e-05, "epoch": 2.60398406374502, "percentage": 37.19, "elapsed_time": "5:40:54", "remaining_time": "9:35:41"} +{"current_steps": 1640, "total_steps": 4396, "loss": 0.2995, "lr": 3.159839367827891e-05, "epoch": 2.61195219123506, "percentage": 37.31, "elapsed_time": "5:42:00", "remaining_time": "9:34:43"} +{"current_steps": 1645, "total_steps": 4396, "loss": 0.3055, "lr": 3.153360644229735e-05, "epoch": 2.6199203187250997, "percentage": 37.42, "elapsed_time": "5:43:02", "remaining_time": "9:33:40"} +{"current_steps": 1650, "total_steps": 4396, "loss": 0.2975, "lr": 3.146863736534317e-05, "epoch": 2.6278884462151395, "percentage": 37.53, "elapsed_time": "5:44:08", "remaining_time": "9:32:43"} +{"current_steps": 1655, "total_steps": 4396, "loss": 0.2979, "lr": 3.140348747173086e-05, "epoch": 2.6358565737051793, "percentage": 37.65, "elapsed_time": "5:45:06", "remaining_time": "9:31:33"} +{"current_steps": 1660, "total_steps": 4396, "loss": 0.3124, "lr": 3.1338157788625695e-05, "epoch": 2.643824701195219, "percentage": 37.76, "elapsed_time": "5:46:13", "remaining_time": "9:30:38"} +{"current_steps": 1665, "total_steps": 4396, "loss": 0.3076, "lr": 3.127264934602754e-05, "epoch": 2.651792828685259, "percentage": 37.88, "elapsed_time": "5:47:16", "remaining_time": "9:29:36"} +{"current_steps": 1670, "total_steps": 4396, "loss": 0.3171, "lr": 3.120696317675462e-05, "epoch": 2.659760956175299, "percentage": 37.99, "elapsed_time": "5:48:11", "remaining_time": "9:28:21"} +{"current_steps": 1675, "total_steps": 4396, "loss": 0.3085, "lr": 3.114110031642723e-05, "epoch": 2.6677290836653387, "percentage": 38.1, "elapsed_time": "5:49:12", "remaining_time": "9:27:16"} +{"current_steps": 1680, "total_steps": 4396, "loss": 0.3029, "lr": 3.1075061803451405e-05, "epoch": 2.6756972111553785, "percentage": 38.22, "elapsed_time": "5:50:11", "remaining_time": "9:26:08"} +{"current_steps": 1685, "total_steps": 4396, "loss": 0.3106, "lr": 3.100884867900257e-05, "epoch": 2.6836653386454183, "percentage": 38.33, "elapsed_time": "5:51:17", "remaining_time": "9:25:12"} +{"current_steps": 1690, "total_steps": 4396, "loss": 0.2964, "lr": 3.094246198700907e-05, "epoch": 2.691633466135458, "percentage": 38.44, "elapsed_time": "5:52:22", "remaining_time": "9:24:13"} +{"current_steps": 1695, "total_steps": 4396, "loss": 0.3068, "lr": 3.087590277413578e-05, "epoch": 2.699601593625498, "percentage": 38.56, "elapsed_time": "5:53:22", "remaining_time": "9:23:05"} +{"current_steps": 1700, "total_steps": 4396, "loss": 0.3048, "lr": 3.0809172089767576e-05, "epoch": 2.707569721115538, "percentage": 38.67, "elapsed_time": "5:54:26", "remaining_time": "9:22:05"} +{"current_steps": 1705, "total_steps": 4396, "loss": 0.3066, "lr": 3.0742270985992765e-05, "epoch": 2.7155378486055777, "percentage": 38.79, "elapsed_time": "5:55:33", "remaining_time": "9:21:09"} +{"current_steps": 1710, "total_steps": 4396, "loss": 0.3031, "lr": 3.067520051758651e-05, "epoch": 2.7235059760956175, "percentage": 38.9, "elapsed_time": "5:56:29", "remaining_time": "9:19:58"} +{"current_steps": 1715, "total_steps": 4396, "loss": 0.304, "lr": 3.060796174199424e-05, "epoch": 2.7314741035856573, "percentage": 39.01, "elapsed_time": "5:57:27", "remaining_time": "9:18:47"} +{"current_steps": 1720, "total_steps": 4396, "loss": 0.2977, "lr": 3.0540555719314914e-05, "epoch": 2.739442231075697, "percentage": 39.13, "elapsed_time": "5:58:29", "remaining_time": "9:17:45"} +{"current_steps": 1725, "total_steps": 4396, "loss": 0.3117, "lr": 3.0472983512284366e-05, "epoch": 2.747410358565737, "percentage": 39.24, "elapsed_time": "5:59:33", "remaining_time": "9:16:44"} +{"current_steps": 1730, "total_steps": 4396, "loss": 0.2951, "lr": 3.04052461862585e-05, "epoch": 2.755378486055777, "percentage": 39.35, "elapsed_time": "6:00:36", "remaining_time": "9:15:42"} +{"current_steps": 1735, "total_steps": 4396, "loss": 0.3127, "lr": 3.0337344809196547e-05, "epoch": 2.7633466135458167, "percentage": 39.47, "elapsed_time": "6:01:40", "remaining_time": "9:14:42"} +{"current_steps": 1740, "total_steps": 4396, "loss": 0.3156, "lr": 3.0269280451644155e-05, "epoch": 2.7713147410358565, "percentage": 39.58, "elapsed_time": "6:02:46", "remaining_time": "9:13:44"} +{"current_steps": 1745, "total_steps": 4396, "loss": 0.3022, "lr": 3.020105418671659e-05, "epoch": 2.7792828685258963, "percentage": 39.7, "elapsed_time": "6:03:47", "remaining_time": "9:12:41"} +{"current_steps": 1750, "total_steps": 4396, "loss": 0.3199, "lr": 3.0132667090081758e-05, "epoch": 2.787250996015936, "percentage": 39.81, "elapsed_time": "6:04:51", "remaining_time": "9:11:39"} +{"current_steps": 1755, "total_steps": 4396, "loss": 0.3003, "lr": 3.006412023994328e-05, "epoch": 2.795219123505976, "percentage": 39.92, "elapsed_time": "6:05:50", "remaining_time": "9:10:32"} +{"current_steps": 1760, "total_steps": 4396, "loss": 0.3004, "lr": 2.999541471702347e-05, "epoch": 2.803187250996016, "percentage": 40.04, "elapsed_time": "6:06:58", "remaining_time": "9:09:38"} +{"current_steps": 1765, "total_steps": 4396, "loss": 0.3049, "lr": 2.9926551604546312e-05, "epoch": 2.8111553784860557, "percentage": 40.15, "elapsed_time": "6:07:56", "remaining_time": "9:08:27"} +{"current_steps": 1770, "total_steps": 4396, "loss": 0.3043, "lr": 2.9857531988220385e-05, "epoch": 2.8191235059760955, "percentage": 40.26, "elapsed_time": "6:08:58", "remaining_time": "9:07:24"} +{"current_steps": 1775, "total_steps": 4396, "loss": 0.3122, "lr": 2.9788356956221712e-05, "epoch": 2.8270916334661353, "percentage": 40.38, "elapsed_time": "6:10:02", "remaining_time": "9:06:24"} +{"current_steps": 1780, "total_steps": 4396, "loss": 0.3074, "lr": 2.971902759917665e-05, "epoch": 2.835059760956175, "percentage": 40.49, "elapsed_time": "6:11:08", "remaining_time": "9:05:27"} +{"current_steps": 1785, "total_steps": 4396, "loss": 0.296, "lr": 2.964954501014467e-05, "epoch": 2.843027888446215, "percentage": 40.61, "elapsed_time": "6:12:10", "remaining_time": "9:04:24"} +{"current_steps": 1790, "total_steps": 4396, "loss": 0.3095, "lr": 2.957991028460112e-05, "epoch": 2.850996015936255, "percentage": 40.72, "elapsed_time": "6:13:10", "remaining_time": "9:03:16"} +{"current_steps": 1795, "total_steps": 4396, "loss": 0.3088, "lr": 2.951012452041997e-05, "epoch": 2.8589641434262947, "percentage": 40.83, "elapsed_time": "6:14:10", "remaining_time": "9:02:10"} +{"current_steps": 1800, "total_steps": 4396, "loss": 0.3034, "lr": 2.9440188817856478e-05, "epoch": 2.8669322709163345, "percentage": 40.95, "elapsed_time": "6:15:14", "remaining_time": "9:01:11"} +{"current_steps": 1805, "total_steps": 4396, "loss": 0.2924, "lr": 2.937010427952986e-05, "epoch": 2.8749003984063743, "percentage": 41.06, "elapsed_time": "6:16:16", "remaining_time": "9:00:08"} +{"current_steps": 1810, "total_steps": 4396, "loss": 0.3035, "lr": 2.929987201040593e-05, "epoch": 2.882868525896414, "percentage": 41.17, "elapsed_time": "6:17:14", "remaining_time": "8:58:58"} +{"current_steps": 1815, "total_steps": 4396, "loss": 0.2974, "lr": 2.922949311777962e-05, "epoch": 2.8908366533864545, "percentage": 41.29, "elapsed_time": "6:18:15", "remaining_time": "8:57:53"} +{"current_steps": 1820, "total_steps": 4396, "loss": 0.2878, "lr": 2.9158968711257576e-05, "epoch": 2.8988047808764943, "percentage": 41.4, "elapsed_time": "6:19:19", "remaining_time": "8:56:53"} +{"current_steps": 1825, "total_steps": 4396, "loss": 0.3059, "lr": 2.908829990274064e-05, "epoch": 2.906772908366534, "percentage": 41.52, "elapsed_time": "6:20:15", "remaining_time": "8:55:41"} +{"current_steps": 1830, "total_steps": 4396, "loss": 0.3032, "lr": 2.9017487806406312e-05, "epoch": 2.914741035856574, "percentage": 41.63, "elapsed_time": "6:21:17", "remaining_time": "8:54:37"} +{"current_steps": 1835, "total_steps": 4396, "loss": 0.3004, "lr": 2.89465335386912e-05, "epoch": 2.922709163346614, "percentage": 41.74, "elapsed_time": "6:22:19", "remaining_time": "8:53:35"} +{"current_steps": 1840, "total_steps": 4396, "loss": 0.304, "lr": 2.8875438218273423e-05, "epoch": 2.9306772908366536, "percentage": 41.86, "elapsed_time": "6:23:18", "remaining_time": "8:52:27"} +{"current_steps": 1845, "total_steps": 4396, "loss": 0.294, "lr": 2.880420296605494e-05, "epoch": 2.9386454183266935, "percentage": 41.97, "elapsed_time": "6:24:14", "remaining_time": "8:51:16"} +{"current_steps": 1850, "total_steps": 4396, "loss": 0.3017, "lr": 2.8732828905143938e-05, "epoch": 2.9466135458167333, "percentage": 42.08, "elapsed_time": "6:25:16", "remaining_time": "8:50:13"} +{"current_steps": 1855, "total_steps": 4396, "loss": 0.3055, "lr": 2.8661317160837038e-05, "epoch": 2.954581673306773, "percentage": 42.2, "elapsed_time": "6:26:15", "remaining_time": "8:49:05"} +{"current_steps": 1860, "total_steps": 4396, "loss": 0.3001, "lr": 2.8589668860601643e-05, "epoch": 2.962549800796813, "percentage": 42.31, "elapsed_time": "6:27:13", "remaining_time": "8:47:57"} +{"current_steps": 1865, "total_steps": 4396, "loss": 0.3024, "lr": 2.85178851340581e-05, "epoch": 2.970517928286853, "percentage": 42.42, "elapsed_time": "6:28:16", "remaining_time": "8:46:55"} +{"current_steps": 1870, "total_steps": 4396, "loss": 0.2913, "lr": 2.8445967112961928e-05, "epoch": 2.9784860557768926, "percentage": 42.54, "elapsed_time": "6:29:21", "remaining_time": "8:45:56"} +{"current_steps": 1875, "total_steps": 4396, "loss": 0.2976, "lr": 2.8373915931185946e-05, "epoch": 2.9864541832669325, "percentage": 42.65, "elapsed_time": "6:30:25", "remaining_time": "8:44:56"} +{"current_steps": 1880, "total_steps": 4396, "loss": 0.3057, "lr": 2.830173272470241e-05, "epoch": 2.9944223107569723, "percentage": 42.77, "elapsed_time": "6:31:24", "remaining_time": "8:43:48"} +{"current_steps": 1885, "total_steps": 4396, "loss": 0.2983, "lr": 2.822941863156512e-05, "epoch": 3.001593625498008, "percentage": 42.88, "elapsed_time": "6:32:15", "remaining_time": "8:42:30"} +{"current_steps": 1890, "total_steps": 4396, "loss": 0.2788, "lr": 2.8156974791891425e-05, "epoch": 3.0095617529880476, "percentage": 42.99, "elapsed_time": "6:33:15", "remaining_time": "8:41:25"} +{"current_steps": 1895, "total_steps": 4396, "loss": 0.2766, "lr": 2.8084402347844316e-05, "epoch": 3.0175298804780875, "percentage": 43.11, "elapsed_time": "6:34:18", "remaining_time": "8:40:23"} +{"current_steps": 1900, "total_steps": 4396, "loss": 0.2778, "lr": 2.801170244361436e-05, "epoch": 3.0254980079681273, "percentage": 43.22, "elapsed_time": "6:35:17", "remaining_time": "8:39:17"} +{"current_steps": 1905, "total_steps": 4396, "loss": 0.2867, "lr": 2.7938876225401714e-05, "epoch": 3.033466135458167, "percentage": 43.33, "elapsed_time": "6:36:15", "remaining_time": "8:38:09"} +{"current_steps": 1910, "total_steps": 4396, "loss": 0.2818, "lr": 2.7865924841397985e-05, "epoch": 3.041434262948207, "percentage": 43.45, "elapsed_time": "6:37:19", "remaining_time": "8:37:08"} +{"current_steps": 1915, "total_steps": 4396, "loss": 0.2868, "lr": 2.7792849441768194e-05, "epoch": 3.049402390438247, "percentage": 43.56, "elapsed_time": "6:38:24", "remaining_time": "8:36:10"} +{"current_steps": 1920, "total_steps": 4396, "loss": 0.2891, "lr": 2.7719651178632605e-05, "epoch": 3.057370517928287, "percentage": 43.68, "elapsed_time": "6:39:23", "remaining_time": "8:35:03"} +{"current_steps": 1925, "total_steps": 4396, "loss": 0.2783, "lr": 2.7646331206048586e-05, "epoch": 3.065338645418327, "percentage": 43.79, "elapsed_time": "6:40:25", "remaining_time": "8:34:00"} +{"current_steps": 1930, "total_steps": 4396, "loss": 0.2744, "lr": 2.7572890679992376e-05, "epoch": 3.0733067729083667, "percentage": 43.9, "elapsed_time": "6:41:23", "remaining_time": "8:32:52"} +{"current_steps": 1935, "total_steps": 4396, "loss": 0.2953, "lr": 2.7499330758340898e-05, "epoch": 3.0812749003984066, "percentage": 44.02, "elapsed_time": "6:42:28", "remaining_time": "8:31:52"} +{"current_steps": 1940, "total_steps": 4396, "loss": 0.2816, "lr": 2.742565260085348e-05, "epoch": 3.0892430278884464, "percentage": 44.13, "elapsed_time": "6:43:32", "remaining_time": "8:30:52"} +{"current_steps": 1945, "total_steps": 4396, "loss": 0.2818, "lr": 2.7351857369153595e-05, "epoch": 3.0972111553784862, "percentage": 44.24, "elapsed_time": "6:44:33", "remaining_time": "8:29:48"} +{"current_steps": 1950, "total_steps": 4396, "loss": 0.2896, "lr": 2.72779462267105e-05, "epoch": 3.105179282868526, "percentage": 44.36, "elapsed_time": "6:45:41", "remaining_time": "8:28:53"} +{"current_steps": 1955, "total_steps": 4396, "loss": 0.2963, "lr": 2.720392033882094e-05, "epoch": 3.113147410358566, "percentage": 44.47, "elapsed_time": "6:46:45", "remaining_time": "8:27:52"} +{"current_steps": 1960, "total_steps": 4396, "loss": 0.2782, "lr": 2.7129780872590768e-05, "epoch": 3.1211155378486057, "percentage": 44.59, "elapsed_time": "6:47:51", "remaining_time": "8:26:54"} +{"current_steps": 1965, "total_steps": 4396, "loss": 0.2876, "lr": 2.705552899691652e-05, "epoch": 3.1290836653386456, "percentage": 44.7, "elapsed_time": "6:48:49", "remaining_time": "8:25:47"} +{"current_steps": 1970, "total_steps": 4396, "loss": 0.2987, "lr": 2.6981165882466994e-05, "epoch": 3.1370517928286854, "percentage": 44.81, "elapsed_time": "6:49:55", "remaining_time": "8:24:48"} +{"current_steps": 1975, "total_steps": 4396, "loss": 0.2953, "lr": 2.6906692701664817e-05, "epoch": 3.1450199203187252, "percentage": 44.93, "elapsed_time": "6:50:53", "remaining_time": "8:23:40"} +{"current_steps": 1980, "total_steps": 4396, "loss": 0.2925, "lr": 2.683211062866792e-05, "epoch": 3.152988047808765, "percentage": 45.04, "elapsed_time": "6:52:00", "remaining_time": "8:22:43"} +{"current_steps": 1985, "total_steps": 4396, "loss": 0.2808, "lr": 2.6757420839351077e-05, "epoch": 3.160956175298805, "percentage": 45.15, "elapsed_time": "6:53:01", "remaining_time": "8:21:39"} +{"current_steps": 1990, "total_steps": 4396, "loss": 0.2758, "lr": 2.6682624511287315e-05, "epoch": 3.1689243027888447, "percentage": 45.27, "elapsed_time": "6:54:05", "remaining_time": "8:20:39"} +{"current_steps": 1995, "total_steps": 4396, "loss": 0.2893, "lr": 2.660772282372938e-05, "epoch": 3.1768924302788846, "percentage": 45.38, "elapsed_time": "6:55:08", "remaining_time": "8:19:37"} +{"current_steps": 2000, "total_steps": 4396, "loss": 0.2742, "lr": 2.6532716957591128e-05, "epoch": 3.1848605577689244, "percentage": 45.5, "elapsed_time": "6:56:16", "remaining_time": "8:18:42"} +{"current_steps": 2005, "total_steps": 4396, "loss": 0.2895, "lr": 2.6457608095428925e-05, "epoch": 3.1928286852589642, "percentage": 45.61, "elapsed_time": "6:57:24", "remaining_time": "8:17:46"} +{"current_steps": 2010, "total_steps": 4396, "loss": 0.2901, "lr": 2.6382397421422986e-05, "epoch": 3.200796812749004, "percentage": 45.72, "elapsed_time": "6:58:24", "remaining_time": "8:16:40"} +{"current_steps": 2015, "total_steps": 4396, "loss": 0.2848, "lr": 2.6307086121358706e-05, "epoch": 3.208764940239044, "percentage": 45.84, "elapsed_time": "6:59:25", "remaining_time": "8:15:36"} +{"current_steps": 2020, "total_steps": 4396, "loss": 0.2789, "lr": 2.6231675382607974e-05, "epoch": 3.2167330677290837, "percentage": 45.95, "elapsed_time": "7:00:23", "remaining_time": "8:14:29"} +{"current_steps": 2025, "total_steps": 4396, "loss": 0.2856, "lr": 2.6156166394110447e-05, "epoch": 3.2247011952191236, "percentage": 46.06, "elapsed_time": "7:01:21", "remaining_time": "8:13:21"} +{"current_steps": 2030, "total_steps": 4396, "loss": 0.2746, "lr": 2.60805603463548e-05, "epoch": 3.2326693227091634, "percentage": 46.18, "elapsed_time": "7:02:21", "remaining_time": "8:12:15"} +{"current_steps": 2035, "total_steps": 4396, "loss": 0.2828, "lr": 2.6004858431359972e-05, "epoch": 3.2406374501992032, "percentage": 46.29, "elapsed_time": "7:03:23", "remaining_time": "8:11:13"} +{"current_steps": 2040, "total_steps": 4396, "loss": 0.2852, "lr": 2.592906184265635e-05, "epoch": 3.248605577689243, "percentage": 46.41, "elapsed_time": "7:04:28", "remaining_time": "8:10:14"} +{"current_steps": 2045, "total_steps": 4396, "loss": 0.292, "lr": 2.585317177526699e-05, "epoch": 3.256573705179283, "percentage": 46.52, "elapsed_time": "7:05:28", "remaining_time": "8:09:08"} +{"current_steps": 2050, "total_steps": 4396, "loss": 0.2729, "lr": 2.5777189425688714e-05, "epoch": 3.2645418326693227, "percentage": 46.63, "elapsed_time": "7:06:30", "remaining_time": "8:08:06"} +{"current_steps": 2055, "total_steps": 4396, "loss": 0.29, "lr": 2.570111599187331e-05, "epoch": 3.2725099601593626, "percentage": 46.75, "elapsed_time": "7:07:30", "remaining_time": "8:07:00"} +{"current_steps": 2060, "total_steps": 4396, "loss": 0.2784, "lr": 2.5624952673208608e-05, "epoch": 3.2804780876494024, "percentage": 46.86, "elapsed_time": "7:08:34", "remaining_time": "8:05:59"} +{"current_steps": 2065, "total_steps": 4396, "loss": 0.2799, "lr": 2.5548700670499577e-05, "epoch": 3.2884462151394422, "percentage": 46.97, "elapsed_time": "7:09:32", "remaining_time": "8:04:51"} +{"current_steps": 2070, "total_steps": 4396, "loss": 0.2873, "lr": 2.5472361185949387e-05, "epoch": 3.296414342629482, "percentage": 47.09, "elapsed_time": "7:10:32", "remaining_time": "8:03:47"} +{"current_steps": 2075, "total_steps": 4396, "loss": 0.2732, "lr": 2.5395935423140487e-05, "epoch": 3.304382470119522, "percentage": 47.2, "elapsed_time": "7:11:34", "remaining_time": "8:02:44"} +{"current_steps": 2080, "total_steps": 4396, "loss": 0.2791, "lr": 2.5319424587015587e-05, "epoch": 3.3123505976095617, "percentage": 47.32, "elapsed_time": "7:12:35", "remaining_time": "8:01:40"} +{"current_steps": 2085, "total_steps": 4396, "loss": 0.2833, "lr": 2.524282988385867e-05, "epoch": 3.3203187250996016, "percentage": 47.43, "elapsed_time": "7:13:39", "remaining_time": "8:00:39"} +{"current_steps": 2090, "total_steps": 4396, "loss": 0.2869, "lr": 2.5166152521276014e-05, "epoch": 3.3282868525896414, "percentage": 47.54, "elapsed_time": "7:14:38", "remaining_time": "7:59:33"} +{"current_steps": 2095, "total_steps": 4396, "loss": 0.278, "lr": 2.5089393708177083e-05, "epoch": 3.3362549800796812, "percentage": 47.66, "elapsed_time": "7:15:34", "remaining_time": "7:58:24"} +{"current_steps": 2100, "total_steps": 4396, "loss": 0.2831, "lr": 2.501255465475553e-05, "epoch": 3.344223107569721, "percentage": 47.77, "elapsed_time": "7:16:34", "remaining_time": "7:57:19"} +{"current_steps": 2105, "total_steps": 4396, "loss": 0.2974, "lr": 2.4935636572470085e-05, "epoch": 3.352191235059761, "percentage": 47.88, "elapsed_time": "7:17:35", "remaining_time": "7:56:15"} +{"current_steps": 2110, "total_steps": 4396, "loss": 0.2851, "lr": 2.4858640674025464e-05, "epoch": 3.3601593625498007, "percentage": 48.0, "elapsed_time": "7:18:36", "remaining_time": "7:55:12"} +{"current_steps": 2115, "total_steps": 4396, "loss": 0.2833, "lr": 2.4781568173353234e-05, "epoch": 3.3681274900398406, "percentage": 48.11, "elapsed_time": "7:19:45", "remaining_time": "7:54:16"} +{"current_steps": 2120, "total_steps": 4396, "loss": 0.3009, "lr": 2.4704420285592718e-05, "epoch": 3.3760956175298804, "percentage": 48.23, "elapsed_time": "7:20:49", "remaining_time": "7:53:15"} +{"current_steps": 2125, "total_steps": 4396, "loss": 0.2801, "lr": 2.4627198227071764e-05, "epoch": 3.3840637450199202, "percentage": 48.34, "elapsed_time": "7:21:46", "remaining_time": "7:52:07"} +{"current_steps": 2130, "total_steps": 4396, "loss": 0.2888, "lr": 2.4549903215287635e-05, "epoch": 3.39203187250996, "percentage": 48.45, "elapsed_time": "7:22:49", "remaining_time": "7:51:06"} +{"current_steps": 2135, "total_steps": 4396, "loss": 0.2922, "lr": 2.4472536468887795e-05, "epoch": 3.4, "percentage": 48.57, "elapsed_time": "7:23:51", "remaining_time": "7:50:02"} +{"current_steps": 2140, "total_steps": 4396, "loss": 0.2862, "lr": 2.4395099207650673e-05, "epoch": 3.4079681274900397, "percentage": 48.68, "elapsed_time": "7:24:52", "remaining_time": "7:48:59"} +{"current_steps": 2145, "total_steps": 4396, "loss": 0.2814, "lr": 2.4317592652466444e-05, "epoch": 3.4159362549800796, "percentage": 48.79, "elapsed_time": "7:25:51", "remaining_time": "7:47:53"} +{"current_steps": 2150, "total_steps": 4396, "loss": 0.2794, "lr": 2.4240018025317812e-05, "epoch": 3.4239043824701194, "percentage": 48.91, "elapsed_time": "7:26:56", "remaining_time": "7:46:54"} +{"current_steps": 2155, "total_steps": 4396, "loss": 0.2858, "lr": 2.4162376549260685e-05, "epoch": 3.4318725099601592, "percentage": 49.02, "elapsed_time": "7:27:59", "remaining_time": "7:45:52"} +{"current_steps": 2160, "total_steps": 4396, "loss": 0.2885, "lr": 2.408466944840494e-05, "epoch": 3.439840637450199, "percentage": 49.14, "elapsed_time": "7:29:07", "remaining_time": "7:44:55"} +{"current_steps": 2165, "total_steps": 4396, "loss": 0.2781, "lr": 2.4006897947895097e-05, "epoch": 3.447808764940239, "percentage": 49.25, "elapsed_time": "7:30:11", "remaining_time": "7:43:54"} +{"current_steps": 2170, "total_steps": 4396, "loss": 0.2865, "lr": 2.392906327389103e-05, "epoch": 3.4557768924302787, "percentage": 49.36, "elapsed_time": "7:31:12", "remaining_time": "7:42:51"} +{"current_steps": 2175, "total_steps": 4396, "loss": 0.2787, "lr": 2.3851166653548603e-05, "epoch": 3.4637450199203186, "percentage": 49.48, "elapsed_time": "7:32:09", "remaining_time": "7:41:42"} +{"current_steps": 2180, "total_steps": 4396, "loss": 0.279, "lr": 2.3773209315000344e-05, "epoch": 3.4717131474103584, "percentage": 49.59, "elapsed_time": "7:33:11", "remaining_time": "7:40:40"} +{"current_steps": 2185, "total_steps": 4396, "loss": 0.279, "lr": 2.3695192487336064e-05, "epoch": 3.4796812749003982, "percentage": 49.7, "elapsed_time": "7:34:21", "remaining_time": "7:39:45"} +{"current_steps": 2190, "total_steps": 4396, "loss": 0.2975, "lr": 2.361711740058351e-05, "epoch": 3.487649402390438, "percentage": 49.82, "elapsed_time": "7:35:18", "remaining_time": "7:38:38"} +{"current_steps": 2195, "total_steps": 4396, "loss": 0.2877, "lr": 2.3538985285688934e-05, "epoch": 3.495617529880478, "percentage": 49.93, "elapsed_time": "7:36:20", "remaining_time": "7:37:35"} +{"current_steps": 2200, "total_steps": 4396, "loss": 0.2878, "lr": 2.3460797374497714e-05, "epoch": 3.503585657370518, "percentage": 50.05, "elapsed_time": "7:37:27", "remaining_time": "7:36:37"} +{"current_steps": 2205, "total_steps": 4396, "loss": 0.2696, "lr": 2.3382554899734917e-05, "epoch": 3.511553784860558, "percentage": 50.16, "elapsed_time": "7:38:29", "remaining_time": "7:35:34"} +{"current_steps": 2210, "total_steps": 4396, "loss": 0.2722, "lr": 2.3304259094985883e-05, "epoch": 3.519521912350598, "percentage": 50.27, "elapsed_time": "7:39:32", "remaining_time": "7:34:33"} +{"current_steps": 2215, "total_steps": 4396, "loss": 0.273, "lr": 2.322591119467674e-05, "epoch": 3.5274900398406377, "percentage": 50.39, "elapsed_time": "7:40:29", "remaining_time": "7:33:25"} +{"current_steps": 2220, "total_steps": 4396, "loss": 0.2905, "lr": 2.3147512434054988e-05, "epoch": 3.5354581673306775, "percentage": 50.5, "elapsed_time": "7:41:26", "remaining_time": "7:32:17"} +{"current_steps": 2225, "total_steps": 4396, "loss": 0.2834, "lr": 2.3069064049169985e-05, "epoch": 3.5434262948207174, "percentage": 50.61, "elapsed_time": "7:42:25", "remaining_time": "7:31:12"} +{"current_steps": 2230, "total_steps": 4396, "loss": 0.2956, "lr": 2.299056727685348e-05, "epoch": 3.551394422310757, "percentage": 50.73, "elapsed_time": "7:43:28", "remaining_time": "7:30:10"} +{"current_steps": 2235, "total_steps": 4396, "loss": 0.279, "lr": 2.2912023354700105e-05, "epoch": 3.559362549800797, "percentage": 50.84, "elapsed_time": "7:44:31", "remaining_time": "7:29:08"} +{"current_steps": 2240, "total_steps": 4396, "loss": 0.2822, "lr": 2.2833433521047853e-05, "epoch": 3.567330677290837, "percentage": 50.96, "elapsed_time": "7:45:32", "remaining_time": "7:28:05"} +{"current_steps": 2245, "total_steps": 4396, "loss": 0.286, "lr": 2.2754799014958597e-05, "epoch": 3.5752988047808767, "percentage": 51.07, "elapsed_time": "7:46:31", "remaining_time": "7:26:59"} +{"current_steps": 2250, "total_steps": 4396, "loss": 0.2793, "lr": 2.26761210761985e-05, "epoch": 3.5832669322709165, "percentage": 51.18, "elapsed_time": "7:47:30", "remaining_time": "7:25:54"} +{"current_steps": 2255, "total_steps": 4396, "loss": 0.2903, "lr": 2.259740094521849e-05, "epoch": 3.5912350597609564, "percentage": 51.3, "elapsed_time": "7:48:32", "remaining_time": "7:24:51"} +{"current_steps": 2260, "total_steps": 4396, "loss": 0.2881, "lr": 2.251863986313472e-05, "epoch": 3.599203187250996, "percentage": 51.41, "elapsed_time": "7:49:35", "remaining_time": "7:23:49"} +{"current_steps": 2265, "total_steps": 4396, "loss": 0.2814, "lr": 2.2439839071708988e-05, "epoch": 3.607171314741036, "percentage": 51.52, "elapsed_time": "7:50:33", "remaining_time": "7:22:43"} +{"current_steps": 2270, "total_steps": 4396, "loss": 0.2885, "lr": 2.2360999813329126e-05, "epoch": 3.615139442231076, "percentage": 51.64, "elapsed_time": "7:51:37", "remaining_time": "7:21:42"} +{"current_steps": 2275, "total_steps": 4396, "loss": 0.2796, "lr": 2.2282123330989482e-05, "epoch": 3.6231075697211157, "percentage": 51.75, "elapsed_time": "7:52:35", "remaining_time": "7:20:36"} +{"current_steps": 2280, "total_steps": 4396, "loss": 0.2759, "lr": 2.220321086827126e-05, "epoch": 3.6310756972111555, "percentage": 51.87, "elapsed_time": "7:53:41", "remaining_time": "7:19:37"} +{"current_steps": 2285, "total_steps": 4396, "loss": 0.2973, "lr": 2.2124263669322948e-05, "epoch": 3.6390438247011954, "percentage": 51.98, "elapsed_time": "7:54:39", "remaining_time": "7:18:30"} +{"current_steps": 2290, "total_steps": 4396, "loss": 0.2806, "lr": 2.2045282978840684e-05, "epoch": 3.647011952191235, "percentage": 52.09, "elapsed_time": "7:55:42", "remaining_time": "7:17:28"} +{"current_steps": 2295, "total_steps": 4396, "loss": 0.2833, "lr": 2.1966270042048655e-05, "epoch": 3.654980079681275, "percentage": 52.21, "elapsed_time": "7:56:43", "remaining_time": "7:16:25"} +{"current_steps": 2300, "total_steps": 4396, "loss": 0.2812, "lr": 2.188722610467942e-05, "epoch": 3.662948207171315, "percentage": 52.32, "elapsed_time": "7:57:41", "remaining_time": "7:15:19"} +{"current_steps": 2305, "total_steps": 4396, "loss": 0.2863, "lr": 2.180815241295433e-05, "epoch": 3.6709163346613547, "percentage": 52.43, "elapsed_time": "7:58:42", "remaining_time": "7:14:15"} +{"current_steps": 2310, "total_steps": 4396, "loss": 0.2818, "lr": 2.172905021356383e-05, "epoch": 3.6788844621513945, "percentage": 52.55, "elapsed_time": "7:59:43", "remaining_time": "7:13:12"} +{"current_steps": 2315, "total_steps": 4396, "loss": 0.2841, "lr": 2.1649920753647828e-05, "epoch": 3.6868525896414344, "percentage": 52.66, "elapsed_time": "8:00:49", "remaining_time": "7:12:13"} +{"current_steps": 2320, "total_steps": 4396, "loss": 0.2752, "lr": 2.157076528077603e-05, "epoch": 3.694820717131474, "percentage": 52.78, "elapsed_time": "8:01:42", "remaining_time": "7:11:02"} +{"current_steps": 2325, "total_steps": 4396, "loss": 0.2789, "lr": 2.149158504292826e-05, "epoch": 3.702788844621514, "percentage": 52.89, "elapsed_time": "8:02:49", "remaining_time": "7:10:04"} +{"current_steps": 2330, "total_steps": 4396, "loss": 0.2968, "lr": 2.1412381288474793e-05, "epoch": 3.710756972111554, "percentage": 53.0, "elapsed_time": "8:03:51", "remaining_time": "7:09:01"} +{"current_steps": 2335, "total_steps": 4396, "loss": 0.2867, "lr": 2.1333155266156676e-05, "epoch": 3.7187250996015937, "percentage": 53.12, "elapsed_time": "8:04:54", "remaining_time": "7:08:00"} +{"current_steps": 2340, "total_steps": 4396, "loss": 0.28, "lr": 2.1253908225066027e-05, "epoch": 3.7266932270916335, "percentage": 53.23, "elapsed_time": "8:05:56", "remaining_time": "7:06:57"} +{"current_steps": 2345, "total_steps": 4396, "loss": 0.277, "lr": 2.1174641414626366e-05, "epoch": 3.7346613545816734, "percentage": 53.34, "elapsed_time": "8:06:55", "remaining_time": "7:05:52"} +{"current_steps": 2350, "total_steps": 4396, "loss": 0.2683, "lr": 2.109535608457287e-05, "epoch": 3.742629482071713, "percentage": 53.46, "elapsed_time": "8:07:56", "remaining_time": "7:04:49"} +{"current_steps": 2355, "total_steps": 4396, "loss": 0.2843, "lr": 2.101605348493274e-05, "epoch": 3.750597609561753, "percentage": 53.57, "elapsed_time": "8:08:56", "remaining_time": "7:03:44"} +{"current_steps": 2360, "total_steps": 4396, "loss": 0.2747, "lr": 2.093673486600542e-05, "epoch": 3.758565737051793, "percentage": 53.69, "elapsed_time": "8:09:57", "remaining_time": "7:02:41"} +{"current_steps": 2365, "total_steps": 4396, "loss": 0.2784, "lr": 2.0857401478342925e-05, "epoch": 3.7665338645418327, "percentage": 53.8, "elapsed_time": "8:11:01", "remaining_time": "7:01:40"} +{"current_steps": 2370, "total_steps": 4396, "loss": 0.2722, "lr": 2.077805457273012e-05, "epoch": 3.7745019920318725, "percentage": 53.91, "elapsed_time": "8:11:58", "remaining_time": "7:00:34"} +{"current_steps": 2375, "total_steps": 4396, "loss": 0.284, "lr": 2.0698695400165e-05, "epoch": 3.7824701195219124, "percentage": 54.03, "elapsed_time": "8:13:00", "remaining_time": "6:59:31"} +{"current_steps": 2380, "total_steps": 4396, "loss": 0.2849, "lr": 2.061932521183896e-05, "epoch": 3.790438247011952, "percentage": 54.14, "elapsed_time": "8:14:02", "remaining_time": "6:58:28"} +{"current_steps": 2385, "total_steps": 4396, "loss": 0.2965, "lr": 2.0539945259117075e-05, "epoch": 3.798406374501992, "percentage": 54.25, "elapsed_time": "8:15:02", "remaining_time": "6:57:25"} +{"current_steps": 2390, "total_steps": 4396, "loss": 0.2856, "lr": 2.046055679351835e-05, "epoch": 3.806374501992032, "percentage": 54.37, "elapsed_time": "8:16:01", "remaining_time": "6:56:19"} +{"current_steps": 2395, "total_steps": 4396, "loss": 0.2857, "lr": 2.0381161066696025e-05, "epoch": 3.8143426294820717, "percentage": 54.48, "elapsed_time": "8:17:05", "remaining_time": "6:55:18"} +{"current_steps": 2400, "total_steps": 4396, "loss": 0.2831, "lr": 2.030175933041782e-05, "epoch": 3.8223107569721115, "percentage": 54.6, "elapsed_time": "8:18:04", "remaining_time": "6:54:13"} +{"current_steps": 2405, "total_steps": 4396, "loss": 0.2918, "lr": 2.022235283654619e-05, "epoch": 3.8302788844621514, "percentage": 54.71, "elapsed_time": "8:19:07", "remaining_time": "6:53:12"} +{"current_steps": 2410, "total_steps": 4396, "loss": 0.2788, "lr": 2.014294283701862e-05, "epoch": 3.838247011952191, "percentage": 54.82, "elapsed_time": "8:20:07", "remaining_time": "6:52:08"} +{"current_steps": 2415, "total_steps": 4396, "loss": 0.2858, "lr": 2.006353058382783e-05, "epoch": 3.846215139442231, "percentage": 54.94, "elapsed_time": "8:21:10", "remaining_time": "6:51:06"} +{"current_steps": 2420, "total_steps": 4396, "loss": 0.2753, "lr": 1.9984117329002112e-05, "epoch": 3.854183266932271, "percentage": 55.05, "elapsed_time": "8:22:13", "remaining_time": "6:50:04"} +{"current_steps": 2425, "total_steps": 4396, "loss": 0.2926, "lr": 1.9904704324585516e-05, "epoch": 3.8621513944223107, "percentage": 55.16, "elapsed_time": "8:23:10", "remaining_time": "6:48:58"} +{"current_steps": 2430, "total_steps": 4396, "loss": 0.2897, "lr": 1.9825292822618167e-05, "epoch": 3.8701195219123505, "percentage": 55.28, "elapsed_time": "8:24:10", "remaining_time": "6:47:54"} +{"current_steps": 2435, "total_steps": 4396, "loss": 0.2833, "lr": 1.9745884075116498e-05, "epoch": 3.8780876494023904, "percentage": 55.39, "elapsed_time": "8:25:11", "remaining_time": "6:46:51"} +{"current_steps": 2440, "total_steps": 4396, "loss": 0.2833, "lr": 1.9666479334053496e-05, "epoch": 3.88605577689243, "percentage": 55.51, "elapsed_time": "8:26:18", "remaining_time": "6:45:52"} +{"current_steps": 2445, "total_steps": 4396, "loss": 0.3002, "lr": 1.9587079851339016e-05, "epoch": 3.89402390438247, "percentage": 55.62, "elapsed_time": "8:27:19", "remaining_time": "6:44:49"} +{"current_steps": 2450, "total_steps": 4396, "loss": 0.2899, "lr": 1.9507686878799974e-05, "epoch": 3.90199203187251, "percentage": 55.73, "elapsed_time": "8:28:19", "remaining_time": "6:43:45"} +{"current_steps": 2455, "total_steps": 4396, "loss": 0.2844, "lr": 1.9428301668160674e-05, "epoch": 3.9099601593625497, "percentage": 55.85, "elapsed_time": "8:29:18", "remaining_time": "6:42:40"} +{"current_steps": 2460, "total_steps": 4396, "loss": 0.2884, "lr": 1.9348925471023023e-05, "epoch": 3.9179282868525895, "percentage": 55.96, "elapsed_time": "8:30:23", "remaining_time": "6:41:40"} +{"current_steps": 2465, "total_steps": 4396, "loss": 0.2679, "lr": 1.9269559538846823e-05, "epoch": 3.9258964143426294, "percentage": 56.07, "elapsed_time": "8:31:21", "remaining_time": "6:40:34"} +{"current_steps": 2470, "total_steps": 4396, "loss": 0.2961, "lr": 1.9190205122930056e-05, "epoch": 3.933864541832669, "percentage": 56.19, "elapsed_time": "8:32:30", "remaining_time": "6:39:37"} +{"current_steps": 2475, "total_steps": 4396, "loss": 0.276, "lr": 1.911086347438911e-05, "epoch": 3.941832669322709, "percentage": 56.3, "elapsed_time": "8:33:33", "remaining_time": "6:38:36"} +{"current_steps": 2480, "total_steps": 4396, "loss": 0.3036, "lr": 1.90315358441391e-05, "epoch": 3.949800796812749, "percentage": 56.41, "elapsed_time": "8:34:37", "remaining_time": "6:37:35"} +{"current_steps": 2485, "total_steps": 4396, "loss": 0.2803, "lr": 1.8952223482874114e-05, "epoch": 3.9577689243027887, "percentage": 56.53, "elapsed_time": "8:35:44", "remaining_time": "6:36:36"} +{"current_steps": 2490, "total_steps": 4396, "loss": 0.2809, "lr": 1.88729276410475e-05, "epoch": 3.9657370517928285, "percentage": 56.64, "elapsed_time": "8:36:49", "remaining_time": "6:35:36"} +{"current_steps": 2495, "total_steps": 4396, "loss": 0.2812, "lr": 1.8793649568852192e-05, "epoch": 3.9737051792828684, "percentage": 56.76, "elapsed_time": "8:37:45", "remaining_time": "6:34:29"} +{"current_steps": 2500, "total_steps": 4396, "loss": 0.2778, "lr": 1.871439051620092e-05, "epoch": 3.981673306772908, "percentage": 56.87, "elapsed_time": "8:38:45", "remaining_time": "6:33:25"} +{"current_steps": 2505, "total_steps": 4396, "loss": 0.2929, "lr": 1.8635151732706586e-05, "epoch": 3.989641434262948, "percentage": 56.98, "elapsed_time": "8:39:51", "remaining_time": "6:32:26"} +{"current_steps": 2510, "total_steps": 4396, "loss": 0.2849, "lr": 1.8555934467662485e-05, "epoch": 3.997609561752988, "percentage": 57.1, "elapsed_time": "8:40:57", "remaining_time": "6:31:26"} +{"current_steps": 2515, "total_steps": 4396, "loss": 0.26, "lr": 1.84767399700227e-05, "epoch": 4.004780876494024, "percentage": 57.21, "elapsed_time": "8:41:51", "remaining_time": "6:30:18"} +{"current_steps": 2520, "total_steps": 4396, "loss": 0.2816, "lr": 1.839756948838231e-05, "epoch": 4.012749003984064, "percentage": 57.32, "elapsed_time": "8:42:46", "remaining_time": "6:29:10"} +{"current_steps": 2525, "total_steps": 4396, "loss": 0.2595, "lr": 1.831842427095778e-05, "epoch": 4.020717131474104, "percentage": 57.44, "elapsed_time": "8:43:46", "remaining_time": "6:28:06"} +{"current_steps": 2530, "total_steps": 4396, "loss": 0.2627, "lr": 1.823930556556724e-05, "epoch": 4.028685258964144, "percentage": 57.55, "elapsed_time": "8:44:56", "remaining_time": "6:27:10"} +{"current_steps": 2535, "total_steps": 4396, "loss": 0.2542, "lr": 1.8160214619610843e-05, "epoch": 4.036653386454184, "percentage": 57.67, "elapsed_time": "8:46:04", "remaining_time": "6:26:11"} +{"current_steps": 2540, "total_steps": 4396, "loss": 0.2605, "lr": 1.8081152680051075e-05, "epoch": 4.044621513944223, "percentage": 57.78, "elapsed_time": "8:47:12", "remaining_time": "6:25:14"} +{"current_steps": 2545, "total_steps": 4396, "loss": 0.2725, "lr": 1.8002120993393095e-05, "epoch": 4.052589641434263, "percentage": 57.89, "elapsed_time": "8:48:13", "remaining_time": "6:24:10"} +{"current_steps": 2550, "total_steps": 4396, "loss": 0.2763, "lr": 1.7923120805665087e-05, "epoch": 4.060557768924303, "percentage": 58.01, "elapsed_time": "8:49:09", "remaining_time": "6:23:04"} +{"current_steps": 2555, "total_steps": 4396, "loss": 0.2742, "lr": 1.7844153362398638e-05, "epoch": 4.068525896414343, "percentage": 58.12, "elapsed_time": "8:50:14", "remaining_time": "6:22:03"} +{"current_steps": 2560, "total_steps": 4396, "loss": 0.2639, "lr": 1.776521990860905e-05, "epoch": 4.076494023904383, "percentage": 58.23, "elapsed_time": "8:51:15", "remaining_time": "6:21:00"} +{"current_steps": 2565, "total_steps": 4396, "loss": 0.272, "lr": 1.7686321688775772e-05, "epoch": 4.084462151394423, "percentage": 58.35, "elapsed_time": "8:52:14", "remaining_time": "6:19:56"} +{"current_steps": 2570, "total_steps": 4396, "loss": 0.2649, "lr": 1.7607459946822717e-05, "epoch": 4.092430278884462, "percentage": 58.46, "elapsed_time": "8:53:26", "remaining_time": "6:19:00"} +{"current_steps": 2575, "total_steps": 4396, "loss": 0.2528, "lr": 1.7528635926098715e-05, "epoch": 4.100398406374502, "percentage": 58.58, "elapsed_time": "8:54:26", "remaining_time": "6:17:57"} +{"current_steps": 2580, "total_steps": 4396, "loss": 0.2849, "lr": 1.7449850869357846e-05, "epoch": 4.108366533864542, "percentage": 58.69, "elapsed_time": "8:55:30", "remaining_time": "6:16:55"} +{"current_steps": 2585, "total_steps": 4396, "loss": 0.2682, "lr": 1.7371106018739886e-05, "epoch": 4.116334661354582, "percentage": 58.8, "elapsed_time": "8:56:32", "remaining_time": "6:15:53"} +{"current_steps": 2590, "total_steps": 4396, "loss": 0.2701, "lr": 1.729240261575072e-05, "epoch": 4.124302788844622, "percentage": 58.92, "elapsed_time": "8:57:37", "remaining_time": "6:14:52"} +{"current_steps": 2595, "total_steps": 4396, "loss": 0.2759, "lr": 1.7213741901242747e-05, "epoch": 4.132270916334662, "percentage": 59.03, "elapsed_time": "8:58:37", "remaining_time": "6:13:49"} +{"current_steps": 2600, "total_steps": 4396, "loss": 0.2743, "lr": 1.713512511539536e-05, "epoch": 4.140239043824701, "percentage": 59.14, "elapsed_time": "8:59:40", "remaining_time": "6:12:47"} +{"current_steps": 2605, "total_steps": 4396, "loss": 0.2618, "lr": 1.705655349769534e-05, "epoch": 4.148207171314741, "percentage": 59.26, "elapsed_time": "9:00:44", "remaining_time": "6:11:46"} +{"current_steps": 2610, "total_steps": 4396, "loss": 0.2636, "lr": 1.6978028286917336e-05, "epoch": 4.156175298804781, "percentage": 59.37, "elapsed_time": "9:01:42", "remaining_time": "6:10:41"} +{"current_steps": 2615, "total_steps": 4396, "loss": 0.2734, "lr": 1.6899550721104362e-05, "epoch": 4.164143426294821, "percentage": 59.49, "elapsed_time": "9:02:38", "remaining_time": "6:09:34"} +{"current_steps": 2620, "total_steps": 4396, "loss": 0.2724, "lr": 1.6821122037548223e-05, "epoch": 4.172111553784861, "percentage": 59.6, "elapsed_time": "9:03:41", "remaining_time": "6:08:33"} +{"current_steps": 2625, "total_steps": 4396, "loss": 0.2678, "lr": 1.6742743472770063e-05, "epoch": 4.180079681274901, "percentage": 59.71, "elapsed_time": "9:04:47", "remaining_time": "6:07:32"} +{"current_steps": 2630, "total_steps": 4396, "loss": 0.275, "lr": 1.666441626250083e-05, "epoch": 4.18804780876494, "percentage": 59.83, "elapsed_time": "9:05:52", "remaining_time": "6:06:32"} +{"current_steps": 2635, "total_steps": 4396, "loss": 0.2617, "lr": 1.65861416416618e-05, "epoch": 4.19601593625498, "percentage": 59.94, "elapsed_time": "9:06:53", "remaining_time": "6:05:29"} +{"current_steps": 2640, "total_steps": 4396, "loss": 0.2719, "lr": 1.6507920844345135e-05, "epoch": 4.20398406374502, "percentage": 60.05, "elapsed_time": "9:07:56", "remaining_time": "6:04:27"} +{"current_steps": 2645, "total_steps": 4396, "loss": 0.2575, "lr": 1.642975510379439e-05, "epoch": 4.21195219123506, "percentage": 60.17, "elapsed_time": "9:09:04", "remaining_time": "6:03:29"} +{"current_steps": 2650, "total_steps": 4396, "loss": 0.2687, "lr": 1.6351645652385095e-05, "epoch": 4.2199203187251, "percentage": 60.28, "elapsed_time": "9:10:06", "remaining_time": "6:02:27"} +{"current_steps": 2655, "total_steps": 4396, "loss": 0.2649, "lr": 1.6273593721605295e-05, "epoch": 4.22788844621514, "percentage": 60.4, "elapsed_time": "9:11:07", "remaining_time": "6:01:23"} +{"current_steps": 2660, "total_steps": 4396, "loss": 0.2731, "lr": 1.6195600542036188e-05, "epoch": 4.235856573705179, "percentage": 60.51, "elapsed_time": "9:12:06", "remaining_time": "6:00:19"} +{"current_steps": 2665, "total_steps": 4396, "loss": 0.256, "lr": 1.6117667343332658e-05, "epoch": 4.243824701195219, "percentage": 60.62, "elapsed_time": "9:13:05", "remaining_time": "5:59:15"} +{"current_steps": 2670, "total_steps": 4396, "loss": 0.2617, "lr": 1.6039795354203925e-05, "epoch": 4.251792828685259, "percentage": 60.74, "elapsed_time": "9:14:04", "remaining_time": "5:58:10"} +{"current_steps": 2675, "total_steps": 4396, "loss": 0.2665, "lr": 1.5961985802394195e-05, "epoch": 4.259760956175299, "percentage": 60.85, "elapsed_time": "9:15:09", "remaining_time": "5:57:10"} +{"current_steps": 2680, "total_steps": 4396, "loss": 0.264, "lr": 1.5884239914663232e-05, "epoch": 4.267729083665339, "percentage": 60.96, "elapsed_time": "9:16:02", "remaining_time": "5:56:02"} +{"current_steps": 2685, "total_steps": 4396, "loss": 0.269, "lr": 1.58065589167671e-05, "epoch": 4.275697211155379, "percentage": 61.08, "elapsed_time": "9:17:08", "remaining_time": "5:55:01"} +{"current_steps": 2690, "total_steps": 4396, "loss": 0.2651, "lr": 1.572894403343878e-05, "epoch": 4.283665338645418, "percentage": 61.19, "elapsed_time": "9:18:07", "remaining_time": "5:53:57"} +{"current_steps": 2695, "total_steps": 4396, "loss": 0.2786, "lr": 1.5651396488368863e-05, "epoch": 4.291633466135458, "percentage": 61.31, "elapsed_time": "9:19:05", "remaining_time": "5:52:53"} +{"current_steps": 2700, "total_steps": 4396, "loss": 0.2549, "lr": 1.5573917504186306e-05, "epoch": 4.299601593625498, "percentage": 61.42, "elapsed_time": "9:20:02", "remaining_time": "5:51:47"} +{"current_steps": 2705, "total_steps": 4396, "loss": 0.2724, "lr": 1.5496508302439096e-05, "epoch": 4.307569721115538, "percentage": 61.53, "elapsed_time": "9:21:03", "remaining_time": "5:50:44"} +{"current_steps": 2710, "total_steps": 4396, "loss": 0.2677, "lr": 1.5419170103575037e-05, "epoch": 4.315537848605578, "percentage": 61.65, "elapsed_time": "9:22:05", "remaining_time": "5:49:41"} +{"current_steps": 2715, "total_steps": 4396, "loss": 0.2686, "lr": 1.534190412692246e-05, "epoch": 4.323505976095618, "percentage": 61.76, "elapsed_time": "9:23:17", "remaining_time": "5:48:45"} +{"current_steps": 2720, "total_steps": 4396, "loss": 0.2629, "lr": 1.5264711590671067e-05, "epoch": 4.331474103585657, "percentage": 61.87, "elapsed_time": "9:24:20", "remaining_time": "5:47:44"} +{"current_steps": 2725, "total_steps": 4396, "loss": 0.2698, "lr": 1.5187593711852653e-05, "epoch": 4.339442231075697, "percentage": 61.99, "elapsed_time": "9:25:23", "remaining_time": "5:46:42"} +{"current_steps": 2730, "total_steps": 4396, "loss": 0.2666, "lr": 1.5110551706321952e-05, "epoch": 4.347410358565737, "percentage": 62.1, "elapsed_time": "9:26:26", "remaining_time": "5:45:40"} +{"current_steps": 2735, "total_steps": 4396, "loss": 0.2695, "lr": 1.5033586788737492e-05, "epoch": 4.355378486055777, "percentage": 62.22, "elapsed_time": "9:27:26", "remaining_time": "5:44:36"} +{"current_steps": 2740, "total_steps": 4396, "loss": 0.267, "lr": 1.495670017254238e-05, "epoch": 4.363346613545817, "percentage": 62.33, "elapsed_time": "9:28:25", "remaining_time": "5:43:32"} +{"current_steps": 2745, "total_steps": 4396, "loss": 0.2669, "lr": 1.487989306994525e-05, "epoch": 4.371314741035857, "percentage": 62.44, "elapsed_time": "9:29:25", "remaining_time": "5:42:28"} +{"current_steps": 2750, "total_steps": 4396, "loss": 0.2706, "lr": 1.480316669190108e-05, "epoch": 4.379282868525896, "percentage": 62.56, "elapsed_time": "9:30:23", "remaining_time": "5:41:24"} +{"current_steps": 2755, "total_steps": 4396, "loss": 0.2623, "lr": 1.4726522248092132e-05, "epoch": 4.387250996015936, "percentage": 62.67, "elapsed_time": "9:31:23", "remaining_time": "5:40:20"} +{"current_steps": 2760, "total_steps": 4396, "loss": 0.2608, "lr": 1.4649960946908897e-05, "epoch": 4.395219123505976, "percentage": 62.78, "elapsed_time": "9:32:23", "remaining_time": "5:39:17"} +{"current_steps": 2765, "total_steps": 4396, "loss": 0.2626, "lr": 1.4573483995430992e-05, "epoch": 4.403187250996016, "percentage": 62.9, "elapsed_time": "9:33:19", "remaining_time": "5:38:11"} +{"current_steps": 2770, "total_steps": 4396, "loss": 0.2754, "lr": 1.4497092599408207e-05, "epoch": 4.411155378486056, "percentage": 63.01, "elapsed_time": "9:34:19", "remaining_time": "5:37:07"} +{"current_steps": 2775, "total_steps": 4396, "loss": 0.2608, "lr": 1.4420787963241399e-05, "epoch": 4.419123505976096, "percentage": 63.13, "elapsed_time": "9:35:21", "remaining_time": "5:36:05"} +{"current_steps": 2780, "total_steps": 4396, "loss": 0.2624, "lr": 1.4344571289963592e-05, "epoch": 4.427091633466135, "percentage": 63.24, "elapsed_time": "9:36:22", "remaining_time": "5:35:02"} +{"current_steps": 2785, "total_steps": 4396, "loss": 0.268, "lr": 1.426844378122095e-05, "epoch": 4.435059760956175, "percentage": 63.35, "elapsed_time": "9:37:24", "remaining_time": "5:34:00"} +{"current_steps": 2790, "total_steps": 4396, "loss": 0.2684, "lr": 1.4192406637253853e-05, "epoch": 4.443027888446215, "percentage": 63.47, "elapsed_time": "9:38:23", "remaining_time": "5:32:56"} +{"current_steps": 2795, "total_steps": 4396, "loss": 0.2685, "lr": 1.4116461056877986e-05, "epoch": 4.450996015936255, "percentage": 63.58, "elapsed_time": "9:39:23", "remaining_time": "5:31:52"} +{"current_steps": 2800, "total_steps": 4396, "loss": 0.2795, "lr": 1.4040608237465412e-05, "epoch": 4.458964143426295, "percentage": 63.69, "elapsed_time": "9:40:25", "remaining_time": "5:30:50"} +{"current_steps": 2805, "total_steps": 4396, "loss": 0.274, "lr": 1.3964849374925712e-05, "epoch": 4.466932270916335, "percentage": 63.81, "elapsed_time": "9:41:30", "remaining_time": "5:29:49"} +{"current_steps": 2810, "total_steps": 4396, "loss": 0.2773, "lr": 1.3889185663687133e-05, "epoch": 4.474900398406374, "percentage": 63.92, "elapsed_time": "9:42:29", "remaining_time": "5:28:45"} +{"current_steps": 2815, "total_steps": 4396, "loss": 0.2793, "lr": 1.3813618296677734e-05, "epoch": 4.482868525896414, "percentage": 64.04, "elapsed_time": "9:43:27", "remaining_time": "5:27:41"} +{"current_steps": 2820, "total_steps": 4396, "loss": 0.2735, "lr": 1.3738148465306607e-05, "epoch": 4.490836653386454, "percentage": 64.15, "elapsed_time": "9:44:23", "remaining_time": "5:26:35"} +{"current_steps": 2825, "total_steps": 4396, "loss": 0.2569, "lr": 1.3662777359445065e-05, "epoch": 4.498804780876494, "percentage": 64.26, "elapsed_time": "9:45:28", "remaining_time": "5:25:35"} +{"current_steps": 2830, "total_steps": 4396, "loss": 0.2638, "lr": 1.3587506167407922e-05, "epoch": 4.506772908366534, "percentage": 64.38, "elapsed_time": "9:46:23", "remaining_time": "5:24:29"} +{"current_steps": 2835, "total_steps": 4396, "loss": 0.2678, "lr": 1.3512336075934704e-05, "epoch": 4.514741035856574, "percentage": 64.49, "elapsed_time": "9:47:18", "remaining_time": "5:23:23"} +{"current_steps": 2840, "total_steps": 4396, "loss": 0.2673, "lr": 1.3437268270170969e-05, "epoch": 4.522709163346613, "percentage": 64.6, "elapsed_time": "9:48:20", "remaining_time": "5:22:20"} +{"current_steps": 2845, "total_steps": 4396, "loss": 0.2857, "lr": 1.3362303933649648e-05, "epoch": 4.530677290836653, "percentage": 64.72, "elapsed_time": "9:49:27", "remaining_time": "5:21:21"} +{"current_steps": 2850, "total_steps": 4396, "loss": 0.27, "lr": 1.328744424827232e-05, "epoch": 4.538645418326693, "percentage": 64.83, "elapsed_time": "9:50:35", "remaining_time": "5:20:21"} +{"current_steps": 2855, "total_steps": 4396, "loss": 0.264, "lr": 1.3212690394290646e-05, "epoch": 4.546613545816733, "percentage": 64.95, "elapsed_time": "9:51:32", "remaining_time": "5:19:17"} +{"current_steps": 2860, "total_steps": 4396, "loss": 0.2705, "lr": 1.3138043550287707e-05, "epoch": 4.554581673306773, "percentage": 65.06, "elapsed_time": "9:52:37", "remaining_time": "5:18:16"} +{"current_steps": 2865, "total_steps": 4396, "loss": 0.2745, "lr": 1.3063504893159458e-05, "epoch": 4.562549800796813, "percentage": 65.17, "elapsed_time": "9:53:39", "remaining_time": "5:17:14"} +{"current_steps": 2870, "total_steps": 4396, "loss": 0.2701, "lr": 1.2989075598096148e-05, "epoch": 4.570517928286852, "percentage": 65.29, "elapsed_time": "9:54:43", "remaining_time": "5:16:13"} +{"current_steps": 2875, "total_steps": 4396, "loss": 0.2681, "lr": 1.2914756838563816e-05, "epoch": 4.578486055776892, "percentage": 65.4, "elapsed_time": "9:55:51", "remaining_time": "5:15:14"} +{"current_steps": 2880, "total_steps": 4396, "loss": 0.2693, "lr": 1.2840549786285776e-05, "epoch": 4.586454183266932, "percentage": 65.51, "elapsed_time": "9:56:58", "remaining_time": "5:14:14"} +{"current_steps": 2885, "total_steps": 4396, "loss": 0.2695, "lr": 1.2766455611224127e-05, "epoch": 4.594422310756972, "percentage": 65.63, "elapsed_time": "9:57:55", "remaining_time": "5:13:09"} +{"current_steps": 2890, "total_steps": 4396, "loss": 0.2657, "lr": 1.2692475481561357e-05, "epoch": 4.602390438247012, "percentage": 65.74, "elapsed_time": "9:58:52", "remaining_time": "5:12:04"} +{"current_steps": 2895, "total_steps": 4396, "loss": 0.2697, "lr": 1.2618610563681863e-05, "epoch": 4.610358565737052, "percentage": 65.86, "elapsed_time": "9:59:55", "remaining_time": "5:11:03"} +{"current_steps": 2900, "total_steps": 4396, "loss": 0.2518, "lr": 1.2544862022153601e-05, "epoch": 4.618326693227091, "percentage": 65.97, "elapsed_time": "10:00:54", "remaining_time": "5:09:59"} +{"current_steps": 2905, "total_steps": 4396, "loss": 0.2678, "lr": 1.2471231019709732e-05, "epoch": 4.626294820717131, "percentage": 66.08, "elapsed_time": "10:01:53", "remaining_time": "5:08:55"} +{"current_steps": 2910, "total_steps": 4396, "loss": 0.267, "lr": 1.2397718717230243e-05, "epoch": 4.634262948207171, "percentage": 66.2, "elapsed_time": "10:02:49", "remaining_time": "5:07:50"} +{"current_steps": 2915, "total_steps": 4396, "loss": 0.274, "lr": 1.2324326273723707e-05, "epoch": 4.642231075697211, "percentage": 66.31, "elapsed_time": "10:03:52", "remaining_time": "5:06:48"} +{"current_steps": 2920, "total_steps": 4396, "loss": 0.2648, "lr": 1.225105484630896e-05, "epoch": 4.650199203187251, "percentage": 66.42, "elapsed_time": "10:04:55", "remaining_time": "5:05:46"} +{"current_steps": 2925, "total_steps": 4396, "loss": 0.2708, "lr": 1.2177905590196884e-05, "epoch": 4.658167330677291, "percentage": 66.54, "elapsed_time": "10:05:54", "remaining_time": "5:04:43"} +{"current_steps": 2930, "total_steps": 4396, "loss": 0.2828, "lr": 1.2104879658672175e-05, "epoch": 4.66613545816733, "percentage": 66.65, "elapsed_time": "10:06:54", "remaining_time": "5:03:39"} +{"current_steps": 2935, "total_steps": 4396, "loss": 0.2533, "lr": 1.2031978203075172e-05, "epoch": 4.67410358565737, "percentage": 66.77, "elapsed_time": "10:07:53", "remaining_time": "5:02:36"} +{"current_steps": 2940, "total_steps": 4396, "loss": 0.2647, "lr": 1.1959202372783728e-05, "epoch": 4.68207171314741, "percentage": 66.88, "elapsed_time": "10:08:55", "remaining_time": "5:01:33"} +{"current_steps": 2945, "total_steps": 4396, "loss": 0.2607, "lr": 1.188655331519502e-05, "epoch": 4.69003984063745, "percentage": 66.99, "elapsed_time": "10:10:01", "remaining_time": "5:00:33"} +{"current_steps": 2950, "total_steps": 4396, "loss": 0.2717, "lr": 1.1814032175707556e-05, "epoch": 4.69800796812749, "percentage": 67.11, "elapsed_time": "10:11:05", "remaining_time": "4:59:32"} +{"current_steps": 2955, "total_steps": 4396, "loss": 0.2671, "lr": 1.1741640097703018e-05, "epoch": 4.70597609561753, "percentage": 67.22, "elapsed_time": "10:12:02", "remaining_time": "4:58:27"} +{"current_steps": 2960, "total_steps": 4396, "loss": 0.27, "lr": 1.1669378222528303e-05, "epoch": 4.713944223107569, "percentage": 67.33, "elapsed_time": "10:13:02", "remaining_time": "4:57:24"} +{"current_steps": 2965, "total_steps": 4396, "loss": 0.2791, "lr": 1.1597247689477502e-05, "epoch": 4.721912350597609, "percentage": 67.45, "elapsed_time": "10:14:07", "remaining_time": "4:56:23"} +{"current_steps": 2970, "total_steps": 4396, "loss": 0.266, "lr": 1.1525249635773935e-05, "epoch": 4.729880478087649, "percentage": 67.56, "elapsed_time": "10:15:12", "remaining_time": "4:55:22"} +{"current_steps": 2975, "total_steps": 4396, "loss": 0.265, "lr": 1.1453385196552247e-05, "epoch": 4.737848605577689, "percentage": 67.68, "elapsed_time": "10:16:18", "remaining_time": "4:54:22"} +{"current_steps": 2980, "total_steps": 4396, "loss": 0.2694, "lr": 1.1381655504840468e-05, "epoch": 4.745816733067729, "percentage": 67.79, "elapsed_time": "10:17:22", "remaining_time": "4:53:21"} +{"current_steps": 2985, "total_steps": 4396, "loss": 0.2606, "lr": 1.1310061691542198e-05, "epoch": 4.753784860557769, "percentage": 67.9, "elapsed_time": "10:18:25", "remaining_time": "4:52:19"} +{"current_steps": 2990, "total_steps": 4396, "loss": 0.2655, "lr": 1.1238604885418734e-05, "epoch": 4.761752988047808, "percentage": 68.02, "elapsed_time": "10:19:25", "remaining_time": "4:51:16"} +{"current_steps": 2995, "total_steps": 4396, "loss": 0.2782, "lr": 1.1167286213071293e-05, "epoch": 4.769721115537848, "percentage": 68.13, "elapsed_time": "10:20:28", "remaining_time": "4:50:14"} +{"current_steps": 3000, "total_steps": 4396, "loss": 0.2693, "lr": 1.109610679892327e-05, "epoch": 4.777689243027888, "percentage": 68.24, "elapsed_time": "10:21:25", "remaining_time": "4:49:10"} +{"current_steps": 3005, "total_steps": 4396, "loss": 0.2721, "lr": 1.102506776520246e-05, "epoch": 4.785657370517928, "percentage": 68.36, "elapsed_time": "10:23:04", "remaining_time": "4:48:24"} +{"current_steps": 3010, "total_steps": 4396, "loss": 0.2887, "lr": 1.0954170231923422e-05, "epoch": 4.793625498007968, "percentage": 68.47, "elapsed_time": "10:24:05", "remaining_time": "4:47:22"} +{"current_steps": 3015, "total_steps": 4396, "loss": 0.2562, "lr": 1.0883415316869775e-05, "epoch": 4.801593625498008, "percentage": 68.59, "elapsed_time": "10:25:08", "remaining_time": "4:46:20"} +{"current_steps": 3020, "total_steps": 4396, "loss": 0.2547, "lr": 1.0812804135576588e-05, "epoch": 4.8095617529880474, "percentage": 68.7, "elapsed_time": "10:26:09", "remaining_time": "4:45:17"} +{"current_steps": 3025, "total_steps": 4396, "loss": 0.2616, "lr": 1.0742337801312823e-05, "epoch": 4.817529880478087, "percentage": 68.81, "elapsed_time": "10:27:07", "remaining_time": "4:44:13"} +{"current_steps": 3030, "total_steps": 4396, "loss": 0.2517, "lr": 1.0672017425063727e-05, "epoch": 4.825498007968127, "percentage": 68.93, "elapsed_time": "10:28:12", "remaining_time": "4:43:12"} +{"current_steps": 3035, "total_steps": 4396, "loss": 0.2756, "lr": 1.0601844115513376e-05, "epoch": 4.833466135458167, "percentage": 69.04, "elapsed_time": "10:29:19", "remaining_time": "4:42:12"} +{"current_steps": 3040, "total_steps": 4396, "loss": 0.2652, "lr": 1.0531818979027136e-05, "epoch": 4.841434262948207, "percentage": 69.15, "elapsed_time": "10:30:25", "remaining_time": "4:41:12"} +{"current_steps": 3045, "total_steps": 4396, "loss": 0.2753, "lr": 1.0461943119634257e-05, "epoch": 4.849402390438247, "percentage": 69.27, "elapsed_time": "10:31:19", "remaining_time": "4:40:06"} +{"current_steps": 3050, "total_steps": 4396, "loss": 0.2781, "lr": 1.0392217639010478e-05, "epoch": 4.8573705179282864, "percentage": 69.38, "elapsed_time": "10:32:17", "remaining_time": "4:39:02"} +{"current_steps": 3055, "total_steps": 4396, "loss": 0.2616, "lr": 1.0322643636460619e-05, "epoch": 4.865338645418326, "percentage": 69.49, "elapsed_time": "10:33:15", "remaining_time": "4:37:58"} +{"current_steps": 3060, "total_steps": 4396, "loss": 0.2755, "lr": 1.0253222208901263e-05, "epoch": 4.873306772908366, "percentage": 69.61, "elapsed_time": "10:34:12", "remaining_time": "4:36:53"} +{"current_steps": 3065, "total_steps": 4396, "loss": 0.2592, "lr": 1.0183954450843493e-05, "epoch": 4.881274900398406, "percentage": 69.72, "elapsed_time": "10:35:19", "remaining_time": "4:35:53"} +{"current_steps": 3070, "total_steps": 4396, "loss": 0.2789, "lr": 1.0114841454375592e-05, "epoch": 4.889243027888446, "percentage": 69.84, "elapsed_time": "10:36:19", "remaining_time": "4:34:50"} +{"current_steps": 3075, "total_steps": 4396, "loss": 0.2766, "lr": 1.0045884309145846e-05, "epoch": 4.897211155378486, "percentage": 69.95, "elapsed_time": "10:37:20", "remaining_time": "4:33:47"} +{"current_steps": 3080, "total_steps": 4396, "loss": 0.2589, "lr": 9.97708410234535e-06, "epoch": 4.9051792828685254, "percentage": 70.06, "elapsed_time": "10:38:33", "remaining_time": "4:32:50"} +{"current_steps": 3085, "total_steps": 4396, "loss": 0.2703, "lr": 9.90844191869091e-06, "epoch": 4.913147410358565, "percentage": 70.18, "elapsed_time": "10:39:34", "remaining_time": "4:31:47"} +{"current_steps": 3090, "total_steps": 4396, "loss": 0.2787, "lr": 9.839958840407873e-06, "epoch": 4.921115537848605, "percentage": 70.29, "elapsed_time": "10:40:36", "remaining_time": "4:30:45"} +{"current_steps": 3095, "total_steps": 4396, "loss": 0.2743, "lr": 9.771635947213135e-06, "epoch": 4.929083665338645, "percentage": 70.4, "elapsed_time": "10:41:42", "remaining_time": "4:29:44"} +{"current_steps": 3100, "total_steps": 4396, "loss": 0.2573, "lr": 9.703474316298066e-06, "epoch": 4.937051792828685, "percentage": 70.52, "elapsed_time": "10:42:49", "remaining_time": "4:28:44"} +{"current_steps": 3105, "total_steps": 4396, "loss": 0.281, "lr": 9.635475022311528e-06, "epoch": 4.945019920318725, "percentage": 70.63, "elapsed_time": "10:43:52", "remaining_time": "4:27:42"} +{"current_steps": 3110, "total_steps": 4396, "loss": 0.2698, "lr": 9.567639137342997e-06, "epoch": 4.9529880478087644, "percentage": 70.75, "elapsed_time": "10:44:58", "remaining_time": "4:26:41"} +{"current_steps": 3115, "total_steps": 4396, "loss": 0.2719, "lr": 9.499967730905557e-06, "epoch": 4.960956175298804, "percentage": 70.86, "elapsed_time": "10:45:54", "remaining_time": "4:25:37"} +{"current_steps": 3120, "total_steps": 4396, "loss": 0.2641, "lr": 9.43246186991914e-06, "epoch": 4.968924302788845, "percentage": 70.97, "elapsed_time": "10:47:01", "remaining_time": "4:24:36"} +{"current_steps": 3125, "total_steps": 4396, "loss": 0.2658, "lr": 9.365122618693623e-06, "epoch": 4.976892430278885, "percentage": 71.09, "elapsed_time": "10:48:03", "remaining_time": "4:23:34"} +{"current_steps": 3130, "total_steps": 4396, "loss": 0.2744, "lr": 9.29795103891211e-06, "epoch": 4.984860557768925, "percentage": 71.2, "elapsed_time": "10:49:09", "remaining_time": "4:22:34"} +{"current_steps": 3135, "total_steps": 4396, "loss": 0.2593, "lr": 9.230948189614144e-06, "epoch": 4.9928286852589645, "percentage": 71.31, "elapsed_time": "10:50:11", "remaining_time": "4:21:31"} +{"current_steps": 3140, "total_steps": 4396, "loss": 0.2681, "lr": 9.164115127179038e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "10:51:04", "remaining_time": "4:20:25"} +{"current_steps": 3145, "total_steps": 4396, "loss": 0.2516, "lr": 9.09745290530923e-06, "epoch": 5.00796812749004, "percentage": 71.54, "elapsed_time": "10:52:11", "remaining_time": "4:19:25"} +{"current_steps": 3150, "total_steps": 4396, "loss": 0.2533, "lr": 9.030962575013622e-06, "epoch": 5.01593625498008, "percentage": 71.66, "elapsed_time": "10:53:13", "remaining_time": "4:18:23"} +{"current_steps": 3155, "total_steps": 4396, "loss": 0.2709, "lr": 8.964645184591082e-06, "epoch": 5.0239043824701195, "percentage": 71.77, "elapsed_time": "10:54:11", "remaining_time": "4:17:19"} +{"current_steps": 3160, "total_steps": 4396, "loss": 0.253, "lr": 8.898501779613842e-06, "epoch": 5.031872509960159, "percentage": 71.88, "elapsed_time": "10:55:19", "remaining_time": "4:16:19"} +{"current_steps": 3165, "total_steps": 4396, "loss": 0.2524, "lr": 8.832533402911056e-06, "epoch": 5.039840637450199, "percentage": 72.0, "elapsed_time": "10:56:22", "remaining_time": "4:15:17"} +{"current_steps": 3170, "total_steps": 4396, "loss": 0.2507, "lr": 8.766741094552368e-06, "epoch": 5.047808764940239, "percentage": 72.11, "elapsed_time": "10:57:23", "remaining_time": "4:14:14"} +{"current_steps": 3175, "total_steps": 4396, "loss": 0.2594, "lr": 8.70112589183147e-06, "epoch": 5.055776892430279, "percentage": 72.22, "elapsed_time": "10:58:29", "remaining_time": "4:13:14"} +{"current_steps": 3180, "total_steps": 4396, "loss": 0.2596, "lr": 8.63568882924979e-06, "epoch": 5.063745019920319, "percentage": 72.34, "elapsed_time": "10:59:31", "remaining_time": "4:12:11"} +{"current_steps": 3185, "total_steps": 4396, "loss": 0.2594, "lr": 8.570430938500155e-06, "epoch": 5.0717131474103585, "percentage": 72.45, "elapsed_time": "11:00:34", "remaining_time": "4:11:09"} +{"current_steps": 3190, "total_steps": 4396, "loss": 0.2487, "lr": 8.50535324845055e-06, "epoch": 5.079681274900398, "percentage": 72.57, "elapsed_time": "11:01:37", "remaining_time": "4:10:07"} +{"current_steps": 3195, "total_steps": 4396, "loss": 0.2654, "lr": 8.44045678512787e-06, "epoch": 5.087649402390438, "percentage": 72.68, "elapsed_time": "11:02:42", "remaining_time": "4:09:06"} +{"current_steps": 3200, "total_steps": 4396, "loss": 0.2634, "lr": 8.375742571701755e-06, "epoch": 5.095617529880478, "percentage": 72.79, "elapsed_time": "11:03:45", "remaining_time": "4:08:04"} +{"current_steps": 3205, "total_steps": 4396, "loss": 0.256, "lr": 8.311211628468477e-06, "epoch": 5.103585657370518, "percentage": 72.91, "elapsed_time": "11:04:51", "remaining_time": "4:07:03"} +{"current_steps": 3210, "total_steps": 4396, "loss": 0.261, "lr": 8.24686497283481e-06, "epoch": 5.111553784860558, "percentage": 73.02, "elapsed_time": "11:05:55", "remaining_time": "4:06:02"} +{"current_steps": 3215, "total_steps": 4396, "loss": 0.2704, "lr": 8.182703619302044e-06, "epoch": 5.1195219123505975, "percentage": 73.13, "elapsed_time": "11:07:02", "remaining_time": "4:05:01"} +{"current_steps": 3220, "total_steps": 4396, "loss": 0.2675, "lr": 8.118728579449937e-06, "epoch": 5.127490039840637, "percentage": 73.25, "elapsed_time": "11:08:00", "remaining_time": "4:03:57"} +{"current_steps": 3225, "total_steps": 4396, "loss": 0.2519, "lr": 8.054940861920797e-06, "epoch": 5.135458167330677, "percentage": 73.36, "elapsed_time": "11:09:05", "remaining_time": "4:02:56"} +{"current_steps": 3230, "total_steps": 4396, "loss": 0.2599, "lr": 7.991341472403593e-06, "epoch": 5.143426294820717, "percentage": 73.48, "elapsed_time": "11:10:06", "remaining_time": "4:01:54"} +{"current_steps": 3235, "total_steps": 4396, "loss": 0.2506, "lr": 7.927931413618049e-06, "epoch": 5.151394422310757, "percentage": 73.59, "elapsed_time": "11:11:09", "remaining_time": "4:00:52"} +{"current_steps": 3240, "total_steps": 4396, "loss": 0.2629, "lr": 7.864711685298894e-06, "epoch": 5.159362549800797, "percentage": 73.7, "elapsed_time": "11:12:07", "remaining_time": "3:59:48"} +{"current_steps": 3245, "total_steps": 4396, "loss": 0.2526, "lr": 7.80168328418005e-06, "epoch": 5.1673306772908365, "percentage": 73.82, "elapsed_time": "11:13:05", "remaining_time": "3:58:44"} +{"current_steps": 3250, "total_steps": 4396, "loss": 0.2422, "lr": 7.738847203978947e-06, "epoch": 5.175298804780876, "percentage": 73.93, "elapsed_time": "11:14:01", "remaining_time": "3:57:40"} +{"current_steps": 3255, "total_steps": 4396, "loss": 0.2598, "lr": 7.676204435380858e-06, "epoch": 5.183266932270916, "percentage": 74.04, "elapsed_time": "11:15:02", "remaining_time": "3:56:37"} +{"current_steps": 3260, "total_steps": 4396, "loss": 0.2628, "lr": 7.613755966023249e-06, "epoch": 5.191235059760956, "percentage": 74.16, "elapsed_time": "11:16:00", "remaining_time": "3:55:33"} +{"current_steps": 3265, "total_steps": 4396, "loss": 0.253, "lr": 7.551502780480251e-06, "epoch": 5.199203187250996, "percentage": 74.27, "elapsed_time": "11:16:56", "remaining_time": "3:54:29"} +{"current_steps": 3270, "total_steps": 4396, "loss": 0.2529, "lr": 7.4894458602470886e-06, "epoch": 5.207171314741036, "percentage": 74.39, "elapsed_time": "11:17:51", "remaining_time": "3:53:24"} +{"current_steps": 3275, "total_steps": 4396, "loss": 0.2668, "lr": 7.427586183724662e-06, "epoch": 5.2151394422310755, "percentage": 74.5, "elapsed_time": "11:18:52", "remaining_time": "3:52:22"} +{"current_steps": 3280, "total_steps": 4396, "loss": 0.2601, "lr": 7.365924726204063e-06, "epoch": 5.223107569721115, "percentage": 74.61, "elapsed_time": "11:19:55", "remaining_time": "3:51:20"} +{"current_steps": 3285, "total_steps": 4396, "loss": 0.256, "lr": 7.3044624598512406e-06, "epoch": 5.231075697211155, "percentage": 74.73, "elapsed_time": "11:21:00", "remaining_time": "3:50:19"} +{"current_steps": 3290, "total_steps": 4396, "loss": 0.2601, "lr": 7.243200353691653e-06, "epoch": 5.239043824701195, "percentage": 74.84, "elapsed_time": "11:21:59", "remaining_time": "3:49:16"} +{"current_steps": 3295, "total_steps": 4396, "loss": 0.2648, "lr": 7.18213937359499e-06, "epoch": 5.247011952191235, "percentage": 74.95, "elapsed_time": "11:23:02", "remaining_time": "3:48:13"} +{"current_steps": 3300, "total_steps": 4396, "loss": 0.2584, "lr": 7.121280482259976e-06, "epoch": 5.254980079681275, "percentage": 75.07, "elapsed_time": "11:24:10", "remaining_time": "3:47:13"} +{"current_steps": 3305, "total_steps": 4396, "loss": 0.2588, "lr": 7.060624639199138e-06, "epoch": 5.2629482071713145, "percentage": 75.18, "elapsed_time": "11:25:11", "remaining_time": "3:46:11"} +{"current_steps": 3310, "total_steps": 4396, "loss": 0.2562, "lr": 7.000172800723715e-06, "epoch": 5.270916334661354, "percentage": 75.3, "elapsed_time": "11:26:16", "remaining_time": "3:45:09"} +{"current_steps": 3315, "total_steps": 4396, "loss": 0.2541, "lr": 6.939925919928585e-06, "epoch": 5.278884462151394, "percentage": 75.41, "elapsed_time": "11:27:14", "remaining_time": "3:44:06"} +{"current_steps": 3320, "total_steps": 4396, "loss": 0.2469, "lr": 6.879884946677205e-06, "epoch": 5.286852589641434, "percentage": 75.52, "elapsed_time": "11:28:13", "remaining_time": "3:43:02"} +{"current_steps": 3325, "total_steps": 4396, "loss": 0.262, "lr": 6.8200508275866726e-06, "epoch": 5.294820717131474, "percentage": 75.64, "elapsed_time": "11:29:14", "remaining_time": "3:42:00"} +{"current_steps": 3330, "total_steps": 4396, "loss": 0.2589, "lr": 6.76042450601277e-06, "epoch": 5.302788844621514, "percentage": 75.75, "elapsed_time": "11:30:14", "remaining_time": "3:40:57"} +{"current_steps": 3335, "total_steps": 4396, "loss": 0.261, "lr": 6.701006922035125e-06, "epoch": 5.3107569721115535, "percentage": 75.86, "elapsed_time": "11:31:11", "remaining_time": "3:39:53"} +{"current_steps": 3340, "total_steps": 4396, "loss": 0.2547, "lr": 6.641799012442349e-06, "epoch": 5.318725099601593, "percentage": 75.98, "elapsed_time": "11:32:11", "remaining_time": "3:38:50"} +{"current_steps": 3345, "total_steps": 4396, "loss": 0.2505, "lr": 6.582801710717291e-06, "epoch": 5.326693227091633, "percentage": 76.09, "elapsed_time": "11:33:11", "remaining_time": "3:37:48"} +{"current_steps": 3350, "total_steps": 4396, "loss": 0.2572, "lr": 6.524015947022333e-06, "epoch": 5.334661354581673, "percentage": 76.21, "elapsed_time": "11:34:12", "remaining_time": "3:36:45"} +{"current_steps": 3355, "total_steps": 4396, "loss": 0.2555, "lr": 6.465442648184692e-06, "epoch": 5.342629482071713, "percentage": 76.32, "elapsed_time": "11:35:18", "remaining_time": "3:35:44"} +{"current_steps": 3360, "total_steps": 4396, "loss": 0.2548, "lr": 6.4070827376818424e-06, "epoch": 5.350597609561753, "percentage": 76.43, "elapsed_time": "11:36:20", "remaining_time": "3:34:42"} +{"current_steps": 3365, "total_steps": 4396, "loss": 0.2566, "lr": 6.348937135626922e-06, "epoch": 5.3585657370517925, "percentage": 76.55, "elapsed_time": "11:37:27", "remaining_time": "3:33:41"} +{"current_steps": 3370, "total_steps": 4396, "loss": 0.2511, "lr": 6.291006758754241e-06, "epoch": 5.366533864541832, "percentage": 76.66, "elapsed_time": "11:38:28", "remaining_time": "3:32:38"} +{"current_steps": 3375, "total_steps": 4396, "loss": 0.2492, "lr": 6.233292520404852e-06, "epoch": 5.374501992031872, "percentage": 76.77, "elapsed_time": "11:39:27", "remaining_time": "3:31:35"} +{"current_steps": 3380, "total_steps": 4396, "loss": 0.2552, "lr": 6.1757953305120975e-06, "epoch": 5.382470119521912, "percentage": 76.89, "elapsed_time": "11:40:27", "remaining_time": "3:30:33"} +{"current_steps": 3385, "total_steps": 4396, "loss": 0.2519, "lr": 6.118516095587321e-06, "epoch": 5.390438247011952, "percentage": 77.0, "elapsed_time": "11:41:28", "remaining_time": "3:29:30"} +{"current_steps": 3390, "total_steps": 4396, "loss": 0.2611, "lr": 6.0614557187055335e-06, "epoch": 5.398406374501992, "percentage": 77.12, "elapsed_time": "11:42:32", "remaining_time": "3:28:28"} +{"current_steps": 3395, "total_steps": 4396, "loss": 0.2629, "lr": 6.004615099491189e-06, "epoch": 5.4063745019920315, "percentage": 77.23, "elapsed_time": "11:43:30", "remaining_time": "3:27:25"} +{"current_steps": 3400, "total_steps": 4396, "loss": 0.2553, "lr": 5.947995134103999e-06, "epoch": 5.414342629482071, "percentage": 77.34, "elapsed_time": "11:44:29", "remaining_time": "3:26:22"} +{"current_steps": 3405, "total_steps": 4396, "loss": 0.2504, "lr": 5.891596715224821e-06, "epoch": 5.422310756972111, "percentage": 77.46, "elapsed_time": "11:45:29", "remaining_time": "3:25:19"} +{"current_steps": 3410, "total_steps": 4396, "loss": 0.2537, "lr": 5.835420732041557e-06, "epoch": 5.430278884462151, "percentage": 77.57, "elapsed_time": "11:46:31", "remaining_time": "3:24:17"} +{"current_steps": 3415, "total_steps": 4396, "loss": 0.2587, "lr": 5.779468070235139e-06, "epoch": 5.438247011952191, "percentage": 77.68, "elapsed_time": "11:47:35", "remaining_time": "3:23:15"} +{"current_steps": 3420, "total_steps": 4396, "loss": 0.2632, "lr": 5.7237396119655995e-06, "epoch": 5.446215139442231, "percentage": 77.8, "elapsed_time": "11:48:40", "remaining_time": "3:22:14"} +{"current_steps": 3425, "total_steps": 4396, "loss": 0.2558, "lr": 5.668236235858109e-06, "epoch": 5.4541832669322705, "percentage": 77.91, "elapsed_time": "11:49:44", "remaining_time": "3:21:12"} +{"current_steps": 3430, "total_steps": 4396, "loss": 0.2576, "lr": 5.61295881698916e-06, "epoch": 5.46215139442231, "percentage": 78.03, "elapsed_time": "11:50:46", "remaining_time": "3:20:10"} +{"current_steps": 3435, "total_steps": 4396, "loss": 0.2639, "lr": 5.557908226872775e-06, "epoch": 5.47011952191235, "percentage": 78.14, "elapsed_time": "11:51:45", "remaining_time": "3:19:07"} +{"current_steps": 3440, "total_steps": 4396, "loss": 0.2468, "lr": 5.503085333446727e-06, "epoch": 5.47808764940239, "percentage": 78.25, "elapsed_time": "11:52:45", "remaining_time": "3:18:04"} +{"current_steps": 3445, "total_steps": 4396, "loss": 0.2572, "lr": 5.448491001058909e-06, "epoch": 5.48605577689243, "percentage": 78.37, "elapsed_time": "11:53:46", "remaining_time": "3:17:02"} +{"current_steps": 3450, "total_steps": 4396, "loss": 0.2592, "lr": 5.394126090453655e-06, "epoch": 5.49402390438247, "percentage": 78.48, "elapsed_time": "11:54:52", "remaining_time": "3:16:01"} +{"current_steps": 3455, "total_steps": 4396, "loss": 0.2582, "lr": 5.3399914587582e-06, "epoch": 5.5019920318725095, "percentage": 78.59, "elapsed_time": "11:55:53", "remaining_time": "3:14:58"} +{"current_steps": 3460, "total_steps": 4396, "loss": 0.2649, "lr": 5.286087959469168e-06, "epoch": 5.509960159362549, "percentage": 78.71, "elapsed_time": "11:56:58", "remaining_time": "3:13:57"} +{"current_steps": 3465, "total_steps": 4396, "loss": 0.2556, "lr": 5.232416442439092e-06, "epoch": 5.517928286852589, "percentage": 78.82, "elapsed_time": "11:58:04", "remaining_time": "3:12:56"} +{"current_steps": 3470, "total_steps": 4396, "loss": 0.2533, "lr": 5.178977753863048e-06, "epoch": 5.525896414342629, "percentage": 78.94, "elapsed_time": "11:59:09", "remaining_time": "3:11:54"} +{"current_steps": 3475, "total_steps": 4396, "loss": 0.2545, "lr": 5.125772736265271e-06, "epoch": 5.533864541832669, "percentage": 79.05, "elapsed_time": "12:00:07", "remaining_time": "3:10:51"} +{"current_steps": 3480, "total_steps": 4396, "loss": 0.2543, "lr": 5.072802228485925e-06, "epoch": 5.541832669322709, "percentage": 79.16, "elapsed_time": "12:01:08", "remaining_time": "3:09:48"} +{"current_steps": 3485, "total_steps": 4396, "loss": 0.2639, "lr": 5.020067065667826e-06, "epoch": 5.5498007968127485, "percentage": 79.28, "elapsed_time": "12:02:18", "remaining_time": "3:08:48"} +{"current_steps": 3490, "total_steps": 4396, "loss": 0.262, "lr": 4.967568079243301e-06, "epoch": 5.557768924302788, "percentage": 79.39, "elapsed_time": "12:03:23", "remaining_time": "3:07:47"} +{"current_steps": 3495, "total_steps": 4396, "loss": 0.2691, "lr": 4.915306096921093e-06, "epoch": 5.565737051792829, "percentage": 79.5, "elapsed_time": "12:04:22", "remaining_time": "3:06:44"} +{"current_steps": 3500, "total_steps": 4396, "loss": 0.2521, "lr": 4.8632819426732705e-06, "epoch": 5.573705179282869, "percentage": 79.62, "elapsed_time": "12:05:20", "remaining_time": "3:05:41"} +{"current_steps": 3505, "total_steps": 4396, "loss": 0.2618, "lr": 4.811496436722285e-06, "epoch": 5.581673306772909, "percentage": 79.73, "elapsed_time": "12:06:22", "remaining_time": "3:04:39"} +{"current_steps": 3510, "total_steps": 4396, "loss": 0.2454, "lr": 4.7599503955279945e-06, "epoch": 5.589641434262949, "percentage": 79.85, "elapsed_time": "12:07:25", "remaining_time": "3:03:36"} +{"current_steps": 3515, "total_steps": 4396, "loss": 0.255, "lr": 4.708644631774819e-06, "epoch": 5.597609561752988, "percentage": 79.96, "elapsed_time": "12:08:25", "remaining_time": "3:02:34"} +{"current_steps": 3520, "total_steps": 4396, "loss": 0.2599, "lr": 4.657579954358924e-06, "epoch": 5.605577689243028, "percentage": 80.07, "elapsed_time": "12:09:27", "remaining_time": "3:01:32"} +{"current_steps": 3525, "total_steps": 4396, "loss": 0.2564, "lr": 4.606757168375451e-06, "epoch": 5.613545816733068, "percentage": 80.19, "elapsed_time": "12:10:26", "remaining_time": "3:00:29"} +{"current_steps": 3530, "total_steps": 4396, "loss": 0.2587, "lr": 4.556177075105857e-06, "epoch": 5.621513944223108, "percentage": 80.3, "elapsed_time": "12:11:28", "remaining_time": "2:59:26"} +{"current_steps": 3535, "total_steps": 4396, "loss": 0.2525, "lr": 4.505840472005236e-06, "epoch": 5.629482071713148, "percentage": 80.41, "elapsed_time": "12:12:31", "remaining_time": "2:58:25"} +{"current_steps": 3540, "total_steps": 4396, "loss": 0.2712, "lr": 4.455748152689796e-06, "epoch": 5.637450199203188, "percentage": 80.53, "elapsed_time": "12:13:37", "remaining_time": "2:57:23"} +{"current_steps": 3545, "total_steps": 4396, "loss": 0.2592, "lr": 4.405900906924303e-06, "epoch": 5.645418326693227, "percentage": 80.64, "elapsed_time": "12:14:39", "remaining_time": "2:56:21"} +{"current_steps": 3550, "total_steps": 4396, "loss": 0.2576, "lr": 4.35629952060965e-06, "epoch": 5.653386454183267, "percentage": 80.76, "elapsed_time": "12:15:39", "remaining_time": "2:55:18"} +{"current_steps": 3555, "total_steps": 4396, "loss": 0.2576, "lr": 4.306944775770479e-06, "epoch": 5.661354581673307, "percentage": 80.87, "elapsed_time": "12:16:35", "remaining_time": "2:54:15"} +{"current_steps": 3560, "total_steps": 4396, "loss": 0.2589, "lr": 4.2578374505428145e-06, "epoch": 5.669322709163347, "percentage": 80.98, "elapsed_time": "12:17:35", "remaining_time": "2:53:12"} +{"current_steps": 3565, "total_steps": 4396, "loss": 0.2556, "lr": 4.208978319161843e-06, "epoch": 5.677290836653387, "percentage": 81.1, "elapsed_time": "12:18:32", "remaining_time": "2:52:09"} +{"current_steps": 3570, "total_steps": 4396, "loss": 0.2572, "lr": 4.160368151949659e-06, "epoch": 5.685258964143427, "percentage": 81.21, "elapsed_time": "12:19:31", "remaining_time": "2:51:06"} +{"current_steps": 3575, "total_steps": 4396, "loss": 0.2632, "lr": 4.112007715303148e-06, "epoch": 5.693227091633466, "percentage": 81.32, "elapsed_time": "12:20:37", "remaining_time": "2:50:05"} +{"current_steps": 3580, "total_steps": 4396, "loss": 0.2552, "lr": 4.0638977716819105e-06, "epoch": 5.701195219123506, "percentage": 81.44, "elapsed_time": "12:21:36", "remaining_time": "2:49:02"} +{"current_steps": 3585, "total_steps": 4396, "loss": 0.2442, "lr": 4.016039079596204e-06, "epoch": 5.709163346613546, "percentage": 81.55, "elapsed_time": "12:22:33", "remaining_time": "2:47:58"} +{"current_steps": 3590, "total_steps": 4396, "loss": 0.2607, "lr": 3.968432393595034e-06, "epoch": 5.717131474103586, "percentage": 81.67, "elapsed_time": "12:23:38", "remaining_time": "2:46:57"} +{"current_steps": 3595, "total_steps": 4396, "loss": 0.2651, "lr": 3.921078464254204e-06, "epoch": 5.725099601593626, "percentage": 81.78, "elapsed_time": "12:24:43", "remaining_time": "2:45:55"} +{"current_steps": 3600, "total_steps": 4396, "loss": 0.2568, "lr": 3.873978038164537e-06, "epoch": 5.733067729083666, "percentage": 81.89, "elapsed_time": "12:25:47", "remaining_time": "2:44:54"} +{"current_steps": 3605, "total_steps": 4396, "loss": 0.2501, "lr": 3.8271318579200565e-06, "epoch": 5.741035856573705, "percentage": 82.01, "elapsed_time": "12:26:50", "remaining_time": "2:43:52"} +{"current_steps": 3610, "total_steps": 4396, "loss": 0.2651, "lr": 3.780540662106302e-06, "epoch": 5.749003984063745, "percentage": 82.12, "elapsed_time": "12:27:51", "remaining_time": "2:42:49"} +{"current_steps": 3615, "total_steps": 4396, "loss": 0.2631, "lr": 3.734205185288693e-06, "epoch": 5.756972111553785, "percentage": 82.23, "elapsed_time": "12:28:55", "remaining_time": "2:41:47"} +{"current_steps": 3620, "total_steps": 4396, "loss": 0.2483, "lr": 3.6881261580009242e-06, "epoch": 5.764940239043825, "percentage": 82.35, "elapsed_time": "12:29:56", "remaining_time": "2:40:45"} +{"current_steps": 3625, "total_steps": 4396, "loss": 0.271, "lr": 3.642304306733464e-06, "epoch": 5.772908366533865, "percentage": 82.46, "elapsed_time": "12:30:58", "remaining_time": "2:39:43"} +{"current_steps": 3630, "total_steps": 4396, "loss": 0.2613, "lr": 3.596740353922088e-06, "epoch": 5.780876494023905, "percentage": 82.58, "elapsed_time": "12:31:59", "remaining_time": "2:38:41"} +{"current_steps": 3635, "total_steps": 4396, "loss": 0.2499, "lr": 3.5514350179365176e-06, "epoch": 5.788844621513944, "percentage": 82.69, "elapsed_time": "12:33:03", "remaining_time": "2:37:39"} +{"current_steps": 3640, "total_steps": 4396, "loss": 0.2551, "lr": 3.5063890130690513e-06, "epoch": 5.796812749003984, "percentage": 82.8, "elapsed_time": "12:34:02", "remaining_time": "2:36:36"} +{"current_steps": 3645, "total_steps": 4396, "loss": 0.2455, "lr": 3.461603049523334e-06, "epoch": 5.804780876494024, "percentage": 82.92, "elapsed_time": "12:35:05", "remaining_time": "2:35:34"} +{"current_steps": 3650, "total_steps": 4396, "loss": 0.2585, "lr": 3.4170778334031595e-06, "epoch": 5.812749003984064, "percentage": 83.03, "elapsed_time": "12:36:06", "remaining_time": "2:34:32"} +{"current_steps": 3655, "total_steps": 4396, "loss": 0.2667, "lr": 3.3728140667013155e-06, "epoch": 5.820717131474104, "percentage": 83.14, "elapsed_time": "12:37:04", "remaining_time": "2:33:29"} +{"current_steps": 3660, "total_steps": 4396, "loss": 0.2593, "lr": 3.3288124472885318e-06, "epoch": 5.828685258964144, "percentage": 83.26, "elapsed_time": "12:38:09", "remaining_time": "2:32:27"} +{"current_steps": 3665, "total_steps": 4396, "loss": 0.2554, "lr": 3.2850736689024877e-06, "epoch": 5.836653386454183, "percentage": 83.37, "elapsed_time": "12:39:15", "remaining_time": "2:31:26"} +{"current_steps": 3670, "total_steps": 4396, "loss": 0.2603, "lr": 3.2415984211368446e-06, "epoch": 5.844621513944223, "percentage": 83.48, "elapsed_time": "12:40:12", "remaining_time": "2:30:23"} +{"current_steps": 3675, "total_steps": 4396, "loss": 0.2491, "lr": 3.1983873894304105e-06, "epoch": 5.852589641434263, "percentage": 83.6, "elapsed_time": "12:41:10", "remaining_time": "2:29:20"} +{"current_steps": 3680, "total_steps": 4396, "loss": 0.255, "lr": 3.1554412550562952e-06, "epoch": 5.860557768924303, "percentage": 83.71, "elapsed_time": "12:42:11", "remaining_time": "2:28:17"} +{"current_steps": 3685, "total_steps": 4396, "loss": 0.2602, "lr": 3.1127606951112056e-06, "epoch": 5.868525896414343, "percentage": 83.83, "elapsed_time": "12:43:17", "remaining_time": "2:27:16"} +{"current_steps": 3690, "total_steps": 4396, "loss": 0.2549, "lr": 3.070346382504743e-06, "epoch": 5.876494023904383, "percentage": 83.94, "elapsed_time": "12:44:19", "remaining_time": "2:26:14"} +{"current_steps": 3695, "total_steps": 4396, "loss": 0.2671, "lr": 3.028198985948796e-06, "epoch": 5.884462151394422, "percentage": 84.05, "elapsed_time": "12:45:23", "remaining_time": "2:25:12"} +{"current_steps": 3700, "total_steps": 4396, "loss": 0.2644, "lr": 2.9863191699470295e-06, "epoch": 5.892430278884462, "percentage": 84.17, "elapsed_time": "12:46:19", "remaining_time": "2:24:09"} +{"current_steps": 3705, "total_steps": 4396, "loss": 0.2505, "lr": 2.9447075947843573e-06, "epoch": 5.900398406374502, "percentage": 84.28, "elapsed_time": "12:47:25", "remaining_time": "2:23:07"} +{"current_steps": 3710, "total_steps": 4396, "loss": 0.2571, "lr": 2.9033649165165802e-06, "epoch": 5.908366533864542, "percentage": 84.39, "elapsed_time": "12:48:32", "remaining_time": "2:22:06"} +{"current_steps": 3715, "total_steps": 4396, "loss": 0.2475, "lr": 2.8622917869600053e-06, "epoch": 5.916334661354582, "percentage": 84.51, "elapsed_time": "12:49:38", "remaining_time": "2:21:05"} +{"current_steps": 3720, "total_steps": 4396, "loss": 0.2657, "lr": 2.821488853681187e-06, "epoch": 5.924302788844622, "percentage": 84.62, "elapsed_time": "12:50:41", "remaining_time": "2:20:03"} +{"current_steps": 3725, "total_steps": 4396, "loss": 0.2551, "lr": 2.7809567599867304e-06, "epoch": 5.932270916334661, "percentage": 84.74, "elapsed_time": "12:51:43", "remaining_time": "2:19:00"} +{"current_steps": 3730, "total_steps": 4396, "loss": 0.2541, "lr": 2.7406961449131153e-06, "epoch": 5.940239043824701, "percentage": 84.85, "elapsed_time": "12:52:48", "remaining_time": "2:17:59"} +{"current_steps": 3735, "total_steps": 4396, "loss": 0.2638, "lr": 2.7007076432166402e-06, "epoch": 5.948207171314741, "percentage": 84.96, "elapsed_time": "12:53:48", "remaining_time": "2:16:56"} +{"current_steps": 3740, "total_steps": 4396, "loss": 0.2546, "lr": 2.660991885363433e-06, "epoch": 5.956175298804781, "percentage": 85.08, "elapsed_time": "12:54:45", "remaining_time": "2:15:53"} +{"current_steps": 3745, "total_steps": 4396, "loss": 0.2667, "lr": 2.621549497519471e-06, "epoch": 5.964143426294821, "percentage": 85.19, "elapsed_time": "12:55:47", "remaining_time": "2:14:51"} +{"current_steps": 3750, "total_steps": 4396, "loss": 0.249, "lr": 2.5823811015407386e-06, "epoch": 5.972111553784861, "percentage": 85.3, "elapsed_time": "12:56:50", "remaining_time": "2:13:49"} +{"current_steps": 3755, "total_steps": 4396, "loss": 0.2509, "lr": 2.5434873149634045e-06, "epoch": 5.9800796812749, "percentage": 85.42, "elapsed_time": "12:57:47", "remaining_time": "2:12:46"} +{"current_steps": 3760, "total_steps": 4396, "loss": 0.25, "lr": 2.5048687509941163e-06, "epoch": 5.98804780876494, "percentage": 85.53, "elapsed_time": "12:58:48", "remaining_time": "2:11:43"} +{"current_steps": 3765, "total_steps": 4396, "loss": 0.2559, "lr": 2.4665260185002815e-06, "epoch": 5.99601593625498, "percentage": 85.65, "elapsed_time": "12:59:43", "remaining_time": "2:10:40"} +{"current_steps": 3770, "total_steps": 4396, "loss": 0.2497, "lr": 2.428459722000529e-06, "epoch": 6.003187250996016, "percentage": 85.76, "elapsed_time": "13:00:45", "remaining_time": "2:09:38"} +{"current_steps": 3775, "total_steps": 4396, "loss": 0.255, "lr": 2.39067046165512e-06, "epoch": 6.011155378486055, "percentage": 85.87, "elapsed_time": "13:01:54", "remaining_time": "2:08:37"} +{"current_steps": 3780, "total_steps": 4396, "loss": 0.2706, "lr": 2.3531588332565238e-06, "epoch": 6.019123505976095, "percentage": 85.99, "elapsed_time": "13:02:51", "remaining_time": "2:07:34"} +{"current_steps": 3785, "total_steps": 4396, "loss": 0.2473, "lr": 2.3159254282200207e-06, "epoch": 6.027091633466135, "percentage": 86.1, "elapsed_time": "13:03:55", "remaining_time": "2:06:32"} +{"current_steps": 3790, "total_steps": 4396, "loss": 0.2451, "lr": 2.27897083357435e-06, "epoch": 6.035059760956175, "percentage": 86.21, "elapsed_time": "13:04:57", "remaining_time": "2:05:30"} +{"current_steps": 3795, "total_steps": 4396, "loss": 0.2462, "lr": 2.242295631952496e-06, "epoch": 6.043027888446215, "percentage": 86.33, "elapsed_time": "13:05:56", "remaining_time": "2:04:28"} +{"current_steps": 3800, "total_steps": 4396, "loss": 0.2422, "lr": 2.205900401582466e-06, "epoch": 6.050996015936255, "percentage": 86.44, "elapsed_time": "13:07:02", "remaining_time": "2:03:26"} +{"current_steps": 3805, "total_steps": 4396, "loss": 0.2608, "lr": 2.169785716278199e-06, "epoch": 6.058964143426294, "percentage": 86.56, "elapsed_time": "13:08:04", "remaining_time": "2:02:24"} +{"current_steps": 3810, "total_steps": 4396, "loss": 0.2531, "lr": 2.133952145430502e-06, "epoch": 6.066932270916334, "percentage": 86.67, "elapsed_time": "13:09:07", "remaining_time": "2:01:22"} +{"current_steps": 3815, "total_steps": 4396, "loss": 0.2555, "lr": 2.0984002539980785e-06, "epoch": 6.074900398406374, "percentage": 86.78, "elapsed_time": "13:10:10", "remaining_time": "2:00:20"} +{"current_steps": 3820, "total_steps": 4396, "loss": 0.253, "lr": 2.0631306024986284e-06, "epoch": 6.082868525896414, "percentage": 86.9, "elapsed_time": "13:11:15", "remaining_time": "1:59:18"} +{"current_steps": 3825, "total_steps": 4396, "loss": 0.251, "lr": 2.0281437469999976e-06, "epoch": 6.090836653386454, "percentage": 87.01, "elapsed_time": "13:12:16", "remaining_time": "1:58:16"} +{"current_steps": 3830, "total_steps": 4396, "loss": 0.2492, "lr": 1.9934402391114283e-06, "epoch": 6.098804780876494, "percentage": 87.12, "elapsed_time": "13:13:15", "remaining_time": "1:57:13"} +{"current_steps": 3835, "total_steps": 4396, "loss": 0.2474, "lr": 1.9590206259748413e-06, "epoch": 6.106772908366533, "percentage": 87.24, "elapsed_time": "13:14:21", "remaining_time": "1:56:12"} +{"current_steps": 3840, "total_steps": 4396, "loss": 0.2401, "lr": 1.924885450256222e-06, "epoch": 6.114741035856574, "percentage": 87.35, "elapsed_time": "13:15:21", "remaining_time": "1:55:09"} +{"current_steps": 3845, "total_steps": 4396, "loss": 0.2419, "lr": 1.8910352501370677e-06, "epoch": 6.122709163346614, "percentage": 87.47, "elapsed_time": "13:16:21", "remaining_time": "1:54:07"} +{"current_steps": 3850, "total_steps": 4396, "loss": 0.2562, "lr": 1.8574705593058962e-06, "epoch": 6.130677290836654, "percentage": 87.58, "elapsed_time": "13:17:25", "remaining_time": "1:53:05"} +{"current_steps": 3855, "total_steps": 4396, "loss": 0.2498, "lr": 1.8241919069498281e-06, "epoch": 6.138645418326694, "percentage": 87.69, "elapsed_time": "13:18:29", "remaining_time": "1:52:03"} +{"current_steps": 3860, "total_steps": 4396, "loss": 0.2533, "lr": 1.7911998177462563e-06, "epoch": 6.1466135458167335, "percentage": 87.81, "elapsed_time": "13:19:32", "remaining_time": "1:51:01"} +{"current_steps": 3865, "total_steps": 4396, "loss": 0.2446, "lr": 1.758494811854554e-06, "epoch": 6.154581673306773, "percentage": 87.92, "elapsed_time": "13:20:35", "remaining_time": "1:49:59"} +{"current_steps": 3870, "total_steps": 4396, "loss": 0.2484, "lr": 1.7260774049079044e-06, "epoch": 6.162549800796813, "percentage": 88.03, "elapsed_time": "13:21:32", "remaining_time": "1:48:56"} +{"current_steps": 3875, "total_steps": 4396, "loss": 0.2456, "lr": 1.6939481080051324e-06, "epoch": 6.170517928286853, "percentage": 88.15, "elapsed_time": "13:22:36", "remaining_time": "1:47:54"} +{"current_steps": 3880, "total_steps": 4396, "loss": 0.2456, "lr": 1.6621074277026838e-06, "epoch": 6.178486055776893, "percentage": 88.26, "elapsed_time": "13:23:39", "remaining_time": "1:46:52"} +{"current_steps": 3885, "total_steps": 4396, "loss": 0.2487, "lr": 1.630555866006609e-06, "epoch": 6.186454183266933, "percentage": 88.38, "elapsed_time": "13:24:44", "remaining_time": "1:45:50"} +{"current_steps": 3890, "total_steps": 4396, "loss": 0.2576, "lr": 1.5992939203646706e-06, "epoch": 6.1944223107569725, "percentage": 88.49, "elapsed_time": "13:25:47", "remaining_time": "1:44:48"} +{"current_steps": 3895, "total_steps": 4396, "loss": 0.2543, "lr": 1.5683220836584822e-06, "epoch": 6.202390438247012, "percentage": 88.6, "elapsed_time": "13:26:52", "remaining_time": "1:43:47"} +{"current_steps": 3900, "total_steps": 4396, "loss": 0.2511, "lr": 1.5376408441957513e-06, "epoch": 6.210358565737052, "percentage": 88.72, "elapsed_time": "13:27:56", "remaining_time": "1:42:45"} +{"current_steps": 3905, "total_steps": 4396, "loss": 0.244, "lr": 1.5072506857025748e-06, "epoch": 6.218326693227092, "percentage": 88.83, "elapsed_time": "13:28:57", "remaining_time": "1:41:42"} +{"current_steps": 3910, "total_steps": 4396, "loss": 0.2624, "lr": 1.4771520873158118e-06, "epoch": 6.226294820717132, "percentage": 88.94, "elapsed_time": "13:30:02", "remaining_time": "1:40:41"} +{"current_steps": 3915, "total_steps": 4396, "loss": 0.2428, "lr": 1.4473455235755385e-06, "epoch": 6.234262948207172, "percentage": 89.06, "elapsed_time": "13:31:07", "remaining_time": "1:39:39"} +{"current_steps": 3920, "total_steps": 4396, "loss": 0.2493, "lr": 1.417831464417545e-06, "epoch": 6.2422310756972115, "percentage": 89.17, "elapsed_time": "13:32:07", "remaining_time": "1:38:36"} +{"current_steps": 3925, "total_steps": 4396, "loss": 0.2516, "lr": 1.3886103751659462e-06, "epoch": 6.250199203187251, "percentage": 89.29, "elapsed_time": "13:33:10", "remaining_time": "1:37:34"} +{"current_steps": 3930, "total_steps": 4396, "loss": 0.239, "lr": 1.3596827165258474e-06, "epoch": 6.258167330677291, "percentage": 89.4, "elapsed_time": "13:34:08", "remaining_time": "1:36:32"} +{"current_steps": 3935, "total_steps": 4396, "loss": 0.2531, "lr": 1.331048944576061e-06, "epoch": 6.266135458167331, "percentage": 89.51, "elapsed_time": "13:35:08", "remaining_time": "1:35:29"} +{"current_steps": 3940, "total_steps": 4396, "loss": 0.2499, "lr": 1.3027095107619347e-06, "epoch": 6.274103585657371, "percentage": 89.63, "elapsed_time": "13:36:17", "remaining_time": "1:34:28"} +{"current_steps": 3945, "total_steps": 4396, "loss": 0.2528, "lr": 1.2746648618882197e-06, "epoch": 6.282071713147411, "percentage": 89.74, "elapsed_time": "13:37:18", "remaining_time": "1:33:26"} +{"current_steps": 3950, "total_steps": 4396, "loss": 0.245, "lr": 1.2469154401120442e-06, "epoch": 6.2900398406374505, "percentage": 89.85, "elapsed_time": "13:38:20", "remaining_time": "1:32:24"} +{"current_steps": 3955, "total_steps": 4396, "loss": 0.2494, "lr": 1.2194616829359206e-06, "epoch": 6.29800796812749, "percentage": 89.97, "elapsed_time": "13:39:17", "remaining_time": "1:31:21"} +{"current_steps": 3960, "total_steps": 4396, "loss": 0.2722, "lr": 1.1923040232008653e-06, "epoch": 6.30597609561753, "percentage": 90.08, "elapsed_time": "13:40:23", "remaining_time": "1:30:19"} +{"current_steps": 3965, "total_steps": 4396, "loss": 0.2485, "lr": 1.1654428890795622e-06, "epoch": 6.31394422310757, "percentage": 90.2, "elapsed_time": "13:41:24", "remaining_time": "1:29:17"} +{"current_steps": 3970, "total_steps": 4396, "loss": 0.2598, "lr": 1.1388787040696215e-06, "epoch": 6.32191235059761, "percentage": 90.31, "elapsed_time": "13:42:27", "remaining_time": "1:28:15"} +{"current_steps": 3975, "total_steps": 4396, "loss": 0.2441, "lr": 1.1126118869868985e-06, "epoch": 6.32988047808765, "percentage": 90.42, "elapsed_time": "13:43:28", "remaining_time": "1:27:12"} +{"current_steps": 3980, "total_steps": 4396, "loss": 0.2723, "lr": 1.0866428519588923e-06, "epoch": 6.3378486055776895, "percentage": 90.54, "elapsed_time": "13:44:33", "remaining_time": "1:26:11"} +{"current_steps": 3985, "total_steps": 4396, "loss": 0.2485, "lr": 1.060972008418204e-06, "epoch": 6.345816733067729, "percentage": 90.65, "elapsed_time": "13:45:31", "remaining_time": "1:25:08"} +{"current_steps": 3990, "total_steps": 4396, "loss": 0.2592, "lr": 1.0355997610961132e-06, "epoch": 6.353784860557769, "percentage": 90.76, "elapsed_time": "13:46:34", "remaining_time": "1:24:06"} +{"current_steps": 3995, "total_steps": 4396, "loss": 0.2562, "lr": 1.0105265100161564e-06, "epoch": 6.361752988047809, "percentage": 90.88, "elapsed_time": "13:47:38", "remaining_time": "1:23:04"} +{"current_steps": 4000, "total_steps": 4396, "loss": 0.2489, "lr": 9.857526504878524e-07, "epoch": 6.369721115537849, "percentage": 90.99, "elapsed_time": "13:48:39", "remaining_time": "1:22:02"} +{"current_steps": 4005, "total_steps": 4396, "loss": 0.259, "lr": 9.612785731004525e-07, "epoch": 6.377689243027889, "percentage": 91.11, "elapsed_time": "13:49:45", "remaining_time": "1:21:00"} +{"current_steps": 4010, "total_steps": 4396, "loss": 0.2471, "lr": 9.371046637167835e-07, "epoch": 6.3856573705179285, "percentage": 91.22, "elapsed_time": "13:50:45", "remaining_time": "1:19:58"} +{"current_steps": 4015, "total_steps": 4396, "loss": 0.2552, "lr": 9.132313034671792e-07, "epoch": 6.393625498007968, "percentage": 91.33, "elapsed_time": "13:51:47", "remaining_time": "1:18:55"} +{"current_steps": 4020, "total_steps": 4396, "loss": 0.2447, "lr": 8.89658868743446e-07, "epoch": 6.401593625498008, "percentage": 91.45, "elapsed_time": "13:52:43", "remaining_time": "1:17:53"} +{"current_steps": 4025, "total_steps": 4396, "loss": 0.249, "lr": 8.663877311929569e-07, "epoch": 6.409561752988048, "percentage": 91.56, "elapsed_time": "13:53:38", "remaining_time": "1:16:50"} +{"current_steps": 4030, "total_steps": 4396, "loss": 0.2602, "lr": 8.43418257712767e-07, "epoch": 6.417529880478088, "percentage": 91.67, "elapsed_time": "13:54:43", "remaining_time": "1:15:48"} +{"current_steps": 4035, "total_steps": 4396, "loss": 0.2581, "lr": 8.207508104438467e-07, "epoch": 6.425498007968128, "percentage": 91.79, "elapsed_time": "13:55:40", "remaining_time": "1:14:45"} +{"current_steps": 4040, "total_steps": 4396, "loss": 0.2379, "lr": 7.983857467653599e-07, "epoch": 6.4334661354581675, "percentage": 91.9, "elapsed_time": "13:56:43", "remaining_time": "1:13:43"} +{"current_steps": 4045, "total_steps": 4396, "loss": 0.2443, "lr": 7.763234192890378e-07, "epoch": 6.441434262948207, "percentage": 92.02, "elapsed_time": "13:57:41", "remaining_time": "1:12:41"} +{"current_steps": 4050, "total_steps": 4396, "loss": 0.2472, "lr": 7.545641758536204e-07, "epoch": 6.449402390438247, "percentage": 92.13, "elapsed_time": "13:58:40", "remaining_time": "1:11:38"} +{"current_steps": 4055, "total_steps": 4396, "loss": 0.2498, "lr": 7.331083595193566e-07, "epoch": 6.457370517928287, "percentage": 92.24, "elapsed_time": "13:59:37", "remaining_time": "1:10:36"} +{"current_steps": 4060, "total_steps": 4396, "loss": 0.2509, "lr": 7.119563085626246e-07, "epoch": 6.465338645418327, "percentage": 92.36, "elapsed_time": "14:00:38", "remaining_time": "1:09:34"} +{"current_steps": 4065, "total_steps": 4396, "loss": 0.2526, "lr": 6.911083564705689e-07, "epoch": 6.473306772908367, "percentage": 92.47, "elapsed_time": "14:01:41", "remaining_time": "1:08:32"} +{"current_steps": 4070, "total_steps": 4396, "loss": 0.2428, "lr": 6.70564831935856e-07, "epoch": 6.4812749003984065, "percentage": 92.58, "elapsed_time": "14:02:40", "remaining_time": "1:07:29"} +{"current_steps": 4075, "total_steps": 4396, "loss": 0.2542, "lr": 6.503260588514959e-07, "epoch": 6.489243027888446, "percentage": 92.7, "elapsed_time": "14:03:41", "remaining_time": "1:06:27"} +{"current_steps": 4080, "total_steps": 4396, "loss": 0.2563, "lr": 6.303923563057291e-07, "epoch": 6.497211155378486, "percentage": 92.81, "elapsed_time": "14:04:50", "remaining_time": "1:05:26"} +{"current_steps": 4085, "total_steps": 4396, "loss": 0.2388, "lr": 6.107640385769964e-07, "epoch": 6.505179282868526, "percentage": 92.93, "elapsed_time": "14:05:52", "remaining_time": "1:04:23"} +{"current_steps": 4090, "total_steps": 4396, "loss": 0.2508, "lr": 5.91441415128986e-07, "epoch": 6.513147410358566, "percentage": 93.04, "elapsed_time": "14:06:52", "remaining_time": "1:03:21"} +{"current_steps": 4095, "total_steps": 4396, "loss": 0.2541, "lr": 5.724247906057545e-07, "epoch": 6.521115537848606, "percentage": 93.15, "elapsed_time": "14:07:54", "remaining_time": "1:02:19"} +{"current_steps": 4100, "total_steps": 4396, "loss": 0.251, "lr": 5.537144648269221e-07, "epoch": 6.5290836653386455, "percentage": 93.27, "elapsed_time": "14:09:00", "remaining_time": "1:01:17"} +{"current_steps": 4105, "total_steps": 4396, "loss": 0.2517, "lr": 5.35310732782941e-07, "epoch": 6.537051792828685, "percentage": 93.38, "elapsed_time": "14:10:08", "remaining_time": "1:00:15"} +{"current_steps": 4110, "total_steps": 4396, "loss": 0.2613, "lr": 5.172138846304609e-07, "epoch": 6.545019920318725, "percentage": 93.49, "elapsed_time": "14:11:16", "remaining_time": "0:59:14"} +{"current_steps": 4115, "total_steps": 4396, "loss": 0.2488, "lr": 4.994242056877352e-07, "epoch": 6.552988047808765, "percentage": 93.61, "elapsed_time": "14:12:13", "remaining_time": "0:58:11"} +{"current_steps": 4120, "total_steps": 4396, "loss": 0.2596, "lr": 4.819419764301314e-07, "epoch": 6.560956175298805, "percentage": 93.72, "elapsed_time": "14:13:23", "remaining_time": "0:57:10"} +{"current_steps": 4125, "total_steps": 4396, "loss": 0.2435, "lr": 4.647674724857143e-07, "epoch": 6.568924302788845, "percentage": 93.84, "elapsed_time": "14:14:26", "remaining_time": "0:56:08"} +{"current_steps": 4130, "total_steps": 4396, "loss": 0.2508, "lr": 4.4790096463088293e-07, "epoch": 6.5768924302788845, "percentage": 93.95, "elapsed_time": "14:15:24", "remaining_time": "0:55:05"} +{"current_steps": 4135, "total_steps": 4396, "loss": 0.2498, "lr": 4.313427187861252e-07, "epoch": 6.584860557768924, "percentage": 94.06, "elapsed_time": "14:16:26", "remaining_time": "0:54:03"} +{"current_steps": 4140, "total_steps": 4396, "loss": 0.2631, "lr": 4.1509299601180553e-07, "epoch": 6.592828685258964, "percentage": 94.18, "elapsed_time": "14:17:25", "remaining_time": "0:53:01"} +{"current_steps": 4145, "total_steps": 4396, "loss": 0.2623, "lr": 3.9915205250406153e-07, "epoch": 6.600796812749004, "percentage": 94.29, "elapsed_time": "14:18:27", "remaining_time": "0:51:59"} +{"current_steps": 4150, "total_steps": 4396, "loss": 0.2481, "lr": 3.83520139590754e-07, "epoch": 6.608764940239044, "percentage": 94.4, "elapsed_time": "14:19:30", "remaining_time": "0:50:56"} +{"current_steps": 4155, "total_steps": 4396, "loss": 0.2511, "lr": 3.6819750372751874e-07, "epoch": 6.616733067729084, "percentage": 94.52, "elapsed_time": "14:20:33", "remaining_time": "0:49:54"} +{"current_steps": 4160, "total_steps": 4396, "loss": 0.2533, "lr": 3.531843864938611e-07, "epoch": 6.6247011952191235, "percentage": 94.63, "elapsed_time": "14:21:33", "remaining_time": "0:48:52"} +{"current_steps": 4165, "total_steps": 4396, "loss": 0.242, "lr": 3.384810245893677e-07, "epoch": 6.632669322709163, "percentage": 94.75, "elapsed_time": "14:22:38", "remaining_time": "0:47:50"} +{"current_steps": 4170, "total_steps": 4396, "loss": 0.2534, "lr": 3.2408764982996056e-07, "epoch": 6.640637450199203, "percentage": 94.86, "elapsed_time": "14:23:35", "remaining_time": "0:46:48"} +{"current_steps": 4175, "total_steps": 4396, "loss": 0.2596, "lr": 3.1000448914425106e-07, "epoch": 6.648605577689243, "percentage": 94.97, "elapsed_time": "14:24:38", "remaining_time": "0:45:46"} +{"current_steps": 4180, "total_steps": 4396, "loss": 0.2616, "lr": 2.9623176456995195e-07, "epoch": 6.656573705179283, "percentage": 95.09, "elapsed_time": "14:25:46", "remaining_time": "0:44:44"} +{"current_steps": 4185, "total_steps": 4396, "loss": 0.2407, "lr": 2.8276969325038874e-07, "epoch": 6.664541832669323, "percentage": 95.2, "elapsed_time": "14:26:51", "remaining_time": "0:43:42"} +{"current_steps": 4190, "total_steps": 4396, "loss": 0.244, "lr": 2.696184874310692e-07, "epoch": 6.6725099601593625, "percentage": 95.31, "elapsed_time": "14:27:50", "remaining_time": "0:42:40"} +{"current_steps": 4195, "total_steps": 4396, "loss": 0.2674, "lr": 2.5677835445633515e-07, "epoch": 6.680478087649402, "percentage": 95.43, "elapsed_time": "14:28:52", "remaining_time": "0:41:37"} +{"current_steps": 4200, "total_steps": 4396, "loss": 0.2502, "lr": 2.44249496766098e-07, "epoch": 6.688446215139442, "percentage": 95.54, "elapsed_time": "14:29:54", "remaining_time": "0:40:35"} +{"current_steps": 4205, "total_steps": 4396, "loss": 0.2452, "lr": 2.3203211189264607e-07, "epoch": 6.696414342629482, "percentage": 95.66, "elapsed_time": "14:30:57", "remaining_time": "0:39:33"} +{"current_steps": 4210, "total_steps": 4396, "loss": 0.2379, "lr": 2.201263924575292e-07, "epoch": 6.704382470119522, "percentage": 95.77, "elapsed_time": "14:31:53", "remaining_time": "0:38:31"} +{"current_steps": 4215, "total_steps": 4396, "loss": 0.2694, "lr": 2.0853252616852338e-07, "epoch": 6.712350597609562, "percentage": 95.88, "elapsed_time": "14:32:59", "remaining_time": "0:37:29"} +{"current_steps": 4220, "total_steps": 4396, "loss": 0.2532, "lr": 1.9725069581666645e-07, "epoch": 6.7203187250996015, "percentage": 96.0, "elapsed_time": "14:33:59", "remaining_time": "0:36:27"} +{"current_steps": 4225, "total_steps": 4396, "loss": 0.2468, "lr": 1.862810792733849e-07, "epoch": 6.728286852589641, "percentage": 96.11, "elapsed_time": "14:35:00", "remaining_time": "0:35:24"} +{"current_steps": 4230, "total_steps": 4396, "loss": 0.2539, "lr": 1.7562384948768273e-07, "epoch": 6.736254980079681, "percentage": 96.22, "elapsed_time": "14:35:59", "remaining_time": "0:34:22"} +{"current_steps": 4235, "total_steps": 4396, "loss": 0.2396, "lr": 1.6527917448341478e-07, "epoch": 6.744223107569721, "percentage": 96.34, "elapsed_time": "14:37:02", "remaining_time": "0:33:20"} +{"current_steps": 4240, "total_steps": 4396, "loss": 0.2555, "lr": 1.5524721735663995e-07, "epoch": 6.752191235059761, "percentage": 96.45, "elapsed_time": "14:38:08", "remaining_time": "0:32:18"} +{"current_steps": 4245, "total_steps": 4396, "loss": 0.2421, "lr": 1.4552813627305208e-07, "epoch": 6.760159362549801, "percentage": 96.57, "elapsed_time": "14:39:14", "remaining_time": "0:31:16"} +{"current_steps": 4250, "total_steps": 4396, "loss": 0.2608, "lr": 1.361220844654798e-07, "epoch": 6.7681274900398405, "percentage": 96.68, "elapsed_time": "14:40:15", "remaining_time": "0:30:14"} +{"current_steps": 4255, "total_steps": 4396, "loss": 0.2529, "lr": 1.270292102314752e-07, "epoch": 6.77609561752988, "percentage": 96.79, "elapsed_time": "14:41:17", "remaining_time": "0:29:12"} +{"current_steps": 4260, "total_steps": 4396, "loss": 0.2683, "lr": 1.1824965693097767e-07, "epoch": 6.78406374501992, "percentage": 96.91, "elapsed_time": "14:42:17", "remaining_time": "0:28:10"} +{"current_steps": 4265, "total_steps": 4396, "loss": 0.2458, "lr": 1.0978356298404713e-07, "epoch": 6.79203187250996, "percentage": 97.02, "elapsed_time": "14:43:17", "remaining_time": "0:27:07"} +{"current_steps": 4270, "total_steps": 4396, "loss": 0.256, "lr": 1.0163106186868777e-07, "epoch": 6.8, "percentage": 97.13, "elapsed_time": "14:44:24", "remaining_time": "0:26:05"} +{"current_steps": 4275, "total_steps": 4396, "loss": 0.2544, "lr": 9.379228211873648e-08, "epoch": 6.80796812749004, "percentage": 97.25, "elapsed_time": "14:45:21", "remaining_time": "0:25:03"} +{"current_steps": 4280, "total_steps": 4396, "loss": 0.2531, "lr": 8.626734732185116e-08, "epoch": 6.8159362549800795, "percentage": 97.36, "elapsed_time": "14:46:19", "remaining_time": "0:24:01"} +{"current_steps": 4285, "total_steps": 4396, "loss": 0.2471, "lr": 7.905637611754114e-08, "epoch": 6.823904382470119, "percentage": 97.47, "elapsed_time": "14:47:19", "remaining_time": "0:22:59"} +{"current_steps": 4290, "total_steps": 4396, "loss": 0.2568, "lr": 7.21594821953131e-08, "epoch": 6.831872509960159, "percentage": 97.59, "elapsed_time": "14:48:16", "remaining_time": "0:21:56"} +{"current_steps": 4295, "total_steps": 4396, "loss": 0.2407, "lr": 6.557677429287257e-08, "epoch": 6.839840637450199, "percentage": 97.7, "elapsed_time": "14:49:22", "remaining_time": "0:20:54"} +{"current_steps": 4300, "total_steps": 4396, "loss": 0.249, "lr": 5.930835619441189e-08, "epoch": 6.847808764940239, "percentage": 97.82, "elapsed_time": "14:50:25", "remaining_time": "0:19:52"} +{"current_steps": 4305, "total_steps": 4396, "loss": 0.257, "lr": 5.335432672896712e-08, "epoch": 6.855776892430279, "percentage": 97.93, "elapsed_time": "14:51:33", "remaining_time": "0:18:50"} +{"current_steps": 4310, "total_steps": 4396, "loss": 0.2386, "lr": 4.77147797688704e-08, "epoch": 6.8637450199203185, "percentage": 98.04, "elapsed_time": "14:52:35", "remaining_time": "0:17:48"} +{"current_steps": 4315, "total_steps": 4396, "loss": 0.2462, "lr": 4.238980422826e-08, "epoch": 6.871713147410358, "percentage": 98.16, "elapsed_time": "14:53:33", "remaining_time": "0:16:46"} +{"current_steps": 4320, "total_steps": 4396, "loss": 0.2504, "lr": 3.737948406168812e-08, "epoch": 6.879681274900398, "percentage": 98.27, "elapsed_time": "14:54:35", "remaining_time": "0:15:44"} +{"current_steps": 4325, "total_steps": 4396, "loss": 0.2459, "lr": 3.26838982627864e-08, "epoch": 6.887649402390438, "percentage": 98.38, "elapsed_time": "14:55:37", "remaining_time": "0:14:42"} +{"current_steps": 4330, "total_steps": 4396, "loss": 0.2527, "lr": 2.8303120863033552e-08, "epoch": 6.895617529880478, "percentage": 98.5, "elapsed_time": "14:56:36", "remaining_time": "0:13:39"} +{"current_steps": 4335, "total_steps": 4396, "loss": 0.2416, "lr": 2.4237220930571904e-08, "epoch": 6.903585657370518, "percentage": 98.61, "elapsed_time": "14:57:38", "remaining_time": "0:12:37"} +{"current_steps": 4340, "total_steps": 4396, "loss": 0.2467, "lr": 2.0486262569132664e-08, "epoch": 6.9115537848605575, "percentage": 98.73, "elapsed_time": "14:58:35", "remaining_time": "0:11:35"} +{"current_steps": 4345, "total_steps": 4396, "loss": 0.2502, "lr": 1.70503049170212e-08, "epoch": 6.919521912350597, "percentage": 98.84, "elapsed_time": "14:59:35", "remaining_time": "0:10:33"} +{"current_steps": 4350, "total_steps": 4396, "loss": 0.2512, "lr": 1.3929402146179994e-08, "epoch": 6.927490039840637, "percentage": 98.95, "elapsed_time": "15:00:37", "remaining_time": "0:09:31"} +{"current_steps": 4355, "total_steps": 4396, "loss": 0.2497, "lr": 1.1123603461340449e-08, "epoch": 6.935458167330677, "percentage": 99.07, "elapsed_time": "15:01:33", "remaining_time": "0:08:29"} +{"current_steps": 4360, "total_steps": 4396, "loss": 0.2544, "lr": 8.632953099241282e-09, "epoch": 6.943426294820717, "percentage": 99.18, "elapsed_time": "15:02:33", "remaining_time": "0:07:27"} +{"current_steps": 4365, "total_steps": 4396, "loss": 0.253, "lr": 6.457490327940186e-09, "epoch": 6.951394422310757, "percentage": 99.29, "elapsed_time": "15:03:37", "remaining_time": "0:06:25"} +{"current_steps": 4370, "total_steps": 4396, "loss": 0.2578, "lr": 4.597249446183227e-09, "epoch": 6.9593625498007965, "percentage": 99.41, "elapsed_time": "15:04:37", "remaining_time": "0:05:22"} +{"current_steps": 4375, "total_steps": 4396, "loss": 0.2626, "lr": 3.0522597828719357e-09, "epoch": 6.967330677290836, "percentage": 99.52, "elapsed_time": "15:05:51", "remaining_time": "0:04:20"} +{"current_steps": 4380, "total_steps": 4396, "loss": 0.2625, "lr": 1.822545696601452e-09, "epoch": 6.975298804780876, "percentage": 99.64, "elapsed_time": "15:06:52", "remaining_time": "0:03:18"} +{"current_steps": 4385, "total_steps": 4396, "loss": 0.2357, "lr": 9.081265752697299e-10, "epoch": 6.983266932270916, "percentage": 99.75, "elapsed_time": "15:07:54", "remaining_time": "0:02:16"} +{"current_steps": 4390, "total_steps": 4396, "loss": 0.2528, "lr": 3.0901683577999606e-10, "epoch": 6.991235059760956, "percentage": 99.86, "elapsed_time": "15:08:57", "remaining_time": "0:01:14"} +{"current_steps": 4395, "total_steps": 4396, "loss": 0.2521, "lr": 2.522592380316269e-11, "epoch": 6.999203187250996, "percentage": 99.98, "elapsed_time": "15:09:58", "remaining_time": "0:00:12"} +{"current_steps": 4396, "total_steps": 4396, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "15:10:42", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..f088a78 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,9716 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4396, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00796812749003984, + "grad_norm": 24.37495754265772, + "learning_rate": 3.6363636363636366e-07, + "loss": 0.9065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4462226629257202, + "step": 5, + "valid_targets_mean": 3722.1, + "valid_targets_min": 1050 + }, + { + "epoch": 0.01593625498007968, + "grad_norm": 20.77111310942199, + "learning_rate": 8.181818181818182e-07, + "loss": 0.8968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42043066024780273, + "step": 10, + "valid_targets_mean": 4730.2, + "valid_targets_min": 3538 + }, + { + "epoch": 0.02390438247011952, + "grad_norm": 16.688145610134242, + "learning_rate": 1.2727272727272728e-06, + "loss": 0.862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4235057234764099, + "step": 15, + "valid_targets_mean": 4623.1, + "valid_targets_min": 3361 + }, + { + "epoch": 0.03187250996015936, + "grad_norm": 14.122564484492695, + "learning_rate": 1.7272727272727275e-06, + "loss": 0.814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.44331303238868713, + "step": 20, + "valid_targets_mean": 4180.2, + "valid_targets_min": 746 + }, + { + "epoch": 0.0398406374501992, + "grad_norm": 7.696548351443415, + "learning_rate": 2.181818181818182e-06, + "loss": 0.7668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32791611552238464, + "step": 25, + "valid_targets_mean": 3369.4, + "valid_targets_min": 272 + }, + { + "epoch": 0.04780876494023904, + "grad_norm": 4.864489337884109, + "learning_rate": 2.6363636363636364e-06, + "loss": 0.7323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34147197008132935, + "step": 30, + "valid_targets_mean": 3219.8, + "valid_targets_min": 1244 + }, + { + "epoch": 0.055776892430278883, + "grad_norm": 2.65286621457725, + "learning_rate": 3.090909090909091e-06, + "loss": 0.7072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.310516357421875, + "step": 35, + "valid_targets_mean": 3807.8, + "valid_targets_min": 987 + }, + { + "epoch": 0.06374501992031872, + "grad_norm": 2.0820354739310676, + "learning_rate": 3.5454545454545458e-06, + "loss": 0.6397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30131199955940247, + "step": 40, + "valid_targets_mean": 3464.8, + "valid_targets_min": 324 + }, + { + "epoch": 0.07171314741035857, + "grad_norm": 1.5126088563806854, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4162129759788513, + "step": 45, + "valid_targets_mean": 4792.8, + "valid_targets_min": 748 + }, + { + "epoch": 0.0796812749003984, + "grad_norm": 1.218992918685487, + "learning_rate": 4.454545454545455e-06, + "loss": 0.5906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3314875662326813, + "step": 50, + "valid_targets_mean": 4466.9, + "valid_targets_min": 2862 + }, + { + "epoch": 0.08764940239043825, + "grad_norm": 0.996370822345656, + "learning_rate": 4.90909090909091e-06, + "loss": 0.5989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32610633969306946, + "step": 55, + "valid_targets_mean": 4328.5, + "valid_targets_min": 3474 + }, + { + "epoch": 0.09561752988047809, + "grad_norm": 0.9322106306908817, + "learning_rate": 5.3636363636363645e-06, + "loss": 0.5821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26030367612838745, + "step": 60, + "valid_targets_mean": 3929.0, + "valid_targets_min": 3433 + }, + { + "epoch": 0.10358565737051793, + "grad_norm": 0.8948869786691818, + "learning_rate": 5.8181818181818185e-06, + "loss": 0.5673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33863985538482666, + "step": 65, + "valid_targets_mean": 4560.4, + "valid_targets_min": 273 + }, + { + "epoch": 0.11155378486055777, + "grad_norm": 0.8563354410048826, + "learning_rate": 6.2727272727272734e-06, + "loss": 0.5388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24827717244625092, + "step": 70, + "valid_targets_mean": 3210.8, + "valid_targets_min": 1277 + }, + { + "epoch": 0.11952191235059761, + "grad_norm": 0.844051842541015, + "learning_rate": 6.7272727272727275e-06, + "loss": 0.5336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24409377574920654, + "step": 75, + "valid_targets_mean": 3379.8, + "valid_targets_min": 594 + }, + { + "epoch": 0.12749003984063745, + "grad_norm": 0.7965069165140631, + "learning_rate": 7.181818181818182e-06, + "loss": 0.5445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2620013952255249, + "step": 80, + "valid_targets_mean": 3820.8, + "valid_targets_min": 888 + }, + { + "epoch": 0.13545816733067728, + "grad_norm": 0.9317949602217058, + "learning_rate": 7.636363636363638e-06, + "loss": 0.5108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24453184008598328, + "step": 85, + "valid_targets_mean": 2961.5, + "valid_targets_min": 243 + }, + { + "epoch": 0.14342629482071714, + "grad_norm": 0.7842445927901606, + "learning_rate": 8.090909090909092e-06, + "loss": 0.4959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22229188680648804, + "step": 90, + "valid_targets_mean": 3972.0, + "valid_targets_min": 1081 + }, + { + "epoch": 0.15139442231075698, + "grad_norm": 0.701161735833168, + "learning_rate": 8.545454545454546e-06, + "loss": 0.4978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1909436583518982, + "step": 95, + "valid_targets_mean": 3284.6, + "valid_targets_min": 777 + }, + { + "epoch": 0.1593625498007968, + "grad_norm": 0.7184451189685676, + "learning_rate": 9e-06, + "loss": 0.4816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2568347156047821, + "step": 100, + "valid_targets_mean": 4385.9, + "valid_targets_min": 2552 + }, + { + "epoch": 0.16733067729083664, + "grad_norm": 0.7312987260079192, + "learning_rate": 9.454545454545456e-06, + "loss": 0.4759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2005646824836731, + "step": 105, + "valid_targets_mean": 3237.5, + "valid_targets_min": 1060 + }, + { + "epoch": 0.1752988047808765, + "grad_norm": 0.7873006873988114, + "learning_rate": 9.90909090909091e-06, + "loss": 0.4655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2670203447341919, + "step": 110, + "valid_targets_mean": 4206.8, + "valid_targets_min": 750 + }, + { + "epoch": 0.18326693227091634, + "grad_norm": 0.7473638820821069, + "learning_rate": 1.0363636363636364e-05, + "loss": 0.4568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21891140937805176, + "step": 115, + "valid_targets_mean": 4152.9, + "valid_targets_min": 2306 + }, + { + "epoch": 0.19123505976095617, + "grad_norm": 0.8257698623740443, + "learning_rate": 1.0818181818181818e-05, + "loss": 0.4528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18611110746860504, + "step": 120, + "valid_targets_mean": 3085.9, + "valid_targets_min": 838 + }, + { + "epoch": 0.199203187250996, + "grad_norm": 0.8625112182566368, + "learning_rate": 1.1272727272727272e-05, + "loss": 0.4818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30518537759780884, + "step": 125, + "valid_targets_mean": 3337.6, + "valid_targets_min": 281 + }, + { + "epoch": 0.20717131474103587, + "grad_norm": 0.8399932702279239, + "learning_rate": 1.1727272727272728e-05, + "loss": 0.4572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19899678230285645, + "step": 130, + "valid_targets_mean": 3083.8, + "valid_targets_min": 475 + }, + { + "epoch": 0.2151394422310757, + "grad_norm": 0.7197396607669662, + "learning_rate": 1.2181818181818184e-05, + "loss": 0.4459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1982295960187912, + "step": 135, + "valid_targets_mean": 3466.6, + "valid_targets_min": 776 + }, + { + "epoch": 0.22310756972111553, + "grad_norm": 0.7587803874195033, + "learning_rate": 1.2636363636363638e-05, + "loss": 0.4371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1760505735874176, + "step": 140, + "valid_targets_mean": 2922.8, + "valid_targets_min": 291 + }, + { + "epoch": 0.23107569721115537, + "grad_norm": 0.9910760086364537, + "learning_rate": 1.3090909090909092e-05, + "loss": 0.4506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2816995680332184, + "step": 145, + "valid_targets_mean": 4115.8, + "valid_targets_min": 1413 + }, + { + "epoch": 0.23904382470119523, + "grad_norm": 0.7363394591670118, + "learning_rate": 1.3545454545454546e-05, + "loss": 0.4451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24589544534683228, + "step": 150, + "valid_targets_mean": 3740.2, + "valid_targets_min": 553 + }, + { + "epoch": 0.24701195219123506, + "grad_norm": 0.8608747635138786, + "learning_rate": 1.4e-05, + "loss": 0.444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1670471429824829, + "step": 155, + "valid_targets_mean": 1975.4, + "valid_targets_min": 541 + }, + { + "epoch": 0.2549800796812749, + "grad_norm": 0.766843036608719, + "learning_rate": 1.4454545454545457e-05, + "loss": 0.4459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2611636817455292, + "step": 160, + "valid_targets_mean": 4575.8, + "valid_targets_min": 3423 + }, + { + "epoch": 0.26294820717131473, + "grad_norm": 0.7373314833536057, + "learning_rate": 1.4909090909090911e-05, + "loss": 0.4336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21863976120948792, + "step": 165, + "valid_targets_mean": 4157.1, + "valid_targets_min": 982 + }, + { + "epoch": 0.27091633466135456, + "grad_norm": 0.7386566425113588, + "learning_rate": 1.5363636363636365e-05, + "loss": 0.4242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16179677844047546, + "step": 170, + "valid_targets_mean": 2649.9, + "valid_targets_min": 263 + }, + { + "epoch": 0.2788844621513944, + "grad_norm": 0.9052696105145834, + "learning_rate": 1.5818181818181818e-05, + "loss": 0.442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3230035901069641, + "step": 175, + "valid_targets_mean": 3192.4, + "valid_targets_min": 231 + }, + { + "epoch": 0.2868525896414343, + "grad_norm": 0.8419040745577814, + "learning_rate": 1.6272727272727273e-05, + "loss": 0.4318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24606087803840637, + "step": 180, + "valid_targets_mean": 4607.6, + "valid_targets_min": 3713 + }, + { + "epoch": 0.2948207171314741, + "grad_norm": 0.9473280325221405, + "learning_rate": 1.672727272727273e-05, + "loss": 0.4262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1998061090707779, + "step": 185, + "valid_targets_mean": 3167.9, + "valid_targets_min": 250 + }, + { + "epoch": 0.30278884462151395, + "grad_norm": 0.8456848959662562, + "learning_rate": 1.7181818181818185e-05, + "loss": 0.411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19775201380252838, + "step": 190, + "valid_targets_mean": 4045.4, + "valid_targets_min": 1434 + }, + { + "epoch": 0.3107569721115538, + "grad_norm": 1.0336922885537096, + "learning_rate": 1.7636363636363637e-05, + "loss": 0.4189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24612963199615479, + "step": 195, + "valid_targets_mean": 3500.5, + "valid_targets_min": 1091 + }, + { + "epoch": 0.3187250996015936, + "grad_norm": 0.6790199436123496, + "learning_rate": 1.8090909090909093e-05, + "loss": 0.4154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26405617594718933, + "step": 200, + "valid_targets_mean": 5522.4, + "valid_targets_min": 4352 + }, + { + "epoch": 0.32669322709163345, + "grad_norm": 0.7333217562417574, + "learning_rate": 1.8545454545454545e-05, + "loss": 0.4171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23619253933429718, + "step": 205, + "valid_targets_mean": 4490.4, + "valid_targets_min": 2425 + }, + { + "epoch": 0.3346613545816733, + "grad_norm": 0.726066810728795, + "learning_rate": 1.9e-05, + "loss": 0.4273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3351441025733948, + "step": 210, + "valid_targets_mean": 4791.6, + "valid_targets_min": 292 + }, + { + "epoch": 0.3426294820717131, + "grad_norm": 0.6917093562564017, + "learning_rate": 1.9454545454545457e-05, + "loss": 0.4045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1841329038143158, + "step": 215, + "valid_targets_mean": 3697.2, + "valid_targets_min": 1114 + }, + { + "epoch": 0.350597609561753, + "grad_norm": 0.7693607796480706, + "learning_rate": 1.9909090909090913e-05, + "loss": 0.412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25000637769699097, + "step": 220, + "valid_targets_mean": 4362.8, + "valid_targets_min": 3874 + }, + { + "epoch": 0.35856573705179284, + "grad_norm": 0.8096629223285201, + "learning_rate": 2.0363636363636365e-05, + "loss": 0.4096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22220614552497864, + "step": 225, + "valid_targets_mean": 3821.4, + "valid_targets_min": 275 + }, + { + "epoch": 0.3665338645418327, + "grad_norm": 1.0177917321036343, + "learning_rate": 2.081818181818182e-05, + "loss": 0.3877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1806771159172058, + "step": 230, + "valid_targets_mean": 2983.0, + "valid_targets_min": 260 + }, + { + "epoch": 0.3745019920318725, + "grad_norm": 0.7333916228070542, + "learning_rate": 2.1272727272727276e-05, + "loss": 0.391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17516730725765228, + "step": 235, + "valid_targets_mean": 3388.1, + "valid_targets_min": 854 + }, + { + "epoch": 0.38247011952191234, + "grad_norm": 0.9425301322102779, + "learning_rate": 2.172727272727273e-05, + "loss": 0.404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1691473424434662, + "step": 240, + "valid_targets_mean": 2758.1, + "valid_targets_min": 479 + }, + { + "epoch": 0.3904382470119522, + "grad_norm": 0.7597593582484266, + "learning_rate": 2.2181818181818184e-05, + "loss": 0.3992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1942702978849411, + "step": 245, + "valid_targets_mean": 3456.2, + "valid_targets_min": 980 + }, + { + "epoch": 0.398406374501992, + "grad_norm": 0.7719937467195934, + "learning_rate": 2.263636363636364e-05, + "loss": 0.3973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22239413857460022, + "step": 250, + "valid_targets_mean": 3850.1, + "valid_targets_min": 534 + }, + { + "epoch": 0.4063745019920319, + "grad_norm": 0.8051124024795501, + "learning_rate": 2.3090909090909093e-05, + "loss": 0.4015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16510990262031555, + "step": 255, + "valid_targets_mean": 3311.1, + "valid_targets_min": 590 + }, + { + "epoch": 0.41434262948207173, + "grad_norm": 0.774678875688182, + "learning_rate": 2.3545454545454548e-05, + "loss": 0.3969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19007673859596252, + "step": 260, + "valid_targets_mean": 3814.5, + "valid_targets_min": 954 + }, + { + "epoch": 0.42231075697211157, + "grad_norm": 0.9737556189745716, + "learning_rate": 2.4e-05, + "loss": 0.3869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17725446820259094, + "step": 265, + "valid_targets_mean": 3379.5, + "valid_targets_min": 276 + }, + { + "epoch": 0.4302788844621514, + "grad_norm": 0.6869188610961593, + "learning_rate": 2.4454545454545456e-05, + "loss": 0.4012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20431405305862427, + "step": 270, + "valid_targets_mean": 4211.4, + "valid_targets_min": 2762 + }, + { + "epoch": 0.43824701195219123, + "grad_norm": 0.7518435455875745, + "learning_rate": 2.490909090909091e-05, + "loss": 0.3839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22350144386291504, + "step": 275, + "valid_targets_mean": 5223.8, + "valid_targets_min": 4425 + }, + { + "epoch": 0.44621513944223107, + "grad_norm": 0.6801525553872718, + "learning_rate": 2.5363636363636364e-05, + "loss": 0.3901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21610978245735168, + "step": 280, + "valid_targets_mean": 5043.2, + "valid_targets_min": 4456 + }, + { + "epoch": 0.4541832669322709, + "grad_norm": 0.7534382485600102, + "learning_rate": 2.5818181818181824e-05, + "loss": 0.3953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17026452720165253, + "step": 285, + "valid_targets_mean": 3922.9, + "valid_targets_min": 3022 + }, + { + "epoch": 0.46215139442231074, + "grad_norm": 0.7600823974106152, + "learning_rate": 2.6272727272727276e-05, + "loss": 0.3651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14615437388420105, + "step": 290, + "valid_targets_mean": 2542.1, + "valid_targets_min": 241 + }, + { + "epoch": 0.4701195219123506, + "grad_norm": 0.8131679536977819, + "learning_rate": 2.672727272727273e-05, + "loss": 0.3881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18951882421970367, + "step": 295, + "valid_targets_mean": 4425.0, + "valid_targets_min": 2871 + }, + { + "epoch": 0.47808764940239046, + "grad_norm": 0.704158682861056, + "learning_rate": 2.7181818181818184e-05, + "loss": 0.3817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2157367765903473, + "step": 300, + "valid_targets_mean": 4395.0, + "valid_targets_min": 2776 + }, + { + "epoch": 0.4860557768924303, + "grad_norm": 0.7334485862615044, + "learning_rate": 2.763636363636364e-05, + "loss": 0.3805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.149525448679924, + "step": 305, + "valid_targets_mean": 3410.2, + "valid_targets_min": 494 + }, + { + "epoch": 0.4940239043824701, + "grad_norm": 0.6398803355562287, + "learning_rate": 2.8090909090909092e-05, + "loss": 0.3925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.155901700258255, + "step": 310, + "valid_targets_mean": 3745.0, + "valid_targets_min": 1345 + }, + { + "epoch": 0.50199203187251, + "grad_norm": 0.7424072063522883, + "learning_rate": 2.8545454545454548e-05, + "loss": 0.3891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1601351946592331, + "step": 315, + "valid_targets_mean": 3158.5, + "valid_targets_min": 989 + }, + { + "epoch": 0.5099601593625498, + "grad_norm": 0.697644493947305, + "learning_rate": 2.9e-05, + "loss": 0.379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17844626307487488, + "step": 320, + "valid_targets_mean": 3966.8, + "valid_targets_min": 256 + }, + { + "epoch": 0.5179282868525896, + "grad_norm": 0.8334718158966897, + "learning_rate": 2.9454545454545456e-05, + "loss": 0.3856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1996501088142395, + "step": 325, + "valid_targets_mean": 3763.9, + "valid_targets_min": 577 + }, + { + "epoch": 0.5258964143426295, + "grad_norm": 0.6542673297605693, + "learning_rate": 2.9909090909090908e-05, + "loss": 0.3615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1474393904209137, + "step": 330, + "valid_targets_mean": 3912.9, + "valid_targets_min": 428 + }, + { + "epoch": 0.5338645418326693, + "grad_norm": 1.0898122627042852, + "learning_rate": 3.0363636363636364e-05, + "loss": 0.3939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20399470627307892, + "step": 335, + "valid_targets_mean": 4056.0, + "valid_targets_min": 189 + }, + { + "epoch": 0.5418326693227091, + "grad_norm": 0.7745918715566812, + "learning_rate": 3.081818181818182e-05, + "loss": 0.3795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25055158138275146, + "step": 340, + "valid_targets_mean": 4587.9, + "valid_targets_min": 2195 + }, + { + "epoch": 0.549800796812749, + "grad_norm": 0.7230172934249015, + "learning_rate": 3.127272727272728e-05, + "loss": 0.3721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816451996564865, + "step": 345, + "valid_targets_mean": 4361.4, + "valid_targets_min": 1472 + }, + { + "epoch": 0.5577689243027888, + "grad_norm": 0.7003320872740779, + "learning_rate": 3.172727272727273e-05, + "loss": 0.3843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16478148102760315, + "step": 350, + "valid_targets_mean": 3774.4, + "valid_targets_min": 271 + }, + { + "epoch": 0.5657370517928287, + "grad_norm": 0.6804494412793238, + "learning_rate": 3.2181818181818184e-05, + "loss": 0.3659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20293974876403809, + "step": 355, + "valid_targets_mean": 3941.6, + "valid_targets_min": 392 + }, + { + "epoch": 0.5737051792828686, + "grad_norm": 0.7743000131108165, + "learning_rate": 3.263636363636364e-05, + "loss": 0.3748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20128706097602844, + "step": 360, + "valid_targets_mean": 3755.8, + "valid_targets_min": 1966 + }, + { + "epoch": 0.5816733067729084, + "grad_norm": 0.7472256621802407, + "learning_rate": 3.3090909090909095e-05, + "loss": 0.3643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19128525257110596, + "step": 365, + "valid_targets_mean": 4262.8, + "valid_targets_min": 1155 + }, + { + "epoch": 0.5896414342629482, + "grad_norm": 0.9585374708292493, + "learning_rate": 3.354545454545455e-05, + "loss": 0.3795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2316116988658905, + "step": 370, + "valid_targets_mean": 4323.4, + "valid_targets_min": 364 + }, + { + "epoch": 0.5976095617529881, + "grad_norm": 0.9480533929864912, + "learning_rate": 3.4e-05, + "loss": 0.3749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23274560272693634, + "step": 375, + "valid_targets_mean": 3637.2, + "valid_targets_min": 932 + }, + { + "epoch": 0.6055776892430279, + "grad_norm": 0.7021130871658416, + "learning_rate": 3.4454545454545455e-05, + "loss": 0.3898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2204621136188507, + "step": 380, + "valid_targets_mean": 4790.4, + "valid_targets_min": 493 + }, + { + "epoch": 0.6135458167330677, + "grad_norm": 0.8628186416823741, + "learning_rate": 3.490909090909091e-05, + "loss": 0.377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17570717632770538, + "step": 385, + "valid_targets_mean": 3729.4, + "valid_targets_min": 972 + }, + { + "epoch": 0.6215139442231076, + "grad_norm": 0.7210336759502691, + "learning_rate": 3.536363636363637e-05, + "loss": 0.3712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21611185371875763, + "step": 390, + "valid_targets_mean": 4098.4, + "valid_targets_min": 3422 + }, + { + "epoch": 0.6294820717131474, + "grad_norm": 0.7615523380276196, + "learning_rate": 3.581818181818182e-05, + "loss": 0.3688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14733503758907318, + "step": 395, + "valid_targets_mean": 2655.8, + "valid_targets_min": 338 + }, + { + "epoch": 0.6374501992031872, + "grad_norm": 0.7351812946098347, + "learning_rate": 3.627272727272728e-05, + "loss": 0.3631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17947062849998474, + "step": 400, + "valid_targets_mean": 3434.1, + "valid_targets_min": 546 + }, + { + "epoch": 0.6454183266932271, + "grad_norm": 0.6473507101556931, + "learning_rate": 3.6727272727272734e-05, + "loss": 0.362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18027062714099884, + "step": 405, + "valid_targets_mean": 4050.0, + "valid_targets_min": 648 + }, + { + "epoch": 0.6533864541832669, + "grad_norm": 0.8170944353027576, + "learning_rate": 3.718181818181818e-05, + "loss": 0.3651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18333405256271362, + "step": 410, + "valid_targets_mean": 3280.4, + "valid_targets_min": 742 + }, + { + "epoch": 0.6613545816733067, + "grad_norm": 0.6522389597007201, + "learning_rate": 3.763636363636364e-05, + "loss": 0.3653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15798671543598175, + "step": 415, + "valid_targets_mean": 3973.9, + "valid_targets_min": 3181 + }, + { + "epoch": 0.6693227091633466, + "grad_norm": 0.7010470762692088, + "learning_rate": 3.8090909090909095e-05, + "loss": 0.3688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15806664526462555, + "step": 420, + "valid_targets_mean": 3355.4, + "valid_targets_min": 430 + }, + { + "epoch": 0.6772908366533864, + "grad_norm": 0.6759026499243866, + "learning_rate": 3.854545454545455e-05, + "loss": 0.381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26875221729278564, + "step": 425, + "valid_targets_mean": 4280.9, + "valid_targets_min": 864 + }, + { + "epoch": 0.6852589641434262, + "grad_norm": 0.7683397961540518, + "learning_rate": 3.9e-05, + "loss": 0.3786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14716662466526031, + "step": 430, + "valid_targets_mean": 3156.4, + "valid_targets_min": 789 + }, + { + "epoch": 0.6932270916334662, + "grad_norm": 0.7376715268170536, + "learning_rate": 3.9454545454545455e-05, + "loss": 0.3839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21732556819915771, + "step": 435, + "valid_targets_mean": 3848.8, + "valid_targets_min": 232 + }, + { + "epoch": 0.701195219123506, + "grad_norm": 0.6728184615746164, + "learning_rate": 3.990909090909091e-05, + "loss": 0.3629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16482621431350708, + "step": 440, + "valid_targets_mean": 3314.9, + "valid_targets_min": 533 + }, + { + "epoch": 0.7091633466135459, + "grad_norm": 0.6675755697367736, + "learning_rate": 3.999989909636843e-05, + "loss": 0.3743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22296589612960815, + "step": 445, + "valid_targets_mean": 4027.6, + "valid_targets_min": 225 + }, + { + "epoch": 0.7171314741035857, + "grad_norm": 0.6380622207496793, + "learning_rate": 3.999948917711013e-05, + "loss": 0.3685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18178203701972961, + "step": 450, + "valid_targets_mean": 4977.4, + "valid_targets_min": 3157 + }, + { + "epoch": 0.7250996015936255, + "grad_norm": 0.7249748715728896, + "learning_rate": 3.999876394220603e-05, + "loss": 0.3507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15942233800888062, + "step": 455, + "valid_targets_mean": 3487.6, + "valid_targets_min": 425 + }, + { + "epoch": 0.7330677290836654, + "grad_norm": 0.6364193871643289, + "learning_rate": 3.999772340309031e-05, + "loss": 0.3623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17309603095054626, + "step": 460, + "valid_targets_mean": 4024.1, + "valid_targets_min": 2346 + }, + { + "epoch": 0.7410358565737052, + "grad_norm": 0.6534069223911941, + "learning_rate": 3.999636757616831e-05, + "loss": 0.3683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18539786338806152, + "step": 465, + "valid_targets_mean": 4150.1, + "valid_targets_min": 2355 + }, + { + "epoch": 0.749003984063745, + "grad_norm": 0.7623546621780052, + "learning_rate": 3.999469648281624e-05, + "loss": 0.3583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18554510176181793, + "step": 470, + "valid_targets_mean": 3821.2, + "valid_targets_min": 474 + }, + { + "epoch": 0.7569721115537849, + "grad_norm": 0.7478027946512054, + "learning_rate": 3.9992710149380875e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16699925065040588, + "step": 475, + "valid_targets_mean": 3427.5, + "valid_targets_min": 1163 + }, + { + "epoch": 0.7649402390438247, + "grad_norm": 0.696767701299578, + "learning_rate": 3.999040860717911e-05, + "loss": 0.3519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17681604623794556, + "step": 480, + "valid_targets_mean": 3752.8, + "valid_targets_min": 502 + }, + { + "epoch": 0.7729083665338645, + "grad_norm": 0.6967704351910736, + "learning_rate": 3.998779189249749e-05, + "loss": 0.3414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511075496673584, + "step": 485, + "valid_targets_mean": 3213.6, + "valid_targets_min": 205 + }, + { + "epoch": 0.7808764940239044, + "grad_norm": 0.771706695920937, + "learning_rate": 3.998486004659162e-05, + "loss": 0.3545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18651717901229858, + "step": 490, + "valid_targets_mean": 2901.6, + "valid_targets_min": 1091 + }, + { + "epoch": 0.7888446215139442, + "grad_norm": 0.6775930753323367, + "learning_rate": 3.9981613115685516e-05, + "loss": 0.3568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1885891556739807, + "step": 495, + "valid_targets_mean": 3702.8, + "valid_targets_min": 262 + }, + { + "epoch": 0.796812749003984, + "grad_norm": 0.695902107525504, + "learning_rate": 3.9978051150970906e-05, + "loss": 0.3595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1813029646873474, + "step": 500, + "valid_targets_mean": 4663.6, + "valid_targets_min": 2700 + }, + { + "epoch": 0.8047808764940239, + "grad_norm": 0.728929159452013, + "learning_rate": 3.9974174208606376e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1546524614095688, + "step": 505, + "valid_targets_mean": 2868.9, + "valid_targets_min": 621 + }, + { + "epoch": 0.8127490039840638, + "grad_norm": 0.8272162026985286, + "learning_rate": 3.996998234971652e-05, + "loss": 0.3581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17594462633132935, + "step": 510, + "valid_targets_mean": 3446.9, + "valid_targets_min": 236 + }, + { + "epoch": 0.8207171314741036, + "grad_norm": 0.9446986936787952, + "learning_rate": 3.996547564039096e-05, + "loss": 0.3566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1588212251663208, + "step": 515, + "valid_targets_mean": 3624.6, + "valid_targets_min": 489 + }, + { + "epoch": 0.8286852589641435, + "grad_norm": 0.8017563324472775, + "learning_rate": 3.99606541516833e-05, + "loss": 0.3563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19778478145599365, + "step": 520, + "valid_targets_mean": 4328.5, + "valid_targets_min": 1124 + }, + { + "epoch": 0.8366533864541833, + "grad_norm": 0.7612012764629698, + "learning_rate": 3.995551795961004e-05, + "loss": 0.3571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18733835220336914, + "step": 525, + "valid_targets_mean": 3156.9, + "valid_targets_min": 273 + }, + { + "epoch": 0.8446215139442231, + "grad_norm": 0.6954057631209731, + "learning_rate": 3.995006714514932e-05, + "loss": 0.3663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2069414258003235, + "step": 530, + "valid_targets_mean": 4626.6, + "valid_targets_min": 942 + }, + { + "epoch": 0.852589641434263, + "grad_norm": 0.7380261470303369, + "learning_rate": 3.99443017942397e-05, + "loss": 0.35, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14619982242584229, + "step": 535, + "valid_targets_mean": 3010.8, + "valid_targets_min": 500 + }, + { + "epoch": 0.8605577689243028, + "grad_norm": 0.8077177991808565, + "learning_rate": 3.993822199777876e-05, + "loss": 0.3734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21335554122924805, + "step": 540, + "valid_targets_mean": 4808.2, + "valid_targets_min": 2905 + }, + { + "epoch": 0.8685258964143426, + "grad_norm": 0.674443430613216, + "learning_rate": 3.9931827851621694e-05, + "loss": 0.3531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20609551668167114, + "step": 545, + "valid_targets_mean": 4898.8, + "valid_targets_min": 3383 + }, + { + "epoch": 0.8764940239043825, + "grad_norm": 0.6954171773040285, + "learning_rate": 3.99251194565798e-05, + "loss": 0.3537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16853263974189758, + "step": 550, + "valid_targets_mean": 3665.2, + "valid_targets_min": 1078 + }, + { + "epoch": 0.8844621513944223, + "grad_norm": 0.723199475265153, + "learning_rate": 3.991809691841888e-05, + "loss": 0.3615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16584350168704987, + "step": 555, + "valid_targets_mean": 2947.6, + "valid_targets_min": 447 + }, + { + "epoch": 0.8924302788844621, + "grad_norm": 0.6430285946102091, + "learning_rate": 3.9910760347857554e-05, + "loss": 0.3494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15580511093139648, + "step": 560, + "valid_targets_mean": 3587.9, + "valid_targets_min": 921 + }, + { + "epoch": 0.900398406374502, + "grad_norm": 0.6820102983014715, + "learning_rate": 3.9903109860565565e-05, + "loss": 0.3542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18337659537792206, + "step": 565, + "valid_targets_mean": 3625.2, + "valid_targets_min": 518 + }, + { + "epoch": 0.9083665338645418, + "grad_norm": 0.7533993179464932, + "learning_rate": 3.98951455771619e-05, + "loss": 0.3596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1618381142616272, + "step": 570, + "valid_targets_mean": 3087.0, + "valid_targets_min": 598 + }, + { + "epoch": 0.9163346613545816, + "grad_norm": 0.5956981450863615, + "learning_rate": 3.988686762321293e-05, + "loss": 0.3469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21477267146110535, + "step": 575, + "valid_targets_mean": 5261.5, + "valid_targets_min": 2841 + }, + { + "epoch": 0.9243027888446215, + "grad_norm": 0.7564199919542755, + "learning_rate": 3.987827612923041e-05, + "loss": 0.3489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756840944290161, + "step": 580, + "valid_targets_mean": 3110.1, + "valid_targets_min": 481 + }, + { + "epoch": 0.9322709163346613, + "grad_norm": 0.7011814423323345, + "learning_rate": 3.986937123066942e-05, + "loss": 0.36, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822158396244049, + "step": 585, + "valid_targets_mean": 4179.5, + "valid_targets_min": 2827 + }, + { + "epoch": 0.9402390438247012, + "grad_norm": 1.2458428255009688, + "learning_rate": 3.9860153067926235e-05, + "loss": 0.3473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14599651098251343, + "step": 590, + "valid_targets_mean": 3402.9, + "valid_targets_min": 1095 + }, + { + "epoch": 0.9482071713147411, + "grad_norm": 0.6929967658492394, + "learning_rate": 3.985062178633612e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20835021138191223, + "step": 595, + "valid_targets_mean": 3821.9, + "valid_targets_min": 3372 + }, + { + "epoch": 0.9561752988047809, + "grad_norm": 0.7061228324123497, + "learning_rate": 3.9840777536171026e-05, + "loss": 0.347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14604054391384125, + "step": 600, + "valid_targets_mean": 3521.1, + "valid_targets_min": 328 + }, + { + "epoch": 0.9641434262948207, + "grad_norm": 0.6151137860103864, + "learning_rate": 3.9830620472637214e-05, + "loss": 0.3479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1378212571144104, + "step": 605, + "valid_targets_mean": 3869.6, + "valid_targets_min": 2505 + }, + { + "epoch": 0.9721115537848606, + "grad_norm": 0.7078980854647497, + "learning_rate": 3.982015075587283e-05, + "loss": 0.3457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18525102734565735, + "step": 610, + "valid_targets_mean": 3604.5, + "valid_targets_min": 878 + }, + { + "epoch": 0.9800796812749004, + "grad_norm": 0.6675761629356032, + "learning_rate": 3.980936855094537e-05, + "loss": 0.3467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173541858792305, + "step": 615, + "valid_targets_mean": 4400.9, + "valid_targets_min": 465 + }, + { + "epoch": 0.9880478087649402, + "grad_norm": 0.7097226127876368, + "learning_rate": 3.979827402784906e-05, + "loss": 0.3748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17709887027740479, + "step": 620, + "valid_targets_mean": 2642.9, + "valid_targets_min": 636 + }, + { + "epoch": 0.9960159362549801, + "grad_norm": 0.6953431918337851, + "learning_rate": 3.978686736150221e-05, + "loss": 0.34, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15647362172603607, + "step": 625, + "valid_targets_mean": 3125.0, + "valid_targets_min": 245 + }, + { + "epoch": 1.003187250996016, + "grad_norm": 0.8380016134970487, + "learning_rate": 3.977514873174443e-05, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18529632687568665, + "step": 630, + "valid_targets_mean": 3895.5, + "valid_targets_min": 279 + }, + { + "epoch": 1.0111553784860559, + "grad_norm": 0.6180113975538215, + "learning_rate": 3.976311832333381e-05, + "loss": 0.3355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14978092908859253, + "step": 635, + "valid_targets_mean": 4151.5, + "valid_targets_min": 822 + }, + { + "epoch": 1.0191235059760957, + "grad_norm": 0.6477332225509156, + "learning_rate": 3.9750776325943984e-05, + "loss": 0.3278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18527810275554657, + "step": 640, + "valid_targets_mean": 4084.5, + "valid_targets_min": 1761 + }, + { + "epoch": 1.0270916334661355, + "grad_norm": 0.7186538375171995, + "learning_rate": 3.9738122934161174e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17046117782592773, + "step": 645, + "valid_targets_mean": 3528.1, + "valid_targets_min": 478 + }, + { + "epoch": 1.0350597609561754, + "grad_norm": 0.6735156586160675, + "learning_rate": 3.97251583474811e-05, + "loss": 0.3482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13899904489517212, + "step": 650, + "valid_targets_mean": 2988.0, + "valid_targets_min": 232 + }, + { + "epoch": 1.0430278884462152, + "grad_norm": 0.6583706162534825, + "learning_rate": 3.971188277030582e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.202030748128891, + "step": 655, + "valid_targets_mean": 4802.2, + "valid_targets_min": 4000 + }, + { + "epoch": 1.050996015936255, + "grad_norm": 0.6522611837588745, + "learning_rate": 3.969829641194055e-05, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15542668104171753, + "step": 660, + "valid_targets_mean": 3543.8, + "valid_targets_min": 638 + }, + { + "epoch": 1.0589641434262949, + "grad_norm": 0.5991682727250625, + "learning_rate": 3.968439948659033e-05, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15176421403884888, + "step": 665, + "valid_targets_mean": 4226.6, + "valid_targets_min": 616 + }, + { + "epoch": 1.0669322709163347, + "grad_norm": 0.7906438594031066, + "learning_rate": 3.967019221335664e-05, + "loss": 0.3448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22275003790855408, + "step": 670, + "valid_targets_mean": 4171.0, + "valid_targets_min": 3268 + }, + { + "epoch": 1.0749003984063745, + "grad_norm": 0.9001112914083158, + "learning_rate": 3.965567481623399e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16322645545005798, + "step": 675, + "valid_targets_mean": 3059.8, + "valid_targets_min": 189 + }, + { + "epoch": 1.0828685258964144, + "grad_norm": 0.6929341128744392, + "learning_rate": 3.9640847524106356e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1515749990940094, + "step": 680, + "valid_targets_mean": 3472.5, + "valid_targets_min": 411 + }, + { + "epoch": 1.0908366533864542, + "grad_norm": 0.6655570486924588, + "learning_rate": 3.9625710570743556e-05, + "loss": 0.3398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16119533777236938, + "step": 685, + "valid_targets_mean": 3803.9, + "valid_targets_min": 511 + }, + { + "epoch": 1.098804780876494, + "grad_norm": 0.656880149153456, + "learning_rate": 3.96102641947976e-05, + "loss": 0.3196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1461721807718277, + "step": 690, + "valid_targets_mean": 2939.8, + "valid_targets_min": 274 + }, + { + "epoch": 1.1067729083665339, + "grad_norm": 0.9014777398348911, + "learning_rate": 3.959450863979891e-05, + "loss": 0.3415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18205542862415314, + "step": 695, + "valid_targets_mean": 3970.0, + "valid_targets_min": 773 + }, + { + "epoch": 1.1147410358565737, + "grad_norm": 0.7286105371948465, + "learning_rate": 3.957844415415248e-05, + "loss": 0.3347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16195717453956604, + "step": 700, + "valid_targets_mean": 3414.6, + "valid_targets_min": 245 + }, + { + "epoch": 1.1227091633466135, + "grad_norm": 0.7140375479244369, + "learning_rate": 3.956207099113396e-05, + "loss": 0.3351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1416376531124115, + "step": 705, + "valid_targets_mean": 3106.2, + "valid_targets_min": 590 + }, + { + "epoch": 1.1306772908366534, + "grad_norm": 0.6387675684821813, + "learning_rate": 3.954538940888567e-05, + "loss": 0.3385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16593831777572632, + "step": 710, + "valid_targets_mean": 3963.5, + "valid_targets_min": 348 + }, + { + "epoch": 1.1386454183266932, + "grad_norm": 0.7108262306982716, + "learning_rate": 3.952839967041252e-05, + "loss": 0.3539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1952948421239853, + "step": 715, + "valid_targets_mean": 3867.1, + "valid_targets_min": 268 + }, + { + "epoch": 1.146613545816733, + "grad_norm": 0.6923790988667464, + "learning_rate": 3.951110204357787e-05, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1314292848110199, + "step": 720, + "valid_targets_mean": 2712.4, + "valid_targets_min": 559 + }, + { + "epoch": 1.1545816733067729, + "grad_norm": 0.7761718139472028, + "learning_rate": 3.9493496801099306e-05, + "loss": 0.3379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17276610434055328, + "step": 725, + "valid_targets_mean": 3137.4, + "valid_targets_min": 535 + }, + { + "epoch": 1.1625498007968127, + "grad_norm": 0.6421701750586802, + "learning_rate": 3.9475584220544335e-05, + "loss": 0.3349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1793321669101715, + "step": 730, + "valid_targets_mean": 5372.2, + "valid_targets_min": 2713 + }, + { + "epoch": 1.1705179282868525, + "grad_norm": 0.7749510793480261, + "learning_rate": 3.9457364584326005e-05, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492847204208374, + "step": 735, + "valid_targets_mean": 3576.1, + "valid_targets_min": 280 + }, + { + "epoch": 1.1784860557768924, + "grad_norm": 0.7949932249458553, + "learning_rate": 3.94388381796985e-05, + "loss": 0.3388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16831986606121063, + "step": 740, + "valid_targets_mean": 4208.0, + "valid_targets_min": 1011 + }, + { + "epoch": 1.1864541832669322, + "grad_norm": 0.6130986984958665, + "learning_rate": 3.942000529875251e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1676281839609146, + "step": 745, + "valid_targets_mean": 4924.5, + "valid_targets_min": 1280 + }, + { + "epoch": 1.194422310756972, + "grad_norm": 0.6689286573258375, + "learning_rate": 3.9400866238410736e-05, + "loss": 0.3442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14720451831817627, + "step": 750, + "valid_targets_mean": 3518.8, + "valid_targets_min": 821 + }, + { + "epoch": 1.2023904382470119, + "grad_norm": 0.6475785428629414, + "learning_rate": 3.9381421300423145e-05, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18296414613723755, + "step": 755, + "valid_targets_mean": 3739.0, + "valid_targets_min": 1162 + }, + { + "epoch": 1.2103585657370517, + "grad_norm": 0.6781254648218166, + "learning_rate": 3.936167079136222e-05, + "loss": 0.3459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15673884749412537, + "step": 760, + "valid_targets_mean": 3204.5, + "valid_targets_min": 956 + }, + { + "epoch": 1.2183266932270915, + "grad_norm": 0.6427243656380253, + "learning_rate": 3.934161502261814e-05, + "loss": 0.3378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19556352496147156, + "step": 765, + "valid_targets_mean": 4977.9, + "valid_targets_min": 3219 + }, + { + "epoch": 1.2262948207171314, + "grad_norm": 0.7264390882508804, + "learning_rate": 3.932125431039387e-05, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15922430157661438, + "step": 770, + "valid_targets_mean": 2880.5, + "valid_targets_min": 266 + }, + { + "epoch": 1.2342629482071712, + "grad_norm": 0.7144672783858196, + "learning_rate": 3.930058897570016e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332297921180725, + "step": 775, + "valid_targets_mean": 2939.6, + "valid_targets_min": 242 + }, + { + "epoch": 1.2422310756972113, + "grad_norm": 0.6867672735917674, + "learning_rate": 3.9279619344350505e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13223578035831451, + "step": 780, + "valid_targets_mean": 3395.1, + "valid_targets_min": 262 + }, + { + "epoch": 1.2501992031872509, + "grad_norm": 0.6135527493382645, + "learning_rate": 3.925834574695599e-05, + "loss": 0.3295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15247970819473267, + "step": 785, + "valid_targets_mean": 3609.8, + "valid_targets_min": 897 + }, + { + "epoch": 1.258167330677291, + "grad_norm": 0.6088121739198172, + "learning_rate": 3.923676851892008e-05, + "loss": 0.3413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15359815955162048, + "step": 790, + "valid_targets_mean": 3986.2, + "valid_targets_min": 278 + }, + { + "epoch": 1.2661354581673308, + "grad_norm": 0.646293293945006, + "learning_rate": 3.921488800043335e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14105680584907532, + "step": 795, + "valid_targets_mean": 3519.8, + "valid_targets_min": 1042 + }, + { + "epoch": 1.2741035856573706, + "grad_norm": 0.6234293152369844, + "learning_rate": 3.9192704536468106e-05, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406230628490448, + "step": 800, + "valid_targets_mean": 3263.4, + "valid_targets_min": 288 + }, + { + "epoch": 1.2820717131474104, + "grad_norm": 0.6443699988991758, + "learning_rate": 3.917021847677294e-05, + "loss": 0.3549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1499442160129547, + "step": 805, + "valid_targets_mean": 3704.4, + "valid_targets_min": 302 + }, + { + "epoch": 1.2900398406374503, + "grad_norm": 0.602243226611313, + "learning_rate": 3.914743017586722e-05, + "loss": 0.3528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15198102593421936, + "step": 810, + "valid_targets_mean": 4328.8, + "valid_targets_min": 1028 + }, + { + "epoch": 1.29800796812749, + "grad_norm": 0.7296266783180043, + "learning_rate": 3.912433999303552e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13439792394638062, + "step": 815, + "valid_targets_mean": 2737.6, + "valid_targets_min": 248 + }, + { + "epoch": 1.30597609561753, + "grad_norm": 0.6350956369195538, + "learning_rate": 3.910094829232194e-05, + "loss": 0.3313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1784883439540863, + "step": 820, + "valid_targets_mean": 4386.4, + "valid_targets_min": 1427 + }, + { + "epoch": 1.3139442231075698, + "grad_norm": 0.7219352485252675, + "learning_rate": 3.907725544252436e-05, + "loss": 0.3257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1668669879436493, + "step": 825, + "valid_targets_mean": 3696.9, + "valid_targets_min": 255 + }, + { + "epoch": 1.3219123505976096, + "grad_norm": 0.6504102122640691, + "learning_rate": 3.905326181718862e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1484387218952179, + "step": 830, + "valid_targets_mean": 3092.1, + "valid_targets_min": 359 + }, + { + "epoch": 1.3298804780876494, + "grad_norm": 0.6288078179306616, + "learning_rate": 3.902896779460266e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14200851321220398, + "step": 835, + "valid_targets_mean": 3019.5, + "valid_targets_min": 514 + }, + { + "epoch": 1.3378486055776893, + "grad_norm": 0.8076767091954578, + "learning_rate": 3.900437375779055e-05, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1670466810464859, + "step": 840, + "valid_targets_mean": 3895.8, + "valid_targets_min": 498 + }, + { + "epoch": 1.345816733067729, + "grad_norm": 0.6140652183984848, + "learning_rate": 3.8979480094506394e-05, + "loss": 0.3406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14599671959877014, + "step": 845, + "valid_targets_mean": 3340.5, + "valid_targets_min": 219 + }, + { + "epoch": 1.353784860557769, + "grad_norm": 0.6107968473694149, + "learning_rate": 3.895428719722832e-05, + "loss": 0.3255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1698405146598816, + "step": 850, + "valid_targets_mean": 4689.1, + "valid_targets_min": 810 + }, + { + "epoch": 1.3617529880478088, + "grad_norm": 0.9361963934323948, + "learning_rate": 3.8928795463152186e-05, + "loss": 0.3476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17818495631217957, + "step": 855, + "valid_targets_mean": 3841.5, + "valid_targets_min": 272 + }, + { + "epoch": 1.3697211155378486, + "grad_norm": 0.5664514572215686, + "learning_rate": 3.89030052941854e-05, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1609460860490799, + "step": 860, + "valid_targets_mean": 4430.2, + "valid_targets_min": 2145 + }, + { + "epoch": 1.3776892430278884, + "grad_norm": 0.6652826855162303, + "learning_rate": 3.8876917096940536e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12637989223003387, + "step": 865, + "valid_targets_mean": 2741.5, + "valid_targets_min": 1030 + }, + { + "epoch": 1.3856573705179283, + "grad_norm": 0.671985336767791, + "learning_rate": 3.8850531282728934e-05, + "loss": 0.3441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19516661763191223, + "step": 870, + "valid_targets_mean": 3528.1, + "valid_targets_min": 343 + }, + { + "epoch": 1.393625498007968, + "grad_norm": 0.6477967928804484, + "learning_rate": 3.882384826755422e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19669313728809357, + "step": 875, + "valid_targets_mean": 4708.4, + "valid_targets_min": 3502 + }, + { + "epoch": 1.401593625498008, + "grad_norm": 0.6262389017941296, + "learning_rate": 3.8796868472105746e-05, + "loss": 0.3197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1708228588104248, + "step": 880, + "valid_targets_mean": 5133.6, + "valid_targets_min": 3140 + }, + { + "epoch": 1.4095617529880478, + "grad_norm": 0.5665004228792191, + "learning_rate": 3.8769592321751964e-05, + "loss": 0.3194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891600489616394, + "step": 885, + "valid_targets_mean": 4765.6, + "valid_targets_min": 2599 + }, + { + "epoch": 1.4175298804780876, + "grad_norm": 0.7184391120878334, + "learning_rate": 3.87420202465337e-05, + "loss": 0.3261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749991774559021, + "step": 890, + "valid_targets_mean": 3819.4, + "valid_targets_min": 254 + }, + { + "epoch": 1.4254980079681274, + "grad_norm": 0.6043018244167944, + "learning_rate": 3.871415268115739e-05, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2269725501537323, + "step": 895, + "valid_targets_mean": 4961.9, + "valid_targets_min": 3451 + }, + { + "epoch": 1.4334661354581673, + "grad_norm": 0.5887212474653807, + "learning_rate": 3.868599006498823e-05, + "loss": 0.3239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1578642725944519, + "step": 900, + "valid_targets_mean": 4013.4, + "valid_targets_min": 184 + }, + { + "epoch": 1.441434262948207, + "grad_norm": 0.6249449772924667, + "learning_rate": 3.865753284204324e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13083437085151672, + "step": 905, + "valid_targets_mean": 2994.2, + "valid_targets_min": 283 + }, + { + "epoch": 1.4494023904382471, + "grad_norm": 0.6444643290345756, + "learning_rate": 3.862878146098426e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21578340232372284, + "step": 910, + "valid_targets_mean": 3560.9, + "valid_targets_min": 642 + }, + { + "epoch": 1.457370517928287, + "grad_norm": 0.6345924176843412, + "learning_rate": 3.859973637511086e-05, + "loss": 0.3258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16586630046367645, + "step": 915, + "valid_targets_mean": 4145.2, + "valid_targets_min": 341 + }, + { + "epoch": 1.4653386454183268, + "grad_norm": 0.7293833283228522, + "learning_rate": 3.857039804235327e-05, + "loss": 0.3393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20936918258666992, + "step": 920, + "valid_targets_mean": 3901.1, + "valid_targets_min": 530 + }, + { + "epoch": 1.4733067729083666, + "grad_norm": 0.6665691585288822, + "learning_rate": 3.854076692526505e-05, + "loss": 0.3279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19952437281608582, + "step": 925, + "valid_targets_mean": 4302.2, + "valid_targets_min": 3314 + }, + { + "epoch": 1.4812749003984065, + "grad_norm": 0.5880262605957536, + "learning_rate": 3.8510843491015874e-05, + "loss": 0.337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16966915130615234, + "step": 930, + "valid_targets_mean": 4073.6, + "valid_targets_min": 3095 + }, + { + "epoch": 1.4892430278884463, + "grad_norm": 0.5671415238129563, + "learning_rate": 3.848062821138415e-05, + "loss": 0.3174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13870111107826233, + "step": 935, + "valid_targets_mean": 3917.1, + "valid_targets_min": 345 + }, + { + "epoch": 1.4972111553784861, + "grad_norm": 0.5664536721175603, + "learning_rate": 3.8450121562749565e-05, + "loss": 0.3263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15465731918811798, + "step": 940, + "valid_targets_mean": 3790.1, + "valid_targets_min": 530 + }, + { + "epoch": 1.505179282868526, + "grad_norm": 0.6650849090179763, + "learning_rate": 3.841932402608557e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1365033984184265, + "step": 945, + "valid_targets_mean": 2987.6, + "valid_targets_min": 379 + }, + { + "epoch": 1.5131474103585658, + "grad_norm": 0.5881270123449338, + "learning_rate": 3.838823608695185e-05, + "loss": 0.3338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19470025599002838, + "step": 950, + "valid_targets_mean": 4070.0, + "valid_targets_min": 909 + }, + { + "epoch": 1.5211155378486056, + "grad_norm": 0.6805344923656982, + "learning_rate": 3.835685823548659e-05, + "loss": 0.3257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1532375067472458, + "step": 955, + "valid_targets_mean": 3163.1, + "valid_targets_min": 243 + }, + { + "epoch": 1.5290836653386455, + "grad_norm": 0.6804754134582519, + "learning_rate": 3.832519096639879e-05, + "loss": 0.321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1352333426475525, + "step": 960, + "valid_targets_mean": 3507.5, + "valid_targets_min": 261 + }, + { + "epoch": 1.5370517928286853, + "grad_norm": 0.6789653560379708, + "learning_rate": 3.829323477896048e-05, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16350319981575012, + "step": 965, + "valid_targets_mean": 2874.0, + "valid_targets_min": 223 + }, + { + "epoch": 1.5450199203187251, + "grad_norm": 0.6915509492873587, + "learning_rate": 3.8260990176998835e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1757764220237732, + "step": 970, + "valid_targets_mean": 4496.0, + "valid_targets_min": 245 + }, + { + "epoch": 1.552988047808765, + "grad_norm": 0.6426073811661999, + "learning_rate": 3.82284576688882e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18171828985214233, + "step": 975, + "valid_targets_mean": 3385.2, + "valid_targets_min": 949 + }, + { + "epoch": 1.5609561752988048, + "grad_norm": 1.1859300440389073, + "learning_rate": 3.819563776754212e-05, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10818570852279663, + "step": 980, + "valid_targets_mean": 2652.6, + "valid_targets_min": 758 + }, + { + "epoch": 1.5689243027888446, + "grad_norm": 0.7378665803791654, + "learning_rate": 3.8162530990405235e-05, + "loss": 0.3297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18052585422992706, + "step": 985, + "valid_targets_mean": 4138.9, + "valid_targets_min": 1227 + }, + { + "epoch": 1.5768924302788845, + "grad_norm": 0.7884738730445345, + "learning_rate": 3.8129137859445106e-05, + "loss": 0.3159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11578461527824402, + "step": 990, + "valid_targets_mean": 2230.2, + "valid_targets_min": 242 + }, + { + "epoch": 1.5848605577689243, + "grad_norm": 0.6764216933538416, + "learning_rate": 3.8095458901144014e-05, + "loss": 0.3195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14588657021522522, + "step": 995, + "valid_targets_mean": 2651.1, + "valid_targets_min": 273 + }, + { + "epoch": 1.5928286852589641, + "grad_norm": 0.7499955425166605, + "learning_rate": 3.806149464649066e-05, + "loss": 0.3337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16508588194847107, + "step": 1000, + "valid_targets_mean": 2787.5, + "valid_targets_min": 455 + }, + { + "epoch": 1.600796812749004, + "grad_norm": 0.5916363548759627, + "learning_rate": 3.802724563097175e-05, + "loss": 0.3366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14954009652137756, + "step": 1005, + "valid_targets_mean": 3623.4, + "valid_targets_min": 1240 + }, + { + "epoch": 1.6087649402390438, + "grad_norm": 0.734952901318973, + "learning_rate": 3.7992712394563606e-05, + "loss": 0.3412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2241578996181488, + "step": 1010, + "valid_targets_mean": 3893.6, + "valid_targets_min": 2274 + }, + { + "epoch": 1.6167330677290837, + "grad_norm": 0.6634751175431612, + "learning_rate": 3.795789548172362e-05, + "loss": 0.3314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16153842210769653, + "step": 1015, + "valid_targets_mean": 3391.4, + "valid_targets_min": 495 + }, + { + "epoch": 1.6247011952191235, + "grad_norm": 0.6907352514309517, + "learning_rate": 3.7922795441381674e-05, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15194550156593323, + "step": 1020, + "valid_targets_mean": 2836.1, + "valid_targets_min": 796 + }, + { + "epoch": 1.6326693227091633, + "grad_norm": 0.6434692779607148, + "learning_rate": 3.78874128269315e-05, + "loss": 0.3335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14861451089382172, + "step": 1025, + "valid_targets_mean": 3566.8, + "valid_targets_min": 493 + }, + { + "epoch": 1.6406374501992032, + "grad_norm": 0.6221059084208682, + "learning_rate": 3.785174819622195e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275479793548584, + "step": 1030, + "valid_targets_mean": 3273.4, + "valid_targets_min": 1054 + }, + { + "epoch": 1.648605577689243, + "grad_norm": 0.6489634971991323, + "learning_rate": 3.7815802111548185e-05, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17398403584957123, + "step": 1035, + "valid_targets_mean": 4010.5, + "valid_targets_min": 806 + }, + { + "epoch": 1.6565737051792828, + "grad_norm": 0.5827419736850272, + "learning_rate": 3.777957513964282e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1529952734708786, + "step": 1040, + "valid_targets_mean": 4105.8, + "valid_targets_min": 571 + }, + { + "epoch": 1.6645418326693227, + "grad_norm": 0.605605540428135, + "learning_rate": 3.7743067851666994e-05, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1617601215839386, + "step": 1045, + "valid_targets_mean": 3637.4, + "valid_targets_min": 760 + }, + { + "epoch": 1.6725099601593625, + "grad_norm": 0.5958018681284462, + "learning_rate": 3.770628082320137e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18859803676605225, + "step": 1050, + "valid_targets_mean": 4714.4, + "valid_targets_min": 3961 + }, + { + "epoch": 1.6804780876494023, + "grad_norm": 0.5548342819557698, + "learning_rate": 3.766921463423704e-05, + "loss": 0.3273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481020450592041, + "step": 1055, + "valid_targets_mean": 3693.8, + "valid_targets_min": 942 + }, + { + "epoch": 1.6884462151394422, + "grad_norm": 0.6270607165257036, + "learning_rate": 3.7631869869166396e-05, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16427603363990784, + "step": 1060, + "valid_targets_mean": 3412.0, + "valid_targets_min": 800 + }, + { + "epoch": 1.696414342629482, + "grad_norm": 0.6298749105786624, + "learning_rate": 3.759424711677391e-05, + "loss": 0.325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17540724575519562, + "step": 1065, + "valid_targets_mean": 4121.4, + "valid_targets_min": 1091 + }, + { + "epoch": 1.7043824701195218, + "grad_norm": 0.6045268705855914, + "learning_rate": 3.755634697022686e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19350652396678925, + "step": 1070, + "valid_targets_mean": 4344.9, + "valid_targets_min": 3294 + }, + { + "epoch": 1.7123505976095617, + "grad_norm": 0.778685455221528, + "learning_rate": 3.751817002706596e-05, + "loss": 0.3343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20157337188720703, + "step": 1075, + "valid_targets_mean": 4317.4, + "valid_targets_min": 2981 + }, + { + "epoch": 1.7203187250996015, + "grad_norm": 0.676232749412372, + "learning_rate": 3.747971688919597e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13573341071605682, + "step": 1080, + "valid_targets_mean": 2960.9, + "valid_targets_min": 749 + }, + { + "epoch": 1.7282868525896413, + "grad_norm": 0.5653574577331923, + "learning_rate": 3.744098816287616e-05, + "loss": 0.3275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1345827579498291, + "step": 1085, + "valid_targets_mean": 3466.1, + "valid_targets_min": 949 + }, + { + "epoch": 1.7362549800796812, + "grad_norm": 0.6351690816506902, + "learning_rate": 3.7401984458710796e-05, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14693620800971985, + "step": 1090, + "valid_targets_mean": 3326.4, + "valid_targets_min": 245 + }, + { + "epoch": 1.744223107569721, + "grad_norm": 0.6561424657402385, + "learning_rate": 3.73627063916395e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14549244940280914, + "step": 1095, + "valid_targets_mean": 3336.9, + "valid_targets_min": 797 + }, + { + "epoch": 1.7521912350597608, + "grad_norm": 0.7612659059318599, + "learning_rate": 3.732315458092754e-05, + "loss": 0.3236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15325888991355896, + "step": 1100, + "valid_targets_mean": 3915.9, + "valid_targets_min": 1058 + }, + { + "epoch": 1.7601593625498007, + "grad_norm": 0.5708159840236217, + "learning_rate": 3.728332965015608e-05, + "loss": 0.3201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1499689221382141, + "step": 1105, + "valid_targets_mean": 4427.4, + "valid_targets_min": 320 + }, + { + "epoch": 1.7681274900398405, + "grad_norm": 0.6187362281801017, + "learning_rate": 3.724323222721234e-05, + "loss": 0.3285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14799684286117554, + "step": 1110, + "valid_targets_mean": 4080.1, + "valid_targets_min": 3371 + }, + { + "epoch": 1.7760956175298803, + "grad_norm": 0.623009628447439, + "learning_rate": 3.720286294427972e-05, + "loss": 0.3256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13788515329360962, + "step": 1115, + "valid_targets_mean": 3574.0, + "valid_targets_min": 306 + }, + { + "epoch": 1.7840637450199202, + "grad_norm": 0.5652559360866017, + "learning_rate": 3.716222243782778e-05, + "loss": 0.3173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12565413117408752, + "step": 1120, + "valid_targets_mean": 3111.9, + "valid_targets_min": 531 + }, + { + "epoch": 1.7920318725099602, + "grad_norm": 0.5851792904377354, + "learning_rate": 3.712131134860229e-05, + "loss": 0.3214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15786227583885193, + "step": 1125, + "valid_targets_mean": 3497.2, + "valid_targets_min": 280 + }, + { + "epoch": 1.8, + "grad_norm": 0.5995685863739088, + "learning_rate": 3.708013032161502e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15148508548736572, + "step": 1130, + "valid_targets_mean": 3953.8, + "valid_targets_min": 559 + }, + { + "epoch": 1.8079681274900399, + "grad_norm": 0.6910535563908953, + "learning_rate": 3.703868000613369e-05, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17042243480682373, + "step": 1135, + "valid_targets_mean": 4433.4, + "valid_targets_min": 3430 + }, + { + "epoch": 1.8159362549800797, + "grad_norm": 0.6466466879164442, + "learning_rate": 3.6996961055671625e-05, + "loss": 0.3273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17712724208831787, + "step": 1140, + "valid_targets_mean": 4915.8, + "valid_targets_min": 3569 + }, + { + "epoch": 1.8239043824701195, + "grad_norm": 0.7685923945953174, + "learning_rate": 3.695497412797751e-05, + "loss": 0.3125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1342412382364273, + "step": 1145, + "valid_targets_mean": 2233.5, + "valid_targets_min": 227 + }, + { + "epoch": 1.8318725099601594, + "grad_norm": 0.5981690154674036, + "learning_rate": 3.6912719885025026e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15309564769268036, + "step": 1150, + "valid_targets_mean": 3787.4, + "valid_targets_min": 679 + }, + { + "epoch": 1.8398406374501992, + "grad_norm": 0.5774296637019503, + "learning_rate": 3.687019899300238e-05, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1481582522392273, + "step": 1155, + "valid_targets_mean": 4259.6, + "valid_targets_min": 2558 + }, + { + "epoch": 1.847808764940239, + "grad_norm": 0.6634038779165066, + "learning_rate": 3.6827412122301805e-05, + "loss": 0.3357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17690718173980713, + "step": 1160, + "valid_targets_mean": 3654.9, + "valid_targets_min": 1573 + }, + { + "epoch": 1.8557768924302789, + "grad_norm": 0.6847713609664849, + "learning_rate": 3.678435994750905e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1776587814092636, + "step": 1165, + "valid_targets_mean": 3298.9, + "valid_targets_min": 543 + }, + { + "epoch": 1.8637450199203187, + "grad_norm": 0.6440304329296582, + "learning_rate": 3.6741043147392634e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16797837615013123, + "step": 1170, + "valid_targets_mean": 4261.9, + "valid_targets_min": 3587 + }, + { + "epoch": 1.8717131474103585, + "grad_norm": 0.6507238996694699, + "learning_rate": 3.6697462404893257e-05, + "loss": 0.3229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15259000658988953, + "step": 1175, + "valid_targets_mean": 3302.1, + "valid_targets_min": 288 + }, + { + "epoch": 1.8796812749003984, + "grad_norm": 0.7378478562605171, + "learning_rate": 3.665361840711297e-05, + "loss": 0.3231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13854113221168518, + "step": 1180, + "valid_targets_mean": 2631.8, + "valid_targets_min": 462 + }, + { + "epoch": 1.8876494023904382, + "grad_norm": 0.6457405994361323, + "learning_rate": 3.660951184530434e-05, + "loss": 0.3217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1787298023700714, + "step": 1185, + "valid_targets_mean": 3707.5, + "valid_targets_min": 254 + }, + { + "epoch": 1.895617529880478, + "grad_norm": 0.6591963554947874, + "learning_rate": 3.656514341485959e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1772817075252533, + "step": 1190, + "valid_targets_mean": 4114.0, + "valid_targets_min": 1187 + }, + { + "epoch": 1.9035856573705179, + "grad_norm": 0.5344285689213272, + "learning_rate": 3.65205138152996e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16616226732730865, + "step": 1195, + "valid_targets_mean": 4997.6, + "valid_targets_min": 3276 + }, + { + "epoch": 1.9115537848605577, + "grad_norm": 0.7522572262927913, + "learning_rate": 3.647562375026289e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16722427308559418, + "step": 1200, + "valid_targets_mean": 4735.5, + "valid_targets_min": 3503 + }, + { + "epoch": 1.9195219123505978, + "grad_norm": 0.6619997058919527, + "learning_rate": 3.643047392749453e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2077215015888214, + "step": 1205, + "valid_targets_mean": 4616.6, + "valid_targets_min": 3346 + }, + { + "epoch": 1.9274900398406376, + "grad_norm": 0.6349179521539451, + "learning_rate": 3.638506505883497e-05, + "loss": 0.333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1884724348783493, + "step": 1210, + "valid_targets_mean": 4218.8, + "valid_targets_min": 1537 + }, + { + "epoch": 1.9354581673306774, + "grad_norm": 0.5523245913514429, + "learning_rate": 3.633939786020884e-05, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13215897977352142, + "step": 1215, + "valid_targets_mean": 3504.0, + "valid_targets_min": 428 + }, + { + "epoch": 1.9434262948207173, + "grad_norm": 0.5915678062428961, + "learning_rate": 3.629347305161365e-05, + "loss": 0.3162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16069427132606506, + "step": 1220, + "valid_targets_mean": 3688.9, + "valid_targets_min": 599 + }, + { + "epoch": 1.951394422310757, + "grad_norm": 0.6738068743559218, + "learning_rate": 3.62472913571084e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14029105007648468, + "step": 1225, + "valid_targets_mean": 3101.8, + "valid_targets_min": 233 + }, + { + "epoch": 1.959362549800797, + "grad_norm": 0.6403284264064206, + "learning_rate": 3.620085350480226e-05, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12314704805612564, + "step": 1230, + "valid_targets_mean": 2816.9, + "valid_targets_min": 1000 + }, + { + "epoch": 1.9673306772908368, + "grad_norm": 0.6325279233165706, + "learning_rate": 3.615416022684298e-05, + "loss": 0.319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16894102096557617, + "step": 1235, + "valid_targets_mean": 3623.5, + "valid_targets_min": 429 + }, + { + "epoch": 1.9752988047808766, + "grad_norm": 0.6533014597720369, + "learning_rate": 3.610721225940542e-05, + "loss": 0.3339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20440031588077545, + "step": 1240, + "valid_targets_mean": 4533.1, + "valid_targets_min": 3572 + }, + { + "epoch": 1.9832669322709164, + "grad_norm": 0.7340801153525159, + "learning_rate": 3.606001034267992e-05, + "loss": 0.3208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15113261342048645, + "step": 1245, + "valid_targets_mean": 3354.2, + "valid_targets_min": 271 + }, + { + "epoch": 1.9912350597609563, + "grad_norm": 0.6771335347513721, + "learning_rate": 3.6012555220860634e-05, + "loss": 0.3173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15480926632881165, + "step": 1250, + "valid_targets_mean": 2704.0, + "valid_targets_min": 261 + }, + { + "epoch": 1.999203187250996, + "grad_norm": 0.5818311473977106, + "learning_rate": 3.596484764213379e-05, + "loss": 0.3369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16493937373161316, + "step": 1255, + "valid_targets_mean": 3971.1, + "valid_targets_min": 777 + }, + { + "epoch": 2.006374501992032, + "grad_norm": 0.5836251192424022, + "learning_rate": 3.591688835866589e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17885245382785797, + "step": 1260, + "valid_targets_mean": 4970.4, + "valid_targets_min": 3162 + }, + { + "epoch": 2.014342629482072, + "grad_norm": 0.5629975095356654, + "learning_rate": 3.5868678126591884e-05, + "loss": 0.2972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1434982866048813, + "step": 1265, + "valid_targets_mean": 4153.0, + "valid_targets_min": 3178 + }, + { + "epoch": 2.0223107569721117, + "grad_norm": 0.619157611992127, + "learning_rate": 3.5820217706003194e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1476842761039734, + "step": 1270, + "valid_targets_mean": 4041.4, + "valid_targets_min": 1166 + }, + { + "epoch": 2.0302788844621515, + "grad_norm": 0.6346864666948292, + "learning_rate": 3.577150786093579e-05, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16996094584465027, + "step": 1275, + "valid_targets_mean": 3509.5, + "valid_targets_min": 411 + }, + { + "epoch": 2.0382470119521914, + "grad_norm": 0.6778719480532754, + "learning_rate": 3.572254935935808e-05, + "loss": 0.3092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883283257484436, + "step": 1280, + "valid_targets_mean": 3941.2, + "valid_targets_min": 584 + }, + { + "epoch": 2.046215139442231, + "grad_norm": 0.6049255619263011, + "learning_rate": 3.567334297315887e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14070260524749756, + "step": 1285, + "valid_targets_mean": 3641.1, + "valid_targets_min": 645 + }, + { + "epoch": 2.054183266932271, + "grad_norm": 0.6374919156353854, + "learning_rate": 3.562388947813514e-05, + "loss": 0.3171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15400946140289307, + "step": 1290, + "valid_targets_mean": 3828.2, + "valid_targets_min": 1073 + }, + { + "epoch": 2.062151394422311, + "grad_norm": 0.6895996869045218, + "learning_rate": 3.557418965397985e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1403256058692932, + "step": 1295, + "valid_targets_mean": 2555.0, + "valid_targets_min": 407 + }, + { + "epoch": 2.0701195219123507, + "grad_norm": 0.7053623742408442, + "learning_rate": 3.552424428426962e-05, + "loss": 0.3238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919977366924286, + "step": 1300, + "valid_targets_mean": 4301.8, + "valid_targets_min": 3630 + }, + { + "epoch": 2.0780876494023905, + "grad_norm": 0.6095070399751719, + "learning_rate": 3.547405415645237e-05, + "loss": 0.3265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13988962769508362, + "step": 1305, + "valid_targets_mean": 3645.9, + "valid_targets_min": 969 + }, + { + "epoch": 2.0860557768924304, + "grad_norm": 0.7010958997601787, + "learning_rate": 3.542362006183496e-05, + "loss": 0.3078, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16029886901378632, + "step": 1310, + "valid_targets_mean": 4184.8, + "valid_targets_min": 2397 + }, + { + "epoch": 2.09402390438247, + "grad_norm": 0.681550955504812, + "learning_rate": 3.5372942795570644e-05, + "loss": 0.3137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13907872140407562, + "step": 1315, + "valid_targets_mean": 2587.6, + "valid_targets_min": 655 + }, + { + "epoch": 2.10199203187251, + "grad_norm": 0.6872243323376277, + "learning_rate": 3.532202315664658e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16166797280311584, + "step": 1320, + "valid_targets_mean": 3661.5, + "valid_targets_min": 997 + }, + { + "epoch": 2.10996015936255, + "grad_norm": 0.678192201244478, + "learning_rate": 3.527086194787121e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16050675511360168, + "step": 1325, + "valid_targets_mean": 3741.9, + "valid_targets_min": 233 + }, + { + "epoch": 2.1179282868525897, + "grad_norm": 0.6561493441706079, + "learning_rate": 3.521945997586162e-05, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.154448464512825, + "step": 1330, + "valid_targets_mean": 3963.8, + "valid_targets_min": 771 + }, + { + "epoch": 2.1258964143426295, + "grad_norm": 0.6432189194511014, + "learning_rate": 3.51678180510308e-05, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16677416861057281, + "step": 1335, + "valid_targets_mean": 4409.4, + "valid_targets_min": 2325 + }, + { + "epoch": 2.1338645418326694, + "grad_norm": 0.6549823203011039, + "learning_rate": 3.511593698757491e-05, + "loss": 0.3014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1379939168691635, + "step": 1340, + "valid_targets_mean": 3496.5, + "valid_targets_min": 272 + }, + { + "epoch": 2.141832669322709, + "grad_norm": 0.5257721865369313, + "learning_rate": 3.506381760346037e-05, + "loss": 0.3102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14774326980113983, + "step": 1345, + "valid_targets_mean": 5081.5, + "valid_targets_min": 234 + }, + { + "epoch": 2.149800796812749, + "grad_norm": 0.6432511783636684, + "learning_rate": 3.501146072041104e-05, + "loss": 0.3101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10675960779190063, + "step": 1350, + "valid_targets_mean": 3111.6, + "valid_targets_min": 1187 + }, + { + "epoch": 2.157768924302789, + "grad_norm": 0.7306004464900633, + "learning_rate": 3.495886716389523e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17563018202781677, + "step": 1355, + "valid_targets_mean": 4250.4, + "valid_targets_min": 535 + }, + { + "epoch": 2.1657370517928287, + "grad_norm": 0.5859975987411148, + "learning_rate": 3.4906037763112665e-05, + "loss": 0.3191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14896699786186218, + "step": 1360, + "valid_targets_mean": 3614.5, + "valid_targets_min": 218 + }, + { + "epoch": 2.1737051792828685, + "grad_norm": 0.6383870688004181, + "learning_rate": 3.4852973350981464e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17357799410820007, + "step": 1365, + "valid_targets_mean": 4153.1, + "valid_targets_min": 392 + }, + { + "epoch": 2.1816733067729084, + "grad_norm": 0.660721797475077, + "learning_rate": 3.4799674764124956e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1358649879693985, + "step": 1370, + "valid_targets_mean": 3674.2, + "valid_targets_min": 523 + }, + { + "epoch": 2.189641434262948, + "grad_norm": 0.5956420229552498, + "learning_rate": 3.474614284285852e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17175112664699554, + "step": 1375, + "valid_targets_mean": 4028.9, + "valid_targets_min": 3225 + }, + { + "epoch": 2.197609561752988, + "grad_norm": 0.6745536992357251, + "learning_rate": 3.469237843117634e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17130735516548157, + "step": 1380, + "valid_targets_mean": 3824.5, + "valid_targets_min": 935 + }, + { + "epoch": 2.205577689243028, + "grad_norm": 0.6404088633926213, + "learning_rate": 3.4638382376738064e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14818412065505981, + "step": 1385, + "valid_targets_mean": 4813.4, + "valid_targets_min": 3722 + }, + { + "epoch": 2.2135458167330677, + "grad_norm": 0.6647290971651754, + "learning_rate": 3.458415553085548e-05, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16095389425754547, + "step": 1390, + "valid_targets_mean": 3601.6, + "valid_targets_min": 933 + }, + { + "epoch": 2.2215139442231076, + "grad_norm": 0.6505561635346703, + "learning_rate": 3.4529698748479075e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18228718638420105, + "step": 1395, + "valid_targets_mean": 4198.2, + "valid_targets_min": 2518 + }, + { + "epoch": 2.2294820717131474, + "grad_norm": 0.5705495422269455, + "learning_rate": 3.4475012888184536e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16400834918022156, + "step": 1400, + "valid_targets_mean": 4684.5, + "valid_targets_min": 2803 + }, + { + "epoch": 2.237450199203187, + "grad_norm": 0.6139488510987134, + "learning_rate": 3.4420098812159266e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1334049552679062, + "step": 1405, + "valid_targets_mean": 3723.1, + "valid_targets_min": 918 + }, + { + "epoch": 2.245418326693227, + "grad_norm": 0.612388930146584, + "learning_rate": 3.4364957386188744e-05, + "loss": 0.2991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16954825818538666, + "step": 1410, + "valid_targets_mean": 4213.2, + "valid_targets_min": 706 + }, + { + "epoch": 2.253386454183267, + "grad_norm": 0.5997111383613154, + "learning_rate": 3.4309589479642894e-05, + "loss": 0.3093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729383021593094, + "step": 1415, + "valid_targets_mean": 3919.0, + "valid_targets_min": 533 + }, + { + "epoch": 2.2613545816733067, + "grad_norm": 0.9734635696926245, + "learning_rate": 3.425399596546237e-05, + "loss": 0.3041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13724632561206818, + "step": 1420, + "valid_targets_mean": 3753.0, + "valid_targets_min": 1731 + }, + { + "epoch": 2.2693227091633466, + "grad_norm": 0.6508062353110562, + "learning_rate": 3.4198177720144794e-05, + "loss": 0.297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14353463053703308, + "step": 1425, + "valid_targets_mean": 3168.0, + "valid_targets_min": 775 + }, + { + "epoch": 2.2772908366533864, + "grad_norm": 0.6387766951543817, + "learning_rate": 3.4142135623730954e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1348034292459488, + "step": 1430, + "valid_targets_mean": 3531.2, + "valid_targets_min": 1542 + }, + { + "epoch": 2.285258964143426, + "grad_norm": 0.6473111516507845, + "learning_rate": 3.4085870559790905e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1499640792608261, + "step": 1435, + "valid_targets_mean": 3610.5, + "valid_targets_min": 986 + }, + { + "epoch": 2.293227091633466, + "grad_norm": 0.5837371285483895, + "learning_rate": 3.402938341541005e-05, + "loss": 0.3064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15251114964485168, + "step": 1440, + "valid_targets_mean": 3497.9, + "valid_targets_min": 269 + }, + { + "epoch": 2.301195219123506, + "grad_norm": 0.643445157252701, + "learning_rate": 3.397267508117517e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16282296180725098, + "step": 1445, + "valid_targets_mean": 4048.0, + "valid_targets_min": 1547 + }, + { + "epoch": 2.3091633466135457, + "grad_norm": 0.626513759608805, + "learning_rate": 3.391574645116034e-05, + "loss": 0.2977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1480455845594406, + "step": 1450, + "valid_targets_mean": 4264.8, + "valid_targets_min": 3640 + }, + { + "epoch": 2.3171314741035856, + "grad_norm": 0.6006561956598667, + "learning_rate": 3.385859842291287e-05, + "loss": 0.3033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14290261268615723, + "step": 1455, + "valid_targets_mean": 4117.2, + "valid_targets_min": 3252 + }, + { + "epoch": 2.3250996015936254, + "grad_norm": 0.6704738132790149, + "learning_rate": 3.380123189743914e-05, + "loss": 0.2997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08275922387838364, + "step": 1460, + "valid_targets_mean": 2191.5, + "valid_targets_min": 246 + }, + { + "epoch": 2.333067729083665, + "grad_norm": 0.5883954078707374, + "learning_rate": 3.374364777919042e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14832335710525513, + "step": 1465, + "valid_targets_mean": 3908.2, + "valid_targets_min": 2545 + }, + { + "epoch": 2.341035856573705, + "grad_norm": 0.6709760261263392, + "learning_rate": 3.368584697604856e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15644146502017975, + "step": 1470, + "valid_targets_mean": 3209.9, + "valid_targets_min": 842 + }, + { + "epoch": 2.349003984063745, + "grad_norm": 0.6223432364326, + "learning_rate": 3.362783039931172e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1454828828573227, + "step": 1475, + "valid_targets_mean": 3254.8, + "valid_targets_min": 561 + }, + { + "epoch": 2.3569721115537847, + "grad_norm": 0.7653458669062861, + "learning_rate": 3.356959896367997e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20823490619659424, + "step": 1480, + "valid_targets_mean": 4071.9, + "valid_targets_min": 692 + }, + { + "epoch": 2.3649402390438246, + "grad_norm": 0.6555162840657145, + "learning_rate": 3.351115358724089e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11996582895517349, + "step": 1485, + "valid_targets_mean": 2636.9, + "valid_targets_min": 278 + }, + { + "epoch": 2.3729083665338644, + "grad_norm": 0.6131098404603604, + "learning_rate": 3.345249519145512e-05, + "loss": 0.3037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13851338624954224, + "step": 1490, + "valid_targets_mean": 3307.1, + "valid_targets_min": 249 + }, + { + "epoch": 2.380876494023904, + "grad_norm": 0.587155183133548, + "learning_rate": 3.339362470114176e-05, + "loss": 0.3101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1548629105091095, + "step": 1495, + "valid_targets_mean": 4667.6, + "valid_targets_min": 2986 + }, + { + "epoch": 2.388844621513944, + "grad_norm": 0.6205162623877221, + "learning_rate": 3.333454304446385e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15081962943077087, + "step": 1500, + "valid_targets_mean": 3304.5, + "valid_targets_min": 845 + }, + { + "epoch": 2.396812749003984, + "grad_norm": 0.6088762919984781, + "learning_rate": 3.3275251152913735e-05, + "loss": 0.3155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1803358793258667, + "step": 1505, + "valid_targets_mean": 3561.1, + "valid_targets_min": 313 + }, + { + "epoch": 2.4047808764940237, + "grad_norm": 0.6165816905297514, + "learning_rate": 3.3215749961298324e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14795807003974915, + "step": 1510, + "valid_targets_mean": 3361.0, + "valid_targets_min": 522 + }, + { + "epoch": 2.4127490039840636, + "grad_norm": 0.7093923887292809, + "learning_rate": 3.315604040772442e-05, + "loss": 0.3083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511913388967514, + "step": 1515, + "valid_targets_mean": 4108.2, + "valid_targets_min": 1085 + }, + { + "epoch": 2.4207171314741034, + "grad_norm": 0.5461063106221496, + "learning_rate": 3.3096123433583886e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1274980902671814, + "step": 1520, + "valid_targets_mean": 3921.0, + "valid_targets_min": 453 + }, + { + "epoch": 2.4286852589641432, + "grad_norm": 0.5754317670287957, + "learning_rate": 3.303599998353882e-05, + "loss": 0.2932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15238189697265625, + "step": 1525, + "valid_targets_mean": 3932.9, + "valid_targets_min": 742 + }, + { + "epoch": 2.436653386454183, + "grad_norm": 0.6267899882855434, + "learning_rate": 3.297567100550667e-05, + "loss": 0.313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574801802635193, + "step": 1530, + "valid_targets_mean": 3298.8, + "valid_targets_min": 1051 + }, + { + "epoch": 2.444621513944223, + "grad_norm": 0.6178767344720022, + "learning_rate": 3.2915137450645245e-05, + "loss": 0.3021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14424434304237366, + "step": 1535, + "valid_targets_mean": 3714.1, + "valid_targets_min": 673 + }, + { + "epoch": 2.4525896414342627, + "grad_norm": 0.5925864662994356, + "learning_rate": 3.285440027333777e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15461206436157227, + "step": 1540, + "valid_targets_mean": 4021.6, + "valid_targets_min": 3645 + }, + { + "epoch": 2.4605577689243026, + "grad_norm": 0.5067020381686023, + "learning_rate": 3.2793460431177827e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13205289840698242, + "step": 1545, + "valid_targets_mean": 4157.4, + "valid_targets_min": 1114 + }, + { + "epoch": 2.4685258964143424, + "grad_norm": 0.5570806905733114, + "learning_rate": 3.273231888495424e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14321979880332947, + "step": 1550, + "valid_targets_mean": 4194.4, + "valid_targets_min": 1027 + }, + { + "epoch": 2.4764940239043822, + "grad_norm": 0.715776387657958, + "learning_rate": 3.267097659863592e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14710450172424316, + "step": 1555, + "valid_targets_mean": 3978.6, + "valid_targets_min": 372 + }, + { + "epoch": 2.4844621513944225, + "grad_norm": 0.6019126948812167, + "learning_rate": 3.2609434539356726e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13887368142604828, + "step": 1560, + "valid_targets_mean": 3164.2, + "valid_targets_min": 1012 + }, + { + "epoch": 2.4924302788844623, + "grad_norm": 0.5842347130598545, + "learning_rate": 3.2547693677400126e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16767601668834686, + "step": 1565, + "valid_targets_mean": 4596.9, + "valid_targets_min": 2690 + }, + { + "epoch": 2.5003984063745017, + "grad_norm": 0.6014151039522305, + "learning_rate": 3.248575498618398e-05, + "loss": 0.2984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13801634311676025, + "step": 1570, + "valid_targets_mean": 3576.6, + "valid_targets_min": 2358 + }, + { + "epoch": 2.5083665338645416, + "grad_norm": 0.5988669443893535, + "learning_rate": 3.242361944224515e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1550188958644867, + "step": 1575, + "valid_targets_mean": 4412.0, + "valid_targets_min": 939 + }, + { + "epoch": 2.516334661354582, + "grad_norm": 1.1602051750236582, + "learning_rate": 3.236128802522411e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16259071230888367, + "step": 1580, + "valid_targets_mean": 3310.6, + "valid_targets_min": 393 + }, + { + "epoch": 2.5243027888446217, + "grad_norm": 0.619395798668418, + "learning_rate": 3.229876171784952e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16469460725784302, + "step": 1585, + "valid_targets_mean": 3745.2, + "valid_targets_min": 1495 + }, + { + "epoch": 2.5322709163346615, + "grad_norm": 0.5811774984361462, + "learning_rate": 3.22360415059227e-05, + "loss": 0.3038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15185478329658508, + "step": 1590, + "valid_targets_mean": 3504.4, + "valid_targets_min": 516 + }, + { + "epoch": 2.5402390438247013, + "grad_norm": 0.6592375730142807, + "learning_rate": 3.217312837830212e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12678351998329163, + "step": 1595, + "valid_targets_mean": 2578.0, + "valid_targets_min": 329 + }, + { + "epoch": 2.548207171314741, + "grad_norm": 0.5982285666928621, + "learning_rate": 3.211002332688779e-05, + "loss": 0.3056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14462913572788239, + "step": 1600, + "valid_targets_mean": 3755.1, + "valid_targets_min": 473 + }, + { + "epoch": 2.556175298804781, + "grad_norm": 0.634918588914544, + "learning_rate": 3.2046727346605604e-05, + "loss": 0.3076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10820312798023224, + "step": 1605, + "valid_targets_mean": 2966.2, + "valid_targets_min": 245 + }, + { + "epoch": 2.564143426294821, + "grad_norm": 0.5690604204451986, + "learning_rate": 3.198324143539172e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1273760050535202, + "step": 1610, + "valid_targets_mean": 3376.6, + "valid_targets_min": 1063 + }, + { + "epoch": 2.5721115537848607, + "grad_norm": 0.6754675686773965, + "learning_rate": 3.191956659417674e-05, + "loss": 0.3025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14878042042255402, + "step": 1615, + "valid_targets_mean": 2537.5, + "valid_targets_min": 277 + }, + { + "epoch": 2.5800796812749005, + "grad_norm": 0.6115452619485489, + "learning_rate": 3.185570382687e-05, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1950845569372177, + "step": 1620, + "valid_targets_mean": 4007.8, + "valid_targets_min": 918 + }, + { + "epoch": 2.5880478087649403, + "grad_norm": 0.5469768203006049, + "learning_rate": 3.17916541403437e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14569422602653503, + "step": 1625, + "valid_targets_mean": 3992.4, + "valid_targets_min": 3098 + }, + { + "epoch": 2.59601593625498, + "grad_norm": 0.5415260690855109, + "learning_rate": 3.172741854441704e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13133631646633148, + "step": 1630, + "valid_targets_mean": 4142.5, + "valid_targets_min": 3498 + }, + { + "epoch": 2.60398406374502, + "grad_norm": 0.6228669410969733, + "learning_rate": 3.1662998051840306e-05, + "loss": 0.2989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15364228188991547, + "step": 1635, + "valid_targets_mean": 3908.6, + "valid_targets_min": 2193 + }, + { + "epoch": 2.61195219123506, + "grad_norm": 0.7163920939053808, + "learning_rate": 3.159839367827891e-05, + "loss": 0.2995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16317997872829437, + "step": 1640, + "valid_targets_mean": 2720.8, + "valid_targets_min": 626 + }, + { + "epoch": 2.6199203187250997, + "grad_norm": 0.5093151040383717, + "learning_rate": 3.153360644229735e-05, + "loss": 0.3055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13812774419784546, + "step": 1645, + "valid_targets_mean": 4209.6, + "valid_targets_min": 1835 + }, + { + "epoch": 2.6278884462151395, + "grad_norm": 0.5611384387918703, + "learning_rate": 3.146863736534317e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440148502588272, + "step": 1650, + "valid_targets_mean": 3927.5, + "valid_targets_min": 1248 + }, + { + "epoch": 2.6358565737051793, + "grad_norm": 0.5949756135166688, + "learning_rate": 3.140348747173086e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11527255177497864, + "step": 1655, + "valid_targets_mean": 3250.5, + "valid_targets_min": 590 + }, + { + "epoch": 2.643824701195219, + "grad_norm": 0.6260767960636067, + "learning_rate": 3.1338157788625695e-05, + "loss": 0.3124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17812351882457733, + "step": 1660, + "valid_targets_mean": 4397.9, + "valid_targets_min": 2449 + }, + { + "epoch": 2.651792828685259, + "grad_norm": 0.5851065166943961, + "learning_rate": 3.127264934602754e-05, + "loss": 0.3076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1575583666563034, + "step": 1665, + "valid_targets_mean": 3895.4, + "valid_targets_min": 1835 + }, + { + "epoch": 2.659760956175299, + "grad_norm": 0.6238084747272821, + "learning_rate": 3.120696317675462e-05, + "loss": 0.3171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15226903557777405, + "step": 1670, + "valid_targets_mean": 3680.2, + "valid_targets_min": 888 + }, + { + "epoch": 2.6677290836653387, + "grad_norm": 0.6150614739558937, + "learning_rate": 3.114110031642723e-05, + "loss": 0.3085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17283207178115845, + "step": 1675, + "valid_targets_mean": 4499.8, + "valid_targets_min": 942 + }, + { + "epoch": 2.6756972111553785, + "grad_norm": 1.2399919193311193, + "learning_rate": 3.1075061803451405e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13976256549358368, + "step": 1680, + "valid_targets_mean": 3865.8, + "valid_targets_min": 2788 + }, + { + "epoch": 2.6836653386454183, + "grad_norm": 0.5350477326551074, + "learning_rate": 3.100884867900257e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1667133867740631, + "step": 1685, + "valid_targets_mean": 4074.8, + "valid_targets_min": 944 + }, + { + "epoch": 2.691633466135458, + "grad_norm": 0.6672128201797747, + "learning_rate": 3.094246198700907e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11858796328306198, + "step": 1690, + "valid_targets_mean": 2839.6, + "valid_targets_min": 308 + }, + { + "epoch": 2.699601593625498, + "grad_norm": 0.6128810636239489, + "learning_rate": 3.087590277413578e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1811891496181488, + "step": 1695, + "valid_targets_mean": 4235.6, + "valid_targets_min": 811 + }, + { + "epoch": 2.707569721115538, + "grad_norm": 0.6116153083916679, + "learning_rate": 3.0809172089767576e-05, + "loss": 0.3048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19102010130882263, + "step": 1700, + "valid_targets_mean": 3887.8, + "valid_targets_min": 219 + }, + { + "epoch": 2.7155378486055777, + "grad_norm": 0.5705410245041, + "learning_rate": 3.0742270985992765e-05, + "loss": 0.3066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410629004240036, + "step": 1705, + "valid_targets_mean": 3804.8, + "valid_targets_min": 1168 + }, + { + "epoch": 2.7235059760956175, + "grad_norm": 0.6645715015139396, + "learning_rate": 3.067520051758651e-05, + "loss": 0.3031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1776059865951538, + "step": 1710, + "valid_targets_mean": 3863.4, + "valid_targets_min": 1000 + }, + { + "epoch": 2.7314741035856573, + "grad_norm": 0.6074009839238762, + "learning_rate": 3.060796174199424e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13484247028827667, + "step": 1715, + "valid_targets_mean": 2893.8, + "valid_targets_min": 436 + }, + { + "epoch": 2.739442231075697, + "grad_norm": 0.528055350299334, + "learning_rate": 3.0540555719314914e-05, + "loss": 0.2977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14803050458431244, + "step": 1720, + "valid_targets_mean": 4187.0, + "valid_targets_min": 284 + }, + { + "epoch": 2.747410358565737, + "grad_norm": 0.587075736779692, + "learning_rate": 3.0472983512284366e-05, + "loss": 0.3117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17222487926483154, + "step": 1725, + "valid_targets_mean": 4210.5, + "valid_targets_min": 886 + }, + { + "epoch": 2.755378486055777, + "grad_norm": 0.6109700877997402, + "learning_rate": 3.04052461862585e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13396194577217102, + "step": 1730, + "valid_targets_mean": 3010.6, + "valid_targets_min": 471 + }, + { + "epoch": 2.7633466135458167, + "grad_norm": 0.6188577067928057, + "learning_rate": 3.0337344809196547e-05, + "loss": 0.3127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07617945224046707, + "step": 1735, + "valid_targets_mean": 2067.8, + "valid_targets_min": 292 + }, + { + "epoch": 2.7713147410358565, + "grad_norm": 0.5239023857632885, + "learning_rate": 3.0269280451644155e-05, + "loss": 0.3156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15986491739749908, + "step": 1740, + "valid_targets_mean": 4151.2, + "valid_targets_min": 581 + }, + { + "epoch": 2.7792828685258963, + "grad_norm": 0.5362837482230216, + "learning_rate": 3.020105418671659e-05, + "loss": 0.3022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1509944051504135, + "step": 1745, + "valid_targets_mean": 4105.1, + "valid_targets_min": 393 + }, + { + "epoch": 2.787250996015936, + "grad_norm": 0.5560190067839896, + "learning_rate": 3.0132667090081758e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1459665596485138, + "step": 1750, + "valid_targets_mean": 3637.9, + "valid_targets_min": 241 + }, + { + "epoch": 2.795219123505976, + "grad_norm": 0.6117304860481148, + "learning_rate": 3.006412023994328e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1714390516281128, + "step": 1755, + "valid_targets_mean": 3695.4, + "valid_targets_min": 530 + }, + { + "epoch": 2.803187250996016, + "grad_norm": 0.6054263142624877, + "learning_rate": 2.999541471702347e-05, + "loss": 0.3004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16323651373386383, + "step": 1760, + "valid_targets_mean": 4197.4, + "valid_targets_min": 875 + }, + { + "epoch": 2.8111553784860557, + "grad_norm": 0.5947895791279191, + "learning_rate": 2.9926551604546312e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15745720267295837, + "step": 1765, + "valid_targets_mean": 3358.5, + "valid_targets_min": 205 + }, + { + "epoch": 2.8191235059760955, + "grad_norm": 0.6001862046050473, + "learning_rate": 2.9857531988220385e-05, + "loss": 0.3043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11560022830963135, + "step": 1770, + "valid_targets_mean": 2782.0, + "valid_targets_min": 306 + }, + { + "epoch": 2.8270916334661353, + "grad_norm": 0.6300307004340259, + "learning_rate": 2.9788356956221712e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13865865767002106, + "step": 1775, + "valid_targets_mean": 3863.4, + "valid_targets_min": 276 + }, + { + "epoch": 2.835059760956175, + "grad_norm": 0.6182945054047809, + "learning_rate": 2.971902759917665e-05, + "loss": 0.3074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1466536521911621, + "step": 1780, + "valid_targets_mean": 3917.1, + "valid_targets_min": 773 + }, + { + "epoch": 2.843027888446215, + "grad_norm": 0.5298397429107443, + "learning_rate": 2.964954501014467e-05, + "loss": 0.296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11321134120225906, + "step": 1785, + "valid_targets_mean": 3792.2, + "valid_targets_min": 2869 + }, + { + "epoch": 2.850996015936255, + "grad_norm": 0.6273210190956005, + "learning_rate": 2.957991028460112e-05, + "loss": 0.3095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14284269511699677, + "step": 1790, + "valid_targets_mean": 2845.5, + "valid_targets_min": 616 + }, + { + "epoch": 2.8589641434262947, + "grad_norm": 0.5804735026959892, + "learning_rate": 2.951012452041997e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.174922913312912, + "step": 1795, + "valid_targets_mean": 4494.4, + "valid_targets_min": 2700 + }, + { + "epoch": 2.8669322709163345, + "grad_norm": 0.7419190432191575, + "learning_rate": 2.9440188817856478e-05, + "loss": 0.3034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13581791520118713, + "step": 1800, + "valid_targets_mean": 3082.6, + "valid_targets_min": 339 + }, + { + "epoch": 2.8749003984063743, + "grad_norm": 0.5665491958828685, + "learning_rate": 2.937010427952986e-05, + "loss": 0.2924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15435200929641724, + "step": 1805, + "valid_targets_mean": 4402.1, + "valid_targets_min": 3260 + }, + { + "epoch": 2.882868525896414, + "grad_norm": 0.6495565332447018, + "learning_rate": 2.929987201040593e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.188032865524292, + "step": 1810, + "valid_targets_mean": 4567.4, + "valid_targets_min": 252 + }, + { + "epoch": 2.8908366533864545, + "grad_norm": 0.5342486023226761, + "learning_rate": 2.922949311777962e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19005250930786133, + "step": 1815, + "valid_targets_mean": 4846.8, + "valid_targets_min": 3605 + }, + { + "epoch": 2.8988047808764943, + "grad_norm": 0.6653894464444504, + "learning_rate": 2.9158968711257576e-05, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14081129431724548, + "step": 1820, + "valid_targets_mean": 2541.4, + "valid_targets_min": 291 + }, + { + "epoch": 2.906772908366534, + "grad_norm": 0.6262578124048611, + "learning_rate": 2.908829990274064e-05, + "loss": 0.3059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14443422853946686, + "step": 1825, + "valid_targets_mean": 3446.8, + "valid_targets_min": 1030 + }, + { + "epoch": 2.914741035856574, + "grad_norm": 0.6347752827432807, + "learning_rate": 2.9017487806406312e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19679532945156097, + "step": 1830, + "valid_targets_mean": 4701.9, + "valid_targets_min": 3952 + }, + { + "epoch": 2.922709163346614, + "grad_norm": 0.6689696315532441, + "learning_rate": 2.89465335386912e-05, + "loss": 0.3004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12756000459194183, + "step": 1835, + "valid_targets_mean": 2977.2, + "valid_targets_min": 220 + }, + { + "epoch": 2.9306772908366536, + "grad_norm": 0.616831376153753, + "learning_rate": 2.8875438218273423e-05, + "loss": 0.304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18263155221939087, + "step": 1840, + "valid_targets_mean": 4662.1, + "valid_targets_min": 455 + }, + { + "epoch": 2.9386454183266935, + "grad_norm": 0.6565628369554742, + "learning_rate": 2.880420296605494e-05, + "loss": 0.294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19445011019706726, + "step": 1845, + "valid_targets_mean": 3611.1, + "valid_targets_min": 972 + }, + { + "epoch": 2.9466135458167333, + "grad_norm": 0.6340297598862685, + "learning_rate": 2.8732828905143938e-05, + "loss": 0.3017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15914231538772583, + "step": 1850, + "valid_targets_mean": 3565.1, + "valid_targets_min": 1245 + }, + { + "epoch": 2.954581673306773, + "grad_norm": 0.6130063359000373, + "learning_rate": 2.8661317160837038e-05, + "loss": 0.3055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15856942534446716, + "step": 1855, + "valid_targets_mean": 3286.4, + "valid_targets_min": 540 + }, + { + "epoch": 2.962549800796813, + "grad_norm": 0.5882736715432598, + "learning_rate": 2.8589668860601643e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15061055123806, + "step": 1860, + "valid_targets_mean": 3854.0, + "valid_targets_min": 1280 + }, + { + "epoch": 2.970517928286853, + "grad_norm": 0.5304952983378881, + "learning_rate": 2.85178851340581e-05, + "loss": 0.3024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10105285048484802, + "step": 1865, + "valid_targets_mean": 2839.4, + "valid_targets_min": 271 + }, + { + "epoch": 2.9784860557768926, + "grad_norm": 0.5395075495945896, + "learning_rate": 2.8445967112961928e-05, + "loss": 0.2913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13914275169372559, + "step": 1870, + "valid_targets_mean": 4269.6, + "valid_targets_min": 215 + }, + { + "epoch": 2.9864541832669325, + "grad_norm": 0.5231744288139889, + "learning_rate": 2.8373915931185946e-05, + "loss": 0.2976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15183329582214355, + "step": 1875, + "valid_targets_mean": 4896.2, + "valid_targets_min": 3180 + }, + { + "epoch": 2.9944223107569723, + "grad_norm": 0.5612640713997488, + "learning_rate": 2.830173272470241e-05, + "loss": 0.3057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16358435153961182, + "step": 1880, + "valid_targets_mean": 4041.8, + "valid_targets_min": 579 + }, + { + "epoch": 3.001593625498008, + "grad_norm": 0.5995604976964911, + "learning_rate": 2.822941863156512e-05, + "loss": 0.2983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13486389815807343, + "step": 1885, + "valid_targets_mean": 3476.0, + "valid_targets_min": 764 + }, + { + "epoch": 3.0095617529880476, + "grad_norm": 0.6366474422140981, + "learning_rate": 2.8156974791891425e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563360095024109, + "step": 1890, + "valid_targets_mean": 4201.0, + "valid_targets_min": 2953 + }, + { + "epoch": 3.0175298804780875, + "grad_norm": 0.7305768682977837, + "learning_rate": 2.8084402347844316e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1641981601715088, + "step": 1895, + "valid_targets_mean": 4095.1, + "valid_targets_min": 534 + }, + { + "epoch": 3.0254980079681273, + "grad_norm": 0.6142059058779309, + "learning_rate": 2.801170244361436e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12227625399827957, + "step": 1900, + "valid_targets_mean": 4612.0, + "valid_targets_min": 3237 + }, + { + "epoch": 3.033466135458167, + "grad_norm": 0.5802754329647101, + "learning_rate": 2.7938876225401714e-05, + "loss": 0.2867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15479183197021484, + "step": 1905, + "valid_targets_mean": 4091.9, + "valid_targets_min": 2449 + }, + { + "epoch": 3.041434262948207, + "grad_norm": 0.5122200468681064, + "learning_rate": 2.7865924841397985e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14310495555400848, + "step": 1910, + "valid_targets_mean": 4970.1, + "valid_targets_min": 2545 + }, + { + "epoch": 3.049402390438247, + "grad_norm": 0.5468786402493747, + "learning_rate": 2.7792849441768194e-05, + "loss": 0.2868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11139404028654099, + "step": 1915, + "valid_targets_mean": 3889.2, + "valid_targets_min": 989 + }, + { + "epoch": 3.057370517928287, + "grad_norm": 0.6358516773300867, + "learning_rate": 2.7719651178632605e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16447865962982178, + "step": 1920, + "valid_targets_mean": 3449.5, + "valid_targets_min": 288 + }, + { + "epoch": 3.065338645418327, + "grad_norm": 0.6357450523853115, + "learning_rate": 2.7646331206048586e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1413956582546234, + "step": 1925, + "valid_targets_mean": 3402.6, + "valid_targets_min": 859 + }, + { + "epoch": 3.0733067729083667, + "grad_norm": 0.5958195249935571, + "learning_rate": 2.7572890679992376e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14067938923835754, + "step": 1930, + "valid_targets_mean": 3301.1, + "valid_targets_min": 1026 + }, + { + "epoch": 3.0812749003984066, + "grad_norm": 0.5947140196007367, + "learning_rate": 2.7499330758340898e-05, + "loss": 0.2953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16272953152656555, + "step": 1935, + "valid_targets_mean": 4757.6, + "valid_targets_min": 272 + }, + { + "epoch": 3.0892430278884464, + "grad_norm": 0.6594534851414603, + "learning_rate": 2.742565260085348e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1049199253320694, + "step": 1940, + "valid_targets_mean": 2760.9, + "valid_targets_min": 276 + }, + { + "epoch": 3.0972111553784862, + "grad_norm": 0.5898841686461007, + "learning_rate": 2.7351857369153595e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12935137748718262, + "step": 1945, + "valid_targets_mean": 3645.2, + "valid_targets_min": 225 + }, + { + "epoch": 3.105179282868526, + "grad_norm": 0.5935297691180758, + "learning_rate": 2.72779462267105e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15314815938472748, + "step": 1950, + "valid_targets_mean": 4015.2, + "valid_targets_min": 845 + }, + { + "epoch": 3.113147410358566, + "grad_norm": 0.6018144142743058, + "learning_rate": 2.720392033882094e-05, + "loss": 0.2963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15182068943977356, + "step": 1955, + "valid_targets_mean": 3905.0, + "valid_targets_min": 898 + }, + { + "epoch": 3.1211155378486057, + "grad_norm": 0.5963825090220352, + "learning_rate": 2.7129780872590768e-05, + "loss": 0.2782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14685726165771484, + "step": 1960, + "valid_targets_mean": 4461.5, + "valid_targets_min": 3677 + }, + { + "epoch": 3.1290836653386456, + "grad_norm": 0.6917481323512222, + "learning_rate": 2.705552899691652e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1306656301021576, + "step": 1965, + "valid_targets_mean": 3440.0, + "valid_targets_min": 308 + }, + { + "epoch": 3.1370517928286854, + "grad_norm": 1.2865263066763915, + "learning_rate": 2.6981165882466994e-05, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16542966663837433, + "step": 1970, + "valid_targets_mean": 4034.5, + "valid_targets_min": 935 + }, + { + "epoch": 3.1450199203187252, + "grad_norm": 0.5862591014399262, + "learning_rate": 2.6906692701664817e-05, + "loss": 0.2953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17163851857185364, + "step": 1975, + "valid_targets_mean": 4001.5, + "valid_targets_min": 2274 + }, + { + "epoch": 3.152988047808765, + "grad_norm": 0.5709660535338027, + "learning_rate": 2.683211062866792e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13916179537773132, + "step": 1980, + "valid_targets_mean": 4306.9, + "valid_targets_min": 307 + }, + { + "epoch": 3.160956175298805, + "grad_norm": 0.6046602837151367, + "learning_rate": 2.6757420839351077e-05, + "loss": 0.2808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09660614281892776, + "step": 1985, + "valid_targets_mean": 2486.8, + "valid_targets_min": 306 + }, + { + "epoch": 3.1689243027888447, + "grad_norm": 0.6491917941119068, + "learning_rate": 2.6682624511287315e-05, + "loss": 0.2758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13069605827331543, + "step": 1990, + "valid_targets_mean": 3085.6, + "valid_targets_min": 511 + }, + { + "epoch": 3.1768924302788846, + "grad_norm": 0.6423556168753874, + "learning_rate": 2.660772282372938e-05, + "loss": 0.2893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10765212774276733, + "step": 1995, + "valid_targets_mean": 2737.4, + "valid_targets_min": 493 + }, + { + "epoch": 3.1848605577689244, + "grad_norm": 0.6396926948889184, + "learning_rate": 2.6532716957591128e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13146337866783142, + "step": 2000, + "valid_targets_mean": 3224.9, + "valid_targets_min": 671 + }, + { + "epoch": 3.1928286852589642, + "grad_norm": 0.5818042509176177, + "learning_rate": 2.6457608095428925e-05, + "loss": 0.2895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09944739937782288, + "step": 2005, + "valid_targets_mean": 3452.5, + "valid_targets_min": 1066 + }, + { + "epoch": 3.200796812749004, + "grad_norm": 0.6402551975996129, + "learning_rate": 2.6382397421422986e-05, + "loss": 0.2901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1358262300491333, + "step": 2010, + "valid_targets_mean": 3142.8, + "valid_targets_min": 534 + }, + { + "epoch": 3.208764940239044, + "grad_norm": 0.6042377415732846, + "learning_rate": 2.6307086121358706e-05, + "loss": 0.2848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11764895915985107, + "step": 2015, + "valid_targets_mean": 4003.0, + "valid_targets_min": 609 + }, + { + "epoch": 3.2167330677290837, + "grad_norm": 0.6493343175064425, + "learning_rate": 2.6231675382607974e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12565159797668457, + "step": 2020, + "valid_targets_mean": 2952.9, + "valid_targets_min": 562 + }, + { + "epoch": 3.2247011952191236, + "grad_norm": 0.6490494009274449, + "learning_rate": 2.6156166394110447e-05, + "loss": 0.2856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16414640843868256, + "step": 2025, + "valid_targets_mean": 3937.4, + "valid_targets_min": 1063 + }, + { + "epoch": 3.2326693227091634, + "grad_norm": 0.6132887447825962, + "learning_rate": 2.60805603463548e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1475113481283188, + "step": 2030, + "valid_targets_mean": 4180.5, + "valid_targets_min": 3326 + }, + { + "epoch": 3.2406374501992032, + "grad_norm": 0.679247026494716, + "learning_rate": 2.6004858431359972e-05, + "loss": 0.2828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11567876487970352, + "step": 2035, + "valid_targets_mean": 2849.9, + "valid_targets_min": 623 + }, + { + "epoch": 3.248605577689243, + "grad_norm": 0.5648846871150982, + "learning_rate": 2.592906184265635e-05, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1182098463177681, + "step": 2040, + "valid_targets_mean": 4033.9, + "valid_targets_min": 246 + }, + { + "epoch": 3.256573705179283, + "grad_norm": 0.6571665065508722, + "learning_rate": 2.585317177526699e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11440694332122803, + "step": 2045, + "valid_targets_mean": 2681.2, + "valid_targets_min": 284 + }, + { + "epoch": 3.2645418326693227, + "grad_norm": 0.6077727307789242, + "learning_rate": 2.5777189425688714e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13683414459228516, + "step": 2050, + "valid_targets_mean": 3754.1, + "valid_targets_min": 1609 + }, + { + "epoch": 3.2725099601593626, + "grad_norm": 0.687572452632822, + "learning_rate": 2.570111599187331e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259164810180664, + "step": 2055, + "valid_targets_mean": 2959.9, + "valid_targets_min": 531 + }, + { + "epoch": 3.2804780876494024, + "grad_norm": 0.5609394014948552, + "learning_rate": 2.5624952673208608e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13147097826004028, + "step": 2060, + "valid_targets_mean": 4716.4, + "valid_targets_min": 2818 + }, + { + "epoch": 3.2884462151394422, + "grad_norm": 0.5757109006459055, + "learning_rate": 2.5548700670499577e-05, + "loss": 0.2799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13211728632450104, + "step": 2065, + "valid_targets_mean": 3288.0, + "valid_targets_min": 752 + }, + { + "epoch": 3.296414342629482, + "grad_norm": 0.6737818677216962, + "learning_rate": 2.5472361185949387e-05, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1399116814136505, + "step": 2070, + "valid_targets_mean": 3550.8, + "valid_targets_min": 2399 + }, + { + "epoch": 3.304382470119522, + "grad_norm": 0.5984387043370892, + "learning_rate": 2.5395935423140487e-05, + "loss": 0.2732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11631085723638535, + "step": 2075, + "valid_targets_mean": 2886.1, + "valid_targets_min": 185 + }, + { + "epoch": 3.3123505976095617, + "grad_norm": 0.6826229831277852, + "learning_rate": 2.5319424587015587e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13441094756126404, + "step": 2080, + "valid_targets_mean": 3741.4, + "valid_targets_min": 750 + }, + { + "epoch": 3.3203187250996016, + "grad_norm": 0.6222883129214012, + "learning_rate": 2.524282988385867e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09123936295509338, + "step": 2085, + "valid_targets_mean": 2420.9, + "valid_targets_min": 243 + }, + { + "epoch": 3.3282868525896414, + "grad_norm": 0.6036436875416724, + "learning_rate": 2.5166152521276014e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13198913633823395, + "step": 2090, + "valid_targets_mean": 3586.8, + "valid_targets_min": 1205 + }, + { + "epoch": 3.3362549800796812, + "grad_norm": 0.6259771803154971, + "learning_rate": 2.5089393708177083e-05, + "loss": 0.278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13058066368103027, + "step": 2095, + "valid_targets_mean": 3258.1, + "valid_targets_min": 794 + }, + { + "epoch": 3.344223107569721, + "grad_norm": 0.5539352449420292, + "learning_rate": 2.501255465475553e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12083698064088821, + "step": 2100, + "valid_targets_mean": 3466.0, + "valid_targets_min": 379 + }, + { + "epoch": 3.352191235059761, + "grad_norm": 0.6063032233740718, + "learning_rate": 2.4935636572470085e-05, + "loss": 0.2974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511157900094986, + "step": 2105, + "valid_targets_mean": 3660.9, + "valid_targets_min": 328 + }, + { + "epoch": 3.3601593625498007, + "grad_norm": 0.5836958251674884, + "learning_rate": 2.4858640674025464e-05, + "loss": 0.2851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489568054676056, + "step": 2110, + "valid_targets_mean": 3728.9, + "valid_targets_min": 749 + }, + { + "epoch": 3.3681274900398406, + "grad_norm": 0.7744785403219803, + "learning_rate": 2.4781568173353234e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14987047016620636, + "step": 2115, + "valid_targets_mean": 4306.0, + "valid_targets_min": 742 + }, + { + "epoch": 3.3760956175298804, + "grad_norm": 0.5292398262457924, + "learning_rate": 2.4704420285592718e-05, + "loss": 0.3009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1497262567281723, + "step": 2120, + "valid_targets_mean": 4323.2, + "valid_targets_min": 3539 + }, + { + "epoch": 3.3840637450199202, + "grad_norm": 0.6125782189718384, + "learning_rate": 2.4627198227071764e-05, + "loss": 0.2801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15631897747516632, + "step": 2125, + "valid_targets_mean": 4071.4, + "valid_targets_min": 453 + }, + { + "epoch": 3.39203187250996, + "grad_norm": 0.5797963912159338, + "learning_rate": 2.4549903215287635e-05, + "loss": 0.2888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15331532061100006, + "step": 2130, + "valid_targets_mean": 4717.6, + "valid_targets_min": 3245 + }, + { + "epoch": 3.4, + "grad_norm": 0.6244701259197334, + "learning_rate": 2.4472536468887795e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11838929355144501, + "step": 2135, + "valid_targets_mean": 3324.8, + "valid_targets_min": 284 + }, + { + "epoch": 3.4079681274900397, + "grad_norm": 0.6127920759841025, + "learning_rate": 2.4395099207650673e-05, + "loss": 0.2862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13430649042129517, + "step": 2140, + "valid_targets_mean": 3650.8, + "valid_targets_min": 522 + }, + { + "epoch": 3.4159362549800796, + "grad_norm": 0.6998391345627126, + "learning_rate": 2.4317592652466444e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09987592697143555, + "step": 2145, + "valid_targets_mean": 2595.4, + "valid_targets_min": 279 + }, + { + "epoch": 3.4239043824701194, + "grad_norm": 0.7244425160964371, + "learning_rate": 2.4240018025317812e-05, + "loss": 0.2794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16513580083847046, + "step": 2150, + "valid_targets_mean": 3812.9, + "valid_targets_min": 386 + }, + { + "epoch": 3.4318725099601592, + "grad_norm": 0.5458440187409629, + "learning_rate": 2.4162376549260685e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14888599514961243, + "step": 2155, + "valid_targets_mean": 4864.2, + "valid_targets_min": 3462 + }, + { + "epoch": 3.439840637450199, + "grad_norm": 0.6849029917454437, + "learning_rate": 2.408466944840494e-05, + "loss": 0.2885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1478743851184845, + "step": 2160, + "valid_targets_mean": 2458.1, + "valid_targets_min": 312 + }, + { + "epoch": 3.447808764940239, + "grad_norm": 0.5230213886309137, + "learning_rate": 2.4006897947895097e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15073207020759583, + "step": 2165, + "valid_targets_mean": 4047.8, + "valid_targets_min": 801 + }, + { + "epoch": 3.4557768924302787, + "grad_norm": 0.6894144536145977, + "learning_rate": 2.392906327389103e-05, + "loss": 0.2865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12685012817382812, + "step": 2170, + "valid_targets_mean": 3707.5, + "valid_targets_min": 932 + }, + { + "epoch": 3.4637450199203186, + "grad_norm": 0.6182744233014824, + "learning_rate": 2.3851166653548603e-05, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1278611719608307, + "step": 2175, + "valid_targets_mean": 3423.9, + "valid_targets_min": 932 + }, + { + "epoch": 3.4717131474103584, + "grad_norm": 0.6072984899474033, + "learning_rate": 2.3773209315000344e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17405426502227783, + "step": 2180, + "valid_targets_mean": 4520.2, + "valid_targets_min": 3757 + }, + { + "epoch": 3.4796812749003982, + "grad_norm": 0.6075109522285683, + "learning_rate": 2.3695192487336064e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332045942544937, + "step": 2185, + "valid_targets_mean": 3539.1, + "valid_targets_min": 837 + }, + { + "epoch": 3.487649402390438, + "grad_norm": 0.579318209786998, + "learning_rate": 2.361711740058351e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16303032636642456, + "step": 2190, + "valid_targets_mean": 4565.4, + "valid_targets_min": 3886 + }, + { + "epoch": 3.495617529880478, + "grad_norm": 0.565535935820369, + "learning_rate": 2.3538985285688934e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1396387815475464, + "step": 2195, + "valid_targets_mean": 3660.1, + "valid_targets_min": 258 + }, + { + "epoch": 3.503585657370518, + "grad_norm": 0.6680724888114902, + "learning_rate": 2.3460797374497714e-05, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17963314056396484, + "step": 2200, + "valid_targets_mean": 3727.4, + "valid_targets_min": 936 + }, + { + "epoch": 3.511553784860558, + "grad_norm": 0.7378675334761234, + "learning_rate": 2.3382554899734917e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15196353197097778, + "step": 2205, + "valid_targets_mean": 3584.1, + "valid_targets_min": 905 + }, + { + "epoch": 3.519521912350598, + "grad_norm": 0.6117817117897517, + "learning_rate": 2.3304259094985883e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16188624501228333, + "step": 2210, + "valid_targets_mean": 4811.2, + "valid_targets_min": 2638 + }, + { + "epoch": 3.5274900398406377, + "grad_norm": 0.7152359494553315, + "learning_rate": 2.322591119467674e-05, + "loss": 0.273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12665317952632904, + "step": 2215, + "valid_targets_mean": 3055.8, + "valid_targets_min": 245 + }, + { + "epoch": 3.5354581673306775, + "grad_norm": 0.6521648585968394, + "learning_rate": 2.3147512434054988e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15076977014541626, + "step": 2220, + "valid_targets_mean": 3450.8, + "valid_targets_min": 462 + }, + { + "epoch": 3.5434262948207174, + "grad_norm": 0.6047539156215063, + "learning_rate": 2.3069064049169985e-05, + "loss": 0.2834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13174653053283691, + "step": 2225, + "valid_targets_mean": 3577.0, + "valid_targets_min": 1110 + }, + { + "epoch": 3.551394422310757, + "grad_norm": 0.6521975076989384, + "learning_rate": 2.299056727685348e-05, + "loss": 0.2956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12156250327825546, + "step": 2230, + "valid_targets_mean": 3268.8, + "valid_targets_min": 462 + }, + { + "epoch": 3.559362549800797, + "grad_norm": 0.5930785261067133, + "learning_rate": 2.2912023354700105e-05, + "loss": 0.279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13798466324806213, + "step": 2235, + "valid_targets_mean": 4144.4, + "valid_targets_min": 1029 + }, + { + "epoch": 3.567330677290837, + "grad_norm": 0.6593164729144471, + "learning_rate": 2.2833433521047853e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15233337879180908, + "step": 2240, + "valid_targets_mean": 3792.4, + "valid_targets_min": 436 + }, + { + "epoch": 3.5752988047808767, + "grad_norm": 0.5871339084922141, + "learning_rate": 2.2754799014958597e-05, + "loss": 0.286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1288946568965912, + "step": 2245, + "valid_targets_mean": 3557.2, + "valid_targets_min": 1000 + }, + { + "epoch": 3.5832669322709165, + "grad_norm": 0.5445899656318003, + "learning_rate": 2.26761210761985e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12100610136985779, + "step": 2250, + "valid_targets_mean": 3635.0, + "valid_targets_min": 239 + }, + { + "epoch": 3.5912350597609564, + "grad_norm": 0.5465128751833384, + "learning_rate": 2.259740094521849e-05, + "loss": 0.2903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13375595211982727, + "step": 2255, + "valid_targets_mean": 4219.5, + "valid_targets_min": 1229 + }, + { + "epoch": 3.599203187250996, + "grad_norm": 0.5582905069234687, + "learning_rate": 2.251863986313472e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12912191450595856, + "step": 2260, + "valid_targets_mean": 4051.5, + "valid_targets_min": 242 + }, + { + "epoch": 3.607171314741036, + "grad_norm": 0.5508307471622537, + "learning_rate": 2.2439839071708988e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14723606407642365, + "step": 2265, + "valid_targets_mean": 4390.9, + "valid_targets_min": 3072 + }, + { + "epoch": 3.615139442231076, + "grad_norm": 0.7005723624233458, + "learning_rate": 2.2360999813329126e-05, + "loss": 0.2885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13096565008163452, + "step": 2270, + "valid_targets_mean": 3261.4, + "valid_targets_min": 843 + }, + { + "epoch": 3.6231075697211157, + "grad_norm": 0.5858584315320217, + "learning_rate": 2.2282123330989482e-05, + "loss": 0.2796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13934998214244843, + "step": 2275, + "valid_targets_mean": 4086.1, + "valid_targets_min": 2620 + }, + { + "epoch": 3.6310756972111555, + "grad_norm": 0.5484935581764541, + "learning_rate": 2.220321086827126e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1496485471725464, + "step": 2280, + "valid_targets_mean": 3966.0, + "valid_targets_min": 244 + }, + { + "epoch": 3.6390438247011954, + "grad_norm": 0.5558339001247016, + "learning_rate": 2.2124263669322948e-05, + "loss": 0.2973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1396896243095398, + "step": 2285, + "valid_targets_mean": 4098.0, + "valid_targets_min": 2939 + }, + { + "epoch": 3.647011952191235, + "grad_norm": 0.6369737232360697, + "learning_rate": 2.2045282978840684e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09870846569538116, + "step": 2290, + "valid_targets_mean": 2270.4, + "valid_targets_min": 644 + }, + { + "epoch": 3.654980079681275, + "grad_norm": 0.6153969894022138, + "learning_rate": 2.1966270042048655e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1183319166302681, + "step": 2295, + "valid_targets_mean": 3203.2, + "valid_targets_min": 509 + }, + { + "epoch": 3.662948207171315, + "grad_norm": 0.538426036654963, + "learning_rate": 2.188722610467942e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11018185317516327, + "step": 2300, + "valid_targets_mean": 3402.2, + "valid_targets_min": 458 + }, + { + "epoch": 3.6709163346613547, + "grad_norm": 0.5383962469152709, + "learning_rate": 2.180815241295433e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12926092743873596, + "step": 2305, + "valid_targets_mean": 4177.4, + "valid_targets_min": 3502 + }, + { + "epoch": 3.6788844621513945, + "grad_norm": 0.6003161923557355, + "learning_rate": 2.172905021356383e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12183894962072372, + "step": 2310, + "valid_targets_mean": 3276.9, + "valid_targets_min": 538 + }, + { + "epoch": 3.6868525896414344, + "grad_norm": 0.6518112235339004, + "learning_rate": 2.1649920753647828e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13412031531333923, + "step": 2315, + "valid_targets_mean": 3182.9, + "valid_targets_min": 264 + }, + { + "epoch": 3.694820717131474, + "grad_norm": 0.5205412554322436, + "learning_rate": 2.157076528077603e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1912461221218109, + "step": 2320, + "valid_targets_mean": 6015.6, + "valid_targets_min": 3175 + }, + { + "epoch": 3.702788844621514, + "grad_norm": 0.5955171494796777, + "learning_rate": 2.149158504292826e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12436293810606003, + "step": 2325, + "valid_targets_mean": 3384.1, + "valid_targets_min": 428 + }, + { + "epoch": 3.710756972111554, + "grad_norm": 0.6612962440281269, + "learning_rate": 2.1412381288474793e-05, + "loss": 0.2968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12987712025642395, + "step": 2330, + "valid_targets_mean": 3765.6, + "valid_targets_min": 1065 + }, + { + "epoch": 3.7187250996015937, + "grad_norm": 0.6398347675218756, + "learning_rate": 2.1333155266156676e-05, + "loss": 0.2867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11875618249177933, + "step": 2335, + "valid_targets_mean": 3575.2, + "valid_targets_min": 1910 + }, + { + "epoch": 3.7266932270916335, + "grad_norm": 0.6478178575347608, + "learning_rate": 2.1253908225066027e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15330427885055542, + "step": 2340, + "valid_targets_mean": 2914.2, + "valid_targets_min": 605 + }, + { + "epoch": 3.7346613545816734, + "grad_norm": 0.5707342179380125, + "learning_rate": 2.1174641414626366e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14285320043563843, + "step": 2345, + "valid_targets_mean": 3983.9, + "valid_targets_min": 883 + }, + { + "epoch": 3.742629482071713, + "grad_norm": 0.5923028249924766, + "learning_rate": 2.109535608457287e-05, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14899635314941406, + "step": 2350, + "valid_targets_mean": 3962.4, + "valid_targets_min": 3094 + }, + { + "epoch": 3.750597609561753, + "grad_norm": 0.6040850863740893, + "learning_rate": 2.101605348493274e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14101773500442505, + "step": 2355, + "valid_targets_mean": 3419.9, + "valid_targets_min": 277 + }, + { + "epoch": 3.758565737051793, + "grad_norm": 0.5613157416279575, + "learning_rate": 2.093673486600542e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12926945090293884, + "step": 2360, + "valid_targets_mean": 3742.1, + "valid_targets_min": 505 + }, + { + "epoch": 3.7665338645418327, + "grad_norm": 0.5399070734817591, + "learning_rate": 2.0857401478342925e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1257469207048416, + "step": 2365, + "valid_targets_mean": 3741.6, + "valid_targets_min": 2927 + }, + { + "epoch": 3.7745019920318725, + "grad_norm": 0.6817669899399026, + "learning_rate": 2.077805457273012e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1523137092590332, + "step": 2370, + "valid_targets_mean": 3375.4, + "valid_targets_min": 871 + }, + { + "epoch": 3.7824701195219124, + "grad_norm": 0.5796059703785094, + "learning_rate": 2.0698695400165e-05, + "loss": 0.284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1267370581626892, + "step": 2375, + "valid_targets_mean": 3524.1, + "valid_targets_min": 245 + }, + { + "epoch": 3.790438247011952, + "grad_norm": 0.5908006727183092, + "learning_rate": 2.061932521183896e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11956535279750824, + "step": 2380, + "valid_targets_mean": 3446.1, + "valid_targets_min": 1080 + }, + { + "epoch": 3.798406374501992, + "grad_norm": 0.5835483122225003, + "learning_rate": 2.0539945259117075e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13289514183998108, + "step": 2385, + "valid_targets_mean": 3167.5, + "valid_targets_min": 316 + }, + { + "epoch": 3.806374501992032, + "grad_norm": 0.5565972103068599, + "learning_rate": 2.046055679351835e-05, + "loss": 0.2856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12216920405626297, + "step": 2390, + "valid_targets_mean": 3260.6, + "valid_targets_min": 862 + }, + { + "epoch": 3.8143426294820717, + "grad_norm": 0.5902112613689473, + "learning_rate": 2.0381161066696025e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12644429504871368, + "step": 2395, + "valid_targets_mean": 3576.1, + "valid_targets_min": 1302 + }, + { + "epoch": 3.8223107569721115, + "grad_norm": 0.5636459832337665, + "learning_rate": 2.030175933041782e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13322488963603973, + "step": 2400, + "valid_targets_mean": 4208.4, + "valid_targets_min": 590 + }, + { + "epoch": 3.8302788844621514, + "grad_norm": 0.6362095035054844, + "learning_rate": 2.022235283654619e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15388166904449463, + "step": 2405, + "valid_targets_mean": 3620.5, + "valid_targets_min": 705 + }, + { + "epoch": 3.838247011952191, + "grad_norm": 0.622345383320758, + "learning_rate": 2.014294283701862e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1341124176979065, + "step": 2410, + "valid_targets_mean": 3622.9, + "valid_targets_min": 1092 + }, + { + "epoch": 3.846215139442231, + "grad_norm": 0.6346754661160846, + "learning_rate": 2.006353058382783e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13256362080574036, + "step": 2415, + "valid_targets_mean": 3270.0, + "valid_targets_min": 491 + }, + { + "epoch": 3.854183266932271, + "grad_norm": 0.5268023793829587, + "learning_rate": 1.9984117329002112e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1353156566619873, + "step": 2420, + "valid_targets_mean": 4941.2, + "valid_targets_min": 1454 + }, + { + "epoch": 3.8621513944223107, + "grad_norm": 0.5300525675059939, + "learning_rate": 1.9904704324585516e-05, + "loss": 0.2926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14425653219223022, + "step": 2425, + "valid_targets_mean": 4485.6, + "valid_targets_min": 3172 + }, + { + "epoch": 3.8701195219123505, + "grad_norm": 0.5388020022950969, + "learning_rate": 1.9825292822618167e-05, + "loss": 0.2897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1312248259782791, + "step": 2430, + "valid_targets_mean": 3972.4, + "valid_targets_min": 239 + }, + { + "epoch": 3.8780876494023904, + "grad_norm": 0.560189511402876, + "learning_rate": 1.9745884075116498e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13984884321689606, + "step": 2435, + "valid_targets_mean": 4401.2, + "valid_targets_min": 2858 + }, + { + "epoch": 3.88605577689243, + "grad_norm": 0.6421038748598413, + "learning_rate": 1.9666479334053496e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12848687171936035, + "step": 2440, + "valid_targets_mean": 4030.8, + "valid_targets_min": 1054 + }, + { + "epoch": 3.89402390438247, + "grad_norm": 0.609169241185786, + "learning_rate": 1.9587079851339016e-05, + "loss": 0.3002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1273653656244278, + "step": 2445, + "valid_targets_mean": 2882.1, + "valid_targets_min": 546 + }, + { + "epoch": 3.90199203187251, + "grad_norm": 0.6736514002310635, + "learning_rate": 1.9507686878799974e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1528712809085846, + "step": 2450, + "valid_targets_mean": 4030.1, + "valid_targets_min": 304 + }, + { + "epoch": 3.9099601593625497, + "grad_norm": 0.6055893558771849, + "learning_rate": 1.9428301668160674e-05, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15029467642307281, + "step": 2455, + "valid_targets_mean": 4603.8, + "valid_targets_min": 2911 + }, + { + "epoch": 3.9179282868525895, + "grad_norm": 0.574308867524337, + "learning_rate": 1.9348925471023023e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13215994834899902, + "step": 2460, + "valid_targets_mean": 3528.2, + "valid_targets_min": 986 + }, + { + "epoch": 3.9258964143426294, + "grad_norm": 0.5651712320533091, + "learning_rate": 1.9269559538846823e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14778804779052734, + "step": 2465, + "valid_targets_mean": 3976.8, + "valid_targets_min": 795 + }, + { + "epoch": 3.933864541832669, + "grad_norm": 0.566217304542824, + "learning_rate": 1.9190205122930056e-05, + "loss": 0.2961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13560554385185242, + "step": 2470, + "valid_targets_mean": 3654.2, + "valid_targets_min": 1762 + }, + { + "epoch": 3.941832669322709, + "grad_norm": 0.6866702891545322, + "learning_rate": 1.911086347438911e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11984483152627945, + "step": 2475, + "valid_targets_mean": 3167.0, + "valid_targets_min": 455 + }, + { + "epoch": 3.949800796812749, + "grad_norm": 0.6790922137521931, + "learning_rate": 1.90315358441391e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607200801372528, + "step": 2480, + "valid_targets_mean": 3555.1, + "valid_targets_min": 797 + }, + { + "epoch": 3.9577689243027887, + "grad_norm": 0.6047354406366248, + "learning_rate": 1.8952223482874114e-05, + "loss": 0.2803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13502906262874603, + "step": 2485, + "valid_targets_mean": 3240.8, + "valid_targets_min": 340 + }, + { + "epoch": 3.9657370517928285, + "grad_norm": 0.5974440933919509, + "learning_rate": 1.88729276410475e-05, + "loss": 0.2809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10749666392803192, + "step": 2490, + "valid_targets_mean": 2338.4, + "valid_targets_min": 230 + }, + { + "epoch": 3.9737051792828684, + "grad_norm": 0.6287521749380471, + "learning_rate": 1.8793649568852192e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14456066489219666, + "step": 2495, + "valid_targets_mean": 3407.4, + "valid_targets_min": 707 + }, + { + "epoch": 3.981673306772908, + "grad_norm": 0.5612414508377164, + "learning_rate": 1.871439051620092e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.117288738489151, + "step": 2500, + "valid_targets_mean": 3686.9, + "valid_targets_min": 1045 + }, + { + "epoch": 3.989641434262948, + "grad_norm": 0.6472855144328568, + "learning_rate": 1.8635151732706586e-05, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14853492379188538, + "step": 2505, + "valid_targets_mean": 4304.1, + "valid_targets_min": 1068 + }, + { + "epoch": 3.997609561752988, + "grad_norm": 0.5683164926052543, + "learning_rate": 1.8555934467662485e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16348713636398315, + "step": 2510, + "valid_targets_mean": 4443.6, + "valid_targets_min": 3530 + }, + { + "epoch": 4.004780876494024, + "grad_norm": 0.5720072003254708, + "learning_rate": 1.84767399700227e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10269935429096222, + "step": 2515, + "valid_targets_mean": 3126.1, + "valid_targets_min": 268 + }, + { + "epoch": 4.012749003984064, + "grad_norm": 0.6401327200606226, + "learning_rate": 1.839756948838231e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14503172039985657, + "step": 2520, + "valid_targets_mean": 3696.2, + "valid_targets_min": 778 + }, + { + "epoch": 4.020717131474104, + "grad_norm": 0.6175905443941119, + "learning_rate": 1.831842427095778e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13108932971954346, + "step": 2525, + "valid_targets_mean": 4221.6, + "valid_targets_min": 2509 + }, + { + "epoch": 4.028685258964144, + "grad_norm": 0.609025602287618, + "learning_rate": 1.823930556556724e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1457751840353012, + "step": 2530, + "valid_targets_mean": 4213.2, + "valid_targets_min": 3146 + }, + { + "epoch": 4.036653386454184, + "grad_norm": 0.5324632778008639, + "learning_rate": 1.8160214619610843e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13213565945625305, + "step": 2535, + "valid_targets_mean": 5396.5, + "valid_targets_min": 1045 + }, + { + "epoch": 4.044621513944223, + "grad_norm": 0.5968218009537902, + "learning_rate": 1.8081152680051075e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11637388169765472, + "step": 2540, + "valid_targets_mean": 3910.0, + "valid_targets_min": 2294 + }, + { + "epoch": 4.052589641434263, + "grad_norm": 0.5436350172753162, + "learning_rate": 1.8002120993393095e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11976222693920135, + "step": 2545, + "valid_targets_mean": 3767.1, + "valid_targets_min": 276 + }, + { + "epoch": 4.060557768924303, + "grad_norm": 0.5963199711651942, + "learning_rate": 1.7923120805665087e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15962520241737366, + "step": 2550, + "valid_targets_mean": 4453.5, + "valid_targets_min": 3735 + }, + { + "epoch": 4.068525896414343, + "grad_norm": 0.5958728257310438, + "learning_rate": 1.7844153362398638e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11870819330215454, + "step": 2555, + "valid_targets_mean": 3804.8, + "valid_targets_min": 2116 + }, + { + "epoch": 4.076494023904383, + "grad_norm": 0.6362877665061702, + "learning_rate": 1.776521990860905e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14715123176574707, + "step": 2560, + "valid_targets_mean": 3874.2, + "valid_targets_min": 843 + }, + { + "epoch": 4.084462151394423, + "grad_norm": 0.627540440735645, + "learning_rate": 1.7686321688775772e-05, + "loss": 0.272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13024848699569702, + "step": 2565, + "valid_targets_mean": 4178.6, + "valid_targets_min": 379 + }, + { + "epoch": 4.092430278884462, + "grad_norm": 0.619079317681379, + "learning_rate": 1.7607459946822717e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1171807050704956, + "step": 2570, + "valid_targets_mean": 3078.2, + "valid_targets_min": 262 + }, + { + "epoch": 4.100398406374502, + "grad_norm": 0.5968668664984488, + "learning_rate": 1.7528635926098715e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10291929543018341, + "step": 2575, + "valid_targets_mean": 2617.1, + "valid_targets_min": 956 + }, + { + "epoch": 4.108366533864542, + "grad_norm": 0.6002374068933708, + "learning_rate": 1.7449850869357846e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13879713416099548, + "step": 2580, + "valid_targets_mean": 4197.0, + "valid_targets_min": 3299 + }, + { + "epoch": 4.116334661354582, + "grad_norm": 0.5889840886046992, + "learning_rate": 1.7371106018739886e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16842882335186005, + "step": 2585, + "valid_targets_mean": 4845.5, + "valid_targets_min": 3076 + }, + { + "epoch": 4.124302788844622, + "grad_norm": 0.6293201945894611, + "learning_rate": 1.729240261575072e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09744669497013092, + "step": 2590, + "valid_targets_mean": 2754.2, + "valid_targets_min": 325 + }, + { + "epoch": 4.132270916334662, + "grad_norm": 0.699730460411323, + "learning_rate": 1.7213741901242747e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18035660684108734, + "step": 2595, + "valid_targets_mean": 3327.5, + "valid_targets_min": 282 + }, + { + "epoch": 4.140239043824701, + "grad_norm": 0.615311548996262, + "learning_rate": 1.713512511539536e-05, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12695595622062683, + "step": 2600, + "valid_targets_mean": 2938.9, + "valid_targets_min": 590 + }, + { + "epoch": 4.148207171314741, + "grad_norm": 0.6054858047449538, + "learning_rate": 1.705655349769534e-05, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12634170055389404, + "step": 2605, + "valid_targets_mean": 3309.4, + "valid_targets_min": 561 + }, + { + "epoch": 4.156175298804781, + "grad_norm": 0.6011487774505819, + "learning_rate": 1.6978028286917336e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.150556743144989, + "step": 2610, + "valid_targets_mean": 4359.9, + "valid_targets_min": 1257 + }, + { + "epoch": 4.164143426294821, + "grad_norm": 0.6354149285950226, + "learning_rate": 1.6899550721104362e-05, + "loss": 0.2734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10773111134767532, + "step": 2615, + "valid_targets_mean": 2906.2, + "valid_targets_min": 236 + }, + { + "epoch": 4.172111553784861, + "grad_norm": 0.6472900571793592, + "learning_rate": 1.6821122037548223e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14156454801559448, + "step": 2620, + "valid_targets_mean": 3117.5, + "valid_targets_min": 1196 + }, + { + "epoch": 4.180079681274901, + "grad_norm": 1.7388435945929173, + "learning_rate": 1.6742743472770063e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13756445050239563, + "step": 2625, + "valid_targets_mean": 3921.9, + "valid_targets_min": 767 + }, + { + "epoch": 4.18804780876494, + "grad_norm": 0.5719938609987489, + "learning_rate": 1.666441626250083e-05, + "loss": 0.275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13680854439735413, + "step": 2630, + "valid_targets_mean": 4360.4, + "valid_targets_min": 1362 + }, + { + "epoch": 4.19601593625498, + "grad_norm": 0.5598967171434873, + "learning_rate": 1.65861416416618e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14067170023918152, + "step": 2635, + "valid_targets_mean": 4220.8, + "valid_targets_min": 2567 + }, + { + "epoch": 4.20398406374502, + "grad_norm": 0.5954103735840044, + "learning_rate": 1.6507920844345135e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10944996029138565, + "step": 2640, + "valid_targets_mean": 3185.2, + "valid_targets_min": 522 + }, + { + "epoch": 4.21195219123506, + "grad_norm": 0.6274287229021701, + "learning_rate": 1.642975510379439e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11779697239398956, + "step": 2645, + "valid_targets_mean": 3483.0, + "valid_targets_min": 281 + }, + { + "epoch": 4.2199203187251, + "grad_norm": 0.6013008613155694, + "learning_rate": 1.6351645652385095e-05, + "loss": 0.2687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.136363223195076, + "step": 2650, + "valid_targets_mean": 4350.0, + "valid_targets_min": 3070 + }, + { + "epoch": 4.22788844621514, + "grad_norm": 0.5579786913359722, + "learning_rate": 1.6273593721605295e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1384572684764862, + "step": 2655, + "valid_targets_mean": 4822.0, + "valid_targets_min": 1009 + }, + { + "epoch": 4.235856573705179, + "grad_norm": 0.6151644516447127, + "learning_rate": 1.6195600542036188e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11860912293195724, + "step": 2660, + "valid_targets_mean": 3096.6, + "valid_targets_min": 887 + }, + { + "epoch": 4.243824701195219, + "grad_norm": 0.6013137436944016, + "learning_rate": 1.6117667343332658e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1265154927968979, + "step": 2665, + "valid_targets_mean": 3917.5, + "valid_targets_min": 329 + }, + { + "epoch": 4.251792828685259, + "grad_norm": 0.5578421855801583, + "learning_rate": 1.6039795354203925e-05, + "loss": 0.2617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13051439821720123, + "step": 2670, + "valid_targets_mean": 4113.4, + "valid_targets_min": 3127 + }, + { + "epoch": 4.259760956175299, + "grad_norm": 0.6366682354964354, + "learning_rate": 1.5961985802394195e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13450387120246887, + "step": 2675, + "valid_targets_mean": 3713.9, + "valid_targets_min": 244 + }, + { + "epoch": 4.267729083665339, + "grad_norm": 0.5645704713350099, + "learning_rate": 1.5884239914663232e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11777573078870773, + "step": 2680, + "valid_targets_mean": 3160.2, + "valid_targets_min": 1035 + }, + { + "epoch": 4.275697211155379, + "grad_norm": 0.5992604755923369, + "learning_rate": 1.58065589167671e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13701510429382324, + "step": 2685, + "valid_targets_mean": 3644.9, + "valid_targets_min": 729 + }, + { + "epoch": 4.283665338645418, + "grad_norm": 0.5809410715544644, + "learning_rate": 1.572894403343878e-05, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.150016188621521, + "step": 2690, + "valid_targets_mean": 4719.0, + "valid_targets_min": 818 + }, + { + "epoch": 4.291633466135458, + "grad_norm": 0.6333561999558883, + "learning_rate": 1.5651396488368863e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11701355874538422, + "step": 2695, + "valid_targets_mean": 2797.4, + "valid_targets_min": 568 + }, + { + "epoch": 4.299601593625498, + "grad_norm": 0.6265872692246324, + "learning_rate": 1.5573917504186306e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09892305731773376, + "step": 2700, + "valid_targets_mean": 2866.5, + "valid_targets_min": 973 + }, + { + "epoch": 4.307569721115538, + "grad_norm": 0.7058453282378055, + "learning_rate": 1.5496508302439096e-05, + "loss": 0.2724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10564042627811432, + "step": 2705, + "valid_targets_mean": 3128.2, + "valid_targets_min": 559 + }, + { + "epoch": 4.315537848605578, + "grad_norm": 0.5810402160845162, + "learning_rate": 1.5419170103575037e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12401340156793594, + "step": 2710, + "valid_targets_mean": 3585.1, + "valid_targets_min": 282 + }, + { + "epoch": 4.323505976095618, + "grad_norm": 0.6021195692971588, + "learning_rate": 1.534190412692246e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14363420009613037, + "step": 2715, + "valid_targets_mean": 4491.1, + "valid_targets_min": 1050 + }, + { + "epoch": 4.331474103585657, + "grad_norm": 0.6421757152001665, + "learning_rate": 1.5264711590671067e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09448320418596268, + "step": 2720, + "valid_targets_mean": 2747.0, + "valid_targets_min": 302 + }, + { + "epoch": 4.339442231075697, + "grad_norm": 0.64221293405125, + "learning_rate": 1.5187593711852653e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14570492506027222, + "step": 2725, + "valid_targets_mean": 3436.6, + "valid_targets_min": 248 + }, + { + "epoch": 4.347410358565737, + "grad_norm": 0.6063579652481405, + "learning_rate": 1.5110551706321952e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15537774562835693, + "step": 2730, + "valid_targets_mean": 5108.0, + "valid_targets_min": 3796 + }, + { + "epoch": 4.355378486055777, + "grad_norm": 0.5627554063810711, + "learning_rate": 1.5033586788737492e-05, + "loss": 0.2695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16793255507946014, + "step": 2735, + "valid_targets_mean": 6144.2, + "valid_targets_min": 1486 + }, + { + "epoch": 4.363346613545817, + "grad_norm": 0.6693086211462621, + "learning_rate": 1.495670017254238e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11684629321098328, + "step": 2740, + "valid_targets_mean": 2792.8, + "valid_targets_min": 248 + }, + { + "epoch": 4.371314741035857, + "grad_norm": 0.5520813726926395, + "learning_rate": 1.487989306994525e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14985960721969604, + "step": 2745, + "valid_targets_mean": 4697.1, + "valid_targets_min": 3255 + }, + { + "epoch": 4.379282868525896, + "grad_norm": 0.5770594788479113, + "learning_rate": 1.480316669190108e-05, + "loss": 0.2706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14154091477394104, + "step": 2750, + "valid_targets_mean": 4440.0, + "valid_targets_min": 1092 + }, + { + "epoch": 4.387250996015936, + "grad_norm": 0.6444824733637786, + "learning_rate": 1.4726522248092132e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14305931329727173, + "step": 2755, + "valid_targets_mean": 4230.0, + "valid_targets_min": 646 + }, + { + "epoch": 4.395219123505976, + "grad_norm": 0.651398166700227, + "learning_rate": 1.4649960946908897e-05, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13604432344436646, + "step": 2760, + "valid_targets_mean": 3322.8, + "valid_targets_min": 212 + }, + { + "epoch": 4.403187250996016, + "grad_norm": 1.0035239212131737, + "learning_rate": 1.4573483995430992e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961909294128418, + "step": 2765, + "valid_targets_mean": 3236.6, + "valid_targets_min": 320 + }, + { + "epoch": 4.411155378486056, + "grad_norm": 0.6149666100569019, + "learning_rate": 1.4497092599408207e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14706659317016602, + "step": 2770, + "valid_targets_mean": 3674.4, + "valid_targets_min": 256 + }, + { + "epoch": 4.419123505976096, + "grad_norm": 0.5944075105572708, + "learning_rate": 1.4420787963241399e-05, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14063294231891632, + "step": 2775, + "valid_targets_mean": 4041.8, + "valid_targets_min": 1192 + }, + { + "epoch": 4.427091633466135, + "grad_norm": 0.5683592411277911, + "learning_rate": 1.4344571289963592e-05, + "loss": 0.2624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12611077725887299, + "step": 2780, + "valid_targets_mean": 4117.4, + "valid_targets_min": 2819 + }, + { + "epoch": 4.435059760956175, + "grad_norm": 0.6042269294027321, + "learning_rate": 1.426844378122095e-05, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12424126267433167, + "step": 2785, + "valid_targets_mean": 3238.1, + "valid_targets_min": 495 + }, + { + "epoch": 4.443027888446215, + "grad_norm": 0.6076612649120969, + "learning_rate": 1.4192406637253853e-05, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15406498312950134, + "step": 2790, + "valid_targets_mean": 4695.2, + "valid_targets_min": 3038 + }, + { + "epoch": 4.450996015936255, + "grad_norm": 0.610314242877089, + "learning_rate": 1.4116461056877986e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.139997661113739, + "step": 2795, + "valid_targets_mean": 3597.5, + "valid_targets_min": 971 + }, + { + "epoch": 4.458964143426295, + "grad_norm": 0.6407491494730748, + "learning_rate": 1.4040608237465412e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259598433971405, + "step": 2800, + "valid_targets_mean": 3842.8, + "valid_targets_min": 764 + }, + { + "epoch": 4.466932270916335, + "grad_norm": 0.626833320757597, + "learning_rate": 1.3964849374925712e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13756072521209717, + "step": 2805, + "valid_targets_mean": 3469.4, + "valid_targets_min": 823 + }, + { + "epoch": 4.474900398406374, + "grad_norm": 0.6113636018899783, + "learning_rate": 1.3889185663687133e-05, + "loss": 0.2773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16179078817367554, + "step": 2810, + "valid_targets_mean": 4718.1, + "valid_targets_min": 715 + }, + { + "epoch": 4.482868525896414, + "grad_norm": 0.6016491127594051, + "learning_rate": 1.3813618296677734e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11874759197235107, + "step": 2815, + "valid_targets_mean": 3507.0, + "valid_targets_min": 1143 + }, + { + "epoch": 4.490836653386454, + "grad_norm": 0.587263270292815, + "learning_rate": 1.3738148465306607e-05, + "loss": 0.2735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12201830744743347, + "step": 2820, + "valid_targets_mean": 3844.5, + "valid_targets_min": 2024 + }, + { + "epoch": 4.498804780876494, + "grad_norm": 0.5440429270288707, + "learning_rate": 1.3662777359445065e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12927262485027313, + "step": 2825, + "valid_targets_mean": 4035.0, + "valid_targets_min": 1042 + }, + { + "epoch": 4.506772908366534, + "grad_norm": 0.6806185747715815, + "learning_rate": 1.3587506167407922e-05, + "loss": 0.2638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13687384128570557, + "step": 2830, + "valid_targets_mean": 3160.6, + "valid_targets_min": 276 + }, + { + "epoch": 4.514741035856574, + "grad_norm": 1.0865846082181532, + "learning_rate": 1.3512336075934704e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1198994517326355, + "step": 2835, + "valid_targets_mean": 3477.8, + "valid_targets_min": 1011 + }, + { + "epoch": 4.522709163346613, + "grad_norm": 0.6067100547590222, + "learning_rate": 1.3437268270170969e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1296362578868866, + "step": 2840, + "valid_targets_mean": 3557.1, + "valid_targets_min": 502 + }, + { + "epoch": 4.530677290836653, + "grad_norm": 0.6530018117969328, + "learning_rate": 1.3362303933649648e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1688781976699829, + "step": 2845, + "valid_targets_mean": 4241.9, + "valid_targets_min": 1212 + }, + { + "epoch": 4.538645418326693, + "grad_norm": 0.5678765737865717, + "learning_rate": 1.328744424827232e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11141425371170044, + "step": 2850, + "valid_targets_mean": 3766.9, + "valid_targets_min": 205 + }, + { + "epoch": 4.546613545816733, + "grad_norm": 0.7560314376646058, + "learning_rate": 1.3212690394290646e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1437414586544037, + "step": 2855, + "valid_targets_mean": 4350.5, + "valid_targets_min": 749 + }, + { + "epoch": 4.554581673306773, + "grad_norm": 0.5845211493049349, + "learning_rate": 1.3138043550287707e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12323597818613052, + "step": 2860, + "valid_targets_mean": 3659.0, + "valid_targets_min": 771 + }, + { + "epoch": 4.562549800796813, + "grad_norm": 0.6486812981624971, + "learning_rate": 1.3063504893159458e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1795152723789215, + "step": 2865, + "valid_targets_mean": 3804.5, + "valid_targets_min": 880 + }, + { + "epoch": 4.570517928286852, + "grad_norm": 0.6301843093431468, + "learning_rate": 1.2989075598096148e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16830292344093323, + "step": 2870, + "valid_targets_mean": 4378.8, + "valid_targets_min": 3573 + }, + { + "epoch": 4.578486055776892, + "grad_norm": 0.6268135224867947, + "learning_rate": 1.2914756838563816e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1396118551492691, + "step": 2875, + "valid_targets_mean": 3876.6, + "valid_targets_min": 1208 + }, + { + "epoch": 4.586454183266932, + "grad_norm": 0.5408965385460501, + "learning_rate": 1.2840549786285776e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.170441135764122, + "step": 2880, + "valid_targets_mean": 4401.6, + "valid_targets_min": 967 + }, + { + "epoch": 4.594422310756972, + "grad_norm": 0.5579675203394512, + "learning_rate": 1.2766455611224127e-05, + "loss": 0.2695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09349963814020157, + "step": 2885, + "valid_targets_mean": 3135.0, + "valid_targets_min": 286 + }, + { + "epoch": 4.602390438247012, + "grad_norm": 0.5940088764164068, + "learning_rate": 1.2692475481561357e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1439450979232788, + "step": 2890, + "valid_targets_mean": 4247.5, + "valid_targets_min": 1646 + }, + { + "epoch": 4.610358565737052, + "grad_norm": 0.5896375920810674, + "learning_rate": 1.2618610563681863e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14034605026245117, + "step": 2895, + "valid_targets_mean": 4082.8, + "valid_targets_min": 3274 + }, + { + "epoch": 4.618326693227091, + "grad_norm": 0.6081159186670604, + "learning_rate": 1.2544862022153601e-05, + "loss": 0.2518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1338728368282318, + "step": 2900, + "valid_targets_mean": 3608.2, + "valid_targets_min": 1016 + }, + { + "epoch": 4.626294820717131, + "grad_norm": 0.6308840394377083, + "learning_rate": 1.2471231019709732e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18884161114692688, + "step": 2905, + "valid_targets_mean": 4549.6, + "valid_targets_min": 3188 + }, + { + "epoch": 4.634262948207171, + "grad_norm": 0.6307301868840895, + "learning_rate": 1.2397718717230243e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11760175228118896, + "step": 2910, + "valid_targets_mean": 3715.1, + "valid_targets_min": 324 + }, + { + "epoch": 4.642231075697211, + "grad_norm": 0.6637131816811684, + "learning_rate": 1.2324326273723707e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13860295712947845, + "step": 2915, + "valid_targets_mean": 3125.9, + "valid_targets_min": 495 + }, + { + "epoch": 4.650199203187251, + "grad_norm": 0.7079389576834997, + "learning_rate": 1.225105484630896e-05, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16028892993927002, + "step": 2920, + "valid_targets_mean": 3774.5, + "valid_targets_min": 1091 + }, + { + "epoch": 4.658167330677291, + "grad_norm": 0.6342776074519758, + "learning_rate": 1.2177905590196884e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14029663801193237, + "step": 2925, + "valid_targets_mean": 3839.9, + "valid_targets_min": 3242 + }, + { + "epoch": 4.66613545816733, + "grad_norm": 0.5830877226753675, + "learning_rate": 1.2104879658672175e-05, + "loss": 0.2828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13618014752864838, + "step": 2930, + "valid_targets_mean": 3554.6, + "valid_targets_min": 1261 + }, + { + "epoch": 4.67410358565737, + "grad_norm": 0.6253219779106525, + "learning_rate": 1.2031978203075172e-05, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1254906952381134, + "step": 2935, + "valid_targets_mean": 3197.5, + "valid_targets_min": 508 + }, + { + "epoch": 4.68207171314741, + "grad_norm": 0.6141152336245717, + "learning_rate": 1.1959202372783728e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13074913620948792, + "step": 2940, + "valid_targets_mean": 4284.1, + "valid_targets_min": 993 + }, + { + "epoch": 4.69003984063745, + "grad_norm": 0.6183014945928188, + "learning_rate": 1.188655331519502e-05, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11938630044460297, + "step": 2945, + "valid_targets_mean": 3510.5, + "valid_targets_min": 931 + }, + { + "epoch": 4.69800796812749, + "grad_norm": 0.6036958793292667, + "learning_rate": 1.1814032175707556e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12805697321891785, + "step": 2950, + "valid_targets_mean": 3902.5, + "valid_targets_min": 2931 + }, + { + "epoch": 4.70597609561753, + "grad_norm": 0.762495220481728, + "learning_rate": 1.1741640097703018e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1260424256324768, + "step": 2955, + "valid_targets_mean": 4108.6, + "valid_targets_min": 280 + }, + { + "epoch": 4.713944223107569, + "grad_norm": 0.6123259514957903, + "learning_rate": 1.1669378222528303e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17273962497711182, + "step": 2960, + "valid_targets_mean": 4228.9, + "valid_targets_min": 2193 + }, + { + "epoch": 4.721912350597609, + "grad_norm": 0.5751366213718874, + "learning_rate": 1.1597247689477502e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1270749866962433, + "step": 2965, + "valid_targets_mean": 3677.0, + "valid_targets_min": 909 + }, + { + "epoch": 4.729880478087649, + "grad_norm": 0.5121925447196681, + "learning_rate": 1.1525249635773935e-05, + "loss": 0.266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13193365931510925, + "step": 2970, + "valid_targets_mean": 4637.4, + "valid_targets_min": 2272 + }, + { + "epoch": 4.737848605577689, + "grad_norm": 0.5622471227682847, + "learning_rate": 1.1453385196552247e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12934821844100952, + "step": 2975, + "valid_targets_mean": 3515.5, + "valid_targets_min": 1559 + }, + { + "epoch": 4.745816733067729, + "grad_norm": 0.5589445788982488, + "learning_rate": 1.1381655504840468e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12661080062389374, + "step": 2980, + "valid_targets_mean": 3686.2, + "valid_targets_min": 553 + }, + { + "epoch": 4.753784860557769, + "grad_norm": 0.5686016273636768, + "learning_rate": 1.1310061691542198e-05, + "loss": 0.2606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10433939844369888, + "step": 2985, + "valid_targets_mean": 2660.1, + "valid_targets_min": 707 + }, + { + "epoch": 4.761752988047808, + "grad_norm": 0.62086351110411, + "learning_rate": 1.1238604885418734e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13418859243392944, + "step": 2990, + "valid_targets_mean": 3186.5, + "valid_targets_min": 343 + }, + { + "epoch": 4.769721115537848, + "grad_norm": 0.6999014478550488, + "learning_rate": 1.1167286213071293e-05, + "loss": 0.2782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17473706603050232, + "step": 2995, + "valid_targets_mean": 3881.5, + "valid_targets_min": 1199 + }, + { + "epoch": 4.777689243027888, + "grad_norm": 0.6581397019022929, + "learning_rate": 1.109610679892327e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1315963864326477, + "step": 3000, + "valid_targets_mean": 3558.2, + "valid_targets_min": 277 + }, + { + "epoch": 4.785657370517928, + "grad_norm": 0.5138979491365111, + "learning_rate": 1.102506776520246e-05, + "loss": 0.2721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13793763518333435, + "step": 3005, + "valid_targets_mean": 4831.6, + "valid_targets_min": 3467 + }, + { + "epoch": 4.793625498007968, + "grad_norm": 0.6293574910876729, + "learning_rate": 1.0954170231923422e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1142679750919342, + "step": 3010, + "valid_targets_mean": 2987.9, + "valid_targets_min": 750 + }, + { + "epoch": 4.801593625498008, + "grad_norm": 0.5640515125693804, + "learning_rate": 1.0883415316869775e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12808850407600403, + "step": 3015, + "valid_targets_mean": 4490.0, + "valid_targets_min": 3071 + }, + { + "epoch": 4.8095617529880474, + "grad_norm": 0.5351200613662318, + "learning_rate": 1.0812804135576588e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12939012050628662, + "step": 3020, + "valid_targets_mean": 4176.9, + "valid_targets_min": 2148 + }, + { + "epoch": 4.817529880478087, + "grad_norm": 0.5478240000204622, + "learning_rate": 1.0742337801312823e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1086738258600235, + "step": 3025, + "valid_targets_mean": 3624.8, + "valid_targets_min": 930 + }, + { + "epoch": 4.825498007968127, + "grad_norm": 0.6441739518704989, + "learning_rate": 1.0672017425063727e-05, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15140973031520844, + "step": 3030, + "valid_targets_mean": 3325.4, + "valid_targets_min": 980 + }, + { + "epoch": 4.833466135458167, + "grad_norm": 0.5964090477642284, + "learning_rate": 1.0601844115513376e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15871793031692505, + "step": 3035, + "valid_targets_mean": 3672.0, + "valid_targets_min": 244 + }, + { + "epoch": 4.841434262948207, + "grad_norm": 0.5476839569514874, + "learning_rate": 1.0531818979027136e-05, + "loss": 0.2652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13524594902992249, + "step": 3040, + "valid_targets_mean": 4092.1, + "valid_targets_min": 1444 + }, + { + "epoch": 4.849402390438247, + "grad_norm": 0.6237182251210356, + "learning_rate": 1.0461943119634257e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13669374585151672, + "step": 3045, + "valid_targets_mean": 3500.8, + "valid_targets_min": 1034 + }, + { + "epoch": 4.8573705179282864, + "grad_norm": 0.6452188999210264, + "learning_rate": 1.0392217639010478e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14306673407554626, + "step": 3050, + "valid_targets_mean": 3621.4, + "valid_targets_min": 1015 + }, + { + "epoch": 4.865338645418326, + "grad_norm": 0.556578150084834, + "learning_rate": 1.0322643636460619e-05, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14264307916164398, + "step": 3055, + "valid_targets_mean": 4356.8, + "valid_targets_min": 2297 + }, + { + "epoch": 4.873306772908366, + "grad_norm": 0.6371613150664773, + "learning_rate": 1.0253222208901263e-05, + "loss": 0.2755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12909969687461853, + "step": 3060, + "valid_targets_mean": 3354.9, + "valid_targets_min": 602 + }, + { + "epoch": 4.881274900398406, + "grad_norm": 0.5501207886817936, + "learning_rate": 1.0183954450843493e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14595386385917664, + "step": 3065, + "valid_targets_mean": 5109.0, + "valid_targets_min": 3403 + }, + { + "epoch": 4.889243027888446, + "grad_norm": 0.5872788387480288, + "learning_rate": 1.0114841454375592e-05, + "loss": 0.2789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14299675822257996, + "step": 3070, + "valid_targets_mean": 4005.0, + "valid_targets_min": 843 + }, + { + "epoch": 4.897211155378486, + "grad_norm": 0.577514005160258, + "learning_rate": 1.0045884309145846e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1589789092540741, + "step": 3075, + "valid_targets_mean": 4120.2, + "valid_targets_min": 875 + }, + { + "epoch": 4.9051792828685254, + "grad_norm": 0.7255061859176135, + "learning_rate": 9.97708410234535e-06, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10865312069654465, + "step": 3080, + "valid_targets_mean": 3547.6, + "valid_targets_min": 271 + }, + { + "epoch": 4.913147410358565, + "grad_norm": 0.6320521664668232, + "learning_rate": 9.90844191869091e-06, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15063154697418213, + "step": 3085, + "valid_targets_mean": 4336.4, + "valid_targets_min": 262 + }, + { + "epoch": 4.921115537848605, + "grad_norm": 0.6382363135199773, + "learning_rate": 9.839958840407873e-06, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08313880115747452, + "step": 3090, + "valid_targets_mean": 1961.0, + "valid_targets_min": 290 + }, + { + "epoch": 4.929083665338645, + "grad_norm": 0.597234618248688, + "learning_rate": 9.771635947213135e-06, + "loss": 0.2743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442553997039795, + "step": 3095, + "valid_targets_mean": 4394.2, + "valid_targets_min": 3447 + }, + { + "epoch": 4.937051792828685, + "grad_norm": 0.5433107080604842, + "learning_rate": 9.703474316298066e-06, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1334853172302246, + "step": 3100, + "valid_targets_mean": 4664.4, + "valid_targets_min": 705 + }, + { + "epoch": 4.945019920318725, + "grad_norm": 0.547178955754686, + "learning_rate": 9.635475022311528e-06, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09971381723880768, + "step": 3105, + "valid_targets_mean": 3355.2, + "valid_targets_min": 504 + }, + { + "epoch": 4.9529880478087644, + "grad_norm": 0.5494820644025523, + "learning_rate": 9.567639137342997e-06, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13351447880268097, + "step": 3110, + "valid_targets_mean": 4463.0, + "valid_targets_min": 1127 + }, + { + "epoch": 4.960956175298804, + "grad_norm": 0.5956550952853131, + "learning_rate": 9.499967730905557e-06, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1420593410730362, + "step": 3115, + "valid_targets_mean": 3492.9, + "valid_targets_min": 878 + }, + { + "epoch": 4.968924302788845, + "grad_norm": 0.5890941314104784, + "learning_rate": 9.43246186991914e-06, + "loss": 0.2641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.107373908162117, + "step": 3120, + "valid_targets_mean": 2851.2, + "valid_targets_min": 581 + }, + { + "epoch": 4.976892430278885, + "grad_norm": 0.6330487368510908, + "learning_rate": 9.365122618693623e-06, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14127232134342194, + "step": 3125, + "valid_targets_mean": 3870.2, + "valid_targets_min": 536 + }, + { + "epoch": 4.984860557768925, + "grad_norm": 0.5684114548678937, + "learning_rate": 9.29795103891211e-06, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10140170902013779, + "step": 3130, + "valid_targets_mean": 3053.9, + "valid_targets_min": 1016 + }, + { + "epoch": 4.9928286852589645, + "grad_norm": 0.5531437463797109, + "learning_rate": 9.230948189614144e-06, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14458362758159637, + "step": 3135, + "valid_targets_mean": 3941.5, + "valid_targets_min": 1107 + }, + { + "epoch": 5.0, + "grad_norm": 0.7851376467007583, + "learning_rate": 9.164115127179038e-06, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.245457261800766, + "step": 3140, + "valid_targets_mean": 3788.1, + "valid_targets_min": 692 + }, + { + "epoch": 5.00796812749004, + "grad_norm": 0.6120181494813008, + "learning_rate": 9.09745290530923e-06, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13366028666496277, + "step": 3145, + "valid_targets_mean": 3292.1, + "valid_targets_min": 902 + }, + { + "epoch": 5.01593625498008, + "grad_norm": 0.6173310357599049, + "learning_rate": 9.030962575013622e-06, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10277126729488373, + "step": 3150, + "valid_targets_mean": 2644.2, + "valid_targets_min": 656 + }, + { + "epoch": 5.0239043824701195, + "grad_norm": 0.6320852166475259, + "learning_rate": 8.964645184591082e-06, + "loss": 0.2709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1892041563987732, + "step": 3155, + "valid_targets_mean": 3817.2, + "valid_targets_min": 2602 + }, + { + "epoch": 5.031872509960159, + "grad_norm": 0.5864932038948057, + "learning_rate": 8.898501779613842e-06, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961039811372757, + "step": 3160, + "valid_targets_mean": 2533.5, + "valid_targets_min": 259 + }, + { + "epoch": 5.039840637450199, + "grad_norm": 0.6558564309235504, + "learning_rate": 8.832533402911056e-06, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1114492118358612, + "step": 3165, + "valid_targets_mean": 2848.4, + "valid_targets_min": 264 + }, + { + "epoch": 5.047808764940239, + "grad_norm": 0.604799911749898, + "learning_rate": 8.766741094552368e-06, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14596164226531982, + "step": 3170, + "valid_targets_mean": 4304.1, + "valid_targets_min": 2269 + }, + { + "epoch": 5.055776892430279, + "grad_norm": 0.6907387417878199, + "learning_rate": 8.70112589183147e-06, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20130327343940735, + "step": 3175, + "valid_targets_mean": 4048.9, + "valid_targets_min": 259 + }, + { + "epoch": 5.063745019920319, + "grad_norm": 0.6435296345766328, + "learning_rate": 8.63568882924979e-06, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12666499614715576, + "step": 3180, + "valid_targets_mean": 3742.2, + "valid_targets_min": 610 + }, + { + "epoch": 5.0717131474103585, + "grad_norm": 0.6121321847314146, + "learning_rate": 8.570430938500155e-06, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14254778623580933, + "step": 3185, + "valid_targets_mean": 4353.9, + "valid_targets_min": 942 + }, + { + "epoch": 5.079681274900398, + "grad_norm": 0.5950951684074194, + "learning_rate": 8.50535324845055e-06, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1303534358739853, + "step": 3190, + "valid_targets_mean": 3913.5, + "valid_targets_min": 1085 + }, + { + "epoch": 5.087649402390438, + "grad_norm": 0.6009818742648716, + "learning_rate": 8.44045678512787e-06, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09683337062597275, + "step": 3195, + "valid_targets_mean": 2937.6, + "valid_targets_min": 447 + }, + { + "epoch": 5.095617529880478, + "grad_norm": 0.5320908668862425, + "learning_rate": 8.375742571701755e-06, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11519855260848999, + "step": 3200, + "valid_targets_mean": 4071.8, + "valid_targets_min": 3058 + }, + { + "epoch": 5.103585657370518, + "grad_norm": 0.633736222964632, + "learning_rate": 8.311211628468477e-06, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1808081418275833, + "step": 3205, + "valid_targets_mean": 4401.2, + "valid_targets_min": 3670 + }, + { + "epoch": 5.111553784860558, + "grad_norm": 0.6358160265795091, + "learning_rate": 8.24686497283481e-06, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11627238988876343, + "step": 3210, + "valid_targets_mean": 3284.1, + "valid_targets_min": 661 + }, + { + "epoch": 5.1195219123505975, + "grad_norm": 0.6214276392825285, + "learning_rate": 8.182703619302044e-06, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15138496458530426, + "step": 3215, + "valid_targets_mean": 4645.5, + "valid_targets_min": 3657 + }, + { + "epoch": 5.127490039840637, + "grad_norm": 0.631697844135404, + "learning_rate": 8.118728579449937e-06, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14449623227119446, + "step": 3220, + "valid_targets_mean": 3653.4, + "valid_targets_min": 936 + }, + { + "epoch": 5.135458167330677, + "grad_norm": 0.5634288323882933, + "learning_rate": 8.054940861920797e-06, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15146976709365845, + "step": 3225, + "valid_targets_mean": 5302.8, + "valid_targets_min": 2097 + }, + { + "epoch": 5.143426294820717, + "grad_norm": 0.615565070732101, + "learning_rate": 7.991341472403593e-06, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10391537845134735, + "step": 3230, + "valid_targets_mean": 3142.2, + "valid_targets_min": 911 + }, + { + "epoch": 5.151394422310757, + "grad_norm": 0.6303560007127735, + "learning_rate": 7.927931413618049e-06, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13570864498615265, + "step": 3235, + "valid_targets_mean": 4128.4, + "valid_targets_min": 1738 + }, + { + "epoch": 5.159362549800797, + "grad_norm": 0.6195753953084229, + "learning_rate": 7.864711685298894e-06, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12660765647888184, + "step": 3240, + "valid_targets_mean": 3844.6, + "valid_targets_min": 1092 + }, + { + "epoch": 5.1673306772908365, + "grad_norm": 0.6023833932451614, + "learning_rate": 7.80168328418005e-06, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1314094513654709, + "step": 3245, + "valid_targets_mean": 4262.2, + "valid_targets_min": 3061 + }, + { + "epoch": 5.175298804780876, + "grad_norm": 0.6369710896263531, + "learning_rate": 7.738847203978947e-06, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14783096313476562, + "step": 3250, + "valid_targets_mean": 3945.1, + "valid_targets_min": 913 + }, + { + "epoch": 5.183266932270916, + "grad_norm": 0.6290959951478746, + "learning_rate": 7.676204435380858e-06, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13241353631019592, + "step": 3255, + "valid_targets_mean": 4444.0, + "valid_targets_min": 2618 + }, + { + "epoch": 5.191235059760956, + "grad_norm": 0.5855838407019288, + "learning_rate": 7.613755966023249e-06, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13430337607860565, + "step": 3260, + "valid_targets_mean": 3815.6, + "valid_targets_min": 661 + }, + { + "epoch": 5.199203187250996, + "grad_norm": 0.6121148914572989, + "learning_rate": 7.551502780480251e-06, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14205516874790192, + "step": 3265, + "valid_targets_mean": 4030.8, + "valid_targets_min": 1027 + }, + { + "epoch": 5.207171314741036, + "grad_norm": 0.6127443563266214, + "learning_rate": 7.4894458602470886e-06, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14408063888549805, + "step": 3270, + "valid_targets_mean": 4403.8, + "valid_targets_min": 453 + }, + { + "epoch": 5.2151394422310755, + "grad_norm": 0.5652079759756923, + "learning_rate": 7.427586183724662e-06, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12766605615615845, + "step": 3275, + "valid_targets_mean": 4511.5, + "valid_targets_min": 278 + }, + { + "epoch": 5.223107569721115, + "grad_norm": 0.5260422761564271, + "learning_rate": 7.365924726204063e-06, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0982002541422844, + "step": 3280, + "valid_targets_mean": 3441.4, + "valid_targets_min": 1126 + }, + { + "epoch": 5.231075697211155, + "grad_norm": 0.6101008470360397, + "learning_rate": 7.3044624598512406e-06, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12030519545078278, + "step": 3285, + "valid_targets_mean": 4636.6, + "valid_targets_min": 3069 + }, + { + "epoch": 5.239043824701195, + "grad_norm": 0.6293104823597632, + "learning_rate": 7.243200353691653e-06, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1115085631608963, + "step": 3290, + "valid_targets_mean": 3239.8, + "valid_targets_min": 885 + }, + { + "epoch": 5.247011952191235, + "grad_norm": 1.0769677052879854, + "learning_rate": 7.18213937359499e-06, + "loss": 0.2648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11736782640218735, + "step": 3295, + "valid_targets_mean": 4504.4, + "valid_targets_min": 862 + }, + { + "epoch": 5.254980079681275, + "grad_norm": 0.6204834440821602, + "learning_rate": 7.121280482259976e-06, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13409659266471863, + "step": 3300, + "valid_targets_mean": 3729.5, + "valid_targets_min": 529 + }, + { + "epoch": 5.2629482071713145, + "grad_norm": 0.6292985982003821, + "learning_rate": 7.060624639199138e-06, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12875115871429443, + "step": 3305, + "valid_targets_mean": 3483.2, + "valid_targets_min": 1078 + }, + { + "epoch": 5.270916334661354, + "grad_norm": 0.5638927036364227, + "learning_rate": 7.000172800723715e-06, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13111388683319092, + "step": 3310, + "valid_targets_mean": 3953.2, + "valid_targets_min": 1557 + }, + { + "epoch": 5.278884462151394, + "grad_norm": 0.5207585707244314, + "learning_rate": 6.939925919928585e-06, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11312989145517349, + "step": 3315, + "valid_targets_mean": 3930.0, + "valid_targets_min": 1127 + }, + { + "epoch": 5.286852589641434, + "grad_norm": 0.6308579300090001, + "learning_rate": 6.879884946677205e-06, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12059182673692703, + "step": 3320, + "valid_targets_mean": 3784.1, + "valid_targets_min": 1289 + }, + { + "epoch": 5.294820717131474, + "grad_norm": 0.7137019369582966, + "learning_rate": 6.8200508275866726e-06, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13358832895755768, + "step": 3325, + "valid_targets_mean": 2928.1, + "valid_targets_min": 282 + }, + { + "epoch": 5.302788844621514, + "grad_norm": 0.7600953451448714, + "learning_rate": 6.76042450601277e-06, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135623961687088, + "step": 3330, + "valid_targets_mean": 2927.5, + "valid_targets_min": 809 + }, + { + "epoch": 5.3107569721115535, + "grad_norm": 0.5930150460883772, + "learning_rate": 6.701006922035125e-06, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1559372842311859, + "step": 3335, + "valid_targets_mean": 4291.8, + "valid_targets_min": 539 + }, + { + "epoch": 5.318725099601593, + "grad_norm": 0.6303787749793871, + "learning_rate": 6.641799012442349e-06, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11322662234306335, + "step": 3340, + "valid_targets_mean": 3000.9, + "valid_targets_min": 843 + }, + { + "epoch": 5.326693227091633, + "grad_norm": 0.6274733616364941, + "learning_rate": 6.582801710717291e-06, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14976182579994202, + "step": 3345, + "valid_targets_mean": 4425.8, + "valid_targets_min": 3556 + }, + { + "epoch": 5.334661354581673, + "grad_norm": 0.6404124255568916, + "learning_rate": 6.524015947022333e-06, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14457091689109802, + "step": 3350, + "valid_targets_mean": 3645.4, + "valid_targets_min": 473 + }, + { + "epoch": 5.342629482071713, + "grad_norm": 0.7056415948297033, + "learning_rate": 6.465442648184692e-06, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10674829035997391, + "step": 3355, + "valid_targets_mean": 4155.5, + "valid_targets_min": 589 + }, + { + "epoch": 5.350597609561753, + "grad_norm": 0.713178600135085, + "learning_rate": 6.4070827376818424e-06, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14119845628738403, + "step": 3360, + "valid_targets_mean": 4175.0, + "valid_targets_min": 3516 + }, + { + "epoch": 5.3585657370517925, + "grad_norm": 0.5363588089502721, + "learning_rate": 6.348937135626922e-06, + "loss": 0.2566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13907405734062195, + "step": 3365, + "valid_targets_mean": 4721.6, + "valid_targets_min": 876 + }, + { + "epoch": 5.366533864541832, + "grad_norm": 0.584369676262613, + "learning_rate": 6.291006758754241e-06, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14466825127601624, + "step": 3370, + "valid_targets_mean": 3820.5, + "valid_targets_min": 533 + }, + { + "epoch": 5.374501992031872, + "grad_norm": 0.5681324843263479, + "learning_rate": 6.233292520404852e-06, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12271925806999207, + "step": 3375, + "valid_targets_mean": 4781.9, + "valid_targets_min": 2814 + }, + { + "epoch": 5.382470119521912, + "grad_norm": 0.5777887543850055, + "learning_rate": 6.1757953305120975e-06, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12985867261886597, + "step": 3380, + "valid_targets_mean": 3952.9, + "valid_targets_min": 316 + }, + { + "epoch": 5.390438247011952, + "grad_norm": 0.5530887525834984, + "learning_rate": 6.118516095587321e-06, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10672642290592194, + "step": 3385, + "valid_targets_mean": 4004.9, + "valid_targets_min": 2262 + }, + { + "epoch": 5.398406374501992, + "grad_norm": 0.6313478636714507, + "learning_rate": 6.0614557187055335e-06, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11622016131877899, + "step": 3390, + "valid_targets_mean": 3472.5, + "valid_targets_min": 385 + }, + { + "epoch": 5.4063745019920315, + "grad_norm": 0.61809329445396, + "learning_rate": 6.004615099491189e-06, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13471439480781555, + "step": 3395, + "valid_targets_mean": 3704.6, + "valid_targets_min": 636 + }, + { + "epoch": 5.414342629482071, + "grad_norm": 0.7352906677939539, + "learning_rate": 5.947995134103999e-06, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08384540677070618, + "step": 3400, + "valid_targets_mean": 2533.5, + "valid_targets_min": 188 + }, + { + "epoch": 5.422310756972111, + "grad_norm": 0.618641096906809, + "learning_rate": 5.891596715224821e-06, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16095472872257233, + "step": 3405, + "valid_targets_mean": 4694.6, + "valid_targets_min": 2258 + }, + { + "epoch": 5.430278884462151, + "grad_norm": 0.6816089290330103, + "learning_rate": 5.835420732041557e-06, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1280200034379959, + "step": 3410, + "valid_targets_mean": 3855.4, + "valid_targets_min": 278 + }, + { + "epoch": 5.438247011952191, + "grad_norm": 0.6971808790211675, + "learning_rate": 5.779468070235139e-06, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313139945268631, + "step": 3415, + "valid_targets_mean": 3432.0, + "valid_targets_min": 864 + }, + { + "epoch": 5.446215139442231, + "grad_norm": 0.6032503774011648, + "learning_rate": 5.7237396119655995e-06, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13556715846061707, + "step": 3420, + "valid_targets_mean": 4342.9, + "valid_targets_min": 3353 + }, + { + "epoch": 5.4541832669322705, + "grad_norm": 0.6273513927469573, + "learning_rate": 5.668236235858109e-06, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12618348002433777, + "step": 3425, + "valid_targets_mean": 2915.1, + "valid_targets_min": 603 + }, + { + "epoch": 5.46215139442231, + "grad_norm": 0.6004260835295273, + "learning_rate": 5.61295881698916e-06, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13639703392982483, + "step": 3430, + "valid_targets_mean": 4461.4, + "valid_targets_min": 3533 + }, + { + "epoch": 5.47011952191235, + "grad_norm": 0.7645690026214497, + "learning_rate": 5.557908226872775e-06, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12604597210884094, + "step": 3435, + "valid_targets_mean": 3385.5, + "valid_targets_min": 451 + }, + { + "epoch": 5.47808764940239, + "grad_norm": 0.6086838874219482, + "learning_rate": 5.503085333446727e-06, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1220838874578476, + "step": 3440, + "valid_targets_mean": 3682.1, + "valid_targets_min": 1168 + }, + { + "epoch": 5.48605577689243, + "grad_norm": 0.606693439874937, + "learning_rate": 5.448491001058909e-06, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14827896654605865, + "step": 3445, + "valid_targets_mean": 4212.6, + "valid_targets_min": 1011 + }, + { + "epoch": 5.49402390438247, + "grad_norm": 0.7063285262397214, + "learning_rate": 5.394126090453655e-06, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1105622798204422, + "step": 3450, + "valid_targets_mean": 2606.2, + "valid_targets_min": 242 + }, + { + "epoch": 5.5019920318725095, + "grad_norm": 0.5777296608486836, + "learning_rate": 5.3399914587582e-06, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15368403494358063, + "step": 3455, + "valid_targets_mean": 5300.1, + "valid_targets_min": 3578 + }, + { + "epoch": 5.509960159362549, + "grad_norm": 0.6146095361868158, + "learning_rate": 5.286087959469168e-06, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14480501413345337, + "step": 3460, + "valid_targets_mean": 4302.0, + "valid_targets_min": 1635 + }, + { + "epoch": 5.517928286852589, + "grad_norm": 0.6012435923967533, + "learning_rate": 5.232416442439092e-06, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14101041853427887, + "step": 3465, + "valid_targets_mean": 4215.5, + "valid_targets_min": 2317 + }, + { + "epoch": 5.525896414342629, + "grad_norm": 0.5611584475868379, + "learning_rate": 5.178977753863048e-06, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09993009269237518, + "step": 3470, + "valid_targets_mean": 3897.8, + "valid_targets_min": 206 + }, + { + "epoch": 5.533864541832669, + "grad_norm": 0.5563902771441435, + "learning_rate": 5.125772736265271e-06, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13861241936683655, + "step": 3475, + "valid_targets_mean": 4227.2, + "valid_targets_min": 922 + }, + { + "epoch": 5.541832669322709, + "grad_norm": 0.8791159048380199, + "learning_rate": 5.072802228485925e-06, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13441188633441925, + "step": 3480, + "valid_targets_mean": 3450.5, + "valid_targets_min": 256 + }, + { + "epoch": 5.5498007968127485, + "grad_norm": 0.5434635127655687, + "learning_rate": 5.020067065667826e-06, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1299361139535904, + "step": 3485, + "valid_targets_mean": 4664.4, + "valid_targets_min": 1080 + }, + { + "epoch": 5.557768924302788, + "grad_norm": 0.5898325853645024, + "learning_rate": 4.967568079243301e-06, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12370647490024567, + "step": 3490, + "valid_targets_mean": 4088.5, + "valid_targets_min": 2095 + }, + { + "epoch": 5.565737051792829, + "grad_norm": 0.5996111126593591, + "learning_rate": 4.915306096921093e-06, + "loss": 0.2691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1425367295742035, + "step": 3495, + "valid_targets_mean": 3613.0, + "valid_targets_min": 539 + }, + { + "epoch": 5.573705179282869, + "grad_norm": 0.5839869424101981, + "learning_rate": 4.8632819426732705e-06, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13066698610782623, + "step": 3500, + "valid_targets_mean": 3647.0, + "valid_targets_min": 579 + }, + { + "epoch": 5.581673306772909, + "grad_norm": 0.6287370374618291, + "learning_rate": 4.811496436722285e-06, + "loss": 0.2618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10785184800624847, + "step": 3505, + "valid_targets_mean": 2965.1, + "valid_targets_min": 245 + }, + { + "epoch": 5.589641434262949, + "grad_norm": 0.5892107222191396, + "learning_rate": 4.7599503955279945e-06, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13893276453018188, + "step": 3510, + "valid_targets_mean": 4028.8, + "valid_targets_min": 3131 + }, + { + "epoch": 5.597609561752988, + "grad_norm": 0.6003581198787067, + "learning_rate": 4.708644631774819e-06, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1262696385383606, + "step": 3515, + "valid_targets_mean": 3744.4, + "valid_targets_min": 1134 + }, + { + "epoch": 5.605577689243028, + "grad_norm": 0.5806890541188977, + "learning_rate": 4.657579954358924e-06, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15570898354053497, + "step": 3520, + "valid_targets_mean": 4482.2, + "valid_targets_min": 705 + }, + { + "epoch": 5.613545816733068, + "grad_norm": 0.6051899531594201, + "learning_rate": 4.606757168375451e-06, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12347070872783661, + "step": 3525, + "valid_targets_mean": 3937.1, + "valid_targets_min": 780 + }, + { + "epoch": 5.621513944223108, + "grad_norm": 0.6648354607135956, + "learning_rate": 4.556177075105857e-06, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10443254560232162, + "step": 3530, + "valid_targets_mean": 2838.1, + "valid_targets_min": 242 + }, + { + "epoch": 5.629482071713148, + "grad_norm": 0.8188241704628644, + "learning_rate": 4.505840472005236e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11551107466220856, + "step": 3535, + "valid_targets_mean": 3775.8, + "valid_targets_min": 1049 + }, + { + "epoch": 5.637450199203188, + "grad_norm": 0.6174445040965998, + "learning_rate": 4.455748152689796e-06, + "loss": 0.2712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1448177695274353, + "step": 3540, + "valid_targets_mean": 4228.5, + "valid_targets_min": 486 + }, + { + "epoch": 5.645418326693227, + "grad_norm": 0.582006297269565, + "learning_rate": 4.405900906924303e-06, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11946921050548553, + "step": 3545, + "valid_targets_mean": 3386.0, + "valid_targets_min": 232 + }, + { + "epoch": 5.653386454183267, + "grad_norm": 0.6235145696587125, + "learning_rate": 4.35629952060965e-06, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11263646185398102, + "step": 3550, + "valid_targets_mean": 3223.4, + "valid_targets_min": 755 + }, + { + "epoch": 5.661354581673307, + "grad_norm": 0.7206037687758406, + "learning_rate": 4.306944775770479e-06, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09978772699832916, + "step": 3555, + "valid_targets_mean": 2395.9, + "valid_targets_min": 273 + }, + { + "epoch": 5.669322709163347, + "grad_norm": 0.6638192351502472, + "learning_rate": 4.2578374505428145e-06, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14399829506874084, + "step": 3560, + "valid_targets_mean": 2907.1, + "valid_targets_min": 268 + }, + { + "epoch": 5.677290836653387, + "grad_norm": 0.5882996503712633, + "learning_rate": 4.208978319161843e-06, + "loss": 0.2556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12330938130617142, + "step": 3565, + "valid_targets_mean": 3894.4, + "valid_targets_min": 555 + }, + { + "epoch": 5.685258964143427, + "grad_norm": 0.6543040309553202, + "learning_rate": 4.160368151949659e-06, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259610503911972, + "step": 3570, + "valid_targets_mean": 3302.6, + "valid_targets_min": 589 + }, + { + "epoch": 5.693227091633466, + "grad_norm": 0.6143364204199971, + "learning_rate": 4.112007715303148e-06, + "loss": 0.2632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09602335095405579, + "step": 3575, + "valid_targets_mean": 2796.8, + "valid_targets_min": 273 + }, + { + "epoch": 5.701195219123506, + "grad_norm": 0.6206784374022006, + "learning_rate": 4.0638977716819105e-06, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13594505190849304, + "step": 3580, + "valid_targets_mean": 3717.2, + "valid_targets_min": 1244 + }, + { + "epoch": 5.709163346613546, + "grad_norm": 0.5615628549123262, + "learning_rate": 4.016039079596204e-06, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10886578261852264, + "step": 3585, + "valid_targets_mean": 3321.4, + "valid_targets_min": 1020 + }, + { + "epoch": 5.717131474103586, + "grad_norm": 0.5662573225062353, + "learning_rate": 3.968432393595034e-06, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13004595041275024, + "step": 3590, + "valid_targets_mean": 4096.1, + "valid_targets_min": 328 + }, + { + "epoch": 5.725099601593626, + "grad_norm": 0.5837892365616794, + "learning_rate": 3.921078464254204e-06, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13288232684135437, + "step": 3595, + "valid_targets_mean": 4558.6, + "valid_targets_min": 3170 + }, + { + "epoch": 5.733067729083666, + "grad_norm": 0.6089077319301412, + "learning_rate": 3.873978038164537e-06, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09915770590305328, + "step": 3600, + "valid_targets_mean": 3429.9, + "valid_targets_min": 200 + }, + { + "epoch": 5.741035856573705, + "grad_norm": 0.6917121360743607, + "learning_rate": 3.8271318579200565e-06, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1068388819694519, + "step": 3605, + "valid_targets_mean": 2564.9, + "valid_targets_min": 246 + }, + { + "epoch": 5.749003984063745, + "grad_norm": 0.6843480696806137, + "learning_rate": 3.780540662106302e-06, + "loss": 0.2651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14033851027488708, + "step": 3610, + "valid_targets_mean": 3091.6, + "valid_targets_min": 254 + }, + { + "epoch": 5.756972111553785, + "grad_norm": 0.6003046764796326, + "learning_rate": 3.734205185288693e-06, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11629563570022583, + "step": 3615, + "valid_targets_mean": 3680.1, + "valid_targets_min": 228 + }, + { + "epoch": 5.764940239043825, + "grad_norm": 0.6694111442962223, + "learning_rate": 3.6881261580009242e-06, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1048082709312439, + "step": 3620, + "valid_targets_mean": 2768.4, + "valid_targets_min": 555 + }, + { + "epoch": 5.772908366533865, + "grad_norm": 0.6953453592075824, + "learning_rate": 3.642304306733464e-06, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11646781116724014, + "step": 3625, + "valid_targets_mean": 3290.4, + "valid_targets_min": 456 + }, + { + "epoch": 5.780876494023905, + "grad_norm": 0.6599195373597444, + "learning_rate": 3.596740353922088e-06, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16503630578517914, + "step": 3630, + "valid_targets_mean": 4059.8, + "valid_targets_min": 517 + }, + { + "epoch": 5.788844621513944, + "grad_norm": 0.6682518386497261, + "learning_rate": 3.5514350179365176e-06, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10960632562637329, + "step": 3635, + "valid_targets_mean": 3265.4, + "valid_targets_min": 258 + }, + { + "epoch": 5.796812749003984, + "grad_norm": 0.601595786597232, + "learning_rate": 3.5063890130690513e-06, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15612296760082245, + "step": 3640, + "valid_targets_mean": 4444.1, + "valid_targets_min": 3802 + }, + { + "epoch": 5.804780876494024, + "grad_norm": 0.6496112640493846, + "learning_rate": 3.461603049523334e-06, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10309451818466187, + "step": 3645, + "valid_targets_mean": 3254.8, + "valid_targets_min": 262 + }, + { + "epoch": 5.812749003984064, + "grad_norm": 0.6288915508343855, + "learning_rate": 3.4170778334031595e-06, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16654735803604126, + "step": 3650, + "valid_targets_mean": 3907.5, + "valid_targets_min": 847 + }, + { + "epoch": 5.820717131474104, + "grad_norm": 0.6961504839578929, + "learning_rate": 3.3728140667013155e-06, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13372699916362762, + "step": 3655, + "valid_targets_mean": 3155.8, + "valid_targets_min": 363 + }, + { + "epoch": 5.828685258964144, + "grad_norm": 0.6223010800317257, + "learning_rate": 3.3288124472885318e-06, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15059836208820343, + "step": 3660, + "valid_targets_mean": 3522.4, + "valid_targets_min": 809 + }, + { + "epoch": 5.836653386454183, + "grad_norm": 0.6939497712021372, + "learning_rate": 3.2850736689024877e-06, + "loss": 0.2554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1575852632522583, + "step": 3665, + "valid_targets_mean": 3594.9, + "valid_targets_min": 522 + }, + { + "epoch": 5.844621513944223, + "grad_norm": 0.5940310626609997, + "learning_rate": 3.2415984211368446e-06, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11697559058666229, + "step": 3670, + "valid_targets_mean": 3811.5, + "valid_targets_min": 274 + }, + { + "epoch": 5.852589641434263, + "grad_norm": 0.6140475487399377, + "learning_rate": 3.1983873894304105e-06, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458444744348526, + "step": 3675, + "valid_targets_mean": 4887.2, + "valid_targets_min": 3166 + }, + { + "epoch": 5.860557768924303, + "grad_norm": 0.6879843367074118, + "learning_rate": 3.1554412550562952e-06, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1462876796722412, + "step": 3680, + "valid_targets_mean": 4436.4, + "valid_targets_min": 3259 + }, + { + "epoch": 5.868525896414343, + "grad_norm": 0.5953728261449194, + "learning_rate": 3.1127606951112056e-06, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16630247235298157, + "step": 3685, + "valid_targets_mean": 4529.9, + "valid_targets_min": 722 + }, + { + "epoch": 5.876494023904383, + "grad_norm": 0.5944339968513348, + "learning_rate": 3.070346382504743e-06, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15230220556259155, + "step": 3690, + "valid_targets_mean": 4477.9, + "valid_targets_min": 3111 + }, + { + "epoch": 5.884462151394422, + "grad_norm": 0.5846695073715269, + "learning_rate": 3.028198985948796e-06, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12972813844680786, + "step": 3695, + "valid_targets_mean": 4184.8, + "valid_targets_min": 3228 + }, + { + "epoch": 5.892430278884462, + "grad_norm": 0.86917738581549, + "learning_rate": 2.9863191699470295e-06, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11308373510837555, + "step": 3700, + "valid_targets_mean": 2976.0, + "valid_targets_min": 308 + }, + { + "epoch": 5.900398406374502, + "grad_norm": 0.5774890125931904, + "learning_rate": 2.9447075947843573e-06, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11549004912376404, + "step": 3705, + "valid_targets_mean": 4005.8, + "valid_targets_min": 277 + }, + { + "epoch": 5.908366533864542, + "grad_norm": 0.5740786078772951, + "learning_rate": 2.9033649165165802e-06, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13311205804347992, + "step": 3710, + "valid_targets_mean": 3929.8, + "valid_targets_min": 765 + }, + { + "epoch": 5.916334661354582, + "grad_norm": 0.638209126073348, + "learning_rate": 2.8622917869600053e-06, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13181473314762115, + "step": 3715, + "valid_targets_mean": 3157.6, + "valid_targets_min": 366 + }, + { + "epoch": 5.924302788844622, + "grad_norm": 0.662891750164014, + "learning_rate": 2.821488853681187e-06, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13231581449508667, + "step": 3720, + "valid_targets_mean": 3815.1, + "valid_targets_min": 465 + }, + { + "epoch": 5.932270916334661, + "grad_norm": 0.6707296283611003, + "learning_rate": 2.7809567599867304e-06, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10229024291038513, + "step": 3725, + "valid_targets_mean": 2831.9, + "valid_targets_min": 246 + }, + { + "epoch": 5.940239043824701, + "grad_norm": 0.6609159161446633, + "learning_rate": 2.7406961449131153e-06, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16223978996276855, + "step": 3730, + "valid_targets_mean": 4233.6, + "valid_targets_min": 1647 + }, + { + "epoch": 5.948207171314741, + "grad_norm": 0.6070202128566694, + "learning_rate": 2.7007076432166402e-06, + "loss": 0.2638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10488445311784744, + "step": 3735, + "valid_targets_mean": 3504.9, + "valid_targets_min": 617 + }, + { + "epoch": 5.956175298804781, + "grad_norm": 0.5693033009319906, + "learning_rate": 2.660991885363433e-06, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12675464153289795, + "step": 3740, + "valid_targets_mean": 4333.9, + "valid_targets_min": 3605 + }, + { + "epoch": 5.964143426294821, + "grad_norm": 0.6344388659710755, + "learning_rate": 2.621549497519471e-06, + "loss": 0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12354440987110138, + "step": 3745, + "valid_targets_mean": 4249.4, + "valid_targets_min": 1259 + }, + { + "epoch": 5.972111553784861, + "grad_norm": 0.6153848272313728, + "learning_rate": 2.5823811015407386e-06, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10808524489402771, + "step": 3750, + "valid_targets_mean": 3444.8, + "valid_targets_min": 273 + }, + { + "epoch": 5.9800796812749, + "grad_norm": 0.6100355257087904, + "learning_rate": 2.5434873149634045e-06, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09660793840885162, + "step": 3755, + "valid_targets_mean": 2964.8, + "valid_targets_min": 306 + }, + { + "epoch": 5.98804780876494, + "grad_norm": 0.609935219988965, + "learning_rate": 2.5048687509941163e-06, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1303994059562683, + "step": 3760, + "valid_targets_mean": 3852.6, + "valid_targets_min": 522 + }, + { + "epoch": 5.99601593625498, + "grad_norm": 0.6089231262522933, + "learning_rate": 2.4665260185002815e-06, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12429457902908325, + "step": 3765, + "valid_targets_mean": 3790.1, + "valid_targets_min": 1014 + }, + { + "epoch": 6.003187250996016, + "grad_norm": 0.6432421277276235, + "learning_rate": 2.428459722000529e-06, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14490444958209991, + "step": 3770, + "valid_targets_mean": 4511.8, + "valid_targets_min": 3330 + }, + { + "epoch": 6.011155378486055, + "grad_norm": 0.5751694281008249, + "learning_rate": 2.39067046165512e-06, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12868493795394897, + "step": 3775, + "valid_targets_mean": 3961.5, + "valid_targets_min": 2894 + }, + { + "epoch": 6.019123505976095, + "grad_norm": 0.7067008056866969, + "learning_rate": 2.3531588332565238e-06, + "loss": 0.2706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15841418504714966, + "step": 3780, + "valid_targets_mean": 3148.9, + "valid_targets_min": 719 + }, + { + "epoch": 6.027091633466135, + "grad_norm": 0.6668337546814035, + "learning_rate": 2.3159254282200207e-06, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10749612748622894, + "step": 3785, + "valid_targets_mean": 2676.1, + "valid_targets_min": 324 + }, + { + "epoch": 6.035059760956175, + "grad_norm": 0.6040929869770774, + "learning_rate": 2.27897083357435e-06, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12229761481285095, + "step": 3790, + "valid_targets_mean": 3779.4, + "valid_targets_min": 1282 + }, + { + "epoch": 6.043027888446215, + "grad_norm": 0.5702540586454504, + "learning_rate": 2.242295631952496e-06, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14322780072689056, + "step": 3795, + "valid_targets_mean": 4631.1, + "valid_targets_min": 4027 + }, + { + "epoch": 6.050996015936255, + "grad_norm": 0.6638784558122266, + "learning_rate": 2.205900401582466e-06, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1431213617324829, + "step": 3800, + "valid_targets_mean": 3195.1, + "valid_targets_min": 447 + }, + { + "epoch": 6.058964143426294, + "grad_norm": 0.6371751394503381, + "learning_rate": 2.169785716278199e-06, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12928101420402527, + "step": 3805, + "valid_targets_mean": 3510.6, + "valid_targets_min": 1292 + }, + { + "epoch": 6.066932270916334, + "grad_norm": 0.605029891992074, + "learning_rate": 2.133952145430502e-06, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12399262934923172, + "step": 3810, + "valid_targets_mean": 3561.8, + "valid_targets_min": 817 + }, + { + "epoch": 6.074900398406374, + "grad_norm": 0.6587525854023784, + "learning_rate": 2.0984002539980785e-06, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15116888284683228, + "step": 3815, + "valid_targets_mean": 4431.6, + "valid_targets_min": 3484 + }, + { + "epoch": 6.082868525896414, + "grad_norm": 0.5828610306338085, + "learning_rate": 2.0631306024986284e-06, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10188622772693634, + "step": 3820, + "valid_targets_mean": 3481.1, + "valid_targets_min": 251 + }, + { + "epoch": 6.090836653386454, + "grad_norm": 0.6010393546690598, + "learning_rate": 2.0281437469999976e-06, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12114863097667694, + "step": 3825, + "valid_targets_mean": 4196.4, + "valid_targets_min": 2129 + }, + { + "epoch": 6.098804780876494, + "grad_norm": 0.5702706809060334, + "learning_rate": 1.9934402391114283e-06, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14958012104034424, + "step": 3830, + "valid_targets_mean": 5032.5, + "valid_targets_min": 3602 + }, + { + "epoch": 6.106772908366533, + "grad_norm": 0.6537613374201687, + "learning_rate": 1.9590206259748413e-06, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14057272672653198, + "step": 3835, + "valid_targets_mean": 3877.0, + "valid_targets_min": 935 + }, + { + "epoch": 6.114741035856574, + "grad_norm": 0.585760262716983, + "learning_rate": 1.924885450256222e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10234951972961426, + "step": 3840, + "valid_targets_mean": 3211.5, + "valid_targets_min": 583 + }, + { + "epoch": 6.122709163346614, + "grad_norm": 0.5976124419885895, + "learning_rate": 1.8910352501370677e-06, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11288703978061676, + "step": 3845, + "valid_targets_mean": 3741.0, + "valid_targets_min": 460 + }, + { + "epoch": 6.130677290836654, + "grad_norm": 0.6288849321286447, + "learning_rate": 1.8574705593058962e-06, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1731109917163849, + "step": 3850, + "valid_targets_mean": 4558.8, + "valid_targets_min": 3570 + }, + { + "epoch": 6.138645418326694, + "grad_norm": 0.6929670892787564, + "learning_rate": 1.8241919069498281e-06, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12311825156211853, + "step": 3855, + "valid_targets_mean": 4152.5, + "valid_targets_min": 1475 + }, + { + "epoch": 6.1466135458167335, + "grad_norm": 0.6693335839104627, + "learning_rate": 1.7911998177462563e-06, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1238730400800705, + "step": 3860, + "valid_targets_mean": 3705.8, + "valid_targets_min": 650 + }, + { + "epoch": 6.154581673306773, + "grad_norm": 0.5686511959559429, + "learning_rate": 1.758494811854554e-06, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14562711119651794, + "step": 3865, + "valid_targets_mean": 4748.6, + "valid_targets_min": 3413 + }, + { + "epoch": 6.162549800796813, + "grad_norm": 0.6040324036331258, + "learning_rate": 1.7260774049079044e-06, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16318969428539276, + "step": 3870, + "valid_targets_mean": 4559.2, + "valid_targets_min": 3556 + }, + { + "epoch": 6.170517928286853, + "grad_norm": 0.6111187412398503, + "learning_rate": 1.6939481080051324e-06, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12175963819026947, + "step": 3875, + "valid_targets_mean": 4280.0, + "valid_targets_min": 2199 + }, + { + "epoch": 6.178486055776893, + "grad_norm": 0.607615016751425, + "learning_rate": 1.6621074277026838e-06, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11982540041208267, + "step": 3880, + "valid_targets_mean": 3989.1, + "valid_targets_min": 884 + }, + { + "epoch": 6.186454183266933, + "grad_norm": 0.6190062881915447, + "learning_rate": 1.630555866006609e-06, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12791648507118225, + "step": 3885, + "valid_targets_mean": 3625.5, + "valid_targets_min": 692 + }, + { + "epoch": 6.1944223107569725, + "grad_norm": 0.6544343561676963, + "learning_rate": 1.5992939203646706e-06, + "loss": 0.2576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11502386629581451, + "step": 3890, + "valid_targets_mean": 3540.8, + "valid_targets_min": 223 + }, + { + "epoch": 6.202390438247012, + "grad_norm": 0.6488734360036593, + "learning_rate": 1.5683220836584822e-06, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14629307389259338, + "step": 3895, + "valid_targets_mean": 3591.6, + "valid_targets_min": 263 + }, + { + "epoch": 6.210358565737052, + "grad_norm": 0.6076624199204144, + "learning_rate": 1.5376408441957513e-06, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11878538131713867, + "step": 3900, + "valid_targets_mean": 3426.6, + "valid_targets_min": 1292 + }, + { + "epoch": 6.218326693227092, + "grad_norm": 0.5920608700590033, + "learning_rate": 1.5072506857025748e-06, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10741353780031204, + "step": 3905, + "valid_targets_mean": 4125.2, + "valid_targets_min": 1248 + }, + { + "epoch": 6.226294820717132, + "grad_norm": 0.5830741097908088, + "learning_rate": 1.4771520873158118e-06, + "loss": 0.2624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13355758786201477, + "step": 3910, + "valid_targets_mean": 4088.6, + "valid_targets_min": 3169 + }, + { + "epoch": 6.234262948207172, + "grad_norm": 0.7774677063961954, + "learning_rate": 1.4473455235755385e-06, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10745563358068466, + "step": 3915, + "valid_targets_mean": 3174.9, + "valid_targets_min": 545 + }, + { + "epoch": 6.2422310756972115, + "grad_norm": 0.6414339471264867, + "learning_rate": 1.417831464417545e-06, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10562710464000702, + "step": 3920, + "valid_targets_mean": 3753.1, + "valid_targets_min": 2195 + }, + { + "epoch": 6.250199203187251, + "grad_norm": 0.6040908897361035, + "learning_rate": 1.3886103751659462e-06, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11933998763561249, + "step": 3925, + "valid_targets_mean": 3794.1, + "valid_targets_min": 243 + }, + { + "epoch": 6.258167330677291, + "grad_norm": 0.5888051951119573, + "learning_rate": 1.3596827165258474e-06, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1309901475906372, + "step": 3930, + "valid_targets_mean": 4263.9, + "valid_targets_min": 2155 + }, + { + "epoch": 6.266135458167331, + "grad_norm": 0.581996929840516, + "learning_rate": 1.331048944576061e-06, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14565280079841614, + "step": 3935, + "valid_targets_mean": 4253.8, + "valid_targets_min": 949 + }, + { + "epoch": 6.274103585657371, + "grad_norm": 0.5876836628531714, + "learning_rate": 1.3027095107619347e-06, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09295883029699326, + "step": 3940, + "valid_targets_mean": 3626.9, + "valid_targets_min": 706 + }, + { + "epoch": 6.282071713147411, + "grad_norm": 0.6080383151496606, + "learning_rate": 1.2746648618882197e-06, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11289983987808228, + "step": 3945, + "valid_targets_mean": 3987.1, + "valid_targets_min": 2348 + }, + { + "epoch": 6.2900398406374505, + "grad_norm": 0.5774994539577519, + "learning_rate": 1.2469154401120442e-06, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12663111090660095, + "step": 3950, + "valid_targets_mean": 3853.5, + "valid_targets_min": 2981 + }, + { + "epoch": 6.29800796812749, + "grad_norm": 0.7210516288858115, + "learning_rate": 1.2194616829359206e-06, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11826236546039581, + "step": 3955, + "valid_targets_mean": 3350.1, + "valid_targets_min": 810 + }, + { + "epoch": 6.30597609561753, + "grad_norm": 0.7225175533232717, + "learning_rate": 1.1923040232008653e-06, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14951607584953308, + "step": 3960, + "valid_targets_mean": 3520.2, + "valid_targets_min": 278 + }, + { + "epoch": 6.31394422310757, + "grad_norm": 0.7291038894240529, + "learning_rate": 1.1654428890795622e-06, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13940054178237915, + "step": 3965, + "valid_targets_mean": 3456.6, + "valid_targets_min": 1023 + }, + { + "epoch": 6.32191235059761, + "grad_norm": 0.5929982172160873, + "learning_rate": 1.1388787040696215e-06, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11806536465883255, + "step": 3970, + "valid_targets_mean": 3589.1, + "valid_targets_min": 291 + }, + { + "epoch": 6.32988047808765, + "grad_norm": 0.7037752359022759, + "learning_rate": 1.1126118869868985e-06, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12819892168045044, + "step": 3975, + "valid_targets_mean": 3012.1, + "valid_targets_min": 857 + }, + { + "epoch": 6.3378486055776895, + "grad_norm": 0.6620313014408482, + "learning_rate": 1.0866428519588923e-06, + "loss": 0.2723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13882631063461304, + "step": 3980, + "valid_targets_mean": 4492.8, + "valid_targets_min": 2264 + }, + { + "epoch": 6.345816733067729, + "grad_norm": 0.6380382089651276, + "learning_rate": 1.060972008418204e-06, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1222672387957573, + "step": 3985, + "valid_targets_mean": 3581.2, + "valid_targets_min": 255 + }, + { + "epoch": 6.353784860557769, + "grad_norm": 0.6081573976490425, + "learning_rate": 1.0355997610961132e-06, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1143135279417038, + "step": 3990, + "valid_targets_mean": 3371.2, + "valid_targets_min": 282 + }, + { + "epoch": 6.361752988047809, + "grad_norm": 0.6223924820943934, + "learning_rate": 1.0105265100161564e-06, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13434356451034546, + "step": 3995, + "valid_targets_mean": 3690.0, + "valid_targets_min": 922 + }, + { + "epoch": 6.369721115537849, + "grad_norm": 0.8007739791330828, + "learning_rate": 9.857526504878524e-07, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10666193068027496, + "step": 4000, + "valid_targets_mean": 3907.4, + "valid_targets_min": 740 + }, + { + "epoch": 6.377689243027889, + "grad_norm": 0.5828222085983297, + "learning_rate": 9.612785731004525e-07, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447790563106537, + "step": 4005, + "valid_targets_mean": 4335.6, + "valid_targets_min": 610 + }, + { + "epoch": 6.3856573705179285, + "grad_norm": 0.6612189894555205, + "learning_rate": 9.371046637167835e-07, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12845785915851593, + "step": 4010, + "valid_targets_mean": 3381.4, + "valid_targets_min": 491 + }, + { + "epoch": 6.393625498007968, + "grad_norm": 0.6542651258710315, + "learning_rate": 9.132313034671792e-07, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1381814181804657, + "step": 4015, + "valid_targets_mean": 3207.6, + "valid_targets_min": 673 + }, + { + "epoch": 6.401593625498008, + "grad_norm": 0.6328798463695975, + "learning_rate": 8.89658868743446e-07, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1083463802933693, + "step": 4020, + "valid_targets_mean": 3546.5, + "valid_targets_min": 425 + }, + { + "epoch": 6.409561752988048, + "grad_norm": 0.6334408197404385, + "learning_rate": 8.663877311929569e-07, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11294161528348923, + "step": 4025, + "valid_targets_mean": 3244.4, + "valid_targets_min": 1028 + }, + { + "epoch": 6.417529880478088, + "grad_norm": 0.6057134984464468, + "learning_rate": 8.43418257712767e-07, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10949505865573883, + "step": 4030, + "valid_targets_mean": 3993.9, + "valid_targets_min": 811 + }, + { + "epoch": 6.425498007968128, + "grad_norm": 0.5778435806875283, + "learning_rate": 8.207508104438467e-07, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10580522567033768, + "step": 4035, + "valid_targets_mean": 4066.9, + "valid_targets_min": 1115 + }, + { + "epoch": 6.4334661354581675, + "grad_norm": 0.5429928605370324, + "learning_rate": 7.983857467653599e-07, + "loss": 0.2379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11019369959831238, + "step": 4040, + "valid_targets_mean": 4641.2, + "valid_targets_min": 3660 + }, + { + "epoch": 6.441434262948207, + "grad_norm": 0.6487210534399938, + "learning_rate": 7.763234192890378e-07, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11472362279891968, + "step": 4045, + "valid_targets_mean": 3347.8, + "valid_targets_min": 259 + }, + { + "epoch": 6.449402390438247, + "grad_norm": 0.7047344926611184, + "learning_rate": 7.545641758536204e-07, + "loss": 0.2472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12119234353303909, + "step": 4050, + "valid_targets_mean": 3398.9, + "valid_targets_min": 333 + }, + { + "epoch": 6.457370517928287, + "grad_norm": 0.6369030055079338, + "learning_rate": 7.331083595193566e-07, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11330699920654297, + "step": 4055, + "valid_targets_mean": 3417.4, + "valid_targets_min": 273 + }, + { + "epoch": 6.465338645418327, + "grad_norm": 0.6845066985366877, + "learning_rate": 7.119563085626246e-07, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16965419054031372, + "step": 4060, + "valid_targets_mean": 3622.8, + "valid_targets_min": 207 + }, + { + "epoch": 6.473306772908367, + "grad_norm": 0.5882207570553777, + "learning_rate": 6.911083564705689e-07, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13698944449424744, + "step": 4065, + "valid_targets_mean": 4761.4, + "valid_targets_min": 4057 + }, + { + "epoch": 6.4812749003984065, + "grad_norm": 0.6554874702398878, + "learning_rate": 6.70564831935856e-07, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11183053255081177, + "step": 4070, + "valid_targets_mean": 2970.2, + "valid_targets_min": 721 + }, + { + "epoch": 6.489243027888446, + "grad_norm": 0.6234303433193248, + "learning_rate": 6.503260588514959e-07, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13360071182250977, + "step": 4075, + "valid_targets_mean": 3698.2, + "valid_targets_min": 1159 + }, + { + "epoch": 6.497211155378486, + "grad_norm": 0.5855960507592379, + "learning_rate": 6.303923563057291e-07, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13242430984973907, + "step": 4080, + "valid_targets_mean": 4189.8, + "valid_targets_min": 3117 + }, + { + "epoch": 6.505179282868526, + "grad_norm": 0.6322204475332664, + "learning_rate": 6.107640385769964e-07, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14184370636940002, + "step": 4085, + "valid_targets_mean": 4107.6, + "valid_targets_min": 576 + }, + { + "epoch": 6.513147410358566, + "grad_norm": 1.0485414149705368, + "learning_rate": 5.91441415128986e-07, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12613211572170258, + "step": 4090, + "valid_targets_mean": 4314.0, + "valid_targets_min": 2611 + }, + { + "epoch": 6.521115537848606, + "grad_norm": 0.7762269781620317, + "learning_rate": 5.724247906057545e-07, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12296421825885773, + "step": 4095, + "valid_targets_mean": 3463.9, + "valid_targets_min": 517 + }, + { + "epoch": 6.5290836653386455, + "grad_norm": 0.6006370469850979, + "learning_rate": 5.537144648269221e-07, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11015652865171432, + "step": 4100, + "valid_targets_mean": 3809.4, + "valid_targets_min": 1016 + }, + { + "epoch": 6.537051792828685, + "grad_norm": 0.5860933713134772, + "learning_rate": 5.35310732782941e-07, + "loss": 0.2517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11739160865545273, + "step": 4105, + "valid_targets_mean": 4038.5, + "valid_targets_min": 2302 + }, + { + "epoch": 6.545019920318725, + "grad_norm": 0.6604373964511138, + "learning_rate": 5.172138846304609e-07, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12965252995491028, + "step": 4110, + "valid_targets_mean": 3493.2, + "valid_targets_min": 636 + }, + { + "epoch": 6.552988047808765, + "grad_norm": 0.6209094419618799, + "learning_rate": 4.994242056877352e-07, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1375695765018463, + "step": 4115, + "valid_targets_mean": 3564.8, + "valid_targets_min": 248 + }, + { + "epoch": 6.560956175298805, + "grad_norm": 0.5516051600593797, + "learning_rate": 4.819419764301314e-07, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16178067028522491, + "step": 4120, + "valid_targets_mean": 4802.2, + "valid_targets_min": 3542 + }, + { + "epoch": 6.568924302788845, + "grad_norm": 0.7135404766623891, + "learning_rate": 4.647674724857143e-07, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11924779415130615, + "step": 4125, + "valid_targets_mean": 3350.8, + "valid_targets_min": 896 + }, + { + "epoch": 6.5768924302788845, + "grad_norm": 0.5815975747573743, + "learning_rate": 4.4790096463088293e-07, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1157185435295105, + "step": 4130, + "valid_targets_mean": 3725.5, + "valid_targets_min": 567 + }, + { + "epoch": 6.584860557768924, + "grad_norm": 0.6273070480646679, + "learning_rate": 4.313427187861252e-07, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14831429719924927, + "step": 4135, + "valid_targets_mean": 4227.1, + "valid_targets_min": 2552 + }, + { + "epoch": 6.592828685258964, + "grad_norm": 0.5850990481798938, + "learning_rate": 4.1509299601180553e-07, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15613797307014465, + "step": 4140, + "valid_targets_mean": 4252.5, + "valid_targets_min": 3624 + }, + { + "epoch": 6.600796812749004, + "grad_norm": 0.6685198631985739, + "learning_rate": 3.9915205250406153e-07, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14067862927913666, + "step": 4145, + "valid_targets_mean": 3225.0, + "valid_targets_min": 539 + }, + { + "epoch": 6.608764940239044, + "grad_norm": 0.5978862286711235, + "learning_rate": 3.83520139590754e-07, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1057911068201065, + "step": 4150, + "valid_targets_mean": 3703.2, + "valid_targets_min": 706 + }, + { + "epoch": 6.616733067729084, + "grad_norm": 0.5686980976514994, + "learning_rate": 3.6819750372751874e-07, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13568620383739471, + "step": 4155, + "valid_targets_mean": 4560.6, + "valid_targets_min": 3183 + }, + { + "epoch": 6.6247011952191235, + "grad_norm": 0.5958820374658117, + "learning_rate": 3.531843864938611e-07, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12185736000537872, + "step": 4160, + "valid_targets_mean": 4474.1, + "valid_targets_min": 3739 + }, + { + "epoch": 6.632669322709163, + "grad_norm": 0.6726651729680071, + "learning_rate": 3.384810245893677e-07, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10705474764108658, + "step": 4165, + "valid_targets_mean": 3190.9, + "valid_targets_min": 952 + }, + { + "epoch": 6.640637450199203, + "grad_norm": 0.5588675453049953, + "learning_rate": 3.2408764982996056e-07, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11087557673454285, + "step": 4170, + "valid_targets_mean": 3897.0, + "valid_targets_min": 1519 + }, + { + "epoch": 6.648605577689243, + "grad_norm": 0.6485560069091032, + "learning_rate": 3.1000448914425106e-07, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10861679911613464, + "step": 4175, + "valid_targets_mean": 2707.1, + "valid_targets_min": 1035 + }, + { + "epoch": 6.656573705179283, + "grad_norm": 0.5152247010486197, + "learning_rate": 2.9623176456995195e-07, + "loss": 0.2616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11714199185371399, + "step": 4180, + "valid_targets_mean": 4544.1, + "valid_targets_min": 3225 + }, + { + "epoch": 6.664541832669323, + "grad_norm": 0.6924372026026602, + "learning_rate": 2.8276969325038874e-07, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1526404619216919, + "step": 4185, + "valid_targets_mean": 3647.8, + "valid_targets_min": 933 + }, + { + "epoch": 6.6725099601593625, + "grad_norm": 0.6565708140906135, + "learning_rate": 2.696184874310692e-07, + "loss": 0.244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1124889999628067, + "step": 4190, + "valid_targets_mean": 3418.2, + "valid_targets_min": 269 + }, + { + "epoch": 6.680478087649402, + "grad_norm": 0.6085711168138929, + "learning_rate": 2.5677835445633515e-07, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09558190405368805, + "step": 4195, + "valid_targets_mean": 3099.6, + "valid_targets_min": 260 + }, + { + "epoch": 6.688446215139442, + "grad_norm": 0.6163718517084988, + "learning_rate": 2.44249496766098e-07, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08738566190004349, + "step": 4200, + "valid_targets_mean": 3180.8, + "valid_targets_min": 316 + }, + { + "epoch": 6.696414342629482, + "grad_norm": 0.6645990900178848, + "learning_rate": 2.3203211189264607e-07, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10755690932273865, + "step": 4205, + "valid_targets_mean": 3424.2, + "valid_targets_min": 244 + }, + { + "epoch": 6.704382470119522, + "grad_norm": 0.5784923268500493, + "learning_rate": 2.201263924575292e-07, + "loss": 0.2379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10449139028787613, + "step": 4210, + "valid_targets_mean": 3429.8, + "valid_targets_min": 242 + }, + { + "epoch": 6.712350597609562, + "grad_norm": 0.6004625390135877, + "learning_rate": 2.0853252616852338e-07, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12962156534194946, + "step": 4215, + "valid_targets_mean": 3904.2, + "valid_targets_min": 3067 + }, + { + "epoch": 6.7203187250996015, + "grad_norm": 0.5952887978908022, + "learning_rate": 1.9725069581666645e-07, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11442944407463074, + "step": 4220, + "valid_targets_mean": 3526.9, + "valid_targets_min": 227 + }, + { + "epoch": 6.728286852589641, + "grad_norm": 0.5819765604555225, + "learning_rate": 1.862810792733849e-07, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1205209344625473, + "step": 4225, + "valid_targets_mean": 3931.8, + "valid_targets_min": 953 + }, + { + "epoch": 6.736254980079681, + "grad_norm": 0.5935505869923664, + "learning_rate": 1.7562384948768273e-07, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12588539719581604, + "step": 4230, + "valid_targets_mean": 4015.5, + "valid_targets_min": 2934 + }, + { + "epoch": 6.744223107569721, + "grad_norm": 0.611139401136095, + "learning_rate": 1.6527917448341478e-07, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11996519565582275, + "step": 4235, + "valid_targets_mean": 3138.9, + "valid_targets_min": 291 + }, + { + "epoch": 6.752191235059761, + "grad_norm": 0.6392028829283917, + "learning_rate": 1.5524721735663995e-07, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17184896767139435, + "step": 4240, + "valid_targets_mean": 4052.5, + "valid_targets_min": 1849 + }, + { + "epoch": 6.760159362549801, + "grad_norm": 0.5754622223791958, + "learning_rate": 1.4552813627305208e-07, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13561061024665833, + "step": 4245, + "valid_targets_mean": 3938.9, + "valid_targets_min": 1101 + }, + { + "epoch": 6.7681274900398405, + "grad_norm": 0.6767350907038611, + "learning_rate": 1.361220844654798e-07, + "loss": 0.2608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09622784703969955, + "step": 4250, + "valid_targets_mean": 2196.9, + "valid_targets_min": 338 + }, + { + "epoch": 6.77609561752988, + "grad_norm": 0.6304243386042828, + "learning_rate": 1.270292102314752e-07, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12174540013074875, + "step": 4255, + "valid_targets_mean": 3984.8, + "valid_targets_min": 285 + }, + { + "epoch": 6.78406374501992, + "grad_norm": 0.6227931181046218, + "learning_rate": 1.1824965693097767e-07, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13814817368984222, + "step": 4260, + "valid_targets_mean": 4148.8, + "valid_targets_min": 267 + }, + { + "epoch": 6.79203187250996, + "grad_norm": 0.6487946242842916, + "learning_rate": 1.0978356298404713e-07, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14033278822898865, + "step": 4265, + "valid_targets_mean": 3367.5, + "valid_targets_min": 242 + }, + { + "epoch": 6.8, + "grad_norm": 0.615835745685907, + "learning_rate": 1.0163106186868777e-07, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1257311999797821, + "step": 4270, + "valid_targets_mean": 3718.1, + "valid_targets_min": 862 + }, + { + "epoch": 6.80796812749004, + "grad_norm": 0.5515603477154181, + "learning_rate": 9.379228211873648e-08, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1278674602508545, + "step": 4275, + "valid_targets_mean": 4272.2, + "valid_targets_min": 541 + }, + { + "epoch": 6.8159362549800795, + "grad_norm": 0.6024657842293992, + "learning_rate": 8.626734732185116e-08, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11025673151016235, + "step": 4280, + "valid_targets_mean": 3804.4, + "valid_targets_min": 808 + }, + { + "epoch": 6.823904382470119, + "grad_norm": 0.5808961396156915, + "learning_rate": 7.905637611754114e-08, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11888125538825989, + "step": 4285, + "valid_targets_mean": 4250.6, + "valid_targets_min": 795 + }, + { + "epoch": 6.831872509960159, + "grad_norm": 0.6648227294388135, + "learning_rate": 7.21594821953131e-08, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16212469339370728, + "step": 4290, + "valid_targets_mean": 3200.8, + "valid_targets_min": 268 + }, + { + "epoch": 6.839840637450199, + "grad_norm": 0.5933956324309776, + "learning_rate": 6.557677429287257e-08, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13593024015426636, + "step": 4295, + "valid_targets_mean": 4707.9, + "valid_targets_min": 2371 + }, + { + "epoch": 6.847808764940239, + "grad_norm": 0.6957976732099579, + "learning_rate": 5.930835619441189e-08, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1199740320444107, + "step": 4300, + "valid_targets_mean": 2310.2, + "valid_targets_min": 261 + }, + { + "epoch": 6.855776892430279, + "grad_norm": 0.5561054288493201, + "learning_rate": 5.335432672896712e-08, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10510461777448654, + "step": 4305, + "valid_targets_mean": 4765.1, + "valid_targets_min": 959 + }, + { + "epoch": 6.8637450199203185, + "grad_norm": 0.573876798093476, + "learning_rate": 4.77147797688704e-08, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14638984203338623, + "step": 4310, + "valid_targets_mean": 4447.1, + "valid_targets_min": 3156 + }, + { + "epoch": 6.871713147410358, + "grad_norm": 0.6475360444663981, + "learning_rate": 4.238980422826e-08, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1150745302438736, + "step": 4315, + "valid_targets_mean": 3765.0, + "valid_targets_min": 1017 + }, + { + "epoch": 6.879681274900398, + "grad_norm": 0.6220352479403992, + "learning_rate": 3.737948406168812e-08, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10800758749246597, + "step": 4320, + "valid_targets_mean": 3688.0, + "valid_targets_min": 272 + }, + { + "epoch": 6.887649402390438, + "grad_norm": 0.6415167530434839, + "learning_rate": 3.26838982627864e-08, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14324162900447845, + "step": 4325, + "valid_targets_mean": 3372.5, + "valid_targets_min": 878 + }, + { + "epoch": 6.895617529880478, + "grad_norm": 0.6358132250393562, + "learning_rate": 2.8303120863033552e-08, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1304199993610382, + "step": 4330, + "valid_targets_mean": 3848.9, + "valid_targets_min": 2545 + }, + { + "epoch": 6.903585657370518, + "grad_norm": 0.6448123337947649, + "learning_rate": 2.4237220930571904e-08, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09287726134061813, + "step": 4335, + "valid_targets_mean": 2860.0, + "valid_targets_min": 410 + }, + { + "epoch": 6.9115537848605575, + "grad_norm": 0.5742771882266169, + "learning_rate": 2.0486262569132664e-08, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11134955286979675, + "step": 4340, + "valid_targets_mean": 3606.9, + "valid_targets_min": 1362 + }, + { + "epoch": 6.919521912350597, + "grad_norm": 0.5609742392864686, + "learning_rate": 1.70503049170212e-08, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11778663098812103, + "step": 4345, + "valid_targets_mean": 4391.4, + "valid_targets_min": 941 + }, + { + "epoch": 6.927490039840637, + "grad_norm": 0.7002531287014088, + "learning_rate": 1.3929402146179994e-08, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1366140991449356, + "step": 4350, + "valid_targets_mean": 3469.9, + "valid_targets_min": 495 + }, + { + "epoch": 6.935458167330677, + "grad_norm": 0.6549722486719072, + "learning_rate": 1.1123603461340449e-08, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11728227138519287, + "step": 4355, + "valid_targets_mean": 3456.1, + "valid_targets_min": 826 + }, + { + "epoch": 6.943426294820717, + "grad_norm": 0.6280634012494569, + "learning_rate": 8.632953099241282e-09, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12032538652420044, + "step": 4360, + "valid_targets_mean": 4182.5, + "valid_targets_min": 3547 + }, + { + "epoch": 6.951394422310757, + "grad_norm": 0.5713513465827403, + "learning_rate": 6.457490327940186e-09, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11433510482311249, + "step": 4365, + "valid_targets_mean": 4045.8, + "valid_targets_min": 3534 + }, + { + "epoch": 6.9593625498007965, + "grad_norm": 0.6096363669276409, + "learning_rate": 4.597249446183227e-09, + "loss": 0.2578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14061906933784485, + "step": 4370, + "valid_targets_mean": 4232.0, + "valid_targets_min": 348 + }, + { + "epoch": 6.967330677290836, + "grad_norm": 0.5569203082593212, + "learning_rate": 3.0522597828719357e-09, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11819716542959213, + "step": 4375, + "valid_targets_mean": 4388.1, + "valid_targets_min": 1463 + }, + { + "epoch": 6.975298804780876, + "grad_norm": 0.5903096458988022, + "learning_rate": 1.822545696601452e-09, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12359915673732758, + "step": 4380, + "valid_targets_mean": 4007.8, + "valid_targets_min": 1494 + }, + { + "epoch": 6.983266932270916, + "grad_norm": 0.7116026191593819, + "learning_rate": 9.081265752697299e-10, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12049850076436996, + "step": 4385, + "valid_targets_mean": 3735.2, + "valid_targets_min": 626 + }, + { + "epoch": 6.991235059760956, + "grad_norm": 0.6225155358400506, + "learning_rate": 3.0901683577999606e-10, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.134708970785141, + "step": 4390, + "valid_targets_mean": 3669.4, + "valid_targets_min": 982 + }, + { + "epoch": 6.999203187250996, + "grad_norm": 0.5318998756184224, + "learning_rate": 2.522592380316269e-11, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13233494758605957, + "step": 4395, + "valid_targets_mean": 4332.0, + "valid_targets_min": 3516 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810341417789459, + "step": 4396, + "total_flos": 2.4258565470369874e+18, + "train_loss": 0.30312070208478775, + "train_runtime": 54647.127, + "train_samples_per_second": 1.285, + "train_steps_per_second": 0.08, + "valid_targets_mean": 4356.1, + "valid_targets_min": 3101 + } + ], + "logging_steps": 5, + "max_steps": 4396, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.4258565470369874e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..1b8f615 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5362996204280b5d6e9e94aa57dcba9b25ac0eb1b03a1ee1583e5d909d71a4ba +size 8721 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..43c4e3c Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833