commit 52aedbacc2d31b368e68e0cf054b067b66369ef2 Author: ModelHub XC Date: Wed Jun 17 08:36:15 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: laion/exp-uns-r2egym-16_8x_glm_4_7_traces_jupiter Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a540bf7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy 
filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..3864651 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen3-8B +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: exp-uns-r2egym-16_8x_glm_4_7_traces_jupiter + results: [] +--- + + + +# exp-uns-r2egym-16_8x_glm_4_7_traces_jupiter + +This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on a thinking-preprocessed copy of the `DCAgent/exp-uns-r2egym-16_8x_glm_4.7_traces_jupiter` dataset (Hugging Face Hub snapshot `f351781469e77321a7f815f7e9f7789e9b57a34e`).
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 16 +- total_eval_batch_size: 64 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 7.0 + +### Training results + + + +### Framework versions + +- Transformers 4.57.6 +- Pytorch 2.9.0+cu128 +- Datasets 4.4.1 +- Tokenizers 0.22.2 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "</think>": 151668, + "</tool_call>": 151658, + "</tool_response>": 151666, + "<think>": 151667, + "<tool_call>": 151657, + "<tool_response>": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..63ed26f --- /dev/null +++ b/all_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 4.896264491712151, + "achieved_tflops_per_gpu_theoretical": 271.03236502585344, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg":
0.12460079044103622, + "mfu_percent": 0.4950722438536048, + "mfu_percent_theoretical": 27.40468807137042, + "total_flos": 2.3699064220633006e+18, + "train_loss": 0.2668512877930834, + "train_runtime": 60502.9208, + "train_samples_per_second": 1.273, + "train_steps_per_second": 0.08, + "valid_targets_mean": 4185.4, + "valid_targets_min": 557 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..01be9b3 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,89 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '</think>' in content %} + {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %} + {%- set content = content.split('</think>')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n</tool_call>' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '<think>\n\n</think>\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..59b4193 --- /dev/null +++ b/config.json @@
-0,0 +1,68 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..159097f --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "others", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9adbb28 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + 
"temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..c61d040 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62bdf6ce666a8f6ed26636b3d16faeea9fbe367e697fc4acc8d08c058076f04 +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..3ba7cba --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79030adbe796f3803e82d602f8c0adccb5e107daf35c33752db6b385162ef8b1 +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..b2e5b6d --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bfa8fd8cd6bcd5d1347a71dedb8006cfe5705fcc026c479778306e4d014612 +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..2571323 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f66c3aab293a18826974f75e5e58f6713b19bfcf0750ecda234edea11dad506 +size 1580230264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ba886c0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,407 @@ +{ + "metadata": { + "total_parameters": 308224, + "total_size": 16381470720 + }, + "weight_map": { + "lm_head.weight": 
"model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_norm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..81583dd --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": "f351781469e77321a7f815f7e9f7789e9b57a34e_thinking_preprocessed", + "training_start": null, + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "Qwen/Qwen3-8B", + "dataset_name": "/data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-uns-r2egym-16_8x_glm_4.7_traces_jupiter/snapshots/f351781469e77321a7f815f7e9f7789e9b57a34e_thinking_preprocessed", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/laion/exp-uns-r2egym-16_8x_glm_4_7_traces_jupiter/blob/main/config.json", + "wandb_link": "https://wandb.ai/dogml/OpenThoughts-Agent/runs/sft_exp-uns-r2egym-16_8x_glm_4-7_traces_jupiter_Qwen3-8B", + "traces_location_s3": null +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 
index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e9dc937 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": 
{ + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..63ed26f --- /dev/null +++ b/train_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 4.896264491712151, + "achieved_tflops_per_gpu_theoretical": 271.03236502585344, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12460079044103622, + "mfu_percent": 0.4950722438536048, + "mfu_percent_theoretical": 27.40468807137042, + "total_flos": 2.3699064220633006e+18, + "train_loss": 0.2668512877930834, + "train_runtime": 60502.9208, + "train_samples_per_second": 1.273, + "train_steps_per_second": 0.08, + "valid_targets_mean": 4185.4, + "valid_targets_min": 557 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..ed09633 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,964 @@ +{"current_steps": 5, "total_steps": 4816, "loss": 0.8619, "lr": 3.319502074688797e-07, "epoch": 0.007267441860465116, "percentage": 0.1, "elapsed_time": 
"0:01:11", "remaining_time": "19:10:15"} +{"current_steps": 10, "total_steps": 4816, "loss": 0.88, "lr": 7.468879668049793e-07, "epoch": 0.014534883720930232, "percentage": 0.21, "elapsed_time": "0:02:19", "remaining_time": "18:35:21"} +{"current_steps": 15, "total_steps": 4816, "loss": 0.8461, "lr": 1.161825726141079e-06, "epoch": 0.02180232558139535, "percentage": 0.31, "elapsed_time": "0:03:22", "remaining_time": "17:57:44"} +{"current_steps": 20, "total_steps": 4816, "loss": 0.7898, "lr": 1.5767634854771784e-06, "epoch": 0.029069767441860465, "percentage": 0.42, "elapsed_time": "0:04:34", "remaining_time": "18:15:13"} +{"current_steps": 25, "total_steps": 4816, "loss": 0.7522, "lr": 1.991701244813278e-06, "epoch": 0.036337209302325583, "percentage": 0.52, "elapsed_time": "0:05:37", "remaining_time": "17:57:43"} +{"current_steps": 30, "total_steps": 4816, "loss": 0.6776, "lr": 2.4066390041493776e-06, "epoch": 0.0436046511627907, "percentage": 0.62, "elapsed_time": "0:06:43", "remaining_time": "17:53:00"} +{"current_steps": 35, "total_steps": 4816, "loss": 0.647, "lr": 2.8215767634854773e-06, "epoch": 0.050872093023255814, "percentage": 0.73, "elapsed_time": "0:07:49", "remaining_time": "17:48:13"} +{"current_steps": 40, "total_steps": 4816, "loss": 0.6368, "lr": 3.2365145228215773e-06, "epoch": 0.05813953488372093, "percentage": 0.83, "elapsed_time": "0:08:59", "remaining_time": "17:52:52"} +{"current_steps": 45, "total_steps": 4816, "loss": 0.6042, "lr": 3.6514522821576765e-06, "epoch": 0.06540697674418605, "percentage": 0.93, "elapsed_time": "0:10:02", "remaining_time": "17:45:28"} +{"current_steps": 50, "total_steps": 4816, "loss": 0.6092, "lr": 4.0663900414937765e-06, "epoch": 0.07267441860465117, "percentage": 1.04, "elapsed_time": "0:11:03", "remaining_time": "17:33:45"} +{"current_steps": 55, "total_steps": 4816, "loss": 0.5623, "lr": 4.481327800829876e-06, "epoch": 0.07994186046511628, "percentage": 1.14, "elapsed_time": "0:12:14", "remaining_time": 
"17:40:12"} +{"current_steps": 60, "total_steps": 4816, "loss": 0.5582, "lr": 4.896265560165976e-06, "epoch": 0.0872093023255814, "percentage": 1.25, "elapsed_time": "0:13:16", "remaining_time": "17:31:42"} +{"current_steps": 65, "total_steps": 4816, "loss": 0.5279, "lr": 5.311203319502075e-06, "epoch": 0.09447674418604651, "percentage": 1.35, "elapsed_time": "0:14:26", "remaining_time": "17:35:44"} +{"current_steps": 70, "total_steps": 4816, "loss": 0.5329, "lr": 5.726141078838174e-06, "epoch": 0.10174418604651163, "percentage": 1.45, "elapsed_time": "0:15:28", "remaining_time": "17:29:29"} +{"current_steps": 75, "total_steps": 4816, "loss": 0.4821, "lr": 6.1410788381742745e-06, "epoch": 0.10901162790697674, "percentage": 1.56, "elapsed_time": "0:16:40", "remaining_time": "17:34:07"} +{"current_steps": 80, "total_steps": 4816, "loss": 0.4768, "lr": 6.556016597510374e-06, "epoch": 0.11627906976744186, "percentage": 1.66, "elapsed_time": "0:17:47", "remaining_time": "17:32:52"} +{"current_steps": 85, "total_steps": 4816, "loss": 0.4808, "lr": 6.970954356846473e-06, "epoch": 0.12354651162790697, "percentage": 1.76, "elapsed_time": "0:18:57", "remaining_time": "17:34:56"} +{"current_steps": 90, "total_steps": 4816, "loss": 0.4469, "lr": 7.385892116182573e-06, "epoch": 0.1308139534883721, "percentage": 1.87, "elapsed_time": "0:20:02", "remaining_time": "17:32:48"} +{"current_steps": 95, "total_steps": 4816, "loss": 0.4533, "lr": 7.800829875518673e-06, "epoch": 0.1380813953488372, "percentage": 1.97, "elapsed_time": "0:21:01", "remaining_time": "17:25:12"} +{"current_steps": 100, "total_steps": 4816, "loss": 0.4457, "lr": 8.215767634854772e-06, "epoch": 0.14534883720930233, "percentage": 2.08, "elapsed_time": "0:22:02", "remaining_time": "17:19:19"} +{"current_steps": 105, "total_steps": 4816, "loss": 0.4474, "lr": 8.630705394190872e-06, "epoch": 0.15261627906976744, "percentage": 2.18, "elapsed_time": "0:23:07", "remaining_time": "17:17:44"} +{"current_steps": 110, 
"total_steps": 4816, "loss": 0.4288, "lr": 9.045643153526971e-06, "epoch": 0.15988372093023256, "percentage": 2.28, "elapsed_time": "0:24:13", "remaining_time": "17:16:25"} +{"current_steps": 115, "total_steps": 4816, "loss": 0.4541, "lr": 9.460580912863071e-06, "epoch": 0.16715116279069767, "percentage": 2.39, "elapsed_time": "0:25:16", "remaining_time": "17:13:11"} +{"current_steps": 120, "total_steps": 4816, "loss": 0.4304, "lr": 9.875518672199172e-06, "epoch": 0.1744186046511628, "percentage": 2.49, "elapsed_time": "0:26:19", "remaining_time": "17:10:20"} +{"current_steps": 125, "total_steps": 4816, "loss": 0.41, "lr": 1.0290456431535269e-05, "epoch": 0.1816860465116279, "percentage": 2.6, "elapsed_time": "0:27:24", "remaining_time": "17:08:22"} +{"current_steps": 130, "total_steps": 4816, "loss": 0.4158, "lr": 1.070539419087137e-05, "epoch": 0.18895348837209303, "percentage": 2.7, "elapsed_time": "0:28:26", "remaining_time": "17:04:56"} +{"current_steps": 135, "total_steps": 4816, "loss": 0.3866, "lr": 1.112033195020747e-05, "epoch": 0.19622093023255813, "percentage": 2.8, "elapsed_time": "0:29:35", "remaining_time": "17:06:10"} +{"current_steps": 140, "total_steps": 4816, "loss": 0.3935, "lr": 1.1535269709543569e-05, "epoch": 0.20348837209302326, "percentage": 2.91, "elapsed_time": "0:30:40", "remaining_time": "17:04:45"} +{"current_steps": 145, "total_steps": 4816, "loss": 0.3836, "lr": 1.1950207468879669e-05, "epoch": 0.21075581395348839, "percentage": 3.01, "elapsed_time": "0:31:48", "remaining_time": "17:04:47"} +{"current_steps": 150, "total_steps": 4816, "loss": 0.3918, "lr": 1.236514522821577e-05, "epoch": 0.2180232558139535, "percentage": 3.11, "elapsed_time": "0:32:58", "remaining_time": "17:05:50"} +{"current_steps": 155, "total_steps": 4816, "loss": 0.3847, "lr": 1.2780082987551867e-05, "epoch": 0.22529069767441862, "percentage": 3.22, "elapsed_time": "0:34:00", "remaining_time": "17:02:29"} +{"current_steps": 160, "total_steps": 4816, "loss": 
0.387, "lr": 1.3195020746887967e-05, "epoch": 0.23255813953488372, "percentage": 3.32, "elapsed_time": "0:34:58", "remaining_time": "16:57:40"} +{"current_steps": 165, "total_steps": 4816, "loss": 0.3834, "lr": 1.3609958506224067e-05, "epoch": 0.23982558139534885, "percentage": 3.43, "elapsed_time": "0:36:04", "remaining_time": "16:56:43"} +{"current_steps": 170, "total_steps": 4816, "loss": 0.3975, "lr": 1.4024896265560166e-05, "epoch": 0.24709302325581395, "percentage": 3.53, "elapsed_time": "0:37:09", "remaining_time": "16:55:25"} +{"current_steps": 175, "total_steps": 4816, "loss": 0.3808, "lr": 1.4439834024896267e-05, "epoch": 0.2543604651162791, "percentage": 3.63, "elapsed_time": "0:38:15", "remaining_time": "16:54:27"} +{"current_steps": 180, "total_steps": 4816, "loss": 0.3775, "lr": 1.4854771784232367e-05, "epoch": 0.2616279069767442, "percentage": 3.74, "elapsed_time": "0:39:19", "remaining_time": "16:52:52"} +{"current_steps": 185, "total_steps": 4816, "loss": 0.3938, "lr": 1.5269709543568464e-05, "epoch": 0.2688953488372093, "percentage": 3.84, "elapsed_time": "0:40:19", "remaining_time": "16:49:20"} +{"current_steps": 190, "total_steps": 4816, "loss": 0.383, "lr": 1.5684647302904566e-05, "epoch": 0.2761627906976744, "percentage": 3.95, "elapsed_time": "0:41:20", "remaining_time": "16:46:39"} +{"current_steps": 195, "total_steps": 4816, "loss": 0.3812, "lr": 1.6099585062240665e-05, "epoch": 0.28343023255813954, "percentage": 4.05, "elapsed_time": "0:42:26", "remaining_time": "16:45:42"} +{"current_steps": 200, "total_steps": 4816, "loss": 0.3761, "lr": 1.6514522821576764e-05, "epoch": 0.29069767441860467, "percentage": 4.15, "elapsed_time": "0:43:36", "remaining_time": "16:46:36"} +{"current_steps": 205, "total_steps": 4816, "loss": 0.3862, "lr": 1.6929460580912863e-05, "epoch": 0.29796511627906974, "percentage": 4.26, "elapsed_time": "0:44:41", "remaining_time": "16:45:12"} +{"current_steps": 210, "total_steps": 4816, "loss": 0.373, "lr": 
1.7344398340248965e-05, "epoch": 0.30523255813953487, "percentage": 4.36, "elapsed_time": "0:45:41", "remaining_time": "16:42:06"} +{"current_steps": 215, "total_steps": 4816, "loss": 0.3787, "lr": 1.7759336099585064e-05, "epoch": 0.3125, "percentage": 4.46, "elapsed_time": "0:46:40", "remaining_time": "16:39:00"} +{"current_steps": 220, "total_steps": 4816, "loss": 0.3952, "lr": 1.8174273858921162e-05, "epoch": 0.31976744186046513, "percentage": 4.57, "elapsed_time": "0:47:45", "remaining_time": "16:37:33"} +{"current_steps": 225, "total_steps": 4816, "loss": 0.3555, "lr": 1.8589211618257264e-05, "epoch": 0.32703488372093026, "percentage": 4.67, "elapsed_time": "0:48:53", "remaining_time": "16:37:39"} +{"current_steps": 230, "total_steps": 4816, "loss": 0.3649, "lr": 1.9004149377593363e-05, "epoch": 0.33430232558139533, "percentage": 4.78, "elapsed_time": "0:49:55", "remaining_time": "16:35:21"} +{"current_steps": 235, "total_steps": 4816, "loss": 0.3644, "lr": 1.9419087136929462e-05, "epoch": 0.34156976744186046, "percentage": 4.88, "elapsed_time": "0:50:57", "remaining_time": "16:33:29"} +{"current_steps": 240, "total_steps": 4816, "loss": 0.3588, "lr": 1.983402489626556e-05, "epoch": 0.3488372093023256, "percentage": 4.98, "elapsed_time": "0:52:10", "remaining_time": "16:34:46"} +{"current_steps": 245, "total_steps": 4816, "loss": 0.3635, "lr": 2.024896265560166e-05, "epoch": 0.3561046511627907, "percentage": 5.09, "elapsed_time": "0:53:10", "remaining_time": "16:32:02"} +{"current_steps": 250, "total_steps": 4816, "loss": 0.3641, "lr": 2.066390041493776e-05, "epoch": 0.3633720930232558, "percentage": 5.19, "elapsed_time": "0:54:16", "remaining_time": "16:31:18"} +{"current_steps": 255, "total_steps": 4816, "loss": 0.3509, "lr": 2.107883817427386e-05, "epoch": 0.3706395348837209, "percentage": 5.29, "elapsed_time": "0:55:20", "remaining_time": "16:29:52"} +{"current_steps": 260, "total_steps": 4816, "loss": 0.3543, "lr": 2.149377593360996e-05, "epoch": 
0.37790697674418605, "percentage": 5.4, "elapsed_time": "0:56:26", "remaining_time": "16:28:57"} +{"current_steps": 265, "total_steps": 4816, "loss": 0.3378, "lr": 2.190871369294606e-05, "epoch": 0.3851744186046512, "percentage": 5.5, "elapsed_time": "0:57:29", "remaining_time": "16:27:12"} +{"current_steps": 270, "total_steps": 4816, "loss": 0.3526, "lr": 2.232365145228216e-05, "epoch": 0.39244186046511625, "percentage": 5.61, "elapsed_time": "0:58:34", "remaining_time": "16:26:19"} +{"current_steps": 275, "total_steps": 4816, "loss": 0.3786, "lr": 2.273858921161826e-05, "epoch": 0.3997093023255814, "percentage": 5.71, "elapsed_time": "0:59:38", "remaining_time": "16:24:46"} +{"current_steps": 280, "total_steps": 4816, "loss": 0.3594, "lr": 2.315352697095436e-05, "epoch": 0.4069767441860465, "percentage": 5.81, "elapsed_time": "1:00:35", "remaining_time": "16:21:31"} +{"current_steps": 285, "total_steps": 4816, "loss": 0.3452, "lr": 2.356846473029046e-05, "epoch": 0.41424418604651164, "percentage": 5.92, "elapsed_time": "1:01:42", "remaining_time": "16:20:58"} +{"current_steps": 290, "total_steps": 4816, "loss": 0.3384, "lr": 2.398340248962656e-05, "epoch": 0.42151162790697677, "percentage": 6.02, "elapsed_time": "1:02:47", "remaining_time": "16:19:56"} +{"current_steps": 295, "total_steps": 4816, "loss": 0.3506, "lr": 2.439834024896266e-05, "epoch": 0.42877906976744184, "percentage": 6.13, "elapsed_time": "1:03:44", "remaining_time": "16:16:50"} +{"current_steps": 300, "total_steps": 4816, "loss": 0.3305, "lr": 2.481327800829876e-05, "epoch": 0.436046511627907, "percentage": 6.23, "elapsed_time": "1:04:50", "remaining_time": "16:16:01"} +{"current_steps": 305, "total_steps": 4816, "loss": 0.3556, "lr": 2.5228215767634855e-05, "epoch": 0.4433139534883721, "percentage": 6.33, "elapsed_time": "1:05:53", "remaining_time": "16:14:39"} +{"current_steps": 310, "total_steps": 4816, "loss": 0.3537, "lr": 2.5643153526970957e-05, "epoch": 0.45058139534883723, "percentage": 
6.44, "elapsed_time": "1:06:54", "remaining_time": "16:12:37"} +{"current_steps": 315, "total_steps": 4816, "loss": 0.3433, "lr": 2.6058091286307056e-05, "epoch": 0.4578488372093023, "percentage": 6.54, "elapsed_time": "1:07:57", "remaining_time": "16:10:56"} +{"current_steps": 320, "total_steps": 4816, "loss": 0.3346, "lr": 2.6473029045643155e-05, "epoch": 0.46511627906976744, "percentage": 6.64, "elapsed_time": "1:08:54", "remaining_time": "16:08:08"} +{"current_steps": 325, "total_steps": 4816, "loss": 0.3331, "lr": 2.6887966804979257e-05, "epoch": 0.47238372093023256, "percentage": 6.75, "elapsed_time": "1:09:53", "remaining_time": "16:05:49"} +{"current_steps": 330, "total_steps": 4816, "loss": 0.3464, "lr": 2.7302904564315355e-05, "epoch": 0.4796511627906977, "percentage": 6.85, "elapsed_time": "1:10:58", "remaining_time": "16:04:53"} +{"current_steps": 335, "total_steps": 4816, "loss": 0.3464, "lr": 2.7717842323651454e-05, "epoch": 0.48691860465116277, "percentage": 6.96, "elapsed_time": "1:12:01", "remaining_time": "16:03:22"} +{"current_steps": 340, "total_steps": 4816, "loss": 0.3426, "lr": 2.8132780082987556e-05, "epoch": 0.4941860465116279, "percentage": 7.06, "elapsed_time": "1:13:08", "remaining_time": "16:02:47"} +{"current_steps": 345, "total_steps": 4816, "loss": 0.3435, "lr": 2.8547717842323655e-05, "epoch": 0.501453488372093, "percentage": 7.16, "elapsed_time": "1:14:08", "remaining_time": "16:00:49"} +{"current_steps": 350, "total_steps": 4816, "loss": 0.3442, "lr": 2.8962655601659754e-05, "epoch": 0.5087209302325582, "percentage": 7.27, "elapsed_time": "1:15:18", "remaining_time": "16:00:49"} +{"current_steps": 355, "total_steps": 4816, "loss": 0.3168, "lr": 2.9377593360995856e-05, "epoch": 0.5159883720930233, "percentage": 7.37, "elapsed_time": "1:16:15", "remaining_time": "15:58:14"} +{"current_steps": 360, "total_steps": 4816, "loss": 0.3223, "lr": 2.9792531120331955e-05, "epoch": 0.5232558139534884, "percentage": 7.48, "elapsed_time": 
"1:17:20", "remaining_time": "15:57:23"} +{"current_steps": 365, "total_steps": 4816, "loss": 0.3301, "lr": 3.020746887966805e-05, "epoch": 0.5305232558139535, "percentage": 7.58, "elapsed_time": "1:18:20", "remaining_time": "15:55:20"} +{"current_steps": 370, "total_steps": 4816, "loss": 0.3372, "lr": 3.062240663900415e-05, "epoch": 0.5377906976744186, "percentage": 7.68, "elapsed_time": "1:19:24", "remaining_time": "15:54:06"} +{"current_steps": 375, "total_steps": 4816, "loss": 0.3402, "lr": 3.103734439834025e-05, "epoch": 0.5450581395348837, "percentage": 7.79, "elapsed_time": "1:20:28", "remaining_time": "15:53:07"} +{"current_steps": 380, "total_steps": 4816, "loss": 0.3012, "lr": 3.145228215767635e-05, "epoch": 0.5523255813953488, "percentage": 7.89, "elapsed_time": "1:21:27", "remaining_time": "15:50:52"} +{"current_steps": 385, "total_steps": 4816, "loss": 0.3193, "lr": 3.186721991701245e-05, "epoch": 0.559593023255814, "percentage": 7.99, "elapsed_time": "1:22:28", "remaining_time": "15:49:14"} +{"current_steps": 390, "total_steps": 4816, "loss": 0.3267, "lr": 3.2282157676348554e-05, "epoch": 0.5668604651162791, "percentage": 8.1, "elapsed_time": "1:23:29", "remaining_time": "15:47:36"} +{"current_steps": 395, "total_steps": 4816, "loss": 0.3223, "lr": 3.2697095435684646e-05, "epoch": 0.5741279069767442, "percentage": 8.2, "elapsed_time": "1:24:34", "remaining_time": "15:46:36"} +{"current_steps": 400, "total_steps": 4816, "loss": 0.3517, "lr": 3.3112033195020745e-05, "epoch": 0.5813953488372093, "percentage": 8.31, "elapsed_time": "1:25:42", "remaining_time": "15:46:08"} +{"current_steps": 405, "total_steps": 4816, "loss": 0.3137, "lr": 3.352697095435685e-05, "epoch": 0.5886627906976745, "percentage": 8.41, "elapsed_time": "1:26:50", "remaining_time": "15:45:48"} +{"current_steps": 410, "total_steps": 4816, "loss": 0.326, "lr": 3.394190871369295e-05, "epoch": 0.5959302325581395, "percentage": 8.51, "elapsed_time": "1:27:48", "remaining_time": "15:43:37"} 
+{"current_steps": 415, "total_steps": 4816, "loss": 0.3415, "lr": 3.435684647302905e-05, "epoch": 0.6031976744186046, "percentage": 8.62, "elapsed_time": "1:28:51", "remaining_time": "15:42:16"} +{"current_steps": 420, "total_steps": 4816, "loss": 0.3226, "lr": 3.477178423236515e-05, "epoch": 0.6104651162790697, "percentage": 8.72, "elapsed_time": "1:29:47", "remaining_time": "15:39:49"} +{"current_steps": 425, "total_steps": 4816, "loss": 0.3392, "lr": 3.5186721991701245e-05, "epoch": 0.6177325581395349, "percentage": 8.82, "elapsed_time": "1:30:47", "remaining_time": "15:37:59"} +{"current_steps": 430, "total_steps": 4816, "loss": 0.3293, "lr": 3.5601659751037344e-05, "epoch": 0.625, "percentage": 8.93, "elapsed_time": "1:31:46", "remaining_time": "15:36:10"} +{"current_steps": 435, "total_steps": 4816, "loss": 0.3198, "lr": 3.601659751037345e-05, "epoch": 0.6322674418604651, "percentage": 9.03, "elapsed_time": "1:32:47", "remaining_time": "15:34:30"} +{"current_steps": 440, "total_steps": 4816, "loss": 0.3107, "lr": 3.643153526970955e-05, "epoch": 0.6395348837209303, "percentage": 9.14, "elapsed_time": "1:33:49", "remaining_time": "15:33:05"} +{"current_steps": 445, "total_steps": 4816, "loss": 0.3209, "lr": 3.684647302904565e-05, "epoch": 0.6468023255813954, "percentage": 9.24, "elapsed_time": "1:34:51", "remaining_time": "15:31:43"} +{"current_steps": 450, "total_steps": 4816, "loss": 0.3323, "lr": 3.7261410788381746e-05, "epoch": 0.6540697674418605, "percentage": 9.34, "elapsed_time": "1:35:55", "remaining_time": "15:30:37"} +{"current_steps": 455, "total_steps": 4816, "loss": 0.3105, "lr": 3.7676348547717845e-05, "epoch": 0.6613372093023255, "percentage": 9.45, "elapsed_time": "1:37:00", "remaining_time": "15:29:51"} +{"current_steps": 460, "total_steps": 4816, "loss": 0.3301, "lr": 3.8091286307053944e-05, "epoch": 0.6686046511627907, "percentage": 9.55, "elapsed_time": "1:38:01", "remaining_time": "15:28:18"} +{"current_steps": 465, "total_steps": 4816, 
"loss": 0.3154, "lr": 3.850622406639004e-05, "epoch": 0.6758720930232558, "percentage": 9.66, "elapsed_time": "1:39:05", "remaining_time": "15:27:09"} +{"current_steps": 470, "total_steps": 4816, "loss": 0.3254, "lr": 3.892116182572614e-05, "epoch": 0.6831395348837209, "percentage": 9.76, "elapsed_time": "1:40:04", "remaining_time": "15:25:18"} +{"current_steps": 475, "total_steps": 4816, "loss": 0.3074, "lr": 3.933609958506224e-05, "epoch": 0.690406976744186, "percentage": 9.86, "elapsed_time": "1:41:10", "remaining_time": "15:24:42"} +{"current_steps": 480, "total_steps": 4816, "loss": 0.3266, "lr": 3.9751037344398345e-05, "epoch": 0.6976744186046512, "percentage": 9.97, "elapsed_time": "1:42:16", "remaining_time": "15:23:52"} +{"current_steps": 485, "total_steps": 4816, "loss": 0.3114, "lr": 3.999997898246174e-05, "epoch": 0.7049418604651163, "percentage": 10.07, "elapsed_time": "1:43:19", "remaining_time": "15:22:37"} +{"current_steps": 490, "total_steps": 4816, "loss": 0.3187, "lr": 3.999974253566362e-05, "epoch": 0.7122093023255814, "percentage": 10.17, "elapsed_time": "1:44:21", "remaining_time": "15:21:24"} +{"current_steps": 495, "total_steps": 4816, "loss": 0.3126, "lr": 3.999924337326085e-05, "epoch": 0.7194767441860465, "percentage": 10.28, "elapsed_time": "1:45:24", "remaining_time": "15:20:07"} +{"current_steps": 500, "total_steps": 4816, "loss": 0.3149, "lr": 3.9998481501810414e-05, "epoch": 0.7267441860465116, "percentage": 10.38, "elapsed_time": "1:46:28", "remaining_time": "15:19:09"} +{"current_steps": 505, "total_steps": 4816, "loss": 0.2994, "lr": 3.999745693132021e-05, "epoch": 0.7340116279069767, "percentage": 10.49, "elapsed_time": "1:47:33", "remaining_time": "15:18:07"} +{"current_steps": 510, "total_steps": 4816, "loss": 0.318, "lr": 3.999616967524894e-05, "epoch": 0.7412790697674418, "percentage": 10.59, "elapsed_time": "1:48:39", "remaining_time": "15:17:27"} +{"current_steps": 515, "total_steps": 4816, "loss": 0.3382, "lr": 
3.999461975050595e-05, "epoch": 0.748546511627907, "percentage": 10.69, "elapsed_time": "1:49:39", "remaining_time": "15:15:49"} +{"current_steps": 520, "total_steps": 4816, "loss": 0.3091, "lr": 3.9992807177450956e-05, "epoch": 0.7558139534883721, "percentage": 10.8, "elapsed_time": "1:50:36", "remaining_time": "15:13:47"} +{"current_steps": 525, "total_steps": 4816, "loss": 0.323, "lr": 3.9990731979893834e-05, "epoch": 0.7630813953488372, "percentage": 10.9, "elapsed_time": "1:51:40", "remaining_time": "15:12:46"} +{"current_steps": 530, "total_steps": 4816, "loss": 0.3152, "lr": 3.998839418509428e-05, "epoch": 0.7703488372093024, "percentage": 11.0, "elapsed_time": "1:52:42", "remaining_time": "15:11:26"} +{"current_steps": 535, "total_steps": 4816, "loss": 0.3181, "lr": 3.998579382376143e-05, "epoch": 0.7776162790697675, "percentage": 11.11, "elapsed_time": "1:53:47", "remaining_time": "15:10:35"} +{"current_steps": 540, "total_steps": 4816, "loss": 0.3197, "lr": 3.9982930930053515e-05, "epoch": 0.7848837209302325, "percentage": 11.21, "elapsed_time": "1:54:51", "remaining_time": "15:09:29"} +{"current_steps": 545, "total_steps": 4816, "loss": 0.3189, "lr": 3.9979805541577356e-05, "epoch": 0.7921511627906976, "percentage": 11.32, "elapsed_time": "1:55:53", "remaining_time": "15:08:16"} +{"current_steps": 550, "total_steps": 4816, "loss": 0.3178, "lr": 3.99764176993879e-05, "epoch": 0.7994186046511628, "percentage": 11.42, "elapsed_time": "1:56:55", "remaining_time": "15:06:52"} +{"current_steps": 555, "total_steps": 4816, "loss": 0.3121, "lr": 3.997276744798766e-05, "epoch": 0.8066860465116279, "percentage": 11.52, "elapsed_time": "1:57:54", "remaining_time": "15:05:12"} +{"current_steps": 560, "total_steps": 4816, "loss": 0.3126, "lr": 3.996885483532617e-05, "epoch": 0.813953488372093, "percentage": 11.63, "elapsed_time": "1:58:54", "remaining_time": "15:03:39"} +{"current_steps": 565, "total_steps": 4816, "loss": 0.3127, "lr": 3.9964679912799306e-05, "epoch": 
0.8212209302325582, "percentage": 11.73, "elapsed_time": "1:59:55", "remaining_time": "15:02:20"} +{"current_steps": 570, "total_steps": 4816, "loss": 0.3341, "lr": 3.9960242735248626e-05, "epoch": 0.8284883720930233, "percentage": 11.84, "elapsed_time": "2:00:55", "remaining_time": "15:00:49"} +{"current_steps": 575, "total_steps": 4816, "loss": 0.3386, "lr": 3.995554336096069e-05, "epoch": 0.8357558139534884, "percentage": 11.94, "elapsed_time": "2:01:57", "remaining_time": "14:59:29"} +{"current_steps": 580, "total_steps": 4816, "loss": 0.3014, "lr": 3.995058185166623e-05, "epoch": 0.8430232558139535, "percentage": 12.04, "elapsed_time": "2:02:57", "remaining_time": "14:58:00"} +{"current_steps": 585, "total_steps": 4816, "loss": 0.3036, "lr": 3.9945358272539373e-05, "epoch": 0.8502906976744186, "percentage": 12.15, "elapsed_time": "2:04:01", "remaining_time": "14:56:59"} +{"current_steps": 590, "total_steps": 4816, "loss": 0.3081, "lr": 3.9939872692196805e-05, "epoch": 0.8575581395348837, "percentage": 12.25, "elapsed_time": "2:05:02", "remaining_time": "14:55:38"} +{"current_steps": 595, "total_steps": 4816, "loss": 0.3211, "lr": 3.993412518269682e-05, "epoch": 0.8648255813953488, "percentage": 12.35, "elapsed_time": "2:06:03", "remaining_time": "14:54:12"} +{"current_steps": 600, "total_steps": 4816, "loss": 0.3141, "lr": 3.9928115819538416e-05, "epoch": 0.872093023255814, "percentage": 12.46, "elapsed_time": "2:07:11", "remaining_time": "14:53:45"} +{"current_steps": 605, "total_steps": 4816, "loss": 0.3158, "lr": 3.992184468166028e-05, "epoch": 0.8793604651162791, "percentage": 12.56, "elapsed_time": "2:08:13", "remaining_time": "14:52:25"} +{"current_steps": 610, "total_steps": 4816, "loss": 0.2911, "lr": 3.991531185143977e-05, "epoch": 0.8866279069767442, "percentage": 12.67, "elapsed_time": "2:09:15", "remaining_time": "14:51:13"} +{"current_steps": 615, "total_steps": 4816, "loss": 0.3, "lr": 3.9908517414691806e-05, "epoch": 0.8938953488372093, 
"percentage": 12.77, "elapsed_time": "2:10:14", "remaining_time": "14:49:42"} +{"current_steps": 620, "total_steps": 4816, "loss": 0.3123, "lr": 3.990146146066776e-05, "epoch": 0.9011627906976745, "percentage": 12.87, "elapsed_time": "2:11:14", "remaining_time": "14:48:14"} +{"current_steps": 625, "total_steps": 4816, "loss": 0.3146, "lr": 3.98941440820543e-05, "epoch": 0.9084302325581395, "percentage": 12.98, "elapsed_time": "2:12:17", "remaining_time": "14:47:04"} +{"current_steps": 630, "total_steps": 4816, "loss": 0.3188, "lr": 3.988656537497213e-05, "epoch": 0.9156976744186046, "percentage": 13.08, "elapsed_time": "2:13:24", "remaining_time": "14:46:23"} +{"current_steps": 635, "total_steps": 4816, "loss": 0.3121, "lr": 3.987872543897477e-05, "epoch": 0.9229651162790697, "percentage": 13.19, "elapsed_time": "2:14:21", "remaining_time": "14:44:40"} +{"current_steps": 640, "total_steps": 4816, "loss": 0.3225, "lr": 3.9870624377047206e-05, "epoch": 0.9302325581395349, "percentage": 13.29, "elapsed_time": "2:15:29", "remaining_time": "14:44:04"} +{"current_steps": 645, "total_steps": 4816, "loss": 0.3029, "lr": 3.9862262295604594e-05, "epoch": 0.9375, "percentage": 13.39, "elapsed_time": "2:16:33", "remaining_time": "14:43:04"} +{"current_steps": 650, "total_steps": 4816, "loss": 0.2946, "lr": 3.9853639304490815e-05, "epoch": 0.9447674418604651, "percentage": 13.5, "elapsed_time": "2:17:29", "remaining_time": "14:41:13"} +{"current_steps": 655, "total_steps": 4816, "loss": 0.3185, "lr": 3.984475551697703e-05, "epoch": 0.9520348837209303, "percentage": 13.6, "elapsed_time": "2:18:28", "remaining_time": "14:39:39"} +{"current_steps": 660, "total_steps": 4816, "loss": 0.3232, "lr": 3.9835611049760216e-05, "epoch": 0.9593023255813954, "percentage": 13.7, "elapsed_time": "2:19:33", "remaining_time": "14:38:47"} +{"current_steps": 665, "total_steps": 4816, "loss": 0.3122, "lr": 3.982620602296166e-05, "epoch": 0.9665697674418605, "percentage": 13.81, "elapsed_time": 
"2:20:39", "remaining_time": "14:37:59"} +{"current_steps": 670, "total_steps": 4816, "loss": 0.3094, "lr": 3.981654056012529e-05, "epoch": 0.9738372093023255, "percentage": 13.91, "elapsed_time": "2:21:38", "remaining_time": "14:36:30"} +{"current_steps": 675, "total_steps": 4816, "loss": 0.3454, "lr": 3.980661478821614e-05, "epoch": 0.9811046511627907, "percentage": 14.02, "elapsed_time": "2:22:37", "remaining_time": "14:34:59"} +{"current_steps": 680, "total_steps": 4816, "loss": 0.2994, "lr": 3.979642883761866e-05, "epoch": 0.9883720930232558, "percentage": 14.12, "elapsed_time": "2:23:43", "remaining_time": "14:34:09"} +{"current_steps": 685, "total_steps": 4816, "loss": 0.2988, "lr": 3.978598284213497e-05, "epoch": 0.9956395348837209, "percentage": 14.22, "elapsed_time": "2:24:48", "remaining_time": "14:33:17"} +{"current_steps": 690, "total_steps": 4816, "loss": 0.3086, "lr": 3.9775276938983144e-05, "epoch": 1.002906976744186, "percentage": 14.33, "elapsed_time": "2:25:51", "remaining_time": "14:32:09"} +{"current_steps": 695, "total_steps": 4816, "loss": 0.2837, "lr": 3.9764311268795374e-05, "epoch": 1.010174418604651, "percentage": 14.43, "elapsed_time": "2:26:50", "remaining_time": "14:30:43"} +{"current_steps": 700, "total_steps": 4816, "loss": 0.3015, "lr": 3.9753085975616157e-05, "epoch": 1.0174418604651163, "percentage": 14.53, "elapsed_time": "2:27:48", "remaining_time": "14:29:08"} +{"current_steps": 705, "total_steps": 4816, "loss": 0.294, "lr": 3.9741601206900376e-05, "epoch": 1.0247093023255813, "percentage": 14.64, "elapsed_time": "2:28:49", "remaining_time": "14:27:52"} +{"current_steps": 710, "total_steps": 4816, "loss": 0.3069, "lr": 3.972985711351136e-05, "epoch": 1.0319767441860466, "percentage": 14.74, "elapsed_time": "2:29:52", "remaining_time": "14:26:44"} +{"current_steps": 715, "total_steps": 4816, "loss": 0.2876, "lr": 3.971785384971894e-05, "epoch": 1.0392441860465116, "percentage": 14.85, "elapsed_time": "2:30:57", "remaining_time": 
"14:25:49"} +{"current_steps": 720, "total_steps": 4816, "loss": 0.2858, "lr": 3.970559157319737e-05, "epoch": 1.0465116279069768, "percentage": 14.95, "elapsed_time": "2:32:00", "remaining_time": "14:24:44"} +{"current_steps": 725, "total_steps": 4816, "loss": 0.3006, "lr": 3.9693070445023297e-05, "epoch": 1.0537790697674418, "percentage": 15.05, "elapsed_time": "2:33:03", "remaining_time": "14:23:38"} +{"current_steps": 730, "total_steps": 4816, "loss": 0.3136, "lr": 3.968029062967363e-05, "epoch": 1.0610465116279069, "percentage": 15.16, "elapsed_time": "2:34:04", "remaining_time": "14:22:22"} +{"current_steps": 735, "total_steps": 4816, "loss": 0.2906, "lr": 3.966725229502336e-05, "epoch": 1.068313953488372, "percentage": 15.26, "elapsed_time": "2:35:14", "remaining_time": "14:21:55"} +{"current_steps": 740, "total_steps": 4816, "loss": 0.2918, "lr": 3.965395561234341e-05, "epoch": 1.0755813953488371, "percentage": 15.37, "elapsed_time": "2:36:22", "remaining_time": "14:21:20"} +{"current_steps": 745, "total_steps": 4816, "loss": 0.2938, "lr": 3.9640400756298325e-05, "epoch": 1.0828488372093024, "percentage": 15.47, "elapsed_time": "2:37:26", "remaining_time": "14:20:21"} +{"current_steps": 750, "total_steps": 4816, "loss": 0.2932, "lr": 3.9626587904943996e-05, "epoch": 1.0901162790697674, "percentage": 15.57, "elapsed_time": "2:38:31", "remaining_time": "14:19:23"} +{"current_steps": 755, "total_steps": 4816, "loss": 0.2966, "lr": 3.961251723972535e-05, "epoch": 1.0973837209302326, "percentage": 15.68, "elapsed_time": "2:39:37", "remaining_time": "14:18:34"} +{"current_steps": 760, "total_steps": 4816, "loss": 0.288, "lr": 3.959818894547393e-05, "epoch": 1.1046511627906976, "percentage": 15.78, "elapsed_time": "2:40:40", "remaining_time": "14:17:28"} +{"current_steps": 765, "total_steps": 4816, "loss": 0.2866, "lr": 3.9583603210405484e-05, "epoch": 1.1119186046511629, "percentage": 15.88, "elapsed_time": "2:41:49", "remaining_time": "14:16:56"} 
+{"current_steps": 770, "total_steps": 4816, "loss": 0.2917, "lr": 3.956876022611748e-05, "epoch": 1.119186046511628, "percentage": 15.99, "elapsed_time": "2:42:51", "remaining_time": "14:15:46"} +{"current_steps": 775, "total_steps": 4816, "loss": 0.2948, "lr": 3.9553660187586625e-05, "epoch": 1.1264534883720931, "percentage": 16.09, "elapsed_time": "2:43:58", "remaining_time": "14:15:00"} +{"current_steps": 780, "total_steps": 4816, "loss": 0.2819, "lr": 3.9538303293166243e-05, "epoch": 1.1337209302325582, "percentage": 16.2, "elapsed_time": "2:44:58", "remaining_time": "14:13:37"} +{"current_steps": 785, "total_steps": 4816, "loss": 0.3074, "lr": 3.952268974458373e-05, "epoch": 1.1409883720930232, "percentage": 16.3, "elapsed_time": "2:45:59", "remaining_time": "14:12:24"} +{"current_steps": 790, "total_steps": 4816, "loss": 0.2931, "lr": 3.950681974693787e-05, "epoch": 1.1482558139534884, "percentage": 16.4, "elapsed_time": "2:47:00", "remaining_time": "14:11:05"} +{"current_steps": 795, "total_steps": 4816, "loss": 0.3147, "lr": 3.949069350869614e-05, "epoch": 1.1555232558139534, "percentage": 16.51, "elapsed_time": "2:47:59", "remaining_time": "14:09:39"} +{"current_steps": 800, "total_steps": 4816, "loss": 0.3049, "lr": 3.9474311241691994e-05, "epoch": 1.1627906976744187, "percentage": 16.61, "elapsed_time": "2:49:09", "remaining_time": "14:09:08"} +{"current_steps": 805, "total_steps": 4816, "loss": 0.3073, "lr": 3.945767316112206e-05, "epoch": 1.1700581395348837, "percentage": 16.72, "elapsed_time": "2:50:11", "remaining_time": "14:08:01"} +{"current_steps": 810, "total_steps": 4816, "loss": 0.302, "lr": 3.944077948554333e-05, "epoch": 1.177325581395349, "percentage": 16.82, "elapsed_time": "2:51:12", "remaining_time": "14:06:44"} +{"current_steps": 815, "total_steps": 4816, "loss": 0.2843, "lr": 3.9423630436870255e-05, "epoch": 1.184593023255814, "percentage": 16.92, "elapsed_time": "2:52:13", "remaining_time": "14:05:29"} +{"current_steps": 820, 
"total_steps": 4816, "loss": 0.3015, "lr": 3.940622624037188e-05, "epoch": 1.191860465116279, "percentage": 17.03, "elapsed_time": "2:53:12", "remaining_time": "14:04:02"} +{"current_steps": 825, "total_steps": 4816, "loss": 0.2788, "lr": 3.938856712466885e-05, "epoch": 1.1991279069767442, "percentage": 17.13, "elapsed_time": "2:54:14", "remaining_time": "14:02:56"} +{"current_steps": 830, "total_steps": 4816, "loss": 0.29, "lr": 3.93706533217304e-05, "epoch": 1.2063953488372092, "percentage": 17.23, "elapsed_time": "2:55:13", "remaining_time": "14:01:30"} +{"current_steps": 835, "total_steps": 4816, "loss": 0.302, "lr": 3.935248506687136e-05, "epoch": 1.2136627906976745, "percentage": 17.34, "elapsed_time": "2:56:15", "remaining_time": "14:00:20"} +{"current_steps": 840, "total_steps": 4816, "loss": 0.2899, "lr": 3.9334062598748986e-05, "epoch": 1.2209302325581395, "percentage": 17.44, "elapsed_time": "2:57:20", "remaining_time": "13:59:24"} +{"current_steps": 845, "total_steps": 4816, "loss": 0.2746, "lr": 3.931538615935989e-05, "epoch": 1.2281976744186047, "percentage": 17.55, "elapsed_time": "2:58:20", "remaining_time": "13:58:04"} +{"current_steps": 850, "total_steps": 4816, "loss": 0.2905, "lr": 3.929645599403684e-05, "epoch": 1.2354651162790697, "percentage": 17.65, "elapsed_time": "2:59:21", "remaining_time": "13:56:53"} +{"current_steps": 855, "total_steps": 4816, "loss": 0.2858, "lr": 3.9277272351445524e-05, "epoch": 1.2427325581395348, "percentage": 17.75, "elapsed_time": "3:00:19", "remaining_time": "13:55:23"} +{"current_steps": 860, "total_steps": 4816, "loss": 0.2965, "lr": 3.9257835483581306e-05, "epoch": 1.25, "percentage": 17.86, "elapsed_time": "3:01:22", "remaining_time": "13:54:17"} +{"current_steps": 865, "total_steps": 4816, "loss": 0.287, "lr": 3.923814564576589e-05, "epoch": 1.2572674418604652, "percentage": 17.96, "elapsed_time": "3:02:20", "remaining_time": "13:52:50"} +{"current_steps": 870, "total_steps": 4816, "loss": 0.2969, "lr": 
3.921820309664398e-05, "epoch": 1.2645348837209303, "percentage": 18.06, "elapsed_time": "3:03:19", "remaining_time": "13:51:28"} +{"current_steps": 875, "total_steps": 4816, "loss": 0.2987, "lr": 3.91980080981799e-05, "epoch": 1.2718023255813953, "percentage": 18.17, "elapsed_time": "3:04:21", "remaining_time": "13:50:19"} +{"current_steps": 880, "total_steps": 4816, "loss": 0.2951, "lr": 3.917756091565414e-05, "epoch": 1.2790697674418605, "percentage": 18.27, "elapsed_time": "3:05:22", "remaining_time": "13:49:05"} +{"current_steps": 885, "total_steps": 4816, "loss": 0.2985, "lr": 3.915686181765983e-05, "epoch": 1.2863372093023255, "percentage": 18.38, "elapsed_time": "3:06:25", "remaining_time": "13:48:04"} +{"current_steps": 890, "total_steps": 4816, "loss": 0.2912, "lr": 3.9135911076099286e-05, "epoch": 1.2936046511627908, "percentage": 18.48, "elapsed_time": "3:07:29", "remaining_time": "13:47:04"} +{"current_steps": 895, "total_steps": 4816, "loss": 0.2869, "lr": 3.9114708966180385e-05, "epoch": 1.3008720930232558, "percentage": 18.58, "elapsed_time": "3:08:34", "remaining_time": "13:46:10"} +{"current_steps": 900, "total_steps": 4816, "loss": 0.293, "lr": 3.909325576641298e-05, "epoch": 1.308139534883721, "percentage": 18.69, "elapsed_time": "3:09:36", "remaining_time": "13:45:00"} +{"current_steps": 905, "total_steps": 4816, "loss": 0.2764, "lr": 3.907155175860519e-05, "epoch": 1.315406976744186, "percentage": 18.79, "elapsed_time": "3:10:40", "remaining_time": "13:44:01"} +{"current_steps": 910, "total_steps": 4816, "loss": 0.2991, "lr": 3.904959722785978e-05, "epoch": 1.322674418604651, "percentage": 18.9, "elapsed_time": "3:11:46", "remaining_time": "13:43:11"} +{"current_steps": 915, "total_steps": 4816, "loss": 0.3012, "lr": 3.902739246257035e-05, "epoch": 1.3299418604651163, "percentage": 19.0, "elapsed_time": "3:12:42", "remaining_time": "13:41:36"} +{"current_steps": 920, "total_steps": 4816, "loss": 0.2879, "lr": 3.9004937754417576e-05, "epoch": 
1.3372093023255813, "percentage": 19.1, "elapsed_time": "3:13:51", "remaining_time": "13:40:56"} +{"current_steps": 925, "total_steps": 4816, "loss": 0.2905, "lr": 3.898223339836535e-05, "epoch": 1.3444767441860466, "percentage": 19.21, "elapsed_time": "3:14:58", "remaining_time": "13:40:10"} +{"current_steps": 930, "total_steps": 4816, "loss": 0.2739, "lr": 3.8959279692656965e-05, "epoch": 1.3517441860465116, "percentage": 19.31, "elapsed_time": "3:16:03", "remaining_time": "13:39:14"} +{"current_steps": 935, "total_steps": 4816, "loss": 0.2888, "lr": 3.893607693881113e-05, "epoch": 1.3590116279069768, "percentage": 19.41, "elapsed_time": "3:17:04", "remaining_time": "13:38:01"} +{"current_steps": 940, "total_steps": 4816, "loss": 0.3246, "lr": 3.8912625441618034e-05, "epoch": 1.3662790697674418, "percentage": 19.52, "elapsed_time": "3:18:04", "remaining_time": "13:36:44"} +{"current_steps": 945, "total_steps": 4816, "loss": 0.2873, "lr": 3.8888925509135365e-05, "epoch": 1.3735465116279069, "percentage": 19.62, "elapsed_time": "3:19:09", "remaining_time": "13:35:47"} +{"current_steps": 950, "total_steps": 4816, "loss": 0.2943, "lr": 3.8864977452684235e-05, "epoch": 1.380813953488372, "percentage": 19.73, "elapsed_time": "3:20:12", "remaining_time": "13:34:46"} +{"current_steps": 955, "total_steps": 4816, "loss": 0.2838, "lr": 3.88407815868451e-05, "epoch": 1.3880813953488373, "percentage": 19.83, "elapsed_time": "3:21:10", "remaining_time": "13:33:18"} +{"current_steps": 960, "total_steps": 4816, "loss": 0.2948, "lr": 3.8816338229453616e-05, "epoch": 1.3953488372093024, "percentage": 19.93, "elapsed_time": "3:22:07", "remaining_time": "13:31:54"} +{"current_steps": 965, "total_steps": 4816, "loss": 0.2839, "lr": 3.879164770159651e-05, "epoch": 1.4026162790697674, "percentage": 20.04, "elapsed_time": "3:23:08", "remaining_time": "13:30:39"} +{"current_steps": 970, "total_steps": 4816, "loss": 0.291, "lr": 3.8766710327607275e-05, "epoch": 1.4098837209302326, 
"percentage": 20.14, "elapsed_time": "3:24:11", "remaining_time": "13:29:38"} +{"current_steps": 975, "total_steps": 4816, "loss": 0.2785, "lr": 3.874152643506202e-05, "epoch": 1.4171511627906976, "percentage": 20.25, "elapsed_time": "3:25:13", "remaining_time": "13:28:30"} +{"current_steps": 980, "total_steps": 4816, "loss": 0.3037, "lr": 3.871609635477507e-05, "epoch": 1.4244186046511627, "percentage": 20.35, "elapsed_time": "3:26:14", "remaining_time": "13:27:18"} +{"current_steps": 985, "total_steps": 4816, "loss": 0.2765, "lr": 3.869042042079467e-05, "epoch": 1.431686046511628, "percentage": 20.45, "elapsed_time": "3:27:18", "remaining_time": "13:26:17"} +{"current_steps": 990, "total_steps": 4816, "loss": 0.3149, "lr": 3.866449897039859e-05, "epoch": 1.4389534883720931, "percentage": 20.56, "elapsed_time": "3:28:25", "remaining_time": "13:25:28"} +{"current_steps": 995, "total_steps": 4816, "loss": 0.2817, "lr": 3.863833234408969e-05, "epoch": 1.4462209302325582, "percentage": 20.66, "elapsed_time": "3:29:22", "remaining_time": "13:24:02"} +{"current_steps": 1000, "total_steps": 4816, "loss": 0.2881, "lr": 3.861192088559144e-05, "epoch": 1.4534883720930232, "percentage": 20.76, "elapsed_time": "3:30:23", "remaining_time": "13:22:50"} +{"current_steps": 1005, "total_steps": 4816, "loss": 0.3142, "lr": 3.858526494184344e-05, "epoch": 1.4607558139534884, "percentage": 20.87, "elapsed_time": "3:31:25", "remaining_time": "13:21:43"} +{"current_steps": 1010, "total_steps": 4816, "loss": 0.2925, "lr": 3.8558364862996796e-05, "epoch": 1.4680232558139534, "percentage": 20.97, "elapsed_time": "3:32:26", "remaining_time": "13:20:34"} +{"current_steps": 1015, "total_steps": 4816, "loss": 0.3007, "lr": 3.853122100240959e-05, "epoch": 1.4752906976744187, "percentage": 21.08, "elapsed_time": "3:33:29", "remaining_time": "13:19:28"} +{"current_steps": 1020, "total_steps": 4816, "loss": 0.2846, "lr": 3.850383371664219e-05, "epoch": 1.4825581395348837, "percentage": 21.18, 
"elapsed_time": "3:34:34", "remaining_time": "13:18:33"} +{"current_steps": 1025, "total_steps": 4816, "loss": 0.2943, "lr": 3.8476203365452607e-05, "epoch": 1.489825581395349, "percentage": 21.28, "elapsed_time": "3:35:31", "remaining_time": "13:17:06"} +{"current_steps": 1030, "total_steps": 4816, "loss": 0.2835, "lr": 3.8448330311791735e-05, "epoch": 1.497093023255814, "percentage": 21.39, "elapsed_time": "3:36:37", "remaining_time": "13:16:16"} +{"current_steps": 1035, "total_steps": 4816, "loss": 0.2776, "lr": 3.842021492179858e-05, "epoch": 1.504360465116279, "percentage": 21.49, "elapsed_time": "3:37:35", "remaining_time": "13:14:52"} +{"current_steps": 1040, "total_steps": 4816, "loss": 0.2699, "lr": 3.8391857564795494e-05, "epoch": 1.5116279069767442, "percentage": 21.59, "elapsed_time": "3:38:35", "remaining_time": "13:13:38"} +{"current_steps": 1045, "total_steps": 4816, "loss": 0.2993, "lr": 3.836325861328328e-05, "epoch": 1.5188953488372094, "percentage": 21.7, "elapsed_time": "3:39:36", "remaining_time": "13:12:30"} +{"current_steps": 1050, "total_steps": 4816, "loss": 0.288, "lr": 3.83344184429363e-05, "epoch": 1.5261627906976745, "percentage": 21.8, "elapsed_time": "3:40:43", "remaining_time": "13:11:40"} +{"current_steps": 1055, "total_steps": 4816, "loss": 0.2854, "lr": 3.83053374325976e-05, "epoch": 1.5334302325581395, "percentage": 21.91, "elapsed_time": "3:41:50", "remaining_time": "13:10:50"} +{"current_steps": 1060, "total_steps": 4816, "loss": 0.2798, "lr": 3.827601596427383e-05, "epoch": 1.5406976744186047, "percentage": 22.01, "elapsed_time": "3:42:54", "remaining_time": "13:09:51"} +{"current_steps": 1065, "total_steps": 4816, "loss": 0.2896, "lr": 3.824645442313033e-05, "epoch": 1.5479651162790697, "percentage": 22.11, "elapsed_time": "3:43:54", "remaining_time": "13:08:35"} +{"current_steps": 1070, "total_steps": 4816, "loss": 0.2821, "lr": 3.8216653197486004e-05, "epoch": 1.5552325581395348, "percentage": 22.22, "elapsed_time": 
"3:45:03", "remaining_time": "13:07:54"} +{"current_steps": 1075, "total_steps": 4816, "loss": 0.3081, "lr": 3.818661267880823e-05, "epoch": 1.5625, "percentage": 22.32, "elapsed_time": "3:46:08", "remaining_time": "13:06:58"} +{"current_steps": 1080, "total_steps": 4816, "loss": 0.2622, "lr": 3.815633326170776e-05, "epoch": 1.5697674418604652, "percentage": 22.43, "elapsed_time": "3:47:13", "remaining_time": "13:06:00"} +{"current_steps": 1085, "total_steps": 4816, "loss": 0.2884, "lr": 3.812581534393347e-05, "epoch": 1.5770348837209303, "percentage": 22.53, "elapsed_time": "3:48:18", "remaining_time": "13:05:05"} +{"current_steps": 1090, "total_steps": 4816, "loss": 0.2781, "lr": 3.809505932636717e-05, "epoch": 1.5843023255813953, "percentage": 22.63, "elapsed_time": "3:49:18", "remaining_time": "13:03:50"} +{"current_steps": 1095, "total_steps": 4816, "loss": 0.2836, "lr": 3.8064065613018376e-05, "epoch": 1.5915697674418605, "percentage": 22.74, "elapsed_time": "3:50:20", "remaining_time": "13:02:45"} +{"current_steps": 1100, "total_steps": 4816, "loss": 0.2766, "lr": 3.803283461101892e-05, "epoch": 1.5988372093023255, "percentage": 22.84, "elapsed_time": "3:51:22", "remaining_time": "13:01:38"} +{"current_steps": 1105, "total_steps": 4816, "loss": 0.2986, "lr": 3.800136673061765e-05, "epoch": 1.6061046511627906, "percentage": 22.94, "elapsed_time": "3:52:25", "remaining_time": "13:00:33"} +{"current_steps": 1110, "total_steps": 4816, "loss": 0.293, "lr": 3.796966238517508e-05, "epoch": 1.6133720930232558, "percentage": 23.05, "elapsed_time": "3:53:30", "remaining_time": "12:59:36"} +{"current_steps": 1115, "total_steps": 4816, "loss": 0.2996, "lr": 3.793772199115786e-05, "epoch": 1.620639534883721, "percentage": 23.15, "elapsed_time": "3:54:35", "remaining_time": "12:58:41"} +{"current_steps": 1120, "total_steps": 4816, "loss": 0.2827, "lr": 3.790554596813339e-05, "epoch": 1.627906976744186, "percentage": 23.26, "elapsed_time": "3:55:35", "remaining_time": 
"12:57:26"} +{"current_steps": 1125, "total_steps": 4816, "loss": 0.2793, "lr": 3.787313473876429e-05, "epoch": 1.635174418604651, "percentage": 23.36, "elapsed_time": "3:56:36", "remaining_time": "12:56:17"} +{"current_steps": 1130, "total_steps": 4816, "loss": 0.2726, "lr": 3.784048872880282e-05, "epoch": 1.6424418604651163, "percentage": 23.46, "elapsed_time": "3:57:35", "remaining_time": "12:55:00"} +{"current_steps": 1135, "total_steps": 4816, "loss": 0.2761, "lr": 3.780760836708531e-05, "epoch": 1.6497093023255816, "percentage": 23.57, "elapsed_time": "3:58:39", "remaining_time": "12:54:00"} +{"current_steps": 1140, "total_steps": 4816, "loss": 0.2861, "lr": 3.777449408552653e-05, "epoch": 1.6569767441860463, "percentage": 23.67, "elapsed_time": "3:59:42", "remaining_time": "12:52:57"} +{"current_steps": 1145, "total_steps": 4816, "loss": 0.2841, "lr": 3.774114631911401e-05, "epoch": 1.6642441860465116, "percentage": 23.77, "elapsed_time": "4:00:47", "remaining_time": "12:51:58"} +{"current_steps": 1150, "total_steps": 4816, "loss": 0.2746, "lr": 3.770756550590231e-05, "epoch": 1.6715116279069768, "percentage": 23.88, "elapsed_time": "4:01:50", "remaining_time": "12:50:58"} +{"current_steps": 1155, "total_steps": 4816, "loss": 0.2878, "lr": 3.767375208700729e-05, "epoch": 1.6787790697674418, "percentage": 23.98, "elapsed_time": "4:02:55", "remaining_time": "12:50:00"} +{"current_steps": 1160, "total_steps": 4816, "loss": 0.2849, "lr": 3.763970650660032e-05, "epoch": 1.6860465116279069, "percentage": 24.09, "elapsed_time": "4:03:53", "remaining_time": "12:48:42"} +{"current_steps": 1165, "total_steps": 4816, "loss": 0.2782, "lr": 3.76054292119024e-05, "epoch": 1.693313953488372, "percentage": 24.19, "elapsed_time": "4:04:54", "remaining_time": "12:47:30"} +{"current_steps": 1170, "total_steps": 4816, "loss": 0.3035, "lr": 3.7570920653178355e-05, "epoch": 1.7005813953488373, "percentage": 24.29, "elapsed_time": "4:05:49", "remaining_time": "12:46:03"} 
+{"current_steps": 1175, "total_steps": 4816, "loss": 0.2802, "lr": 3.753618128373086e-05, "epoch": 1.7078488372093024, "percentage": 24.4, "elapsed_time": "4:06:53", "remaining_time": "12:45:03"} +{"current_steps": 1180, "total_steps": 4816, "loss": 0.2834, "lr": 3.75012115598945e-05, "epoch": 1.7151162790697674, "percentage": 24.5, "elapsed_time": "4:08:02", "remaining_time": "12:44:17"} +{"current_steps": 1185, "total_steps": 4816, "loss": 0.2967, "lr": 3.7466011941029806e-05, "epoch": 1.7223837209302326, "percentage": 24.61, "elapsed_time": "4:09:04", "remaining_time": "12:43:11"} +{"current_steps": 1190, "total_steps": 4816, "loss": 0.2827, "lr": 3.743058288951719e-05, "epoch": 1.7296511627906976, "percentage": 24.71, "elapsed_time": "4:10:10", "remaining_time": "12:42:16"} +{"current_steps": 1195, "total_steps": 4816, "loss": 0.2999, "lr": 3.739492487075087e-05, "epoch": 1.7369186046511627, "percentage": 24.81, "elapsed_time": "4:11:19", "remaining_time": "12:41:31"} +{"current_steps": 1200, "total_steps": 4816, "loss": 0.2705, "lr": 3.735903835313278e-05, "epoch": 1.744186046511628, "percentage": 24.92, "elapsed_time": "4:12:28", "remaining_time": "12:40:47"} +{"current_steps": 1205, "total_steps": 4816, "loss": 0.3053, "lr": 3.7322923808066394e-05, "epoch": 1.7514534883720931, "percentage": 25.02, "elapsed_time": "4:13:29", "remaining_time": "12:39:37"} +{"current_steps": 1210, "total_steps": 4816, "loss": 0.2919, "lr": 3.728658170995055e-05, "epoch": 1.7587209302325582, "percentage": 25.12, "elapsed_time": "4:14:31", "remaining_time": "12:38:31"} +{"current_steps": 1215, "total_steps": 4816, "loss": 0.3008, "lr": 3.72500125361732e-05, "epoch": 1.7659883720930232, "percentage": 25.23, "elapsed_time": "4:15:35", "remaining_time": "12:37:30"} +{"current_steps": 1220, "total_steps": 4816, "loss": 0.2845, "lr": 3.7213216767105165e-05, "epoch": 1.7732558139534884, "percentage": 25.33, "elapsed_time": "4:16:40", "remaining_time": "12:36:32"} +{"current_steps": 
1225, "total_steps": 4816, "loss": 0.2835, "lr": 3.71761948860938e-05, "epoch": 1.7805232558139537, "percentage": 25.44, "elapsed_time": "4:17:40", "remaining_time": "12:35:21"} +{"current_steps": 1230, "total_steps": 4816, "loss": 0.2722, "lr": 3.713894737945666e-05, "epoch": 1.7877906976744184, "percentage": 25.54, "elapsed_time": "4:18:48", "remaining_time": "12:34:33"} +{"current_steps": 1235, "total_steps": 4816, "loss": 0.2866, "lr": 3.71014747364751e-05, "epoch": 1.7950581395348837, "percentage": 25.64, "elapsed_time": "4:19:55", "remaining_time": "12:33:41"} +{"current_steps": 1240, "total_steps": 4816, "loss": 0.2745, "lr": 3.7063777449387875e-05, "epoch": 1.802325581395349, "percentage": 25.75, "elapsed_time": "4:20:58", "remaining_time": "12:32:36"} +{"current_steps": 1245, "total_steps": 4816, "loss": 0.278, "lr": 3.702585601338461e-05, "epoch": 1.809593023255814, "percentage": 25.85, "elapsed_time": "4:21:59", "remaining_time": "12:31:28"} +{"current_steps": 1250, "total_steps": 4816, "loss": 0.2868, "lr": 3.698771092659939e-05, "epoch": 1.816860465116279, "percentage": 25.96, "elapsed_time": "4:23:04", "remaining_time": "12:30:30"} +{"current_steps": 1255, "total_steps": 4816, "loss": 0.2821, "lr": 3.694934269010414e-05, "epoch": 1.8241279069767442, "percentage": 26.06, "elapsed_time": "4:24:09", "remaining_time": "12:29:31"} +{"current_steps": 1260, "total_steps": 4816, "loss": 0.2845, "lr": 3.691075180790207e-05, "epoch": 1.8313953488372094, "percentage": 26.16, "elapsed_time": "4:25:13", "remaining_time": "12:28:31"} +{"current_steps": 1265, "total_steps": 4816, "loss": 0.2958, "lr": 3.6871938786921044e-05, "epoch": 1.8386627906976745, "percentage": 26.27, "elapsed_time": "4:26:13", "remaining_time": "12:27:20"} +{"current_steps": 1270, "total_steps": 4816, "loss": 0.2981, "lr": 3.683290413700695e-05, "epoch": 1.8459302325581395, "percentage": 26.37, "elapsed_time": "4:27:17", "remaining_time": "12:26:17"} +{"current_steps": 1275, "total_steps": 
4816, "loss": 0.2781, "lr": 3.679364837091696e-05, "epoch": 1.8531976744186047, "percentage": 26.47, "elapsed_time": "4:28:21", "remaining_time": "12:25:18"} +{"current_steps": 1280, "total_steps": 4816, "loss": 0.2835, "lr": 3.675417200431284e-05, "epoch": 1.8604651162790697, "percentage": 26.58, "elapsed_time": "4:29:25", "remaining_time": "12:24:18"} +{"current_steps": 1285, "total_steps": 4816, "loss": 0.2816, "lr": 3.671447555575413e-05, "epoch": 1.8677325581395348, "percentage": 26.68, "elapsed_time": "4:30:28", "remaining_time": "12:23:13"} +{"current_steps": 1290, "total_steps": 4816, "loss": 0.2863, "lr": 3.667455954669138e-05, "epoch": 1.875, "percentage": 26.79, "elapsed_time": "4:31:29", "remaining_time": "12:22:04"} +{"current_steps": 1295, "total_steps": 4816, "loss": 0.2895, "lr": 3.663442450145926e-05, "epoch": 1.8822674418604652, "percentage": 26.89, "elapsed_time": "4:32:29", "remaining_time": "12:20:52"} +{"current_steps": 1300, "total_steps": 4816, "loss": 0.2821, "lr": 3.6594070947269675e-05, "epoch": 1.8895348837209303, "percentage": 26.99, "elapsed_time": "4:33:29", "remaining_time": "12:19:40"} +{"current_steps": 1305, "total_steps": 4816, "loss": 0.2722, "lr": 3.65534994142049e-05, "epoch": 1.8968023255813953, "percentage": 27.1, "elapsed_time": "4:34:31", "remaining_time": "12:18:35"} +{"current_steps": 1310, "total_steps": 4816, "loss": 0.2936, "lr": 3.6512710435210496e-05, "epoch": 1.9040697674418605, "percentage": 27.2, "elapsed_time": "4:35:34", "remaining_time": "12:17:30"} +{"current_steps": 1315, "total_steps": 4816, "loss": 0.3016, "lr": 3.647170454608846e-05, "epoch": 1.9113372093023255, "percentage": 27.3, "elapsed_time": "4:36:38", "remaining_time": "12:16:29"} +{"current_steps": 1320, "total_steps": 4816, "loss": 0.2843, "lr": 3.643048228549006e-05, "epoch": 1.9186046511627906, "percentage": 27.41, "elapsed_time": "4:37:41", "remaining_time": "12:15:26"} +{"current_steps": 1325, "total_steps": 4816, "loss": 0.3061, "lr": 
3.638904419490882e-05, "epoch": 1.9258720930232558, "percentage": 27.51, "elapsed_time": "4:38:47", "remaining_time": "12:14:31"} +{"current_steps": 1330, "total_steps": 4816, "loss": 0.2964, "lr": 3.6347390818673406e-05, "epoch": 1.933139534883721, "percentage": 27.62, "elapsed_time": "4:39:50", "remaining_time": "12:13:29"} +{"current_steps": 1335, "total_steps": 4816, "loss": 0.2857, "lr": 3.6305522703940446e-05, "epoch": 1.940406976744186, "percentage": 27.72, "elapsed_time": "4:40:51", "remaining_time": "12:12:21"} +{"current_steps": 1340, "total_steps": 4816, "loss": 0.2818, "lr": 3.626344040068738e-05, "epoch": 1.947674418604651, "percentage": 27.82, "elapsed_time": "4:41:52", "remaining_time": "12:11:11"} +{"current_steps": 1345, "total_steps": 4816, "loss": 0.2838, "lr": 3.622114446170522e-05, "epoch": 1.9549418604651163, "percentage": 27.93, "elapsed_time": "4:42:56", "remaining_time": "12:10:09"} +{"current_steps": 1350, "total_steps": 4816, "loss": 0.2717, "lr": 3.617863544259127e-05, "epoch": 1.9622093023255816, "percentage": 28.03, "elapsed_time": "4:44:01", "remaining_time": "12:09:12"} +{"current_steps": 1355, "total_steps": 4816, "loss": 0.2658, "lr": 3.613591390174185e-05, "epoch": 1.9694767441860463, "percentage": 28.14, "elapsed_time": "4:45:04", "remaining_time": "12:08:08"} +{"current_steps": 1360, "total_steps": 4816, "loss": 0.2737, "lr": 3.609298040034496e-05, "epoch": 1.9767441860465116, "percentage": 28.24, "elapsed_time": "4:46:02", "remaining_time": "12:06:52"} +{"current_steps": 1365, "total_steps": 4816, "loss": 0.2996, "lr": 3.6049835502372894e-05, "epoch": 1.9840116279069768, "percentage": 28.34, "elapsed_time": "4:47:02", "remaining_time": "12:05:40"} +{"current_steps": 1370, "total_steps": 4816, "loss": 0.2752, "lr": 3.6006479774574846e-05, "epoch": 1.9912790697674418, "percentage": 28.45, "elapsed_time": "4:48:01", "remaining_time": "12:04:29"} +{"current_steps": 1375, "total_steps": 4816, "loss": 0.276, "lr": 
3.596291378646947e-05, "epoch": 1.9985465116279069, "percentage": 28.55, "elapsed_time": "4:49:04", "remaining_time": "12:03:26"} +{"current_steps": 1380, "total_steps": 4816, "loss": 0.2784, "lr": 3.591913811033736e-05, "epoch": 2.005813953488372, "percentage": 28.65, "elapsed_time": "4:50:07", "remaining_time": "12:02:21"} +{"current_steps": 1385, "total_steps": 4816, "loss": 0.2654, "lr": 3.58751533212136e-05, "epoch": 2.0130813953488373, "percentage": 28.76, "elapsed_time": "4:51:10", "remaining_time": "12:01:18"} +{"current_steps": 1390, "total_steps": 4816, "loss": 0.263, "lr": 3.5830959996880134e-05, "epoch": 2.020348837209302, "percentage": 28.86, "elapsed_time": "4:52:16", "remaining_time": "12:00:23"} +{"current_steps": 1395, "total_steps": 4816, "loss": 0.2709, "lr": 3.578655871785824e-05, "epoch": 2.0276162790697674, "percentage": 28.97, "elapsed_time": "4:53:25", "remaining_time": "11:59:35"} +{"current_steps": 1400, "total_steps": 4816, "loss": 0.2538, "lr": 3.574195006740087e-05, "epoch": 2.0348837209302326, "percentage": 29.07, "elapsed_time": "4:54:28", "remaining_time": "11:58:31"} +{"current_steps": 1405, "total_steps": 4816, "loss": 0.2547, "lr": 3.5697134631485e-05, "epoch": 2.042151162790698, "percentage": 29.17, "elapsed_time": "4:55:33", "remaining_time": "11:57:31"} +{"current_steps": 1410, "total_steps": 4816, "loss": 0.2763, "lr": 3.5652112998803914e-05, "epoch": 2.0494186046511627, "percentage": 29.28, "elapsed_time": "4:56:32", "remaining_time": "11:56:19"} +{"current_steps": 1415, "total_steps": 4816, "loss": 0.2538, "lr": 3.560688576075951e-05, "epoch": 2.056686046511628, "percentage": 29.38, "elapsed_time": "4:57:36", "remaining_time": "11:55:17"} +{"current_steps": 1420, "total_steps": 4816, "loss": 0.278, "lr": 3.5561453511454485e-05, "epoch": 2.063953488372093, "percentage": 29.49, "elapsed_time": "4:58:44", "remaining_time": "11:54:27"} +{"current_steps": 1425, "total_steps": 4816, "loss": 0.2647, "lr": 3.551581684768457e-05, 
"epoch": 2.071220930232558, "percentage": 29.59, "elapsed_time": "4:59:46", "remaining_time": "11:53:21"} +{"current_steps": 1430, "total_steps": 4816, "loss": 0.2699, "lr": 3.546997636893067e-05, "epoch": 2.078488372093023, "percentage": 29.69, "elapsed_time": "5:00:43", "remaining_time": "11:52:04"} +{"current_steps": 1435, "total_steps": 4816, "loss": 0.2725, "lr": 3.542393267735098e-05, "epoch": 2.0857558139534884, "percentage": 29.8, "elapsed_time": "5:01:46", "remaining_time": "11:51:01"} +{"current_steps": 1440, "total_steps": 4816, "loss": 0.2795, "lr": 3.537768637777312e-05, "epoch": 2.0930232558139537, "percentage": 29.9, "elapsed_time": "5:02:52", "remaining_time": "11:50:05"} +{"current_steps": 1445, "total_steps": 4816, "loss": 0.2609, "lr": 3.533123807768612e-05, "epoch": 2.1002906976744184, "percentage": 30.0, "elapsed_time": "5:03:57", "remaining_time": "11:49:06"} +{"current_steps": 1450, "total_steps": 4816, "loss": 0.2708, "lr": 3.52845883872325e-05, "epoch": 2.1075581395348837, "percentage": 30.11, "elapsed_time": "5:05:03", "remaining_time": "11:48:08"} +{"current_steps": 1455, "total_steps": 4816, "loss": 0.2636, "lr": 3.523773791920023e-05, "epoch": 2.114825581395349, "percentage": 30.21, "elapsed_time": "5:06:06", "remaining_time": "11:47:04"} +{"current_steps": 1460, "total_steps": 4816, "loss": 0.267, "lr": 3.51906872890147e-05, "epoch": 2.1220930232558137, "percentage": 30.32, "elapsed_time": "5:07:06", "remaining_time": "11:45:56"} +{"current_steps": 1465, "total_steps": 4816, "loss": 0.2655, "lr": 3.514343711473058e-05, "epoch": 2.129360465116279, "percentage": 30.42, "elapsed_time": "5:08:03", "remaining_time": "11:44:37"} +{"current_steps": 1470, "total_steps": 4816, "loss": 0.2563, "lr": 3.509598801702378e-05, "epoch": 2.136627906976744, "percentage": 30.52, "elapsed_time": "5:09:03", "remaining_time": "11:43:29"} +{"current_steps": 1475, "total_steps": 4816, "loss": 0.2591, "lr": 3.504834061918324e-05, "epoch": 2.1438953488372094, 
"percentage": 30.63, "elapsed_time": "5:10:06", "remaining_time": "11:42:26"} +{"current_steps": 1480, "total_steps": 4816, "loss": 0.2664, "lr": 3.5000495547102766e-05, "epoch": 2.1511627906976742, "percentage": 30.73, "elapsed_time": "5:11:08", "remaining_time": "11:41:19"} +{"current_steps": 1485, "total_steps": 4816, "loss": 0.2625, "lr": 3.49524534292728e-05, "epoch": 2.1584302325581395, "percentage": 30.83, "elapsed_time": "5:12:10", "remaining_time": "11:40:15"} +{"current_steps": 1490, "total_steps": 4816, "loss": 0.2644, "lr": 3.490421489677217e-05, "epoch": 2.1656976744186047, "percentage": 30.94, "elapsed_time": "5:13:12", "remaining_time": "11:39:08"} +{"current_steps": 1495, "total_steps": 4816, "loss": 0.2737, "lr": 3.485578058325979e-05, "epoch": 2.17296511627907, "percentage": 31.04, "elapsed_time": "5:14:15", "remaining_time": "11:38:05"} +{"current_steps": 1500, "total_steps": 4816, "loss": 0.2628, "lr": 3.480715112496634e-05, "epoch": 2.1802325581395348, "percentage": 31.15, "elapsed_time": "5:15:19", "remaining_time": "11:37:05"} +{"current_steps": 1505, "total_steps": 4816, "loss": 0.2624, "lr": 3.475832716068595e-05, "epoch": 2.1875, "percentage": 31.25, "elapsed_time": "5:17:00", "remaining_time": "11:37:24"} +{"current_steps": 1510, "total_steps": 4816, "loss": 0.2595, "lr": 3.47093093317677e-05, "epoch": 2.1947674418604652, "percentage": 31.35, "elapsed_time": "5:18:06", "remaining_time": "11:36:27"} +{"current_steps": 1515, "total_steps": 4816, "loss": 0.2653, "lr": 3.4660098282107344e-05, "epoch": 2.20203488372093, "percentage": 31.46, "elapsed_time": "5:19:06", "remaining_time": "11:35:17"} +{"current_steps": 1520, "total_steps": 4816, "loss": 0.2725, "lr": 3.461069465813871e-05, "epoch": 2.2093023255813953, "percentage": 31.56, "elapsed_time": "5:20:04", "remaining_time": "11:34:03"} +{"current_steps": 1525, "total_steps": 4816, "loss": 0.2703, "lr": 3.4561099108825323e-05, "epoch": 2.2165697674418605, "percentage": 31.67, 
"elapsed_time": "5:21:05", "remaining_time": "11:32:55"} +{"current_steps": 1530, "total_steps": 4816, "loss": 0.2625, "lr": 3.451131228565179e-05, "epoch": 2.2238372093023258, "percentage": 31.77, "elapsed_time": "5:22:08", "remaining_time": "11:31:51"} +{"current_steps": 1535, "total_steps": 4816, "loss": 0.276, "lr": 3.446133484261529e-05, "epoch": 2.2311046511627906, "percentage": 31.87, "elapsed_time": "5:23:09", "remaining_time": "11:30:43"} +{"current_steps": 1540, "total_steps": 4816, "loss": 0.2742, "lr": 3.4411167436217004e-05, "epoch": 2.238372093023256, "percentage": 31.98, "elapsed_time": "5:24:16", "remaining_time": "11:29:49"} +{"current_steps": 1545, "total_steps": 4816, "loss": 0.2636, "lr": 3.436081072545343e-05, "epoch": 2.245639534883721, "percentage": 32.08, "elapsed_time": "5:25:19", "remaining_time": "11:28:46"} +{"current_steps": 1550, "total_steps": 4816, "loss": 0.276, "lr": 3.4310265371807775e-05, "epoch": 2.2529069767441863, "percentage": 32.18, "elapsed_time": "5:26:20", "remaining_time": "11:27:37"} +{"current_steps": 1555, "total_steps": 4816, "loss": 0.2708, "lr": 3.4259532039241234e-05, "epoch": 2.260174418604651, "percentage": 32.29, "elapsed_time": "5:27:19", "remaining_time": "11:26:25"} +{"current_steps": 1560, "total_steps": 4816, "loss": 0.2662, "lr": 3.420861139418429e-05, "epoch": 2.2674418604651163, "percentage": 32.39, "elapsed_time": "5:28:23", "remaining_time": "11:25:25"} +{"current_steps": 1565, "total_steps": 4816, "loss": 0.2667, "lr": 3.4157504105527976e-05, "epoch": 2.2747093023255816, "percentage": 32.5, "elapsed_time": "5:29:28", "remaining_time": "11:24:25"} +{"current_steps": 1570, "total_steps": 4816, "loss": 0.2877, "lr": 3.410621084461503e-05, "epoch": 2.2819767441860463, "percentage": 32.6, "elapsed_time": "5:30:34", "remaining_time": "11:23:27"} +{"current_steps": 1575, "total_steps": 4816, "loss": 0.2639, "lr": 3.405473228523114e-05, "epoch": 2.2892441860465116, "percentage": 32.7, "elapsed_time": 
"5:31:30", "remaining_time": "11:22:09"} +{"current_steps": 1580, "total_steps": 4816, "loss": 0.2711, "lr": 3.4003069103596034e-05, "epoch": 2.296511627906977, "percentage": 32.81, "elapsed_time": "5:32:38", "remaining_time": "11:21:16"} +{"current_steps": 1585, "total_steps": 4816, "loss": 0.2722, "lr": 3.395122197835467e-05, "epoch": 2.303779069767442, "percentage": 32.91, "elapsed_time": "5:33:37", "remaining_time": "11:20:05"} +{"current_steps": 1590, "total_steps": 4816, "loss": 0.2656, "lr": 3.389919159056825e-05, "epoch": 2.311046511627907, "percentage": 33.01, "elapsed_time": "5:34:42", "remaining_time": "11:19:06"} +{"current_steps": 1595, "total_steps": 4816, "loss": 0.2731, "lr": 3.384697862370531e-05, "epoch": 2.318313953488372, "percentage": 33.12, "elapsed_time": "5:35:42", "remaining_time": "11:17:56"} +{"current_steps": 1600, "total_steps": 4816, "loss": 0.2588, "lr": 3.379458376363274e-05, "epoch": 2.3255813953488373, "percentage": 33.22, "elapsed_time": "5:36:44", "remaining_time": "11:16:51"} +{"current_steps": 1605, "total_steps": 4816, "loss": 0.2614, "lr": 3.374200769860676e-05, "epoch": 2.332848837209302, "percentage": 33.33, "elapsed_time": "5:37:47", "remaining_time": "11:15:47"} +{"current_steps": 1610, "total_steps": 4816, "loss": 0.2749, "lr": 3.368925111926391e-05, "epoch": 2.3401162790697674, "percentage": 33.43, "elapsed_time": "5:38:47", "remaining_time": "11:14:39"} +{"current_steps": 1615, "total_steps": 4816, "loss": 0.2697, "lr": 3.363631471861194e-05, "epoch": 2.3473837209302326, "percentage": 33.53, "elapsed_time": "5:39:50", "remaining_time": "11:13:34"} +{"current_steps": 1620, "total_steps": 4816, "loss": 0.2671, "lr": 3.358319919202071e-05, "epoch": 2.354651162790698, "percentage": 33.64, "elapsed_time": "5:40:54", "remaining_time": "11:12:33"} +{"current_steps": 1625, "total_steps": 4816, "loss": 0.2756, "lr": 3.35299052372131e-05, "epoch": 2.3619186046511627, "percentage": 33.74, "elapsed_time": "5:41:55", 
"remaining_time": "11:11:26"} +{"current_steps": 1630, "total_steps": 4816, "loss": 0.2824, "lr": 3.3476433554255806e-05, "epoch": 2.369186046511628, "percentage": 33.85, "elapsed_time": "5:42:59", "remaining_time": "11:10:25"} +{"current_steps": 1635, "total_steps": 4816, "loss": 0.2686, "lr": 3.342278484555014e-05, "epoch": 2.376453488372093, "percentage": 33.95, "elapsed_time": "5:43:58", "remaining_time": "11:09:13"} +{"current_steps": 1640, "total_steps": 4816, "loss": 0.2863, "lr": 3.336895981582282e-05, "epoch": 2.383720930232558, "percentage": 34.05, "elapsed_time": "5:44:58", "remaining_time": "11:08:03"} +{"current_steps": 1645, "total_steps": 4816, "loss": 0.2655, "lr": 3.3314959172116705e-05, "epoch": 2.390988372093023, "percentage": 34.16, "elapsed_time": "5:46:07", "remaining_time": "11:07:12"} +{"current_steps": 1650, "total_steps": 4816, "loss": 0.2635, "lr": 3.326078362378152e-05, "epoch": 2.3982558139534884, "percentage": 34.26, "elapsed_time": "5:47:13", "remaining_time": "11:06:15"} +{"current_steps": 1655, "total_steps": 4816, "loss": 0.255, "lr": 3.320643388246452e-05, "epoch": 2.4055232558139537, "percentage": 34.36, "elapsed_time": "5:48:18", "remaining_time": "11:05:15"} +{"current_steps": 1660, "total_steps": 4816, "loss": 0.2727, "lr": 3.315191066210117e-05, "epoch": 2.4127906976744184, "percentage": 34.47, "elapsed_time": "5:49:19", "remaining_time": "11:04:08"} +{"current_steps": 1665, "total_steps": 4816, "loss": 0.2738, "lr": 3.309721467890571e-05, "epoch": 2.4200581395348837, "percentage": 34.57, "elapsed_time": "5:50:21", "remaining_time": "11:03:03"} +{"current_steps": 1670, "total_steps": 4816, "loss": 0.264, "lr": 3.3042346651361804e-05, "epoch": 2.427325581395349, "percentage": 34.68, "elapsed_time": "5:51:26", "remaining_time": "11:02:02"} +{"current_steps": 1675, "total_steps": 4816, "loss": 0.2645, "lr": 3.298730730021309e-05, "epoch": 2.4345930232558137, "percentage": 34.78, "elapsed_time": "5:52:28", "remaining_time": 
"11:00:57"} +{"current_steps": 1680, "total_steps": 4816, "loss": 0.2736, "lr": 3.2932097348453696e-05, "epoch": 2.441860465116279, "percentage": 34.88, "elapsed_time": "5:53:29", "remaining_time": "10:59:50"} +{"current_steps": 1685, "total_steps": 4816, "loss": 0.264, "lr": 3.287671752131875e-05, "epoch": 2.449127906976744, "percentage": 34.99, "elapsed_time": "5:54:32", "remaining_time": "10:58:48"} +{"current_steps": 1690, "total_steps": 4816, "loss": 0.2756, "lr": 3.282116854627485e-05, "epoch": 2.4563953488372094, "percentage": 35.09, "elapsed_time": "5:55:41", "remaining_time": "10:57:55"} +{"current_steps": 1695, "total_steps": 4816, "loss": 0.2739, "lr": 3.276545115301053e-05, "epoch": 2.4636627906976742, "percentage": 35.2, "elapsed_time": "5:56:45", "remaining_time": "10:56:54"} +{"current_steps": 1700, "total_steps": 4816, "loss": 0.2682, "lr": 3.270956607342663e-05, "epoch": 2.4709302325581395, "percentage": 35.3, "elapsed_time": "5:57:50", "remaining_time": "10:55:54"} +{"current_steps": 1705, "total_steps": 4816, "loss": 0.2761, "lr": 3.265351404162673e-05, "epoch": 2.4781976744186047, "percentage": 35.4, "elapsed_time": "5:58:57", "remaining_time": "10:54:58"} +{"current_steps": 1710, "total_steps": 4816, "loss": 0.2635, "lr": 3.259729579390749e-05, "epoch": 2.4854651162790695, "percentage": 35.51, "elapsed_time": "5:59:56", "remaining_time": "10:53:48"} +{"current_steps": 1715, "total_steps": 4816, "loss": 0.276, "lr": 3.254091206874895e-05, "epoch": 2.4927325581395348, "percentage": 35.61, "elapsed_time": "6:00:52", "remaining_time": "10:52:30"} +{"current_steps": 1720, "total_steps": 4816, "loss": 0.2559, "lr": 3.248436360680487e-05, "epoch": 2.5, "percentage": 35.71, "elapsed_time": "6:01:53", "remaining_time": "10:51:25"} +{"current_steps": 1725, "total_steps": 4816, "loss": 0.2675, "lr": 3.2427651150892984e-05, "epoch": 2.5072674418604652, "percentage": 35.82, "elapsed_time": "6:02:57", "remaining_time": "10:50:22"} +{"current_steps": 1730, 
"total_steps": 4816, "loss": 0.2744, "lr": 3.237077544598524e-05, "epoch": 2.5145348837209305, "percentage": 35.92, "elapsed_time": "6:04:03", "remaining_time": "10:49:24"} +{"current_steps": 1735, "total_steps": 4816, "loss": 0.2737, "lr": 3.2313737239198015e-05, "epoch": 2.5218023255813953, "percentage": 36.03, "elapsed_time": "6:05:06", "remaining_time": "10:48:21"} +{"current_steps": 1740, "total_steps": 4816, "loss": 0.2661, "lr": 3.22565372797823e-05, "epoch": 2.5290697674418605, "percentage": 36.13, "elapsed_time": "6:06:08", "remaining_time": "10:47:15"} +{"current_steps": 1745, "total_steps": 4816, "loss": 0.2655, "lr": 3.219917631911387e-05, "epoch": 2.5363372093023253, "percentage": 36.23, "elapsed_time": "6:07:10", "remaining_time": "10:46:10"} +{"current_steps": 1750, "total_steps": 4816, "loss": 0.2567, "lr": 3.2141655110683396e-05, "epoch": 2.5436046511627906, "percentage": 36.34, "elapsed_time": "6:08:03", "remaining_time": "10:44:49"} +{"current_steps": 1755, "total_steps": 4816, "loss": 0.2611, "lr": 3.208397441008655e-05, "epoch": 2.550872093023256, "percentage": 36.44, "elapsed_time": "6:09:04", "remaining_time": "10:43:44"} +{"current_steps": 1760, "total_steps": 4816, "loss": 0.2708, "lr": 3.20261349750141e-05, "epoch": 2.558139534883721, "percentage": 36.54, "elapsed_time": "6:10:07", "remaining_time": "10:42:40"} +{"current_steps": 1765, "total_steps": 4816, "loss": 0.2718, "lr": 3.1968137565241936e-05, "epoch": 2.5654069767441863, "percentage": 36.65, "elapsed_time": "6:11:13", "remaining_time": "10:41:41"} +{"current_steps": 1770, "total_steps": 4816, "loss": 0.2613, "lr": 3.1909982942621085e-05, "epoch": 2.572674418604651, "percentage": 36.75, "elapsed_time": "6:12:18", "remaining_time": "10:40:42"} +{"current_steps": 1775, "total_steps": 4816, "loss": 0.2698, "lr": 3.185167187106774e-05, "epoch": 2.5799418604651163, "percentage": 36.86, "elapsed_time": "6:13:22", "remaining_time": "10:39:41"} +{"current_steps": 1780, "total_steps": 4816, 
"loss": 0.2575, "lr": 3.179320511655317e-05, "epoch": 2.5872093023255816, "percentage": 36.96, "elapsed_time": "6:14:27", "remaining_time": "10:38:40"} +{"current_steps": 1785, "total_steps": 4816, "loss": 0.2739, "lr": 3.1734583447093696e-05, "epoch": 2.5944767441860463, "percentage": 37.06, "elapsed_time": "6:15:29", "remaining_time": "10:37:36"} +{"current_steps": 1790, "total_steps": 4816, "loss": 0.2731, "lr": 3.167580763274061e-05, "epoch": 2.6017441860465116, "percentage": 37.17, "elapsed_time": "6:16:35", "remaining_time": "10:36:37"} +{"current_steps": 1795, "total_steps": 4816, "loss": 0.2573, "lr": 3.161687844557002e-05, "epoch": 2.609011627906977, "percentage": 37.27, "elapsed_time": "6:17:35", "remaining_time": "10:35:29"} +{"current_steps": 1800, "total_steps": 4816, "loss": 0.2628, "lr": 3.155779665967275e-05, "epoch": 2.616279069767442, "percentage": 37.38, "elapsed_time": "6:18:34", "remaining_time": "10:34:18"} +{"current_steps": 1805, "total_steps": 4816, "loss": 0.2543, "lr": 3.149856305114416e-05, "epoch": 2.623546511627907, "percentage": 37.48, "elapsed_time": "6:19:37", "remaining_time": "10:33:16"} +{"current_steps": 1810, "total_steps": 4816, "loss": 0.2561, "lr": 3.1439178398073896e-05, "epoch": 2.630813953488372, "percentage": 37.58, "elapsed_time": "6:20:39", "remaining_time": "10:32:11"} +{"current_steps": 1815, "total_steps": 4816, "loss": 0.2704, "lr": 3.137964348053578e-05, "epoch": 2.6380813953488373, "percentage": 37.69, "elapsed_time": "6:21:45", "remaining_time": "10:31:12"} +{"current_steps": 1820, "total_steps": 4816, "loss": 0.2818, "lr": 3.1319959080577464e-05, "epoch": 2.645348837209302, "percentage": 37.79, "elapsed_time": "6:22:45", "remaining_time": "10:30:05"} +{"current_steps": 1825, "total_steps": 4816, "loss": 0.2582, "lr": 3.12601259822102e-05, "epoch": 2.6526162790697674, "percentage": 37.89, "elapsed_time": "6:23:52", "remaining_time": "10:29:07"} +{"current_steps": 1830, "total_steps": 4816, "loss": 0.2663, "lr": 
3.120014497139853e-05, "epoch": 2.6598837209302326, "percentage": 38.0, "elapsed_time": "6:24:53", "remaining_time": "10:28:02"} +{"current_steps": 1835, "total_steps": 4816, "loss": 0.2655, "lr": 3.114001683604999e-05, "epoch": 2.667151162790698, "percentage": 38.1, "elapsed_time": "6:25:53", "remaining_time": "10:26:53"} +{"current_steps": 1840, "total_steps": 4816, "loss": 0.252, "lr": 3.1079742366004713e-05, "epoch": 2.6744186046511627, "percentage": 38.21, "elapsed_time": "6:26:57", "remaining_time": "10:25:51"} +{"current_steps": 1845, "total_steps": 4816, "loss": 0.2685, "lr": 3.101932235302508e-05, "epoch": 2.681686046511628, "percentage": 38.31, "elapsed_time": "6:27:55", "remaining_time": "10:24:40"} +{"current_steps": 1850, "total_steps": 4816, "loss": 0.2567, "lr": 3.095875759078532e-05, "epoch": 2.688953488372093, "percentage": 38.41, "elapsed_time": "6:28:52", "remaining_time": "10:23:27"} +{"current_steps": 1855, "total_steps": 4816, "loss": 0.2695, "lr": 3.089804887486109e-05, "epoch": 2.696220930232558, "percentage": 38.52, "elapsed_time": "6:29:59", "remaining_time": "10:22:30"} +{"current_steps": 1860, "total_steps": 4816, "loss": 0.2699, "lr": 3.083719700271899e-05, "epoch": 2.703488372093023, "percentage": 38.62, "elapsed_time": "6:31:05", "remaining_time": "10:21:32"} +{"current_steps": 1865, "total_steps": 4816, "loss": 0.2613, "lr": 3.0776202773706136e-05, "epoch": 2.7107558139534884, "percentage": 38.73, "elapsed_time": "6:32:09", "remaining_time": "10:20:31"} +{"current_steps": 1870, "total_steps": 4816, "loss": 0.2583, "lr": 3.0715066989039634e-05, "epoch": 2.7180232558139537, "percentage": 38.83, "elapsed_time": "6:33:15", "remaining_time": "10:19:32"} +{"current_steps": 1875, "total_steps": 4816, "loss": 0.2681, "lr": 3.0653790451796065e-05, "epoch": 2.7252906976744184, "percentage": 38.93, "elapsed_time": "6:34:19", "remaining_time": "10:18:30"} +{"current_steps": 1880, "total_steps": 4816, "loss": 0.2623, "lr": 3.05923739669009e-05, 
"epoch": 2.7325581395348837, "percentage": 39.04, "elapsed_time": "6:35:19", "remaining_time": "10:17:23"} +{"current_steps": 1885, "total_steps": 4816, "loss": 0.2822, "lr": 3.0530818341117974e-05, "epoch": 2.739825581395349, "percentage": 39.14, "elapsed_time": "6:36:24", "remaining_time": "10:16:23"} +{"current_steps": 1890, "total_steps": 4816, "loss": 0.2689, "lr": 3.046912438303887e-05, "epoch": 2.7470930232558137, "percentage": 39.24, "elapsed_time": "6:37:25", "remaining_time": "10:15:16"} +{"current_steps": 1895, "total_steps": 4816, "loss": 0.2767, "lr": 3.040729290307231e-05, "epoch": 2.754360465116279, "percentage": 39.35, "elapsed_time": "6:38:30", "remaining_time": "10:14:16"} +{"current_steps": 1900, "total_steps": 4816, "loss": 0.2717, "lr": 3.0345324713433454e-05, "epoch": 2.761627906976744, "percentage": 39.45, "elapsed_time": "6:39:26", "remaining_time": "10:13:02"} +{"current_steps": 1905, "total_steps": 4816, "loss": 0.2729, "lr": 3.0283220628133328e-05, "epoch": 2.7688953488372094, "percentage": 39.56, "elapsed_time": "6:40:24", "remaining_time": "10:11:51"} +{"current_steps": 1910, "total_steps": 4816, "loss": 0.2735, "lr": 3.0220981462968038e-05, "epoch": 2.7761627906976747, "percentage": 39.66, "elapsed_time": "6:41:30", "remaining_time": "10:10:52"} +{"current_steps": 1915, "total_steps": 4816, "loss": 0.2646, "lr": 3.0158608035508107e-05, "epoch": 2.7834302325581395, "percentage": 39.76, "elapsed_time": "6:42:30", "remaining_time": "10:09:44"} +{"current_steps": 1920, "total_steps": 4816, "loss": 0.2628, "lr": 3.0096101165087715e-05, "epoch": 2.7906976744186047, "percentage": 39.87, "elapsed_time": "6:43:35", "remaining_time": "10:08:44"} +{"current_steps": 1925, "total_steps": 4816, "loss": 0.2672, "lr": 3.0033461672793946e-05, "epoch": 2.7979651162790695, "percentage": 39.97, "elapsed_time": "6:44:35", "remaining_time": "10:07:37"} +{"current_steps": 1930, "total_steps": 4816, "loss": 0.2502, "lr": 2.9970690381456e-05, "epoch": 
2.8052325581395348, "percentage": 40.07, "elapsed_time": "6:45:33", "remaining_time": "10:06:26"} +{"current_steps": 1935, "total_steps": 4816, "loss": 0.2541, "lr": 2.990778811563438e-05, "epoch": 2.8125, "percentage": 40.18, "elapsed_time": "6:46:30", "remaining_time": "10:05:15"} +{"current_steps": 1940, "total_steps": 4816, "loss": 0.2631, "lr": 2.984475570161005e-05, "epoch": 2.8197674418604652, "percentage": 40.28, "elapsed_time": "6:47:32", "remaining_time": "10:04:10"} +{"current_steps": 1945, "total_steps": 4816, "loss": 0.2622, "lr": 2.978159396737363e-05, "epoch": 2.8270348837209305, "percentage": 40.39, "elapsed_time": "6:48:34", "remaining_time": "10:03:05"} +{"current_steps": 1950, "total_steps": 4816, "loss": 0.2545, "lr": 2.9718303742614437e-05, "epoch": 2.8343023255813953, "percentage": 40.49, "elapsed_time": "6:49:39", "remaining_time": "10:02:05"} +{"current_steps": 1955, "total_steps": 4816, "loss": 0.2585, "lr": 2.9654885858709678e-05, "epoch": 2.8415697674418605, "percentage": 40.59, "elapsed_time": "6:50:42", "remaining_time": "10:01:01"} +{"current_steps": 1960, "total_steps": 4816, "loss": 0.2654, "lr": 2.9591341148713444e-05, "epoch": 2.8488372093023253, "percentage": 40.7, "elapsed_time": "6:51:47", "remaining_time": "10:00:03"} +{"current_steps": 1965, "total_steps": 4816, "loss": 0.269, "lr": 2.952767044734584e-05, "epoch": 2.8561046511627906, "percentage": 40.8, "elapsed_time": "6:52:45", "remaining_time": "9:58:51"} +{"current_steps": 1970, "total_steps": 4816, "loss": 0.2542, "lr": 2.946387459098196e-05, "epoch": 2.863372093023256, "percentage": 40.91, "elapsed_time": "6:53:50", "remaining_time": "9:57:52"} +{"current_steps": 1975, "total_steps": 4816, "loss": 0.265, "lr": 2.9399954417640956e-05, "epoch": 2.870639534883721, "percentage": 41.01, "elapsed_time": "6:54:51", "remaining_time": "9:56:45"} +{"current_steps": 1980, "total_steps": 4816, "loss": 0.2777, "lr": 2.9335910766974977e-05, "epoch": 2.8779069767441863, "percentage": 
41.11, "elapsed_time": "6:55:56", "remaining_time": "9:55:46"} +{"current_steps": 1985, "total_steps": 4816, "loss": 0.2529, "lr": 2.9271744480258174e-05, "epoch": 2.885174418604651, "percentage": 41.22, "elapsed_time": "6:56:59", "remaining_time": "9:54:43"} +{"current_steps": 1990, "total_steps": 4816, "loss": 0.2762, "lr": 2.9207456400375646e-05, "epoch": 2.8924418604651163, "percentage": 41.32, "elapsed_time": "6:57:59", "remaining_time": "9:53:35"} +{"current_steps": 1995, "total_steps": 4816, "loss": 0.2845, "lr": 2.914304737181234e-05, "epoch": 2.8997093023255816, "percentage": 41.42, "elapsed_time": "6:59:03", "remaining_time": "9:52:33"} +{"current_steps": 2000, "total_steps": 4816, "loss": 0.2688, "lr": 2.9078518240642e-05, "epoch": 2.9069767441860463, "percentage": 41.53, "elapsed_time": "7:00:04", "remaining_time": "9:51:27"} +{"current_steps": 2005, "total_steps": 4816, "loss": 0.2543, "lr": 2.9013869854516028e-05, "epoch": 2.9142441860465116, "percentage": 41.63, "elapsed_time": "7:01:14", "remaining_time": "9:50:34"} +{"current_steps": 2010, "total_steps": 4816, "loss": 0.2635, "lr": 2.894910306265234e-05, "epoch": 2.921511627906977, "percentage": 41.74, "elapsed_time": "7:02:19", "remaining_time": "9:49:34"} +{"current_steps": 2015, "total_steps": 4816, "loss": 0.2726, "lr": 2.888421871582423e-05, "epoch": 2.928779069767442, "percentage": 41.84, "elapsed_time": "7:03:21", "remaining_time": "9:48:30"} +{"current_steps": 2020, "total_steps": 4816, "loss": 0.2622, "lr": 2.8819217666349198e-05, "epoch": 2.936046511627907, "percentage": 41.94, "elapsed_time": "7:04:18", "remaining_time": "9:47:19"} +{"current_steps": 2025, "total_steps": 4816, "loss": 0.2525, "lr": 2.8754100768077743e-05, "epoch": 2.943313953488372, "percentage": 42.05, "elapsed_time": "7:05:19", "remaining_time": "9:46:12"} +{"current_steps": 2030, "total_steps": 4816, "loss": 0.2582, "lr": 2.8688868876382122e-05, "epoch": 2.9505813953488373, "percentage": 42.15, "elapsed_time": 
"7:06:24", "remaining_time": "9:45:12"} +{"current_steps": 2035, "total_steps": 4816, "loss": 0.2594, "lr": 2.862352284814518e-05, "epoch": 2.957848837209302, "percentage": 42.25, "elapsed_time": "7:07:28", "remaining_time": "9:44:11"} +{"current_steps": 2040, "total_steps": 4816, "loss": 0.2699, "lr": 2.855806354174901e-05, "epoch": 2.9651162790697674, "percentage": 42.36, "elapsed_time": "7:08:32", "remaining_time": "9:43:09"} +{"current_steps": 2045, "total_steps": 4816, "loss": 0.2638, "lr": 2.8492491817063767e-05, "epoch": 2.9723837209302326, "percentage": 42.46, "elapsed_time": "7:09:39", "remaining_time": "9:42:11"} +{"current_steps": 2050, "total_steps": 4816, "loss": 0.254, "lr": 2.8426808535436294e-05, "epoch": 2.979651162790698, "percentage": 42.57, "elapsed_time": "7:10:45", "remaining_time": "9:41:11"} +{"current_steps": 2055, "total_steps": 4816, "loss": 0.2592, "lr": 2.8361014559678856e-05, "epoch": 2.9869186046511627, "percentage": 42.67, "elapsed_time": "7:11:44", "remaining_time": "9:40:04"} +{"current_steps": 2060, "total_steps": 4816, "loss": 0.2552, "lr": 2.8295110754057776e-05, "epoch": 2.994186046511628, "percentage": 42.77, "elapsed_time": "7:12:46", "remaining_time": "9:38:59"} +{"current_steps": 2065, "total_steps": 4816, "loss": 0.2498, "lr": 2.822909798428211e-05, "epoch": 3.001453488372093, "percentage": 42.88, "elapsed_time": "7:13:45", "remaining_time": "9:37:51"} +{"current_steps": 2070, "total_steps": 4816, "loss": 0.2634, "lr": 2.8162977117492257e-05, "epoch": 3.008720930232558, "percentage": 42.98, "elapsed_time": "7:14:51", "remaining_time": "9:36:51"} +{"current_steps": 2075, "total_steps": 4816, "loss": 0.2532, "lr": 2.809674902224857e-05, "epoch": 3.015988372093023, "percentage": 43.09, "elapsed_time": "7:15:53", "remaining_time": "9:35:47"} +{"current_steps": 2080, "total_steps": 4816, "loss": 0.2547, "lr": 2.8030414568519963e-05, "epoch": 3.0232558139534884, "percentage": 43.19, "elapsed_time": "7:16:53", "remaining_time": 
"9:34:40"} +{"current_steps": 2085, "total_steps": 4816, "loss": 0.2467, "lr": 2.796397462767245e-05, "epoch": 3.0305232558139537, "percentage": 43.29, "elapsed_time": "7:17:55", "remaining_time": "9:33:36"} +{"current_steps": 2090, "total_steps": 4816, "loss": 0.2504, "lr": 2.7897430072457733e-05, "epoch": 3.0377906976744184, "percentage": 43.4, "elapsed_time": "7:19:02", "remaining_time": "9:32:39"} +{"current_steps": 2095, "total_steps": 4816, "loss": 0.2642, "lr": 2.7830781777001706e-05, "epoch": 3.0450581395348837, "percentage": 43.5, "elapsed_time": "7:20:00", "remaining_time": "9:31:29"} +{"current_steps": 2100, "total_steps": 4816, "loss": 0.2567, "lr": 2.7764030616793017e-05, "epoch": 3.052325581395349, "percentage": 43.6, "elapsed_time": "7:21:02", "remaining_time": "9:30:24"} +{"current_steps": 2105, "total_steps": 4816, "loss": 0.2604, "lr": 2.7697177468671516e-05, "epoch": 3.059593023255814, "percentage": 43.71, "elapsed_time": "7:22:03", "remaining_time": "9:29:19"} +{"current_steps": 2110, "total_steps": 4816, "loss": 0.2604, "lr": 2.7630223210816765e-05, "epoch": 3.066860465116279, "percentage": 43.81, "elapsed_time": "7:23:05", "remaining_time": "9:28:14"} +{"current_steps": 2115, "total_steps": 4816, "loss": 0.2624, "lr": 2.7563168722736517e-05, "epoch": 3.074127906976744, "percentage": 43.92, "elapsed_time": "7:24:07", "remaining_time": "9:27:10"} +{"current_steps": 2120, "total_steps": 4816, "loss": 0.2511, "lr": 2.749601488525512e-05, "epoch": 3.0813953488372094, "percentage": 44.02, "elapsed_time": "7:25:11", "remaining_time": "9:26:09"} +{"current_steps": 2125, "total_steps": 4816, "loss": 0.2506, "lr": 2.7428762580501982e-05, "epoch": 3.0886627906976742, "percentage": 44.12, "elapsed_time": "7:26:15", "remaining_time": "9:25:07"} +{"current_steps": 2130, "total_steps": 4816, "loss": 0.2561, "lr": 2.7361412691899972e-05, "epoch": 3.0959302325581395, "percentage": 44.23, "elapsed_time": "7:27:19", "remaining_time": "9:24:05"} 
+{"current_steps": 2135, "total_steps": 4816, "loss": 0.2423, "lr": 2.7293966104153814e-05, "epoch": 3.1031976744186047, "percentage": 44.33, "elapsed_time": "7:28:19", "remaining_time": "9:22:58"} +{"current_steps": 2140, "total_steps": 4816, "loss": 0.2495, "lr": 2.722642370323847e-05, "epoch": 3.11046511627907, "percentage": 44.44, "elapsed_time": "7:29:26", "remaining_time": "9:22:00"} +{"current_steps": 2145, "total_steps": 4816, "loss": 0.2492, "lr": 2.7158786376387486e-05, "epoch": 3.1177325581395348, "percentage": 44.54, "elapsed_time": "7:30:21", "remaining_time": "9:20:47"} +{"current_steps": 2150, "total_steps": 4816, "loss": 0.2423, "lr": 2.7091055012081376e-05, "epoch": 3.125, "percentage": 44.64, "elapsed_time": "7:31:19", "remaining_time": "9:19:38"} +{"current_steps": 2155, "total_steps": 4816, "loss": 0.2499, "lr": 2.7023230500035896e-05, "epoch": 3.1322674418604652, "percentage": 44.75, "elapsed_time": "7:32:27", "remaining_time": "9:18:41"} +{"current_steps": 2160, "total_steps": 4816, "loss": 0.2605, "lr": 2.6955313731190412e-05, "epoch": 3.13953488372093, "percentage": 44.85, "elapsed_time": "7:33:26", "remaining_time": "9:17:33"} +{"current_steps": 2165, "total_steps": 4816, "loss": 0.2523, "lr": 2.688730559769615e-05, "epoch": 3.1468023255813953, "percentage": 44.95, "elapsed_time": "7:34:27", "remaining_time": "9:16:28"} +{"current_steps": 2170, "total_steps": 4816, "loss": 0.2504, "lr": 2.6819206992904508e-05, "epoch": 3.1540697674418605, "percentage": 45.06, "elapsed_time": "7:35:33", "remaining_time": "9:15:28"} +{"current_steps": 2175, "total_steps": 4816, "loss": 0.2463, "lr": 2.6751018811355307e-05, "epoch": 3.1613372093023258, "percentage": 45.16, "elapsed_time": "7:36:34", "remaining_time": "9:14:23"} +{"current_steps": 2180, "total_steps": 4816, "loss": 0.2454, "lr": 2.6682741948765047e-05, "epoch": 3.1686046511627906, "percentage": 45.27, "elapsed_time": "7:37:37", "remaining_time": "9:13:21"} +{"current_steps": 2185, 
"total_steps": 4816, "loss": 0.2548, "lr": 2.661437730201514e-05, "epoch": 3.175872093023256, "percentage": 45.37, "elapsed_time": "7:38:41", "remaining_time": "9:12:19"} +{"current_steps": 2190, "total_steps": 4816, "loss": 0.2451, "lr": 2.654592576914011e-05, "epoch": 3.183139534883721, "percentage": 45.47, "elapsed_time": "7:39:44", "remaining_time": "9:11:15"} +{"current_steps": 2195, "total_steps": 4816, "loss": 0.2446, "lr": 2.6477388249315836e-05, "epoch": 3.1904069767441863, "percentage": 45.58, "elapsed_time": "7:40:55", "remaining_time": "9:10:22"} +{"current_steps": 2200, "total_steps": 4816, "loss": 0.2558, "lr": 2.6408765642847698e-05, "epoch": 3.197674418604651, "percentage": 45.68, "elapsed_time": "7:41:58", "remaining_time": "9:09:19"} +{"current_steps": 2205, "total_steps": 4816, "loss": 0.2538, "lr": 2.6340058851158788e-05, "epoch": 3.2049418604651163, "percentage": 45.78, "elapsed_time": "7:42:57", "remaining_time": "9:08:12"} +{"current_steps": 2210, "total_steps": 4816, "loss": 0.2412, "lr": 2.6271268776778032e-05, "epoch": 3.2122093023255816, "percentage": 45.89, "elapsed_time": "7:44:00", "remaining_time": "9:07:08"} +{"current_steps": 2215, "total_steps": 4816, "loss": 0.2435, "lr": 2.6202396323328357e-05, "epoch": 3.2194767441860463, "percentage": 45.99, "elapsed_time": "7:45:02", "remaining_time": "9:06:04"} +{"current_steps": 2220, "total_steps": 4816, "loss": 0.2612, "lr": 2.6133442395514833e-05, "epoch": 3.2267441860465116, "percentage": 46.1, "elapsed_time": "7:46:10", "remaining_time": "9:05:08"} +{"current_steps": 2225, "total_steps": 4816, "loss": 0.2476, "lr": 2.606440789911276e-05, "epoch": 3.234011627906977, "percentage": 46.2, "elapsed_time": "7:47:09", "remaining_time": "9:03:59"} +{"current_steps": 2230, "total_steps": 4816, "loss": 0.2508, "lr": 2.599529374095578e-05, "epoch": 3.241279069767442, "percentage": 46.3, "elapsed_time": "7:48:10", "remaining_time": "9:02:54"} +{"current_steps": 2235, "total_steps": 4816, "loss": 
0.2435, "lr": 2.5926100828923985e-05, "epoch": 3.248546511627907, "percentage": 46.41, "elapsed_time": "7:49:11", "remaining_time": "9:01:49"} +{"current_steps": 2240, "total_steps": 4816, "loss": 0.2583, "lr": 2.5856830071931944e-05, "epoch": 3.255813953488372, "percentage": 46.51, "elapsed_time": "7:50:11", "remaining_time": "9:00:42"} +{"current_steps": 2245, "total_steps": 4816, "loss": 0.2588, "lr": 2.578748237991682e-05, "epoch": 3.2630813953488373, "percentage": 46.62, "elapsed_time": "7:51:15", "remaining_time": "8:59:41"} +{"current_steps": 2250, "total_steps": 4816, "loss": 0.2553, "lr": 2.571805866382638e-05, "epoch": 3.270348837209302, "percentage": 46.72, "elapsed_time": "7:52:22", "remaining_time": "8:58:42"} +{"current_steps": 2255, "total_steps": 4816, "loss": 0.245, "lr": 2.5648559835607047e-05, "epoch": 3.2776162790697674, "percentage": 46.82, "elapsed_time": "7:53:28", "remaining_time": "8:57:43"} +{"current_steps": 2260, "total_steps": 4816, "loss": 0.2502, "lr": 2.5578986808191904e-05, "epoch": 3.2848837209302326, "percentage": 46.93, "elapsed_time": "7:54:31", "remaining_time": "8:56:40"} +{"current_steps": 2265, "total_steps": 4816, "loss": 0.2443, "lr": 2.5509340495488707e-05, "epoch": 3.292151162790698, "percentage": 47.03, "elapsed_time": "7:55:36", "remaining_time": "8:55:39"} +{"current_steps": 2270, "total_steps": 4816, "loss": 0.2558, "lr": 2.5439621812367907e-05, "epoch": 3.2994186046511627, "percentage": 47.13, "elapsed_time": "7:56:34", "remaining_time": "8:54:30"} +{"current_steps": 2275, "total_steps": 4816, "loss": 0.2504, "lr": 2.5369831674650572e-05, "epoch": 3.306686046511628, "percentage": 47.24, "elapsed_time": "7:57:36", "remaining_time": "8:53:27"} +{"current_steps": 2280, "total_steps": 4816, "loss": 0.2542, "lr": 2.529997099909643e-05, "epoch": 3.313953488372093, "percentage": 47.34, "elapsed_time": "7:58:31", "remaining_time": "8:52:15"} +{"current_steps": 2285, "total_steps": 4816, "loss": 0.2394, "lr": 
2.5230040703391775e-05, "epoch": 3.321220930232558, "percentage": 47.45, "elapsed_time": "7:59:32", "remaining_time": "8:51:10"} +{"current_steps": 2290, "total_steps": 4816, "loss": 0.2497, "lr": 2.5160041706137424e-05, "epoch": 3.328488372093023, "percentage": 47.55, "elapsed_time": "8:00:31", "remaining_time": "8:50:02"} +{"current_steps": 2295, "total_steps": 4816, "loss": 0.2372, "lr": 2.508997492683666e-05, "epoch": 3.3357558139534884, "percentage": 47.65, "elapsed_time": "8:01:33", "remaining_time": "8:48:59"} +{"current_steps": 2300, "total_steps": 4816, "loss": 0.2378, "lr": 2.5019841285883143e-05, "epoch": 3.3430232558139537, "percentage": 47.76, "elapsed_time": "8:02:39", "remaining_time": "8:47:58"} +{"current_steps": 2305, "total_steps": 4816, "loss": 0.2378, "lr": 2.4949641704548834e-05, "epoch": 3.3502906976744184, "percentage": 47.86, "elapsed_time": "8:03:43", "remaining_time": "8:46:57"} +{"current_steps": 2310, "total_steps": 4816, "loss": 0.2462, "lr": 2.4879377104971863e-05, "epoch": 3.3575581395348837, "percentage": 47.97, "elapsed_time": "8:04:49", "remaining_time": "8:45:57"} +{"current_steps": 2315, "total_steps": 4816, "loss": 0.2461, "lr": 2.4809048410144467e-05, "epoch": 3.364825581395349, "percentage": 48.07, "elapsed_time": "8:05:51", "remaining_time": "8:44:53"} +{"current_steps": 2320, "total_steps": 4816, "loss": 0.2436, "lr": 2.4738656543900808e-05, "epoch": 3.3720930232558137, "percentage": 48.17, "elapsed_time": "8:06:55", "remaining_time": "8:43:52"} +{"current_steps": 2325, "total_steps": 4816, "loss": 0.2469, "lr": 2.4668202430904872e-05, "epoch": 3.379360465116279, "percentage": 48.28, "elapsed_time": "8:07:56", "remaining_time": "8:42:46"} +{"current_steps": 2330, "total_steps": 4816, "loss": 0.2584, "lr": 2.4597686996638334e-05, "epoch": 3.386627906976744, "percentage": 48.38, "elapsed_time": "8:08:55", "remaining_time": "8:41:39"} +{"current_steps": 2335, "total_steps": 4816, "loss": 0.2442, "lr": 2.452711116738834e-05, 
"epoch": 3.3938953488372094, "percentage": 48.48, "elapsed_time": "8:10:01", "remaining_time": "8:40:40"} +{"current_steps": 2340, "total_steps": 4816, "loss": 0.2468, "lr": 2.4456475870235433e-05, "epoch": 3.4011627906976742, "percentage": 48.59, "elapsed_time": "8:11:02", "remaining_time": "8:39:35"} +{"current_steps": 2345, "total_steps": 4816, "loss": 0.2333, "lr": 2.4385782033041282e-05, "epoch": 3.4084302325581395, "percentage": 48.69, "elapsed_time": "8:12:05", "remaining_time": "8:38:31"} +{"current_steps": 2350, "total_steps": 4816, "loss": 0.2453, "lr": 2.431503058443655e-05, "epoch": 3.4156976744186047, "percentage": 48.8, "elapsed_time": "8:13:11", "remaining_time": "8:37:32"} +{"current_steps": 2355, "total_steps": 4816, "loss": 0.2571, "lr": 2.4244222453808694e-05, "epoch": 3.4229651162790695, "percentage": 48.9, "elapsed_time": "8:14:14", "remaining_time": "8:36:28"} +{"current_steps": 2360, "total_steps": 4816, "loss": 0.2532, "lr": 2.4173358571289716e-05, "epoch": 3.4302325581395348, "percentage": 49.0, "elapsed_time": "8:15:15", "remaining_time": "8:35:24"} +{"current_steps": 2365, "total_steps": 4816, "loss": 0.2477, "lr": 2.4102439867743995e-05, "epoch": 3.4375, "percentage": 49.11, "elapsed_time": "8:16:17", "remaining_time": "8:34:20"} +{"current_steps": 2370, "total_steps": 4816, "loss": 0.2402, "lr": 2.4031467274756026e-05, "epoch": 3.4447674418604652, "percentage": 49.21, "elapsed_time": "8:17:23", "remaining_time": "8:33:20"} +{"current_steps": 2375, "total_steps": 4816, "loss": 0.2536, "lr": 2.3960441724618195e-05, "epoch": 3.4520348837209305, "percentage": 49.31, "elapsed_time": "8:18:24", "remaining_time": "8:32:15"} +{"current_steps": 2380, "total_steps": 4816, "loss": 0.2593, "lr": 2.3889364150318523e-05, "epoch": 3.4593023255813953, "percentage": 49.42, "elapsed_time": "8:19:31", "remaining_time": "8:31:16"} +{"current_steps": 2385, "total_steps": 4816, "loss": 0.2612, "lr": 2.3818235485528438e-05, "epoch": 3.4665697674418605, 
"percentage": 49.52, "elapsed_time": "8:20:39", "remaining_time": "8:30:19"} +{"current_steps": 2390, "total_steps": 4816, "loss": 0.2562, "lr": 2.374705666459046e-05, "epoch": 3.4738372093023258, "percentage": 49.63, "elapsed_time": "8:21:35", "remaining_time": "8:29:08"} +{"current_steps": 2395, "total_steps": 4816, "loss": 0.2451, "lr": 2.367582862250599e-05, "epoch": 3.4811046511627906, "percentage": 49.73, "elapsed_time": "8:22:40", "remaining_time": "8:28:07"} +{"current_steps": 2400, "total_steps": 4816, "loss": 0.2473, "lr": 2.3604552294922974e-05, "epoch": 3.488372093023256, "percentage": 49.83, "elapsed_time": "8:23:47", "remaining_time": "8:27:09"} +{"current_steps": 2405, "total_steps": 4816, "loss": 0.2542, "lr": 2.353322861812364e-05, "epoch": 3.495639534883721, "percentage": 49.94, "elapsed_time": "8:24:49", "remaining_time": "8:26:05"} +{"current_steps": 2410, "total_steps": 4816, "loss": 0.2423, "lr": 2.346185852901219e-05, "epoch": 3.5029069767441863, "percentage": 50.04, "elapsed_time": "8:25:51", "remaining_time": "8:25:00"} +{"current_steps": 2415, "total_steps": 4816, "loss": 0.2449, "lr": 2.3390442965102503e-05, "epoch": 3.510174418604651, "percentage": 50.15, "elapsed_time": "8:26:56", "remaining_time": "8:23:59"} +{"current_steps": 2420, "total_steps": 4816, "loss": 0.2572, "lr": 2.3318982864505806e-05, "epoch": 3.5174418604651163, "percentage": 50.25, "elapsed_time": "8:27:58", "remaining_time": "8:22:56"} +{"current_steps": 2425, "total_steps": 4816, "loss": 0.252, "lr": 2.324747916591836e-05, "epoch": 3.5247093023255816, "percentage": 50.35, "elapsed_time": "8:29:02", "remaining_time": "8:21:53"} +{"current_steps": 2430, "total_steps": 4816, "loss": 0.2333, "lr": 2.317593280860913e-05, "epoch": 3.5319767441860463, "percentage": 50.46, "elapsed_time": "8:30:03", "remaining_time": "8:20:49"} +{"current_steps": 2435, "total_steps": 4816, "loss": 0.2545, "lr": 2.3104344732407436e-05, "epoch": 3.5392441860465116, "percentage": 50.56, 
"elapsed_time": "8:31:09", "remaining_time": "8:19:49"} +{"current_steps": 2440, "total_steps": 4816, "loss": 0.2392, "lr": 2.3032715877690622e-05, "epoch": 3.546511627906977, "percentage": 50.66, "elapsed_time": "8:32:09", "remaining_time": "8:18:43"} +{"current_steps": 2445, "total_steps": 4816, "loss": 0.2527, "lr": 2.296104718537169e-05, "epoch": 3.553779069767442, "percentage": 50.77, "elapsed_time": "8:33:12", "remaining_time": "8:17:40"} +{"current_steps": 2450, "total_steps": 4816, "loss": 0.2504, "lr": 2.2889339596886958e-05, "epoch": 3.561046511627907, "percentage": 50.87, "elapsed_time": "8:34:15", "remaining_time": "8:16:37"} +{"current_steps": 2455, "total_steps": 4816, "loss": 0.2423, "lr": 2.2817594054183675e-05, "epoch": 3.568313953488372, "percentage": 50.98, "elapsed_time": "8:35:20", "remaining_time": "8:15:36"} +{"current_steps": 2460, "total_steps": 4816, "loss": 0.2575, "lr": 2.2745811499707645e-05, "epoch": 3.5755813953488373, "percentage": 51.08, "elapsed_time": "8:36:22", "remaining_time": "8:14:32"} +{"current_steps": 2465, "total_steps": 4816, "loss": 0.2395, "lr": 2.267399287639088e-05, "epoch": 3.582848837209302, "percentage": 51.18, "elapsed_time": "8:37:21", "remaining_time": "8:13:25"} +{"current_steps": 2470, "total_steps": 4816, "loss": 0.2458, "lr": 2.260213912763917e-05, "epoch": 3.5901162790697674, "percentage": 51.29, "elapsed_time": "8:38:17", "remaining_time": "8:12:16"} +{"current_steps": 2475, "total_steps": 4816, "loss": 0.2409, "lr": 2.2530251197319723e-05, "epoch": 3.5973837209302326, "percentage": 51.39, "elapsed_time": "8:39:18", "remaining_time": "8:11:11"} +{"current_steps": 2480, "total_steps": 4816, "loss": 0.2439, "lr": 2.2458330029748736e-05, "epoch": 3.604651162790698, "percentage": 51.5, "elapsed_time": "8:40:26", "remaining_time": "8:10:12"} +{"current_steps": 2485, "total_steps": 4816, "loss": 0.2604, "lr": 2.2386376569679036e-05, "epoch": 3.6119186046511627, "percentage": 51.6, "elapsed_time": "8:41:34", 
"remaining_time": "8:09:14"} +{"current_steps": 2490, "total_steps": 4816, "loss": 0.2457, "lr": 2.231439176228763e-05, "epoch": 3.619186046511628, "percentage": 51.7, "elapsed_time": "8:42:38", "remaining_time": "8:08:13"} +{"current_steps": 2495, "total_steps": 4816, "loss": 0.2473, "lr": 2.2242376553163286e-05, "epoch": 3.626453488372093, "percentage": 51.81, "elapsed_time": "8:43:40", "remaining_time": "8:07:09"} +{"current_steps": 2500, "total_steps": 4816, "loss": 0.2541, "lr": 2.217033188829416e-05, "epoch": 3.633720930232558, "percentage": 51.91, "elapsed_time": "8:44:46", "remaining_time": "8:06:09"} +{"current_steps": 2505, "total_steps": 4816, "loss": 0.2523, "lr": 2.2098258714055303e-05, "epoch": 3.640988372093023, "percentage": 52.01, "elapsed_time": "8:45:45", "remaining_time": "8:05:02"} +{"current_steps": 2510, "total_steps": 4816, "loss": 0.2436, "lr": 2.20261579771963e-05, "epoch": 3.6482558139534884, "percentage": 52.12, "elapsed_time": "8:46:49", "remaining_time": "8:04:00"} +{"current_steps": 2515, "total_steps": 4816, "loss": 0.2532, "lr": 2.1954030624828757e-05, "epoch": 3.6555232558139537, "percentage": 52.22, "elapsed_time": "8:47:48", "remaining_time": "8:02:53"} +{"current_steps": 2520, "total_steps": 4816, "loss": 0.2406, "lr": 2.1881877604413927e-05, "epoch": 3.6627906976744184, "percentage": 52.33, "elapsed_time": "8:48:42", "remaining_time": "8:01:43"} +{"current_steps": 2525, "total_steps": 4816, "loss": 0.2497, "lr": 2.1809699863750236e-05, "epoch": 3.6700581395348837, "percentage": 52.43, "elapsed_time": "8:49:41", "remaining_time": "8:00:35"} +{"current_steps": 2530, "total_steps": 4816, "loss": 0.2353, "lr": 2.1737498350960825e-05, "epoch": 3.677325581395349, "percentage": 52.53, "elapsed_time": "8:50:41", "remaining_time": "7:59:30"} +{"current_steps": 2535, "total_steps": 4816, "loss": 0.2528, "lr": 2.1665274014481112e-05, "epoch": 3.6845930232558137, "percentage": 52.64, "elapsed_time": "8:51:41", "remaining_time": "7:58:24"} 
+{"current_steps": 2540, "total_steps": 4816, "loss": 0.2494, "lr": 2.159302780304631e-05, "epoch": 3.691860465116279, "percentage": 52.74, "elapsed_time": "8:52:44", "remaining_time": "7:57:22"} +{"current_steps": 2545, "total_steps": 4816, "loss": 0.2538, "lr": 2.152076066567901e-05, "epoch": 3.699127906976744, "percentage": 52.84, "elapsed_time": "8:53:52", "remaining_time": "7:56:23"} +{"current_steps": 2550, "total_steps": 4816, "loss": 0.2427, "lr": 2.1448473551676644e-05, "epoch": 3.7063953488372094, "percentage": 52.95, "elapsed_time": "8:54:54", "remaining_time": "7:55:20"} +{"current_steps": 2555, "total_steps": 4816, "loss": 0.2523, "lr": 2.13761674105991e-05, "epoch": 3.7136627906976747, "percentage": 53.05, "elapsed_time": "8:55:58", "remaining_time": "7:54:18"} +{"current_steps": 2560, "total_steps": 4816, "loss": 0.2478, "lr": 2.130384319225617e-05, "epoch": 3.7209302325581395, "percentage": 53.16, "elapsed_time": "8:56:58", "remaining_time": "7:53:12"} +{"current_steps": 2565, "total_steps": 4816, "loss": 0.2399, "lr": 2.1231501846695128e-05, "epoch": 3.7281976744186047, "percentage": 53.26, "elapsed_time": "8:57:58", "remaining_time": "7:52:07"} +{"current_steps": 2570, "total_steps": 4816, "loss": 0.2729, "lr": 2.115914432418822e-05, "epoch": 3.7354651162790695, "percentage": 53.36, "elapsed_time": "8:58:59", "remaining_time": "7:51:02"} +{"current_steps": 2575, "total_steps": 4816, "loss": 0.25, "lr": 2.1086771575220203e-05, "epoch": 3.7427325581395348, "percentage": 53.47, "elapsed_time": "9:00:03", "remaining_time": "7:50:00"} +{"current_steps": 2580, "total_steps": 4816, "loss": 0.2516, "lr": 2.1014384550475836e-05, "epoch": 3.75, "percentage": 53.57, "elapsed_time": "9:01:09", "remaining_time": "7:48:59"} +{"current_steps": 2585, "total_steps": 4816, "loss": 0.2503, "lr": 2.0941984200827402e-05, "epoch": 3.7572674418604652, "percentage": 53.68, "elapsed_time": "9:02:09", "remaining_time": "7:47:54"} +{"current_steps": 2590, "total_steps": 
4816, "loss": 0.2449, "lr": 2.0869571477322244e-05, "epoch": 3.7645348837209305, "percentage": 53.78, "elapsed_time": "9:03:14", "remaining_time": "7:46:53"} +{"current_steps": 2595, "total_steps": 4816, "loss": 0.2455, "lr": 2.079714733117021e-05, "epoch": 3.7718023255813953, "percentage": 53.88, "elapsed_time": "9:04:18", "remaining_time": "7:45:52"} +{"current_steps": 2600, "total_steps": 4816, "loss": 0.2485, "lr": 2.0724712713731226e-05, "epoch": 3.7790697674418605, "percentage": 53.99, "elapsed_time": "9:05:18", "remaining_time": "7:44:46"} +{"current_steps": 2605, "total_steps": 4816, "loss": 0.2429, "lr": 2.065226857650275e-05, "epoch": 3.7863372093023253, "percentage": 54.09, "elapsed_time": "9:06:20", "remaining_time": "7:43:42"} +{"current_steps": 2610, "total_steps": 4816, "loss": 0.2548, "lr": 2.0579815871107304e-05, "epoch": 3.7936046511627906, "percentage": 54.19, "elapsed_time": "9:07:19", "remaining_time": "7:42:36"} +{"current_steps": 2615, "total_steps": 4816, "loss": 0.2563, "lr": 2.0507355549279948e-05, "epoch": 3.800872093023256, "percentage": 54.3, "elapsed_time": "9:08:19", "remaining_time": "7:41:30"} +{"current_steps": 2620, "total_steps": 4816, "loss": 0.2539, "lr": 2.04348885628558e-05, "epoch": 3.808139534883721, "percentage": 54.4, "elapsed_time": "9:09:18", "remaining_time": "7:40:24"} +{"current_steps": 2625, "total_steps": 4816, "loss": 0.2587, "lr": 2.036241586375753e-05, "epoch": 3.8154069767441863, "percentage": 54.51, "elapsed_time": "9:10:20", "remaining_time": "7:39:21"} +{"current_steps": 2630, "total_steps": 4816, "loss": 0.2573, "lr": 2.0289938403982834e-05, "epoch": 3.822674418604651, "percentage": 54.61, "elapsed_time": "9:11:27", "remaining_time": "7:38:21"} +{"current_steps": 2635, "total_steps": 4816, "loss": 0.2504, "lr": 2.0217457135591957e-05, "epoch": 3.8299418604651163, "percentage": 54.71, "elapsed_time": "9:12:27", "remaining_time": "7:37:16"} +{"current_steps": 2640, "total_steps": 4816, "loss": 0.2381, "lr": 
2.0144973010695157e-05, "epoch": 3.8372093023255816, "percentage": 54.82, "elapsed_time": "9:13:31", "remaining_time": "7:36:14"} +{"current_steps": 2645, "total_steps": 4816, "loss": 0.2644, "lr": 2.0072486981440237e-05, "epoch": 3.8444767441860463, "percentage": 54.92, "elapsed_time": "9:14:33", "remaining_time": "7:35:10"} +{"current_steps": 2650, "total_steps": 4816, "loss": 0.2434, "lr": 2e-05, "epoch": 3.8517441860465116, "percentage": 55.02, "elapsed_time": "9:15:35", "remaining_time": "7:34:06"} +{"current_steps": 2655, "total_steps": 4816, "loss": 0.2588, "lr": 1.9927513018559767e-05, "epoch": 3.859011627906977, "percentage": 55.13, "elapsed_time": "9:16:39", "remaining_time": "7:33:04"} +{"current_steps": 2660, "total_steps": 4816, "loss": 0.2484, "lr": 1.985502698930485e-05, "epoch": 3.866279069767442, "percentage": 55.23, "elapsed_time": "9:17:39", "remaining_time": "7:31:59"} +{"current_steps": 2665, "total_steps": 4816, "loss": 0.256, "lr": 1.978254286440805e-05, "epoch": 3.873546511627907, "percentage": 55.34, "elapsed_time": "9:18:38", "remaining_time": "7:30:54"} +{"current_steps": 2670, "total_steps": 4816, "loss": 0.2522, "lr": 1.9710061596017172e-05, "epoch": 3.880813953488372, "percentage": 55.44, "elapsed_time": "9:19:37", "remaining_time": "7:29:47"} +{"current_steps": 2675, "total_steps": 4816, "loss": 0.2443, "lr": 1.9637584136242474e-05, "epoch": 3.8880813953488373, "percentage": 55.54, "elapsed_time": "9:20:39", "remaining_time": "7:28:44"} +{"current_steps": 2680, "total_steps": 4816, "loss": 0.2456, "lr": 1.9565111437144204e-05, "epoch": 3.895348837209302, "percentage": 55.65, "elapsed_time": "9:21:43", "remaining_time": "7:27:42"} +{"current_steps": 2685, "total_steps": 4816, "loss": 0.2362, "lr": 1.949264445072006e-05, "epoch": 3.9026162790697674, "percentage": 55.75, "elapsed_time": "9:22:48", "remaining_time": "7:26:40"} +{"current_steps": 2690, "total_steps": 4816, "loss": 0.2542, "lr": 1.9420184128892702e-05, "epoch": 
3.9098837209302326, "percentage": 55.86, "elapsed_time": "9:23:53", "remaining_time": "7:25:39"} +{"current_steps": 2695, "total_steps": 4816, "loss": 0.2389, "lr": 1.9347731423497255e-05, "epoch": 3.917151162790698, "percentage": 55.96, "elapsed_time": "9:24:56", "remaining_time": "7:24:37"} +{"current_steps": 2700, "total_steps": 4816, "loss": 0.2448, "lr": 1.927528728626878e-05, "epoch": 3.9244186046511627, "percentage": 56.06, "elapsed_time": "9:25:59", "remaining_time": "7:23:34"} +{"current_steps": 2705, "total_steps": 4816, "loss": 0.2359, "lr": 1.9202852668829796e-05, "epoch": 3.931686046511628, "percentage": 56.17, "elapsed_time": "9:27:03", "remaining_time": "7:22:32"} +{"current_steps": 2710, "total_steps": 4816, "loss": 0.26, "lr": 1.9130428522677762e-05, "epoch": 3.938953488372093, "percentage": 56.27, "elapsed_time": "9:28:04", "remaining_time": "7:21:28"} +{"current_steps": 2715, "total_steps": 4816, "loss": 0.2491, "lr": 1.90580157991726e-05, "epoch": 3.946220930232558, "percentage": 56.37, "elapsed_time": "9:29:03", "remaining_time": "7:20:22"} +{"current_steps": 2720, "total_steps": 4816, "loss": 0.2498, "lr": 1.898561544952417e-05, "epoch": 3.953488372093023, "percentage": 56.48, "elapsed_time": "9:30:06", "remaining_time": "7:19:19"} +{"current_steps": 2725, "total_steps": 4816, "loss": 0.2591, "lr": 1.8913228424779807e-05, "epoch": 3.9607558139534884, "percentage": 56.58, "elapsed_time": "9:31:10", "remaining_time": "7:18:16"} +{"current_steps": 2730, "total_steps": 4816, "loss": 0.2475, "lr": 1.8840855675811788e-05, "epoch": 3.9680232558139537, "percentage": 56.69, "elapsed_time": "9:32:17", "remaining_time": "7:17:17"} +{"current_steps": 2735, "total_steps": 4816, "loss": 0.2443, "lr": 1.876849815330488e-05, "epoch": 3.9752906976744184, "percentage": 56.79, "elapsed_time": "9:33:15", "remaining_time": "7:16:10"} +{"current_steps": 2740, "total_steps": 4816, "loss": 0.2465, "lr": 1.869615680774384e-05, "epoch": 3.9825581395348837, 
"percentage": 56.89, "elapsed_time": "9:34:19", "remaining_time": "7:15:08"} +{"current_steps": 2745, "total_steps": 4816, "loss": 0.2506, "lr": 1.862383258940091e-05, "epoch": 3.989825581395349, "percentage": 57.0, "elapsed_time": "9:35:21", "remaining_time": "7:14:05"} +{"current_steps": 2750, "total_steps": 4816, "loss": 0.2565, "lr": 1.8551526448323366e-05, "epoch": 3.9970930232558137, "percentage": 57.1, "elapsed_time": "9:36:22", "remaining_time": "7:13:00"} +{"current_steps": 2755, "total_steps": 4816, "loss": 0.2407, "lr": 1.8479239334321005e-05, "epoch": 4.004360465116279, "percentage": 57.21, "elapsed_time": "9:37:21", "remaining_time": "7:11:54"} +{"current_steps": 2760, "total_steps": 4816, "loss": 0.2441, "lr": 1.84069721969537e-05, "epoch": 4.011627906976744, "percentage": 57.31, "elapsed_time": "9:38:23", "remaining_time": "7:10:51"} +{"current_steps": 2765, "total_steps": 4816, "loss": 0.2303, "lr": 1.8334725985518898e-05, "epoch": 4.0188953488372094, "percentage": 57.41, "elapsed_time": "9:39:24", "remaining_time": "7:09:47"} +{"current_steps": 2770, "total_steps": 4816, "loss": 0.2244, "lr": 1.8262501649039178e-05, "epoch": 4.026162790697675, "percentage": 57.52, "elapsed_time": "9:40:24", "remaining_time": "7:08:42"} +{"current_steps": 2775, "total_steps": 4816, "loss": 0.2225, "lr": 1.819030013624977e-05, "epoch": 4.03343023255814, "percentage": 57.62, "elapsed_time": "9:41:28", "remaining_time": "7:07:40"} +{"current_steps": 2780, "total_steps": 4816, "loss": 0.2299, "lr": 1.8118122395586076e-05, "epoch": 4.040697674418604, "percentage": 57.72, "elapsed_time": "9:42:29", "remaining_time": "7:06:36"} +{"current_steps": 2785, "total_steps": 4816, "loss": 0.2274, "lr": 1.8045969375171257e-05, "epoch": 4.0479651162790695, "percentage": 57.83, "elapsed_time": "9:43:36", "remaining_time": "7:05:36"} +{"current_steps": 2790, "total_steps": 4816, "loss": 0.2341, "lr": 1.797384202280371e-05, "epoch": 4.055232558139535, "percentage": 57.93, 
"elapsed_time": "9:44:39", "remaining_time": "7:04:33"} +{"current_steps": 2795, "total_steps": 4816, "loss": 0.2428, "lr": 1.7901741285944703e-05, "epoch": 4.0625, "percentage": 58.04, "elapsed_time": "9:45:44", "remaining_time": "7:03:31"} +{"current_steps": 2800, "total_steps": 4816, "loss": 0.2292, "lr": 1.782966811170585e-05, "epoch": 4.069767441860465, "percentage": 58.14, "elapsed_time": "9:46:44", "remaining_time": "7:02:27"} +{"current_steps": 2805, "total_steps": 4816, "loss": 0.2364, "lr": 1.7757623446836718e-05, "epoch": 4.0770348837209305, "percentage": 58.24, "elapsed_time": "9:47:44", "remaining_time": "7:01:22"} +{"current_steps": 2810, "total_steps": 4816, "loss": 0.2415, "lr": 1.768560823771238e-05, "epoch": 4.084302325581396, "percentage": 58.35, "elapsed_time": "9:48:45", "remaining_time": "7:00:18"} +{"current_steps": 2815, "total_steps": 4816, "loss": 0.2312, "lr": 1.761362343032097e-05, "epoch": 4.09156976744186, "percentage": 58.45, "elapsed_time": "9:49:48", "remaining_time": "6:59:15"} +{"current_steps": 2820, "total_steps": 4816, "loss": 0.2305, "lr": 1.754166997025127e-05, "epoch": 4.098837209302325, "percentage": 58.55, "elapsed_time": "9:50:51", "remaining_time": "6:58:12"} +{"current_steps": 2825, "total_steps": 4816, "loss": 0.2419, "lr": 1.7469748802680284e-05, "epoch": 4.1061046511627906, "percentage": 58.66, "elapsed_time": "9:52:01", "remaining_time": "6:57:15"} +{"current_steps": 2830, "total_steps": 4816, "loss": 0.2266, "lr": 1.739786087236083e-05, "epoch": 4.113372093023256, "percentage": 58.76, "elapsed_time": "9:53:11", "remaining_time": "6:56:16"} +{"current_steps": 2835, "total_steps": 4816, "loss": 0.2294, "lr": 1.7326007123609123e-05, "epoch": 4.120639534883721, "percentage": 58.87, "elapsed_time": "9:54:11", "remaining_time": "6:55:11"} +{"current_steps": 2840, "total_steps": 4816, "loss": 0.2492, "lr": 1.7254188500292355e-05, "epoch": 4.127906976744186, "percentage": 58.97, "elapsed_time": "9:55:08", "remaining_time": 
"6:54:04"} +{"current_steps": 2845, "total_steps": 4816, "loss": 0.2389, "lr": 1.718240594581633e-05, "epoch": 4.1351744186046515, "percentage": 59.07, "elapsed_time": "9:56:03", "remaining_time": "6:52:57"} +{"current_steps": 2850, "total_steps": 4816, "loss": 0.2469, "lr": 1.7110660403113045e-05, "epoch": 4.142441860465116, "percentage": 59.18, "elapsed_time": "9:57:02", "remaining_time": "6:51:50"} +{"current_steps": 2855, "total_steps": 4816, "loss": 0.2403, "lr": 1.7038952814628312e-05, "epoch": 4.149709302325581, "percentage": 59.28, "elapsed_time": "9:58:06", "remaining_time": "6:50:49"} +{"current_steps": 2860, "total_steps": 4816, "loss": 0.2374, "lr": 1.6967284122309385e-05, "epoch": 4.156976744186046, "percentage": 59.39, "elapsed_time": "9:59:07", "remaining_time": "6:49:45"} +{"current_steps": 2865, "total_steps": 4816, "loss": 0.2343, "lr": 1.6895655267592567e-05, "epoch": 4.164244186046512, "percentage": 59.49, "elapsed_time": "10:00:12", "remaining_time": "6:48:43"} +{"current_steps": 2870, "total_steps": 4816, "loss": 0.238, "lr": 1.6824067191390872e-05, "epoch": 4.171511627906977, "percentage": 59.59, "elapsed_time": "10:01:18", "remaining_time": "6:47:42"} +{"current_steps": 2875, "total_steps": 4816, "loss": 0.2295, "lr": 1.675252083408164e-05, "epoch": 4.178779069767442, "percentage": 59.7, "elapsed_time": "10:02:14", "remaining_time": "6:46:35"} +{"current_steps": 2880, "total_steps": 4816, "loss": 0.2324, "lr": 1.6681017135494194e-05, "epoch": 4.186046511627907, "percentage": 59.8, "elapsed_time": "10:03:15", "remaining_time": "6:45:31"} +{"current_steps": 2885, "total_steps": 4816, "loss": 0.2362, "lr": 1.66095570348975e-05, "epoch": 4.1933139534883725, "percentage": 59.9, "elapsed_time": "10:04:24", "remaining_time": "6:44:32"} +{"current_steps": 2890, "total_steps": 4816, "loss": 0.2374, "lr": 1.653814147098781e-05, "epoch": 4.200581395348837, "percentage": 60.01, "elapsed_time": "10:05:29", "remaining_time": "6:43:31"} +{"current_steps": 
2895, "total_steps": 4816, "loss": 0.2351, "lr": 1.6466771381876365e-05, "epoch": 4.207848837209302, "percentage": 60.11, "elapsed_time": "10:06:36", "remaining_time": "6:42:31"} +{"current_steps": 2900, "total_steps": 4816, "loss": 0.2366, "lr": 1.639544770507703e-05, "epoch": 4.215116279069767, "percentage": 60.22, "elapsed_time": "10:07:35", "remaining_time": "6:41:25"} +{"current_steps": 2905, "total_steps": 4816, "loss": 0.2399, "lr": 1.6324171377494015e-05, "epoch": 4.222383720930233, "percentage": 60.32, "elapsed_time": "10:08:38", "remaining_time": "6:40:22"} +{"current_steps": 2910, "total_steps": 4816, "loss": 0.233, "lr": 1.6252943335409542e-05, "epoch": 4.229651162790698, "percentage": 60.42, "elapsed_time": "10:09:43", "remaining_time": "6:39:21"} +{"current_steps": 2915, "total_steps": 4816, "loss": 0.231, "lr": 1.6181764514471566e-05, "epoch": 4.236918604651163, "percentage": 60.53, "elapsed_time": "10:10:46", "remaining_time": "6:38:19"} +{"current_steps": 2920, "total_steps": 4816, "loss": 0.2266, "lr": 1.611063584968148e-05, "epoch": 4.2441860465116275, "percentage": 60.63, "elapsed_time": "10:11:52", "remaining_time": "6:37:17"} +{"current_steps": 2925, "total_steps": 4816, "loss": 0.243, "lr": 1.6039558275381812e-05, "epoch": 4.251453488372093, "percentage": 60.74, "elapsed_time": "10:12:52", "remaining_time": "6:36:13"} +{"current_steps": 2930, "total_steps": 4816, "loss": 0.2305, "lr": 1.596853272524398e-05, "epoch": 4.258720930232558, "percentage": 60.84, "elapsed_time": "10:13:58", "remaining_time": "6:35:12"} +{"current_steps": 2935, "total_steps": 4816, "loss": 0.2348, "lr": 1.5897560132256008e-05, "epoch": 4.265988372093023, "percentage": 60.94, "elapsed_time": "10:15:05", "remaining_time": "6:34:12"} +{"current_steps": 2940, "total_steps": 4816, "loss": 0.2459, "lr": 1.582664142871029e-05, "epoch": 4.273255813953488, "percentage": 61.05, "elapsed_time": "10:16:10", "remaining_time": "6:33:10"} +{"current_steps": 2945, "total_steps": 
4816, "loss": 0.2202, "lr": 1.5755777546191313e-05, "epoch": 4.280523255813954, "percentage": 61.15, "elapsed_time": "10:17:11", "remaining_time": "6:32:06"} +{"current_steps": 2950, "total_steps": 4816, "loss": 0.2319, "lr": 1.5684969415563456e-05, "epoch": 4.287790697674419, "percentage": 61.25, "elapsed_time": "10:18:13", "remaining_time": "6:31:03"} +{"current_steps": 2955, "total_steps": 4816, "loss": 0.2385, "lr": 1.5614217966958725e-05, "epoch": 4.295058139534884, "percentage": 61.36, "elapsed_time": "10:19:13", "remaining_time": "6:29:58"} +{"current_steps": 2960, "total_steps": 4816, "loss": 0.2318, "lr": 1.554352412976457e-05, "epoch": 4.3023255813953485, "percentage": 61.46, "elapsed_time": "10:20:17", "remaining_time": "6:28:56"} +{"current_steps": 2965, "total_steps": 4816, "loss": 0.2395, "lr": 1.5472888832611662e-05, "epoch": 4.309593023255814, "percentage": 61.57, "elapsed_time": "10:21:21", "remaining_time": "6:27:54"} +{"current_steps": 2970, "total_steps": 4816, "loss": 0.2455, "lr": 1.5402313003361676e-05, "epoch": 4.316860465116279, "percentage": 61.67, "elapsed_time": "10:22:19", "remaining_time": "6:26:48"} +{"current_steps": 2975, "total_steps": 4816, "loss": 0.2526, "lr": 1.533179756909513e-05, "epoch": 4.324127906976744, "percentage": 61.77, "elapsed_time": "10:23:25", "remaining_time": "6:25:47"} +{"current_steps": 2980, "total_steps": 4816, "loss": 0.2355, "lr": 1.52613434560992e-05, "epoch": 4.3313953488372094, "percentage": 61.88, "elapsed_time": "10:24:25", "remaining_time": "6:24:42"} +{"current_steps": 2985, "total_steps": 4816, "loss": 0.2357, "lr": 1.519095158985554e-05, "epoch": 4.338662790697675, "percentage": 61.98, "elapsed_time": "10:25:29", "remaining_time": "6:23:40"} +{"current_steps": 2990, "total_steps": 4816, "loss": 0.2331, "lr": 1.512062289502814e-05, "epoch": 4.34593023255814, "percentage": 62.08, "elapsed_time": "10:26:28", "remaining_time": "6:22:35"} +{"current_steps": 2995, "total_steps": 4816, "loss": 0.2347, 
"lr": 1.5050358295451173e-05, "epoch": 4.353197674418604, "percentage": 62.19, "elapsed_time": "10:27:33", "remaining_time": "6:21:33"} +{"current_steps": 3000, "total_steps": 4816, "loss": 0.2283, "lr": 1.4980158714116864e-05, "epoch": 4.3604651162790695, "percentage": 62.29, "elapsed_time": "10:28:39", "remaining_time": "6:20:33"} +{"current_steps": 3005, "total_steps": 4816, "loss": 0.2342, "lr": 1.4910025073163346e-05, "epoch": 4.367732558139535, "percentage": 62.4, "elapsed_time": "10:30:21", "remaining_time": "6:19:53"} +{"current_steps": 3010, "total_steps": 4816, "loss": 0.2446, "lr": 1.4839958293862582e-05, "epoch": 4.375, "percentage": 62.5, "elapsed_time": "10:31:27", "remaining_time": "6:18:52"} +{"current_steps": 3015, "total_steps": 4816, "loss": 0.2426, "lr": 1.4769959296608228e-05, "epoch": 4.382267441860465, "percentage": 62.6, "elapsed_time": "10:32:30", "remaining_time": "6:17:49"} +{"current_steps": 3020, "total_steps": 4816, "loss": 0.2412, "lr": 1.4700029000903575e-05, "epoch": 4.3895348837209305, "percentage": 62.71, "elapsed_time": "10:33:30", "remaining_time": "6:16:44"} +{"current_steps": 3025, "total_steps": 4816, "loss": 0.2526, "lr": 1.463016832534943e-05, "epoch": 4.396802325581396, "percentage": 62.81, "elapsed_time": "10:34:30", "remaining_time": "6:15:40"} +{"current_steps": 3030, "total_steps": 4816, "loss": 0.2349, "lr": 1.4560378187632101e-05, "epoch": 4.40406976744186, "percentage": 62.92, "elapsed_time": "10:35:33", "remaining_time": "6:14:37"} +{"current_steps": 3035, "total_steps": 4816, "loss": 0.2423, "lr": 1.4490659504511295e-05, "epoch": 4.411337209302325, "percentage": 63.02, "elapsed_time": "10:36:36", "remaining_time": "6:13:34"} +{"current_steps": 3040, "total_steps": 4816, "loss": 0.2309, "lr": 1.44210131918081e-05, "epoch": 4.4186046511627906, "percentage": 63.12, "elapsed_time": "10:37:39", "remaining_time": "6:12:31"} +{"current_steps": 3045, "total_steps": 4816, "loss": 0.2356, "lr": 1.4351440164392956e-05, 
"epoch": 4.425872093023256, "percentage": 63.23, "elapsed_time": "10:38:39", "remaining_time": "6:11:27"} +{"current_steps": 3050, "total_steps": 4816, "loss": 0.2305, "lr": 1.4281941336173621e-05, "epoch": 4.433139534883721, "percentage": 63.33, "elapsed_time": "10:39:42", "remaining_time": "6:10:24"} +{"current_steps": 3055, "total_steps": 4816, "loss": 0.2215, "lr": 1.4212517620083186e-05, "epoch": 4.440406976744186, "percentage": 63.43, "elapsed_time": "10:40:38", "remaining_time": "6:09:17"} +{"current_steps": 3060, "total_steps": 4816, "loss": 0.2333, "lr": 1.4143169928068061e-05, "epoch": 4.4476744186046515, "percentage": 63.54, "elapsed_time": "10:41:45", "remaining_time": "6:08:16"} +{"current_steps": 3065, "total_steps": 4816, "loss": 0.2287, "lr": 1.4073899171076022e-05, "epoch": 4.454941860465116, "percentage": 63.64, "elapsed_time": "10:42:46", "remaining_time": "6:07:12"} +{"current_steps": 3070, "total_steps": 4816, "loss": 0.2261, "lr": 1.400470625904422e-05, "epoch": 4.462209302325581, "percentage": 63.75, "elapsed_time": "10:43:48", "remaining_time": "6:06:08"} +{"current_steps": 3075, "total_steps": 4816, "loss": 0.2281, "lr": 1.3935592100887242e-05, "epoch": 4.469476744186046, "percentage": 63.85, "elapsed_time": "10:44:47", "remaining_time": "6:05:04"} +{"current_steps": 3080, "total_steps": 4816, "loss": 0.2353, "lr": 1.386655760448517e-05, "epoch": 4.476744186046512, "percentage": 63.95, "elapsed_time": "10:45:52", "remaining_time": "6:04:02"} +{"current_steps": 3085, "total_steps": 4816, "loss": 0.2344, "lr": 1.3797603676671646e-05, "epoch": 4.484011627906977, "percentage": 64.06, "elapsed_time": "10:46:51", "remaining_time": "6:02:57"} +{"current_steps": 3090, "total_steps": 4816, "loss": 0.229, "lr": 1.372873122322198e-05, "epoch": 4.491279069767442, "percentage": 64.16, "elapsed_time": "10:47:55", "remaining_time": "6:01:54"} +{"current_steps": 3095, "total_steps": 4816, "loss": 0.2391, "lr": 1.365994114884122e-05, "epoch": 
4.498546511627907, "percentage": 64.26, "elapsed_time": "10:49:00", "remaining_time": "6:00:53"} +{"current_steps": 3100, "total_steps": 4816, "loss": 0.2357, "lr": 1.359123435715231e-05, "epoch": 4.5058139534883725, "percentage": 64.37, "elapsed_time": "10:50:02", "remaining_time": "5:59:49"} +{"current_steps": 3105, "total_steps": 4816, "loss": 0.2297, "lr": 1.3522611750684171e-05, "epoch": 4.513081395348837, "percentage": 64.47, "elapsed_time": "10:51:01", "remaining_time": "5:58:44"} +{"current_steps": 3110, "total_steps": 4816, "loss": 0.2428, "lr": 1.3454074230859896e-05, "epoch": 4.520348837209302, "percentage": 64.58, "elapsed_time": "10:52:03", "remaining_time": "5:57:41"} +{"current_steps": 3115, "total_steps": 4816, "loss": 0.2332, "lr": 1.3385622697984872e-05, "epoch": 4.527616279069767, "percentage": 64.68, "elapsed_time": "10:53:01", "remaining_time": "5:56:35"} +{"current_steps": 3120, "total_steps": 4816, "loss": 0.2386, "lr": 1.331725805123496e-05, "epoch": 4.534883720930233, "percentage": 64.78, "elapsed_time": "10:54:04", "remaining_time": "5:55:33"} +{"current_steps": 3125, "total_steps": 4816, "loss": 0.2374, "lr": 1.3248981188644703e-05, "epoch": 4.542151162790698, "percentage": 64.89, "elapsed_time": "10:55:06", "remaining_time": "5:54:29"} +{"current_steps": 3130, "total_steps": 4816, "loss": 0.2296, "lr": 1.3180793007095502e-05, "epoch": 4.549418604651163, "percentage": 64.99, "elapsed_time": "10:56:04", "remaining_time": "5:53:23"} +{"current_steps": 3135, "total_steps": 4816, "loss": 0.2527, "lr": 1.3112694402303863e-05, "epoch": 4.5566860465116275, "percentage": 65.1, "elapsed_time": "10:57:03", "remaining_time": "5:52:18"} +{"current_steps": 3140, "total_steps": 4816, "loss": 0.2489, "lr": 1.3044686268809596e-05, "epoch": 4.563953488372093, "percentage": 65.2, "elapsed_time": "10:58:04", "remaining_time": "5:51:15"} +{"current_steps": 3145, "total_steps": 4816, "loss": 0.236, "lr": 1.2976769499964109e-05, "epoch": 4.571220930232558, 
"percentage": 65.3, "elapsed_time": "10:59:02", "remaining_time": "5:50:09"} +{"current_steps": 3150, "total_steps": 4816, "loss": 0.2395, "lr": 1.2908944987918633e-05, "epoch": 4.578488372093023, "percentage": 65.41, "elapsed_time": "11:00:10", "remaining_time": "5:49:09"} +{"current_steps": 3155, "total_steps": 4816, "loss": 0.2301, "lr": 1.2841213623612519e-05, "epoch": 4.585755813953488, "percentage": 65.51, "elapsed_time": "11:01:13", "remaining_time": "5:48:06"} +{"current_steps": 3160, "total_steps": 4816, "loss": 0.2346, "lr": 1.2773576296761542e-05, "epoch": 4.593023255813954, "percentage": 65.61, "elapsed_time": "11:02:15", "remaining_time": "5:47:03"} +{"current_steps": 3165, "total_steps": 4816, "loss": 0.2265, "lr": 1.2706033895846192e-05, "epoch": 4.600290697674419, "percentage": 65.72, "elapsed_time": "11:03:22", "remaining_time": "5:46:02"} +{"current_steps": 3170, "total_steps": 4816, "loss": 0.2395, "lr": 1.2638587308100036e-05, "epoch": 4.607558139534884, "percentage": 65.82, "elapsed_time": "11:04:24", "remaining_time": "5:44:59"} +{"current_steps": 3175, "total_steps": 4816, "loss": 0.227, "lr": 1.2571237419498018e-05, "epoch": 4.6148255813953485, "percentage": 65.93, "elapsed_time": "11:05:26", "remaining_time": "5:43:55"} +{"current_steps": 3180, "total_steps": 4816, "loss": 0.2283, "lr": 1.2503985114744883e-05, "epoch": 4.622093023255814, "percentage": 66.03, "elapsed_time": "11:06:30", "remaining_time": "5:42:53"} +{"current_steps": 3185, "total_steps": 4816, "loss": 0.2497, "lr": 1.2436831277263481e-05, "epoch": 4.629360465116279, "percentage": 66.13, "elapsed_time": "11:07:37", "remaining_time": "5:41:53"} +{"current_steps": 3190, "total_steps": 4816, "loss": 0.2417, "lr": 1.2369776789183234e-05, "epoch": 4.636627906976744, "percentage": 66.24, "elapsed_time": "11:08:39", "remaining_time": "5:40:49"} +{"current_steps": 3195, "total_steps": 4816, "loss": 0.238, "lr": 1.230282253132849e-05, "epoch": 4.6438953488372094, "percentage": 66.34, 
"elapsed_time": "11:09:32", "remaining_time": "5:39:41"} +{"current_steps": 3200, "total_steps": 4816, "loss": 0.2459, "lr": 1.2235969383206987e-05, "epoch": 4.651162790697675, "percentage": 66.45, "elapsed_time": "11:10:35", "remaining_time": "5:38:39"} +{"current_steps": 3205, "total_steps": 4816, "loss": 0.2293, "lr": 1.2169218222998294e-05, "epoch": 4.658430232558139, "percentage": 66.55, "elapsed_time": "11:11:34", "remaining_time": "5:37:33"} +{"current_steps": 3210, "total_steps": 4816, "loss": 0.237, "lr": 1.2102569927542275e-05, "epoch": 4.665697674418604, "percentage": 66.65, "elapsed_time": "11:12:34", "remaining_time": "5:36:29"} +{"current_steps": 3215, "total_steps": 4816, "loss": 0.2292, "lr": 1.2036025372327553e-05, "epoch": 4.6729651162790695, "percentage": 66.76, "elapsed_time": "11:13:43", "remaining_time": "5:35:29"} +{"current_steps": 3220, "total_steps": 4816, "loss": 0.2418, "lr": 1.1969585431480037e-05, "epoch": 4.680232558139535, "percentage": 66.86, "elapsed_time": "11:14:39", "remaining_time": "5:34:23"} +{"current_steps": 3225, "total_steps": 4816, "loss": 0.2291, "lr": 1.1903250977751429e-05, "epoch": 4.6875, "percentage": 66.96, "elapsed_time": "11:15:31", "remaining_time": "5:33:15"} +{"current_steps": 3230, "total_steps": 4816, "loss": 0.2325, "lr": 1.1837022882507745e-05, "epoch": 4.694767441860465, "percentage": 67.07, "elapsed_time": "11:16:30", "remaining_time": "5:32:10"} +{"current_steps": 3235, "total_steps": 4816, "loss": 0.2303, "lr": 1.1770902015717894e-05, "epoch": 4.7020348837209305, "percentage": 67.17, "elapsed_time": "11:17:35", "remaining_time": "5:31:09"} +{"current_steps": 3240, "total_steps": 4816, "loss": 0.2349, "lr": 1.1704889245942229e-05, "epoch": 4.709302325581396, "percentage": 67.28, "elapsed_time": "11:18:39", "remaining_time": "5:30:06"} +{"current_steps": 3245, "total_steps": 4816, "loss": 0.2286, "lr": 1.163898544032115e-05, "epoch": 4.716569767441861, "percentage": 67.38, "elapsed_time": "11:19:42", 
"remaining_time": "5:29:03"} +{"current_steps": 3250, "total_steps": 4816, "loss": 0.2372, "lr": 1.1573191464563709e-05, "epoch": 4.723837209302325, "percentage": 67.48, "elapsed_time": "11:20:48", "remaining_time": "5:28:02"} +{"current_steps": 3255, "total_steps": 4816, "loss": 0.242, "lr": 1.1507508182936231e-05, "epoch": 4.7311046511627906, "percentage": 67.59, "elapsed_time": "11:21:48", "remaining_time": "5:26:58"} +{"current_steps": 3260, "total_steps": 4816, "loss": 0.2338, "lr": 1.144193645825099e-05, "epoch": 4.738372093023256, "percentage": 67.69, "elapsed_time": "11:22:54", "remaining_time": "5:25:57"} +{"current_steps": 3265, "total_steps": 4816, "loss": 0.2282, "lr": 1.1376477151854832e-05, "epoch": 4.745639534883721, "percentage": 67.79, "elapsed_time": "11:23:59", "remaining_time": "5:24:55"} +{"current_steps": 3270, "total_steps": 4816, "loss": 0.2343, "lr": 1.131113112361788e-05, "epoch": 4.752906976744186, "percentage": 67.9, "elapsed_time": "11:24:56", "remaining_time": "5:23:49"} +{"current_steps": 3275, "total_steps": 4816, "loss": 0.2296, "lr": 1.1245899231922265e-05, "epoch": 4.7601744186046515, "percentage": 68.0, "elapsed_time": "11:26:02", "remaining_time": "5:22:48"} +{"current_steps": 3280, "total_steps": 4816, "loss": 0.2324, "lr": 1.1180782333650807e-05, "epoch": 4.767441860465116, "percentage": 68.11, "elapsed_time": "11:27:04", "remaining_time": "5:21:45"} +{"current_steps": 3285, "total_steps": 4816, "loss": 0.2345, "lr": 1.1115781284175777e-05, "epoch": 4.774709302325581, "percentage": 68.21, "elapsed_time": "11:28:09", "remaining_time": "5:20:43"} +{"current_steps": 3290, "total_steps": 4816, "loss": 0.2495, "lr": 1.1050896937347666e-05, "epoch": 4.781976744186046, "percentage": 68.31, "elapsed_time": "11:29:10", "remaining_time": "5:19:39"} +{"current_steps": 3295, "total_steps": 4816, "loss": 0.2369, "lr": 1.098613014548398e-05, "epoch": 4.789244186046512, "percentage": 68.42, "elapsed_time": "11:30:13", "remaining_time": 
"5:18:36"} +{"current_steps": 3300, "total_steps": 4816, "loss": 0.2396, "lr": 1.0921481759358005e-05, "epoch": 4.796511627906977, "percentage": 68.52, "elapsed_time": "11:31:18", "remaining_time": "5:17:35"} +{"current_steps": 3305, "total_steps": 4816, "loss": 0.231, "lr": 1.0856952628187662e-05, "epoch": 4.803779069767442, "percentage": 68.63, "elapsed_time": "11:32:19", "remaining_time": "5:16:31"} +{"current_steps": 3310, "total_steps": 4816, "loss": 0.2336, "lr": 1.079254359962436e-05, "epoch": 4.811046511627907, "percentage": 68.73, "elapsed_time": "11:33:20", "remaining_time": "5:15:27"} +{"current_steps": 3315, "total_steps": 4816, "loss": 0.2372, "lr": 1.0728255519741831e-05, "epoch": 4.8183139534883725, "percentage": 68.83, "elapsed_time": "11:34:20", "remaining_time": "5:14:23"} +{"current_steps": 3320, "total_steps": 4816, "loss": 0.2455, "lr": 1.066408923302503e-05, "epoch": 4.825581395348837, "percentage": 68.94, "elapsed_time": "11:35:24", "remaining_time": "5:13:20"} +{"current_steps": 3325, "total_steps": 4816, "loss": 0.2407, "lr": 1.060004558235905e-05, "epoch": 4.832848837209302, "percentage": 69.04, "elapsed_time": "11:36:28", "remaining_time": "5:12:18"} +{"current_steps": 3330, "total_steps": 4816, "loss": 0.2304, "lr": 1.0536125409018043e-05, "epoch": 4.840116279069767, "percentage": 69.14, "elapsed_time": "11:37:33", "remaining_time": "5:11:17"} +{"current_steps": 3335, "total_steps": 4816, "loss": 0.2293, "lr": 1.0472329552654172e-05, "epoch": 4.847383720930233, "percentage": 69.25, "elapsed_time": "11:38:37", "remaining_time": "5:10:14"} +{"current_steps": 3340, "total_steps": 4816, "loss": 0.2346, "lr": 1.040865885128656e-05, "epoch": 4.854651162790698, "percentage": 69.35, "elapsed_time": "11:39:44", "remaining_time": "5:09:13"} +{"current_steps": 3345, "total_steps": 4816, "loss": 0.253, "lr": 1.034511414129033e-05, "epoch": 4.861918604651163, "percentage": 69.46, "elapsed_time": "11:40:42", "remaining_time": "5:08:08"} 
+{"current_steps": 3350, "total_steps": 4816, "loss": 0.2369, "lr": 1.0281696257385566e-05, "epoch": 4.8691860465116275, "percentage": 69.56, "elapsed_time": "11:41:44", "remaining_time": "5:07:05"} +{"current_steps": 3355, "total_steps": 4816, "loss": 0.2243, "lr": 1.0218406032626383e-05, "epoch": 4.876453488372093, "percentage": 69.66, "elapsed_time": "11:42:50", "remaining_time": "5:06:04"} +{"current_steps": 3360, "total_steps": 4816, "loss": 0.2348, "lr": 1.015524429838995e-05, "epoch": 4.883720930232558, "percentage": 69.77, "elapsed_time": "11:43:50", "remaining_time": "5:05:00"} +{"current_steps": 3365, "total_steps": 4816, "loss": 0.2313, "lr": 1.009221188436563e-05, "epoch": 4.890988372093023, "percentage": 69.87, "elapsed_time": "11:44:54", "remaining_time": "5:03:57"} +{"current_steps": 3370, "total_steps": 4816, "loss": 0.2366, "lr": 1.0029309618544008e-05, "epoch": 4.898255813953488, "percentage": 69.98, "elapsed_time": "11:45:54", "remaining_time": "5:02:53"} +{"current_steps": 3375, "total_steps": 4816, "loss": 0.2414, "lr": 9.966538327206055e-06, "epoch": 4.905523255813954, "percentage": 70.08, "elapsed_time": "11:46:56", "remaining_time": "5:01:50"} +{"current_steps": 3380, "total_steps": 4816, "loss": 0.2358, "lr": 9.903898834912288e-06, "epoch": 4.912790697674419, "percentage": 70.18, "elapsed_time": "11:47:58", "remaining_time": "5:00:47"} +{"current_steps": 3385, "total_steps": 4816, "loss": 0.2605, "lr": 9.8413919644919e-06, "epoch": 4.920058139534884, "percentage": 70.29, "elapsed_time": "11:49:07", "remaining_time": "4:59:46"} +{"current_steps": 3390, "total_steps": 4816, "loss": 0.2464, "lr": 9.77901853703197e-06, "epoch": 4.9273255813953485, "percentage": 70.39, "elapsed_time": "11:50:10", "remaining_time": "4:58:44"} +{"current_steps": 3395, "total_steps": 4816, "loss": 0.2372, "lr": 9.716779371866674e-06, "epoch": 4.934593023255814, "percentage": 70.49, "elapsed_time": "11:51:15", "remaining_time": "4:57:42"} +{"current_steps": 3400, 
"total_steps": 4816, "loss": 0.2347, "lr": 9.654675286566548e-06, "epoch": 4.941860465116279, "percentage": 70.6, "elapsed_time": "11:52:21", "remaining_time": "4:56:40"} +{"current_steps": 3405, "total_steps": 4816, "loss": 0.2219, "lr": 9.592707096927704e-06, "epoch": 4.949127906976744, "percentage": 70.7, "elapsed_time": "11:53:21", "remaining_time": "4:55:36"} +{"current_steps": 3410, "total_steps": 4816, "loss": 0.2336, "lr": 9.53087561696113e-06, "epoch": 4.9563953488372094, "percentage": 70.81, "elapsed_time": "11:54:21", "remaining_time": "4:54:32"} +{"current_steps": 3415, "total_steps": 4816, "loss": 0.2459, "lr": 9.469181658882034e-06, "epoch": 4.963662790697675, "percentage": 70.91, "elapsed_time": "11:55:22", "remaining_time": "4:53:28"} +{"current_steps": 3420, "total_steps": 4816, "loss": 0.238, "lr": 9.40762603309911e-06, "epoch": 4.970930232558139, "percentage": 71.01, "elapsed_time": "11:56:24", "remaining_time": "4:52:25"} +{"current_steps": 3425, "total_steps": 4816, "loss": 0.2319, "lr": 9.346209548203947e-06, "epoch": 4.978197674418604, "percentage": 71.12, "elapsed_time": "11:57:28", "remaining_time": "4:51:23"} +{"current_steps": 3430, "total_steps": 4816, "loss": 0.2512, "lr": 9.284933010960364e-06, "epoch": 4.9854651162790695, "percentage": 71.22, "elapsed_time": "11:58:34", "remaining_time": "4:50:21"} +{"current_steps": 3435, "total_steps": 4816, "loss": 0.231, "lr": 9.223797226293867e-06, "epoch": 4.992732558139535, "percentage": 71.32, "elapsed_time": "11:59:38", "remaining_time": "4:49:19"} +{"current_steps": 3440, "total_steps": 4816, "loss": 0.2371, "lr": 9.162802997281022e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "12:00:39", "remaining_time": "4:48:15"} +{"current_steps": 3445, "total_steps": 4816, "loss": 0.2274, "lr": 9.10195112513892e-06, "epoch": 5.007267441860465, "percentage": 71.53, "elapsed_time": "12:01:39", "remaining_time": "4:47:11"} +{"current_steps": 3450, "total_steps": 4816, "loss": 0.219, "lr": 
9.041242409214686e-06, "epoch": 5.0145348837209305, "percentage": 71.64, "elapsed_time": "12:02:45", "remaining_time": "4:46:10"} +{"current_steps": 3455, "total_steps": 4816, "loss": 0.2222, "lr": 8.980677646974926e-06, "epoch": 5.021802325581396, "percentage": 71.74, "elapsed_time": "12:03:50", "remaining_time": "4:45:08"} +{"current_steps": 3460, "total_steps": 4816, "loss": 0.2215, "lr": 8.920257633995295e-06, "epoch": 5.02906976744186, "percentage": 71.84, "elapsed_time": "12:04:49", "remaining_time": "4:44:04"} +{"current_steps": 3465, "total_steps": 4816, "loss": 0.2304, "lr": 8.85998316395001e-06, "epoch": 5.036337209302325, "percentage": 71.95, "elapsed_time": "12:05:43", "remaining_time": "4:42:57"} +{"current_steps": 3470, "total_steps": 4816, "loss": 0.2313, "lr": 8.799855028601472e-06, "epoch": 5.0436046511627906, "percentage": 72.05, "elapsed_time": "12:06:45", "remaining_time": "4:41:54"} +{"current_steps": 3475, "total_steps": 4816, "loss": 0.223, "lr": 8.739874017789813e-06, "epoch": 5.050872093023256, "percentage": 72.16, "elapsed_time": "12:07:47", "remaining_time": "4:40:51"} +{"current_steps": 3480, "total_steps": 4816, "loss": 0.2288, "lr": 8.680040919422544e-06, "epoch": 5.058139534883721, "percentage": 72.26, "elapsed_time": "12:08:49", "remaining_time": "4:39:48"} +{"current_steps": 3485, "total_steps": 4816, "loss": 0.2262, "lr": 8.620356519464228e-06, "epoch": 5.065406976744186, "percentage": 72.36, "elapsed_time": "12:09:52", "remaining_time": "4:38:45"} +{"current_steps": 3490, "total_steps": 4816, "loss": 0.2416, "lr": 8.560821601926112e-06, "epoch": 5.0726744186046515, "percentage": 72.47, "elapsed_time": "12:10:58", "remaining_time": "4:37:43"} +{"current_steps": 3495, "total_steps": 4816, "loss": 0.2155, "lr": 8.501436948855857e-06, "epoch": 5.079941860465116, "percentage": 72.57, "elapsed_time": "12:12:01", "remaining_time": "4:36:41"} +{"current_steps": 3500, "total_steps": 4816, "loss": 0.2137, "lr": 8.44220334032725e-06, 
"epoch": 5.087209302325581, "percentage": 72.67, "elapsed_time": "12:13:03", "remaining_time": "4:35:37"} +{"current_steps": 3505, "total_steps": 4816, "loss": 0.2239, "lr": 8.383121554429985e-06, "epoch": 5.094476744186046, "percentage": 72.78, "elapsed_time": "12:14:07", "remaining_time": "4:34:35"} +{"current_steps": 3510, "total_steps": 4816, "loss": 0.222, "lr": 8.3241923672594e-06, "epoch": 5.101744186046512, "percentage": 72.88, "elapsed_time": "12:15:05", "remaining_time": "4:33:30"} +{"current_steps": 3515, "total_steps": 4816, "loss": 0.2308, "lr": 8.265416552906316e-06, "epoch": 5.109011627906977, "percentage": 72.99, "elapsed_time": "12:16:09", "remaining_time": "4:32:28"} +{"current_steps": 3520, "total_steps": 4816, "loss": 0.2181, "lr": 8.20679488344684e-06, "epoch": 5.116279069767442, "percentage": 73.09, "elapsed_time": "12:17:07", "remaining_time": "4:31:23"} +{"current_steps": 3525, "total_steps": 4816, "loss": 0.2286, "lr": 8.148328128932263e-06, "epoch": 5.123546511627907, "percentage": 73.19, "elapsed_time": "12:18:10", "remaining_time": "4:30:20"} +{"current_steps": 3530, "total_steps": 4816, "loss": 0.2332, "lr": 8.090017057378913e-06, "epoch": 5.1308139534883725, "percentage": 73.3, "elapsed_time": "12:19:16", "remaining_time": "4:29:19"} +{"current_steps": 3535, "total_steps": 4816, "loss": 0.2325, "lr": 8.03186243475807e-06, "epoch": 5.138081395348837, "percentage": 73.4, "elapsed_time": "12:20:10", "remaining_time": "4:28:13"} +{"current_steps": 3540, "total_steps": 4816, "loss": 0.2221, "lr": 7.9738650249859e-06, "epoch": 5.145348837209302, "percentage": 73.5, "elapsed_time": "12:21:08", "remaining_time": "4:27:08"} +{"current_steps": 3545, "total_steps": 4816, "loss": 0.2243, "lr": 7.916025589913452e-06, "epoch": 5.152616279069767, "percentage": 73.61, "elapsed_time": "12:22:08", "remaining_time": "4:26:05"} +{"current_steps": 3550, "total_steps": 4816, "loss": 0.2212, "lr": 7.858344889316611e-06, "epoch": 5.159883720930233, 
"percentage": 73.71, "elapsed_time": "12:23:13", "remaining_time": "4:25:02"} +{"current_steps": 3555, "total_steps": 4816, "loss": 0.224, "lr": 7.80082368088613e-06, "epoch": 5.167151162790698, "percentage": 73.82, "elapsed_time": "12:24:19", "remaining_time": "4:24:01"} +{"current_steps": 3560, "total_steps": 4816, "loss": 0.2348, "lr": 7.743462720217698e-06, "epoch": 5.174418604651163, "percentage": 73.92, "elapsed_time": "12:25:17", "remaining_time": "4:22:56"} +{"current_steps": 3565, "total_steps": 4816, "loss": 0.2211, "lr": 7.686262760801985e-06, "epoch": 5.1816860465116275, "percentage": 74.02, "elapsed_time": "12:26:16", "remaining_time": "4:21:52"} +{"current_steps": 3570, "total_steps": 4816, "loss": 0.231, "lr": 7.629224554014763e-06, "epoch": 5.188953488372093, "percentage": 74.13, "elapsed_time": "12:27:12", "remaining_time": "4:20:47"} +{"current_steps": 3575, "total_steps": 4816, "loss": 0.2208, "lr": 7.5723488491070116e-06, "epoch": 5.196220930232558, "percentage": 74.23, "elapsed_time": "12:28:11", "remaining_time": "4:19:43"} +{"current_steps": 3580, "total_steps": 4816, "loss": 0.2126, "lr": 7.515636393195129e-06, "epoch": 5.203488372093023, "percentage": 74.34, "elapsed_time": "12:29:13", "remaining_time": "4:18:40"} +{"current_steps": 3585, "total_steps": 4816, "loss": 0.2276, "lr": 7.459087931251052e-06, "epoch": 5.210755813953488, "percentage": 74.44, "elapsed_time": "12:30:17", "remaining_time": "4:17:37"} +{"current_steps": 3590, "total_steps": 4816, "loss": 0.2144, "lr": 7.402704206092508e-06, "epoch": 5.218023255813954, "percentage": 74.54, "elapsed_time": "12:31:08", "remaining_time": "4:16:31"} +{"current_steps": 3595, "total_steps": 4816, "loss": 0.2218, "lr": 7.346485958373266e-06, "epoch": 5.225290697674419, "percentage": 74.65, "elapsed_time": "12:32:15", "remaining_time": "4:15:29"} +{"current_steps": 3600, "total_steps": 4816, "loss": 0.2369, "lr": 7.290433926573373e-06, "epoch": 5.232558139534884, "percentage": 74.75, 
"elapsed_time": "12:33:14", "remaining_time": "4:14:25"} +{"current_steps": 3605, "total_steps": 4816, "loss": 0.2232, "lr": 7.234548846989478e-06, "epoch": 5.2398255813953485, "percentage": 74.85, "elapsed_time": "12:34:15", "remaining_time": "4:13:22"} +{"current_steps": 3610, "total_steps": 4816, "loss": 0.2318, "lr": 7.17883145372515e-06, "epoch": 5.247093023255814, "percentage": 74.96, "elapsed_time": "12:35:19", "remaining_time": "4:12:19"} +{"current_steps": 3615, "total_steps": 4816, "loss": 0.2341, "lr": 7.123282478681255e-06, "epoch": 5.254360465116279, "percentage": 75.06, "elapsed_time": "12:36:18", "remaining_time": "4:11:16"} +{"current_steps": 3620, "total_steps": 4816, "loss": 0.2314, "lr": 7.06790265154631e-06, "epoch": 5.261627906976744, "percentage": 75.17, "elapsed_time": "12:37:22", "remaining_time": "4:10:13"} +{"current_steps": 3625, "total_steps": 4816, "loss": 0.211, "lr": 7.012692699786918e-06, "epoch": 5.2688953488372094, "percentage": 75.27, "elapsed_time": "12:38:25", "remaining_time": "4:09:10"} +{"current_steps": 3630, "total_steps": 4816, "loss": 0.233, "lr": 6.9576533486382004e-06, "epoch": 5.276162790697675, "percentage": 75.37, "elapsed_time": "12:39:25", "remaining_time": "4:08:07"} +{"current_steps": 3635, "total_steps": 4816, "loss": 0.2186, "lr": 6.902785321094301e-06, "epoch": 5.28343023255814, "percentage": 75.48, "elapsed_time": "12:40:31", "remaining_time": "4:07:05"} +{"current_steps": 3640, "total_steps": 4816, "loss": 0.2314, "lr": 6.84808933789884e-06, "epoch": 5.290697674418604, "percentage": 75.58, "elapsed_time": "12:41:31", "remaining_time": "4:06:01"} +{"current_steps": 3645, "total_steps": 4816, "loss": 0.2234, "lr": 6.793566117535475e-06, "epoch": 5.2979651162790695, "percentage": 75.69, "elapsed_time": "12:42:29", "remaining_time": "4:04:57"} +{"current_steps": 3650, "total_steps": 4816, "loss": 0.2258, "lr": 6.739216376218483e-06, "epoch": 5.305232558139535, "percentage": 75.79, "elapsed_time": "12:43:35", 
"remaining_time": "4:03:55"} +{"current_steps": 3655, "total_steps": 4816, "loss": 0.2309, "lr": 6.6850408278833e-06, "epoch": 5.3125, "percentage": 75.89, "elapsed_time": "12:44:37", "remaining_time": "4:02:52"} +{"current_steps": 3660, "total_steps": 4816, "loss": 0.2197, "lr": 6.631040184177191e-06, "epoch": 5.319767441860465, "percentage": 76.0, "elapsed_time": "12:45:42", "remaining_time": "4:01:50"} +{"current_steps": 3665, "total_steps": 4816, "loss": 0.2209, "lr": 6.577215154449863e-06, "epoch": 5.3270348837209305, "percentage": 76.1, "elapsed_time": "12:46:43", "remaining_time": "4:00:47"} +{"current_steps": 3670, "total_steps": 4816, "loss": 0.2327, "lr": 6.523566445744196e-06, "epoch": 5.334302325581396, "percentage": 76.2, "elapsed_time": "12:47:47", "remaining_time": "3:59:45"} +{"current_steps": 3675, "total_steps": 4816, "loss": 0.2177, "lr": 6.470094762786901e-06, "epoch": 5.34156976744186, "percentage": 76.31, "elapsed_time": "12:48:49", "remaining_time": "3:58:41"} +{"current_steps": 3680, "total_steps": 4816, "loss": 0.2245, "lr": 6.4168008079792906e-06, "epoch": 5.348837209302325, "percentage": 76.41, "elapsed_time": "12:49:52", "remaining_time": "3:57:39"} +{"current_steps": 3685, "total_steps": 4816, "loss": 0.2243, "lr": 6.36368528138807e-06, "epoch": 5.3561046511627906, "percentage": 76.52, "elapsed_time": "12:50:56", "remaining_time": "3:56:37"} +{"current_steps": 3690, "total_steps": 4816, "loss": 0.2267, "lr": 6.310748880736095e-06, "epoch": 5.363372093023256, "percentage": 76.62, "elapsed_time": "12:52:02", "remaining_time": "3:55:35"} +{"current_steps": 3695, "total_steps": 4816, "loss": 0.2228, "lr": 6.2579923013932435e-06, "epoch": 5.370639534883721, "percentage": 76.72, "elapsed_time": "12:53:07", "remaining_time": "3:54:33"} +{"current_steps": 3700, "total_steps": 4816, "loss": 0.2313, "lr": 6.205416236367263e-06, "epoch": 5.377906976744186, "percentage": 76.83, "elapsed_time": "12:54:08", "remaining_time": "3:53:29"} 
+{"current_steps": 3705, "total_steps": 4816, "loss": 0.2281, "lr": 6.1530213762946944e-06, "epoch": 5.3851744186046515, "percentage": 76.93, "elapsed_time": "12:55:14", "remaining_time": "3:52:28"} +{"current_steps": 3710, "total_steps": 4816, "loss": 0.2274, "lr": 6.100808409431755e-06, "epoch": 5.392441860465116, "percentage": 77.03, "elapsed_time": "12:56:13", "remaining_time": "3:51:24"} +{"current_steps": 3715, "total_steps": 4816, "loss": 0.2131, "lr": 6.048778021645329e-06, "epoch": 5.399709302325581, "percentage": 77.14, "elapsed_time": "12:57:16", "remaining_time": "3:50:21"} +{"current_steps": 3720, "total_steps": 4816, "loss": 0.2309, "lr": 5.996930896403967e-06, "epoch": 5.406976744186046, "percentage": 77.24, "elapsed_time": "12:58:21", "remaining_time": "3:49:19"} +{"current_steps": 3725, "total_steps": 4816, "loss": 0.2224, "lr": 5.94526771476887e-06, "epoch": 5.414244186046512, "percentage": 77.35, "elapsed_time": "12:59:26", "remaining_time": "3:48:17"} +{"current_steps": 3730, "total_steps": 4816, "loss": 0.2227, "lr": 5.893789155384975e-06, "epoch": 5.421511627906977, "percentage": 77.45, "elapsed_time": "13:00:24", "remaining_time": "3:47:13"} +{"current_steps": 3735, "total_steps": 4816, "loss": 0.2256, "lr": 5.8424958944720245e-06, "epoch": 5.428779069767442, "percentage": 77.55, "elapsed_time": "13:01:30", "remaining_time": "3:46:11"} +{"current_steps": 3740, "total_steps": 4816, "loss": 0.2315, "lr": 5.791388605815709e-06, "epoch": 5.436046511627907, "percentage": 77.66, "elapsed_time": "13:02:37", "remaining_time": "3:45:09"} +{"current_steps": 3745, "total_steps": 4816, "loss": 0.2164, "lr": 5.740467960758776e-06, "epoch": 5.4433139534883725, "percentage": 77.76, "elapsed_time": "13:03:41", "remaining_time": "3:44:07"} +{"current_steps": 3750, "total_steps": 4816, "loss": 0.2222, "lr": 5.68973462819223e-06, "epoch": 5.450581395348837, "percentage": 77.87, "elapsed_time": "13:04:44", "remaining_time": "3:43:04"} +{"current_steps": 3755, 
"total_steps": 4816, "loss": 0.2219, "lr": 5.63918927454657e-06, "epoch": 5.457848837209302, "percentage": 77.97, "elapsed_time": "13:05:44", "remaining_time": "3:42:00"} +{"current_steps": 3760, "total_steps": 4816, "loss": 0.2212, "lr": 5.588832563783e-06, "epoch": 5.465116279069767, "percentage": 78.07, "elapsed_time": "13:06:44", "remaining_time": "3:40:57"} +{"current_steps": 3765, "total_steps": 4816, "loss": 0.2249, "lr": 5.538665157384715e-06, "epoch": 5.472383720930233, "percentage": 78.18, "elapsed_time": "13:07:53", "remaining_time": "3:39:56"} +{"current_steps": 3770, "total_steps": 4816, "loss": 0.226, "lr": 5.48868771434822e-06, "epoch": 5.479651162790698, "percentage": 78.28, "elapsed_time": "13:08:52", "remaining_time": "3:38:52"} +{"current_steps": 3775, "total_steps": 4816, "loss": 0.2369, "lr": 5.438900891174686e-06, "epoch": 5.486918604651163, "percentage": 78.38, "elapsed_time": "13:09:56", "remaining_time": "3:37:50"} +{"current_steps": 3780, "total_steps": 4816, "loss": 0.2369, "lr": 5.389305341861293e-06, "epoch": 5.4941860465116275, "percentage": 78.49, "elapsed_time": "13:11:00", "remaining_time": "3:36:47"} +{"current_steps": 3785, "total_steps": 4816, "loss": 0.2198, "lr": 5.3399017178926614e-06, "epoch": 5.501453488372093, "percentage": 78.59, "elapsed_time": "13:12:01", "remaining_time": "3:35:44"} +{"current_steps": 3790, "total_steps": 4816, "loss": 0.2283, "lr": 5.290690668232301e-06, "epoch": 5.508720930232558, "percentage": 78.7, "elapsed_time": "13:13:07", "remaining_time": "3:34:42"} +{"current_steps": 3795, "total_steps": 4816, "loss": 0.2269, "lr": 5.2416728393140624e-06, "epoch": 5.515988372093023, "percentage": 78.8, "elapsed_time": "13:14:11", "remaining_time": "3:33:40"} +{"current_steps": 3800, "total_steps": 4816, "loss": 0.2257, "lr": 5.192848875033663e-06, "epoch": 5.523255813953488, "percentage": 78.9, "elapsed_time": "13:15:11", "remaining_time": "3:32:36"} +{"current_steps": 3805, "total_steps": 4816, "loss": 0.238, 
"lr": 5.144219416740217e-06, "epoch": 5.530523255813954, "percentage": 79.01, "elapsed_time": "13:16:15", "remaining_time": "3:31:34"} +{"current_steps": 3810, "total_steps": 4816, "loss": 0.222, "lr": 5.095785103227835e-06, "epoch": 5.537790697674419, "percentage": 79.11, "elapsed_time": "13:17:12", "remaining_time": "3:30:29"} +{"current_steps": 3815, "total_steps": 4816, "loss": 0.2166, "lr": 5.047546570727205e-06, "epoch": 5.545058139534884, "percentage": 79.22, "elapsed_time": "13:18:11", "remaining_time": "3:29:26"} +{"current_steps": 3820, "total_steps": 4816, "loss": 0.2208, "lr": 4.999504452897232e-06, "epoch": 5.5523255813953485, "percentage": 79.32, "elapsed_time": "13:19:17", "remaining_time": "3:28:24"} +{"current_steps": 3825, "total_steps": 4816, "loss": 0.2301, "lr": 4.95165938081676e-06, "epoch": 5.559593023255814, "percentage": 79.42, "elapsed_time": "13:20:23", "remaining_time": "3:27:22"} +{"current_steps": 3830, "total_steps": 4816, "loss": 0.2317, "lr": 4.9040119829762246e-06, "epoch": 5.566860465116279, "percentage": 79.53, "elapsed_time": "13:21:28", "remaining_time": "3:26:20"} +{"current_steps": 3835, "total_steps": 4816, "loss": 0.2166, "lr": 4.856562885269427e-06, "epoch": 5.574127906976744, "percentage": 79.63, "elapsed_time": "13:22:28", "remaining_time": "3:25:16"} +{"current_steps": 3840, "total_steps": 4816, "loss": 0.2366, "lr": 4.809312710985308e-06, "epoch": 5.5813953488372094, "percentage": 79.73, "elapsed_time": "13:23:34", "remaining_time": "3:24:14"} +{"current_steps": 3845, "total_steps": 4816, "loss": 0.2396, "lr": 4.762262080799771e-06, "epoch": 5.588662790697675, "percentage": 79.84, "elapsed_time": "13:24:38", "remaining_time": "3:23:12"} +{"current_steps": 3850, "total_steps": 4816, "loss": 0.2127, "lr": 4.715411612767508e-06, "epoch": 5.595930232558139, "percentage": 79.94, "elapsed_time": "13:25:45", "remaining_time": "3:22:10"} +{"current_steps": 3855, "total_steps": 4816, "loss": 0.2235, "lr": 4.668761922313893e-06, 
"epoch": 5.603197674418604, "percentage": 80.05, "elapsed_time": "13:26:49", "remaining_time": "3:21:07"} +{"current_steps": 3860, "total_steps": 4816, "loss": 0.2248, "lr": 4.622313622226888e-06, "epoch": 5.6104651162790695, "percentage": 80.15, "elapsed_time": "13:27:51", "remaining_time": "3:20:04"} +{"current_steps": 3865, "total_steps": 4816, "loss": 0.2391, "lr": 4.5760673226490245e-06, "epoch": 5.617732558139535, "percentage": 80.25, "elapsed_time": "13:28:54", "remaining_time": "3:19:02"} +{"current_steps": 3870, "total_steps": 4816, "loss": 0.2326, "lr": 4.530023631069342e-06, "epoch": 5.625, "percentage": 80.36, "elapsed_time": "13:29:53", "remaining_time": "3:17:58"} +{"current_steps": 3875, "total_steps": 4816, "loss": 0.2223, "lr": 4.484183152315435e-06, "epoch": 5.632267441860465, "percentage": 80.46, "elapsed_time": "13:30:57", "remaining_time": "3:16:55"} +{"current_steps": 3880, "total_steps": 4816, "loss": 0.2206, "lr": 4.438546488545516e-06, "epoch": 5.6395348837209305, "percentage": 80.56, "elapsed_time": "13:32:00", "remaining_time": "3:15:53"} +{"current_steps": 3885, "total_steps": 4816, "loss": 0.2206, "lr": 4.393114239240495e-06, "epoch": 5.646802325581396, "percentage": 80.67, "elapsed_time": "13:33:02", "remaining_time": "3:14:50"} +{"current_steps": 3890, "total_steps": 4816, "loss": 0.2185, "lr": 4.347887001196089e-06, "epoch": 5.654069767441861, "percentage": 80.77, "elapsed_time": "13:34:07", "remaining_time": "3:13:47"} +{"current_steps": 3895, "total_steps": 4816, "loss": 0.215, "lr": 4.302865368515002e-06, "epoch": 5.661337209302325, "percentage": 80.88, "elapsed_time": "13:35:09", "remaining_time": "3:12:44"} +{"current_steps": 3900, "total_steps": 4816, "loss": 0.2277, "lr": 4.2580499325991284e-06, "epoch": 5.6686046511627906, "percentage": 80.98, "elapsed_time": "13:36:10", "remaining_time": "3:11:41"} +{"current_steps": 3905, "total_steps": 4816, "loss": 0.2243, "lr": 4.213441282141762e-06, "epoch": 5.675872093023256, 
"percentage": 81.08, "elapsed_time": "13:37:05", "remaining_time": "3:10:37"} +{"current_steps": 3910, "total_steps": 4816, "loss": 0.2297, "lr": 4.169040003119871e-06, "epoch": 5.683139534883721, "percentage": 81.19, "elapsed_time": "13:38:05", "remaining_time": "3:09:33"} +{"current_steps": 3915, "total_steps": 4816, "loss": 0.2212, "lr": 4.124846678786405e-06, "epoch": 5.690406976744186, "percentage": 81.29, "elapsed_time": "13:39:04", "remaining_time": "3:08:30"} +{"current_steps": 3920, "total_steps": 4816, "loss": 0.2324, "lr": 4.080861889662642e-06, "epoch": 5.6976744186046515, "percentage": 81.4, "elapsed_time": "13:40:07", "remaining_time": "3:07:27"} +{"current_steps": 3925, "total_steps": 4816, "loss": 0.2401, "lr": 4.037086213530539e-06, "epoch": 5.704941860465116, "percentage": 81.5, "elapsed_time": "13:41:09", "remaining_time": "3:06:24"} +{"current_steps": 3930, "total_steps": 4816, "loss": 0.235, "lr": 3.993520225425154e-06, "epoch": 5.712209302325581, "percentage": 81.6, "elapsed_time": "13:42:10", "remaining_time": "3:05:21"} +{"current_steps": 3935, "total_steps": 4816, "loss": 0.2258, "lr": 3.9501644976271095e-06, "epoch": 5.719476744186046, "percentage": 81.71, "elapsed_time": "13:43:16", "remaining_time": "3:04:19"} +{"current_steps": 3940, "total_steps": 4816, "loss": 0.2205, "lr": 3.907019599655044e-06, "epoch": 5.726744186046512, "percentage": 81.81, "elapsed_time": "13:44:18", "remaining_time": "3:03:16"} +{"current_steps": 3945, "total_steps": 4816, "loss": 0.2239, "lr": 3.864086098258153e-06, "epoch": 5.734011627906977, "percentage": 81.91, "elapsed_time": "13:45:20", "remaining_time": "3:02:13"} +{"current_steps": 3950, "total_steps": 4816, "loss": 0.2194, "lr": 3.8213645574087286e-06, "epoch": 5.741279069767442, "percentage": 82.02, "elapsed_time": "13:46:23", "remaining_time": "3:01:10"} +{"current_steps": 3955, "total_steps": 4816, "loss": 0.2226, "lr": 3.778855538294779e-06, "epoch": 5.748546511627907, "percentage": 82.12, 
"elapsed_time": "13:47:31", "remaining_time": "3:00:08"} +{"current_steps": 3960, "total_steps": 4816, "loss": 0.227, "lr": 3.736559599312619e-06, "epoch": 5.7558139534883725, "percentage": 82.23, "elapsed_time": "13:48:29", "remaining_time": "2:59:05"} +{"current_steps": 3965, "total_steps": 4816, "loss": 0.2268, "lr": 3.6944772960595597e-06, "epoch": 5.763081395348837, "percentage": 82.33, "elapsed_time": "13:49:30", "remaining_time": "2:58:02"} +{"current_steps": 3970, "total_steps": 4816, "loss": 0.2338, "lr": 3.652609181326601e-06, "epoch": 5.770348837209302, "percentage": 82.43, "elapsed_time": "13:50:37", "remaining_time": "2:57:00"} +{"current_steps": 3975, "total_steps": 4816, "loss": 0.217, "lr": 3.610955805091185e-06, "epoch": 5.777616279069767, "percentage": 82.54, "elapsed_time": "13:51:35", "remaining_time": "2:55:56"} +{"current_steps": 3980, "total_steps": 4816, "loss": 0.2204, "lr": 3.569517714509947e-06, "epoch": 5.784883720930233, "percentage": 82.64, "elapsed_time": "13:52:35", "remaining_time": "2:54:53"} +{"current_steps": 3985, "total_steps": 4816, "loss": 0.2307, "lr": 3.528295453911541e-06, "epoch": 5.792151162790698, "percentage": 82.75, "elapsed_time": "13:53:39", "remaining_time": "2:53:50"} +{"current_steps": 3990, "total_steps": 4816, "loss": 0.229, "lr": 3.4872895647895045e-06, "epoch": 5.799418604651163, "percentage": 82.85, "elapsed_time": "13:54:44", "remaining_time": "2:52:48"} +{"current_steps": 3995, "total_steps": 4816, "loss": 0.2182, "lr": 3.446500585795112e-06, "epoch": 5.8066860465116275, "percentage": 82.95, "elapsed_time": "13:55:49", "remaining_time": "2:51:45"} +{"current_steps": 4000, "total_steps": 4816, "loss": 0.2414, "lr": 3.4059290527303256e-06, "epoch": 5.813953488372093, "percentage": 83.06, "elapsed_time": "13:56:54", "remaining_time": "2:50:43"} +{"current_steps": 4005, "total_steps": 4816, "loss": 0.2335, "lr": 3.3655754985407453e-06, "epoch": 5.821220930232558, "percentage": 83.16, "elapsed_time": 
"13:57:53", "remaining_time": "2:49:40"} +{"current_steps": 4010, "total_steps": 4816, "loss": 0.2248, "lr": 3.3254404533086216e-06, "epoch": 5.828488372093023, "percentage": 83.26, "elapsed_time": "13:58:55", "remaining_time": "2:48:37"} +{"current_steps": 4015, "total_steps": 4816, "loss": 0.2373, "lr": 3.285524444245873e-06, "epoch": 5.835755813953488, "percentage": 83.37, "elapsed_time": "13:59:57", "remaining_time": "2:47:34"} +{"current_steps": 4020, "total_steps": 4816, "loss": 0.2215, "lr": 3.245827995687165e-06, "epoch": 5.843023255813954, "percentage": 83.47, "elapsed_time": "14:01:01", "remaining_time": "2:46:31"} +{"current_steps": 4025, "total_steps": 4816, "loss": 0.2311, "lr": 3.2063516290830445e-06, "epoch": 5.850290697674419, "percentage": 83.58, "elapsed_time": "14:02:03", "remaining_time": "2:45:28"} +{"current_steps": 4030, "total_steps": 4816, "loss": 0.2328, "lr": 3.1670958629930595e-06, "epoch": 5.857558139534884, "percentage": 83.68, "elapsed_time": "14:03:03", "remaining_time": "2:44:25"} +{"current_steps": 4035, "total_steps": 4816, "loss": 0.227, "lr": 3.1280612130789633e-06, "epoch": 5.8648255813953485, "percentage": 83.78, "elapsed_time": "14:04:07", "remaining_time": "2:43:23"} +{"current_steps": 4040, "total_steps": 4816, "loss": 0.2216, "lr": 3.0892481920979355e-06, "epoch": 5.872093023255814, "percentage": 83.89, "elapsed_time": "14:05:15", "remaining_time": "2:42:21"} +{"current_steps": 4045, "total_steps": 4816, "loss": 0.2299, "lr": 3.0506573098958613e-06, "epoch": 5.879360465116279, "percentage": 83.99, "elapsed_time": "14:06:20", "remaining_time": "2:41:19"} +{"current_steps": 4050, "total_steps": 4816, "loss": 0.222, "lr": 3.0122890734006114e-06, "epoch": 5.886627906976744, "percentage": 84.09, "elapsed_time": "14:07:23", "remaining_time": "2:40:16"} +{"current_steps": 4055, "total_steps": 4816, "loss": 0.2246, "lr": 2.97414398661539e-06, "epoch": 5.8938953488372094, "percentage": 84.2, "elapsed_time": "14:08:26", 
"remaining_time": "2:39:13"} +{"current_steps": 4060, "total_steps": 4816, "loss": 0.2167, "lr": 2.9362225506121357e-06, "epoch": 5.901162790697675, "percentage": 84.3, "elapsed_time": "14:09:30", "remaining_time": "2:38:11"} +{"current_steps": 4065, "total_steps": 4816, "loss": 0.2189, "lr": 2.8985252635249026e-06, "epoch": 5.908430232558139, "percentage": 84.41, "elapsed_time": "14:10:33", "remaining_time": "2:37:08"} +{"current_steps": 4070, "total_steps": 4816, "loss": 0.2314, "lr": 2.8610526205433476e-06, "epoch": 5.915697674418604, "percentage": 84.51, "elapsed_time": "14:11:42", "remaining_time": "2:36:06"} +{"current_steps": 4075, "total_steps": 4816, "loss": 0.218, "lr": 2.823805113906204e-06, "epoch": 5.9229651162790695, "percentage": 84.61, "elapsed_time": "14:12:44", "remaining_time": "2:35:03"} +{"current_steps": 4080, "total_steps": 4816, "loss": 0.2138, "lr": 2.7867832328948385e-06, "epoch": 5.930232558139535, "percentage": 84.72, "elapsed_time": "14:13:53", "remaining_time": "2:34:02"} +{"current_steps": 4085, "total_steps": 4816, "loss": 0.2265, "lr": 2.7499874638268044e-06, "epoch": 5.9375, "percentage": 84.82, "elapsed_time": "14:14:54", "remaining_time": "2:32:59"} +{"current_steps": 4090, "total_steps": 4816, "loss": 0.2314, "lr": 2.7134182900494542e-06, "epoch": 5.944767441860465, "percentage": 84.93, "elapsed_time": "14:15:51", "remaining_time": "2:31:55"} +{"current_steps": 4095, "total_steps": 4816, "loss": 0.2226, "lr": 2.6770761919336098e-06, "epoch": 5.9520348837209305, "percentage": 85.03, "elapsed_time": "14:16:53", "remaining_time": "2:30:52"} +{"current_steps": 4100, "total_steps": 4816, "loss": 0.2412, "lr": 2.640961646867224e-06, "epoch": 5.959302325581396, "percentage": 85.13, "elapsed_time": "14:17:59", "remaining_time": "2:29:50"} +{"current_steps": 4105, "total_steps": 4816, "loss": 0.2221, "lr": 2.605075129249135e-06, "epoch": 5.966569767441861, "percentage": 85.24, "elapsed_time": "14:19:06", "remaining_time": "2:28:47"} 
+{"current_steps": 4110, "total_steps": 4816, "loss": 0.2253, "lr": 2.5694171104828146e-06, "epoch": 5.973837209302325, "percentage": 85.34, "elapsed_time": "14:20:03", "remaining_time": "2:27:44"} +{"current_steps": 4115, "total_steps": 4816, "loss": 0.2212, "lr": 2.533988058970198e-06, "epoch": 5.9811046511627906, "percentage": 85.44, "elapsed_time": "14:21:01", "remaining_time": "2:26:40"} +{"current_steps": 4120, "total_steps": 4816, "loss": 0.224, "lr": 2.498788440105506e-06, "epoch": 5.988372093023256, "percentage": 85.55, "elapsed_time": "14:21:59", "remaining_time": "2:25:37"} +{"current_steps": 4125, "total_steps": 4816, "loss": 0.2239, "lr": 2.4638187162691487e-06, "epoch": 5.995639534883721, "percentage": 85.65, "elapsed_time": "14:23:01", "remaining_time": "2:24:34"} +{"current_steps": 4130, "total_steps": 4816, "loss": 0.2187, "lr": 2.42907934682165e-06, "epoch": 6.002906976744186, "percentage": 85.76, "elapsed_time": "14:24:01", "remaining_time": "2:23:30"} +{"current_steps": 4135, "total_steps": 4816, "loss": 0.218, "lr": 2.3945707880976034e-06, "epoch": 6.0101744186046515, "percentage": 85.86, "elapsed_time": "14:25:07", "remaining_time": "2:22:28"} +{"current_steps": 4140, "total_steps": 4816, "loss": 0.2308, "lr": 2.36029349339969e-06, "epoch": 6.017441860465116, "percentage": 85.96, "elapsed_time": "14:26:06", "remaining_time": "2:21:25"} +{"current_steps": 4145, "total_steps": 4816, "loss": 0.2236, "lr": 2.3262479129927116e-06, "epoch": 6.024709302325581, "percentage": 86.07, "elapsed_time": "14:27:09", "remaining_time": "2:20:22"} +{"current_steps": 4150, "total_steps": 4816, "loss": 0.2211, "lr": 2.2924344940976975e-06, "epoch": 6.031976744186046, "percentage": 86.17, "elapsed_time": "14:28:17", "remaining_time": "2:19:20"} +{"current_steps": 4155, "total_steps": 4816, "loss": 0.2125, "lr": 2.2588536808859975e-06, "epoch": 6.039244186046512, "percentage": 86.27, "elapsed_time": "14:29:17", "remaining_time": "2:18:17"} +{"current_steps": 4160, 
"total_steps": 4816, "loss": 0.2101, "lr": 2.225505914473469e-06, "epoch": 6.046511627906977, "percentage": 86.38, "elapsed_time": "14:30:16", "remaining_time": "2:17:14"} +{"current_steps": 4165, "total_steps": 4816, "loss": 0.2182, "lr": 2.19239163291469e-06, "epoch": 6.053779069767442, "percentage": 86.48, "elapsed_time": "14:31:19", "remaining_time": "2:16:11"} +{"current_steps": 4170, "total_steps": 4816, "loss": 0.2243, "lr": 2.1595112711971835e-06, "epoch": 6.061046511627907, "percentage": 86.59, "elapsed_time": "14:32:24", "remaining_time": "2:15:08"} +{"current_steps": 4175, "total_steps": 4816, "loss": 0.2347, "lr": 2.1268652612357153e-06, "epoch": 6.068313953488372, "percentage": 86.69, "elapsed_time": "14:33:20", "remaining_time": "2:14:05"} +{"current_steps": 4180, "total_steps": 4816, "loss": 0.2093, "lr": 2.0944540318666107e-06, "epoch": 6.075581395348837, "percentage": 86.79, "elapsed_time": "14:34:19", "remaining_time": "2:13:01"} +{"current_steps": 4185, "total_steps": 4816, "loss": 0.2354, "lr": 2.062278008842147e-06, "epoch": 6.082848837209302, "percentage": 86.9, "elapsed_time": "14:35:21", "remaining_time": "2:11:59"} +{"current_steps": 4190, "total_steps": 4816, "loss": 0.2242, "lr": 2.030337614824929e-06, "epoch": 6.090116279069767, "percentage": 87.0, "elapsed_time": "14:36:24", "remaining_time": "2:10:56"} +{"current_steps": 4195, "total_steps": 4816, "loss": 0.2206, "lr": 1.9986332693823487e-06, "epoch": 6.097383720930233, "percentage": 87.11, "elapsed_time": "14:37:22", "remaining_time": "2:09:52"} +{"current_steps": 4200, "total_steps": 4816, "loss": 0.2212, "lr": 1.9671653889810893e-06, "epoch": 6.104651162790698, "percentage": 87.21, "elapsed_time": "14:38:26", "remaining_time": "2:08:50"} +{"current_steps": 4205, "total_steps": 4816, "loss": 0.2173, "lr": 1.9359343869816307e-06, "epoch": 6.111918604651163, "percentage": 87.31, "elapsed_time": "14:39:23", "remaining_time": "2:07:46"} +{"current_steps": 4210, "total_steps": 4816, 
"loss": 0.2119, "lr": 1.9049406736328336e-06, "epoch": 6.119186046511628, "percentage": 87.42, "elapsed_time": "14:40:25", "remaining_time": "2:06:43"} +{"current_steps": 4215, "total_steps": 4816, "loss": 0.2188, "lr": 1.87418465606654e-06, "epoch": 6.126453488372093, "percentage": 87.52, "elapsed_time": "14:41:24", "remaining_time": "2:05:40"} +{"current_steps": 4220, "total_steps": 4816, "loss": 0.2197, "lr": 1.8436667382922468e-06, "epoch": 6.133720930232558, "percentage": 87.62, "elapsed_time": "14:42:23", "remaining_time": "2:04:37"} +{"current_steps": 4225, "total_steps": 4816, "loss": 0.2214, "lr": 1.8133873211917686e-06, "epoch": 6.140988372093023, "percentage": 87.73, "elapsed_time": "14:43:26", "remaining_time": "2:03:34"} +{"current_steps": 4230, "total_steps": 4816, "loss": 0.2334, "lr": 1.783346802514001e-06, "epoch": 6.148255813953488, "percentage": 87.83, "elapsed_time": "14:44:30", "remaining_time": "2:02:32"} +{"current_steps": 4235, "total_steps": 4816, "loss": 0.2243, "lr": 1.7535455768696686e-06, "epoch": 6.155523255813954, "percentage": 87.94, "elapsed_time": "14:45:34", "remaining_time": "2:01:29"} +{"current_steps": 4240, "total_steps": 4816, "loss": 0.2234, "lr": 1.7239840357261695e-06, "epoch": 6.162790697674419, "percentage": 88.04, "elapsed_time": "14:46:39", "remaining_time": "2:00:27"} +{"current_steps": 4245, "total_steps": 4816, "loss": 0.2216, "lr": 1.6946625674024053e-06, "epoch": 6.170058139534884, "percentage": 88.14, "elapsed_time": "14:47:43", "remaining_time": "1:59:24"} +{"current_steps": 4250, "total_steps": 4816, "loss": 0.2249, "lr": 1.6655815570637002e-06, "epoch": 6.1773255813953485, "percentage": 88.25, "elapsed_time": "14:48:42", "remaining_time": "1:58:21"} +{"current_steps": 4255, "total_steps": 4816, "loss": 0.2216, "lr": 1.636741386716727e-06, "epoch": 6.184593023255814, "percentage": 88.35, "elapsed_time": "14:49:36", "remaining_time": "1:57:17"} +{"current_steps": 4260, "total_steps": 4816, "loss": 0.2165, "lr": 
1.6081424352045093e-06, "epoch": 6.191860465116279, "percentage": 88.46, "elapsed_time": "14:50:43", "remaining_time": "1:56:15"} +{"current_steps": 4265, "total_steps": 4816, "loss": 0.2177, "lr": 1.5797850782014236e-06, "epoch": 6.199127906976744, "percentage": 88.56, "elapsed_time": "14:51:49", "remaining_time": "1:55:12"} +{"current_steps": 4270, "total_steps": 4816, "loss": 0.2202, "lr": 1.5516696882082704e-06, "epoch": 6.2063953488372094, "percentage": 88.66, "elapsed_time": "14:52:54", "remaining_time": "1:54:10"} +{"current_steps": 4275, "total_steps": 4816, "loss": 0.2238, "lr": 1.5237966345473942e-06, "epoch": 6.213662790697675, "percentage": 88.77, "elapsed_time": "14:53:59", "remaining_time": "1:53:08"} +{"current_steps": 4280, "total_steps": 4816, "loss": 0.2218, "lr": 1.4961662833578117e-06, "epoch": 6.22093023255814, "percentage": 88.87, "elapsed_time": "14:54:58", "remaining_time": "1:52:04"} +{"current_steps": 4285, "total_steps": 4816, "loss": 0.2269, "lr": 1.4687789975904188e-06, "epoch": 6.228197674418604, "percentage": 88.97, "elapsed_time": "14:55:59", "remaining_time": "1:51:01"} +{"current_steps": 4290, "total_steps": 4816, "loss": 0.2202, "lr": 1.4416351370032077e-06, "epoch": 6.2354651162790695, "percentage": 89.08, "elapsed_time": "14:57:04", "remaining_time": "1:49:59"} +{"current_steps": 4295, "total_steps": 4816, "loss": 0.2265, "lr": 1.4147350581565644e-06, "epoch": 6.242732558139535, "percentage": 89.18, "elapsed_time": "14:58:05", "remaining_time": "1:48:56"} +{"current_steps": 4300, "total_steps": 4816, "loss": 0.2201, "lr": 1.3880791144085582e-06, "epoch": 6.25, "percentage": 89.29, "elapsed_time": "14:59:07", "remaining_time": "1:47:53"} +{"current_steps": 4305, "total_steps": 4816, "loss": 0.2205, "lr": 1.3616676559103104e-06, "epoch": 6.257267441860465, "percentage": 89.39, "elapsed_time": "15:00:10", "remaining_time": "1:46:51"} +{"current_steps": 4310, "total_steps": 4816, "loss": 0.2214, "lr": 1.3355010296014114e-06, 
"epoch": 6.2645348837209305, "percentage": 89.49, "elapsed_time": "15:01:14", "remaining_time": "1:45:48"} +{"current_steps": 4315, "total_steps": 4816, "loss": 0.2213, "lr": 1.3095795792053333e-06, "epoch": 6.271802325581396, "percentage": 89.6, "elapsed_time": "15:02:15", "remaining_time": "1:44:45"} +{"current_steps": 4320, "total_steps": 4816, "loss": 0.2246, "lr": 1.2839036452249354e-06, "epoch": 6.27906976744186, "percentage": 89.7, "elapsed_time": "15:03:21", "remaining_time": "1:43:43"} +{"current_steps": 4325, "total_steps": 4816, "loss": 0.2174, "lr": 1.258473564937981e-06, "epoch": 6.286337209302325, "percentage": 89.8, "elapsed_time": "15:04:26", "remaining_time": "1:42:40"} +{"current_steps": 4330, "total_steps": 4816, "loss": 0.2206, "lr": 1.2332896723927257e-06, "epoch": 6.2936046511627906, "percentage": 89.91, "elapsed_time": "15:05:29", "remaining_time": "1:41:37"} +{"current_steps": 4335, "total_steps": 4816, "loss": 0.2214, "lr": 1.2083522984035012e-06, "epoch": 6.300872093023256, "percentage": 90.01, "elapsed_time": "15:06:32", "remaining_time": "1:40:35"} +{"current_steps": 4340, "total_steps": 4816, "loss": 0.2205, "lr": 1.183661770546387e-06, "epoch": 6.308139534883721, "percentage": 90.12, "elapsed_time": "15:07:35", "remaining_time": "1:39:32"} +{"current_steps": 4345, "total_steps": 4816, "loss": 0.2246, "lr": 1.1592184131549056e-06, "epoch": 6.315406976744186, "percentage": 90.22, "elapsed_time": "15:08:39", "remaining_time": "1:38:29"} +{"current_steps": 4350, "total_steps": 4816, "loss": 0.2159, "lr": 1.1350225473157672e-06, "epoch": 6.3226744186046515, "percentage": 90.32, "elapsed_time": "15:09:44", "remaining_time": "1:37:27"} +{"current_steps": 4355, "total_steps": 4816, "loss": 0.2126, "lr": 1.1110744908646365e-06, "epoch": 6.329941860465116, "percentage": 90.43, "elapsed_time": "15:10:50", "remaining_time": "1:36:25"} +{"current_steps": 4360, "total_steps": 4816, "loss": 0.2242, "lr": 1.0873745583819661e-06, "epoch": 
6.337209302325581, "percentage": 90.53, "elapsed_time": "15:11:48", "remaining_time": "1:35:21"} +{"current_steps": 4365, "total_steps": 4816, "loss": 0.2175, "lr": 1.063923061188874e-06, "epoch": 6.344476744186046, "percentage": 90.64, "elapsed_time": "15:12:50", "remaining_time": "1:34:19"} +{"current_steps": 4370, "total_steps": 4816, "loss": 0.2226, "lr": 1.040720307343035e-06, "epoch": 6.351744186046512, "percentage": 90.74, "elapsed_time": "15:13:53", "remaining_time": "1:33:16"} +{"current_steps": 4375, "total_steps": 4816, "loss": 0.2207, "lr": 1.017766601634651e-06, "epoch": 6.359011627906977, "percentage": 90.84, "elapsed_time": "15:14:56", "remaining_time": "1:32:13"} +{"current_steps": 4380, "total_steps": 4816, "loss": 0.218, "lr": 9.95062245582432e-07, "epoch": 6.366279069767442, "percentage": 90.95, "elapsed_time": "15:15:59", "remaining_time": "1:31:10"} +{"current_steps": 4385, "total_steps": 4816, "loss": 0.2113, "lr": 9.72607537429655e-07, "epoch": 6.373546511627907, "percentage": 91.05, "elapsed_time": "15:16:59", "remaining_time": "1:30:07"} +{"current_steps": 4390, "total_steps": 4816, "loss": 0.2222, "lr": 9.504027721402264e-07, "epoch": 6.3808139534883725, "percentage": 91.15, "elapsed_time": "15:18:01", "remaining_time": "1:29:05"} +{"current_steps": 4395, "total_steps": 4816, "loss": 0.2213, "lr": 9.284482413948148e-07, "epoch": 6.388081395348837, "percentage": 91.26, "elapsed_time": "15:19:07", "remaining_time": "1:28:02"} +{"current_steps": 4400, "total_steps": 4816, "loss": 0.2175, "lr": 9.067442335870313e-07, "epoch": 6.395348837209302, "percentage": 91.36, "elapsed_time": "15:20:06", "remaining_time": "1:26:59"} +{"current_steps": 4405, "total_steps": 4816, "loss": 0.2141, "lr": 8.852910338196152e-07, "epoch": 6.402616279069767, "percentage": 91.47, "elapsed_time": "15:21:06", "remaining_time": "1:25:56"} +{"current_steps": 4410, "total_steps": 4816, "loss": 0.222, "lr": 8.640889239007166e-07, "epoch": 6.409883720930233, "percentage": 
91.57, "elapsed_time": "15:22:09", "remaining_time": "1:24:53"} +{"current_steps": 4415, "total_steps": 4816, "loss": 0.2209, "lr": 8.431381823401708e-07, "epoch": 6.417151162790698, "percentage": 91.67, "elapsed_time": "15:23:11", "remaining_time": "1:23:51"} +{"current_steps": 4420, "total_steps": 4816, "loss": 0.2207, "lr": 8.224390843458652e-07, "epoch": 6.424418604651163, "percentage": 91.78, "elapsed_time": "15:24:14", "remaining_time": "1:22:48"} +{"current_steps": 4425, "total_steps": 4816, "loss": 0.22, "lr": 8.019919018201005e-07, "epoch": 6.4316860465116275, "percentage": 91.88, "elapsed_time": "15:25:17", "remaining_time": "1:21:45"} +{"current_steps": 4430, "total_steps": 4816, "loss": 0.2226, "lr": 7.817969033560246e-07, "epoch": 6.438953488372093, "percentage": 91.99, "elapsed_time": "15:26:25", "remaining_time": "1:20:43"} +{"current_steps": 4435, "total_steps": 4816, "loss": 0.2168, "lr": 7.618543542341217e-07, "epoch": 6.446220930232558, "percentage": 92.09, "elapsed_time": "15:27:23", "remaining_time": "1:19:40"} +{"current_steps": 4440, "total_steps": 4816, "loss": 0.2207, "lr": 7.421645164187019e-07, "epoch": 6.453488372093023, "percentage": 92.19, "elapsed_time": "15:28:26", "remaining_time": "1:18:37"} +{"current_steps": 4445, "total_steps": 4816, "loss": 0.2136, "lr": 7.227276485544798e-07, "epoch": 6.460755813953488, "percentage": 92.3, "elapsed_time": "15:29:28", "remaining_time": "1:17:34"} +{"current_steps": 4450, "total_steps": 4816, "loss": 0.2087, "lr": 7.035440059631616e-07, "epoch": 6.468023255813954, "percentage": 92.4, "elapsed_time": "15:30:27", "remaining_time": "1:16:31"} +{"current_steps": 4455, "total_steps": 4816, "loss": 0.224, "lr": 6.846138406401137e-07, "epoch": 6.475290697674419, "percentage": 92.5, "elapsed_time": "15:31:28", "remaining_time": "1:15:28"} +{"current_steps": 4460, "total_steps": 4816, "loss": 0.2215, "lr": 6.6593740125102e-07, "epoch": 6.482558139534884, "percentage": 92.61, "elapsed_time": "15:32:26", 
"remaining_time": "1:14:25"} +{"current_steps": 4465, "total_steps": 4816, "loss": 0.2219, "lr": 6.475149331286457e-07, "epoch": 6.4898255813953485, "percentage": 92.71, "elapsed_time": "15:33:30", "remaining_time": "1:13:23"} +{"current_steps": 4470, "total_steps": 4816, "loss": 0.2244, "lr": 6.293466782696001e-07, "epoch": 6.497093023255814, "percentage": 92.82, "elapsed_time": "15:34:38", "remaining_time": "1:12:20"} +{"current_steps": 4475, "total_steps": 4816, "loss": 0.2307, "lr": 6.114328753311572e-07, "epoch": 6.504360465116279, "percentage": 92.92, "elapsed_time": "15:35:38", "remaining_time": "1:11:17"} +{"current_steps": 4480, "total_steps": 4816, "loss": 0.2319, "lr": 5.937737596281223e-07, "epoch": 6.511627906976744, "percentage": 93.02, "elapsed_time": "15:36:41", "remaining_time": "1:10:15"} +{"current_steps": 4485, "total_steps": 4816, "loss": 0.2294, "lr": 5.763695631297483e-07, "epoch": 6.5188953488372094, "percentage": 93.13, "elapsed_time": "15:37:44", "remaining_time": "1:09:12"} +{"current_steps": 4490, "total_steps": 4816, "loss": 0.2073, "lr": 5.592205144566753e-07, "epoch": 6.526162790697675, "percentage": 93.23, "elapsed_time": "15:38:45", "remaining_time": "1:08:09"} +{"current_steps": 4495, "total_steps": 4816, "loss": 0.2227, "lr": 5.423268388779424e-07, "epoch": 6.533430232558139, "percentage": 93.33, "elapsed_time": "15:39:47", "remaining_time": "1:07:06"} +{"current_steps": 4500, "total_steps": 4816, "loss": 0.2109, "lr": 5.256887583080094e-07, "epoch": 6.540697674418604, "percentage": 93.44, "elapsed_time": "15:40:55", "remaining_time": "1:06:04"} +{"current_steps": 4505, "total_steps": 4816, "loss": 0.2118, "lr": 5.093064913038648e-07, "epoch": 6.5479651162790695, "percentage": 93.54, "elapsed_time": "15:42:34", "remaining_time": "1:05:04"} +{"current_steps": 4510, "total_steps": 4816, "loss": 0.2096, "lr": 4.931802530621376e-07, "epoch": 6.555232558139535, "percentage": 93.65, "elapsed_time": "15:43:39", "remaining_time": 
"1:04:01"} +{"current_steps": 4515, "total_steps": 4816, "loss": 0.2288, "lr": 4.773102554162768e-07, "epoch": 6.5625, "percentage": 93.75, "elapsed_time": "15:44:42", "remaining_time": "1:02:58"} +{"current_steps": 4520, "total_steps": 4816, "loss": 0.215, "lr": 4.61696706833763e-07, "epoch": 6.569767441860465, "percentage": 93.85, "elapsed_time": "15:45:47", "remaining_time": "1:01:56"} +{"current_steps": 4525, "total_steps": 4816, "loss": 0.2195, "lr": 4.4633981241338333e-07, "epoch": 6.5770348837209305, "percentage": 93.96, "elapsed_time": "15:46:52", "remaining_time": "1:00:53"} +{"current_steps": 4530, "total_steps": 4816, "loss": 0.2258, "lr": 4.312397738825236e-07, "epoch": 6.584302325581396, "percentage": 94.06, "elapsed_time": "15:47:57", "remaining_time": "0:59:50"} +{"current_steps": 4535, "total_steps": 4816, "loss": 0.2183, "lr": 4.163967895945242e-07, "epoch": 6.591569767441861, "percentage": 94.17, "elapsed_time": "15:48:59", "remaining_time": "0:58:48"} +{"current_steps": 4540, "total_steps": 4816, "loss": 0.2142, "lr": 4.0181105452607563e-07, "epoch": 6.598837209302325, "percentage": 94.27, "elapsed_time": "15:49:56", "remaining_time": "0:57:45"} +{"current_steps": 4545, "total_steps": 4816, "loss": 0.2184, "lr": 3.874827602746556e-07, "epoch": 6.6061046511627906, "percentage": 94.37, "elapsed_time": "15:51:05", "remaining_time": "0:56:42"} +{"current_steps": 4550, "total_steps": 4816, "loss": 0.2242, "lr": 3.734120950560116e-07, "epoch": 6.613372093023256, "percentage": 94.48, "elapsed_time": "15:52:09", "remaining_time": "0:55:39"} +{"current_steps": 4555, "total_steps": 4816, "loss": 0.2143, "lr": 3.5959924370168487e-07, "epoch": 6.620639534883721, "percentage": 94.58, "elapsed_time": "15:53:14", "remaining_time": "0:54:37"} +{"current_steps": 4560, "total_steps": 4816, "loss": 0.2226, "lr": 3.4604438765659445e-07, "epoch": 6.627906976744186, "percentage": 94.68, "elapsed_time": "15:54:21", "remaining_time": "0:53:34"} +{"current_steps": 4565, 
"total_steps": 4816, "loss": 0.2208, "lr": 3.3274770497664365e-07, "epoch": 6.6351744186046515, "percentage": 94.79, "elapsed_time": "15:55:21", "remaining_time": "0:52:31"} +{"current_steps": 4570, "total_steps": 4816, "loss": 0.2171, "lr": 3.1970937032638206e-07, "epoch": 6.642441860465116, "percentage": 94.89, "elapsed_time": "15:56:24", "remaining_time": "0:51:28"} +{"current_steps": 4575, "total_steps": 4816, "loss": 0.2187, "lr": 3.0692955497670705e-07, "epoch": 6.649709302325581, "percentage": 95.0, "elapsed_time": "15:57:27", "remaining_time": "0:50:26"} +{"current_steps": 4580, "total_steps": 4816, "loss": 0.2285, "lr": 2.944084268026326e-07, "epoch": 6.656976744186046, "percentage": 95.1, "elapsed_time": "15:58:27", "remaining_time": "0:49:23"} +{"current_steps": 4585, "total_steps": 4816, "loss": 0.2193, "lr": 2.821461502810641e-07, "epoch": 6.664244186046512, "percentage": 95.2, "elapsed_time": "15:59:32", "remaining_time": "0:48:20"} +{"current_steps": 4590, "total_steps": 4816, "loss": 0.211, "lr": 2.701428864886402e-07, "epoch": 6.671511627906977, "percentage": 95.31, "elapsed_time": "16:00:36", "remaining_time": "0:47:17"} +{"current_steps": 4595, "total_steps": 4816, "loss": 0.2204, "lr": 2.583987930996279e-07, "epoch": 6.678779069767442, "percentage": 95.41, "elapsed_time": "16:01:41", "remaining_time": "0:46:15"} +{"current_steps": 4600, "total_steps": 4816, "loss": 0.2308, "lr": 2.469140243838464e-07, "epoch": 6.686046511627907, "percentage": 95.51, "elapsed_time": "16:02:41", "remaining_time": "0:45:12"} +{"current_steps": 4605, "total_steps": 4816, "loss": 0.2237, "lr": 2.3568873120462854e-07, "epoch": 6.6933139534883725, "percentage": 95.62, "elapsed_time": "16:03:44", "remaining_time": "0:44:09"} +{"current_steps": 4610, "total_steps": 4816, "loss": 0.2172, "lr": 2.247230610168627e-07, "epoch": 6.700581395348837, "percentage": 95.72, "elapsed_time": "16:04:49", "remaining_time": "0:43:06"} +{"current_steps": 4615, "total_steps": 4816, 
"loss": 0.2064, "lr": 2.1401715786503408e-07, "epoch": 6.707848837209302, "percentage": 95.83, "elapsed_time": "16:05:57", "remaining_time": "0:42:04"} +{"current_steps": 4620, "total_steps": 4816, "loss": 0.2172, "lr": 2.0357116238134633e-07, "epoch": 6.715116279069767, "percentage": 95.93, "elapsed_time": "16:06:59", "remaining_time": "0:41:01"} +{"current_steps": 4625, "total_steps": 4816, "loss": 0.2203, "lr": 1.9338521178386304e-07, "epoch": 6.722383720930233, "percentage": 96.03, "elapsed_time": "16:08:01", "remaining_time": "0:39:58"} +{"current_steps": 4630, "total_steps": 4816, "loss": 0.2162, "lr": 1.8345943987471804e-07, "epoch": 6.729651162790698, "percentage": 96.14, "elapsed_time": "16:09:04", "remaining_time": "0:38:55"} +{"current_steps": 4635, "total_steps": 4816, "loss": 0.2122, "lr": 1.7379397703834788e-07, "epoch": 6.736918604651163, "percentage": 96.24, "elapsed_time": "16:10:12", "remaining_time": "0:37:53"} +{"current_steps": 4640, "total_steps": 4816, "loss": 0.2259, "lr": 1.643889502397844e-07, "epoch": 6.7441860465116275, "percentage": 96.35, "elapsed_time": "16:11:16", "remaining_time": "0:36:50"} +{"current_steps": 4645, "total_steps": 4816, "loss": 0.2249, "lr": 1.5524448302297822e-07, "epoch": 6.751453488372093, "percentage": 96.45, "elapsed_time": "16:12:15", "remaining_time": "0:35:47"} +{"current_steps": 4650, "total_steps": 4816, "loss": 0.2201, "lr": 1.4636069550919118e-07, "epoch": 6.758720930232558, "percentage": 96.55, "elapsed_time": "16:13:18", "remaining_time": "0:34:44"} +{"current_steps": 4655, "total_steps": 4816, "loss": 0.2267, "lr": 1.3773770439540646e-07, "epoch": 6.765988372093023, "percentage": 96.66, "elapsed_time": "16:14:17", "remaining_time": "0:33:41"} +{"current_steps": 4660, "total_steps": 4816, "loss": 0.2163, "lr": 1.2937562295279648e-07, "epoch": 6.773255813953488, "percentage": 96.76, "elapsed_time": "16:15:15", "remaining_time": "0:32:38"} +{"current_steps": 4665, "total_steps": 4816, "loss": 0.2263, 
"lr": 1.2127456102523748e-07, "epoch": 6.780523255813954, "percentage": 96.86, "elapsed_time": "16:16:19", "remaining_time": "0:31:36"} +{"current_steps": 4670, "total_steps": 4816, "loss": 0.2252, "lr": 1.1343462502787506e-07, "epoch": 6.787790697674419, "percentage": 96.97, "elapsed_time": "16:17:15", "remaining_time": "0:30:33"} +{"current_steps": 4675, "total_steps": 4816, "loss": 0.2193, "lr": 1.0585591794570527e-07, "epoch": 6.795058139534884, "percentage": 97.07, "elapsed_time": "16:18:23", "remaining_time": "0:29:30"} +{"current_steps": 4680, "total_steps": 4816, "loss": 0.2213, "lr": 9.853853933224244e-08, "epoch": 6.8023255813953485, "percentage": 97.18, "elapsed_time": "16:19:24", "remaining_time": "0:28:27"} +{"current_steps": 4685, "total_steps": 4816, "loss": 0.2186, "lr": 9.14825853082002e-08, "epoch": 6.809593023255814, "percentage": 97.28, "elapsed_time": "16:20:30", "remaining_time": "0:27:24"} +{"current_steps": 4690, "total_steps": 4816, "loss": 0.2116, "lr": 8.468814856023466e-08, "epoch": 6.816860465116279, "percentage": 97.38, "elapsed_time": "16:21:30", "remaining_time": "0:26:22"} +{"current_steps": 4695, "total_steps": 4816, "loss": 0.2293, "lr": 7.815531833972323e-08, "epoch": 6.824127906976744, "percentage": 97.49, "elapsed_time": "16:22:30", "remaining_time": "0:25:19"} +{"current_steps": 4700, "total_steps": 4816, "loss": 0.2206, "lr": 7.188418046158996e-08, "epoch": 6.8313953488372094, "percentage": 97.59, "elapsed_time": "16:23:34", "remaining_time": "0:24:16"} +{"current_steps": 4705, "total_steps": 4816, "loss": 0.2113, "lr": 6.587481730318645e-08, "epoch": 6.838662790697675, "percentage": 97.7, "elapsed_time": "16:24:35", "remaining_time": "0:23:13"} +{"current_steps": 4710, "total_steps": 4816, "loss": 0.2271, "lr": 6.012730780320163e-08, "epoch": 6.845930232558139, "percentage": 97.8, "elapsed_time": "16:25:38", "remaining_time": "0:22:10"} +{"current_steps": 4715, "total_steps": 4816, "loss": 0.2208, "lr": 
5.4641727460631455e-08, "epoch": 6.853197674418604, "percentage": 97.9, "elapsed_time": "16:26:46", "remaining_time": "0:21:08"} +{"current_steps": 4720, "total_steps": 4816, "loss": 0.2174, "lr": 4.941814833377745e-08, "epoch": 6.8604651162790695, "percentage": 98.01, "elapsed_time": "16:27:52", "remaining_time": "0:20:05"} +{"current_steps": 4725, "total_steps": 4816, "loss": 0.2141, "lr": 4.445663903931419e-08, "epoch": 6.867732558139535, "percentage": 98.11, "elapsed_time": "16:28:54", "remaining_time": "0:19:02"} +{"current_steps": 4730, "total_steps": 4816, "loss": 0.2245, "lr": 3.975726475137443e-08, "epoch": 6.875, "percentage": 98.21, "elapsed_time": "16:29:54", "remaining_time": "0:17:59"} +{"current_steps": 4735, "total_steps": 4816, "loss": 0.226, "lr": 3.5320087200698686e-08, "epoch": 6.882267441860465, "percentage": 98.32, "elapsed_time": "16:30:58", "remaining_time": "0:16:57"} +{"current_steps": 4740, "total_steps": 4816, "loss": 0.2203, "lr": 3.114516467383144e-08, "epoch": 6.8895348837209305, "percentage": 98.42, "elapsed_time": "16:31:58", "remaining_time": "0:15:54"} +{"current_steps": 4745, "total_steps": 4816, "loss": 0.2222, "lr": 2.7232552012339542e-08, "epoch": 6.896802325581396, "percentage": 98.53, "elapsed_time": "16:33:02", "remaining_time": "0:14:51"} +{"current_steps": 4750, "total_steps": 4816, "loss": 0.2166, "lr": 2.358230061210387e-08, "epoch": 6.904069767441861, "percentage": 98.63, "elapsed_time": "16:34:01", "remaining_time": "0:13:48"} +{"current_steps": 4755, "total_steps": 4816, "loss": 0.2075, "lr": 2.0194458422646557e-08, "epoch": 6.911337209302325, "percentage": 98.73, "elapsed_time": "16:35:01", "remaining_time": "0:12:45"} +{"current_steps": 4760, "total_steps": 4816, "loss": 0.2144, "lr": 1.706906994648705e-08, "epoch": 6.9186046511627906, "percentage": 98.84, "elapsed_time": "16:36:00", "remaining_time": "0:11:43"} +{"current_steps": 4765, "total_steps": 4816, "loss": 0.2132, "lr": 1.4206176238571457e-08, "epoch": 
6.925872093023256, "percentage": 98.94, "elapsed_time": "16:37:02", "remaining_time": "0:10:40"} +{"current_steps": 4770, "total_steps": 4816, "loss": 0.2348, "lr": 1.160581490572632e-08, "epoch": 6.933139534883721, "percentage": 99.04, "elapsed_time": "16:38:08", "remaining_time": "0:09:37"} +{"current_steps": 4775, "total_steps": 4816, "loss": 0.2142, "lr": 9.268020106167896e-09, "epoch": 6.940406976744186, "percentage": 99.15, "elapsed_time": "16:39:12", "remaining_time": "0:08:34"} +{"current_steps": 4780, "total_steps": 4816, "loss": 0.2155, "lr": 7.192822549046963e-09, "epoch": 6.9476744186046515, "percentage": 99.25, "elapsed_time": "16:40:11", "remaining_time": "0:07:31"} +{"current_steps": 4785, "total_steps": 4816, "loss": 0.2268, "lr": 5.38024949405358e-09, "epoch": 6.954941860465116, "percentage": 99.36, "elapsed_time": "16:41:10", "remaining_time": "0:06:29"} +{"current_steps": 4790, "total_steps": 4816, "loss": 0.2196, "lr": 3.8303247510596e-09, "epoch": 6.962209302325581, "percentage": 99.46, "elapsed_time": "16:42:17", "remaining_time": "0:05:26"} +{"current_steps": 4795, "total_steps": 4816, "loss": 0.2158, "lr": 2.5430686797944803e-09, "epoch": 6.969476744186046, "percentage": 99.56, "elapsed_time": "16:43:20", "remaining_time": "0:04:23"} +{"current_steps": 4800, "total_steps": 4816, "loss": 0.2137, "lr": 1.5184981895899342e-09, "epoch": 6.976744186046512, "percentage": 99.67, "elapsed_time": "16:44:22", "remaining_time": "0:03:20"} +{"current_steps": 4805, "total_steps": 4816, "loss": 0.2238, "lr": 7.566267391512228e-10, "epoch": 6.984011627906977, "percentage": 99.77, "elapsed_time": "16:45:27", "remaining_time": "0:02:18"} +{"current_steps": 4810, "total_steps": 4816, "loss": 0.2351, "lr": 2.574643363839613e-10, "epoch": 6.991279069767442, "percentage": 99.88, "elapsed_time": "16:46:29", "remaining_time": "0:01:15"} +{"current_steps": 4815, "total_steps": 4816, "loss": 0.2106, "lr": 2.1017538260892367e-11, "epoch": 6.998546511627907, 
"percentage": 99.98, "elapsed_time": "16:47:27", "remaining_time": "0:00:12"} +{"current_steps": 4816, "total_steps": 4816, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "16:48:19", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..9a426f9 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,10640 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4816, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007267441860465116, + "grad_norm": 23.60395367303308, + "learning_rate": 3.319502074688797e-07, + "loss": 0.8619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36206579208374023, + "step": 5, + "valid_targets_mean": 3788.0, + "valid_targets_min": 735 + }, + { + "epoch": 0.014534883720930232, + "grad_norm": 23.282874067744043, + "learning_rate": 7.468879668049793e-07, + "loss": 0.88, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34413787722587585, + "step": 10, + "valid_targets_mean": 3052.0, + "valid_targets_min": 514 + }, + { + "epoch": 0.02180232558139535, + "grad_norm": 20.544843116221944, + "learning_rate": 1.161825726141079e-06, + "loss": 0.8461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37611252069473267, + "step": 15, + "valid_targets_mean": 4375.6, + "valid_targets_min": 629 + }, + { + "epoch": 0.029069767441860465, + "grad_norm": 15.377437328665666, + "learning_rate": 1.5767634854771784e-06, + "loss": 0.7898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3731459975242615, + "step": 20, + "valid_targets_mean": 5467.6, + "valid_targets_min": 3940 + }, + { + "epoch": 0.036337209302325583, + "grad_norm": 9.747878208294727, + "learning_rate": 1.991701244813278e-06, + "loss": 0.7522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41458815336227417, + "step": 25, + "valid_targets_mean": 3558.1, + "valid_targets_min": 703 + }, + 
{ + "epoch": 0.0436046511627907, + "grad_norm": 5.1933038029969305, + "learning_rate": 2.4066390041493776e-06, + "loss": 0.6776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32391414046287537, + "step": 30, + "valid_targets_mean": 2797.5, + "valid_targets_min": 758 + }, + { + "epoch": 0.050872093023255814, + "grad_norm": 2.9153373109793437, + "learning_rate": 2.8215767634854773e-06, + "loss": 0.647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35981929302215576, + "step": 35, + "valid_targets_mean": 4440.8, + "valid_targets_min": 874 + }, + { + "epoch": 0.05813953488372093, + "grad_norm": 1.9464734332413474, + "learning_rate": 3.2365145228215773e-06, + "loss": 0.6368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.297600120306015, + "step": 40, + "valid_targets_mean": 4015.2, + "valid_targets_min": 623 + }, + { + "epoch": 0.06540697674418605, + "grad_norm": 1.5811509244863968, + "learning_rate": 3.6514522821576765e-06, + "loss": 0.6042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.235577791929245, + "step": 45, + "valid_targets_mean": 3494.8, + "valid_targets_min": 676 + }, + { + "epoch": 0.07267441860465117, + "grad_norm": 1.6191035025819336, + "learning_rate": 4.0663900414937765e-06, + "loss": 0.6092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3790181875228882, + "step": 50, + "valid_targets_mean": 2875.0, + "valid_targets_min": 743 + }, + { + "epoch": 0.07994186046511628, + "grad_norm": 1.2139651902387136, + "learning_rate": 4.481327800829876e-06, + "loss": 0.5623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23523655533790588, + "step": 55, + "valid_targets_mean": 2952.5, + "valid_targets_min": 634 + }, + { + "epoch": 0.0872093023255814, + "grad_norm": 1.107547629402263, + "learning_rate": 4.896265560165976e-06, + "loss": 0.5582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24975699186325073, + "step": 60, + "valid_targets_mean": 2733.1, + "valid_targets_min": 557 + }, + { + "epoch": 0.09447674418604651, + "grad_norm": 0.8171060334267412, + "learning_rate": 
5.311203319502075e-06, + "loss": 0.5279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2179132103919983, + "step": 65, + "valid_targets_mean": 3514.1, + "valid_targets_min": 728 + }, + { + "epoch": 0.10174418604651163, + "grad_norm": 0.8445616203520336, + "learning_rate": 5.726141078838174e-06, + "loss": 0.5329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22933122515678406, + "step": 70, + "valid_targets_mean": 2726.6, + "valid_targets_min": 615 + }, + { + "epoch": 0.10901162790697674, + "grad_norm": 0.7363370039191905, + "learning_rate": 6.1410788381742745e-06, + "loss": 0.4821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2222234010696411, + "step": 75, + "valid_targets_mean": 4008.4, + "valid_targets_min": 580 + }, + { + "epoch": 0.11627906976744186, + "grad_norm": 0.6626607153866264, + "learning_rate": 6.556016597510374e-06, + "loss": 0.4768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1906505525112152, + "step": 80, + "valid_targets_mean": 4183.4, + "valid_targets_min": 560 + }, + { + "epoch": 0.12354651162790697, + "grad_norm": 0.7275647245598297, + "learning_rate": 6.970954356846473e-06, + "loss": 0.4808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23446953296661377, + "step": 85, + "valid_targets_mean": 4184.1, + "valid_targets_min": 527 + }, + { + "epoch": 0.1308139534883721, + "grad_norm": 0.6302365183817711, + "learning_rate": 7.385892116182573e-06, + "loss": 0.4469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22797903418540955, + "step": 90, + "valid_targets_mean": 5058.5, + "valid_targets_min": 3750 + }, + { + "epoch": 0.1380813953488372, + "grad_norm": 0.6928723527565587, + "learning_rate": 7.800829875518673e-06, + "loss": 0.4533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23440513014793396, + "step": 95, + "valid_targets_mean": 3689.2, + "valid_targets_min": 713 + }, + { + "epoch": 0.14534883720930233, + "grad_norm": 0.9242860696284508, + "learning_rate": 8.215767634854772e-06, + "loss": 0.4457, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.24846482276916504, + "step": 100, + "valid_targets_mean": 3255.8, + "valid_targets_min": 597 + }, + { + "epoch": 0.15261627906976744, + "grad_norm": 0.6413774191381949, + "learning_rate": 8.630705394190872e-06, + "loss": 0.4474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19246378540992737, + "step": 105, + "valid_targets_mean": 3996.2, + "valid_targets_min": 632 + }, + { + "epoch": 0.15988372093023256, + "grad_norm": 0.6894681865347022, + "learning_rate": 9.045643153526971e-06, + "loss": 0.4288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19999036192893982, + "step": 110, + "valid_targets_mean": 3380.1, + "valid_targets_min": 629 + }, + { + "epoch": 0.16715116279069767, + "grad_norm": 0.7022454218409497, + "learning_rate": 9.460580912863071e-06, + "loss": 0.4541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16242238879203796, + "step": 115, + "valid_targets_mean": 3007.4, + "valid_targets_min": 712 + }, + { + "epoch": 0.1744186046511628, + "grad_norm": 0.6253461021305736, + "learning_rate": 9.875518672199172e-06, + "loss": 0.4304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14945435523986816, + "step": 120, + "valid_targets_mean": 3177.5, + "valid_targets_min": 819 + }, + { + "epoch": 0.1816860465116279, + "grad_norm": 0.7018027456233442, + "learning_rate": 1.0290456431535269e-05, + "loss": 0.41, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17015759646892548, + "step": 125, + "valid_targets_mean": 2437.5, + "valid_targets_min": 339 + }, + { + "epoch": 0.18895348837209303, + "grad_norm": 0.8319196430142065, + "learning_rate": 1.070539419087137e-05, + "loss": 0.4158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.206083744764328, + "step": 130, + "valid_targets_mean": 3995.8, + "valid_targets_min": 667 + }, + { + "epoch": 0.19622093023255813, + "grad_norm": 0.5700530676300132, + "learning_rate": 1.112033195020747e-05, + "loss": 0.3866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23243868350982666, + "step": 135, + "valid_targets_mean": 5856.1, + "valid_targets_min": 
3495 + }, + { + "epoch": 0.20348837209302326, + "grad_norm": 0.7259992884453891, + "learning_rate": 1.1535269709543569e-05, + "loss": 0.3935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19784843921661377, + "step": 140, + "valid_targets_mean": 2655.4, + "valid_targets_min": 612 + }, + { + "epoch": 0.21075581395348839, + "grad_norm": 0.5805199736360525, + "learning_rate": 1.1950207468879669e-05, + "loss": 0.3836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17293311655521393, + "step": 145, + "valid_targets_mean": 4135.4, + "valid_targets_min": 855 + }, + { + "epoch": 0.2180232558139535, + "grad_norm": 0.7106827535386514, + "learning_rate": 1.236514522821577e-05, + "loss": 0.3918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2059229165315628, + "step": 150, + "valid_targets_mean": 4109.1, + "valid_targets_min": 1018 + }, + { + "epoch": 0.22529069767441862, + "grad_norm": 0.6454809853641186, + "learning_rate": 1.2780082987551867e-05, + "loss": 0.3847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16915945708751678, + "step": 155, + "valid_targets_mean": 3834.5, + "valid_targets_min": 429 + }, + { + "epoch": 0.23255813953488372, + "grad_norm": 0.7811620851477677, + "learning_rate": 1.3195020746887967e-05, + "loss": 0.387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17152553796768188, + "step": 160, + "valid_targets_mean": 2061.4, + "valid_targets_min": 572 + }, + { + "epoch": 0.23982558139534885, + "grad_norm": 0.6473284356559537, + "learning_rate": 1.3609958506224067e-05, + "loss": 0.3834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16540473699569702, + "step": 165, + "valid_targets_mean": 3449.9, + "valid_targets_min": 512 + }, + { + "epoch": 0.24709302325581395, + "grad_norm": 0.6625475101770146, + "learning_rate": 1.4024896265560166e-05, + "loss": 0.3975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19903941452503204, + "step": 170, + "valid_targets_mean": 4140.4, + "valid_targets_min": 677 + }, + { + "epoch": 0.2543604651162791, + "grad_norm": 0.6564466177734336, + 
"learning_rate": 1.4439834024896267e-05, + "loss": 0.3808, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14273467659950256, + "step": 175, + "valid_targets_mean": 2623.4, + "valid_targets_min": 717 + }, + { + "epoch": 0.2616279069767442, + "grad_norm": 0.7554600141788899, + "learning_rate": 1.4854771784232367e-05, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19581112265586853, + "step": 180, + "valid_targets_mean": 3290.1, + "valid_targets_min": 594 + }, + { + "epoch": 0.2688953488372093, + "grad_norm": 0.8137765108506549, + "learning_rate": 1.5269709543568464e-05, + "loss": 0.3938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20833677053451538, + "step": 185, + "valid_targets_mean": 2977.4, + "valid_targets_min": 789 + }, + { + "epoch": 0.2761627906976744, + "grad_norm": 0.819634775218101, + "learning_rate": 1.5684647302904566e-05, + "loss": 0.383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2356613427400589, + "step": 190, + "valid_targets_mean": 3226.0, + "valid_targets_min": 659 + }, + { + "epoch": 0.28343023255813954, + "grad_norm": 0.6515391834585635, + "learning_rate": 1.6099585062240665e-05, + "loss": 0.3812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171596497297287, + "step": 195, + "valid_targets_mean": 3689.6, + "valid_targets_min": 638 + }, + { + "epoch": 0.29069767441860467, + "grad_norm": 0.6568499242368925, + "learning_rate": 1.6514522821576764e-05, + "loss": 0.3761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1820891797542572, + "step": 200, + "valid_targets_mean": 3843.2, + "valid_targets_min": 631 + }, + { + "epoch": 0.29796511627906974, + "grad_norm": 0.7472077453103736, + "learning_rate": 1.6929460580912863e-05, + "loss": 0.3862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2648097276687622, + "step": 205, + "valid_targets_mean": 3722.5, + "valid_targets_min": 666 + }, + { + "epoch": 0.30523255813953487, + "grad_norm": 0.877924169905687, + "learning_rate": 1.7344398340248965e-05, + "loss": 0.373, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.24992845952510834, + "step": 210, + "valid_targets_mean": 2855.9, + "valid_targets_min": 506 + }, + { + "epoch": 0.3125, + "grad_norm": 0.8416781774626408, + "learning_rate": 1.7759336099585064e-05, + "loss": 0.3787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22990360856056213, + "step": 215, + "valid_targets_mean": 3118.4, + "valid_targets_min": 800 + }, + { + "epoch": 0.31976744186046513, + "grad_norm": 0.6566273438654435, + "learning_rate": 1.8174273858921162e-05, + "loss": 0.3952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14994342625141144, + "step": 220, + "valid_targets_mean": 3124.2, + "valid_targets_min": 572 + }, + { + "epoch": 0.32703488372093026, + "grad_norm": 0.6550126195062852, + "learning_rate": 1.8589211618257264e-05, + "loss": 0.3555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1906970590353012, + "step": 225, + "valid_targets_mean": 3964.2, + "valid_targets_min": 954 + }, + { + "epoch": 0.33430232558139533, + "grad_norm": 0.6948624664412582, + "learning_rate": 1.9004149377593363e-05, + "loss": 0.3649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2098342776298523, + "step": 230, + "valid_targets_mean": 3855.0, + "valid_targets_min": 297 + }, + { + "epoch": 0.34156976744186046, + "grad_norm": 0.694830010463169, + "learning_rate": 1.9419087136929462e-05, + "loss": 0.3644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2014472782611847, + "step": 235, + "valid_targets_mean": 4444.5, + "valid_targets_min": 559 + }, + { + "epoch": 0.3488372093023256, + "grad_norm": 0.7125029122579649, + "learning_rate": 1.983402489626556e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1556605100631714, + "step": 240, + "valid_targets_mean": 3346.0, + "valid_targets_min": 516 + }, + { + "epoch": 0.3561046511627907, + "grad_norm": 0.6714853403607605, + "learning_rate": 2.024896265560166e-05, + "loss": 0.3635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19688832759857178, + "step": 245, + "valid_targets_mean": 4349.2, + 
"valid_targets_min": 694 + }, + { + "epoch": 0.3633720930232558, + "grad_norm": 0.6799567905357822, + "learning_rate": 2.066390041493776e-05, + "loss": 0.3641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14497721195220947, + "step": 250, + "valid_targets_mean": 3314.5, + "valid_targets_min": 803 + }, + { + "epoch": 0.3706395348837209, + "grad_norm": 0.7202375378999366, + "learning_rate": 2.107883817427386e-05, + "loss": 0.3509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18096783757209778, + "step": 255, + "valid_targets_mean": 3134.6, + "valid_targets_min": 562 + }, + { + "epoch": 0.37790697674418605, + "grad_norm": 0.6155035214731711, + "learning_rate": 2.149377593360996e-05, + "loss": 0.3543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1524481177330017, + "step": 260, + "valid_targets_mean": 3271.9, + "valid_targets_min": 305 + }, + { + "epoch": 0.3851744186046512, + "grad_norm": 0.6335191320342396, + "learning_rate": 2.190871369294606e-05, + "loss": 0.3378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19276472926139832, + "step": 265, + "valid_targets_mean": 4571.8, + "valid_targets_min": 805 + }, + { + "epoch": 0.39244186046511625, + "grad_norm": 0.6827322142507465, + "learning_rate": 2.232365145228216e-05, + "loss": 0.3526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18056687712669373, + "step": 270, + "valid_targets_mean": 3713.2, + "valid_targets_min": 576 + }, + { + "epoch": 0.3997093023255814, + "grad_norm": 0.6937020874439587, + "learning_rate": 2.273858921161826e-05, + "loss": 0.3786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19584697484970093, + "step": 275, + "valid_targets_mean": 4246.0, + "valid_targets_min": 632 + }, + { + "epoch": 0.4069767441860465, + "grad_norm": 0.7507183101664987, + "learning_rate": 2.315352697095436e-05, + "loss": 0.3594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16600695252418518, + "step": 280, + "valid_targets_mean": 2664.1, + "valid_targets_min": 704 + }, + { + "epoch": 0.41424418604651164, + "grad_norm": 
0.6156181535415471, + "learning_rate": 2.356846473029046e-05, + "loss": 0.3452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752406358718872, + "step": 285, + "valid_targets_mean": 3996.6, + "valid_targets_min": 540 + }, + { + "epoch": 0.42151162790697677, + "grad_norm": 0.6532054067762535, + "learning_rate": 2.398340248962656e-05, + "loss": 0.3384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15145818889141083, + "step": 290, + "valid_targets_mean": 4135.5, + "valid_targets_min": 799 + }, + { + "epoch": 0.42877906976744184, + "grad_norm": 0.6821518031591993, + "learning_rate": 2.439834024896266e-05, + "loss": 0.3506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13307440280914307, + "step": 295, + "valid_targets_mean": 2559.9, + "valid_targets_min": 770 + }, + { + "epoch": 0.436046511627907, + "grad_norm": 0.6168836807757696, + "learning_rate": 2.481327800829876e-05, + "loss": 0.3305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1336684226989746, + "step": 300, + "valid_targets_mean": 3724.6, + "valid_targets_min": 650 + }, + { + "epoch": 0.4433139534883721, + "grad_norm": 0.7333001387424685, + "learning_rate": 2.5228215767634855e-05, + "loss": 0.3556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15666671097278595, + "step": 305, + "valid_targets_mean": 2321.0, + "valid_targets_min": 867 + }, + { + "epoch": 0.45058139534883723, + "grad_norm": 0.6581684280891332, + "learning_rate": 2.5643153526970957e-05, + "loss": 0.3537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17739339172840118, + "step": 310, + "valid_targets_mean": 3477.4, + "valid_targets_min": 552 + }, + { + "epoch": 0.4578488372093023, + "grad_norm": 0.6691702027150105, + "learning_rate": 2.6058091286307056e-05, + "loss": 0.3433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14868199825286865, + "step": 315, + "valid_targets_mean": 2954.4, + "valid_targets_min": 571 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 0.7240436681213814, + "learning_rate": 2.6473029045643155e-05, + "loss": 0.3346, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.19649548828601837, + "step": 320, + "valid_targets_mean": 3651.5, + "valid_targets_min": 649 + }, + { + "epoch": 0.47238372093023256, + "grad_norm": 0.6691805651498327, + "learning_rate": 2.6887966804979257e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18201938271522522, + "step": 325, + "valid_targets_mean": 4133.5, + "valid_targets_min": 625 + }, + { + "epoch": 0.4796511627906977, + "grad_norm": 0.6386940655448596, + "learning_rate": 2.7302904564315355e-05, + "loss": 0.3464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17419925332069397, + "step": 330, + "valid_targets_mean": 4157.8, + "valid_targets_min": 790 + }, + { + "epoch": 0.48691860465116277, + "grad_norm": 0.592600128740923, + "learning_rate": 2.7717842323651454e-05, + "loss": 0.3464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23807524144649506, + "step": 335, + "valid_targets_mean": 6100.8, + "valid_targets_min": 4148 + }, + { + "epoch": 0.4941860465116279, + "grad_norm": 0.6395879958003222, + "learning_rate": 2.8132780082987556e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11755307763814926, + "step": 340, + "valid_targets_mean": 2526.9, + "valid_targets_min": 564 + }, + { + "epoch": 0.501453488372093, + "grad_norm": 0.7373583172871605, + "learning_rate": 2.8547717842323655e-05, + "loss": 0.3435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18158669769763947, + "step": 345, + "valid_targets_mean": 3016.9, + "valid_targets_min": 663 + }, + { + "epoch": 0.5087209302325582, + "grad_norm": 0.6359067322988392, + "learning_rate": 2.8962655601659754e-05, + "loss": 0.3442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13962212204933167, + "step": 350, + "valid_targets_mean": 3866.6, + "valid_targets_min": 602 + }, + { + "epoch": 0.5159883720930233, + "grad_norm": 0.6292053753904882, + "learning_rate": 2.9377593360995856e-05, + "loss": 0.3168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13878588378429413, + "step": 355, + 
"valid_targets_mean": 2948.9, + "valid_targets_min": 632 + }, + { + "epoch": 0.5232558139534884, + "grad_norm": 0.6465028968729859, + "learning_rate": 2.9792531120331955e-05, + "loss": 0.3223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15605293214321136, + "step": 360, + "valid_targets_mean": 2838.6, + "valid_targets_min": 550 + }, + { + "epoch": 0.5305232558139535, + "grad_norm": 0.599120544848729, + "learning_rate": 3.020746887966805e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19232279062271118, + "step": 365, + "valid_targets_mean": 5084.5, + "valid_targets_min": 3708 + }, + { + "epoch": 0.5377906976744186, + "grad_norm": 0.6008656846497219, + "learning_rate": 3.062240663900415e-05, + "loss": 0.3372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18847373127937317, + "step": 370, + "valid_targets_mean": 4938.4, + "valid_targets_min": 912 + }, + { + "epoch": 0.5450581395348837, + "grad_norm": 0.6285704558404371, + "learning_rate": 3.103734439834025e-05, + "loss": 0.3402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.170862078666687, + "step": 375, + "valid_targets_mean": 3930.4, + "valid_targets_min": 980 + }, + { + "epoch": 0.5523255813953488, + "grad_norm": 0.631234804649969, + "learning_rate": 3.145228215767635e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12571334838867188, + "step": 380, + "valid_targets_mean": 3135.5, + "valid_targets_min": 663 + }, + { + "epoch": 0.559593023255814, + "grad_norm": 0.6821774303746879, + "learning_rate": 3.186721991701245e-05, + "loss": 0.3193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14924553036689758, + "step": 385, + "valid_targets_mean": 3077.9, + "valid_targets_min": 545 + }, + { + "epoch": 0.5668604651162791, + "grad_norm": 0.9548135178922332, + "learning_rate": 3.2282157676348554e-05, + "loss": 0.3267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13922418653964996, + "step": 390, + "valid_targets_mean": 2856.1, + "valid_targets_min": 603 + }, + { + "epoch": 0.5741279069767442, 
+ "grad_norm": 0.6667391470008851, + "learning_rate": 3.2697095435684646e-05, + "loss": 0.3223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1518375724554062, + "step": 395, + "valid_targets_mean": 3825.0, + "valid_targets_min": 563 + }, + { + "epoch": 0.5813953488372093, + "grad_norm": 0.7530532693129081, + "learning_rate": 3.3112033195020745e-05, + "loss": 0.3517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16733457148075104, + "step": 400, + "valid_targets_mean": 3029.6, + "valid_targets_min": 657 + }, + { + "epoch": 0.5886627906976745, + "grad_norm": 0.6663888895860358, + "learning_rate": 3.352697095435685e-05, + "loss": 0.3137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14393126964569092, + "step": 405, + "valid_targets_mean": 4312.1, + "valid_targets_min": 780 + }, + { + "epoch": 0.5959302325581395, + "grad_norm": 0.6660172981348241, + "learning_rate": 3.394190871369295e-05, + "loss": 0.326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1679423302412033, + "step": 410, + "valid_targets_mean": 3628.2, + "valid_targets_min": 525 + }, + { + "epoch": 0.6031976744186046, + "grad_norm": 0.5769438912613716, + "learning_rate": 3.435684647302905e-05, + "loss": 0.3415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13888345658779144, + "step": 415, + "valid_targets_mean": 2948.0, + "valid_targets_min": 629 + }, + { + "epoch": 0.6104651162790697, + "grad_norm": 0.6552132927034705, + "learning_rate": 3.477178423236515e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1582183837890625, + "step": 420, + "valid_targets_mean": 3943.6, + "valid_targets_min": 805 + }, + { + "epoch": 0.6177325581395349, + "grad_norm": 0.605802257570494, + "learning_rate": 3.5186721991701245e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1685292273759842, + "step": 425, + "valid_targets_mean": 3261.0, + "valid_targets_min": 853 + }, + { + "epoch": 0.625, + "grad_norm": 0.6087478907133542, + "learning_rate": 3.5601659751037344e-05, + "loss": 0.3293, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.1881280243396759, + "step": 430, + "valid_targets_mean": 4309.2, + "valid_targets_min": 729 + }, + { + "epoch": 0.6322674418604651, + "grad_norm": 0.7473680581058674, + "learning_rate": 3.601659751037345e-05, + "loss": 0.3198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1361180990934372, + "step": 435, + "valid_targets_mean": 3453.4, + "valid_targets_min": 662 + }, + { + "epoch": 0.6395348837209303, + "grad_norm": 0.6504764288016779, + "learning_rate": 3.643153526970955e-05, + "loss": 0.3107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1473781168460846, + "step": 440, + "valid_targets_mean": 3510.0, + "valid_targets_min": 490 + }, + { + "epoch": 0.6468023255813954, + "grad_norm": 0.7669144042878424, + "learning_rate": 3.684647302904565e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2131597101688385, + "step": 445, + "valid_targets_mean": 3796.1, + "valid_targets_min": 476 + }, + { + "epoch": 0.6540697674418605, + "grad_norm": 0.6403304907544071, + "learning_rate": 3.7261410788381746e-05, + "loss": 0.3323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14401483535766602, + "step": 450, + "valid_targets_mean": 3364.4, + "valid_targets_min": 655 + }, + { + "epoch": 0.6613372093023255, + "grad_norm": 0.645985011133752, + "learning_rate": 3.7676348547717845e-05, + "loss": 0.3105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.136003777384758, + "step": 455, + "valid_targets_mean": 2819.6, + "valid_targets_min": 687 + }, + { + "epoch": 0.6686046511627907, + "grad_norm": 0.7585240879141193, + "learning_rate": 3.8091286307053944e-05, + "loss": 0.3301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1501508355140686, + "step": 460, + "valid_targets_mean": 3316.0, + "valid_targets_min": 693 + }, + { + "epoch": 0.6758720930232558, + "grad_norm": 0.7281883856779292, + "learning_rate": 3.850622406639004e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18056277930736542, + "step": 465, + "valid_targets_mean": 
3833.4, + "valid_targets_min": 740 + }, + { + "epoch": 0.6831395348837209, + "grad_norm": 0.6535897957871524, + "learning_rate": 3.892116182572614e-05, + "loss": 0.3254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.156234472990036, + "step": 470, + "valid_targets_mean": 3719.0, + "valid_targets_min": 824 + }, + { + "epoch": 0.690406976744186, + "grad_norm": 0.6187781320054627, + "learning_rate": 3.933609958506224e-05, + "loss": 0.3074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14327925443649292, + "step": 475, + "valid_targets_mean": 3327.1, + "valid_targets_min": 667 + }, + { + "epoch": 0.6976744186046512, + "grad_norm": 0.6409941888826423, + "learning_rate": 3.9751037344398345e-05, + "loss": 0.3266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18486058712005615, + "step": 480, + "valid_targets_mean": 4941.5, + "valid_targets_min": 729 + }, + { + "epoch": 0.7049418604651163, + "grad_norm": 0.6253158306971405, + "learning_rate": 3.999997898246174e-05, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1806681752204895, + "step": 485, + "valid_targets_mean": 4048.6, + "valid_targets_min": 659 + }, + { + "epoch": 0.7122093023255814, + "grad_norm": 0.6250751502415329, + "learning_rate": 3.999974253566362e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16524715721607208, + "step": 490, + "valid_targets_mean": 3338.9, + "valid_targets_min": 594 + }, + { + "epoch": 0.7194767441860465, + "grad_norm": 0.6124032350122675, + "learning_rate": 3.999924337326085e-05, + "loss": 0.3126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13034440577030182, + "step": 495, + "valid_targets_mean": 2214.9, + "valid_targets_min": 578 + }, + { + "epoch": 0.7267441860465116, + "grad_norm": 0.9029943456959502, + "learning_rate": 3.9998481501810414e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1411435753107071, + "step": 500, + "valid_targets_mean": 3521.5, + "valid_targets_min": 634 + }, + { + "epoch": 0.7340116279069767, + "grad_norm": 
0.6114689481192495, + "learning_rate": 3.999745693132021e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11579418182373047, + "step": 505, + "valid_targets_mean": 3004.8, + "valid_targets_min": 560 + }, + { + "epoch": 0.7412790697674418, + "grad_norm": 0.561556329434712, + "learning_rate": 3.999616967524894e-05, + "loss": 0.318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16953226923942566, + "step": 510, + "valid_targets_mean": 4547.9, + "valid_targets_min": 971 + }, + { + "epoch": 0.748546511627907, + "grad_norm": 0.6387062020360811, + "learning_rate": 3.999461975050595e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2010563313961029, + "step": 515, + "valid_targets_mean": 4711.6, + "valid_targets_min": 800 + }, + { + "epoch": 0.7558139534883721, + "grad_norm": 0.6150223941673152, + "learning_rate": 3.9992807177450956e-05, + "loss": 0.3091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1380336582660675, + "step": 520, + "valid_targets_mean": 2641.8, + "valid_targets_min": 535 + }, + { + "epoch": 0.7630813953488372, + "grad_norm": 0.6623311527635831, + "learning_rate": 3.9990731979893834e-05, + "loss": 0.323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17421071231365204, + "step": 525, + "valid_targets_mean": 3310.9, + "valid_targets_min": 815 + }, + { + "epoch": 0.7703488372093024, + "grad_norm": 0.644574318586418, + "learning_rate": 3.998839418509428e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878730207681656, + "step": 530, + "valid_targets_mean": 4664.6, + "valid_targets_min": 697 + }, + { + "epoch": 0.7776162790697675, + "grad_norm": 0.5151536823530193, + "learning_rate": 3.998579382376143e-05, + "loss": 0.3181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17093926668167114, + "step": 535, + "valid_targets_mean": 4806.2, + "valid_targets_min": 3993 + }, + { + "epoch": 0.7848837209302325, + "grad_norm": 0.5848122250774543, + "learning_rate": 3.9982930930053515e-05, + "loss": 0.3197, + "loss_nan_ranks": 
0, + "loss_rank_avg": 0.19000868499279022, + "step": 540, + "valid_targets_mean": 4433.6, + "valid_targets_min": 772 + }, + { + "epoch": 0.7921511627906976, + "grad_norm": 0.6907826286413288, + "learning_rate": 3.9979805541577356e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12706348299980164, + "step": 545, + "valid_targets_mean": 2434.2, + "valid_targets_min": 647 + }, + { + "epoch": 0.7994186046511628, + "grad_norm": 0.5759344182112125, + "learning_rate": 3.99764176993879e-05, + "loss": 0.3178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13058075308799744, + "step": 550, + "valid_targets_mean": 3536.2, + "valid_targets_min": 803 + }, + { + "epoch": 0.8066860465116279, + "grad_norm": 0.5679171468274491, + "learning_rate": 3.997276744798766e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18033181130886078, + "step": 555, + "valid_targets_mean": 4196.0, + "valid_targets_min": 907 + }, + { + "epoch": 0.813953488372093, + "grad_norm": 0.5257948098452856, + "learning_rate": 3.996885483532617e-05, + "loss": 0.3126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16494852304458618, + "step": 560, + "valid_targets_mean": 4555.4, + "valid_targets_min": 707 + }, + { + "epoch": 0.8212209302325582, + "grad_norm": 0.6873649278599748, + "learning_rate": 3.9964679912799306e-05, + "loss": 0.3127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1650257706642151, + "step": 565, + "valid_targets_mean": 3825.8, + "valid_targets_min": 632 + }, + { + "epoch": 0.8284883720930233, + "grad_norm": 0.5750252747412078, + "learning_rate": 3.9960242735248626e-05, + "loss": 0.3341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1479925811290741, + "step": 570, + "valid_targets_mean": 3665.1, + "valid_targets_min": 946 + }, + { + "epoch": 0.8357558139534884, + "grad_norm": 0.600940591918952, + "learning_rate": 3.995554336096069e-05, + "loss": 0.3386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12769609689712524, + "step": 575, + "valid_targets_mean": 4007.4, + 
"valid_targets_min": 433 + }, + { + "epoch": 0.8430232558139535, + "grad_norm": 0.5800527572863078, + "learning_rate": 3.995058185166623e-05, + "loss": 0.3014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13579922914505005, + "step": 580, + "valid_targets_mean": 3511.2, + "valid_targets_min": 510 + }, + { + "epoch": 0.8502906976744186, + "grad_norm": 0.5904946794486696, + "learning_rate": 3.9945358272539373e-05, + "loss": 0.3036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13859254121780396, + "step": 585, + "valid_targets_mean": 3177.5, + "valid_targets_min": 514 + }, + { + "epoch": 0.8575581395348837, + "grad_norm": 0.6700979896386047, + "learning_rate": 3.9939872692196805e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18660062551498413, + "step": 590, + "valid_targets_mean": 2899.0, + "valid_targets_min": 559 + }, + { + "epoch": 0.8648255813953488, + "grad_norm": 0.5746632492683453, + "learning_rate": 3.993412518269682e-05, + "loss": 0.3211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774902641773224, + "step": 595, + "valid_targets_mean": 4287.2, + "valid_targets_min": 743 + }, + { + "epoch": 0.872093023255814, + "grad_norm": 0.5380377747396158, + "learning_rate": 3.9928115819538416e-05, + "loss": 0.3141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1711953580379486, + "step": 600, + "valid_targets_mean": 4814.6, + "valid_targets_min": 4037 + }, + { + "epoch": 0.8793604651162791, + "grad_norm": 0.6207951561335642, + "learning_rate": 3.992184468166028e-05, + "loss": 0.3158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1701497882604599, + "step": 605, + "valid_targets_mean": 3301.4, + "valid_targets_min": 655 + }, + { + "epoch": 0.8866279069767442, + "grad_norm": 0.6066474074945402, + "learning_rate": 3.991531185143977e-05, + "loss": 0.2911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14605024456977844, + "step": 610, + "valid_targets_mean": 3370.5, + "valid_targets_min": 611 + }, + { + "epoch": 0.8938953488372093, + "grad_norm": 
0.528974517437865, + "learning_rate": 3.9908517414691806e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14421282708644867, + "step": 615, + "valid_targets_mean": 3871.6, + "valid_targets_min": 1000 + }, + { + "epoch": 0.9011627906976745, + "grad_norm": 0.5593482014821316, + "learning_rate": 3.990146146066776e-05, + "loss": 0.3123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15519869327545166, + "step": 620, + "valid_targets_mean": 4097.6, + "valid_targets_min": 878 + }, + { + "epoch": 0.9084302325581395, + "grad_norm": 0.6465303654402883, + "learning_rate": 3.98941440820543e-05, + "loss": 0.3146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1523018181324005, + "step": 625, + "valid_targets_mean": 2658.4, + "valid_targets_min": 534 + }, + { + "epoch": 0.9156976744186046, + "grad_norm": 0.6464191733061095, + "learning_rate": 3.988656537497213e-05, + "loss": 0.3188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15685781836509705, + "step": 630, + "valid_targets_mean": 2272.4, + "valid_targets_min": 707 + }, + { + "epoch": 0.9229651162790697, + "grad_norm": 0.6363180627039392, + "learning_rate": 3.987872543897477e-05, + "loss": 0.3121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16334350407123566, + "step": 635, + "valid_targets_mean": 3598.2, + "valid_targets_min": 808 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 0.6442381315821747, + "learning_rate": 3.9870624377047206e-05, + "loss": 0.3225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17397740483283997, + "step": 640, + "valid_targets_mean": 3169.8, + "valid_targets_min": 668 + }, + { + "epoch": 0.9375, + "grad_norm": 0.5925230417676065, + "learning_rate": 3.9862262295604594e-05, + "loss": 0.3029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1986408531665802, + "step": 645, + "valid_targets_mean": 4408.5, + "valid_targets_min": 636 + }, + { + "epoch": 0.9447674418604651, + "grad_norm": 0.5912132183519685, + "learning_rate": 3.9853639304490815e-05, + "loss": 0.2946, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.19908586144447327, + "step": 650, + "valid_targets_mean": 4032.0, + "valid_targets_min": 788 + }, + { + "epoch": 0.9520348837209303, + "grad_norm": 0.5233197409282695, + "learning_rate": 3.984475551697703e-05, + "loss": 0.3185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1765996813774109, + "step": 655, + "valid_targets_mean": 3893.4, + "valid_targets_min": 811 + }, + { + "epoch": 0.9593023255813954, + "grad_norm": 0.5996865078000508, + "learning_rate": 3.9835611049760216e-05, + "loss": 0.3232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19216126203536987, + "step": 660, + "valid_targets_mean": 3420.6, + "valid_targets_min": 471 + }, + { + "epoch": 0.9665697674418605, + "grad_norm": 0.5167680016756202, + "learning_rate": 3.982620602296166e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13381856679916382, + "step": 665, + "valid_targets_mean": 3309.5, + "valid_targets_min": 748 + }, + { + "epoch": 0.9738372093023255, + "grad_norm": 0.5746446225121188, + "learning_rate": 3.981654056012529e-05, + "loss": 0.3094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11602400243282318, + "step": 670, + "valid_targets_mean": 2765.1, + "valid_targets_min": 549 + }, + { + "epoch": 0.9811046511627907, + "grad_norm": 0.6434502337970205, + "learning_rate": 3.980661478821614e-05, + "loss": 0.3454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1325831413269043, + "step": 675, + "valid_targets_mean": 2170.1, + "valid_targets_min": 500 + }, + { + "epoch": 0.9883720930232558, + "grad_norm": 0.6068667229565959, + "learning_rate": 3.979642883761866e-05, + "loss": 0.2994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21635717153549194, + "step": 680, + "valid_targets_mean": 4260.9, + "valid_targets_min": 732 + }, + { + "epoch": 0.9956395348837209, + "grad_norm": 0.4944733997393646, + "learning_rate": 3.978598284213497e-05, + "loss": 0.2988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15469247102737427, + "step": 685, + "valid_targets_mean": 4212.6, + 
"valid_targets_min": 646 + }, + { + "epoch": 1.002906976744186, + "grad_norm": 0.5954258533422989, + "learning_rate": 3.9775276938983144e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1707223355770111, + "step": 690, + "valid_targets_mean": 3524.9, + "valid_targets_min": 889 + }, + { + "epoch": 1.010174418604651, + "grad_norm": 0.5020818956819496, + "learning_rate": 3.9764311268795374e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13558289408683777, + "step": 695, + "valid_targets_mean": 4481.9, + "valid_targets_min": 612 + }, + { + "epoch": 1.0174418604651163, + "grad_norm": 0.5115399057944376, + "learning_rate": 3.9753085975616157e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15828119218349457, + "step": 700, + "valid_targets_mean": 4807.2, + "valid_targets_min": 621 + }, + { + "epoch": 1.0247093023255813, + "grad_norm": 0.6295735522719244, + "learning_rate": 3.9741601206900376e-05, + "loss": 0.294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12198431044816971, + "step": 705, + "valid_targets_mean": 2282.0, + "valid_targets_min": 628 + }, + { + "epoch": 1.0319767441860466, + "grad_norm": 0.6530251313106338, + "learning_rate": 3.972985711351136e-05, + "loss": 0.3069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1703912913799286, + "step": 710, + "valid_targets_mean": 2788.5, + "valid_targets_min": 508 + }, + { + "epoch": 1.0392441860465116, + "grad_norm": 0.7380539475948441, + "learning_rate": 3.971785384971894e-05, + "loss": 0.2876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18880873918533325, + "step": 715, + "valid_targets_mean": 5222.1, + "valid_targets_min": 550 + }, + { + "epoch": 1.0465116279069768, + "grad_norm": 0.5226027075387328, + "learning_rate": 3.970559157319737e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17227350175380707, + "step": 720, + "valid_targets_mean": 4441.9, + "valid_targets_min": 662 + }, + { + "epoch": 1.0537790697674418, + "grad_norm": 
0.5203168649565341, + "learning_rate": 3.9693070445023297e-05, + "loss": 0.3006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14260490238666534, + "step": 725, + "valid_targets_mean": 4482.8, + "valid_targets_min": 546 + }, + { + "epoch": 1.0610465116279069, + "grad_norm": 0.6553194107917635, + "learning_rate": 3.968029062967363e-05, + "loss": 0.3136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15954704582691193, + "step": 730, + "valid_targets_mean": 2214.1, + "valid_targets_min": 615 + }, + { + "epoch": 1.068313953488372, + "grad_norm": 0.5924767029335875, + "learning_rate": 3.966725229502336e-05, + "loss": 0.2906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13051603734493256, + "step": 735, + "valid_targets_mean": 2960.1, + "valid_targets_min": 852 + }, + { + "epoch": 1.0755813953488371, + "grad_norm": 0.6145150473623321, + "learning_rate": 3.965395561234341e-05, + "loss": 0.2918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14821113646030426, + "step": 740, + "valid_targets_mean": 3790.0, + "valid_targets_min": 594 + }, + { + "epoch": 1.0828488372093024, + "grad_norm": 0.6427556361525683, + "learning_rate": 3.9640400756298325e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11527499556541443, + "step": 745, + "valid_targets_mean": 2748.6, + "valid_targets_min": 592 + }, + { + "epoch": 1.0901162790697674, + "grad_norm": 0.5381745813762965, + "learning_rate": 3.9626587904943996e-05, + "loss": 0.2932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15444537997245789, + "step": 750, + "valid_targets_mean": 3721.0, + "valid_targets_min": 617 + }, + { + "epoch": 1.0973837209302326, + "grad_norm": 0.6262758723974868, + "learning_rate": 3.961251723972535e-05, + "loss": 0.2966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1222953200340271, + "step": 755, + "valid_targets_mean": 2428.2, + "valid_targets_min": 484 + }, + { + "epoch": 1.1046511627906976, + "grad_norm": 0.6024971681939152, + "learning_rate": 3.959818894547393e-05, + "loss": 0.288, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.1502443253993988, + "step": 760, + "valid_targets_mean": 3520.6, + "valid_targets_min": 667 + }, + { + "epoch": 1.1119186046511629, + "grad_norm": 0.6024643305619749, + "learning_rate": 3.9583603210405484e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1113622784614563, + "step": 765, + "valid_targets_mean": 2564.2, + "valid_targets_min": 469 + }, + { + "epoch": 1.119186046511628, + "grad_norm": 0.6075985529163336, + "learning_rate": 3.956876022611748e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1365235447883606, + "step": 770, + "valid_targets_mean": 2986.5, + "valid_targets_min": 772 + }, + { + "epoch": 1.1264534883720931, + "grad_norm": 0.5847751575794944, + "learning_rate": 3.9553660187586625e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13294722139835358, + "step": 775, + "valid_targets_mean": 3096.1, + "valid_targets_min": 686 + }, + { + "epoch": 1.1337209302325582, + "grad_norm": 0.5594747695034635, + "learning_rate": 3.9538303293166243e-05, + "loss": 0.2819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13940271735191345, + "step": 780, + "valid_targets_mean": 3422.9, + "valid_targets_min": 912 + }, + { + "epoch": 1.1409883720930232, + "grad_norm": 0.5444550515911036, + "learning_rate": 3.952268974458373e-05, + "loss": 0.3074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13107138872146606, + "step": 785, + "valid_targets_mean": 4094.4, + "valid_targets_min": 586 + }, + { + "epoch": 1.1482558139534884, + "grad_norm": 0.8455637167751516, + "learning_rate": 3.950681974693787e-05, + "loss": 0.2931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1299535632133484, + "step": 790, + "valid_targets_mean": 3531.8, + "valid_targets_min": 476 + }, + { + "epoch": 1.1555232558139534, + "grad_norm": 0.5600026310525352, + "learning_rate": 3.949069350869614e-05, + "loss": 0.3147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1660255640745163, + "step": 795, + 
"valid_targets_mean": 3931.5, + "valid_targets_min": 513 + }, + { + "epoch": 1.1627906976744187, + "grad_norm": 0.5787989442009414, + "learning_rate": 3.9474311241691994e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11841486394405365, + "step": 800, + "valid_targets_mean": 2689.1, + "valid_targets_min": 377 + }, + { + "epoch": 1.1700581395348837, + "grad_norm": 0.5607944017063098, + "learning_rate": 3.945767316112206e-05, + "loss": 0.3073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844930201768875, + "step": 805, + "valid_targets_mean": 4312.1, + "valid_targets_min": 596 + }, + { + "epoch": 1.177325581395349, + "grad_norm": 0.5505080993653617, + "learning_rate": 3.944077948554333e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14519767463207245, + "step": 810, + "valid_targets_mean": 3942.4, + "valid_targets_min": 666 + }, + { + "epoch": 1.184593023255814, + "grad_norm": 0.5661161246942287, + "learning_rate": 3.9423630436870255e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11194830387830734, + "step": 815, + "valid_targets_mean": 2164.8, + "valid_targets_min": 467 + }, + { + "epoch": 1.191860465116279, + "grad_norm": 0.5120139801918556, + "learning_rate": 3.940622624037188e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12605825066566467, + "step": 820, + "valid_targets_mean": 3943.4, + "valid_targets_min": 716 + }, + { + "epoch": 1.1991279069767442, + "grad_norm": 0.525392557133716, + "learning_rate": 3.938856712466885e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1255475878715515, + "step": 825, + "valid_targets_mean": 3209.6, + "valid_targets_min": 484 + }, + { + "epoch": 1.2063953488372092, + "grad_norm": 0.657741147392811, + "learning_rate": 3.93706533217304e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15574149787425995, + "step": 830, + "valid_targets_mean": 2886.0, + "valid_targets_min": 750 + }, + { + "epoch": 1.2136627906976745, + 
"grad_norm": 0.5516109167421356, + "learning_rate": 3.935248506687136e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18783080577850342, + "step": 835, + "valid_targets_mean": 4719.0, + "valid_targets_min": 731 + }, + { + "epoch": 1.2209302325581395, + "grad_norm": 0.49925446777716886, + "learning_rate": 3.9334062598748986e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11280782520771027, + "step": 840, + "valid_targets_mean": 3761.0, + "valid_targets_min": 624 + }, + { + "epoch": 1.2281976744186047, + "grad_norm": 0.5011178404184398, + "learning_rate": 3.931538615935989e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12659627199172974, + "step": 845, + "valid_targets_mean": 3629.0, + "valid_targets_min": 695 + }, + { + "epoch": 1.2354651162790697, + "grad_norm": 0.4930281326129707, + "learning_rate": 3.929645599403684e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11583171039819717, + "step": 850, + "valid_targets_mean": 3803.5, + "valid_targets_min": 518 + }, + { + "epoch": 1.2427325581395348, + "grad_norm": 0.5801439996466439, + "learning_rate": 3.9277272351445524e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13249947130680084, + "step": 855, + "valid_targets_mean": 2848.5, + "valid_targets_min": 795 + }, + { + "epoch": 1.25, + "grad_norm": 0.5622353243784237, + "learning_rate": 3.9257835483581306e-05, + "loss": 0.2965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13136523962020874, + "step": 860, + "valid_targets_mean": 3588.1, + "valid_targets_min": 868 + }, + { + "epoch": 1.2572674418604652, + "grad_norm": 0.6131025997715869, + "learning_rate": 3.923814564576589e-05, + "loss": 0.287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15739820897579193, + "step": 865, + "valid_targets_mean": 3525.5, + "valid_targets_min": 700 + }, + { + "epoch": 1.2645348837209303, + "grad_norm": 0.5310602027404243, + "learning_rate": 3.921820309664398e-05, + "loss": 0.2969, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.1796557605266571, + "step": 870, + "valid_targets_mean": 4290.6, + "valid_targets_min": 1106 + }, + { + "epoch": 1.2718023255813953, + "grad_norm": 0.5665637728934327, + "learning_rate": 3.91980080981799e-05, + "loss": 0.2987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15872395038604736, + "step": 875, + "valid_targets_mean": 3355.1, + "valid_targets_min": 479 + }, + { + "epoch": 1.2790697674418605, + "grad_norm": 0.5123883343895528, + "learning_rate": 3.917756091565414e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12424245476722717, + "step": 880, + "valid_targets_mean": 3292.4, + "valid_targets_min": 591 + }, + { + "epoch": 1.2863372093023255, + "grad_norm": 0.5418967521351696, + "learning_rate": 3.915686181765983e-05, + "loss": 0.2985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12708592414855957, + "step": 885, + "valid_targets_mean": 2999.8, + "valid_targets_min": 655 + }, + { + "epoch": 1.2936046511627908, + "grad_norm": 0.5430718445751821, + "learning_rate": 3.9135911076099286e-05, + "loss": 0.2912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1379009634256363, + "step": 890, + "valid_targets_mean": 3595.6, + "valid_targets_min": 556 + }, + { + "epoch": 1.3008720930232558, + "grad_norm": 0.526588721715433, + "learning_rate": 3.9114708966180385e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15102040767669678, + "step": 895, + "valid_targets_mean": 4072.5, + "valid_targets_min": 891 + }, + { + "epoch": 1.308139534883721, + "grad_norm": 0.5047952119967307, + "learning_rate": 3.909325576641298e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1750989854335785, + "step": 900, + "valid_targets_mean": 5263.9, + "valid_targets_min": 3460 + }, + { + "epoch": 1.315406976744186, + "grad_norm": 0.496714389262161, + "learning_rate": 3.907155175860519e-05, + "loss": 0.2764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12206344306468964, + "step": 905, + "valid_targets_mean": 
3841.0, + "valid_targets_min": 840 + }, + { + "epoch": 1.322674418604651, + "grad_norm": 0.548533268898909, + "learning_rate": 3.904959722785978e-05, + "loss": 0.2991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1320100873708725, + "step": 910, + "valid_targets_mean": 3564.2, + "valid_targets_min": 428 + }, + { + "epoch": 1.3299418604651163, + "grad_norm": 0.5709238019153562, + "learning_rate": 3.902739246257035e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16724197566509247, + "step": 915, + "valid_targets_mean": 3494.5, + "valid_targets_min": 771 + }, + { + "epoch": 1.3372093023255813, + "grad_norm": 0.6026091154187614, + "learning_rate": 3.9004937754417576e-05, + "loss": 0.2879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1599448174238205, + "step": 920, + "valid_targets_mean": 3057.8, + "valid_targets_min": 590 + }, + { + "epoch": 1.3444767441860466, + "grad_norm": 0.5583822762143399, + "learning_rate": 3.898223339836535e-05, + "loss": 0.2905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13246658444404602, + "step": 925, + "valid_targets_mean": 2905.2, + "valid_targets_min": 332 + }, + { + "epoch": 1.3517441860465116, + "grad_norm": 0.4972947567573365, + "learning_rate": 3.8959279692656965e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11695317924022675, + "step": 930, + "valid_targets_mean": 3299.1, + "valid_targets_min": 763 + }, + { + "epoch": 1.3590116279069768, + "grad_norm": 0.6349529472272955, + "learning_rate": 3.893607693881113e-05, + "loss": 0.2888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13736936450004578, + "step": 935, + "valid_targets_mean": 2173.0, + "valid_targets_min": 570 + }, + { + "epoch": 1.3662790697674418, + "grad_norm": 0.5235242978606963, + "learning_rate": 3.8912625441618034e-05, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22577857971191406, + "step": 940, + "valid_targets_mean": 5353.6, + "valid_targets_min": 779 + }, + { + "epoch": 1.3735465116279069, + "grad_norm": 
0.507118960736435, + "learning_rate": 3.8888925509135365e-05, + "loss": 0.2873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15891097486019135, + "step": 945, + "valid_targets_mean": 4224.1, + "valid_targets_min": 565 + }, + { + "epoch": 1.380813953488372, + "grad_norm": 0.5727509164803489, + "learning_rate": 3.8864977452684235e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11320075392723083, + "step": 950, + "valid_targets_mean": 3410.9, + "valid_targets_min": 671 + }, + { + "epoch": 1.3880813953488373, + "grad_norm": 0.6089894761450095, + "learning_rate": 3.88407815868451e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12657169997692108, + "step": 955, + "valid_targets_mean": 2158.1, + "valid_targets_min": 587 + }, + { + "epoch": 1.3953488372093024, + "grad_norm": 0.5874352781433988, + "learning_rate": 3.8816338229453616e-05, + "loss": 0.2948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15347419679164886, + "step": 960, + "valid_targets_mean": 3367.5, + "valid_targets_min": 666 + }, + { + "epoch": 1.4026162790697674, + "grad_norm": 0.6166326785595897, + "learning_rate": 3.879164770159651e-05, + "loss": 0.2839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14318858087062836, + "step": 965, + "valid_targets_mean": 3344.9, + "valid_targets_min": 608 + }, + { + "epoch": 1.4098837209302326, + "grad_norm": 0.557062845918987, + "learning_rate": 3.8766710327607275e-05, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1791156828403473, + "step": 970, + "valid_targets_mean": 4556.5, + "valid_targets_min": 563 + }, + { + "epoch": 1.4171511627906976, + "grad_norm": 0.5911823376876226, + "learning_rate": 3.874152643506202e-05, + "loss": 0.2785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14642855525016785, + "step": 975, + "valid_targets_mean": 2476.5, + "valid_targets_min": 562 + }, + { + "epoch": 1.4244186046511627, + "grad_norm": 0.6477050154264477, + "learning_rate": 3.871609635477507e-05, + "loss": 0.3037, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.17030063271522522, + "step": 980, + "valid_targets_mean": 3118.4, + "valid_targets_min": 690 + }, + { + "epoch": 1.431686046511628, + "grad_norm": 0.5264548444989446, + "learning_rate": 3.869042042079467e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12108216434717178, + "step": 985, + "valid_targets_mean": 3474.5, + "valid_targets_min": 569 + }, + { + "epoch": 1.4389534883720931, + "grad_norm": 0.5521838915160564, + "learning_rate": 3.866449897039859e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1818712055683136, + "step": 990, + "valid_targets_mean": 4667.5, + "valid_targets_min": 496 + }, + { + "epoch": 1.4462209302325582, + "grad_norm": 0.5572670157606036, + "learning_rate": 3.863833234408969e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0819392278790474, + "step": 995, + "valid_targets_mean": 1904.2, + "valid_targets_min": 507 + }, + { + "epoch": 1.4534883720930232, + "grad_norm": 0.5393974949240506, + "learning_rate": 3.861192088559144e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14820337295532227, + "step": 1000, + "valid_targets_mean": 3899.6, + "valid_targets_min": 696 + }, + { + "epoch": 1.4607558139534884, + "grad_norm": 0.5683915302410261, + "learning_rate": 3.858526494184344e-05, + "loss": 0.3142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12288520485162735, + "step": 1005, + "valid_targets_mean": 2486.5, + "valid_targets_min": 483 + }, + { + "epoch": 1.4680232558139534, + "grad_norm": 0.5783311941630546, + "learning_rate": 3.8558364862996796e-05, + "loss": 0.2925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13479480147361755, + "step": 1010, + "valid_targets_mean": 2884.5, + "valid_targets_min": 723 + }, + { + "epoch": 1.4752906976744187, + "grad_norm": 0.6207113680745554, + "learning_rate": 3.853122100240959e-05, + "loss": 0.3007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16201730072498322, + "step": 1015, + 
"valid_targets_mean": 3314.5, + "valid_targets_min": 627 + }, + { + "epoch": 1.4825581395348837, + "grad_norm": 0.5494676329745364, + "learning_rate": 3.850383371664219e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14080458879470825, + "step": 1020, + "valid_targets_mean": 3708.6, + "valid_targets_min": 679 + }, + { + "epoch": 1.489825581395349, + "grad_norm": 0.5543402259634419, + "learning_rate": 3.8476203365452607e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14013566076755524, + "step": 1025, + "valid_targets_mean": 2788.2, + "valid_targets_min": 662 + }, + { + "epoch": 1.497093023255814, + "grad_norm": 0.5148659577451574, + "learning_rate": 3.8448330311791735e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16535460948944092, + "step": 1030, + "valid_targets_mean": 5268.8, + "valid_targets_min": 3758 + }, + { + "epoch": 1.504360465116279, + "grad_norm": 0.5245117965273571, + "learning_rate": 3.842021492179858e-05, + "loss": 0.2776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10683688521385193, + "step": 1035, + "valid_targets_mean": 2680.6, + "valid_targets_min": 736 + }, + { + "epoch": 1.5116279069767442, + "grad_norm": 0.4994303593565894, + "learning_rate": 3.8391857564795494e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13968025147914886, + "step": 1040, + "valid_targets_mean": 4212.2, + "valid_targets_min": 534 + }, + { + "epoch": 1.5188953488372094, + "grad_norm": 0.598042521831162, + "learning_rate": 3.836325861328328e-05, + "loss": 0.2993, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1949174851179123, + "step": 1045, + "valid_targets_mean": 3461.8, + "valid_targets_min": 623 + }, + { + "epoch": 1.5261627906976745, + "grad_norm": 0.5229294300773233, + "learning_rate": 3.83344184429363e-05, + "loss": 0.288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14252832531929016, + "step": 1050, + "valid_targets_mean": 3490.0, + "valid_targets_min": 159 + }, + { + "epoch": 
1.5334302325581395, + "grad_norm": 0.4725224756008864, + "learning_rate": 3.83053374325976e-05, + "loss": 0.2854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15991975367069244, + "step": 1055, + "valid_targets_mean": 5041.9, + "valid_targets_min": 1040 + }, + { + "epoch": 1.5406976744186047, + "grad_norm": 0.4953697177639167, + "learning_rate": 3.827601596427383e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15688475966453552, + "step": 1060, + "valid_targets_mean": 4240.1, + "valid_targets_min": 881 + }, + { + "epoch": 1.5479651162790697, + "grad_norm": 0.5420901673992, + "learning_rate": 3.824645442313033e-05, + "loss": 0.2896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1448085904121399, + "step": 1065, + "valid_targets_mean": 3815.6, + "valid_targets_min": 699 + }, + { + "epoch": 1.5552325581395348, + "grad_norm": 0.5260497967025354, + "learning_rate": 3.8216653197486004e-05, + "loss": 0.2821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13990908861160278, + "step": 1070, + "valid_targets_mean": 4333.5, + "valid_targets_min": 620 + }, + { + "epoch": 1.5625, + "grad_norm": 0.49690016233693, + "learning_rate": 3.818661267880823e-05, + "loss": 0.3081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1375221610069275, + "step": 1075, + "valid_targets_mean": 4047.8, + "valid_targets_min": 847 + }, + { + "epoch": 1.5697674418604652, + "grad_norm": 0.5168520175806998, + "learning_rate": 3.815633326170776e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10871457308530807, + "step": 1080, + "valid_targets_mean": 3121.6, + "valid_targets_min": 679 + }, + { + "epoch": 1.5770348837209303, + "grad_norm": 0.5329589909031591, + "learning_rate": 3.812581534393347e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447029411792755, + "step": 1085, + "valid_targets_mean": 3910.0, + "valid_targets_min": 716 + }, + { + "epoch": 1.5843023255813953, + "grad_norm": 0.5281908929906156, + "learning_rate": 3.809505932636717e-05, + "loss": 
0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16305357217788696, + "step": 1090, + "valid_targets_mean": 4355.1, + "valid_targets_min": 660 + }, + { + "epoch": 1.5915697674418605, + "grad_norm": 0.6180623277335446, + "learning_rate": 3.8064065613018376e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.151731476187706, + "step": 1095, + "valid_targets_mean": 3038.8, + "valid_targets_min": 527 + }, + { + "epoch": 1.5988372093023255, + "grad_norm": 0.5003352743399826, + "learning_rate": 3.803283461101892e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14992684125900269, + "step": 1100, + "valid_targets_mean": 4808.9, + "valid_targets_min": 727 + }, + { + "epoch": 1.6061046511627906, + "grad_norm": 0.5582492530325496, + "learning_rate": 3.800136673061765e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15017104148864746, + "step": 1105, + "valid_targets_mean": 3318.8, + "valid_targets_min": 599 + }, + { + "epoch": 1.6133720930232558, + "grad_norm": 0.48010674994635283, + "learning_rate": 3.796966238517508e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09976556897163391, + "step": 1110, + "valid_targets_mean": 3718.4, + "valid_targets_min": 456 + }, + { + "epoch": 1.620639534883721, + "grad_norm": 0.47774041262137673, + "learning_rate": 3.793772199115786e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15033277869224548, + "step": 1115, + "valid_targets_mean": 4473.0, + "valid_targets_min": 635 + }, + { + "epoch": 1.627906976744186, + "grad_norm": 0.508476646299974, + "learning_rate": 3.790554596813339e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13769778609275818, + "step": 1120, + "valid_targets_mean": 3927.5, + "valid_targets_min": 615 + }, + { + "epoch": 1.635174418604651, + "grad_norm": 0.5352158793527734, + "learning_rate": 3.787313473876429e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13205015659332275, + "step": 1125, + 
"valid_targets_mean": 3941.2, + "valid_targets_min": 741 + }, + { + "epoch": 1.6424418604651163, + "grad_norm": 0.551833691323417, + "learning_rate": 3.784048872880282e-05, + "loss": 0.2726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14076612889766693, + "step": 1130, + "valid_targets_mean": 3053.6, + "valid_targets_min": 545 + }, + { + "epoch": 1.6497093023255816, + "grad_norm": 0.48623918737753347, + "learning_rate": 3.780760836708531e-05, + "loss": 0.2761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1337115466594696, + "step": 1135, + "valid_targets_mean": 4660.6, + "valid_targets_min": 3615 + }, + { + "epoch": 1.6569767441860463, + "grad_norm": 0.5265160121639417, + "learning_rate": 3.777449408552653e-05, + "loss": 0.2861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15630273520946503, + "step": 1140, + "valid_targets_mean": 3580.8, + "valid_targets_min": 709 + }, + { + "epoch": 1.6642441860465116, + "grad_norm": 0.6484297812170937, + "learning_rate": 3.774114631911401e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14704245328903198, + "step": 1145, + "valid_targets_mean": 3001.8, + "valid_targets_min": 697 + }, + { + "epoch": 1.6715116279069768, + "grad_norm": 0.477713717581914, + "learning_rate": 3.770756550590231e-05, + "loss": 0.2746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12983140349388123, + "step": 1150, + "valid_targets_mean": 4782.8, + "valid_targets_min": 2777 + }, + { + "epoch": 1.6787790697674418, + "grad_norm": 0.5302430878469313, + "learning_rate": 3.767375208700729e-05, + "loss": 0.2878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15250588953495026, + "step": 1155, + "valid_targets_mean": 3942.6, + "valid_targets_min": 578 + }, + { + "epoch": 1.6860465116279069, + "grad_norm": 1.0261147639745576, + "learning_rate": 3.763970650660032e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15077000856399536, + "step": 1160, + "valid_targets_mean": 4066.6, + "valid_targets_min": 766 + }, + { + "epoch": 
1.693313953488372, + "grad_norm": 0.5350975912828917, + "learning_rate": 3.76054292119024e-05, + "loss": 0.2782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12772193551063538, + "step": 1165, + "valid_targets_mean": 3557.6, + "valid_targets_min": 850 + }, + { + "epoch": 1.7005813953488373, + "grad_norm": 0.5502847590101302, + "learning_rate": 3.7570920653178355e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1232425644993782, + "step": 1170, + "valid_targets_mean": 2857.4, + "valid_targets_min": 626 + }, + { + "epoch": 1.7078488372093024, + "grad_norm": 0.5051049922272837, + "learning_rate": 3.753618128373086e-05, + "loss": 0.2802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1397297978401184, + "step": 1175, + "valid_targets_mean": 4253.1, + "valid_targets_min": 496 + }, + { + "epoch": 1.7151162790697674, + "grad_norm": 0.5075044635586806, + "learning_rate": 3.75012115598945e-05, + "loss": 0.2834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724143773317337, + "step": 1180, + "valid_targets_mean": 4327.4, + "valid_targets_min": 532 + }, + { + "epoch": 1.7223837209302326, + "grad_norm": 0.5879909794286214, + "learning_rate": 3.7466011941029806e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17726115882396698, + "step": 1185, + "valid_targets_mean": 2947.8, + "valid_targets_min": 612 + }, + { + "epoch": 1.7296511627906976, + "grad_norm": 0.4971577954565775, + "learning_rate": 3.743058288951719e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12558473646640778, + "step": 1190, + "valid_targets_mean": 3530.9, + "valid_targets_min": 782 + }, + { + "epoch": 1.7369186046511627, + "grad_norm": 0.5910499715345675, + "learning_rate": 3.739492487075087e-05, + "loss": 0.2999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18601062893867493, + "step": 1195, + "valid_targets_mean": 4114.2, + "valid_targets_min": 305 + }, + { + "epoch": 1.744186046511628, + "grad_norm": 0.6130589859372811, + "learning_rate": 
3.735903835313278e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10830438137054443, + "step": 1200, + "valid_targets_mean": 4013.6, + "valid_targets_min": 727 + }, + { + "epoch": 1.7514534883720931, + "grad_norm": 0.498529960515945, + "learning_rate": 3.7322923808066394e-05, + "loss": 0.3053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1307220160961151, + "step": 1205, + "valid_targets_mean": 3688.9, + "valid_targets_min": 624 + }, + { + "epoch": 1.7587209302325582, + "grad_norm": 0.5482650416789371, + "learning_rate": 3.728658170995055e-05, + "loss": 0.2919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16106528043746948, + "step": 1210, + "valid_targets_mean": 3593.9, + "valid_targets_min": 768 + }, + { + "epoch": 1.7659883720930232, + "grad_norm": 0.5477151270821805, + "learning_rate": 3.72500125361732e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13051432371139526, + "step": 1215, + "valid_targets_mean": 2720.8, + "valid_targets_min": 729 + }, + { + "epoch": 1.7732558139534884, + "grad_norm": 0.4407679268853417, + "learning_rate": 3.7213216767105165e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13464666903018951, + "step": 1220, + "valid_targets_mean": 4206.9, + "valid_targets_min": 626 + }, + { + "epoch": 1.7805232558139537, + "grad_norm": 0.5940188606823525, + "learning_rate": 3.71761948860938e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14552396535873413, + "step": 1225, + "valid_targets_mean": 2963.9, + "valid_targets_min": 440 + }, + { + "epoch": 1.7877906976744184, + "grad_norm": 0.5095933376877391, + "learning_rate": 3.713894737945666e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13193480670452118, + "step": 1230, + "valid_targets_mean": 3336.2, + "valid_targets_min": 651 + }, + { + "epoch": 1.7950581395348837, + "grad_norm": 0.5624492527204488, + "learning_rate": 3.71014747364751e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.17303186655044556, + "step": 1235, + "valid_targets_mean": 3972.0, + "valid_targets_min": 497 + }, + { + "epoch": 1.802325581395349, + "grad_norm": 0.48051926305800385, + "learning_rate": 3.7063777449387875e-05, + "loss": 0.2745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10240213572978973, + "step": 1240, + "valid_targets_mean": 2702.4, + "valid_targets_min": 483 + }, + { + "epoch": 1.809593023255814, + "grad_norm": 0.5544074964305465, + "learning_rate": 3.702585601338461e-05, + "loss": 0.278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14542116224765778, + "step": 1245, + "valid_targets_mean": 3104.0, + "valid_targets_min": 748 + }, + { + "epoch": 1.816860465116279, + "grad_norm": 0.4922917541072786, + "learning_rate": 3.698771092659939e-05, + "loss": 0.2868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14152829349040985, + "step": 1250, + "valid_targets_mean": 3484.1, + "valid_targets_min": 760 + }, + { + "epoch": 1.8241279069767442, + "grad_norm": 0.5277454142458948, + "learning_rate": 3.694934269010414e-05, + "loss": 0.2821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14739757776260376, + "step": 1255, + "valid_targets_mean": 4395.9, + "valid_targets_min": 774 + }, + { + "epoch": 1.8313953488372094, + "grad_norm": 0.5099127031213423, + "learning_rate": 3.691075180790207e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12729209661483765, + "step": 1260, + "valid_targets_mean": 3339.8, + "valid_targets_min": 692 + }, + { + "epoch": 1.8386627906976745, + "grad_norm": 0.9200745360414155, + "learning_rate": 3.6871938786921044e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14698383212089539, + "step": 1265, + "valid_targets_mean": 3025.5, + "valid_targets_min": 476 + }, + { + "epoch": 1.8459302325581395, + "grad_norm": 0.6084589743816421, + "learning_rate": 3.683290413700695e-05, + "loss": 0.2981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15879669785499573, + "step": 1270, + "valid_targets_mean": 2718.5, + 
"valid_targets_min": 627 + }, + { + "epoch": 1.8531976744186047, + "grad_norm": 0.4727978345525131, + "learning_rate": 3.679364837091696e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.180962935090065, + "step": 1275, + "valid_targets_mean": 5369.4, + "valid_targets_min": 3737 + }, + { + "epoch": 1.8604651162790697, + "grad_norm": 0.4700331160562772, + "learning_rate": 3.675417200431284e-05, + "loss": 0.2835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13727298378944397, + "step": 1280, + "valid_targets_mean": 4292.6, + "valid_targets_min": 820 + }, + { + "epoch": 1.8677325581395348, + "grad_norm": 0.4849236576687317, + "learning_rate": 3.671447555575413e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.127630814909935, + "step": 1285, + "valid_targets_mean": 3766.6, + "valid_targets_min": 620 + }, + { + "epoch": 1.875, + "grad_norm": 0.5276934366840089, + "learning_rate": 3.667455954669138e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15197885036468506, + "step": 1290, + "valid_targets_mean": 3873.1, + "valid_targets_min": 803 + }, + { + "epoch": 1.8822674418604652, + "grad_norm": 0.7132774309429776, + "learning_rate": 3.663442450145926e-05, + "loss": 0.2895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1851046234369278, + "step": 1295, + "valid_targets_mean": 3551.5, + "valid_targets_min": 795 + }, + { + "epoch": 1.8895348837209303, + "grad_norm": 0.47800429867582084, + "learning_rate": 3.6594070947269675e-05, + "loss": 0.2821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1899186372756958, + "step": 1300, + "valid_targets_mean": 5676.8, + "valid_targets_min": 4211 + }, + { + "epoch": 1.8968023255813953, + "grad_norm": 0.5684669309927136, + "learning_rate": 3.65534994142049e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318185180425644, + "step": 1305, + "valid_targets_mean": 2172.1, + "valid_targets_min": 543 + }, + { + "epoch": 1.9040697674418605, + "grad_norm": 0.6094430087247472, + 
"learning_rate": 3.6512710435210496e-05, + "loss": 0.2936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12776127457618713, + "step": 1310, + "valid_targets_mean": 2547.8, + "valid_targets_min": 509 + }, + { + "epoch": 1.9113372093023255, + "grad_norm": 0.7826305476307706, + "learning_rate": 3.647170454608846e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12746961414813995, + "step": 1315, + "valid_targets_mean": 3581.9, + "valid_targets_min": 707 + }, + { + "epoch": 1.9186046511627906, + "grad_norm": 0.5711124612833212, + "learning_rate": 3.643048228549006e-05, + "loss": 0.2843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16838140785694122, + "step": 1320, + "valid_targets_mean": 3616.0, + "valid_targets_min": 696 + }, + { + "epoch": 1.9258720930232558, + "grad_norm": 1.088493409088202, + "learning_rate": 3.638904419490882e-05, + "loss": 0.3061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15989214181900024, + "step": 1325, + "valid_targets_mean": 4129.2, + "valid_targets_min": 681 + }, + { + "epoch": 1.933139534883721, + "grad_norm": 0.5637639370882057, + "learning_rate": 3.6347390818673406e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13954320549964905, + "step": 1330, + "valid_targets_mean": 3279.0, + "valid_targets_min": 756 + }, + { + "epoch": 1.940406976744186, + "grad_norm": 0.5270815044047604, + "learning_rate": 3.6305522703940446e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14545057713985443, + "step": 1335, + "valid_targets_mean": 4070.2, + "valid_targets_min": 652 + }, + { + "epoch": 1.947674418604651, + "grad_norm": 0.5231244830899889, + "learning_rate": 3.626344040068738e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12065362930297852, + "step": 1340, + "valid_targets_mean": 2788.0, + "valid_targets_min": 621 + }, + { + "epoch": 1.9549418604651163, + "grad_norm": 0.48402756241669825, + "learning_rate": 3.622114446170522e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.11438912153244019, + "step": 1345, + "valid_targets_mean": 3629.8, + "valid_targets_min": 636 + }, + { + "epoch": 1.9622093023255816, + "grad_norm": 0.4797720882059406, + "learning_rate": 3.617863544259127e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13530614972114563, + "step": 1350, + "valid_targets_mean": 4314.6, + "valid_targets_min": 907 + }, + { + "epoch": 1.9694767441860463, + "grad_norm": 0.4840730467813608, + "learning_rate": 3.613591390174185e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13704459369182587, + "step": 1355, + "valid_targets_mean": 3770.6, + "valid_targets_min": 856 + }, + { + "epoch": 1.9767441860465116, + "grad_norm": 0.5333319045054723, + "learning_rate": 3.609298040034496e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10413102060556412, + "step": 1360, + "valid_targets_mean": 2400.1, + "valid_targets_min": 467 + }, + { + "epoch": 1.9840116279069768, + "grad_norm": 0.5848922636884715, + "learning_rate": 3.6049835502372894e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1518581211566925, + "step": 1365, + "valid_targets_mean": 3005.8, + "valid_targets_min": 129 + }, + { + "epoch": 1.9912790697674418, + "grad_norm": 0.47721552430914416, + "learning_rate": 3.6006479774574846e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13737758994102478, + "step": 1370, + "valid_targets_mean": 4180.2, + "valid_targets_min": 994 + }, + { + "epoch": 1.9985465116279069, + "grad_norm": 0.5364106208178079, + "learning_rate": 3.596291378646947e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09656524658203125, + "step": 1375, + "valid_targets_mean": 1684.8, + "valid_targets_min": 516 + }, + { + "epoch": 2.005813953488372, + "grad_norm": 0.5376377641998679, + "learning_rate": 3.591913811033736e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08827735483646393, + "step": 1380, + "valid_targets_mean": 2703.1, + 
"valid_targets_min": 374 + }, + { + "epoch": 2.0130813953488373, + "grad_norm": 0.5848775272946124, + "learning_rate": 3.58751533212136e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18071289360523224, + "step": 1385, + "valid_targets_mean": 3698.2, + "valid_targets_min": 646 + }, + { + "epoch": 2.020348837209302, + "grad_norm": 0.5127749221390476, + "learning_rate": 3.5830959996880134e-05, + "loss": 0.263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15570735931396484, + "step": 1390, + "valid_targets_mean": 4655.9, + "valid_targets_min": 948 + }, + { + "epoch": 2.0276162790697674, + "grad_norm": 0.4743043393339027, + "learning_rate": 3.578655871785824e-05, + "loss": 0.2709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1249096691608429, + "step": 1395, + "valid_targets_mean": 4346.9, + "valid_targets_min": 765 + }, + { + "epoch": 2.0348837209302326, + "grad_norm": 0.49366831615834167, + "learning_rate": 3.574195006740087e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12072114646434784, + "step": 1400, + "valid_targets_mean": 4137.4, + "valid_targets_min": 719 + }, + { + "epoch": 2.042151162790698, + "grad_norm": 0.49849763580575895, + "learning_rate": 3.5697134631485e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11655795574188232, + "step": 1405, + "valid_targets_mean": 4410.6, + "valid_targets_min": 1031 + }, + { + "epoch": 2.0494186046511627, + "grad_norm": 0.5234158764293848, + "learning_rate": 3.5652112998803914e-05, + "loss": 0.2763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12006824463605881, + "step": 1410, + "valid_targets_mean": 3905.4, + "valid_targets_min": 855 + }, + { + "epoch": 2.056686046511628, + "grad_norm": 0.4846891544610793, + "learning_rate": 3.560688576075951e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14693428575992584, + "step": 1415, + "valid_targets_mean": 4482.4, + "valid_targets_min": 448 + }, + { + "epoch": 2.063953488372093, + "grad_norm": 
0.47199386841521335, + "learning_rate": 3.5561453511454485e-05, + "loss": 0.278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11529761552810669, + "step": 1420, + "valid_targets_mean": 4109.1, + "valid_targets_min": 1126 + }, + { + "epoch": 2.071220930232558, + "grad_norm": 0.48678567559248875, + "learning_rate": 3.551581684768457e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1414426863193512, + "step": 1425, + "valid_targets_mean": 4217.1, + "valid_targets_min": 634 + }, + { + "epoch": 2.078488372093023, + "grad_norm": 0.6118910096762987, + "learning_rate": 3.546997636893067e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10148027539253235, + "step": 1430, + "valid_targets_mean": 2007.8, + "valid_targets_min": 487 + }, + { + "epoch": 2.0857558139534884, + "grad_norm": 0.5224485792193987, + "learning_rate": 3.542393267735098e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1559983342885971, + "step": 1435, + "valid_targets_mean": 4773.1, + "valid_targets_min": 908 + }, + { + "epoch": 2.0930232558139537, + "grad_norm": 0.554028772115384, + "learning_rate": 3.537768637777312e-05, + "loss": 0.2795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19308879971504211, + "step": 1440, + "valid_targets_mean": 4352.8, + "valid_targets_min": 741 + }, + { + "epoch": 2.1002906976744184, + "grad_norm": 0.4621989703112275, + "learning_rate": 3.533123807768612e-05, + "loss": 0.2609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1113363653421402, + "step": 1445, + "valid_targets_mean": 3969.4, + "valid_targets_min": 773 + }, + { + "epoch": 2.1075581395348837, + "grad_norm": 0.49525226186923427, + "learning_rate": 3.52845883872325e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135954812169075, + "step": 1450, + "valid_targets_mean": 4239.1, + "valid_targets_min": 890 + }, + { + "epoch": 2.114825581395349, + "grad_norm": 0.5536797366019663, + "learning_rate": 3.523773791920023e-05, + "loss": 0.2636, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.1700039952993393, + "step": 1455, + "valid_targets_mean": 3623.2, + "valid_targets_min": 670 + }, + { + "epoch": 2.1220930232558137, + "grad_norm": 0.5583987169370682, + "learning_rate": 3.51906872890147e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11081169545650482, + "step": 1460, + "valid_targets_mean": 3194.4, + "valid_targets_min": 766 + }, + { + "epoch": 2.129360465116279, + "grad_norm": 0.675993039275515, + "learning_rate": 3.514343711473058e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12630599737167358, + "step": 1465, + "valid_targets_mean": 2452.6, + "valid_targets_min": 614 + }, + { + "epoch": 2.136627906976744, + "grad_norm": 0.5146630082127743, + "learning_rate": 3.509598801702378e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1256990283727646, + "step": 1470, + "valid_targets_mean": 3438.5, + "valid_targets_min": 595 + }, + { + "epoch": 2.1438953488372094, + "grad_norm": 0.4820414562746019, + "learning_rate": 3.504834061918324e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13081446290016174, + "step": 1475, + "valid_targets_mean": 3915.8, + "valid_targets_min": 814 + }, + { + "epoch": 2.1511627906976742, + "grad_norm": 0.5260061998630678, + "learning_rate": 3.5000495547102766e-05, + "loss": 0.2664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13811007142066956, + "step": 1480, + "valid_targets_mean": 4041.0, + "valid_targets_min": 788 + }, + { + "epoch": 2.1584302325581395, + "grad_norm": 0.5293063498281043, + "learning_rate": 3.49524534292728e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1273302435874939, + "step": 1485, + "valid_targets_mean": 3316.5, + "valid_targets_min": 459 + }, + { + "epoch": 2.1656976744186047, + "grad_norm": 0.5661275028710806, + "learning_rate": 3.490421489677217e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11345352232456207, + "step": 1490, + 
"valid_targets_mean": 2989.9, + "valid_targets_min": 534 + }, + { + "epoch": 2.17296511627907, + "grad_norm": 0.5426548944493367, + "learning_rate": 3.485578058325979e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10773831605911255, + "step": 1495, + "valid_targets_mean": 2618.4, + "valid_targets_min": 796 + }, + { + "epoch": 2.1802325581395348, + "grad_norm": 0.5358415375531889, + "learning_rate": 3.480715112496634e-05, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12474462389945984, + "step": 1500, + "valid_targets_mean": 3247.8, + "valid_targets_min": 556 + }, + { + "epoch": 2.1875, + "grad_norm": 0.5091256086283701, + "learning_rate": 3.475832716068595e-05, + "loss": 0.2624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1339842975139618, + "step": 1505, + "valid_targets_mean": 3784.9, + "valid_targets_min": 612 + }, + { + "epoch": 2.1947674418604652, + "grad_norm": 0.4526849642798776, + "learning_rate": 3.47093093317677e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12192752957344055, + "step": 1510, + "valid_targets_mean": 4210.1, + "valid_targets_min": 763 + }, + { + "epoch": 2.20203488372093, + "grad_norm": 0.6418251014891065, + "learning_rate": 3.4660098282107344e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17555320262908936, + "step": 1515, + "valid_targets_mean": 3093.1, + "valid_targets_min": 571 + }, + { + "epoch": 2.2093023255813953, + "grad_norm": 0.4978248898803813, + "learning_rate": 3.461069465813871e-05, + "loss": 0.2725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14208854734897614, + "step": 1520, + "valid_targets_mean": 3839.2, + "valid_targets_min": 804 + }, + { + "epoch": 2.2165697674418605, + "grad_norm": 0.48938820010360695, + "learning_rate": 3.4561099108825323e-05, + "loss": 0.2703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09456305205821991, + "step": 1525, + "valid_targets_mean": 3440.4, + "valid_targets_min": 665 + }, + { + "epoch": 2.2238372093023258, + 
"grad_norm": 0.5494920221285224, + "learning_rate": 3.451131228565179e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09877356886863708, + "step": 1530, + "valid_targets_mean": 2651.8, + "valid_targets_min": 658 + }, + { + "epoch": 2.2311046511627906, + "grad_norm": 0.49439302152720127, + "learning_rate": 3.446133484261529e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17575523257255554, + "step": 1535, + "valid_targets_mean": 4729.0, + "valid_targets_min": 3501 + }, + { + "epoch": 2.238372093023256, + "grad_norm": 0.5307421275306936, + "learning_rate": 3.4411167436217004e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16329185664653778, + "step": 1540, + "valid_targets_mean": 4051.8, + "valid_targets_min": 693 + }, + { + "epoch": 2.245639534883721, + "grad_norm": 0.5270385519838316, + "learning_rate": 3.436081072545343e-05, + "loss": 0.2636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14654135704040527, + "step": 1545, + "valid_targets_mean": 3702.0, + "valid_targets_min": 546 + }, + { + "epoch": 2.2529069767441863, + "grad_norm": 0.5552539214544302, + "learning_rate": 3.4310265371807775e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10971195250749588, + "step": 1550, + "valid_targets_mean": 2676.8, + "valid_targets_min": 562 + }, + { + "epoch": 2.260174418604651, + "grad_norm": 0.5352876944143984, + "learning_rate": 3.4259532039241234e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13568298518657684, + "step": 1555, + "valid_targets_mean": 3398.1, + "valid_targets_min": 767 + }, + { + "epoch": 2.2674418604651163, + "grad_norm": 0.4369482033410833, + "learning_rate": 3.420861139418429e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1382427215576172, + "step": 1560, + "valid_targets_mean": 5266.9, + "valid_targets_min": 809 + }, + { + "epoch": 2.2747093023255816, + "grad_norm": 0.515678213997273, + "learning_rate": 3.4157504105527976e-05, + "loss": 
0.2667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16098284721374512, + "step": 1565, + "valid_targets_mean": 4429.6, + "valid_targets_min": 758 + }, + { + "epoch": 2.2819767441860463, + "grad_norm": 0.5097940592273604, + "learning_rate": 3.410621084461503e-05, + "loss": 0.2877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14763861894607544, + "step": 1570, + "valid_targets_mean": 3918.2, + "valid_targets_min": 759 + }, + { + "epoch": 2.2892441860465116, + "grad_norm": 0.534212765089729, + "learning_rate": 3.405473228523114e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12504583597183228, + "step": 1575, + "valid_targets_mean": 3447.0, + "valid_targets_min": 711 + }, + { + "epoch": 2.296511627906977, + "grad_norm": 0.45809911560904876, + "learning_rate": 3.4003069103596034e-05, + "loss": 0.2711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12107375264167786, + "step": 1580, + "valid_targets_mean": 4068.2, + "valid_targets_min": 513 + }, + { + "epoch": 2.303779069767442, + "grad_norm": 0.5058709902336606, + "learning_rate": 3.395122197835467e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09252685308456421, + "step": 1585, + "valid_targets_mean": 2139.4, + "valid_targets_min": 524 + }, + { + "epoch": 2.311046511627907, + "grad_norm": 0.4841137765066096, + "learning_rate": 3.389919159056825e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13058248162269592, + "step": 1590, + "valid_targets_mean": 3712.4, + "valid_targets_min": 587 + }, + { + "epoch": 2.318313953488372, + "grad_norm": 0.6314252709339271, + "learning_rate": 3.384697862370531e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12193869054317474, + "step": 1595, + "valid_targets_mean": 4012.1, + "valid_targets_min": 689 + }, + { + "epoch": 2.3255813953488373, + "grad_norm": 0.4791491037613609, + "learning_rate": 3.379458376363274e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11508563160896301, + "step": 1600, + 
"valid_targets_mean": 3614.6, + "valid_targets_min": 876 + }, + { + "epoch": 2.332848837209302, + "grad_norm": 0.4932888283945881, + "learning_rate": 3.374200769860676e-05, + "loss": 0.2614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1386955976486206, + "step": 1605, + "valid_targets_mean": 4039.6, + "valid_targets_min": 301 + }, + { + "epoch": 2.3401162790697674, + "grad_norm": 0.549901709584174, + "learning_rate": 3.368925111926391e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1280840039253235, + "step": 1610, + "valid_targets_mean": 3070.8, + "valid_targets_min": 652 + }, + { + "epoch": 2.3473837209302326, + "grad_norm": 0.5945830691027769, + "learning_rate": 3.363631471861194e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09050530195236206, + "step": 1615, + "valid_targets_mean": 1939.2, + "valid_targets_min": 508 + }, + { + "epoch": 2.354651162790698, + "grad_norm": 0.4744283976817756, + "learning_rate": 3.358319919202071e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11523960530757904, + "step": 1620, + "valid_targets_mean": 4096.6, + "valid_targets_min": 645 + }, + { + "epoch": 2.3619186046511627, + "grad_norm": 0.4847224210381442, + "learning_rate": 3.35299052372131e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13617539405822754, + "step": 1625, + "valid_targets_mean": 3855.5, + "valid_targets_min": 1132 + }, + { + "epoch": 2.369186046511628, + "grad_norm": 0.49840708948568, + "learning_rate": 3.3476433554255806e-05, + "loss": 0.2824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1390097290277481, + "step": 1630, + "valid_targets_mean": 3952.4, + "valid_targets_min": 748 + }, + { + "epoch": 2.376453488372093, + "grad_norm": 0.5001556376410387, + "learning_rate": 3.342278484555014e-05, + "loss": 0.2686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13502594828605652, + "step": 1635, + "valid_targets_mean": 3625.5, + "valid_targets_min": 597 + }, + { + "epoch": 2.383720930232558, + 
"grad_norm": 0.635635101849715, + "learning_rate": 3.336895981582282e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13348835706710815, + "step": 1640, + "valid_targets_mean": 2283.8, + "valid_targets_min": 760 + }, + { + "epoch": 2.390988372093023, + "grad_norm": 0.5264719953651338, + "learning_rate": 3.3314959172116705e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09972628951072693, + "step": 1645, + "valid_targets_mean": 2570.2, + "valid_targets_min": 592 + }, + { + "epoch": 2.3982558139534884, + "grad_norm": 0.832672751160306, + "learning_rate": 3.326078362378152e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14847052097320557, + "step": 1650, + "valid_targets_mean": 3953.1, + "valid_targets_min": 720 + }, + { + "epoch": 2.4055232558139537, + "grad_norm": 0.5583772379822669, + "learning_rate": 3.320643388246452e-05, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574249118566513, + "step": 1655, + "valid_targets_mean": 3535.5, + "valid_targets_min": 799 + }, + { + "epoch": 2.4127906976744184, + "grad_norm": 0.5046469808226955, + "learning_rate": 3.315191066210117e-05, + "loss": 0.2727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16248942911624908, + "step": 1660, + "valid_targets_mean": 4482.1, + "valid_targets_min": 614 + }, + { + "epoch": 2.4200581395348837, + "grad_norm": 0.5294602291358438, + "learning_rate": 3.309721467890571e-05, + "loss": 0.2738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385262906551361, + "step": 1665, + "valid_targets_mean": 3637.4, + "valid_targets_min": 536 + }, + { + "epoch": 2.427325581395349, + "grad_norm": 0.4704899630611958, + "learning_rate": 3.3042346651361804e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11646417528390884, + "step": 1670, + "valid_targets_mean": 3672.1, + "valid_targets_min": 692 + }, + { + "epoch": 2.4345930232558137, + "grad_norm": 0.5379513628865499, + "learning_rate": 3.298730730021309e-05, + "loss": 0.2645, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.1307636946439743, + "step": 1675, + "valid_targets_mean": 3688.2, + "valid_targets_min": 510 + }, + { + "epoch": 2.441860465116279, + "grad_norm": 0.5717757647803172, + "learning_rate": 3.2932097348453696e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13288994133472443, + "step": 1680, + "valid_targets_mean": 2601.6, + "valid_targets_min": 551 + }, + { + "epoch": 2.449127906976744, + "grad_norm": 0.5799851581117721, + "learning_rate": 3.287671752131875e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12425574660301208, + "step": 1685, + "valid_targets_mean": 2572.4, + "valid_targets_min": 509 + }, + { + "epoch": 2.4563953488372094, + "grad_norm": 0.5051100514029051, + "learning_rate": 3.282116854627485e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12104862928390503, + "step": 1690, + "valid_targets_mean": 3290.8, + "valid_targets_min": 556 + }, + { + "epoch": 2.4636627906976742, + "grad_norm": 0.4884476414997209, + "learning_rate": 3.276545115301053e-05, + "loss": 0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14237235486507416, + "step": 1695, + "valid_targets_mean": 4186.4, + "valid_targets_min": 861 + }, + { + "epoch": 2.4709302325581395, + "grad_norm": 0.453706781086496, + "learning_rate": 3.270956607342663e-05, + "loss": 0.2682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1469501554965973, + "step": 1700, + "valid_targets_mean": 4740.9, + "valid_targets_min": 546 + }, + { + "epoch": 2.4781976744186047, + "grad_norm": 0.588829728213914, + "learning_rate": 3.265351404162673e-05, + "loss": 0.2761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09756440669298172, + "step": 1705, + "valid_targets_mean": 2120.9, + "valid_targets_min": 600 + }, + { + "epoch": 2.4854651162790695, + "grad_norm": 0.4613022019381626, + "learning_rate": 3.259729579390749e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11998298764228821, + "step": 1710, + 
"valid_targets_mean": 4440.6, + "valid_targets_min": 685 + }, + { + "epoch": 2.4927325581395348, + "grad_norm": 0.5431743435441446, + "learning_rate": 3.254091206874895e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1468546837568283, + "step": 1715, + "valid_targets_mean": 3191.4, + "valid_targets_min": 819 + }, + { + "epoch": 2.5, + "grad_norm": 0.45513843549770333, + "learning_rate": 3.248436360680487e-05, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14219807088375092, + "step": 1720, + "valid_targets_mean": 4397.4, + "valid_targets_min": 1024 + }, + { + "epoch": 2.5072674418604652, + "grad_norm": 0.5024140936757149, + "learning_rate": 3.2427651150892984e-05, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15499092638492584, + "step": 1725, + "valid_targets_mean": 4194.4, + "valid_targets_min": 615 + }, + { + "epoch": 2.5145348837209305, + "grad_norm": 0.4698206860809935, + "learning_rate": 3.237077544598524e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12705135345458984, + "step": 1730, + "valid_targets_mean": 3559.4, + "valid_targets_min": 559 + }, + { + "epoch": 2.5218023255813953, + "grad_norm": 0.46964481606078307, + "learning_rate": 3.2313737239198015e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14902839064598083, + "step": 1735, + "valid_targets_mean": 4454.0, + "valid_targets_min": 575 + }, + { + "epoch": 2.5290697674418605, + "grad_norm": 0.5092078095756714, + "learning_rate": 3.22565372797823e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13162916898727417, + "step": 1740, + "valid_targets_mean": 4042.2, + "valid_targets_min": 796 + }, + { + "epoch": 2.5363372093023253, + "grad_norm": 0.4532399507806315, + "learning_rate": 3.219917631911387e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12575411796569824, + "step": 1745, + "valid_targets_mean": 4582.4, + "valid_targets_min": 765 + }, + { + "epoch": 2.5436046511627906, + 
"grad_norm": 0.4970025077230629, + "learning_rate": 3.2141655110683396e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1303708553314209, + "step": 1750, + "valid_targets_mean": 3502.8, + "valid_targets_min": 596 + }, + { + "epoch": 2.550872093023256, + "grad_norm": 0.46718017929321, + "learning_rate": 3.208397441008655e-05, + "loss": 0.2611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11836321651935577, + "step": 1755, + "valid_targets_mean": 4335.2, + "valid_targets_min": 884 + }, + { + "epoch": 2.558139534883721, + "grad_norm": 0.4799250453339413, + "learning_rate": 3.20261349750141e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1215079128742218, + "step": 1760, + "valid_targets_mean": 3528.9, + "valid_targets_min": 678 + }, + { + "epoch": 2.5654069767441863, + "grad_norm": 0.45115735719212735, + "learning_rate": 3.1968137565241936e-05, + "loss": 0.2718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1389511227607727, + "step": 1765, + "valid_targets_mean": 4299.8, + "valid_targets_min": 547 + }, + { + "epoch": 2.572674418604651, + "grad_norm": 0.46289799998348813, + "learning_rate": 3.1909982942621085e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12753698229789734, + "step": 1770, + "valid_targets_mean": 4688.4, + "valid_targets_min": 4064 + }, + { + "epoch": 2.5799418604651163, + "grad_norm": 0.5129108516304612, + "learning_rate": 3.185167187106774e-05, + "loss": 0.2698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19303864240646362, + "step": 1775, + "valid_targets_mean": 4826.6, + "valid_targets_min": 580 + }, + { + "epoch": 2.5872093023255816, + "grad_norm": 0.468090560312537, + "learning_rate": 3.179320511655317e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11583010852336884, + "step": 1780, + "valid_targets_mean": 3707.1, + "valid_targets_min": 666 + }, + { + "epoch": 2.5944767441860463, + "grad_norm": 0.4703450689865459, + "learning_rate": 3.1734583447093696e-05, + "loss": 
0.2739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12104140222072601, + "step": 1785, + "valid_targets_mean": 4700.1, + "valid_targets_min": 4102 + }, + { + "epoch": 2.6017441860465116, + "grad_norm": 0.4869676856169924, + "learning_rate": 3.167580763274061e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16636613011360168, + "step": 1790, + "valid_targets_mean": 4863.0, + "valid_targets_min": 1046 + }, + { + "epoch": 2.609011627906977, + "grad_norm": 0.4709917726328331, + "learning_rate": 3.161687844557002e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1224159300327301, + "step": 1795, + "valid_targets_mean": 3573.0, + "valid_targets_min": 581 + }, + { + "epoch": 2.616279069767442, + "grad_norm": 0.5171426552209677, + "learning_rate": 3.155779665967275e-05, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11179779469966888, + "step": 1800, + "valid_targets_mean": 2782.5, + "valid_targets_min": 743 + }, + { + "epoch": 2.623546511627907, + "grad_norm": 0.6081646224080811, + "learning_rate": 3.149856305114416e-05, + "loss": 0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10924814641475677, + "step": 1805, + "valid_targets_mean": 2735.4, + "valid_targets_min": 477 + }, + { + "epoch": 2.630813953488372, + "grad_norm": 0.4581547282472409, + "learning_rate": 3.1439178398073896e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10915856063365936, + "step": 1810, + "valid_targets_mean": 3889.6, + "valid_targets_min": 545 + }, + { + "epoch": 2.6380813953488373, + "grad_norm": 0.49063719223863217, + "learning_rate": 3.137964348053578e-05, + "loss": 0.2704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11040124297142029, + "step": 1815, + "valid_targets_mean": 3438.1, + "valid_targets_min": 642 + }, + { + "epoch": 2.645348837209302, + "grad_norm": 0.5163640076530557, + "learning_rate": 3.1319959080577464e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12543052434921265, + "step": 1820, + 
"valid_targets_mean": 3359.4, + "valid_targets_min": 905 + }, + { + "epoch": 2.6526162790697674, + "grad_norm": 0.5524842388527516, + "learning_rate": 3.12601259822102e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1084834486246109, + "step": 1825, + "valid_targets_mean": 2511.4, + "valid_targets_min": 571 + }, + { + "epoch": 2.6598837209302326, + "grad_norm": 0.4564108099525801, + "learning_rate": 3.120014497139853e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13041335344314575, + "step": 1830, + "valid_targets_mean": 4113.4, + "valid_targets_min": 751 + }, + { + "epoch": 2.667151162790698, + "grad_norm": 0.5154330112071223, + "learning_rate": 3.114001683604999e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13782872259616852, + "step": 1835, + "valid_targets_mean": 3823.5, + "valid_targets_min": 579 + }, + { + "epoch": 2.6744186046511627, + "grad_norm": 0.463060723588905, + "learning_rate": 3.1079742366004713e-05, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1174917072057724, + "step": 1840, + "valid_targets_mean": 3915.1, + "valid_targets_min": 576 + }, + { + "epoch": 2.681686046511628, + "grad_norm": 0.5961585496445909, + "learning_rate": 3.101932235302508e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10630868375301361, + "step": 1845, + "valid_targets_mean": 2058.0, + "valid_targets_min": 586 + }, + { + "epoch": 2.688953488372093, + "grad_norm": 0.5109244436734283, + "learning_rate": 3.095875759078532e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13750335574150085, + "step": 1850, + "valid_targets_mean": 4089.5, + "valid_targets_min": 605 + }, + { + "epoch": 2.696220930232558, + "grad_norm": 0.5571546603571644, + "learning_rate": 3.089804887486109e-05, + "loss": 0.2695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1483413279056549, + "step": 1855, + "valid_targets_mean": 3544.2, + "valid_targets_min": 506 + }, + { + "epoch": 2.703488372093023, + 
"grad_norm": 0.45865410869498385, + "learning_rate": 3.083719700271899e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13529357314109802, + "step": 1860, + "valid_targets_mean": 4811.1, + "valid_targets_min": 3742 + }, + { + "epoch": 2.7107558139534884, + "grad_norm": 0.48848398241959, + "learning_rate": 3.0776202773706136e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1369183510541916, + "step": 1865, + "valid_targets_mean": 4284.4, + "valid_targets_min": 773 + }, + { + "epoch": 2.7180232558139537, + "grad_norm": 0.4383701642333588, + "learning_rate": 3.0715066989039634e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12336073815822601, + "step": 1870, + "valid_targets_mean": 4134.5, + "valid_targets_min": 776 + }, + { + "epoch": 2.7252906976744184, + "grad_norm": 0.5109306368537913, + "learning_rate": 3.0653790451796065e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12276113033294678, + "step": 1875, + "valid_targets_mean": 3308.4, + "valid_targets_min": 729 + }, + { + "epoch": 2.7325581395348837, + "grad_norm": 0.48931231779838447, + "learning_rate": 3.05923739669009e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1452603042125702, + "step": 1880, + "valid_targets_mean": 4120.5, + "valid_targets_min": 509 + }, + { + "epoch": 2.739825581395349, + "grad_norm": 0.4782124055851078, + "learning_rate": 3.0530818341117974e-05, + "loss": 0.2822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19867317378520966, + "step": 1885, + "valid_targets_mean": 5492.8, + "valid_targets_min": 3347 + }, + { + "epoch": 2.7470930232558137, + "grad_norm": 0.5970060105644365, + "learning_rate": 3.046912438303887e-05, + "loss": 0.2689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.111583411693573, + "step": 1890, + "valid_targets_mean": 2921.2, + "valid_targets_min": 547 + }, + { + "epoch": 2.754360465116279, + "grad_norm": 0.5695113796330928, + "learning_rate": 3.040729290307231e-05, + "loss": 
0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445031762123108, + "step": 1895, + "valid_targets_mean": 3292.2, + "valid_targets_min": 446 + }, + { + "epoch": 2.761627906976744, + "grad_norm": 0.5380566024640052, + "learning_rate": 3.0345324713433454e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12390387058258057, + "step": 1900, + "valid_targets_mean": 2912.6, + "valid_targets_min": 467 + }, + { + "epoch": 2.7688953488372094, + "grad_norm": 0.5700404751058241, + "learning_rate": 3.0283220628133328e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17504948377609253, + "step": 1905, + "valid_targets_mean": 2833.1, + "valid_targets_min": 711 + }, + { + "epoch": 2.7761627906976747, + "grad_norm": 0.5501099285719143, + "learning_rate": 3.0220981462968038e-05, + "loss": 0.2735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11852732300758362, + "step": 1910, + "valid_targets_mean": 2703.9, + "valid_targets_min": 653 + }, + { + "epoch": 2.7834302325581395, + "grad_norm": 0.5057554599317371, + "learning_rate": 3.0158608035508107e-05, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13264814019203186, + "step": 1915, + "valid_targets_mean": 3944.9, + "valid_targets_min": 599 + }, + { + "epoch": 2.7906976744186047, + "grad_norm": 0.5250685596753014, + "learning_rate": 3.0096101165087715e-05, + "loss": 0.2628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1443638652563095, + "step": 1920, + "valid_targets_mean": 3738.5, + "valid_targets_min": 729 + }, + { + "epoch": 2.7979651162790695, + "grad_norm": 0.47324933535416364, + "learning_rate": 3.0033461672793946e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.125032439827919, + "step": 1925, + "valid_targets_mean": 3828.5, + "valid_targets_min": 743 + }, + { + "epoch": 2.8052325581395348, + "grad_norm": 0.4955622085267105, + "learning_rate": 2.9970690381456e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1393406242132187, + "step": 1930, + 
"valid_targets_mean": 3930.9, + "valid_targets_min": 603 + }, + { + "epoch": 2.8125, + "grad_norm": 0.5130630695006796, + "learning_rate": 2.990778811563438e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12806552648544312, + "step": 1935, + "valid_targets_mean": 3769.0, + "valid_targets_min": 691 + }, + { + "epoch": 2.8197674418604652, + "grad_norm": 0.6055425321713648, + "learning_rate": 2.984475570161005e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17337648570537567, + "step": 1940, + "valid_targets_mean": 3236.2, + "valid_targets_min": 867 + }, + { + "epoch": 2.8270348837209305, + "grad_norm": 0.5284530228759448, + "learning_rate": 2.978159396737363e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735924780368805, + "step": 1945, + "valid_targets_mean": 4311.1, + "valid_targets_min": 715 + }, + { + "epoch": 2.8343023255813953, + "grad_norm": 0.510598428920252, + "learning_rate": 2.9718303742614437e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15028700232505798, + "step": 1950, + "valid_targets_mean": 3324.8, + "valid_targets_min": 746 + }, + { + "epoch": 2.8415697674418605, + "grad_norm": 0.5268476903116217, + "learning_rate": 2.9654885858709678e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14516228437423706, + "step": 1955, + "valid_targets_mean": 3818.1, + "valid_targets_min": 646 + }, + { + "epoch": 2.8488372093023253, + "grad_norm": 0.6076295209820001, + "learning_rate": 2.9591341148713444e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16147010028362274, + "step": 1960, + "valid_targets_mean": 2661.6, + "valid_targets_min": 459 + }, + { + "epoch": 2.8561046511627906, + "grad_norm": 0.5399299301824988, + "learning_rate": 2.952767044734584e-05, + "loss": 0.269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16280090808868408, + "step": 1965, + "valid_targets_mean": 3895.5, + "valid_targets_min": 953 + }, + { + "epoch": 2.863372093023256, + 
"grad_norm": 0.4600176373509559, + "learning_rate": 2.946387459098196e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1348821073770523, + "step": 1970, + "valid_targets_mean": 4237.5, + "valid_targets_min": 693 + }, + { + "epoch": 2.870639534883721, + "grad_norm": 0.48877197485140134, + "learning_rate": 2.9399954417640956e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11835235357284546, + "step": 1975, + "valid_targets_mean": 3062.9, + "valid_targets_min": 736 + }, + { + "epoch": 2.8779069767441863, + "grad_norm": 0.5079540827052327, + "learning_rate": 2.9335910766974977e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13968515396118164, + "step": 1980, + "valid_targets_mean": 3577.6, + "valid_targets_min": 654 + }, + { + "epoch": 2.885174418604651, + "grad_norm": 0.4539764539432686, + "learning_rate": 2.9271744480258174e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1398291289806366, + "step": 1985, + "valid_targets_mean": 4915.9, + "valid_targets_min": 3623 + }, + { + "epoch": 2.8924418604651163, + "grad_norm": 0.49174216690290445, + "learning_rate": 2.9207456400375646e-05, + "loss": 0.2762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12449312955141068, + "step": 1990, + "valid_targets_mean": 3380.0, + "valid_targets_min": 697 + }, + { + "epoch": 2.8997093023255816, + "grad_norm": 0.5689923342535651, + "learning_rate": 2.914304737181234e-05, + "loss": 0.2845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14871911704540253, + "step": 1995, + "valid_targets_mean": 3037.6, + "valid_targets_min": 810 + }, + { + "epoch": 2.9069767441860463, + "grad_norm": 0.5300076144454899, + "learning_rate": 2.9078518240642e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14786851406097412, + "step": 2000, + "valid_targets_mean": 3872.2, + "valid_targets_min": 634 + }, + { + "epoch": 2.9142441860465116, + "grad_norm": 0.5110415901667488, + "learning_rate": 2.9013869854516028e-05, + "loss": 
0.2543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10360009968280792, + "step": 2005, + "valid_targets_mean": 2512.8, + "valid_targets_min": 567 + }, + { + "epoch": 2.921511627906977, + "grad_norm": 0.5373767986536778, + "learning_rate": 2.894910306265234e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12088128924369812, + "step": 2010, + "valid_targets_mean": 2757.8, + "valid_targets_min": 409 + }, + { + "epoch": 2.928779069767442, + "grad_norm": 0.4797863204246029, + "learning_rate": 2.888421871582423e-05, + "loss": 0.2726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13703224062919617, + "step": 2015, + "valid_targets_mean": 3601.5, + "valid_targets_min": 718 + }, + { + "epoch": 2.936046511627907, + "grad_norm": 0.45472505465476615, + "learning_rate": 2.8819217666349198e-05, + "loss": 0.2622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16491886973381042, + "step": 2020, + "valid_targets_mean": 5240.4, + "valid_targets_min": 696 + }, + { + "epoch": 2.943313953488372, + "grad_norm": 0.4669182737298547, + "learning_rate": 2.8754100768077743e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14050084352493286, + "step": 2025, + "valid_targets_mean": 4352.5, + "valid_targets_min": 556 + }, + { + "epoch": 2.9505813953488373, + "grad_norm": 0.4969448731159028, + "learning_rate": 2.8688868876382122e-05, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18493834137916565, + "step": 2030, + "valid_targets_mean": 5092.4, + "valid_targets_min": 712 + }, + { + "epoch": 2.957848837209302, + "grad_norm": 0.5489219835086259, + "learning_rate": 2.862352284814518e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16094838082790375, + "step": 2035, + "valid_targets_mean": 3309.8, + "valid_targets_min": 594 + }, + { + "epoch": 2.9651162790697674, + "grad_norm": 0.5671735062946283, + "learning_rate": 2.855806354174901e-05, + "loss": 0.2699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13544762134552002, + "step": 2040, + 
"valid_targets_mean": 2860.4, + "valid_targets_min": 727 + }, + { + "epoch": 2.9723837209302326, + "grad_norm": 0.43043332486830155, + "learning_rate": 2.8492491817063767e-05, + "loss": 0.2638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12342862784862518, + "step": 2045, + "valid_targets_mean": 4274.9, + "valid_targets_min": 692 + }, + { + "epoch": 2.979651162790698, + "grad_norm": 0.4888778059104538, + "learning_rate": 2.8426808535436294e-05, + "loss": 0.254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12098248302936554, + "step": 2050, + "valid_targets_mean": 3508.0, + "valid_targets_min": 736 + }, + { + "epoch": 2.9869186046511627, + "grad_norm": 0.4868623719386895, + "learning_rate": 2.8361014559678856e-05, + "loss": 0.2592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12850888073444366, + "step": 2055, + "valid_targets_mean": 3452.1, + "valid_targets_min": 532 + }, + { + "epoch": 2.994186046511628, + "grad_norm": 0.543954289354496, + "learning_rate": 2.8295110754057776e-05, + "loss": 0.2552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11905788630247116, + "step": 2060, + "valid_targets_mean": 2484.2, + "valid_targets_min": 753 + }, + { + "epoch": 3.001453488372093, + "grad_norm": 0.46396982089080563, + "learning_rate": 2.822909798428211e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11284950375556946, + "step": 2065, + "valid_targets_mean": 3787.8, + "valid_targets_min": 832 + }, + { + "epoch": 3.008720930232558, + "grad_norm": 0.5132727999413913, + "learning_rate": 2.8162977117492257e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12710949778556824, + "step": 2070, + "valid_targets_mean": 3843.4, + "valid_targets_min": 753 + }, + { + "epoch": 3.015988372093023, + "grad_norm": 0.5336380637710425, + "learning_rate": 2.809674902224857e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1240164041519165, + "step": 2075, + "valid_targets_mean": 3290.1, + "valid_targets_min": 748 + }, + { + "epoch": 
3.0232558139534884, + "grad_norm": 0.46893864837995336, + "learning_rate": 2.8030414568519963e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13958200812339783, + "step": 2080, + "valid_targets_mean": 4292.2, + "valid_targets_min": 855 + }, + { + "epoch": 3.0305232558139537, + "grad_norm": 0.4471657539573157, + "learning_rate": 2.796397462767245e-05, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1111362874507904, + "step": 2085, + "valid_targets_mean": 5043.8, + "valid_targets_min": 3454 + }, + { + "epoch": 3.0377906976744184, + "grad_norm": 0.5752757770371824, + "learning_rate": 2.7897430072457733e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12422748655080795, + "step": 2090, + "valid_targets_mean": 3479.4, + "valid_targets_min": 599 + }, + { + "epoch": 3.0450581395348837, + "grad_norm": 0.506386742480055, + "learning_rate": 2.7830781777001706e-05, + "loss": 0.2642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.099161796271801, + "step": 2095, + "valid_targets_mean": 3415.2, + "valid_targets_min": 483 + }, + { + "epoch": 3.052325581395349, + "grad_norm": 0.5905823245995147, + "learning_rate": 2.7764030616793017e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13965487480163574, + "step": 2100, + "valid_targets_mean": 2516.0, + "valid_targets_min": 528 + }, + { + "epoch": 3.059593023255814, + "grad_norm": 0.5387909462740589, + "learning_rate": 2.7697177468671516e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1169048324227333, + "step": 2105, + "valid_targets_mean": 3728.0, + "valid_targets_min": 552 + }, + { + "epoch": 3.066860465116279, + "grad_norm": 0.5149001542580065, + "learning_rate": 2.7630223210816765e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11615493893623352, + "step": 2110, + "valid_targets_mean": 3848.2, + "valid_targets_min": 737 + }, + { + "epoch": 3.074127906976744, + "grad_norm": 0.4829088029098308, + "learning_rate": 
2.7563168722736517e-05, + "loss": 0.2624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11458876729011536, + "step": 2115, + "valid_targets_mean": 4425.9, + "valid_targets_min": 587 + }, + { + "epoch": 3.0813953488372094, + "grad_norm": 0.6073559587335657, + "learning_rate": 2.749601488525512e-05, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13436372578144073, + "step": 2120, + "valid_targets_mean": 3094.0, + "valid_targets_min": 509 + }, + { + "epoch": 3.0886627906976742, + "grad_norm": 0.49969662117819635, + "learning_rate": 2.7428762580501982e-05, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14222140610218048, + "step": 2125, + "valid_targets_mean": 4020.0, + "valid_targets_min": 907 + }, + { + "epoch": 3.0959302325581395, + "grad_norm": 0.5204880529542931, + "learning_rate": 2.7361412691899972e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10934726893901825, + "step": 2130, + "valid_targets_mean": 3369.4, + "valid_targets_min": 774 + }, + { + "epoch": 3.1031976744186047, + "grad_norm": 0.5592250434484849, + "learning_rate": 2.7293966104153814e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0901477187871933, + "step": 2135, + "valid_targets_mean": 2053.5, + "valid_targets_min": 597 + }, + { + "epoch": 3.11046511627907, + "grad_norm": 0.5152668405523907, + "learning_rate": 2.722642370323847e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11756094545125961, + "step": 2140, + "valid_targets_mean": 3801.4, + "valid_targets_min": 741 + }, + { + "epoch": 3.1177325581395348, + "grad_norm": 0.5272762330048814, + "learning_rate": 2.7158786376387486e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13304001092910767, + "step": 2145, + "valid_targets_mean": 3903.8, + "valid_targets_min": 3133 + }, + { + "epoch": 3.125, + "grad_norm": 0.6150488499289717, + "learning_rate": 2.7091055012081376e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12594833970069885, + "step": 2150, + "valid_targets_mean": 2698.0, + "valid_targets_min": 609 + }, + { + "epoch": 3.1322674418604652, + "grad_norm": 0.4675225384238192, + "learning_rate": 2.7023230500035896e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12618444859981537, + "step": 2155, + "valid_targets_mean": 4309.4, + "valid_targets_min": 677 + }, + { + "epoch": 3.13953488372093, + "grad_norm": 0.6332751429296548, + "learning_rate": 2.6955313731190412e-05, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332174837589264, + "step": 2160, + "valid_targets_mean": 2770.2, + "valid_targets_min": 622 + }, + { + "epoch": 3.1468023255813953, + "grad_norm": 0.8559144896636022, + "learning_rate": 2.688730559769615e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12001748383045197, + "step": 2165, + "valid_targets_mean": 4595.1, + "valid_targets_min": 600 + }, + { + "epoch": 3.1540697674418605, + "grad_norm": 0.5931117865157438, + "learning_rate": 2.6819206992904508e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14907360076904297, + "step": 2170, + "valid_targets_mean": 2909.9, + "valid_targets_min": 387 + }, + { + "epoch": 3.1613372093023258, + "grad_norm": 0.5490654340936475, + "learning_rate": 2.6751018811355307e-05, + "loss": 0.2463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10953619331121445, + "step": 2175, + "valid_targets_mean": 2857.0, + "valid_targets_min": 778 + }, + { + "epoch": 3.1686046511627906, + "grad_norm": 0.5199051841020532, + "learning_rate": 2.6682741948765047e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13250568509101868, + "step": 2180, + "valid_targets_mean": 3317.5, + "valid_targets_min": 702 + }, + { + "epoch": 3.175872093023256, + "grad_norm": 0.48877762491217913, + "learning_rate": 2.661437730201514e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1184430867433548, + "step": 2185, + "valid_targets_mean": 3868.5, + 
"valid_targets_min": 527 + }, + { + "epoch": 3.183139534883721, + "grad_norm": 0.5413434386944661, + "learning_rate": 2.654592576914011e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11572098731994629, + "step": 2190, + "valid_targets_mean": 2643.5, + "valid_targets_min": 840 + }, + { + "epoch": 3.1904069767441863, + "grad_norm": 0.4826257777252111, + "learning_rate": 2.6477388249315836e-05, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11999189108610153, + "step": 2195, + "valid_targets_mean": 4661.1, + "valid_targets_min": 700 + }, + { + "epoch": 3.197674418604651, + "grad_norm": 0.532187618449715, + "learning_rate": 2.6408765642847698e-05, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1451462358236313, + "step": 2200, + "valid_targets_mean": 3591.5, + "valid_targets_min": 612 + }, + { + "epoch": 3.2049418604651163, + "grad_norm": 0.49122871352752695, + "learning_rate": 2.6340058851158788e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12171737849712372, + "step": 2205, + "valid_targets_mean": 3890.9, + "valid_targets_min": 925 + }, + { + "epoch": 3.2122093023255816, + "grad_norm": 0.48972445158403766, + "learning_rate": 2.6271268776778032e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13735152781009674, + "step": 2210, + "valid_targets_mean": 4153.9, + "valid_targets_min": 753 + }, + { + "epoch": 3.2194767441860463, + "grad_norm": 0.46822601101112704, + "learning_rate": 2.6202396323328357e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1055523157119751, + "step": 2215, + "valid_targets_mean": 3463.5, + "valid_targets_min": 497 + }, + { + "epoch": 3.2267441860465116, + "grad_norm": 0.5416109151148425, + "learning_rate": 2.6133442395514833e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1550188809633255, + "step": 2220, + "valid_targets_mean": 3869.2, + "valid_targets_min": 313 + }, + { + "epoch": 3.234011627906977, + "grad_norm": 
0.4642704975191872, + "learning_rate": 2.606440789911276e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10453011095523834, + "step": 2225, + "valid_targets_mean": 3596.6, + "valid_targets_min": 605 + }, + { + "epoch": 3.241279069767442, + "grad_norm": 0.4661250426831108, + "learning_rate": 2.599529374095578e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135253444314003, + "step": 2230, + "valid_targets_mean": 4895.1, + "valid_targets_min": 684 + }, + { + "epoch": 3.248546511627907, + "grad_norm": 0.5039451072942192, + "learning_rate": 2.5926100828923985e-05, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12384626269340515, + "step": 2235, + "valid_targets_mean": 4168.6, + "valid_targets_min": 805 + }, + { + "epoch": 3.255813953488372, + "grad_norm": 0.46502877427362305, + "learning_rate": 2.5856830071931944e-05, + "loss": 0.2583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11560146510601044, + "step": 2240, + "valid_targets_mean": 4750.0, + "valid_targets_min": 4164 + }, + { + "epoch": 3.2630813953488373, + "grad_norm": 0.5425286991972637, + "learning_rate": 2.578748237991682e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13858449459075928, + "step": 2245, + "valid_targets_mean": 3679.4, + "valid_targets_min": 850 + }, + { + "epoch": 3.270348837209302, + "grad_norm": 0.5496468651661625, + "learning_rate": 2.571805866382638e-05, + "loss": 0.2553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.108754001557827, + "step": 2250, + "valid_targets_mean": 3159.8, + "valid_targets_min": 818 + }, + { + "epoch": 3.2776162790697674, + "grad_norm": 0.4963269407533553, + "learning_rate": 2.5648559835607047e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1490362286567688, + "step": 2255, + "valid_targets_mean": 4346.9, + "valid_targets_min": 560 + }, + { + "epoch": 3.2848837209302326, + "grad_norm": 0.5040364962140519, + "learning_rate": 2.5578986808191904e-05, + "loss": 0.2502, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.13290151953697205, + "step": 2260, + "valid_targets_mean": 4253.6, + "valid_targets_min": 882 + }, + { + "epoch": 3.292151162790698, + "grad_norm": 0.517643889938687, + "learning_rate": 2.5509340495488707e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18137292563915253, + "step": 2265, + "valid_targets_mean": 4854.5, + "valid_targets_min": 4012 + }, + { + "epoch": 3.2994186046511627, + "grad_norm": 0.5599724940409381, + "learning_rate": 2.5439621812367907e-05, + "loss": 0.2558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09955041110515594, + "step": 2270, + "valid_targets_mean": 2542.8, + "valid_targets_min": 471 + }, + { + "epoch": 3.306686046511628, + "grad_norm": 0.42419128765888825, + "learning_rate": 2.5369831674650572e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12508805096149445, + "step": 2275, + "valid_targets_mean": 5078.4, + "valid_targets_min": 714 + }, + { + "epoch": 3.313953488372093, + "grad_norm": 0.524949050763777, + "learning_rate": 2.529997099909643e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14017917215824127, + "step": 2280, + "valid_targets_mean": 3980.0, + "valid_targets_min": 867 + }, + { + "epoch": 3.321220930232558, + "grad_norm": 0.44629089394261284, + "learning_rate": 2.5230040703391775e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09534334391355515, + "step": 2285, + "valid_targets_mean": 4047.8, + "valid_targets_min": 3198 + }, + { + "epoch": 3.328488372093023, + "grad_norm": 0.5471725041255697, + "learning_rate": 2.5160041706137424e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13267022371292114, + "step": 2290, + "valid_targets_mean": 4100.4, + "valid_targets_min": 988 + }, + { + "epoch": 3.3357558139534884, + "grad_norm": 0.47706431071080707, + "learning_rate": 2.508997492683666e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1036720871925354, + "step": 2295, + 
"valid_targets_mean": 3625.9, + "valid_targets_min": 766 + }, + { + "epoch": 3.3430232558139537, + "grad_norm": 0.46979061384683135, + "learning_rate": 2.5019841285883143e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14231424033641815, + "step": 2300, + "valid_targets_mean": 4415.0, + "valid_targets_min": 746 + }, + { + "epoch": 3.3502906976744184, + "grad_norm": 0.46840855749044336, + "learning_rate": 2.4949641704548834e-05, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10291890799999237, + "step": 2305, + "valid_targets_mean": 3752.5, + "valid_targets_min": 594 + }, + { + "epoch": 3.3575581395348837, + "grad_norm": 0.4787805064264873, + "learning_rate": 2.4879377104971863e-05, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11151669174432755, + "step": 2310, + "valid_targets_mean": 4007.1, + "valid_targets_min": 570 + }, + { + "epoch": 3.364825581395349, + "grad_norm": 0.5134464582083824, + "learning_rate": 2.4809048410144467e-05, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.136276975274086, + "step": 2315, + "valid_targets_mean": 3642.0, + "valid_targets_min": 671 + }, + { + "epoch": 3.3720930232558137, + "grad_norm": 0.4545347794500175, + "learning_rate": 2.4738656543900808e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10940147936344147, + "step": 2320, + "valid_targets_mean": 3730.6, + "valid_targets_min": 953 + }, + { + "epoch": 3.379360465116279, + "grad_norm": 0.46413507712775337, + "learning_rate": 2.4668202430904872e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12754136323928833, + "step": 2325, + "valid_targets_mean": 4426.5, + "valid_targets_min": 1194 + }, + { + "epoch": 3.386627906976744, + "grad_norm": 0.49443156483084405, + "learning_rate": 2.4597686996638334e-05, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11726079881191254, + "step": 2330, + "valid_targets_mean": 3938.8, + "valid_targets_min": 800 + }, + { + "epoch": 
3.3938953488372094, + "grad_norm": 0.4885507906087015, + "learning_rate": 2.452711116738834e-05, + "loss": 0.2442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10095968097448349, + "step": 2335, + "valid_targets_mean": 3261.5, + "valid_targets_min": 626 + }, + { + "epoch": 3.4011627906976742, + "grad_norm": 0.5343896962809889, + "learning_rate": 2.4456475870235433e-05, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16157086193561554, + "step": 2340, + "valid_targets_mean": 3889.4, + "valid_targets_min": 796 + }, + { + "epoch": 3.4084302325581395, + "grad_norm": 0.4808257217663416, + "learning_rate": 2.4385782033041282e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09912088513374329, + "step": 2345, + "valid_targets_mean": 3184.6, + "valid_targets_min": 726 + }, + { + "epoch": 3.4156976744186047, + "grad_norm": 0.48604724440506825, + "learning_rate": 2.431503058443655e-05, + "loss": 0.2453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11162015795707703, + "step": 2350, + "valid_targets_mean": 3422.1, + "valid_targets_min": 667 + }, + { + "epoch": 3.4229651162790695, + "grad_norm": 0.4864948711793094, + "learning_rate": 2.4244222453808694e-05, + "loss": 0.2571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1151517927646637, + "step": 2355, + "valid_targets_mean": 4209.6, + "valid_targets_min": 857 + }, + { + "epoch": 3.4302325581395348, + "grad_norm": 0.5073998403953838, + "learning_rate": 2.4173358571289716e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16314566135406494, + "step": 2360, + "valid_targets_mean": 4723.2, + "valid_targets_min": 810 + }, + { + "epoch": 3.4375, + "grad_norm": 0.543424876760199, + "learning_rate": 2.4102439867743995e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09115774184465408, + "step": 2365, + "valid_targets_mean": 2121.8, + "valid_targets_min": 586 + }, + { + "epoch": 3.4447674418604652, + "grad_norm": 0.43803750016335147, + "learning_rate": 
2.4031467274756026e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13034743070602417, + "step": 2370, + "valid_targets_mean": 4575.1, + "valid_targets_min": 1213 + }, + { + "epoch": 3.4520348837209305, + "grad_norm": 1.022884642404884, + "learning_rate": 2.3960441724618195e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10168780386447906, + "step": 2375, + "valid_targets_mean": 3031.5, + "valid_targets_min": 627 + }, + { + "epoch": 3.4593023255813953, + "grad_norm": 0.47224341467442843, + "learning_rate": 2.3889364150318523e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11175704002380371, + "step": 2380, + "valid_targets_mean": 4139.5, + "valid_targets_min": 790 + }, + { + "epoch": 3.4665697674418605, + "grad_norm": 0.5537350873058593, + "learning_rate": 2.3818235485528438e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09244590997695923, + "step": 2385, + "valid_targets_mean": 2139.6, + "valid_targets_min": 626 + }, + { + "epoch": 3.4738372093023258, + "grad_norm": 0.48412371898831763, + "learning_rate": 2.374705666459046e-05, + "loss": 0.2562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11267127841711044, + "step": 2390, + "valid_targets_mean": 3562.9, + "valid_targets_min": 693 + }, + { + "epoch": 3.4811046511627906, + "grad_norm": 0.5112459040365906, + "learning_rate": 2.367582862250599e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11210276186466217, + "step": 2395, + "valid_targets_mean": 3276.8, + "valid_targets_min": 591 + }, + { + "epoch": 3.488372093023256, + "grad_norm": 0.4904007106936307, + "learning_rate": 2.3604552294922974e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12592178583145142, + "step": 2400, + "valid_targets_mean": 4156.2, + "valid_targets_min": 805 + }, + { + "epoch": 3.495639534883721, + "grad_norm": 0.5533230425308654, + "learning_rate": 2.353322861812364e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.10514762997627258, + "step": 2405, + "valid_targets_mean": 2583.9, + "valid_targets_min": 246 + }, + { + "epoch": 3.5029069767441863, + "grad_norm": 0.4769266460610174, + "learning_rate": 2.346185852901219e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10916031897068024, + "step": 2410, + "valid_targets_mean": 3479.1, + "valid_targets_min": 807 + }, + { + "epoch": 3.510174418604651, + "grad_norm": 0.4891779772353748, + "learning_rate": 2.3390442965102503e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259600967168808, + "step": 2415, + "valid_targets_mean": 3885.2, + "valid_targets_min": 650 + }, + { + "epoch": 3.5174418604651163, + "grad_norm": 0.45544094475272356, + "learning_rate": 2.3318982864505806e-05, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11584748327732086, + "step": 2420, + "valid_targets_mean": 4185.8, + "valid_targets_min": 480 + }, + { + "epoch": 3.5247093023255816, + "grad_norm": 0.4766624640907669, + "learning_rate": 2.324747916591836e-05, + "loss": 0.252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13836249709129333, + "step": 2425, + "valid_targets_mean": 4399.9, + "valid_targets_min": 1044 + }, + { + "epoch": 3.5319767441860463, + "grad_norm": 0.4650029600411716, + "learning_rate": 2.317593280860913e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12125978618860245, + "step": 2430, + "valid_targets_mean": 4253.5, + "valid_targets_min": 635 + }, + { + "epoch": 3.5392441860465116, + "grad_norm": 0.5028829055009032, + "learning_rate": 2.3104344732407436e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11511494219303131, + "step": 2435, + "valid_targets_mean": 3393.1, + "valid_targets_min": 986 + }, + { + "epoch": 3.546511627906977, + "grad_norm": 0.5174631620772431, + "learning_rate": 2.3032715877690622e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1459505259990692, + "step": 2440, + "valid_targets_mean": 4194.1, + 
"valid_targets_min": 750 + }, + { + "epoch": 3.553779069767442, + "grad_norm": 0.46778152593245964, + "learning_rate": 2.296104718537169e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16303589940071106, + "step": 2445, + "valid_targets_mean": 5621.5, + "valid_targets_min": 516 + }, + { + "epoch": 3.561046511627907, + "grad_norm": 0.5035401466652747, + "learning_rate": 2.2889339596886958e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12182118743658066, + "step": 2450, + "valid_targets_mean": 4179.8, + "valid_targets_min": 711 + }, + { + "epoch": 3.568313953488372, + "grad_norm": 0.4949500654402859, + "learning_rate": 2.2817594054183675e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12568366527557373, + "step": 2455, + "valid_targets_mean": 4049.9, + "valid_targets_min": 792 + }, + { + "epoch": 3.5755813953488373, + "grad_norm": 0.44379194690985047, + "learning_rate": 2.2745811499707645e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10165221989154816, + "step": 2460, + "valid_targets_mean": 3895.0, + "valid_targets_min": 702 + }, + { + "epoch": 3.582848837209302, + "grad_norm": 0.4609499146509756, + "learning_rate": 2.267399287639088e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13126936554908752, + "step": 2465, + "valid_targets_mean": 4307.1, + "valid_targets_min": 805 + }, + { + "epoch": 3.5901162790697674, + "grad_norm": 0.4231243658860352, + "learning_rate": 2.260213912763917e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1243281364440918, + "step": 2470, + "valid_targets_mean": 4944.2, + "valid_targets_min": 2949 + }, + { + "epoch": 3.5973837209302326, + "grad_norm": 0.47982014413778806, + "learning_rate": 2.2530251197319723e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.108958899974823, + "step": 2475, + "valid_targets_mean": 3449.6, + "valid_targets_min": 493 + }, + { + "epoch": 3.604651162790698, + "grad_norm": 
0.45227044431491636, + "learning_rate": 2.2458330029748736e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14309027791023254, + "step": 2480, + "valid_targets_mean": 4818.6, + "valid_targets_min": 3806 + }, + { + "epoch": 3.6119186046511627, + "grad_norm": 0.5649909883102694, + "learning_rate": 2.2386376569679036e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173073410987854, + "step": 2485, + "valid_targets_mean": 4637.1, + "valid_targets_min": 3900 + }, + { + "epoch": 3.619186046511628, + "grad_norm": 0.47610124778582524, + "learning_rate": 2.231439176228763e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1240810751914978, + "step": 2490, + "valid_targets_mean": 3676.1, + "valid_targets_min": 595 + }, + { + "epoch": 3.626453488372093, + "grad_norm": 0.46700625613611785, + "learning_rate": 2.2242376553163286e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14469364285469055, + "step": 2495, + "valid_targets_mean": 4407.5, + "valid_targets_min": 620 + }, + { + "epoch": 3.633720930232558, + "grad_norm": 0.4756222601783613, + "learning_rate": 2.217033188829416e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1307680308818817, + "step": 2500, + "valid_targets_mean": 4861.6, + "valid_targets_min": 640 + }, + { + "epoch": 3.640988372093023, + "grad_norm": 0.5348602172378426, + "learning_rate": 2.2098258714055303e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16622218489646912, + "step": 2505, + "valid_targets_mean": 3662.9, + "valid_targets_min": 744 + }, + { + "epoch": 3.6482558139534884, + "grad_norm": 0.493726960795882, + "learning_rate": 2.20261579771963e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13390150666236877, + "step": 2510, + "valid_targets_mean": 3553.0, + "valid_targets_min": 556 + }, + { + "epoch": 3.6555232558139537, + "grad_norm": 0.48859839931015386, + "learning_rate": 2.1954030624828757e-05, + "loss": 0.2532, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.14289015531539917, + "step": 2515, + "valid_targets_mean": 4269.1, + "valid_targets_min": 690 + }, + { + "epoch": 3.6627906976744184, + "grad_norm": 0.46297329620447086, + "learning_rate": 2.1881877604413927e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1050468236207962, + "step": 2520, + "valid_targets_mean": 3698.4, + "valid_targets_min": 760 + }, + { + "epoch": 3.6700581395348837, + "grad_norm": 0.5273537998892968, + "learning_rate": 2.1809699863750236e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14382243156433105, + "step": 2525, + "valid_targets_mean": 3749.8, + "valid_targets_min": 626 + }, + { + "epoch": 3.677325581395349, + "grad_norm": 0.4888083234986187, + "learning_rate": 2.1737498350960825e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12603764235973358, + "step": 2530, + "valid_targets_mean": 3474.1, + "valid_targets_min": 532 + }, + { + "epoch": 3.6845930232558137, + "grad_norm": 0.5194974843269244, + "learning_rate": 2.1665274014481112e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13663990795612335, + "step": 2535, + "valid_targets_mean": 3533.2, + "valid_targets_min": 661 + }, + { + "epoch": 3.691860465116279, + "grad_norm": 0.48415859865118777, + "learning_rate": 2.159302780304631e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14904913306236267, + "step": 2540, + "valid_targets_mean": 5181.0, + "valid_targets_min": 3993 + }, + { + "epoch": 3.699127906976744, + "grad_norm": 0.5174902951602472, + "learning_rate": 2.152076066567901e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09818239510059357, + "step": 2545, + "valid_targets_mean": 3285.4, + "valid_targets_min": 477 + }, + { + "epoch": 3.7063953488372094, + "grad_norm": 0.45632633047088905, + "learning_rate": 2.1448473551676644e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12770359218120575, + "step": 2550, + 
"valid_targets_mean": 3832.9, + "valid_targets_min": 584 + }, + { + "epoch": 3.7136627906976747, + "grad_norm": 0.49662233543481427, + "learning_rate": 2.13761674105991e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14436696469783783, + "step": 2555, + "valid_targets_mean": 4149.1, + "valid_targets_min": 544 + }, + { + "epoch": 3.7209302325581395, + "grad_norm": 0.5614542258504909, + "learning_rate": 2.130384319225617e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1258370727300644, + "step": 2560, + "valid_targets_mean": 2940.0, + "valid_targets_min": 689 + }, + { + "epoch": 3.7281976744186047, + "grad_norm": 0.45572529846881993, + "learning_rate": 2.1231501846695128e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11863773316144943, + "step": 2565, + "valid_targets_mean": 3968.6, + "valid_targets_min": 620 + }, + { + "epoch": 3.7354651162790695, + "grad_norm": 0.6015378265452319, + "learning_rate": 2.115914432418822e-05, + "loss": 0.2729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11300826072692871, + "step": 2570, + "valid_targets_mean": 2512.4, + "valid_targets_min": 305 + }, + { + "epoch": 3.7427325581395348, + "grad_norm": 0.4742783086567257, + "learning_rate": 2.1086771575220203e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11191293597221375, + "step": 2575, + "valid_targets_mean": 3764.5, + "valid_targets_min": 692 + }, + { + "epoch": 3.75, + "grad_norm": 0.616691924655716, + "learning_rate": 2.1014384550475836e-05, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12944871187210083, + "step": 2580, + "valid_targets_mean": 3347.6, + "valid_targets_min": 559 + }, + { + "epoch": 3.7572674418604652, + "grad_norm": 0.5232814265115104, + "learning_rate": 2.0941984200827402e-05, + "loss": 0.2503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0992535650730133, + "step": 2585, + "valid_targets_mean": 2465.5, + "valid_targets_min": 607 + }, + { + "epoch": 3.7645348837209305, + 
"grad_norm": 0.5258965525649895, + "learning_rate": 2.0869571477322244e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13085459172725677, + "step": 2590, + "valid_targets_mean": 3935.5, + "valid_targets_min": 763 + }, + { + "epoch": 3.7718023255813953, + "grad_norm": 0.44500756957595106, + "learning_rate": 2.079714733117021e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11444681882858276, + "step": 2595, + "valid_targets_mean": 3957.0, + "valid_targets_min": 550 + }, + { + "epoch": 3.7790697674418605, + "grad_norm": 0.5892004703984094, + "learning_rate": 2.0724712713731226e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15046679973602295, + "step": 2600, + "valid_targets_mean": 4229.1, + "valid_targets_min": 580 + }, + { + "epoch": 3.7863372093023253, + "grad_norm": 0.47684359150895883, + "learning_rate": 2.065226857650275e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1092211902141571, + "step": 2605, + "valid_targets_mean": 3531.9, + "valid_targets_min": 605 + }, + { + "epoch": 3.7936046511627906, + "grad_norm": 0.5363383293906026, + "learning_rate": 2.0579815871107304e-05, + "loss": 0.2548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16444739699363708, + "step": 2610, + "valid_targets_mean": 4181.2, + "valid_targets_min": 1082 + }, + { + "epoch": 3.800872093023256, + "grad_norm": 0.526393012658088, + "learning_rate": 2.0507355549279948e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.123906210064888, + "step": 2615, + "valid_targets_mean": 3755.0, + "valid_targets_min": 623 + }, + { + "epoch": 3.808139534883721, + "grad_norm": 0.5722494539766595, + "learning_rate": 2.04348885628558e-05, + "loss": 0.2539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10296878218650818, + "step": 2620, + "valid_targets_mean": 2965.0, + "valid_targets_min": 762 + }, + { + "epoch": 3.8154069767441863, + "grad_norm": 0.48156563178261913, + "learning_rate": 2.036241586375753e-05, + "loss": 
0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11730596423149109, + "step": 2625, + "valid_targets_mean": 3191.0, + "valid_targets_min": 784 + }, + { + "epoch": 3.822674418604651, + "grad_norm": 0.45226777167929455, + "learning_rate": 2.0289938403982834e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12582166492938995, + "step": 2630, + "valid_targets_mean": 4109.1, + "valid_targets_min": 440 + }, + { + "epoch": 3.8299418604651163, + "grad_norm": 0.5165198595050065, + "learning_rate": 2.0217457135591957e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12305717170238495, + "step": 2635, + "valid_targets_mean": 3467.5, + "valid_targets_min": 672 + }, + { + "epoch": 3.8372093023255816, + "grad_norm": 0.4587107429102401, + "learning_rate": 2.0144973010695157e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11890917271375656, + "step": 2640, + "valid_targets_mean": 4045.8, + "valid_targets_min": 497 + }, + { + "epoch": 3.8444767441860463, + "grad_norm": 0.4206311855873854, + "learning_rate": 2.0072486981440237e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1283896565437317, + "step": 2645, + "valid_targets_mean": 5033.2, + "valid_targets_min": 430 + }, + { + "epoch": 3.8517441860465116, + "grad_norm": 0.5199399403871814, + "learning_rate": 2e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13101065158843994, + "step": 2650, + "valid_targets_mean": 4126.1, + "valid_targets_min": 623 + }, + { + "epoch": 3.859011627906977, + "grad_norm": 0.530627097236436, + "learning_rate": 1.9927513018559767e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14129270613193512, + "step": 2655, + "valid_targets_mean": 3838.5, + "valid_targets_min": 646 + }, + { + "epoch": 3.866279069767442, + "grad_norm": 0.5182581748716937, + "learning_rate": 1.985502698930485e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11419911682605743, + "step": 2660, + 
"valid_targets_mean": 3422.4, + "valid_targets_min": 470 + }, + { + "epoch": 3.873546511627907, + "grad_norm": 0.637313644513782, + "learning_rate": 1.978254286440805e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13794487714767456, + "step": 2665, + "valid_targets_mean": 2414.5, + "valid_targets_min": 486 + }, + { + "epoch": 3.880813953488372, + "grad_norm": 0.5972117317746191, + "learning_rate": 1.9710061596017172e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10965625196695328, + "step": 2670, + "valid_targets_mean": 2143.2, + "valid_targets_min": 343 + }, + { + "epoch": 3.8880813953488373, + "grad_norm": 0.49321698475016185, + "learning_rate": 1.9637584136242474e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11436263471841812, + "step": 2675, + "valid_targets_mean": 3455.9, + "valid_targets_min": 834 + }, + { + "epoch": 3.895348837209302, + "grad_norm": 0.45465791335008415, + "learning_rate": 1.9565111437144204e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12260943651199341, + "step": 2680, + "valid_targets_mean": 3845.8, + "valid_targets_min": 649 + }, + { + "epoch": 3.9026162790697674, + "grad_norm": 0.4530349993977751, + "learning_rate": 1.949264445072006e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1056285873055458, + "step": 2685, + "valid_targets_mean": 4397.9, + "valid_targets_min": 3450 + }, + { + "epoch": 3.9098837209302326, + "grad_norm": 0.5537681968399243, + "learning_rate": 1.9420184128892702e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1130150854587555, + "step": 2690, + "valid_targets_mean": 2776.1, + "valid_targets_min": 590 + }, + { + "epoch": 3.917151162790698, + "grad_norm": 0.49150433914817926, + "learning_rate": 1.9347731423497255e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10140958428382874, + "step": 2695, + "valid_targets_mean": 2692.9, + "valid_targets_min": 264 + }, + { + "epoch": 
3.9244186046511627, + "grad_norm": 0.463733543118443, + "learning_rate": 1.927528728626878e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08858418464660645, + "step": 2700, + "valid_targets_mean": 3079.4, + "valid_targets_min": 605 + }, + { + "epoch": 3.931686046511628, + "grad_norm": 0.43522051270813206, + "learning_rate": 1.9202852668829796e-05, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10103869438171387, + "step": 2705, + "valid_targets_mean": 3345.5, + "valid_targets_min": 355 + }, + { + "epoch": 3.938953488372093, + "grad_norm": 0.529140425090723, + "learning_rate": 1.9130428522677762e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13211511075496674, + "step": 2710, + "valid_targets_mean": 3707.6, + "valid_targets_min": 200 + }, + { + "epoch": 3.946220930232558, + "grad_norm": 0.5146828971465376, + "learning_rate": 1.90580157991726e-05, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16741503775119781, + "step": 2715, + "valid_targets_mean": 4682.1, + "valid_targets_min": 599 + }, + { + "epoch": 3.953488372093023, + "grad_norm": 0.48420609335106185, + "learning_rate": 1.898561544952417e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10657159984111786, + "step": 2720, + "valid_targets_mean": 4151.6, + "valid_targets_min": 806 + }, + { + "epoch": 3.9607558139534884, + "grad_norm": 0.5015110455391134, + "learning_rate": 1.8913228424779807e-05, + "loss": 0.2591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1371941715478897, + "step": 2725, + "valid_targets_mean": 4309.9, + "valid_targets_min": 454 + }, + { + "epoch": 3.9680232558139537, + "grad_norm": 0.4908011439298694, + "learning_rate": 1.8840855675811788e-05, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12342453002929688, + "step": 2730, + "valid_targets_mean": 3556.0, + "valid_targets_min": 417 + }, + { + "epoch": 3.9752906976744184, + "grad_norm": 0.5588263603143331, + "learning_rate": 
1.876849815330488e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13135354220867157, + "step": 2735, + "valid_targets_mean": 2600.1, + "valid_targets_min": 418 + }, + { + "epoch": 3.9825581395348837, + "grad_norm": 0.47777874175500507, + "learning_rate": 1.869615680774384e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12580084800720215, + "step": 2740, + "valid_targets_mean": 4400.2, + "valid_targets_min": 3574 + }, + { + "epoch": 3.989825581395349, + "grad_norm": 0.44558810186639114, + "learning_rate": 1.862383258940091e-05, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13136231899261475, + "step": 2745, + "valid_targets_mean": 4948.2, + "valid_targets_min": 3743 + }, + { + "epoch": 3.9970930232558137, + "grad_norm": 0.5080444419554163, + "learning_rate": 1.8551526448323366e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12025821208953857, + "step": 2750, + "valid_targets_mean": 3291.0, + "valid_targets_min": 752 + }, + { + "epoch": 4.004360465116279, + "grad_norm": 0.48352137501238807, + "learning_rate": 1.8479239334321005e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09484989941120148, + "step": 2755, + "valid_targets_mean": 2745.0, + "valid_targets_min": 578 + }, + { + "epoch": 4.011627906976744, + "grad_norm": 0.47367899779143086, + "learning_rate": 1.84069721969537e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370040476322174, + "step": 2760, + "valid_targets_mean": 4967.2, + "valid_targets_min": 3396 + }, + { + "epoch": 4.0188953488372094, + "grad_norm": 0.54231002499266, + "learning_rate": 1.8334725985518898e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13151651620864868, + "step": 2765, + "valid_targets_mean": 3523.4, + "valid_targets_min": 542 + }, + { + "epoch": 4.026162790697675, + "grad_norm": 0.5238485007583942, + "learning_rate": 1.8262501649039178e-05, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12579235434532166, + "step": 2770, + "valid_targets_mean": 4157.2, + "valid_targets_min": 742 + }, + { + "epoch": 4.03343023255814, + "grad_norm": 0.45102153752627877, + "learning_rate": 1.819030013624977e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1172993928194046, + "step": 2775, + "valid_targets_mean": 4752.1, + "valid_targets_min": 946 + }, + { + "epoch": 4.040697674418604, + "grad_norm": 0.5285857229676709, + "learning_rate": 1.8118122395586076e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10665304958820343, + "step": 2780, + "valid_targets_mean": 3128.1, + "valid_targets_min": 343 + }, + { + "epoch": 4.0479651162790695, + "grad_norm": 0.5909177336340645, + "learning_rate": 1.8045969375171257e-05, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08512631803750992, + "step": 2785, + "valid_targets_mean": 2385.5, + "valid_targets_min": 587 + }, + { + "epoch": 4.055232558139535, + "grad_norm": 0.5095522606610547, + "learning_rate": 1.797384202280371e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1267683058977127, + "step": 2790, + "valid_targets_mean": 4405.8, + "valid_targets_min": 900 + }, + { + "epoch": 4.0625, + "grad_norm": 0.5144409488432192, + "learning_rate": 1.7901741285944703e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08859217166900635, + "step": 2795, + "valid_targets_mean": 2686.4, + "valid_targets_min": 547 + }, + { + "epoch": 4.069767441860465, + "grad_norm": 0.4876395280578797, + "learning_rate": 1.782966811170585e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10345523804426193, + "step": 2800, + "valid_targets_mean": 3438.8, + "valid_targets_min": 577 + }, + { + "epoch": 4.0770348837209305, + "grad_norm": 0.5231245613946751, + "learning_rate": 1.7757623446836718e-05, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11259502172470093, + "step": 2805, + "valid_targets_mean": 3414.0, + "valid_targets_min": 632 + }, + 
{ + "epoch": 4.084302325581396, + "grad_norm": 0.6811959784490158, + "learning_rate": 1.768560823771238e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12798871099948883, + "step": 2810, + "valid_targets_mean": 2597.0, + "valid_targets_min": 567 + }, + { + "epoch": 4.09156976744186, + "grad_norm": 0.48633463675024335, + "learning_rate": 1.761362343032097e-05, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1147952675819397, + "step": 2815, + "valid_targets_mean": 4718.0, + "valid_targets_min": 727 + }, + { + "epoch": 4.098837209302325, + "grad_norm": 0.49148880429117153, + "learning_rate": 1.754166997025127e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10578566789627075, + "step": 2820, + "valid_targets_mean": 3059.4, + "valid_targets_min": 441 + }, + { + "epoch": 4.1061046511627906, + "grad_norm": 0.5393816683284496, + "learning_rate": 1.7469748802680284e-05, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1297258585691452, + "step": 2825, + "valid_targets_mean": 3596.8, + "valid_targets_min": 427 + }, + { + "epoch": 4.113372093023256, + "grad_norm": 0.47219143164141136, + "learning_rate": 1.739786087236083e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10674222558736801, + "step": 2830, + "valid_targets_mean": 3580.6, + "valid_targets_min": 509 + }, + { + "epoch": 4.120639534883721, + "grad_norm": 0.6061970319339015, + "learning_rate": 1.7326007123609123e-05, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318361610174179, + "step": 2835, + "valid_targets_mean": 4211.0, + "valid_targets_min": 568 + }, + { + "epoch": 4.127906976744186, + "grad_norm": 0.5711398358480915, + "learning_rate": 1.7254188500292355e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10918092727661133, + "step": 2840, + "valid_targets_mean": 2878.5, + "valid_targets_min": 728 + }, + { + "epoch": 4.1351744186046515, + "grad_norm": 0.5016679889674767, + "learning_rate": 
1.718240594581633e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11699994653463364, + "step": 2845, + "valid_targets_mean": 4029.2, + "valid_targets_min": 837 + }, + { + "epoch": 4.142441860465116, + "grad_norm": 0.5587655319921159, + "learning_rate": 1.7110660403113045e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12280265986919403, + "step": 2850, + "valid_targets_mean": 3420.0, + "valid_targets_min": 608 + }, + { + "epoch": 4.149709302325581, + "grad_norm": 0.5700563652644026, + "learning_rate": 1.7038952814628312e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09359048306941986, + "step": 2855, + "valid_targets_mean": 2218.0, + "valid_targets_min": 433 + }, + { + "epoch": 4.156976744186046, + "grad_norm": 0.5315568391996941, + "learning_rate": 1.6967284122309385e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10375300794839859, + "step": 2860, + "valid_targets_mean": 2193.4, + "valid_targets_min": 603 + }, + { + "epoch": 4.164244186046512, + "grad_norm": 0.46104529117658105, + "learning_rate": 1.6895655267592567e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13019639253616333, + "step": 2865, + "valid_targets_mean": 5004.2, + "valid_targets_min": 4129 + }, + { + "epoch": 4.171511627906977, + "grad_norm": 0.5017086926945395, + "learning_rate": 1.6824067191390872e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12526315450668335, + "step": 2870, + "valid_targets_mean": 3909.9, + "valid_targets_min": 665 + }, + { + "epoch": 4.178779069767442, + "grad_norm": 0.4971611429298911, + "learning_rate": 1.675252083408164e-05, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10105648636817932, + "step": 2875, + "valid_targets_mean": 3390.0, + "valid_targets_min": 810 + }, + { + "epoch": 4.186046511627907, + "grad_norm": 0.63719445496211, + "learning_rate": 1.6681017135494194e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12060322612524033, + "step": 2880, + "valid_targets_mean": 2759.0, + "valid_targets_min": 700 + }, + { + "epoch": 4.1933139534883725, + "grad_norm": 0.6313240833284652, + "learning_rate": 1.66095570348975e-05, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14655247330665588, + "step": 2885, + "valid_targets_mean": 3164.0, + "valid_targets_min": 688 + }, + { + "epoch": 4.200581395348837, + "grad_norm": 0.48700081990187133, + "learning_rate": 1.653814147098781e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11685729026794434, + "step": 2890, + "valid_targets_mean": 4256.5, + "valid_targets_min": 885 + }, + { + "epoch": 4.207848837209302, + "grad_norm": 0.6232894954120523, + "learning_rate": 1.6466771381876365e-05, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11992734670639038, + "step": 2895, + "valid_targets_mean": 3321.0, + "valid_targets_min": 662 + }, + { + "epoch": 4.215116279069767, + "grad_norm": 0.4917083432054533, + "learning_rate": 1.639544770507703e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12429793179035187, + "step": 2900, + "valid_targets_mean": 4300.4, + "valid_targets_min": 286 + }, + { + "epoch": 4.222383720930233, + "grad_norm": 0.5210145808392682, + "learning_rate": 1.6324171377494015e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11657124012708664, + "step": 2905, + "valid_targets_mean": 3128.8, + "valid_targets_min": 749 + }, + { + "epoch": 4.229651162790698, + "grad_norm": 0.5162901164850011, + "learning_rate": 1.6252943335409542e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10562814772129059, + "step": 2910, + "valid_targets_mean": 3470.4, + "valid_targets_min": 679 + }, + { + "epoch": 4.236918604651163, + "grad_norm": 0.564598628475888, + "learning_rate": 1.6181764514471566e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09816817939281464, + "step": 2915, + "valid_targets_mean": 2720.0, + "valid_targets_min": 
545 + }, + { + "epoch": 4.2441860465116275, + "grad_norm": 0.5262034337491491, + "learning_rate": 1.611063584968148e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14536155760288239, + "step": 2920, + "valid_targets_mean": 4571.6, + "valid_targets_min": 868 + }, + { + "epoch": 4.251453488372093, + "grad_norm": 0.5443324066129965, + "learning_rate": 1.6039558275381812e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08932915329933167, + "step": 2925, + "valid_targets_mean": 2445.5, + "valid_targets_min": 691 + }, + { + "epoch": 4.258720930232558, + "grad_norm": 0.524434173056238, + "learning_rate": 1.596853272524398e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12700089812278748, + "step": 2930, + "valid_targets_mean": 4030.2, + "valid_targets_min": 459 + }, + { + "epoch": 4.265988372093023, + "grad_norm": 0.6159042780695054, + "learning_rate": 1.5897560132256008e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09032024443149567, + "step": 2935, + "valid_targets_mean": 2270.6, + "valid_targets_min": 565 + }, + { + "epoch": 4.273255813953488, + "grad_norm": 0.5937855344203165, + "learning_rate": 1.582664142871029e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1407282054424286, + "step": 2940, + "valid_targets_mean": 3228.5, + "valid_targets_min": 654 + }, + { + "epoch": 4.280523255813954, + "grad_norm": 0.544590296799247, + "learning_rate": 1.5755777546191313e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08907799422740936, + "step": 2945, + "valid_targets_mean": 2940.5, + "valid_targets_min": 597 + }, + { + "epoch": 4.287790697674419, + "grad_norm": 0.5014969531374299, + "learning_rate": 1.5684969415563456e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13449829816818237, + "step": 2950, + "valid_targets_mean": 3845.8, + "valid_targets_min": 527 + }, + { + "epoch": 4.295058139534884, + "grad_norm": 0.5494535644403714, + 
"learning_rate": 1.5614217966958725e-05, + "loss": 0.2385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10309483855962753, + "step": 2955, + "valid_targets_mean": 2985.2, + "valid_targets_min": 698 + }, + { + "epoch": 4.3023255813953485, + "grad_norm": 0.4382128256233561, + "learning_rate": 1.554352412976457e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10149140655994415, + "step": 2960, + "valid_targets_mean": 4764.4, + "valid_targets_min": 3860 + }, + { + "epoch": 4.309593023255814, + "grad_norm": 0.5049094177590457, + "learning_rate": 1.5472888832611662e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13525943458080292, + "step": 2965, + "valid_targets_mean": 4419.8, + "valid_targets_min": 666 + }, + { + "epoch": 4.316860465116279, + "grad_norm": 0.6480364173889341, + "learning_rate": 1.5402313003361676e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08497101068496704, + "step": 2970, + "valid_targets_mean": 1814.9, + "valid_targets_min": 676 + }, + { + "epoch": 4.324127906976744, + "grad_norm": 0.6067490783183469, + "learning_rate": 1.533179756909513e-05, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20556490123271942, + "step": 2975, + "valid_targets_mean": 4119.9, + "valid_targets_min": 1004 + }, + { + "epoch": 4.3313953488372094, + "grad_norm": 0.6669582826595135, + "learning_rate": 1.52613434560992e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10513143986463547, + "step": 2980, + "valid_targets_mean": 2029.0, + "valid_targets_min": 359 + }, + { + "epoch": 4.338662790697675, + "grad_norm": 0.4563801791059827, + "learning_rate": 1.519095158985554e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1262395679950714, + "step": 2985, + "valid_targets_mean": 4772.6, + "valid_targets_min": 783 + }, + { + "epoch": 4.34593023255814, + "grad_norm": 0.4771548492793852, + "learning_rate": 1.512062289502814e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.1244804859161377, + "step": 2990, + "valid_targets_mean": 4250.5, + "valid_targets_min": 595 + }, + { + "epoch": 4.353197674418604, + "grad_norm": 0.4968998207030734, + "learning_rate": 1.5050358295451173e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10179959237575531, + "step": 2995, + "valid_targets_mean": 2665.2, + "valid_targets_min": 553 + }, + { + "epoch": 4.3604651162790695, + "grad_norm": 0.5349989482615872, + "learning_rate": 1.4980158714116864e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12053326517343521, + "step": 3000, + "valid_targets_mean": 3655.0, + "valid_targets_min": 512 + }, + { + "epoch": 4.367732558139535, + "grad_norm": 0.45795082624788974, + "learning_rate": 1.4910025073163346e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11071191728115082, + "step": 3005, + "valid_targets_mean": 4073.0, + "valid_targets_min": 799 + }, + { + "epoch": 4.375, + "grad_norm": 0.5488486813247619, + "learning_rate": 1.4839958293862582e-05, + "loss": 0.2446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11098312586545944, + "step": 3010, + "valid_targets_mean": 2934.9, + "valid_targets_min": 830 + }, + { + "epoch": 4.382267441860465, + "grad_norm": 0.4999364379943187, + "learning_rate": 1.4769959296608228e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11597448587417603, + "step": 3015, + "valid_targets_mean": 3370.1, + "valid_targets_min": 562 + }, + { + "epoch": 4.3895348837209305, + "grad_norm": 0.6797002456964418, + "learning_rate": 1.4700029000903575e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12890125811100006, + "step": 3020, + "valid_targets_mean": 1933.5, + "valid_targets_min": 418 + }, + { + "epoch": 4.396802325581396, + "grad_norm": 0.6080778703915236, + "learning_rate": 1.463016832534943e-05, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12549175322055817, + "step": 3025, + "valid_targets_mean": 3266.6, + 
"valid_targets_min": 663 + }, + { + "epoch": 4.40406976744186, + "grad_norm": 0.5399637076155757, + "learning_rate": 1.4560378187632101e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10789905488491058, + "step": 3030, + "valid_targets_mean": 2986.1, + "valid_targets_min": 586 + }, + { + "epoch": 4.411337209302325, + "grad_norm": 0.5178778926974974, + "learning_rate": 1.4490659504511295e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09591800719499588, + "step": 3035, + "valid_targets_mean": 3687.2, + "valid_targets_min": 615 + }, + { + "epoch": 4.4186046511627906, + "grad_norm": 0.5030583012505943, + "learning_rate": 1.44210131918081e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.121453195810318, + "step": 3040, + "valid_targets_mean": 4075.1, + "valid_targets_min": 569 + }, + { + "epoch": 4.425872093023256, + "grad_norm": 0.5641263917257686, + "learning_rate": 1.4351440164392956e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1182078868150711, + "step": 3045, + "valid_targets_mean": 3217.6, + "valid_targets_min": 607 + }, + { + "epoch": 4.433139534883721, + "grad_norm": 0.5939975448891784, + "learning_rate": 1.4281941336173621e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10988223552703857, + "step": 3050, + "valid_targets_mean": 2759.2, + "valid_targets_min": 535 + }, + { + "epoch": 4.440406976744186, + "grad_norm": 0.546560793902984, + "learning_rate": 1.4212517620083186e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08663086593151093, + "step": 3055, + "valid_targets_mean": 2932.1, + "valid_targets_min": 519 + }, + { + "epoch": 4.4476744186046515, + "grad_norm": 0.5318778359190953, + "learning_rate": 1.4143169928068061e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10948213934898376, + "step": 3060, + "valid_targets_mean": 3642.5, + "valid_targets_min": 729 + }, + { + "epoch": 4.454941860465116, + "grad_norm": 
0.5341511378601346, + "learning_rate": 1.4073899171076022e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12564794719219208, + "step": 3065, + "valid_targets_mean": 3746.2, + "valid_targets_min": 720 + }, + { + "epoch": 4.462209302325581, + "grad_norm": 0.6152522741279195, + "learning_rate": 1.400470625904422e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09698937088251114, + "step": 3070, + "valid_targets_mean": 3055.4, + "valid_targets_min": 300 + }, + { + "epoch": 4.469476744186046, + "grad_norm": 0.4894759594735516, + "learning_rate": 1.3935592100887242e-05, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09913764894008636, + "step": 3075, + "valid_targets_mean": 3156.9, + "valid_targets_min": 691 + }, + { + "epoch": 4.476744186046512, + "grad_norm": 0.5350561295503559, + "learning_rate": 1.386655760448517e-05, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14611050486564636, + "step": 3080, + "valid_targets_mean": 5077.9, + "valid_targets_min": 3504 + }, + { + "epoch": 4.484011627906977, + "grad_norm": 0.5317175369468766, + "learning_rate": 1.3797603676671646e-05, + "loss": 0.2344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09551812708377838, + "step": 3085, + "valid_targets_mean": 3002.4, + "valid_targets_min": 558 + }, + { + "epoch": 4.491279069767442, + "grad_norm": 0.4435122144509018, + "learning_rate": 1.372873122322198e-05, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0975491851568222, + "step": 3090, + "valid_targets_mean": 3885.2, + "valid_targets_min": 764 + }, + { + "epoch": 4.498546511627907, + "grad_norm": 0.5024935576623654, + "learning_rate": 1.365994114884122e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11409218609333038, + "step": 3095, + "valid_targets_mean": 4141.4, + "valid_targets_min": 587 + }, + { + "epoch": 4.5058139534883725, + "grad_norm": 0.5434331061009773, + "learning_rate": 1.359123435715231e-05, + "loss": 0.2357, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.09311437606811523, + "step": 3100, + "valid_targets_mean": 2983.4, + "valid_targets_min": 815 + }, + { + "epoch": 4.513081395348837, + "grad_norm": 0.45263073099805023, + "learning_rate": 1.3522611750684171e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11372917890548706, + "step": 3105, + "valid_targets_mean": 3772.0, + "valid_targets_min": 431 + }, + { + "epoch": 4.520348837209302, + "grad_norm": 0.6272229613079687, + "learning_rate": 1.3454074230859896e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16346322000026703, + "step": 3110, + "valid_targets_mean": 3129.0, + "valid_targets_min": 552 + }, + { + "epoch": 4.527616279069767, + "grad_norm": 0.5038465829473497, + "learning_rate": 1.3385622697984872e-05, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11544954776763916, + "step": 3115, + "valid_targets_mean": 3430.9, + "valid_targets_min": 395 + }, + { + "epoch": 4.534883720930233, + "grad_norm": 0.5887483028437017, + "learning_rate": 1.331725805123496e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10782928764820099, + "step": 3120, + "valid_targets_mean": 2734.6, + "valid_targets_min": 665 + }, + { + "epoch": 4.542151162790698, + "grad_norm": 0.5010126807103301, + "learning_rate": 1.3248981188644703e-05, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15959912538528442, + "step": 3125, + "valid_targets_mean": 4801.8, + "valid_targets_min": 3198 + }, + { + "epoch": 4.549418604651163, + "grad_norm": 0.5303282635413874, + "learning_rate": 1.3180793007095502e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1328737735748291, + "step": 3130, + "valid_targets_mean": 3531.9, + "valid_targets_min": 916 + }, + { + "epoch": 4.5566860465116275, + "grad_norm": 0.48575033418771363, + "learning_rate": 1.3112694402303863e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16913628578186035, + "step": 3135, + 
"valid_targets_mean": 5586.6, + "valid_targets_min": 806 + }, + { + "epoch": 4.563953488372093, + "grad_norm": 0.48981867193444983, + "learning_rate": 1.3044686268809596e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13943105936050415, + "step": 3140, + "valid_targets_mean": 4203.0, + "valid_targets_min": 1091 + }, + { + "epoch": 4.571220930232558, + "grad_norm": 0.5305974079239972, + "learning_rate": 1.2976769499964109e-05, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15359435975551605, + "step": 3145, + "valid_targets_mean": 4305.5, + "valid_targets_min": 550 + }, + { + "epoch": 4.578488372093023, + "grad_norm": 0.5630598119690939, + "learning_rate": 1.2908944987918633e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15561619400978088, + "step": 3150, + "valid_targets_mean": 3796.2, + "valid_targets_min": 720 + }, + { + "epoch": 4.585755813953488, + "grad_norm": 0.4690829743543244, + "learning_rate": 1.2841213623612519e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10580013692378998, + "step": 3155, + "valid_targets_mean": 3367.6, + "valid_targets_min": 611 + }, + { + "epoch": 4.593023255813954, + "grad_norm": 0.5289397793275402, + "learning_rate": 1.2773576296761542e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13507477939128876, + "step": 3160, + "valid_targets_mean": 3805.9, + "valid_targets_min": 743 + }, + { + "epoch": 4.600290697674419, + "grad_norm": 0.5426964454812239, + "learning_rate": 1.2706033895846192e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09324119985103607, + "step": 3165, + "valid_targets_mean": 2972.5, + "valid_targets_min": 590 + }, + { + "epoch": 4.607558139534884, + "grad_norm": 0.5634432609885214, + "learning_rate": 1.2638587308100036e-05, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11684857308864594, + "step": 3170, + "valid_targets_mean": 3209.4, + "valid_targets_min": 577 + }, + { + "epoch": 
4.6148255813953485, + "grad_norm": 0.48953334327704895, + "learning_rate": 1.2571237419498018e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11784234642982483, + "step": 3175, + "valid_targets_mean": 4379.2, + "valid_targets_min": 968 + }, + { + "epoch": 4.622093023255814, + "grad_norm": 0.5210611303116327, + "learning_rate": 1.2503985114744883e-05, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10520151257514954, + "step": 3180, + "valid_targets_mean": 4008.8, + "valid_targets_min": 3184 + }, + { + "epoch": 4.629360465116279, + "grad_norm": 0.4773875572630403, + "learning_rate": 1.2436831277263481e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1177573874592781, + "step": 3185, + "valid_targets_mean": 3870.4, + "valid_targets_min": 670 + }, + { + "epoch": 4.636627906976744, + "grad_norm": 0.6583518666009744, + "learning_rate": 1.2369776789183234e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14675956964492798, + "step": 3190, + "valid_targets_mean": 3113.1, + "valid_targets_min": 677 + }, + { + "epoch": 4.6438953488372094, + "grad_norm": 0.4450353778257948, + "learning_rate": 1.230282253132849e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11362966150045395, + "step": 3195, + "valid_targets_mean": 4771.6, + "valid_targets_min": 3887 + }, + { + "epoch": 4.651162790697675, + "grad_norm": 0.5094860056017274, + "learning_rate": 1.2235969383206987e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11538674682378769, + "step": 3200, + "valid_targets_mean": 3672.0, + "valid_targets_min": 509 + }, + { + "epoch": 4.658430232558139, + "grad_norm": 0.5084867559413941, + "learning_rate": 1.2169218222998294e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10229187458753586, + "step": 3205, + "valid_targets_mean": 3424.5, + "valid_targets_min": 755 + }, + { + "epoch": 4.665697674418604, + "grad_norm": 0.5637648552818592, + "learning_rate": 
1.2102569927542275e-05, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12403546273708344, + "step": 3210, + "valid_targets_mean": 4107.8, + "valid_targets_min": 702 + }, + { + "epoch": 4.6729651162790695, + "grad_norm": 0.4948101308113288, + "learning_rate": 1.2036025372327553e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12994790077209473, + "step": 3215, + "valid_targets_mean": 3731.0, + "valid_targets_min": 547 + }, + { + "epoch": 4.680232558139535, + "grad_norm": 0.5394534093008194, + "learning_rate": 1.1969585431480037e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10351639986038208, + "step": 3220, + "valid_targets_mean": 3190.5, + "valid_targets_min": 689 + }, + { + "epoch": 4.6875, + "grad_norm": 0.4404560213423327, + "learning_rate": 1.1903250977751429e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09289942681789398, + "step": 3225, + "valid_targets_mean": 3678.2, + "valid_targets_min": 708 + }, + { + "epoch": 4.694767441860465, + "grad_norm": 0.5081100321617489, + "learning_rate": 1.1837022882507745e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12247206270694733, + "step": 3230, + "valid_targets_mean": 4082.9, + "valid_targets_min": 690 + }, + { + "epoch": 4.7020348837209305, + "grad_norm": 0.4441735432541099, + "learning_rate": 1.1770902015717894e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09333371371030807, + "step": 3235, + "valid_targets_mean": 3859.1, + "valid_targets_min": 614 + }, + { + "epoch": 4.709302325581396, + "grad_norm": 0.6011417337828853, + "learning_rate": 1.1704889245942229e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10990115255117416, + "step": 3240, + "valid_targets_mean": 2285.1, + "valid_targets_min": 522 + }, + { + "epoch": 4.716569767441861, + "grad_norm": 0.5021507632398494, + "learning_rate": 1.163898544032115e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11612962186336517, + "step": 3245, + "valid_targets_mean": 3578.5, + "valid_targets_min": 776 + }, + { + "epoch": 4.723837209302325, + "grad_norm": 0.5880169464524365, + "learning_rate": 1.1573191464563709e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09853021800518036, + "step": 3250, + "valid_targets_mean": 2618.2, + "valid_targets_min": 677 + }, + { + "epoch": 4.7311046511627906, + "grad_norm": 0.48959783668226775, + "learning_rate": 1.1507508182936231e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09494741261005402, + "step": 3255, + "valid_targets_mean": 3397.2, + "valid_targets_min": 698 + }, + { + "epoch": 4.738372093023256, + "grad_norm": 0.46056950109789613, + "learning_rate": 1.144193645825099e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12248848378658295, + "step": 3260, + "valid_targets_mean": 4731.5, + "valid_targets_min": 507 + }, + { + "epoch": 4.745639534883721, + "grad_norm": 0.5214044669092526, + "learning_rate": 1.1376477151854832e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11522860825061798, + "step": 3265, + "valid_targets_mean": 3560.9, + "valid_targets_min": 566 + }, + { + "epoch": 4.752906976744186, + "grad_norm": 0.5820601949492391, + "learning_rate": 1.131113112361788e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12622776627540588, + "step": 3270, + "valid_targets_mean": 3919.5, + "valid_targets_min": 471 + }, + { + "epoch": 4.7601744186046515, + "grad_norm": 0.4664751855760365, + "learning_rate": 1.1245899231922265e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12376340478658676, + "step": 3275, + "valid_targets_mean": 4601.4, + "valid_targets_min": 469 + }, + { + "epoch": 4.767441860465116, + "grad_norm": 0.6667212632991735, + "learning_rate": 1.1180782333650807e-05, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09619486331939697, + "step": 3280, + "valid_targets_mean": 1562.5, + 
"valid_targets_min": 581 + }, + { + "epoch": 4.774709302325581, + "grad_norm": 0.45412715795498276, + "learning_rate": 1.1115781284175777e-05, + "loss": 0.2345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12009938806295395, + "step": 3285, + "valid_targets_mean": 4625.8, + "valid_targets_min": 849 + }, + { + "epoch": 4.781976744186046, + "grad_norm": 0.5704599440014428, + "learning_rate": 1.1050896937347666e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0995192751288414, + "step": 3290, + "valid_targets_mean": 2872.5, + "valid_targets_min": 602 + }, + { + "epoch": 4.789244186046512, + "grad_norm": 0.49543292260155836, + "learning_rate": 1.098613014548398e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11472610384225845, + "step": 3295, + "valid_targets_mean": 3716.9, + "valid_targets_min": 684 + }, + { + "epoch": 4.796511627906977, + "grad_norm": 0.537807694105439, + "learning_rate": 1.0921481759358005e-05, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13546626269817352, + "step": 3300, + "valid_targets_mean": 4543.0, + "valid_targets_min": 571 + }, + { + "epoch": 4.803779069767442, + "grad_norm": 0.5678524865707387, + "learning_rate": 1.0856952628187662e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06101011484861374, + "step": 3305, + "valid_targets_mean": 1192.0, + "valid_targets_min": 412 + }, + { + "epoch": 4.811046511627907, + "grad_norm": 0.49246729295539404, + "learning_rate": 1.079254359962436e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09559541195631027, + "step": 3310, + "valid_targets_mean": 3340.8, + "valid_targets_min": 647 + }, + { + "epoch": 4.8183139534883725, + "grad_norm": 0.4743174571088282, + "learning_rate": 1.0728255519741831e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13542385399341583, + "step": 3315, + "valid_targets_mean": 4489.4, + "valid_targets_min": 791 + }, + { + "epoch": 4.825581395348837, + "grad_norm": 
0.4984821091753099, + "learning_rate": 1.066408923302503e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11396318674087524, + "step": 3320, + "valid_targets_mean": 3852.9, + "valid_targets_min": 447 + }, + { + "epoch": 4.832848837209302, + "grad_norm": 0.5098307423292979, + "learning_rate": 1.060004558235905e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12900030612945557, + "step": 3325, + "valid_targets_mean": 3694.8, + "valid_targets_min": 632 + }, + { + "epoch": 4.840116279069767, + "grad_norm": 0.47560010661218727, + "learning_rate": 1.0536125409018043e-05, + "loss": 0.2304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10933175683021545, + "step": 3330, + "valid_targets_mean": 3970.6, + "valid_targets_min": 513 + }, + { + "epoch": 4.847383720930233, + "grad_norm": 0.47044486233428945, + "learning_rate": 1.0472329552654172e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11043363064527512, + "step": 3335, + "valid_targets_mean": 3795.6, + "valid_targets_min": 578 + }, + { + "epoch": 4.854651162790698, + "grad_norm": 0.5639925542779571, + "learning_rate": 1.040865885128656e-05, + "loss": 0.2346, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10802865028381348, + "step": 3340, + "valid_targets_mean": 3206.9, + "valid_targets_min": 564 + }, + { + "epoch": 4.861918604651163, + "grad_norm": 0.446505868059933, + "learning_rate": 1.034511414129033e-05, + "loss": 0.253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13078966736793518, + "step": 3345, + "valid_targets_mean": 5018.2, + "valid_targets_min": 652 + }, + { + "epoch": 4.8691860465116275, + "grad_norm": 0.5184499276874123, + "learning_rate": 1.0281696257385566e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12428499758243561, + "step": 3350, + "valid_targets_mean": 3502.9, + "valid_targets_min": 690 + }, + { + "epoch": 4.876453488372093, + "grad_norm": 0.5018714295797971, + "learning_rate": 1.0218406032626383e-05, + "loss": 0.2243, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.10743530094623566, + "step": 3355, + "valid_targets_mean": 3596.0, + "valid_targets_min": 731 + }, + { + "epoch": 4.883720930232558, + "grad_norm": 0.5735331401509087, + "learning_rate": 1.015524429838995e-05, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10622027516365051, + "step": 3360, + "valid_targets_mean": 2885.0, + "valid_targets_min": 605 + }, + { + "epoch": 4.890988372093023, + "grad_norm": 0.4840757316363492, + "learning_rate": 1.009221188436563e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1269492208957672, + "step": 3365, + "valid_targets_mean": 4277.8, + "valid_targets_min": 784 + }, + { + "epoch": 4.898255813953488, + "grad_norm": 0.49774705420226517, + "learning_rate": 1.0029309618544008e-05, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13551053404808044, + "step": 3370, + "valid_targets_mean": 4511.5, + "valid_targets_min": 690 + }, + { + "epoch": 4.905523255813954, + "grad_norm": 0.6044199194468696, + "learning_rate": 9.966538327206055e-06, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10581943392753601, + "step": 3375, + "valid_targets_mean": 2411.6, + "valid_targets_min": 507 + }, + { + "epoch": 4.912790697674419, + "grad_norm": 0.5239169019089047, + "learning_rate": 9.903898834912288e-06, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12706102430820465, + "step": 3380, + "valid_targets_mean": 4133.9, + "valid_targets_min": 603 + }, + { + "epoch": 4.920058139534884, + "grad_norm": 0.6359657440783766, + "learning_rate": 9.8413919644919e-06, + "loss": 0.2605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13760952651500702, + "step": 3385, + "valid_targets_mean": 3341.1, + "valid_targets_min": 579 + }, + { + "epoch": 4.9273255813953485, + "grad_norm": 0.526652947386687, + "learning_rate": 9.77901853703197e-06, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12862738966941833, + "step": 3390, + 
"valid_targets_mean": 3435.9, + "valid_targets_min": 446 + }, + { + "epoch": 4.934593023255814, + "grad_norm": 0.5209569537688338, + "learning_rate": 9.716779371866674e-06, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12216047942638397, + "step": 3395, + "valid_targets_mean": 3661.0, + "valid_targets_min": 641 + }, + { + "epoch": 4.941860465116279, + "grad_norm": 0.4874489939699638, + "learning_rate": 9.654675286566548e-06, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09232984483242035, + "step": 3400, + "valid_targets_mean": 3675.9, + "valid_targets_min": 702 + }, + { + "epoch": 4.949127906976744, + "grad_norm": 0.5282339564188945, + "learning_rate": 9.592707096927704e-06, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16324245929718018, + "step": 3405, + "valid_targets_mean": 4671.8, + "valid_targets_min": 398 + }, + { + "epoch": 4.9563953488372094, + "grad_norm": 0.522763578277042, + "learning_rate": 9.53087561696113e-06, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10933259129524231, + "step": 3410, + "valid_targets_mean": 3505.0, + "valid_targets_min": 914 + }, + { + "epoch": 4.963662790697675, + "grad_norm": 0.5013393924009135, + "learning_rate": 9.469181658882034e-06, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10687707364559174, + "step": 3415, + "valid_targets_mean": 3193.2, + "valid_targets_min": 802 + }, + { + "epoch": 4.970930232558139, + "grad_norm": 0.4813101013967105, + "learning_rate": 9.40762603309911e-06, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08028747141361237, + "step": 3420, + "valid_targets_mean": 2682.6, + "valid_targets_min": 430 + }, + { + "epoch": 4.978197674418604, + "grad_norm": 0.5220023231881794, + "learning_rate": 9.346209548203947e-06, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13090111315250397, + "step": 3425, + "valid_targets_mean": 5490.4, + "valid_targets_min": 4037 + }, + { + "epoch": 4.9854651162790695, 
+ "grad_norm": 0.47855880716995414, + "learning_rate": 9.284933010960364e-06, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1151789128780365, + "step": 3430, + "valid_targets_mean": 4014.4, + "valid_targets_min": 735 + }, + { + "epoch": 4.992732558139535, + "grad_norm": 0.5519432983605355, + "learning_rate": 9.223797226293867e-06, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11323521286249161, + "step": 3435, + "valid_targets_mean": 3503.9, + "valid_targets_min": 667 + }, + { + "epoch": 5.0, + "grad_norm": 0.5180141428234212, + "learning_rate": 9.162802997281022e-06, + "loss": 0.2371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14478221535682678, + "step": 3440, + "valid_targets_mean": 3696.8, + "valid_targets_min": 759 + }, + { + "epoch": 5.007267441860465, + "grad_norm": 0.5064465775517101, + "learning_rate": 9.10195112513892e-06, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11103392392396927, + "step": 3445, + "valid_targets_mean": 3689.1, + "valid_targets_min": 527 + }, + { + "epoch": 5.0145348837209305, + "grad_norm": 0.5163608827235715, + "learning_rate": 9.041242409214686e-06, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10698623955249786, + "step": 3450, + "valid_targets_mean": 3850.9, + "valid_targets_min": 732 + }, + { + "epoch": 5.021802325581396, + "grad_norm": 0.5841669567924161, + "learning_rate": 8.980677646974926e-06, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16529354453086853, + "step": 3455, + "valid_targets_mean": 4314.1, + "valid_targets_min": 636 + }, + { + "epoch": 5.02906976744186, + "grad_norm": 0.5776954665741999, + "learning_rate": 8.920257633995295e-06, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12802791595458984, + "step": 3460, + "valid_targets_mean": 3402.9, + "valid_targets_min": 704 + }, + { + "epoch": 5.036337209302325, + "grad_norm": 0.5039901178948153, + "learning_rate": 8.85998316395001e-06, + "loss": 0.2304, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.12501442432403564, + "step": 3465, + "valid_targets_mean": 4067.0, + "valid_targets_min": 806 + }, + { + "epoch": 5.0436046511627906, + "grad_norm": 0.5973873035633013, + "learning_rate": 8.799855028601472e-06, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12955757975578308, + "step": 3470, + "valid_targets_mean": 3599.6, + "valid_targets_min": 621 + }, + { + "epoch": 5.050872093023256, + "grad_norm": 0.5425804505001001, + "learning_rate": 8.739874017789813e-06, + "loss": 0.223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10548754036426544, + "step": 3475, + "valid_targets_mean": 2965.9, + "valid_targets_min": 238 + }, + { + "epoch": 5.058139534883721, + "grad_norm": 0.5200649497723702, + "learning_rate": 8.680040919422544e-06, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13549460470676422, + "step": 3480, + "valid_targets_mean": 4974.2, + "valid_targets_min": 3582 + }, + { + "epoch": 5.065406976744186, + "grad_norm": 0.5864066094152384, + "learning_rate": 8.620356519464228e-06, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12549281120300293, + "step": 3485, + "valid_targets_mean": 4444.4, + "valid_targets_min": 672 + }, + { + "epoch": 5.0726744186046515, + "grad_norm": 0.5523220644419992, + "learning_rate": 8.560821601926112e-06, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11209696531295776, + "step": 3490, + "valid_targets_mean": 3835.5, + "valid_targets_min": 582 + }, + { + "epoch": 5.079941860465116, + "grad_norm": 0.5657939357844483, + "learning_rate": 8.501436948855857e-06, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10303827375173569, + "step": 3495, + "valid_targets_mean": 3019.0, + "valid_targets_min": 773 + }, + { + "epoch": 5.087209302325581, + "grad_norm": 0.5147529737841077, + "learning_rate": 8.44220334032725e-06, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11031322181224823, + "step": 3500, + 
"valid_targets_mean": 3997.8, + "valid_targets_min": 542 + }, + { + "epoch": 5.094476744186046, + "grad_norm": 0.462171392787495, + "learning_rate": 8.383121554429985e-06, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09460602700710297, + "step": 3505, + "valid_targets_mean": 3166.2, + "valid_targets_min": 603 + }, + { + "epoch": 5.101744186046512, + "grad_norm": 0.5004999228518526, + "learning_rate": 8.3241923672594e-06, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11931440234184265, + "step": 3510, + "valid_targets_mean": 4244.2, + "valid_targets_min": 740 + }, + { + "epoch": 5.109011627906977, + "grad_norm": 0.5405328149776527, + "learning_rate": 8.265416552906316e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12601076066493988, + "step": 3515, + "valid_targets_mean": 4365.4, + "valid_targets_min": 759 + }, + { + "epoch": 5.116279069767442, + "grad_norm": 0.5425947774119614, + "learning_rate": 8.20679488344684e-06, + "loss": 0.2181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10105065256357193, + "step": 3520, + "valid_targets_mean": 3867.1, + "valid_targets_min": 623 + }, + { + "epoch": 5.123546511627907, + "grad_norm": 0.4766640353998748, + "learning_rate": 8.148328128932263e-06, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10325950384140015, + "step": 3525, + "valid_targets_mean": 3969.1, + "valid_targets_min": 821 + }, + { + "epoch": 5.1308139534883725, + "grad_norm": 0.5279713250696507, + "learning_rate": 8.090017057378913e-06, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10024862736463547, + "step": 3530, + "valid_targets_mean": 4346.2, + "valid_targets_min": 767 + }, + { + "epoch": 5.138081395348837, + "grad_norm": 0.6487999870575004, + "learning_rate": 8.03186243475807e-06, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14460906386375427, + "step": 3535, + "valid_targets_mean": 3415.9, + "valid_targets_min": 773 + }, + { + "epoch": 5.145348837209302, + 
"grad_norm": 0.5270915274072561, + "learning_rate": 7.9738650249859e-06, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11179886013269424, + "step": 3540, + "valid_targets_mean": 3993.8, + "valid_targets_min": 624 + }, + { + "epoch": 5.152616279069767, + "grad_norm": 0.5287461236460308, + "learning_rate": 7.916025589913452e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11516192555427551, + "step": 3545, + "valid_targets_mean": 3947.0, + "valid_targets_min": 742 + }, + { + "epoch": 5.159883720930233, + "grad_norm": 0.5450827809641174, + "learning_rate": 7.858344889316611e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11309966444969177, + "step": 3550, + "valid_targets_mean": 3872.1, + "valid_targets_min": 605 + }, + { + "epoch": 5.167151162790698, + "grad_norm": 0.4721896964111328, + "learning_rate": 7.80082368088613e-06, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1209426149725914, + "step": 3555, + "valid_targets_mean": 5368.0, + "valid_targets_min": 4502 + }, + { + "epoch": 5.174418604651163, + "grad_norm": 0.5628603862929638, + "learning_rate": 7.743462720217698e-06, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09959325939416885, + "step": 3560, + "valid_targets_mean": 3553.9, + "valid_targets_min": 842 + }, + { + "epoch": 5.1816860465116275, + "grad_norm": 0.5071148846714348, + "learning_rate": 7.686262760801985e-06, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13151785731315613, + "step": 3565, + "valid_targets_mean": 4163.6, + "valid_targets_min": 1394 + }, + { + "epoch": 5.188953488372093, + "grad_norm": 0.5170664010112574, + "learning_rate": 7.629224554014763e-06, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11399704217910767, + "step": 3570, + "valid_targets_mean": 3945.2, + "valid_targets_min": 735 + }, + { + "epoch": 5.196220930232558, + "grad_norm": 0.5006143865508161, + "learning_rate": 7.5723488491070116e-06, + "loss": 0.2208, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.11205872893333435, + "step": 3575, + "valid_targets_mean": 4519.0, + "valid_targets_min": 625 + }, + { + "epoch": 5.203488372093023, + "grad_norm": 0.6146302140772092, + "learning_rate": 7.515636393195129e-06, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10611201822757721, + "step": 3580, + "valid_targets_mean": 2733.1, + "valid_targets_min": 629 + }, + { + "epoch": 5.210755813953488, + "grad_norm": 0.5416120670446422, + "learning_rate": 7.459087931251052e-06, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08908810466527939, + "step": 3585, + "valid_targets_mean": 3024.0, + "valid_targets_min": 780 + }, + { + "epoch": 5.218023255813954, + "grad_norm": 0.6388381534227345, + "learning_rate": 7.402704206092508e-06, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09935657680034637, + "step": 3590, + "valid_targets_mean": 2290.0, + "valid_targets_min": 740 + }, + { + "epoch": 5.225290697674419, + "grad_norm": 0.5758476899160144, + "learning_rate": 7.346485958373266e-06, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11001452803611755, + "step": 3595, + "valid_targets_mean": 3376.2, + "valid_targets_min": 516 + }, + { + "epoch": 5.232558139534884, + "grad_norm": 0.5940721029348974, + "learning_rate": 7.290433926573373e-06, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11747955530881882, + "step": 3600, + "valid_targets_mean": 2742.5, + "valid_targets_min": 774 + }, + { + "epoch": 5.2398255813953485, + "grad_norm": 0.7197251133332507, + "learning_rate": 7.234548846989478e-06, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09308021515607834, + "step": 3605, + "valid_targets_mean": 2550.1, + "valid_targets_min": 467 + }, + { + "epoch": 5.247093023255814, + "grad_norm": 0.5111088589057889, + "learning_rate": 7.17883145372515e-06, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333235502243042, + "step": 3610, + 
"valid_targets_mean": 4213.5, + "valid_targets_min": 628 + }, + { + "epoch": 5.254360465116279, + "grad_norm": 0.5399044782488424, + "learning_rate": 7.123282478681255e-06, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12484262883663177, + "step": 3615, + "valid_targets_mean": 3860.6, + "valid_targets_min": 982 + }, + { + "epoch": 5.261627906976744, + "grad_norm": 0.6089362087155372, + "learning_rate": 7.06790265154631e-06, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09410004317760468, + "step": 3620, + "valid_targets_mean": 2744.4, + "valid_targets_min": 727 + }, + { + "epoch": 5.2688953488372094, + "grad_norm": 0.4993540684053602, + "learning_rate": 7.012692699786918e-06, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12370635569095612, + "step": 3625, + "valid_targets_mean": 4632.6, + "valid_targets_min": 716 + }, + { + "epoch": 5.276162790697675, + "grad_norm": 0.5038961889630662, + "learning_rate": 6.9576533486382004e-06, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11674419045448303, + "step": 3630, + "valid_targets_mean": 4505.5, + "valid_targets_min": 942 + }, + { + "epoch": 5.28343023255814, + "grad_norm": 0.46578591540410913, + "learning_rate": 6.902785321094301e-06, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09898681193590164, + "step": 3635, + "valid_targets_mean": 4460.9, + "valid_targets_min": 301 + }, + { + "epoch": 5.290697674418604, + "grad_norm": 0.5426836571471386, + "learning_rate": 6.84808933789884e-06, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11427856236696243, + "step": 3640, + "valid_targets_mean": 4147.2, + "valid_targets_min": 660 + }, + { + "epoch": 5.2979651162790695, + "grad_norm": 0.5289085011659265, + "learning_rate": 6.793566117535475e-06, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1171591728925705, + "step": 3645, + "valid_targets_mean": 3967.5, + "valid_targets_min": 832 + }, + { + "epoch": 5.305232558139535, + 
"grad_norm": 0.5842762111741557, + "learning_rate": 6.739216376218483e-06, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1301005482673645, + "step": 3650, + "valid_targets_mean": 3883.5, + "valid_targets_min": 717 + }, + { + "epoch": 5.3125, + "grad_norm": 0.5229828592303515, + "learning_rate": 6.6850408278833e-06, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10519362986087799, + "step": 3655, + "valid_targets_mean": 3604.9, + "valid_targets_min": 914 + }, + { + "epoch": 5.319767441860465, + "grad_norm": 0.5537040247119486, + "learning_rate": 6.631040184177191e-06, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1311875283718109, + "step": 3660, + "valid_targets_mean": 3819.0, + "valid_targets_min": 508 + }, + { + "epoch": 5.3270348837209305, + "grad_norm": 0.5061352169459574, + "learning_rate": 6.577215154449863e-06, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09532980620861053, + "step": 3665, + "valid_targets_mean": 3654.8, + "valid_targets_min": 729 + }, + { + "epoch": 5.334302325581396, + "grad_norm": 0.45550595784330383, + "learning_rate": 6.523566445744196e-06, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09947185218334198, + "step": 3670, + "valid_targets_mean": 4620.1, + "valid_targets_min": 801 + }, + { + "epoch": 5.34156976744186, + "grad_norm": 0.5885273568890355, + "learning_rate": 6.470094762786901e-06, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12775710225105286, + "step": 3675, + "valid_targets_mean": 3814.5, + "valid_targets_min": 792 + }, + { + "epoch": 5.348837209302325, + "grad_norm": 0.5972530769456403, + "learning_rate": 6.4168008079792906e-06, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08653900772333145, + "step": 3680, + "valid_targets_mean": 2589.9, + "valid_targets_min": 509 + }, + { + "epoch": 5.3561046511627906, + "grad_norm": 0.489387591827279, + "learning_rate": 6.36368528138807e-06, + "loss": 0.2243, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.130646213889122, + "step": 3685, + "valid_targets_mean": 4938.5, + "valid_targets_min": 984 + }, + { + "epoch": 5.363372093023256, + "grad_norm": 0.6170411592431482, + "learning_rate": 6.310748880736095e-06, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09561936557292938, + "step": 3690, + "valid_targets_mean": 3151.4, + "valid_targets_min": 638 + }, + { + "epoch": 5.370639534883721, + "grad_norm": 0.5081130790237884, + "learning_rate": 6.2579923013932435e-06, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1221495121717453, + "step": 3695, + "valid_targets_mean": 4372.9, + "valid_targets_min": 943 + }, + { + "epoch": 5.377906976744186, + "grad_norm": 0.6454509271848846, + "learning_rate": 6.205416236367263e-06, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09919290244579315, + "step": 3700, + "valid_targets_mean": 2139.8, + "valid_targets_min": 495 + }, + { + "epoch": 5.3851744186046515, + "grad_norm": 0.557518677664274, + "learning_rate": 6.1530213762946944e-06, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259082555770874, + "step": 3705, + "valid_targets_mean": 4007.5, + "valid_targets_min": 745 + }, + { + "epoch": 5.392441860465116, + "grad_norm": 0.5793527159329909, + "learning_rate": 6.100808409431755e-06, + "loss": 0.2274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12197981774806976, + "step": 3710, + "valid_targets_mean": 3392.2, + "valid_targets_min": 563 + }, + { + "epoch": 5.399709302325581, + "grad_norm": 0.5386985816616846, + "learning_rate": 6.048778021645329e-06, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10789743065834045, + "step": 3715, + "valid_targets_mean": 3452.0, + "valid_targets_min": 612 + }, + { + "epoch": 5.406976744186046, + "grad_norm": 0.621910014250417, + "learning_rate": 5.996930896403967e-06, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1227702647447586, + "step": 3720, + 
"valid_targets_mean": 3322.1, + "valid_targets_min": 790 + }, + { + "epoch": 5.414244186046512, + "grad_norm": 0.5057315708259351, + "learning_rate": 5.94526771476887e-06, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10889577120542526, + "step": 3725, + "valid_targets_mean": 4153.8, + "valid_targets_min": 772 + }, + { + "epoch": 5.421511627906977, + "grad_norm": 0.5308377606082096, + "learning_rate": 5.893789155384975e-06, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09790430963039398, + "step": 3730, + "valid_targets_mean": 3238.1, + "valid_targets_min": 778 + }, + { + "epoch": 5.428779069767442, + "grad_norm": 0.47748576601578613, + "learning_rate": 5.8424958944720245e-06, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1386643946170807, + "step": 3735, + "valid_targets_mean": 5128.6, + "valid_targets_min": 392 + }, + { + "epoch": 5.436046511627907, + "grad_norm": 0.5129310083697562, + "learning_rate": 5.791388605815709e-06, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11397480964660645, + "step": 3740, + "valid_targets_mean": 4230.2, + "valid_targets_min": 589 + }, + { + "epoch": 5.4433139534883725, + "grad_norm": 0.586622641537686, + "learning_rate": 5.740467960758776e-06, + "loss": 0.2164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13233549892902374, + "step": 3745, + "valid_targets_mean": 3652.4, + "valid_targets_min": 737 + }, + { + "epoch": 5.450581395348837, + "grad_norm": 0.47656954097512755, + "learning_rate": 5.68973462819223e-06, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08915632963180542, + "step": 3750, + "valid_targets_mean": 4145.4, + "valid_targets_min": 476 + }, + { + "epoch": 5.457848837209302, + "grad_norm": 0.47752527551101914, + "learning_rate": 5.63918927454657e-06, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08590822666883469, + "step": 3755, + "valid_targets_mean": 3623.9, + "valid_targets_min": 534 + }, + { + "epoch": 5.465116279069767, 
+ "grad_norm": 0.6775039949217503, + "learning_rate": 5.588832563783e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10755671560764313, + "step": 3760, + "valid_targets_mean": 1813.9, + "valid_targets_min": 601 + }, + { + "epoch": 5.472383720930233, + "grad_norm": 0.4616676959594202, + "learning_rate": 5.538665157384715e-06, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11233869940042496, + "step": 3765, + "valid_targets_mean": 4682.6, + "valid_targets_min": 3531 + }, + { + "epoch": 5.479651162790698, + "grad_norm": 0.5377360862025549, + "learning_rate": 5.48868771434822e-06, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1236083060503006, + "step": 3770, + "valid_targets_mean": 3839.6, + "valid_targets_min": 752 + }, + { + "epoch": 5.486918604651163, + "grad_norm": 0.6338269834891624, + "learning_rate": 5.438900891174686e-06, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13257811963558197, + "step": 3775, + "valid_targets_mean": 3260.4, + "valid_targets_min": 417 + }, + { + "epoch": 5.4941860465116275, + "grad_norm": 0.5183978177248446, + "learning_rate": 5.389305341861293e-06, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10330487787723541, + "step": 3780, + "valid_targets_mean": 3676.6, + "valid_targets_min": 622 + }, + { + "epoch": 5.501453488372093, + "grad_norm": 0.5006609696903801, + "learning_rate": 5.3399017178926614e-06, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09554791450500488, + "step": 3785, + "valid_targets_mean": 3681.5, + "valid_targets_min": 503 + }, + { + "epoch": 5.508720930232558, + "grad_norm": 0.5369589728239931, + "learning_rate": 5.290690668232301e-06, + "loss": 0.2283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12455598264932632, + "step": 3790, + "valid_targets_mean": 3967.4, + "valid_targets_min": 822 + }, + { + "epoch": 5.515988372093023, + "grad_norm": 0.5412191392066285, + "learning_rate": 5.2416728393140624e-06, + "loss": 0.2269, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.11805573850870132, + "step": 3795, + "valid_targets_mean": 3910.9, + "valid_targets_min": 611 + }, + { + "epoch": 5.523255813953488, + "grad_norm": 0.5237261735625945, + "learning_rate": 5.192848875033663e-06, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11130371689796448, + "step": 3800, + "valid_targets_mean": 3325.5, + "valid_targets_min": 525 + }, + { + "epoch": 5.530523255813954, + "grad_norm": 0.4855369276832591, + "learning_rate": 5.144219416740217e-06, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1295335739850998, + "step": 3805, + "valid_targets_mean": 4685.5, + "valid_targets_min": 916 + }, + { + "epoch": 5.537790697674419, + "grad_norm": 0.5357362667911629, + "learning_rate": 5.095785103227835e-06, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10563298314809799, + "step": 3810, + "valid_targets_mean": 3333.4, + "valid_targets_min": 459 + }, + { + "epoch": 5.545058139534884, + "grad_norm": 0.4783226797446443, + "learning_rate": 5.047546570727205e-06, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08839955925941467, + "step": 3815, + "valid_targets_mean": 3058.4, + "valid_targets_min": 626 + }, + { + "epoch": 5.5523255813953485, + "grad_norm": 0.603397173592423, + "learning_rate": 4.999504452897232e-06, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14669716358184814, + "step": 3820, + "valid_targets_mean": 3794.2, + "valid_targets_min": 880 + }, + { + "epoch": 5.559593023255814, + "grad_norm": 0.5498293837910059, + "learning_rate": 4.95165938081676e-06, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15765151381492615, + "step": 3825, + "valid_targets_mean": 3928.4, + "valid_targets_min": 685 + }, + { + "epoch": 5.566860465116279, + "grad_norm": 0.5075169876722437, + "learning_rate": 4.9040119829762246e-06, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12888945639133453, + "step": 3830, + 
"valid_targets_mean": 4282.1, + "valid_targets_min": 800 + }, + { + "epoch": 5.574127906976744, + "grad_norm": 0.553383225462878, + "learning_rate": 4.856562885269427e-06, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10596640408039093, + "step": 3835, + "valid_targets_mean": 2909.5, + "valid_targets_min": 619 + }, + { + "epoch": 5.5813953488372094, + "grad_norm": 0.7686235169257939, + "learning_rate": 4.809312710985308e-06, + "loss": 0.2366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17210009694099426, + "step": 3840, + "valid_targets_mean": 2873.5, + "valid_targets_min": 484 + }, + { + "epoch": 5.588662790697675, + "grad_norm": 0.554861312349065, + "learning_rate": 4.762262080799771e-06, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13465741276741028, + "step": 3845, + "valid_targets_mean": 4018.6, + "valid_targets_min": 735 + }, + { + "epoch": 5.595930232558139, + "grad_norm": 0.46848899820454354, + "learning_rate": 4.715411612767508e-06, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11969712376594543, + "step": 3850, + "valid_targets_mean": 4644.1, + "valid_targets_min": 3761 + }, + { + "epoch": 5.603197674418604, + "grad_norm": 0.46405211577543315, + "learning_rate": 4.668761922313893e-06, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09280385076999664, + "step": 3855, + "valid_targets_mean": 3501.6, + "valid_targets_min": 602 + }, + { + "epoch": 5.6104651162790695, + "grad_norm": 0.5308472326139155, + "learning_rate": 4.622313622226888e-06, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0978182926774025, + "step": 3860, + "valid_targets_mean": 3168.2, + "valid_targets_min": 246 + }, + { + "epoch": 5.617732558139535, + "grad_norm": 0.5701733988373806, + "learning_rate": 4.5760673226490245e-06, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09062608331441879, + "step": 3865, + "valid_targets_mean": 2962.1, + "valid_targets_min": 506 + }, + { + "epoch": 5.625, + 
"grad_norm": 0.5600910428886875, + "learning_rate": 4.530023631069342e-06, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.112250916659832, + "step": 3870, + "valid_targets_mean": 3511.9, + "valid_targets_min": 781 + }, + { + "epoch": 5.632267441860465, + "grad_norm": 0.4847013463582278, + "learning_rate": 4.484183152315435e-06, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.131978377699852, + "step": 3875, + "valid_targets_mean": 5273.6, + "valid_targets_min": 735 + }, + { + "epoch": 5.6395348837209305, + "grad_norm": 0.5504896810558689, + "learning_rate": 4.438546488545516e-06, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08465667814016342, + "step": 3880, + "valid_targets_mean": 2705.5, + "valid_targets_min": 550 + }, + { + "epoch": 5.646802325581396, + "grad_norm": 0.4930889515925386, + "learning_rate": 4.393114239240495e-06, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09775549173355103, + "step": 3885, + "valid_targets_mean": 3736.1, + "valid_targets_min": 486 + }, + { + "epoch": 5.654069767441861, + "grad_norm": 0.6661086629183974, + "learning_rate": 4.347887001196089e-06, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13360783457756042, + "step": 3890, + "valid_targets_mean": 2850.1, + "valid_targets_min": 447 + }, + { + "epoch": 5.661337209302325, + "grad_norm": 0.49494538033843166, + "learning_rate": 4.302865368515002e-06, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11445395648479462, + "step": 3895, + "valid_targets_mean": 3998.5, + "valid_targets_min": 634 + }, + { + "epoch": 5.6686046511627906, + "grad_norm": 0.5172249268234121, + "learning_rate": 4.2580499325991284e-06, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12026748061180115, + "step": 3900, + "valid_targets_mean": 4291.1, + "valid_targets_min": 1215 + }, + { + "epoch": 5.675872093023256, + "grad_norm": 0.584906298842366, + "learning_rate": 4.213441282141762e-06, + "loss": 0.2243, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.10975706577301025, + "step": 3905, + "valid_targets_mean": 3372.2, + "valid_targets_min": 707 + }, + { + "epoch": 5.683139534883721, + "grad_norm": 0.4644507284510041, + "learning_rate": 4.169040003119871e-06, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12260288000106812, + "step": 3910, + "valid_targets_mean": 5018.1, + "valid_targets_min": 2599 + }, + { + "epoch": 5.690406976744186, + "grad_norm": 0.4916359177514743, + "learning_rate": 4.124846678786405e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1183990091085434, + "step": 3915, + "valid_targets_mean": 4028.6, + "valid_targets_min": 737 + }, + { + "epoch": 5.6976744186046515, + "grad_norm": 0.5389463683484641, + "learning_rate": 4.080861889662642e-06, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14097467064857483, + "step": 3920, + "valid_targets_mean": 4007.8, + "valid_targets_min": 745 + }, + { + "epoch": 5.704941860465116, + "grad_norm": 0.5394672937113356, + "learning_rate": 4.037086213530539e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1080642119050026, + "step": 3925, + "valid_targets_mean": 3299.9, + "valid_targets_min": 431 + }, + { + "epoch": 5.712209302325581, + "grad_norm": 0.5031746817464193, + "learning_rate": 3.993520225425154e-06, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09760770946741104, + "step": 3930, + "valid_targets_mean": 3632.9, + "valid_targets_min": 567 + }, + { + "epoch": 5.719476744186046, + "grad_norm": 0.5497879839779796, + "learning_rate": 3.9501644976271095e-06, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11778296530246735, + "step": 3935, + "valid_targets_mean": 4291.6, + "valid_targets_min": 572 + }, + { + "epoch": 5.726744186046512, + "grad_norm": 0.4776129835677131, + "learning_rate": 3.907019599655044e-06, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10025591403245926, + "step": 3940, + 
"valid_targets_mean": 4037.6, + "valid_targets_min": 736 + }, + { + "epoch": 5.734011627906977, + "grad_norm": 0.5246341815995155, + "learning_rate": 3.864086098258153e-06, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12622231245040894, + "step": 3945, + "valid_targets_mean": 3679.9, + "valid_targets_min": 607 + }, + { + "epoch": 5.741279069767442, + "grad_norm": 0.6086055176247579, + "learning_rate": 3.8213645574087286e-06, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10311783850193024, + "step": 3950, + "valid_targets_mean": 2590.5, + "valid_targets_min": 633 + }, + { + "epoch": 5.748546511627907, + "grad_norm": 0.45195401508843, + "learning_rate": 3.778855538294779e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09347233176231384, + "step": 3955, + "valid_targets_mean": 4437.8, + "valid_targets_min": 607 + }, + { + "epoch": 5.7558139534883725, + "grad_norm": 0.6060623213653233, + "learning_rate": 3.736559599312619e-06, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11307326704263687, + "step": 3960, + "valid_targets_mean": 3571.6, + "valid_targets_min": 785 + }, + { + "epoch": 5.763081395348837, + "grad_norm": 0.5002442442836875, + "learning_rate": 3.6944772960595597e-06, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1352311074733734, + "step": 3965, + "valid_targets_mean": 5457.2, + "valid_targets_min": 3562 + }, + { + "epoch": 5.770348837209302, + "grad_norm": 0.5754115654946838, + "learning_rate": 3.652609181326601e-06, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12333385646343231, + "step": 3970, + "valid_targets_mean": 3621.1, + "valid_targets_min": 647 + }, + { + "epoch": 5.777616279069767, + "grad_norm": 0.5231121941154192, + "learning_rate": 3.610955805091185e-06, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09638865292072296, + "step": 3975, + "valid_targets_mean": 3402.5, + "valid_targets_min": 776 + }, + { + "epoch": 5.784883720930233, 
+ "grad_norm": 0.5508341437323052, + "learning_rate": 3.569517714509947e-06, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13687901198863983, + "step": 3980, + "valid_targets_mean": 4811.4, + "valid_targets_min": 4256 + }, + { + "epoch": 5.792151162790698, + "grad_norm": 0.5254922241131528, + "learning_rate": 3.528295453911541e-06, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1139884814620018, + "step": 3985, + "valid_targets_mean": 3759.5, + "valid_targets_min": 676 + }, + { + "epoch": 5.799418604651163, + "grad_norm": 0.6129810405498609, + "learning_rate": 3.4872895647895045e-06, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12669987976551056, + "step": 3990, + "valid_targets_mean": 3018.9, + "valid_targets_min": 432 + }, + { + "epoch": 5.8066860465116275, + "grad_norm": 0.5650351578983417, + "learning_rate": 3.446500585795112e-06, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1187901720404625, + "step": 3995, + "valid_targets_mean": 3951.6, + "valid_targets_min": 532 + }, + { + "epoch": 5.813953488372093, + "grad_norm": 0.5922160583846725, + "learning_rate": 3.4059290527303256e-06, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0975690633058548, + "step": 4000, + "valid_targets_mean": 2933.4, + "valid_targets_min": 543 + }, + { + "epoch": 5.821220930232558, + "grad_norm": 0.5524185962304792, + "learning_rate": 3.3655754985407453e-06, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.144732266664505, + "step": 4005, + "valid_targets_mean": 4334.1, + "valid_targets_min": 699 + }, + { + "epoch": 5.828488372093023, + "grad_norm": 0.554719491811411, + "learning_rate": 3.3254404533086216e-06, + "loss": 0.2248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11786288022994995, + "step": 4010, + "valid_targets_mean": 3635.6, + "valid_targets_min": 883 + }, + { + "epoch": 5.835755813953488, + "grad_norm": 0.525008145807584, + "learning_rate": 3.285524444245873e-06, + "loss": 0.2373, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.13068491220474243, + "step": 4015, + "valid_targets_mean": 4179.9, + "valid_targets_min": 887 + }, + { + "epoch": 5.843023255813954, + "grad_norm": 0.5177901449861642, + "learning_rate": 3.245827995687165e-06, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1204511895775795, + "step": 4020, + "valid_targets_mean": 4649.1, + "valid_targets_min": 796 + }, + { + "epoch": 5.850290697674419, + "grad_norm": 0.5388393937448382, + "learning_rate": 3.2063516290830445e-06, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13081350922584534, + "step": 4025, + "valid_targets_mean": 4260.2, + "valid_targets_min": 870 + }, + { + "epoch": 5.857558139534884, + "grad_norm": 0.5869572044002439, + "learning_rate": 3.1670958629930595e-06, + "loss": 0.2328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09571430832147598, + "step": 4030, + "valid_targets_mean": 1790.9, + "valid_targets_min": 200 + }, + { + "epoch": 5.8648255813953485, + "grad_norm": 0.5432452195757204, + "learning_rate": 3.1280612130789633e-06, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15068915486335754, + "step": 4035, + "valid_targets_mean": 4574.0, + "valid_targets_min": 629 + }, + { + "epoch": 5.872093023255814, + "grad_norm": 0.541853575741418, + "learning_rate": 3.0892481920979355e-06, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11317667365074158, + "step": 4040, + "valid_targets_mean": 3943.2, + "valid_targets_min": 600 + }, + { + "epoch": 5.879360465116279, + "grad_norm": 0.5979049468845229, + "learning_rate": 3.0506573098958613e-06, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10876569151878357, + "step": 4045, + "valid_targets_mean": 3525.4, + "valid_targets_min": 555 + }, + { + "epoch": 5.886627906976744, + "grad_norm": 0.5448360093244398, + "learning_rate": 3.0122890734006114e-06, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09871428459882736, + "step": 4050, + 
"valid_targets_mean": 3317.1, + "valid_targets_min": 615 + }, + { + "epoch": 5.8938953488372094, + "grad_norm": 0.5699959641756522, + "learning_rate": 2.97414398661539e-06, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10872367769479752, + "step": 4055, + "valid_targets_mean": 3118.6, + "valid_targets_min": 632 + }, + { + "epoch": 5.901162790697675, + "grad_norm": 0.4759006211842608, + "learning_rate": 2.9362225506121357e-06, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08648262172937393, + "step": 4060, + "valid_targets_mean": 3458.9, + "valid_targets_min": 591 + }, + { + "epoch": 5.908430232558139, + "grad_norm": 0.5187120792922781, + "learning_rate": 2.8985252635249026e-06, + "loss": 0.2189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13931739330291748, + "step": 4065, + "valid_targets_mean": 4865.6, + "valid_targets_min": 690 + }, + { + "epoch": 5.915697674418604, + "grad_norm": 0.44646041509070417, + "learning_rate": 2.8610526205433476e-06, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1304367482662201, + "step": 4070, + "valid_targets_mean": 5352.8, + "valid_targets_min": 4351 + }, + { + "epoch": 5.9229651162790695, + "grad_norm": 0.5127187092080205, + "learning_rate": 2.823805113906204e-06, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1429927945137024, + "step": 4075, + "valid_targets_mean": 4904.5, + "valid_targets_min": 3744 + }, + { + "epoch": 5.930232558139535, + "grad_norm": 0.5192855903363226, + "learning_rate": 2.7867832328948385e-06, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.090521439909935, + "step": 4080, + "valid_targets_mean": 3285.5, + "valid_targets_min": 633 + }, + { + "epoch": 5.9375, + "grad_norm": 0.4794115204926523, + "learning_rate": 2.7499874638268044e-06, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12026579678058624, + "step": 4085, + "valid_targets_mean": 5181.1, + "valid_targets_min": 3352 + }, + { + "epoch": 5.944767441860465, + 
"grad_norm": 0.5009908186316896, + "learning_rate": 2.7134182900494542e-06, + "loss": 0.2314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09819701313972473, + "step": 4090, + "valid_targets_mean": 3623.4, + "valid_targets_min": 578 + }, + { + "epoch": 5.9520348837209305, + "grad_norm": 0.456827041300496, + "learning_rate": 2.6770761919336098e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11058850586414337, + "step": 4095, + "valid_targets_mean": 4412.2, + "valid_targets_min": 3777 + }, + { + "epoch": 5.959302325581396, + "grad_norm": 0.5665319932821282, + "learning_rate": 2.640961646867224e-06, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0975506603717804, + "step": 4100, + "valid_targets_mean": 3505.1, + "valid_targets_min": 734 + }, + { + "epoch": 5.966569767441861, + "grad_norm": 0.5428484473133973, + "learning_rate": 2.605075129249135e-06, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1216345876455307, + "step": 4105, + "valid_targets_mean": 4187.2, + "valid_targets_min": 608 + }, + { + "epoch": 5.973837209302325, + "grad_norm": 0.6032813933057201, + "learning_rate": 2.5694171104828146e-06, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10044913738965988, + "step": 4110, + "valid_targets_mean": 2797.5, + "valid_targets_min": 844 + }, + { + "epoch": 5.9811046511627906, + "grad_norm": 0.5147925778815827, + "learning_rate": 2.533988058970198e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10603871941566467, + "step": 4115, + "valid_targets_mean": 3845.6, + "valid_targets_min": 584 + }, + { + "epoch": 5.988372093023256, + "grad_norm": 0.5327598308654197, + "learning_rate": 2.498788440105506e-06, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11044016480445862, + "step": 4120, + "valid_targets_mean": 4463.9, + "valid_targets_min": 807 + }, + { + "epoch": 5.995639534883721, + "grad_norm": 0.5854269304654619, + "learning_rate": 2.4638187162691487e-06, + "loss": 0.2239, 
+ "loss_nan_ranks": 0, + "loss_rank_avg": 0.12129954993724823, + "step": 4125, + "valid_targets_mean": 3566.0, + "valid_targets_min": 512 + }, + { + "epoch": 6.002906976744186, + "grad_norm": 0.5313957720724034, + "learning_rate": 2.42907934682165e-06, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1027924120426178, + "step": 4130, + "valid_targets_mean": 3559.5, + "valid_targets_min": 612 + }, + { + "epoch": 6.0101744186046515, + "grad_norm": 0.5483117826436876, + "learning_rate": 2.3945707880976034e-06, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13372516632080078, + "step": 4135, + "valid_targets_mean": 5047.0, + "valid_targets_min": 631 + }, + { + "epoch": 6.017441860465116, + "grad_norm": 0.5090642043437736, + "learning_rate": 2.36029349339969e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10333339869976044, + "step": 4140, + "valid_targets_mean": 3638.2, + "valid_targets_min": 698 + }, + { + "epoch": 6.024709302325581, + "grad_norm": 0.5066239118036157, + "learning_rate": 2.3262479129927116e-06, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11342179775238037, + "step": 4145, + "valid_targets_mean": 4146.6, + "valid_targets_min": 657 + }, + { + "epoch": 6.031976744186046, + "grad_norm": 0.48087895739663883, + "learning_rate": 2.2924344940976975e-06, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09436848759651184, + "step": 4150, + "valid_targets_mean": 3816.1, + "valid_targets_min": 666 + }, + { + "epoch": 6.039244186046512, + "grad_norm": 0.5537030876418265, + "learning_rate": 2.2588536808859975e-06, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11316031217575073, + "step": 4155, + "valid_targets_mean": 4260.8, + "valid_targets_min": 727 + }, + { + "epoch": 6.046511627906977, + "grad_norm": 0.5833788098367146, + "learning_rate": 2.225505914473469e-06, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12212025374174118, + "step": 4160, + 
"valid_targets_mean": 3621.4, + "valid_targets_min": 572 + }, + { + "epoch": 6.053779069767442, + "grad_norm": 0.7362174701066857, + "learning_rate": 2.19239163291469e-06, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09100019931793213, + "step": 4165, + "valid_targets_mean": 3448.5, + "valid_targets_min": 667 + }, + { + "epoch": 6.061046511627907, + "grad_norm": 0.4740406178811528, + "learning_rate": 2.1595112711971835e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09100145101547241, + "step": 4170, + "valid_targets_mean": 4132.0, + "valid_targets_min": 722 + }, + { + "epoch": 6.068313953488372, + "grad_norm": 0.6161720448401335, + "learning_rate": 2.1268652612357153e-06, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13534143567085266, + "step": 4175, + "valid_targets_mean": 3147.4, + "valid_targets_min": 602 + }, + { + "epoch": 6.075581395348837, + "grad_norm": 0.5205546045427543, + "learning_rate": 2.0944540318666107e-06, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1053255945444107, + "step": 4180, + "valid_targets_mean": 3569.5, + "valid_targets_min": 524 + }, + { + "epoch": 6.082848837209302, + "grad_norm": 0.6116553776699947, + "learning_rate": 2.062278008842147e-06, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09495720267295837, + "step": 4185, + "valid_targets_mean": 2838.4, + "valid_targets_min": 591 + }, + { + "epoch": 6.090116279069767, + "grad_norm": 0.5276279180760463, + "learning_rate": 2.030337614824929e-06, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11515341699123383, + "step": 4190, + "valid_targets_mean": 3900.5, + "valid_targets_min": 548 + }, + { + "epoch": 6.097383720930233, + "grad_norm": 0.5208050337446745, + "learning_rate": 1.9986332693823487e-06, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07439148426055908, + "step": 4195, + "valid_targets_mean": 2552.5, + "valid_targets_min": 582 + }, + { + "epoch": 
6.104651162790698, + "grad_norm": 0.5694598831791498, + "learning_rate": 1.9671653889810893e-06, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09752744436264038, + "step": 4200, + "valid_targets_mean": 3513.8, + "valid_targets_min": 583 + }, + { + "epoch": 6.111918604651163, + "grad_norm": 0.49122429844668036, + "learning_rate": 1.9359343869816307e-06, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10171881318092346, + "step": 4205, + "valid_targets_mean": 3564.2, + "valid_targets_min": 707 + }, + { + "epoch": 6.119186046511628, + "grad_norm": 0.5429562792251613, + "learning_rate": 1.9049406736328336e-06, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1320454478263855, + "step": 4210, + "valid_targets_mean": 3916.1, + "valid_targets_min": 882 + }, + { + "epoch": 6.126453488372093, + "grad_norm": 0.5930470816888453, + "learning_rate": 1.87418465606654e-06, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.115715891122818, + "step": 4215, + "valid_targets_mean": 3784.2, + "valid_targets_min": 652 + }, + { + "epoch": 6.133720930232558, + "grad_norm": 0.4980643141344037, + "learning_rate": 1.8436667382922468e-06, + "loss": 0.2197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11309342831373215, + "step": 4220, + "valid_targets_mean": 4678.9, + "valid_targets_min": 1008 + }, + { + "epoch": 6.140988372093023, + "grad_norm": 0.5369540725587354, + "learning_rate": 1.8133873211917686e-06, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11754552274942398, + "step": 4225, + "valid_targets_mean": 4220.1, + "valid_targets_min": 1120 + }, + { + "epoch": 6.148255813953488, + "grad_norm": 0.5125697181326947, + "learning_rate": 1.783346802514001e-06, + "loss": 0.2334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10513949394226074, + "step": 4230, + "valid_targets_mean": 3621.4, + "valid_targets_min": 642 + }, + { + "epoch": 6.155523255813954, + "grad_norm": 0.5837456425846023, + "learning_rate": 
1.7535455768696686e-06, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1067032665014267, + "step": 4235, + "valid_targets_mean": 3622.5, + "valid_targets_min": 537 + }, + { + "epoch": 6.162790697674419, + "grad_norm": 0.6370646912449455, + "learning_rate": 1.7239840357261695e-06, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10105766355991364, + "step": 4240, + "valid_targets_mean": 2271.6, + "valid_targets_min": 684 + }, + { + "epoch": 6.170058139534884, + "grad_norm": 0.6028206483589204, + "learning_rate": 1.6946625674024053e-06, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12402664870023727, + "step": 4245, + "valid_targets_mean": 3750.8, + "valid_targets_min": 635 + }, + { + "epoch": 6.1773255813953485, + "grad_norm": 0.48050804435313044, + "learning_rate": 1.6655815570637002e-06, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13276661932468414, + "step": 4250, + "valid_targets_mean": 5509.6, + "valid_targets_min": 3861 + }, + { + "epoch": 6.184593023255814, + "grad_norm": 0.5069577336702119, + "learning_rate": 1.636741386716727e-06, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10672836005687714, + "step": 4255, + "valid_targets_mean": 3849.8, + "valid_targets_min": 559 + }, + { + "epoch": 6.191860465116279, + "grad_norm": 0.5080026242663115, + "learning_rate": 1.6081424352045093e-06, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09645294398069382, + "step": 4260, + "valid_targets_mean": 4101.6, + "valid_targets_min": 737 + }, + { + "epoch": 6.199127906976744, + "grad_norm": 0.5273845094067898, + "learning_rate": 1.5797850782014236e-06, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1336199790239334, + "step": 4265, + "valid_targets_mean": 4568.8, + "valid_targets_min": 596 + }, + { + "epoch": 6.2063953488372094, + "grad_norm": 0.4797757782978097, + "learning_rate": 1.5516696882082704e-06, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.08058105409145355, + "step": 4270, + "valid_targets_mean": 3192.5, + "valid_targets_min": 662 + }, + { + "epoch": 6.213662790697675, + "grad_norm": 0.5417432832678817, + "learning_rate": 1.5237966345473942e-06, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09182320535182953, + "step": 4275, + "valid_targets_mean": 3194.9, + "valid_targets_min": 803 + }, + { + "epoch": 6.22093023255814, + "grad_norm": 0.554513792637474, + "learning_rate": 1.4961662833578117e-06, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11547038704156876, + "step": 4280, + "valid_targets_mean": 3828.1, + "valid_targets_min": 391 + }, + { + "epoch": 6.228197674418604, + "grad_norm": 0.5063980695402167, + "learning_rate": 1.4687789975904188e-06, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10384702682495117, + "step": 4285, + "valid_targets_mean": 4186.2, + "valid_targets_min": 771 + }, + { + "epoch": 6.2354651162790695, + "grad_norm": 0.5851372180371623, + "learning_rate": 1.4416351370032077e-06, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12599419057369232, + "step": 4290, + "valid_targets_mean": 4214.8, + "valid_targets_min": 701 + }, + { + "epoch": 6.242732558139535, + "grad_norm": 0.49149694835880525, + "learning_rate": 1.4147350581565644e-06, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09488339722156525, + "step": 4295, + "valid_targets_mean": 4413.6, + "valid_targets_min": 3553 + }, + { + "epoch": 6.25, + "grad_norm": 0.5118955700863174, + "learning_rate": 1.3880791144085582e-06, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10951229184865952, + "step": 4300, + "valid_targets_mean": 4576.1, + "valid_targets_min": 731 + }, + { + "epoch": 6.257267441860465, + "grad_norm": 0.5463767287892208, + "learning_rate": 1.3616676559103104e-06, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10446491092443466, + "step": 4305, + "valid_targets_mean": 3518.8, + "valid_targets_min": 438 + }, 
+ { + "epoch": 6.2645348837209305, + "grad_norm": 0.49764498999090684, + "learning_rate": 1.3355010296014114e-06, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08116332441568375, + "step": 4310, + "valid_targets_mean": 2714.1, + "valid_targets_min": 623 + }, + { + "epoch": 6.271802325581396, + "grad_norm": 0.5963734494507665, + "learning_rate": 1.3095795792053333e-06, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11170788109302521, + "step": 4315, + "valid_targets_mean": 2810.2, + "valid_targets_min": 533 + }, + { + "epoch": 6.27906976744186, + "grad_norm": 0.5953286940405174, + "learning_rate": 1.2839036452249354e-06, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10700145363807678, + "step": 4320, + "valid_targets_mean": 2889.1, + "valid_targets_min": 519 + }, + { + "epoch": 6.286337209302325, + "grad_norm": 0.50442415172854, + "learning_rate": 1.258473564937981e-06, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09842683374881744, + "step": 4325, + "valid_targets_mean": 3742.9, + "valid_targets_min": 498 + }, + { + "epoch": 6.2936046511627906, + "grad_norm": 0.5110182940973045, + "learning_rate": 1.2332896723927257e-06, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370856612920761, + "step": 4330, + "valid_targets_mean": 4617.8, + "valid_targets_min": 603 + }, + { + "epoch": 6.300872093023256, + "grad_norm": 0.5524502877034365, + "learning_rate": 1.2083522984035012e-06, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10689415782690048, + "step": 4335, + "valid_targets_mean": 4125.2, + "valid_targets_min": 687 + }, + { + "epoch": 6.308139534883721, + "grad_norm": 0.5041840537184159, + "learning_rate": 1.183661770546387e-06, + "loss": 0.2205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1000676080584526, + "step": 4340, + "valid_targets_mean": 4452.4, + "valid_targets_min": 634 + }, + { + "epoch": 6.315406976744186, + "grad_norm": 0.5507381719553612, + "learning_rate": 
1.1592184131549056e-06, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10032293200492859, + "step": 4345, + "valid_targets_mean": 3549.0, + "valid_targets_min": 799 + }, + { + "epoch": 6.3226744186046515, + "grad_norm": 0.6529547645313896, + "learning_rate": 1.1350225473157672e-06, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11068548262119293, + "step": 4350, + "valid_targets_mean": 2916.9, + "valid_targets_min": 594 + }, + { + "epoch": 6.329941860465116, + "grad_norm": 0.5210812553212651, + "learning_rate": 1.1110744908646365e-06, + "loss": 0.2126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0965781882405281, + "step": 4355, + "valid_targets_mean": 3368.5, + "valid_targets_min": 430 + }, + { + "epoch": 6.337209302325581, + "grad_norm": 0.5332519718749112, + "learning_rate": 1.0873745583819661e-06, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1284990906715393, + "step": 4360, + "valid_targets_mean": 4401.9, + "valid_targets_min": 506 + }, + { + "epoch": 6.344476744186046, + "grad_norm": 0.591577869670093, + "learning_rate": 1.063923061188874e-06, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09908381849527359, + "step": 4365, + "valid_targets_mean": 3131.2, + "valid_targets_min": 692 + }, + { + "epoch": 6.351744186046512, + "grad_norm": 0.491072911458214, + "learning_rate": 1.040720307343035e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11375901103019714, + "step": 4370, + "valid_targets_mean": 3776.2, + "valid_targets_min": 611 + }, + { + "epoch": 6.359011627906977, + "grad_norm": 0.4770207504307759, + "learning_rate": 1.017766601634651e-06, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10760487616062164, + "step": 4375, + "valid_targets_mean": 4155.9, + "valid_targets_min": 955 + }, + { + "epoch": 6.366279069767442, + "grad_norm": 0.5874644357170348, + "learning_rate": 9.95062245582432e-07, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11503146588802338, + "step": 4380, + "valid_targets_mean": 3213.0, + "valid_targets_min": 708 + }, + { + "epoch": 6.373546511627907, + "grad_norm": 0.5843827386614309, + "learning_rate": 9.72607537429655e-07, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08797569572925568, + "step": 4385, + "valid_targets_mean": 2145.5, + "valid_targets_min": 603 + }, + { + "epoch": 6.3808139534883725, + "grad_norm": 0.5332039227041807, + "learning_rate": 9.504027721402264e-07, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10198168456554413, + "step": 4390, + "valid_targets_mean": 3549.4, + "valid_targets_min": 646 + }, + { + "epoch": 6.388081395348837, + "grad_norm": 0.5561080394886057, + "learning_rate": 9.284482413948148e-07, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09928165376186371, + "step": 4395, + "valid_targets_mean": 3426.6, + "valid_targets_min": 498 + }, + { + "epoch": 6.395348837209302, + "grad_norm": 0.5387550513714358, + "learning_rate": 9.067442335870313e-07, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11906737089157104, + "step": 4400, + "valid_targets_mean": 4181.0, + "valid_targets_min": 700 + }, + { + "epoch": 6.402616279069767, + "grad_norm": 0.5325231325865825, + "learning_rate": 8.852910338196152e-07, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11549727618694305, + "step": 4405, + "valid_targets_mean": 3811.0, + "valid_targets_min": 755 + }, + { + "epoch": 6.409883720930233, + "grad_norm": 0.6373446805590439, + "learning_rate": 8.640889239007166e-07, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14769718050956726, + "step": 4410, + "valid_targets_mean": 4159.0, + "valid_targets_min": 647 + }, + { + "epoch": 6.417151162790698, + "grad_norm": 0.5824414070647802, + "learning_rate": 8.431381823401708e-07, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12444661557674408, + "step": 4415, + "valid_targets_mean": 4532.4, + "valid_targets_min": 709 
+ }, + { + "epoch": 6.424418604651163, + "grad_norm": 0.5213493346535095, + "learning_rate": 8.224390843458652e-07, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08908454328775406, + "step": 4420, + "valid_targets_mean": 3492.6, + "valid_targets_min": 784 + }, + { + "epoch": 6.4316860465116275, + "grad_norm": 0.5660283158101067, + "learning_rate": 8.019919018201005e-07, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07104974240064621, + "step": 4425, + "valid_targets_mean": 2456.0, + "valid_targets_min": 417 + }, + { + "epoch": 6.438953488372093, + "grad_norm": 0.5321869433718921, + "learning_rate": 7.817969033560246e-07, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11819642782211304, + "step": 4430, + "valid_targets_mean": 4258.0, + "valid_targets_min": 446 + }, + { + "epoch": 6.446220930232558, + "grad_norm": 0.4867961180366948, + "learning_rate": 7.618543542341217e-07, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961524099111557, + "step": 4435, + "valid_targets_mean": 3816.1, + "valid_targets_min": 599 + }, + { + "epoch": 6.453488372093023, + "grad_norm": 0.5508115967507313, + "learning_rate": 7.421645164187019e-07, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07896394282579422, + "step": 4440, + "valid_targets_mean": 2026.9, + "valid_targets_min": 671 + }, + { + "epoch": 6.460755813953488, + "grad_norm": 0.5667325021853805, + "learning_rate": 7.227276485544798e-07, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12289373576641083, + "step": 4445, + "valid_targets_mean": 3741.6, + "valid_targets_min": 617 + }, + { + "epoch": 6.468023255813954, + "grad_norm": 0.5400905479292796, + "learning_rate": 7.035440059631616e-07, + "loss": 0.2087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0943833738565445, + "step": 4450, + "valid_targets_mean": 3401.0, + "valid_targets_min": 485 + }, + { + "epoch": 6.475290697674419, + "grad_norm": 0.5008963346942175, + "learning_rate": 
6.846138406401137e-07, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09646037220954895, + "step": 4455, + "valid_targets_mean": 3926.9, + "valid_targets_min": 679 + }, + { + "epoch": 6.482558139534884, + "grad_norm": 0.6244635618997402, + "learning_rate": 6.6593740125102e-07, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12762530148029327, + "step": 4460, + "valid_targets_mean": 3475.9, + "valid_targets_min": 561 + }, + { + "epoch": 6.4898255813953485, + "grad_norm": 0.6847427969655168, + "learning_rate": 6.475149331286457e-07, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10304777324199677, + "step": 4465, + "valid_targets_mean": 2130.4, + "valid_targets_min": 689 + }, + { + "epoch": 6.497093023255814, + "grad_norm": 0.4907430697763535, + "learning_rate": 6.293466782696001e-07, + "loss": 0.2244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09903063625097275, + "step": 4470, + "valid_targets_mean": 4228.6, + "valid_targets_min": 858 + }, + { + "epoch": 6.504360465116279, + "grad_norm": 0.4878310457069805, + "learning_rate": 6.114328753311572e-07, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08721013367176056, + "step": 4475, + "valid_targets_mean": 3588.0, + "valid_targets_min": 589 + }, + { + "epoch": 6.511627906976744, + "grad_norm": 0.6392602373957003, + "learning_rate": 5.937737596281223e-07, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1122431606054306, + "step": 4480, + "valid_targets_mean": 2587.9, + "valid_targets_min": 468 + }, + { + "epoch": 6.5188953488372094, + "grad_norm": 0.6956929712450983, + "learning_rate": 5.763695631297483e-07, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1025511771440506, + "step": 4485, + "valid_targets_mean": 2927.0, + "valid_targets_min": 710 + }, + { + "epoch": 6.526162790697675, + "grad_norm": 0.5589149132557204, + "learning_rate": 5.592205144566753e-07, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11511734127998352, + "step": 4490, + "valid_targets_mean": 4215.8, + "valid_targets_min": 260 + }, + { + "epoch": 6.533430232558139, + "grad_norm": 0.5601364320314534, + "learning_rate": 5.423268388779424e-07, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10099512338638306, + "step": 4495, + "valid_targets_mean": 3608.0, + "valid_targets_min": 665 + }, + { + "epoch": 6.540697674418604, + "grad_norm": 0.4749372904201485, + "learning_rate": 5.256887583080094e-07, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10849292576313019, + "step": 4500, + "valid_targets_mean": 4887.2, + "valid_targets_min": 4227 + }, + { + "epoch": 6.5479651162790695, + "grad_norm": 0.517398445885059, + "learning_rate": 5.093064913038648e-07, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0925350934267044, + "step": 4505, + "valid_targets_mean": 3727.9, + "valid_targets_min": 693 + }, + { + "epoch": 6.555232558139535, + "grad_norm": 0.4669579345900138, + "learning_rate": 4.931802530621376e-07, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10471238940954208, + "step": 4510, + "valid_targets_mean": 4466.2, + "valid_targets_min": 721 + }, + { + "epoch": 6.5625, + "grad_norm": 0.5158823636602122, + "learning_rate": 4.773102554162768e-07, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09145092964172363, + "step": 4515, + "valid_targets_mean": 3328.9, + "valid_targets_min": 439 + }, + { + "epoch": 6.569767441860465, + "grad_norm": 0.6807986618835836, + "learning_rate": 4.61696706833763e-07, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12569642066955566, + "step": 4520, + "valid_targets_mean": 3699.1, + "valid_targets_min": 623 + }, + { + "epoch": 6.5770348837209305, + "grad_norm": 0.5600951397322557, + "learning_rate": 4.4633981241338333e-07, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13266706466674805, + "step": 4525, + "valid_targets_mean": 4428.1, + "valid_targets_min": 238 + }, + { + 
"epoch": 6.584302325581396, + "grad_norm": 0.5827853749348123, + "learning_rate": 4.312397738825236e-07, + "loss": 0.2258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12651139497756958, + "step": 4530, + "valid_targets_mean": 3696.9, + "valid_targets_min": 587 + }, + { + "epoch": 6.591569767441861, + "grad_norm": 0.5438654554473624, + "learning_rate": 4.163967895945242e-07, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12943975627422333, + "step": 4535, + "valid_targets_mean": 4721.0, + "valid_targets_min": 857 + }, + { + "epoch": 6.598837209302325, + "grad_norm": 0.4830117424828023, + "learning_rate": 4.0181105452607563e-07, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11082278192043304, + "step": 4540, + "valid_targets_mean": 4415.0, + "valid_targets_min": 3320 + }, + { + "epoch": 6.6061046511627906, + "grad_norm": 0.5336132946381207, + "learning_rate": 3.874827602746556e-07, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12187427282333374, + "step": 4545, + "valid_targets_mean": 4065.5, + "valid_targets_min": 577 + }, + { + "epoch": 6.613372093023256, + "grad_norm": 0.5781379453512802, + "learning_rate": 3.734120950560116e-07, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1307642161846161, + "step": 4550, + "valid_targets_mean": 3438.9, + "valid_targets_min": 337 + }, + { + "epoch": 6.620639534883721, + "grad_norm": 0.5505669899387781, + "learning_rate": 3.5959924370168487e-07, + "loss": 0.2143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08358632028102875, + "step": 4555, + "valid_targets_mean": 3144.0, + "valid_targets_min": 596 + }, + { + "epoch": 6.627906976744186, + "grad_norm": 0.5629291249358289, + "learning_rate": 3.4604438765659445e-07, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09669582545757294, + "step": 4560, + "valid_targets_mean": 2941.8, + "valid_targets_min": 529 + }, + { + "epoch": 6.6351744186046515, + "grad_norm": 0.5841625684995547, + "learning_rate": 
3.3274770497664365e-07, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13941574096679688, + "step": 4565, + "valid_targets_mean": 4221.9, + "valid_targets_min": 869 + }, + { + "epoch": 6.642441860465116, + "grad_norm": 0.5442309971971446, + "learning_rate": 3.1970937032638206e-07, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08730822801589966, + "step": 4570, + "valid_targets_mean": 3145.8, + "valid_targets_min": 541 + }, + { + "epoch": 6.649709302325581, + "grad_norm": 0.6272064657031894, + "learning_rate": 3.0692955497670705e-07, + "loss": 0.2187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10662917047739029, + "step": 4575, + "valid_targets_mean": 3310.1, + "valid_targets_min": 756 + }, + { + "epoch": 6.656976744186046, + "grad_norm": 0.5451476495831158, + "learning_rate": 2.944084268026326e-07, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10781296342611313, + "step": 4580, + "valid_targets_mean": 2932.6, + "valid_targets_min": 589 + }, + { + "epoch": 6.664244186046512, + "grad_norm": 0.4471826485431661, + "learning_rate": 2.821461502810641e-07, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12207210063934326, + "step": 4585, + "valid_targets_mean": 4912.2, + "valid_targets_min": 4070 + }, + { + "epoch": 6.671511627906977, + "grad_norm": 0.5301911172654994, + "learning_rate": 2.701428864886402e-07, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1026616096496582, + "step": 4590, + "valid_targets_mean": 3659.4, + "valid_targets_min": 751 + }, + { + "epoch": 6.678779069767442, + "grad_norm": 0.7337321372097355, + "learning_rate": 2.583987930996279e-07, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10018828511238098, + "step": 4595, + "valid_targets_mean": 4107.9, + "valid_targets_min": 1046 + }, + { + "epoch": 6.686046511627907, + "grad_norm": 0.6997967805226685, + "learning_rate": 2.469140243838464e-07, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11243093013763428, + "step": 4600, + "valid_targets_mean": 2284.9, + "valid_targets_min": 521 + }, + { + "epoch": 6.6933139534883725, + "grad_norm": 0.5247933139316745, + "learning_rate": 2.3568873120462854e-07, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09087327122688293, + "step": 4605, + "valid_targets_mean": 3116.1, + "valid_targets_min": 581 + }, + { + "epoch": 6.700581395348837, + "grad_norm": 0.5013207071106418, + "learning_rate": 2.247230610168627e-07, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12086968123912811, + "step": 4610, + "valid_targets_mean": 4454.2, + "valid_targets_min": 3719 + }, + { + "epoch": 6.707848837209302, + "grad_norm": 0.49047249454640285, + "learning_rate": 2.1401715786503408e-07, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10399701446294785, + "step": 4615, + "valid_targets_mean": 4197.2, + "valid_targets_min": 968 + }, + { + "epoch": 6.715116279069767, + "grad_norm": 0.5270903466326124, + "learning_rate": 2.0357116238134633e-07, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09298289567232132, + "step": 4620, + "valid_targets_mean": 3860.4, + "valid_targets_min": 599 + }, + { + "epoch": 6.722383720930233, + "grad_norm": 0.5686367907075806, + "learning_rate": 1.9338521178386304e-07, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13162291049957275, + "step": 4625, + "valid_targets_mean": 4046.5, + "valid_targets_min": 673 + }, + { + "epoch": 6.729651162790698, + "grad_norm": 0.5106997928463469, + "learning_rate": 1.8345943987471804e-07, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.121708944439888, + "step": 4630, + "valid_targets_mean": 4563.0, + "valid_targets_min": 3324 + }, + { + "epoch": 6.736918604651163, + "grad_norm": 0.5831411162492427, + "learning_rate": 1.7379397703834788e-07, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11596697568893433, + "step": 4635, + "valid_targets_mean": 3143.0, + 
"valid_targets_min": 696 + }, + { + "epoch": 6.7441860465116275, + "grad_norm": 0.5961973276477247, + "learning_rate": 1.643889502397844e-07, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.129440039396286, + "step": 4640, + "valid_targets_mean": 2991.0, + "valid_targets_min": 630 + }, + { + "epoch": 6.751453488372093, + "grad_norm": 0.5907922312322912, + "learning_rate": 1.5524448302297822e-07, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13513025641441345, + "step": 4645, + "valid_targets_mean": 4267.2, + "valid_targets_min": 834 + }, + { + "epoch": 6.758720930232558, + "grad_norm": 0.5781430139225441, + "learning_rate": 1.4636069550919118e-07, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13518787920475006, + "step": 4650, + "valid_targets_mean": 4008.6, + "valid_targets_min": 633 + }, + { + "epoch": 6.765988372093023, + "grad_norm": 0.6234589323531933, + "learning_rate": 1.3773770439540646e-07, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12242134660482407, + "step": 4655, + "valid_targets_mean": 3451.1, + "valid_targets_min": 562 + }, + { + "epoch": 6.773255813953488, + "grad_norm": 0.5007798140831097, + "learning_rate": 1.2937562295279648e-07, + "loss": 0.2163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09039414674043655, + "step": 4660, + "valid_targets_mean": 3912.1, + "valid_targets_min": 882 + }, + { + "epoch": 6.780523255813954, + "grad_norm": 0.5885595688241299, + "learning_rate": 1.2127456102523748e-07, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10255330055952072, + "step": 4665, + "valid_targets_mean": 2942.2, + "valid_targets_min": 464 + }, + { + "epoch": 6.787790697674419, + "grad_norm": 0.562684289446446, + "learning_rate": 1.1343462502787506e-07, + "loss": 0.2252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13460709154605865, + "step": 4670, + "valid_targets_mean": 3817.2, + "valid_targets_min": 600 + }, + { + "epoch": 6.795058139534884, + "grad_norm": 
0.6026537059681291, + "learning_rate": 1.0585591794570527e-07, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313938945531845, + "step": 4675, + "valid_targets_mean": 3602.8, + "valid_targets_min": 763 + }, + { + "epoch": 6.8023255813953485, + "grad_norm": 0.5853621048092735, + "learning_rate": 9.853853933224244e-08, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10932928323745728, + "step": 4680, + "valid_targets_mean": 3041.2, + "valid_targets_min": 698 + }, + { + "epoch": 6.809593023255814, + "grad_norm": 0.6080383955492051, + "learning_rate": 9.14825853082002e-08, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11644819378852844, + "step": 4685, + "valid_targets_mean": 3385.8, + "valid_targets_min": 605 + }, + { + "epoch": 6.816860465116279, + "grad_norm": 0.4775678543427417, + "learning_rate": 8.468814856023466e-08, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1143902912735939, + "step": 4690, + "valid_targets_mean": 5123.6, + "valid_targets_min": 3314 + }, + { + "epoch": 6.824127906976744, + "grad_norm": 0.528062844579276, + "learning_rate": 7.815531833972323e-08, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10117469727993011, + "step": 4695, + "valid_targets_mean": 3858.6, + "valid_targets_min": 412 + }, + { + "epoch": 6.8313953488372094, + "grad_norm": 0.5341213195324499, + "learning_rate": 7.188418046158996e-08, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1261979341506958, + "step": 4700, + "valid_targets_mean": 4170.6, + "valid_targets_min": 622 + }, + { + "epoch": 6.838662790697675, + "grad_norm": 0.5077496161516507, + "learning_rate": 6.587481730318645e-08, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11185050010681152, + "step": 4705, + "valid_targets_mean": 4248.8, + "valid_targets_min": 1089 + }, + { + "epoch": 6.845930232558139, + "grad_norm": 0.4752393870528188, + "learning_rate": 6.012730780320163e-08, + "loss": 0.2271, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.12355662882328033, + "step": 4710, + "valid_targets_mean": 5046.0, + "valid_targets_min": 934 + }, + { + "epoch": 6.853197674418604, + "grad_norm": 0.5391686062982739, + "learning_rate": 5.4641727460631455e-08, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11547373980283737, + "step": 4715, + "valid_targets_mean": 3947.9, + "valid_targets_min": 562 + }, + { + "epoch": 6.8604651162790695, + "grad_norm": 0.5353702001037038, + "learning_rate": 4.941814833377745e-08, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1061892881989479, + "step": 4720, + "valid_targets_mean": 3186.4, + "valid_targets_min": 745 + }, + { + "epoch": 6.867732558139535, + "grad_norm": 0.5141334293805976, + "learning_rate": 4.445663903931419e-08, + "loss": 0.2141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07714653015136719, + "step": 4725, + "valid_targets_mean": 3283.6, + "valid_targets_min": 512 + }, + { + "epoch": 6.875, + "grad_norm": 0.6642814032204781, + "learning_rate": 3.975726475137443e-08, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1309489905834198, + "step": 4730, + "valid_targets_mean": 2474.1, + "valid_targets_min": 522 + }, + { + "epoch": 6.882267441860465, + "grad_norm": 0.6511885413405972, + "learning_rate": 3.5320087200698686e-08, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14132645726203918, + "step": 4735, + "valid_targets_mean": 3458.9, + "valid_targets_min": 672 + }, + { + "epoch": 6.8895348837209305, + "grad_norm": 0.635448048551765, + "learning_rate": 3.114516467383144e-08, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10060547292232513, + "step": 4740, + "valid_targets_mean": 2998.4, + "valid_targets_min": 520 + }, + { + "epoch": 6.896802325581396, + "grad_norm": 0.5539102046278637, + "learning_rate": 2.7232552012339542e-08, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08430768549442291, + "step": 4745, + "valid_targets_mean": 
2591.6, + "valid_targets_min": 300 + }, + { + "epoch": 6.904069767441861, + "grad_norm": 0.48564974306250575, + "learning_rate": 2.358230061210387e-08, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12091777473688126, + "step": 4750, + "valid_targets_mean": 4487.0, + "valid_targets_min": 1123 + }, + { + "epoch": 6.911337209302325, + "grad_norm": 0.486240918272119, + "learning_rate": 2.0194458422646557e-08, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09467769414186478, + "step": 4755, + "valid_targets_mean": 3418.2, + "valid_targets_min": 744 + }, + { + "epoch": 6.9186046511627906, + "grad_norm": 0.5006300347186905, + "learning_rate": 1.706906994648705e-08, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11444921046495438, + "step": 4760, + "valid_targets_mean": 4181.8, + "valid_targets_min": 673 + }, + { + "epoch": 6.925872093023256, + "grad_norm": 0.5326767776519343, + "learning_rate": 1.4206176238571457e-08, + "loss": 0.2132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09992027282714844, + "step": 4765, + "valid_targets_mean": 3968.4, + "valid_targets_min": 807 + }, + { + "epoch": 6.933139534883721, + "grad_norm": 0.6524069067054485, + "learning_rate": 1.160581490572632e-08, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1270563006401062, + "step": 4770, + "valid_targets_mean": 3597.4, + "valid_targets_min": 778 + }, + { + "epoch": 6.940406976744186, + "grad_norm": 0.5455272591274605, + "learning_rate": 9.268020106167896e-09, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09810951352119446, + "step": 4775, + "valid_targets_mean": 3681.2, + "valid_targets_min": 599 + }, + { + "epoch": 6.9476744186046515, + "grad_norm": 0.5273676324333582, + "learning_rate": 7.192822549046963e-09, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11775662750005722, + "step": 4780, + "valid_targets_mean": 4525.5, + "valid_targets_min": 634 + }, + { + "epoch": 6.954941860465116, + "grad_norm": 
0.5131194731831497, + "learning_rate": 5.38024949405358e-09, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09795769304037094, + "step": 4785, + "valid_targets_mean": 3921.0, + "valid_targets_min": 696 + }, + { + "epoch": 6.962209302325581, + "grad_norm": 0.5857771734829496, + "learning_rate": 3.8303247510596e-09, + "loss": 0.2196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12851554155349731, + "step": 4790, + "valid_targets_mean": 4447.5, + "valid_targets_min": 1117 + }, + { + "epoch": 6.969476744186046, + "grad_norm": 0.5688605253517679, + "learning_rate": 2.5430686797944803e-09, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09657136350870132, + "step": 4795, + "valid_targets_mean": 3530.4, + "valid_targets_min": 874 + }, + { + "epoch": 6.976744186046512, + "grad_norm": 0.6648295757131324, + "learning_rate": 1.5184981895899342e-09, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428152322769165, + "step": 4800, + "valid_targets_mean": 3244.0, + "valid_targets_min": 636 + }, + { + "epoch": 6.984011627906977, + "grad_norm": 0.5347563819472954, + "learning_rate": 7.566267391512228e-10, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09766152501106262, + "step": 4805, + "valid_targets_mean": 3221.5, + "valid_targets_min": 571 + }, + { + "epoch": 6.991279069767442, + "grad_norm": 0.5588348867166488, + "learning_rate": 2.574643363839613e-10, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13627320528030396, + "step": 4810, + "valid_targets_mean": 3636.0, + "valid_targets_min": 547 + }, + { + "epoch": 6.998546511627907, + "grad_norm": 0.5159770970836671, + "learning_rate": 2.1017538260892367e-11, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08966579288244247, + "step": 4815, + "valid_targets_mean": 3309.4, + "valid_targets_min": 567 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12460079044103622, + "step": 4816, + "total_flos": 2.3699064220633006e+18, + 
"train_loss": 0.2668512877930834, + "train_runtime": 60502.9208, + "train_samples_per_second": 1.273, + "train_steps_per_second": 0.08, + "valid_targets_mean": 4185.4, + "valid_targets_min": 557 + } + ], + "logging_steps": 5, + "max_steps": 4816, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.3699064220633006e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..398112a --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f98ed4e5da804586bf2e8a4c939209b0b5ec6f6522d7b0926fffefb7374f1785 +size 8657 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..f5ebfe0 Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833