commit c283208a36839659bf923093242d7956e1106e38 Author: ModelHub XC Date: Sat Jun 13 01:32:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: laion/exp-syh-r2egym-askllm-hardened_glm_4_7_traces_jupiter Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..183f36c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs 
-text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +merges.txt filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0b38fcb --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen3-8B +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: exp-syh-r2egym-askllm-hardened_glm_4_7_traces_jupiter + results: [] +--- + + + +# exp-syh-r2egym-askllm-hardened_glm_4_7_traces_jupiter + +This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the /data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-syh-r2egym-askllm-hardened_glm_4.7_traces_jupiter/snapshots/625842bb217a7168a4b563bc70dc391100b5f483_thinking_preprocessed dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 16 +- total_eval_batch_size: 64 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 7.0 + +### Training results + + + +### Framework versions + +- Transformers 4.57.6 +- Pytorch 2.9.0+cu128 +- Datasets 4.4.1 +- Tokenizers 0.22.2 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "</think>": 151668, + "</tool_call>": 151658, + "</tool_response>": 151666, + "<think>": 151667, + "<tool_call>": 151657, + "<tool_response>": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..3ab143a --- /dev/null +++ b/all_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 5.399802498952449, + "achieved_tflops_per_gpu_theoretical": 233.76541483479863, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.17104065418243408, + "mfu_percent": 0.545986096961825, + "mfu_percent_theoretical": 23.636543461556993, + "total_flos": 2.6783135993776046e+18, + "train_loss": 0.2845736916749181, + "train_runtime": 62000.2676, + "train_samples_per_second": 1.109, + "train_steps_per_second": 0.069, + "valid_targets_mean": 7242.6, + "valid_targets_min": 3287 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..01be9b3 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,89 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' 
}} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '</think>' in content %} + {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %} + {%- set content = content.split('</think>')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '<tool_call>\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n</tool_call>' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n<tool_response>\n' }} + {{- content }} + {{- '\n</tool_response>' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '<think>\n\n</think>\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..59b4193 --- /dev/null +++ b/config.json 
@@ -0,0 +1,68 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.6", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..159097f --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "others", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9adbb28 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + 
"temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.57.6" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..71e434c --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3faf1980d72eae24a9e21864d6469d191db6926154ad65146673fcbfa4b98a7c +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..00784b2 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80c13fd8077453c9d14b1c436956956e8a6aadf1cbcc468e89f2a3c2dbe3d03 +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..5451243 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e97be69e33c25a5fde3e5bb42f95090f4dee50eedb09678252e8116a0583c7 +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..2309146 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:172e54d4c0b1bf20d3967441f9bdf0f449eda94f07b1babc33727265823b4e57 +size 1580230264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ba886c0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,407 @@ +{ + "metadata": { + "total_parameters": 308224, + "total_size": 16381470720 + }, + "weight_map": { + "lm_head.weight": 
"model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_norm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..72db9ae --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": "625842bb217a7168a4b563bc70dc391100b5f483_thinking_preprocessed", + "training_start": null, + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "Qwen/Qwen3-8B", + "dataset_name": "/data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-syh-r2egym-askllm-hardened_glm_4.7_traces_jupiter/snapshots/625842bb217a7168a4b563bc70dc391100b5f483_thinking_preprocessed", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/laion/exp-syh-r2egym-askllm-hardened_glm_4_7_traces_jupiter/blob/main/config.json", + "wandb_link": "https://wandb.ai/dogml/OpenThoughts-Agent/runs/sft_exp-syh-r2egym-askllm-hardened_glm_4-7_traces_jupiter_Qwen3-8B", + "traces_location_s3": null +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json 
b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e9dc937 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + 
"content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..3ab143a --- /dev/null +++ b/train_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 5.399802498952449, + "achieved_tflops_per_gpu_theoretical": 233.76541483479863, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17104065418243408, + "mfu_percent": 0.545986096961825, + "mfu_percent_theoretical": 23.636543461556993, + "total_flos": 2.6783135993776046e+18, + "train_loss": 0.2845736916749181, + "train_runtime": 62000.2676, + "train_samples_per_second": 1.109, + "train_steps_per_second": 0.069, + "valid_targets_mean": 7242.6, + "valid_targets_min": 3287 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..d392b16 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,862 @@ +{"current_steps": 5, "total_steps": 4305, "loss": 0.7715, "lr": 3.7122969837587006e-07, "epoch": 0.008136696501220505, 
"percentage": 0.12, "elapsed_time": "0:01:37", "remaining_time": "23:20:03"} +{"current_steps": 10, "total_steps": 4305, "loss": 0.7117, "lr": 8.352668213457077e-07, "epoch": 0.01627339300244101, "percentage": 0.23, "elapsed_time": "0:02:57", "remaining_time": "21:10:15"} +{"current_steps": 15, "total_steps": 4305, "loss": 0.694, "lr": 1.2993039443155453e-06, "epoch": 0.024410089503661515, "percentage": 0.35, "elapsed_time": "0:04:13", "remaining_time": "20:08:44"} +{"current_steps": 20, "total_steps": 4305, "loss": 0.6943, "lr": 1.7633410672853829e-06, "epoch": 0.03254678600488202, "percentage": 0.46, "elapsed_time": "0:05:30", "remaining_time": "19:40:42"} +{"current_steps": 25, "total_steps": 4305, "loss": 0.6218, "lr": 2.2273781902552207e-06, "epoch": 0.04068348250610252, "percentage": 0.58, "elapsed_time": "0:06:45", "remaining_time": "19:16:56"} +{"current_steps": 30, "total_steps": 4305, "loss": 0.5883, "lr": 2.691415313225058e-06, "epoch": 0.04882017900732303, "percentage": 0.7, "elapsed_time": "0:07:57", "remaining_time": "18:54:09"} +{"current_steps": 35, "total_steps": 4305, "loss": 0.5962, "lr": 3.155452436194896e-06, "epoch": 0.05695687550854353, "percentage": 0.81, "elapsed_time": "0:09:09", "remaining_time": "18:38:18"} +{"current_steps": 40, "total_steps": 4305, "loss": 0.5804, "lr": 3.6194895591647333e-06, "epoch": 0.06509357200976404, "percentage": 0.93, "elapsed_time": "0:10:19", "remaining_time": "18:21:22"} +{"current_steps": 45, "total_steps": 4305, "loss": 0.5337, "lr": 4.083526682134571e-06, "epoch": 0.07323026851098453, "percentage": 1.05, "elapsed_time": "0:11:36", "remaining_time": "18:18:57"} +{"current_steps": 50, "total_steps": 4305, "loss": 0.5084, "lr": 4.547563805104409e-06, "epoch": 0.08136696501220504, "percentage": 1.16, "elapsed_time": "0:12:49", "remaining_time": "18:11:23"} +{"current_steps": 55, "total_steps": 4305, "loss": 0.5385, "lr": 5.011600928074246e-06, "epoch": 0.08950366151342555, "percentage": 1.28, "elapsed_time": 
"0:13:57", "remaining_time": "17:58:24"} +{"current_steps": 60, "total_steps": 4305, "loss": 0.5077, "lr": 5.4756380510440845e-06, "epoch": 0.09764035801464606, "percentage": 1.39, "elapsed_time": "0:15:11", "remaining_time": "17:55:13"} +{"current_steps": 65, "total_steps": 4305, "loss": 0.5264, "lr": 5.939675174013921e-06, "epoch": 0.10577705451586655, "percentage": 1.51, "elapsed_time": "0:16:28", "remaining_time": "17:54:35"} +{"current_steps": 70, "total_steps": 4305, "loss": 0.525, "lr": 6.403712296983759e-06, "epoch": 0.11391375101708706, "percentage": 1.63, "elapsed_time": "0:17:34", "remaining_time": "17:43:26"} +{"current_steps": 75, "total_steps": 4305, "loss": 0.4679, "lr": 6.867749419953597e-06, "epoch": 0.12205044751830757, "percentage": 1.74, "elapsed_time": "0:18:49", "remaining_time": "17:41:38"} +{"current_steps": 80, "total_steps": 4305, "loss": 0.4843, "lr": 7.331786542923435e-06, "epoch": 0.13018714401952808, "percentage": 1.86, "elapsed_time": "0:20:03", "remaining_time": "17:39:21"} +{"current_steps": 85, "total_steps": 4305, "loss": 0.4312, "lr": 7.795823665893271e-06, "epoch": 0.1383238405207486, "percentage": 1.97, "elapsed_time": "0:21:18", "remaining_time": "17:37:43"} +{"current_steps": 90, "total_steps": 4305, "loss": 0.4646, "lr": 8.25986078886311e-06, "epoch": 0.14646053702196907, "percentage": 2.09, "elapsed_time": "0:22:28", "remaining_time": "17:32:16"} +{"current_steps": 95, "total_steps": 4305, "loss": 0.456, "lr": 8.723897911832948e-06, "epoch": 0.15459723352318958, "percentage": 2.21, "elapsed_time": "0:23:44", "remaining_time": "17:32:03"} +{"current_steps": 100, "total_steps": 4305, "loss": 0.4707, "lr": 9.187935034802784e-06, "epoch": 0.16273393002441008, "percentage": 2.32, "elapsed_time": "0:25:00", "remaining_time": "17:31:33"} +{"current_steps": 105, "total_steps": 4305, "loss": 0.4261, "lr": 9.651972157772623e-06, "epoch": 0.1708706265256306, "percentage": 2.44, "elapsed_time": "0:26:18", "remaining_time": "17:32:37"} 
+{"current_steps": 110, "total_steps": 4305, "loss": 0.4521, "lr": 1.011600928074246e-05, "epoch": 0.1790073230268511, "percentage": 2.56, "elapsed_time": "0:27:34", "remaining_time": "17:31:37"} +{"current_steps": 115, "total_steps": 4305, "loss": 0.4236, "lr": 1.0580046403712299e-05, "epoch": 0.1871440195280716, "percentage": 2.67, "elapsed_time": "0:28:50", "remaining_time": "17:31:05"} +{"current_steps": 120, "total_steps": 4305, "loss": 0.423, "lr": 1.1044083526682134e-05, "epoch": 0.19528071602929212, "percentage": 2.79, "elapsed_time": "0:29:58", "remaining_time": "17:25:06"} +{"current_steps": 125, "total_steps": 4305, "loss": 0.441, "lr": 1.1508120649651972e-05, "epoch": 0.20341741253051263, "percentage": 2.9, "elapsed_time": "0:31:13", "remaining_time": "17:24:19"} +{"current_steps": 130, "total_steps": 4305, "loss": 0.4045, "lr": 1.197215777262181e-05, "epoch": 0.2115541090317331, "percentage": 3.02, "elapsed_time": "0:32:23", "remaining_time": "17:20:12"} +{"current_steps": 135, "total_steps": 4305, "loss": 0.3979, "lr": 1.2436194895591649e-05, "epoch": 0.21969080553295361, "percentage": 3.14, "elapsed_time": "0:33:38", "remaining_time": "17:19:17"} +{"current_steps": 140, "total_steps": 4305, "loss": 0.4539, "lr": 1.2900232018561485e-05, "epoch": 0.22782750203417412, "percentage": 3.25, "elapsed_time": "0:34:53", "remaining_time": "17:18:15"} +{"current_steps": 145, "total_steps": 4305, "loss": 0.4195, "lr": 1.3364269141531323e-05, "epoch": 0.23596419853539463, "percentage": 3.37, "elapsed_time": "0:36:07", "remaining_time": "17:16:29"} +{"current_steps": 150, "total_steps": 4305, "loss": 0.3832, "lr": 1.3828306264501162e-05, "epoch": 0.24410089503661514, "percentage": 3.48, "elapsed_time": "0:37:22", "remaining_time": "17:15:22"} +{"current_steps": 155, "total_steps": 4305, "loss": 0.3907, "lr": 1.4292343387471e-05, "epoch": 0.25223759153783565, "percentage": 3.6, "elapsed_time": "0:38:41", "remaining_time": "17:16:07"} +{"current_steps": 160, 
"total_steps": 4305, "loss": 0.4102, "lr": 1.4756380510440838e-05, "epoch": 0.26037428803905616, "percentage": 3.72, "elapsed_time": "0:39:53", "remaining_time": "17:13:20"} +{"current_steps": 165, "total_steps": 4305, "loss": 0.3853, "lr": 1.5220417633410673e-05, "epoch": 0.26851098454027666, "percentage": 3.83, "elapsed_time": "0:41:04", "remaining_time": "17:10:43"} +{"current_steps": 170, "total_steps": 4305, "loss": 0.3823, "lr": 1.5684454756380513e-05, "epoch": 0.2766476810414972, "percentage": 3.95, "elapsed_time": "0:42:23", "remaining_time": "17:11:05"} +{"current_steps": 175, "total_steps": 4305, "loss": 0.3878, "lr": 1.614849187935035e-05, "epoch": 0.2847843775427177, "percentage": 4.07, "elapsed_time": "0:43:37", "remaining_time": "17:09:37"} +{"current_steps": 180, "total_steps": 4305, "loss": 0.3899, "lr": 1.661252900232019e-05, "epoch": 0.29292107404393813, "percentage": 4.18, "elapsed_time": "0:44:53", "remaining_time": "17:08:46"} +{"current_steps": 185, "total_steps": 4305, "loss": 0.3979, "lr": 1.7076566125290022e-05, "epoch": 0.30105777054515864, "percentage": 4.3, "elapsed_time": "0:46:08", "remaining_time": "17:07:36"} +{"current_steps": 190, "total_steps": 4305, "loss": 0.3761, "lr": 1.7540603248259862e-05, "epoch": 0.30919446704637915, "percentage": 4.41, "elapsed_time": "0:47:26", "remaining_time": "17:07:21"} +{"current_steps": 195, "total_steps": 4305, "loss": 0.4093, "lr": 1.80046403712297e-05, "epoch": 0.31733116354759966, "percentage": 4.53, "elapsed_time": "0:48:42", "remaining_time": "17:06:36"} +{"current_steps": 200, "total_steps": 4305, "loss": 0.4148, "lr": 1.846867749419954e-05, "epoch": 0.32546786004882017, "percentage": 4.65, "elapsed_time": "0:49:57", "remaining_time": "17:05:33"} +{"current_steps": 205, "total_steps": 4305, "loss": 0.4007, "lr": 1.8932714617169375e-05, "epoch": 0.3336045565500407, "percentage": 4.76, "elapsed_time": "0:51:08", "remaining_time": "17:02:55"} +{"current_steps": 210, "total_steps": 4305, "loss": 
0.359, "lr": 1.9396751740139212e-05, "epoch": 0.3417412530512612, "percentage": 4.88, "elapsed_time": "0:52:14", "remaining_time": "16:58:38"} +{"current_steps": 215, "total_steps": 4305, "loss": 0.3616, "lr": 1.986078886310905e-05, "epoch": 0.3498779495524817, "percentage": 4.99, "elapsed_time": "0:53:25", "remaining_time": "16:56:10"} +{"current_steps": 220, "total_steps": 4305, "loss": 0.3707, "lr": 2.0324825986078888e-05, "epoch": 0.3580146460537022, "percentage": 5.11, "elapsed_time": "0:54:37", "remaining_time": "16:54:16"} +{"current_steps": 225, "total_steps": 4305, "loss": 0.4264, "lr": 2.0788863109048725e-05, "epoch": 0.3661513425549227, "percentage": 5.23, "elapsed_time": "0:55:48", "remaining_time": "16:52:03"} +{"current_steps": 230, "total_steps": 4305, "loss": 0.3609, "lr": 2.125290023201856e-05, "epoch": 0.3742880390561432, "percentage": 5.34, "elapsed_time": "0:57:02", "remaining_time": "16:50:46"} +{"current_steps": 235, "total_steps": 4305, "loss": 0.3729, "lr": 2.17169373549884e-05, "epoch": 0.3824247355573637, "percentage": 5.46, "elapsed_time": "0:58:17", "remaining_time": "16:49:33"} +{"current_steps": 240, "total_steps": 4305, "loss": 0.356, "lr": 2.2180974477958238e-05, "epoch": 0.39056143205858423, "percentage": 5.57, "elapsed_time": "0:59:34", "remaining_time": "16:49:08"} +{"current_steps": 245, "total_steps": 4305, "loss": 0.3773, "lr": 2.2645011600928078e-05, "epoch": 0.39869812855980474, "percentage": 5.69, "elapsed_time": "1:00:44", "remaining_time": "16:46:32"} +{"current_steps": 250, "total_steps": 4305, "loss": 0.3638, "lr": 2.3109048723897914e-05, "epoch": 0.40683482506102525, "percentage": 5.81, "elapsed_time": "1:01:57", "remaining_time": "16:44:52"} +{"current_steps": 255, "total_steps": 4305, "loss": 0.3884, "lr": 2.357308584686775e-05, "epoch": 0.4149715215622457, "percentage": 5.92, "elapsed_time": "1:03:10", "remaining_time": "16:43:19"} +{"current_steps": 260, "total_steps": 4305, "loss": 0.3825, "lr": 
2.4037122969837587e-05, "epoch": 0.4231082180634662, "percentage": 6.04, "elapsed_time": "1:04:19", "remaining_time": "16:40:51"} +{"current_steps": 265, "total_steps": 4305, "loss": 0.3741, "lr": 2.4501160092807427e-05, "epoch": 0.4312449145646867, "percentage": 6.16, "elapsed_time": "1:05:34", "remaining_time": "16:39:39"} +{"current_steps": 270, "total_steps": 4305, "loss": 0.378, "lr": 2.4965197215777264e-05, "epoch": 0.43938161106590723, "percentage": 6.27, "elapsed_time": "1:06:45", "remaining_time": "16:37:34"} +{"current_steps": 275, "total_steps": 4305, "loss": 0.3781, "lr": 2.54292343387471e-05, "epoch": 0.44751830756712774, "percentage": 6.39, "elapsed_time": "1:07:55", "remaining_time": "16:35:30"} +{"current_steps": 280, "total_steps": 4305, "loss": 0.3909, "lr": 2.589327146171694e-05, "epoch": 0.45565500406834825, "percentage": 6.5, "elapsed_time": "1:09:05", "remaining_time": "16:33:10"} +{"current_steps": 285, "total_steps": 4305, "loss": 0.3599, "lr": 2.6357308584686777e-05, "epoch": 0.46379170056956875, "percentage": 6.62, "elapsed_time": "1:10:21", "remaining_time": "16:32:28"} +{"current_steps": 290, "total_steps": 4305, "loss": 0.4067, "lr": 2.6821345707656617e-05, "epoch": 0.47192839707078926, "percentage": 6.74, "elapsed_time": "1:11:37", "remaining_time": "16:31:36"} +{"current_steps": 295, "total_steps": 4305, "loss": 0.3392, "lr": 2.7285382830626453e-05, "epoch": 0.48006509357200977, "percentage": 6.85, "elapsed_time": "1:12:53", "remaining_time": "16:30:44"} +{"current_steps": 300, "total_steps": 4305, "loss": 0.3658, "lr": 2.774941995359629e-05, "epoch": 0.4882017900732303, "percentage": 6.97, "elapsed_time": "1:14:04", "remaining_time": "16:28:59"} +{"current_steps": 305, "total_steps": 4305, "loss": 0.3616, "lr": 2.8213457076566126e-05, "epoch": 0.4963384865744508, "percentage": 7.08, "elapsed_time": "1:15:18", "remaining_time": "16:27:33"} +{"current_steps": 310, "total_steps": 4305, "loss": 0.3422, "lr": 2.8677494199535966e-05, 
"epoch": 0.5044751830756713, "percentage": 7.2, "elapsed_time": "1:16:31", "remaining_time": "16:26:14"} +{"current_steps": 315, "total_steps": 4305, "loss": 0.3643, "lr": 2.9141531322505803e-05, "epoch": 0.5126118795768918, "percentage": 7.32, "elapsed_time": "1:17:47", "remaining_time": "16:25:23"} +{"current_steps": 320, "total_steps": 4305, "loss": 0.3502, "lr": 2.9605568445475643e-05, "epoch": 0.5207485760781123, "percentage": 7.43, "elapsed_time": "1:19:00", "remaining_time": "16:23:57"} +{"current_steps": 325, "total_steps": 4305, "loss": 0.3665, "lr": 3.006960556844548e-05, "epoch": 0.5288852725793328, "percentage": 7.55, "elapsed_time": "1:20:16", "remaining_time": "16:23:01"} +{"current_steps": 330, "total_steps": 4305, "loss": 0.3597, "lr": 3.053364269141532e-05, "epoch": 0.5370219690805533, "percentage": 7.67, "elapsed_time": "1:21:22", "remaining_time": "16:20:11"} +{"current_steps": 335, "total_steps": 4305, "loss": 0.353, "lr": 3.099767981438515e-05, "epoch": 0.5451586655817738, "percentage": 7.78, "elapsed_time": "1:22:37", "remaining_time": "16:19:12"} +{"current_steps": 340, "total_steps": 4305, "loss": 0.3822, "lr": 3.146171693735499e-05, "epoch": 0.5532953620829943, "percentage": 7.9, "elapsed_time": "1:23:55", "remaining_time": "16:18:38"} +{"current_steps": 345, "total_steps": 4305, "loss": 0.3614, "lr": 3.1925754060324825e-05, "epoch": 0.5614320585842149, "percentage": 8.01, "elapsed_time": "1:25:07", "remaining_time": "16:17:09"} +{"current_steps": 350, "total_steps": 4305, "loss": 0.3891, "lr": 3.2389791183294665e-05, "epoch": 0.5695687550854354, "percentage": 8.13, "elapsed_time": "1:26:21", "remaining_time": "16:15:51"} +{"current_steps": 355, "total_steps": 4305, "loss": 0.3645, "lr": 3.2853828306264505e-05, "epoch": 0.5777054515866559, "percentage": 8.25, "elapsed_time": "1:27:36", "remaining_time": "16:14:43"} +{"current_steps": 360, "total_steps": 4305, "loss": 0.3647, "lr": 3.3317865429234345e-05, "epoch": 0.5858421480878763, 
"percentage": 8.36, "elapsed_time": "1:28:48", "remaining_time": "16:13:08"} +{"current_steps": 365, "total_steps": 4305, "loss": 0.3712, "lr": 3.378190255220418e-05, "epoch": 0.5939788445890968, "percentage": 8.48, "elapsed_time": "1:30:02", "remaining_time": "16:11:59"} +{"current_steps": 370, "total_steps": 4305, "loss": 0.3691, "lr": 3.424593967517402e-05, "epoch": 0.6021155410903173, "percentage": 8.59, "elapsed_time": "1:31:13", "remaining_time": "16:10:14"} +{"current_steps": 375, "total_steps": 4305, "loss": 0.3772, "lr": 3.470997679814386e-05, "epoch": 0.6102522375915378, "percentage": 8.71, "elapsed_time": "1:32:23", "remaining_time": "16:08:19"} +{"current_steps": 380, "total_steps": 4305, "loss": 0.3627, "lr": 3.517401392111369e-05, "epoch": 0.6183889340927583, "percentage": 8.83, "elapsed_time": "1:33:37", "remaining_time": "16:07:05"} +{"current_steps": 385, "total_steps": 4305, "loss": 0.357, "lr": 3.563805104408353e-05, "epoch": 0.6265256305939788, "percentage": 8.94, "elapsed_time": "1:34:53", "remaining_time": "16:06:10"} +{"current_steps": 390, "total_steps": 4305, "loss": 0.3394, "lr": 3.6102088167053364e-05, "epoch": 0.6346623270951993, "percentage": 9.06, "elapsed_time": "1:36:08", "remaining_time": "16:05:09"} +{"current_steps": 395, "total_steps": 4305, "loss": 0.3407, "lr": 3.6566125290023204e-05, "epoch": 0.6427990235964198, "percentage": 9.18, "elapsed_time": "1:37:22", "remaining_time": "16:03:52"} +{"current_steps": 400, "total_steps": 4305, "loss": 0.3382, "lr": 3.7030162412993044e-05, "epoch": 0.6509357200976403, "percentage": 9.29, "elapsed_time": "1:38:35", "remaining_time": "16:02:25"} +{"current_steps": 405, "total_steps": 4305, "loss": 0.3437, "lr": 3.7494199535962884e-05, "epoch": 0.6590724165988608, "percentage": 9.41, "elapsed_time": "1:39:47", "remaining_time": "16:00:56"} +{"current_steps": 410, "total_steps": 4305, "loss": 0.3395, "lr": 3.795823665893272e-05, "epoch": 0.6672091131000814, "percentage": 9.52, "elapsed_time": 
"1:40:56", "remaining_time": "15:59:01"} +{"current_steps": 415, "total_steps": 4305, "loss": 0.3535, "lr": 3.842227378190256e-05, "epoch": 0.6753458096013019, "percentage": 9.64, "elapsed_time": "1:42:06", "remaining_time": "15:57:03"} +{"current_steps": 420, "total_steps": 4305, "loss": 0.3517, "lr": 3.888631090487239e-05, "epoch": 0.6834825061025224, "percentage": 9.76, "elapsed_time": "1:43:15", "remaining_time": "15:55:09"} +{"current_steps": 425, "total_steps": 4305, "loss": 0.3492, "lr": 3.935034802784223e-05, "epoch": 0.6916192026037429, "percentage": 9.87, "elapsed_time": "1:44:27", "remaining_time": "15:53:41"} +{"current_steps": 430, "total_steps": 4305, "loss": 0.3574, "lr": 3.981438515081207e-05, "epoch": 0.6997558991049634, "percentage": 9.99, "elapsed_time": "1:45:35", "remaining_time": "15:51:35"} +{"current_steps": 435, "total_steps": 4305, "loss": 0.3372, "lr": 3.9999940813479674e-05, "epoch": 0.7078925956061839, "percentage": 10.1, "elapsed_time": "1:46:50", "remaining_time": "15:50:26"} +{"current_steps": 440, "total_steps": 4305, "loss": 0.3515, "lr": 3.999957911934624e-05, "epoch": 0.7160292921074044, "percentage": 10.22, "elapsed_time": "1:47:59", "remaining_time": "15:48:33"} +{"current_steps": 445, "total_steps": 4305, "loss": 0.3373, "lr": 3.9998888618418865e-05, "epoch": 0.7241659886086249, "percentage": 10.34, "elapsed_time": "1:49:12", "remaining_time": "15:47:20"} +{"current_steps": 450, "total_steps": 4305, "loss": 0.3785, "lr": 3.999786932204985e-05, "epoch": 0.7323026851098454, "percentage": 10.45, "elapsed_time": "1:50:20", "remaining_time": "15:45:15"} +{"current_steps": 455, "total_steps": 4305, "loss": 0.3475, "lr": 3.999652124699712e-05, "epoch": 0.7404393816110659, "percentage": 10.57, "elapsed_time": "1:51:33", "remaining_time": "15:43:56"} +{"current_steps": 460, "total_steps": 4305, "loss": 0.3407, "lr": 3.999484441542395e-05, "epoch": 0.7485760781122864, "percentage": 10.69, "elapsed_time": "1:52:50", "remaining_time": 
"15:43:08"} +{"current_steps": 465, "total_steps": 4305, "loss": 0.3414, "lr": 3.999283885489861e-05, "epoch": 0.7567127746135069, "percentage": 10.8, "elapsed_time": "1:54:06", "remaining_time": "15:42:15"} +{"current_steps": 470, "total_steps": 4305, "loss": 0.3584, "lr": 3.999050459839389e-05, "epoch": 0.7648494711147275, "percentage": 10.92, "elapsed_time": "1:55:24", "remaining_time": "15:41:40"} +{"current_steps": 475, "total_steps": 4305, "loss": 0.332, "lr": 3.998784168428657e-05, "epoch": 0.772986167615948, "percentage": 11.03, "elapsed_time": "1:56:31", "remaining_time": "15:39:33"} +{"current_steps": 480, "total_steps": 4305, "loss": 0.3432, "lr": 3.998485015635677e-05, "epoch": 0.7811228641171685, "percentage": 11.15, "elapsed_time": "1:57:39", "remaining_time": "15:37:35"} +{"current_steps": 485, "total_steps": 4305, "loss": 0.3606, "lr": 3.998153006378727e-05, "epoch": 0.789259560618389, "percentage": 11.27, "elapsed_time": "1:58:49", "remaining_time": "15:35:54"} +{"current_steps": 490, "total_steps": 4305, "loss": 0.3246, "lr": 3.997788146116267e-05, "epoch": 0.7973962571196095, "percentage": 11.38, "elapsed_time": "1:59:56", "remaining_time": "15:33:48"} +{"current_steps": 495, "total_steps": 4305, "loss": 0.3342, "lr": 3.99739044084685e-05, "epoch": 0.80553295362083, "percentage": 11.5, "elapsed_time": "2:01:09", "remaining_time": "15:32:32"} +{"current_steps": 500, "total_steps": 4305, "loss": 0.3485, "lr": 3.9969598971090225e-05, "epoch": 0.8136696501220505, "percentage": 11.61, "elapsed_time": "2:02:21", "remaining_time": "15:31:11"} +{"current_steps": 505, "total_steps": 4305, "loss": 0.3367, "lr": 3.99649652198122e-05, "epoch": 0.8218063466232709, "percentage": 11.73, "elapsed_time": "2:03:33", "remaining_time": "15:29:48"} +{"current_steps": 510, "total_steps": 4305, "loss": 0.3289, "lr": 3.9960003230816456e-05, "epoch": 0.8299430431244914, "percentage": 11.85, "elapsed_time": "2:04:48", "remaining_time": "15:28:39"} +{"current_steps": 515, 
"total_steps": 4305, "loss": 0.3588, "lr": 3.9954713085681504e-05, "epoch": 0.8380797396257119, "percentage": 11.96, "elapsed_time": "2:06:05", "remaining_time": "15:27:55"} +{"current_steps": 520, "total_steps": 4305, "loss": 0.329, "lr": 3.994909487138096e-05, "epoch": 0.8462164361269324, "percentage": 12.08, "elapsed_time": "2:07:22", "remaining_time": "15:27:08"} +{"current_steps": 525, "total_steps": 4305, "loss": 0.3711, "lr": 3.994314868028212e-05, "epoch": 0.8543531326281529, "percentage": 12.2, "elapsed_time": "2:08:34", "remaining_time": "15:25:43"} +{"current_steps": 530, "total_steps": 4305, "loss": 0.3396, "lr": 3.9936874610144445e-05, "epoch": 0.8624898291293734, "percentage": 12.31, "elapsed_time": "2:09:46", "remaining_time": "15:24:21"} +{"current_steps": 535, "total_steps": 4305, "loss": 0.3405, "lr": 3.993027276411793e-05, "epoch": 0.870626525630594, "percentage": 12.43, "elapsed_time": "2:10:53", "remaining_time": "15:22:22"} +{"current_steps": 540, "total_steps": 4305, "loss": 0.3486, "lr": 3.992334325074148e-05, "epoch": 0.8787632221318145, "percentage": 12.54, "elapsed_time": "2:12:04", "remaining_time": "15:20:52"} +{"current_steps": 545, "total_steps": 4305, "loss": 0.3533, "lr": 3.991608618394102e-05, "epoch": 0.886899918633035, "percentage": 12.66, "elapsed_time": "2:13:16", "remaining_time": "15:19:27"} +{"current_steps": 550, "total_steps": 4305, "loss": 0.3288, "lr": 3.9908501683027726e-05, "epoch": 0.8950366151342555, "percentage": 12.78, "elapsed_time": "2:14:29", "remaining_time": "15:18:09"} +{"current_steps": 555, "total_steps": 4305, "loss": 0.3331, "lr": 3.990058987269597e-05, "epoch": 0.903173311635476, "percentage": 12.89, "elapsed_time": "2:15:34", "remaining_time": "15:16:05"} +{"current_steps": 560, "total_steps": 4305, "loss": 0.3461, "lr": 3.9892350883021366e-05, "epoch": 0.9113100081366965, "percentage": 13.01, "elapsed_time": "2:16:42", "remaining_time": "15:14:11"} +{"current_steps": 565, "total_steps": 4305, "loss": 
0.3102, "lr": 3.988378484945853e-05, "epoch": 0.919446704637917, "percentage": 13.12, "elapsed_time": "2:17:47", "remaining_time": "15:12:05"} +{"current_steps": 570, "total_steps": 4305, "loss": 0.3416, "lr": 3.987489191283894e-05, "epoch": 0.9275834011391375, "percentage": 13.24, "elapsed_time": "2:18:58", "remaining_time": "15:10:36"} +{"current_steps": 575, "total_steps": 4305, "loss": 0.3199, "lr": 3.9865672219368574e-05, "epoch": 0.935720097640358, "percentage": 13.36, "elapsed_time": "2:20:08", "remaining_time": "15:09:04"} +{"current_steps": 580, "total_steps": 4305, "loss": 0.3344, "lr": 3.98561259206255e-05, "epoch": 0.9438567941415785, "percentage": 13.47, "elapsed_time": "2:21:20", "remaining_time": "15:07:48"} +{"current_steps": 585, "total_steps": 4305, "loss": 0.3281, "lr": 3.984625317355743e-05, "epoch": 0.951993490642799, "percentage": 13.59, "elapsed_time": "2:22:35", "remaining_time": "15:06:43"} +{"current_steps": 590, "total_steps": 4305, "loss": 0.3609, "lr": 3.983605414047908e-05, "epoch": 0.9601301871440195, "percentage": 13.7, "elapsed_time": "2:23:43", "remaining_time": "15:05:01"} +{"current_steps": 595, "total_steps": 4305, "loss": 0.3562, "lr": 3.982552898906956e-05, "epoch": 0.96826688364524, "percentage": 13.82, "elapsed_time": "2:24:59", "remaining_time": "15:04:01"} +{"current_steps": 600, "total_steps": 4305, "loss": 0.3419, "lr": 3.981467789236958e-05, "epoch": 0.9764035801464606, "percentage": 13.94, "elapsed_time": "2:26:11", "remaining_time": "15:02:42"} +{"current_steps": 605, "total_steps": 4305, "loss": 0.3361, "lr": 3.98035010287786e-05, "epoch": 0.9845402766476811, "percentage": 14.05, "elapsed_time": "2:27:28", "remaining_time": "15:01:52"} +{"current_steps": 610, "total_steps": 4305, "loss": 0.3263, "lr": 3.979199858205192e-05, "epoch": 0.9926769731489016, "percentage": 14.17, "elapsed_time": "2:28:42", "remaining_time": "15:00:49"} +{"current_steps": 615, "total_steps": 4305, "loss": 0.3348, "lr": 
3.9780170741297655e-05, "epoch": 1.0, "percentage": 14.29, "elapsed_time": "2:29:45", "remaining_time": "14:58:32"} +{"current_steps": 620, "total_steps": 4305, "loss": 0.32, "lr": 3.976801770097361e-05, "epoch": 1.0081366965012204, "percentage": 14.4, "elapsed_time": "2:31:00", "remaining_time": "14:57:28"} +{"current_steps": 625, "total_steps": 4305, "loss": 0.3018, "lr": 3.975553966088412e-05, "epoch": 1.016273393002441, "percentage": 14.52, "elapsed_time": "2:32:08", "remaining_time": "14:55:49"} +{"current_steps": 630, "total_steps": 4305, "loss": 0.3306, "lr": 3.9742736826176706e-05, "epoch": 1.0244100895036614, "percentage": 14.63, "elapsed_time": "2:33:21", "remaining_time": "14:54:35"} +{"current_steps": 635, "total_steps": 4305, "loss": 0.3294, "lr": 3.9729609407338745e-05, "epoch": 1.032546786004882, "percentage": 14.75, "elapsed_time": "2:34:38", "remaining_time": "14:53:44"} +{"current_steps": 640, "total_steps": 4305, "loss": 0.3404, "lr": 3.971615762019401e-05, "epoch": 1.0406834825061024, "percentage": 14.87, "elapsed_time": "2:35:52", "remaining_time": "14:52:40"} +{"current_steps": 645, "total_steps": 4305, "loss": 0.3239, "lr": 3.970238168589911e-05, "epoch": 1.048820179007323, "percentage": 14.98, "elapsed_time": "2:36:59", "remaining_time": "14:50:48"} +{"current_steps": 650, "total_steps": 4305, "loss": 0.3076, "lr": 3.968828183093984e-05, "epoch": 1.0569568755085434, "percentage": 15.1, "elapsed_time": "2:38:11", "remaining_time": "14:49:34"} +{"current_steps": 655, "total_steps": 4305, "loss": 0.3016, "lr": 3.9673858287127484e-05, "epoch": 1.065093572009764, "percentage": 15.21, "elapsed_time": "2:39:25", "remaining_time": "14:48:23"} +{"current_steps": 660, "total_steps": 4305, "loss": 0.3308, "lr": 3.965911129159501e-05, "epoch": 1.0732302685109845, "percentage": 15.33, "elapsed_time": "2:40:37", "remaining_time": "14:47:06"} +{"current_steps": 665, "total_steps": 4305, "loss": 0.3322, "lr": 3.9644041086793115e-05, "epoch": 
1.081366965012205, "percentage": 15.45, "elapsed_time": "2:41:46", "remaining_time": "14:45:31"} +{"current_steps": 670, "total_steps": 4305, "loss": 0.3372, "lr": 3.9628647920486313e-05, "epoch": 1.0895036615134255, "percentage": 15.56, "elapsed_time": "2:42:55", "remaining_time": "14:43:55"} +{"current_steps": 675, "total_steps": 4305, "loss": 0.3426, "lr": 3.961293204574881e-05, "epoch": 1.097640358014646, "percentage": 15.68, "elapsed_time": "2:44:04", "remaining_time": "14:42:23"} +{"current_steps": 680, "total_steps": 4305, "loss": 0.3242, "lr": 3.959689372096034e-05, "epoch": 1.1057770545158665, "percentage": 15.8, "elapsed_time": "2:45:18", "remaining_time": "14:41:13"} +{"current_steps": 685, "total_steps": 4305, "loss": 0.3085, "lr": 3.9580533209802e-05, "epoch": 1.1139137510170871, "percentage": 15.91, "elapsed_time": "2:46:29", "remaining_time": "14:39:51"} +{"current_steps": 690, "total_steps": 4305, "loss": 0.3274, "lr": 3.9563850781251785e-05, "epoch": 1.1220504475183075, "percentage": 16.03, "elapsed_time": "2:47:42", "remaining_time": "14:38:41"} +{"current_steps": 695, "total_steps": 4305, "loss": 0.3027, "lr": 3.954684670958027e-05, "epoch": 1.1301871440195281, "percentage": 16.14, "elapsed_time": "2:48:55", "remaining_time": "14:37:27"} +{"current_steps": 700, "total_steps": 4305, "loss": 0.3181, "lr": 3.9529521274346036e-05, "epoch": 1.1383238405207485, "percentage": 16.26, "elapsed_time": "2:50:04", "remaining_time": "14:35:55"} +{"current_steps": 705, "total_steps": 4305, "loss": 0.3532, "lr": 3.951187476039114e-05, "epoch": 1.1464605370219692, "percentage": 16.38, "elapsed_time": "2:51:19", "remaining_time": "14:34:50"} +{"current_steps": 710, "total_steps": 4305, "loss": 0.3157, "lr": 3.9493907457836355e-05, "epoch": 1.1545972335231895, "percentage": 16.49, "elapsed_time": "2:52:30", "remaining_time": "14:33:27"} +{"current_steps": 715, "total_steps": 4305, "loss": 0.3283, "lr": 3.947561966207646e-05, "epoch": 1.1627339300244102, 
"percentage": 16.61, "elapsed_time": "2:53:34", "remaining_time": "14:31:32"} +{"current_steps": 720, "total_steps": 4305, "loss": 0.314, "lr": 3.945701167377537e-05, "epoch": 1.1708706265256306, "percentage": 16.72, "elapsed_time": "2:54:47", "remaining_time": "14:30:21"} +{"current_steps": 725, "total_steps": 4305, "loss": 0.3013, "lr": 3.9438083798861145e-05, "epoch": 1.1790073230268512, "percentage": 16.84, "elapsed_time": "2:55:58", "remaining_time": "14:28:57"} +{"current_steps": 730, "total_steps": 4305, "loss": 0.3499, "lr": 3.9418836348521045e-05, "epoch": 1.1871440195280716, "percentage": 16.96, "elapsed_time": "2:57:10", "remaining_time": "14:27:41"} +{"current_steps": 735, "total_steps": 4305, "loss": 0.3171, "lr": 3.939926963919635e-05, "epoch": 1.1952807160292922, "percentage": 17.07, "elapsed_time": "2:58:22", "remaining_time": "14:26:22"} +{"current_steps": 740, "total_steps": 4305, "loss": 0.3294, "lr": 3.9379383992577166e-05, "epoch": 1.2034174125305126, "percentage": 17.19, "elapsed_time": "2:59:34", "remaining_time": "14:25:06"} +{"current_steps": 745, "total_steps": 4305, "loss": 0.3076, "lr": 3.9359179735597174e-05, "epoch": 1.211554109031733, "percentage": 17.31, "elapsed_time": "3:00:44", "remaining_time": "14:23:42"} +{"current_steps": 750, "total_steps": 4305, "loss": 0.3116, "lr": 3.9338657200428215e-05, "epoch": 1.2196908055329536, "percentage": 17.42, "elapsed_time": "3:01:59", "remaining_time": "14:22:38"} +{"current_steps": 755, "total_steps": 4305, "loss": 0.3015, "lr": 3.931781672447482e-05, "epoch": 1.2278275020341742, "percentage": 17.54, "elapsed_time": "3:03:06", "remaining_time": "14:20:58"} +{"current_steps": 760, "total_steps": 4305, "loss": 0.3423, "lr": 3.9296658650368707e-05, "epoch": 1.2359641985353946, "percentage": 17.65, "elapsed_time": "3:04:15", "remaining_time": "14:19:26"} +{"current_steps": 765, "total_steps": 4305, "loss": 0.3199, "lr": 3.927518332596313e-05, "epoch": 1.244100895036615, "percentage": 17.77, 
"elapsed_time": "3:05:32", "remaining_time": "14:18:37"} +{"current_steps": 770, "total_steps": 4305, "loss": 0.3008, "lr": 3.925339110432716e-05, "epoch": 1.2522375915378356, "percentage": 17.89, "elapsed_time": "3:06:40", "remaining_time": "14:17:00"} +{"current_steps": 775, "total_steps": 4305, "loss": 0.3232, "lr": 3.923128234373984e-05, "epoch": 1.2603742880390563, "percentage": 18.0, "elapsed_time": "3:07:53", "remaining_time": "14:15:50"} +{"current_steps": 780, "total_steps": 4305, "loss": 0.3161, "lr": 3.9208857407684356e-05, "epoch": 1.2685109845402767, "percentage": 18.12, "elapsed_time": "3:09:05", "remaining_time": "14:14:32"} +{"current_steps": 785, "total_steps": 4305, "loss": 0.3243, "lr": 3.918611666484205e-05, "epoch": 1.276647681041497, "percentage": 18.23, "elapsed_time": "3:10:20", "remaining_time": "14:13:30"} +{"current_steps": 790, "total_steps": 4305, "loss": 0.3169, "lr": 3.9163060489086305e-05, "epoch": 1.2847843775427177, "percentage": 18.35, "elapsed_time": "3:11:33", "remaining_time": "14:12:16"} +{"current_steps": 795, "total_steps": 4305, "loss": 0.319, "lr": 3.913968925947647e-05, "epoch": 1.292921074043938, "percentage": 18.47, "elapsed_time": "3:12:40", "remaining_time": "14:10:42"} +{"current_steps": 800, "total_steps": 4305, "loss": 0.3296, "lr": 3.91160033602516e-05, "epoch": 1.3010577705451587, "percentage": 18.58, "elapsed_time": "3:13:52", "remaining_time": "14:09:26"} +{"current_steps": 805, "total_steps": 4305, "loss": 0.2945, "lr": 3.909200318082409e-05, "epoch": 1.309194467046379, "percentage": 18.7, "elapsed_time": "3:15:08", "remaining_time": "14:08:26"} +{"current_steps": 810, "total_steps": 4305, "loss": 0.3023, "lr": 3.906768911577337e-05, "epoch": 1.3173311635475997, "percentage": 18.82, "elapsed_time": "3:16:19", "remaining_time": "14:07:06"} +{"current_steps": 815, "total_steps": 4305, "loss": 0.3098, "lr": 3.9043061564839325e-05, "epoch": 1.3254678600488201, "percentage": 18.93, "elapsed_time": "3:17:28", 
"remaining_time": "14:05:39"} +{"current_steps": 820, "total_steps": 4305, "loss": 0.3153, "lr": 3.901812093291579e-05, "epoch": 1.3336045565500407, "percentage": 19.05, "elapsed_time": "3:18:43", "remaining_time": "14:04:35"} +{"current_steps": 825, "total_steps": 4305, "loss": 0.3185, "lr": 3.8992867630043855e-05, "epoch": 1.3417412530512611, "percentage": 19.16, "elapsed_time": "3:19:56", "remaining_time": "14:03:22"} +{"current_steps": 830, "total_steps": 4305, "loss": 0.3315, "lr": 3.896730207140512e-05, "epoch": 1.3498779495524817, "percentage": 19.28, "elapsed_time": "3:21:10", "remaining_time": "14:02:16"} +{"current_steps": 835, "total_steps": 4305, "loss": 0.3268, "lr": 3.894142467731492e-05, "epoch": 1.3580146460537021, "percentage": 19.4, "elapsed_time": "3:22:17", "remaining_time": "14:00:39"} +{"current_steps": 840, "total_steps": 4305, "loss": 0.291, "lr": 3.891523587321534e-05, "epoch": 1.3661513425549228, "percentage": 19.51, "elapsed_time": "3:23:31", "remaining_time": "13:59:31"} +{"current_steps": 845, "total_steps": 4305, "loss": 0.3087, "lr": 3.888873608966828e-05, "epoch": 1.3742880390561432, "percentage": 19.63, "elapsed_time": "3:24:44", "remaining_time": "13:58:19"} +{"current_steps": 850, "total_steps": 4305, "loss": 0.3135, "lr": 3.886192576234836e-05, "epoch": 1.3824247355573638, "percentage": 19.74, "elapsed_time": "3:26:02", "remaining_time": "13:57:31"} +{"current_steps": 855, "total_steps": 4305, "loss": 0.3145, "lr": 3.883480533203574e-05, "epoch": 1.3905614320585842, "percentage": 19.86, "elapsed_time": "3:27:15", "remaining_time": "13:56:17"} +{"current_steps": 860, "total_steps": 4305, "loss": 0.3135, "lr": 3.880737524460888e-05, "epoch": 1.3986981285598048, "percentage": 19.98, "elapsed_time": "3:28:20", "remaining_time": "13:54:36"} +{"current_steps": 865, "total_steps": 4305, "loss": 0.3101, "lr": 3.877963595103725e-05, "epoch": 1.4068348250610252, "percentage": 20.09, "elapsed_time": "3:29:33", "remaining_time": "13:53:24"} 
+{"current_steps": 870, "total_steps": 4305, "loss": 0.3196, "lr": 3.875158790737383e-05, "epoch": 1.4149715215622458, "percentage": 20.21, "elapsed_time": "3:30:47", "remaining_time": "13:52:15"} +{"current_steps": 875, "total_steps": 4305, "loss": 0.3416, "lr": 3.87232315747477e-05, "epoch": 1.4231082180634662, "percentage": 20.33, "elapsed_time": "3:31:57", "remaining_time": "13:50:51"} +{"current_steps": 880, "total_steps": 4305, "loss": 0.3561, "lr": 3.8694567419356414e-05, "epoch": 1.4312449145646866, "percentage": 20.44, "elapsed_time": "3:33:12", "remaining_time": "13:49:49"} +{"current_steps": 885, "total_steps": 4305, "loss": 0.3001, "lr": 3.8665595912458346e-05, "epoch": 1.4393816110659072, "percentage": 20.56, "elapsed_time": "3:34:23", "remaining_time": "13:48:30"} +{"current_steps": 890, "total_steps": 4305, "loss": 0.3084, "lr": 3.863631753036492e-05, "epoch": 1.4475183075671278, "percentage": 20.67, "elapsed_time": "3:35:36", "remaining_time": "13:47:19"} +{"current_steps": 895, "total_steps": 4305, "loss": 0.2917, "lr": 3.860673275443283e-05, "epoch": 1.4556550040683482, "percentage": 20.79, "elapsed_time": "3:36:45", "remaining_time": "13:45:52"} +{"current_steps": 900, "total_steps": 4305, "loss": 0.3089, "lr": 3.857684207105606e-05, "epoch": 1.4637917005695686, "percentage": 20.91, "elapsed_time": "3:38:00", "remaining_time": "13:44:46"} +{"current_steps": 905, "total_steps": 4305, "loss": 0.3106, "lr": 3.854664597165795e-05, "epoch": 1.4719283970707893, "percentage": 21.02, "elapsed_time": "3:39:08", "remaining_time": "13:43:18"} +{"current_steps": 910, "total_steps": 4305, "loss": 0.2903, "lr": 3.851614495268308e-05, "epoch": 1.4800650935720099, "percentage": 21.14, "elapsed_time": "3:40:22", "remaining_time": "13:42:10"} +{"current_steps": 915, "total_steps": 4305, "loss": 0.3058, "lr": 3.848533951558912e-05, "epoch": 1.4882017900732303, "percentage": 21.25, "elapsed_time": "3:41:32", "remaining_time": "13:40:49"} +{"current_steps": 920, 
"total_steps": 4305, "loss": 0.2982, "lr": 3.845423016683856e-05, "epoch": 1.4963384865744507, "percentage": 21.37, "elapsed_time": "3:42:46", "remaining_time": "13:39:41"} +{"current_steps": 925, "total_steps": 4305, "loss": 0.3, "lr": 3.842281741789044e-05, "epoch": 1.5044751830756713, "percentage": 21.49, "elapsed_time": "3:44:04", "remaining_time": "13:38:46"} +{"current_steps": 930, "total_steps": 4305, "loss": 0.2817, "lr": 3.839110178519189e-05, "epoch": 1.512611879576892, "percentage": 21.6, "elapsed_time": "3:45:19", "remaining_time": "13:37:42"} +{"current_steps": 935, "total_steps": 4305, "loss": 0.3194, "lr": 3.835908379016966e-05, "epoch": 1.5207485760781123, "percentage": 21.72, "elapsed_time": "3:46:31", "remaining_time": "13:36:28"} +{"current_steps": 940, "total_steps": 4305, "loss": 0.3233, "lr": 3.832676395922153e-05, "epoch": 1.5288852725793327, "percentage": 21.84, "elapsed_time": "3:47:45", "remaining_time": "13:35:18"} +{"current_steps": 945, "total_steps": 4305, "loss": 0.3199, "lr": 3.82941428237077e-05, "epoch": 1.5370219690805533, "percentage": 21.95, "elapsed_time": "3:49:02", "remaining_time": "13:34:21"} +{"current_steps": 950, "total_steps": 4305, "loss": 0.3119, "lr": 3.826122091994198e-05, "epoch": 1.545158665581774, "percentage": 22.07, "elapsed_time": "3:50:16", "remaining_time": "13:33:14"} +{"current_steps": 955, "total_steps": 4305, "loss": 0.2975, "lr": 3.822799878918307e-05, "epoch": 1.5532953620829943, "percentage": 22.18, "elapsed_time": "3:51:25", "remaining_time": "13:31:48"} +{"current_steps": 960, "total_steps": 4305, "loss": 0.289, "lr": 3.8194476977625556e-05, "epoch": 1.5614320585842147, "percentage": 22.3, "elapsed_time": "3:52:30", "remaining_time": "13:30:09"} +{"current_steps": 965, "total_steps": 4305, "loss": 0.2967, "lr": 3.8160656036391024e-05, "epoch": 1.5695687550854354, "percentage": 22.42, "elapsed_time": "3:53:38", "remaining_time": "13:28:40"} +{"current_steps": 970, "total_steps": 4305, "loss": 0.3151, 
"lr": 3.812653652151893e-05, "epoch": 1.577705451586656, "percentage": 22.53, "elapsed_time": "3:54:51", "remaining_time": "13:27:26"} +{"current_steps": 975, "total_steps": 4305, "loss": 0.3306, "lr": 3.809211899395749e-05, "epoch": 1.5858421480878762, "percentage": 22.65, "elapsed_time": "3:56:01", "remaining_time": "13:26:05"} +{"current_steps": 980, "total_steps": 4305, "loss": 0.3164, "lr": 3.8057404019554464e-05, "epoch": 1.5939788445890968, "percentage": 22.76, "elapsed_time": "3:57:11", "remaining_time": "13:24:44"} +{"current_steps": 985, "total_steps": 4305, "loss": 0.2934, "lr": 3.802239216904782e-05, "epoch": 1.6021155410903174, "percentage": 22.88, "elapsed_time": "3:58:09", "remaining_time": "13:22:44"} +{"current_steps": 990, "total_steps": 4305, "loss": 0.3299, "lr": 3.79870840180564e-05, "epoch": 1.6102522375915378, "percentage": 23.0, "elapsed_time": "3:59:18", "remaining_time": "13:21:17"} +{"current_steps": 995, "total_steps": 4305, "loss": 0.3248, "lr": 3.795148014707042e-05, "epoch": 1.6183889340927582, "percentage": 23.11, "elapsed_time": "4:00:25", "remaining_time": "13:19:48"} +{"current_steps": 1000, "total_steps": 4305, "loss": 0.3253, "lr": 3.791558114144192e-05, "epoch": 1.6265256305939788, "percentage": 23.23, "elapsed_time": "4:01:41", "remaining_time": "13:18:48"} +{"current_steps": 1005, "total_steps": 4305, "loss": 0.3049, "lr": 3.7879387591375174e-05, "epoch": 1.6346623270951994, "percentage": 23.34, "elapsed_time": "4:02:51", "remaining_time": "13:17:27"} +{"current_steps": 1010, "total_steps": 4305, "loss": 0.3145, "lr": 3.7842900091916956e-05, "epoch": 1.6427990235964198, "percentage": 23.46, "elapsed_time": "4:03:56", "remaining_time": "13:15:50"} +{"current_steps": 1015, "total_steps": 4305, "loss": 0.3003, "lr": 3.7806119242946785e-05, "epoch": 1.6509357200976402, "percentage": 23.58, "elapsed_time": "4:05:03", "remaining_time": "13:14:20"} +{"current_steps": 1020, "total_steps": 4305, "loss": 0.3154, "lr": 
3.7769045649167034e-05, "epoch": 1.6590724165988608, "percentage": 23.69, "elapsed_time": "4:06:16", "remaining_time": "13:13:08"} +{"current_steps": 1025, "total_steps": 4305, "loss": 0.3137, "lr": 3.7731679920093e-05, "epoch": 1.6672091131000815, "percentage": 23.81, "elapsed_time": "4:07:31", "remaining_time": "13:12:05"} +{"current_steps": 1030, "total_steps": 4305, "loss": 0.3071, "lr": 3.7694022670042894e-05, "epoch": 1.6753458096013019, "percentage": 23.93, "elapsed_time": "4:08:45", "remaining_time": "13:10:55"} +{"current_steps": 1035, "total_steps": 4305, "loss": 0.2847, "lr": 3.765607451812773e-05, "epoch": 1.6834825061025223, "percentage": 24.04, "elapsed_time": "4:10:01", "remaining_time": "13:09:57"} +{"current_steps": 1040, "total_steps": 4305, "loss": 0.3082, "lr": 3.7617836088241144e-05, "epoch": 1.6916192026037429, "percentage": 24.16, "elapsed_time": "4:11:15", "remaining_time": "13:08:48"} +{"current_steps": 1045, "total_steps": 4305, "loss": 0.3302, "lr": 3.757930800904914e-05, "epoch": 1.6997558991049635, "percentage": 24.27, "elapsed_time": "4:12:24", "remaining_time": "13:07:23"} +{"current_steps": 1050, "total_steps": 4305, "loss": 0.2831, "lr": 3.754049091397976e-05, "epoch": 1.707892595606184, "percentage": 24.39, "elapsed_time": "4:13:40", "remaining_time": "13:06:25"} +{"current_steps": 1055, "total_steps": 4305, "loss": 0.3182, "lr": 3.7501385441212664e-05, "epoch": 1.7160292921074043, "percentage": 24.51, "elapsed_time": "4:14:47", "remaining_time": "13:04:54"} +{"current_steps": 1060, "total_steps": 4305, "loss": 0.3143, "lr": 3.746199223366863e-05, "epoch": 1.724165988608625, "percentage": 24.62, "elapsed_time": "4:15:58", "remaining_time": "13:03:37"} +{"current_steps": 1065, "total_steps": 4305, "loss": 0.3179, "lr": 3.7422311938999013e-05, "epoch": 1.7323026851098455, "percentage": 24.74, "elapsed_time": "4:17:10", "remaining_time": "13:02:22"} +{"current_steps": 1070, "total_steps": 4305, "loss": 0.3068, "lr": 
3.738234520957506e-05, "epoch": 1.740439381611066, "percentage": 24.85, "elapsed_time": "4:18:23", "remaining_time": "13:01:14"} +{"current_steps": 1075, "total_steps": 4305, "loss": 0.3197, "lr": 3.73420927024772e-05, "epoch": 1.7485760781122863, "percentage": 24.97, "elapsed_time": "4:19:42", "remaining_time": "13:00:18"} +{"current_steps": 1080, "total_steps": 4305, "loss": 0.3209, "lr": 3.730155507948426e-05, "epoch": 1.756712774613507, "percentage": 25.09, "elapsed_time": "4:21:01", "remaining_time": "12:59:26"} +{"current_steps": 1085, "total_steps": 4305, "loss": 0.3125, "lr": 3.726073300706256e-05, "epoch": 1.7648494711147276, "percentage": 25.2, "elapsed_time": "4:22:12", "remaining_time": "12:58:09"} +{"current_steps": 1090, "total_steps": 4305, "loss": 0.2814, "lr": 3.721962715635495e-05, "epoch": 1.772986167615948, "percentage": 25.32, "elapsed_time": "4:23:24", "remaining_time": "12:56:54"} +{"current_steps": 1095, "total_steps": 4305, "loss": 0.3098, "lr": 3.7178238203169804e-05, "epoch": 1.7811228641171684, "percentage": 25.44, "elapsed_time": "4:24:37", "remaining_time": "12:55:44"} +{"current_steps": 1100, "total_steps": 4305, "loss": 0.3057, "lr": 3.7136566827969895e-05, "epoch": 1.789259560618389, "percentage": 25.55, "elapsed_time": "4:25:47", "remaining_time": "12:54:26"} +{"current_steps": 1105, "total_steps": 4305, "loss": 0.3051, "lr": 3.70946137158612e-05, "epoch": 1.7973962571196096, "percentage": 25.67, "elapsed_time": "4:27:00", "remaining_time": "12:53:14"} +{"current_steps": 1110, "total_steps": 4305, "loss": 0.3045, "lr": 3.705237955658166e-05, "epoch": 1.80553295362083, "percentage": 25.78, "elapsed_time": "4:28:09", "remaining_time": "12:51:52"} +{"current_steps": 1115, "total_steps": 4305, "loss": 0.3174, "lr": 3.70098650444898e-05, "epoch": 1.8136696501220504, "percentage": 25.9, "elapsed_time": "4:29:14", "remaining_time": "12:50:18"} +{"current_steps": 1120, "total_steps": 4305, "loss": 0.2939, "lr": 3.6967070878553346e-05, 
"epoch": 1.821806346623271, "percentage": 26.02, "elapsed_time": "4:30:29", "remaining_time": "12:49:13"} +{"current_steps": 1125, "total_steps": 4305, "loss": 0.3012, "lr": 3.692399776233775e-05, "epoch": 1.8299430431244914, "percentage": 26.13, "elapsed_time": "4:31:41", "remaining_time": "12:47:59"} +{"current_steps": 1130, "total_steps": 4305, "loss": 0.3206, "lr": 3.688064640399456e-05, "epoch": 1.8380797396257118, "percentage": 26.25, "elapsed_time": "4:32:51", "remaining_time": "12:46:40"} +{"current_steps": 1135, "total_steps": 4305, "loss": 0.3086, "lr": 3.683701751624983e-05, "epoch": 1.8462164361269324, "percentage": 26.36, "elapsed_time": "4:33:56", "remaining_time": "12:45:07"} +{"current_steps": 1140, "total_steps": 4305, "loss": 0.3114, "lr": 3.67931118163924e-05, "epoch": 1.854353132628153, "percentage": 26.48, "elapsed_time": "4:35:06", "remaining_time": "12:43:46"} +{"current_steps": 1145, "total_steps": 4305, "loss": 0.3281, "lr": 3.674893002626208e-05, "epoch": 1.8624898291293734, "percentage": 26.6, "elapsed_time": "4:36:11", "remaining_time": "12:42:13"} +{"current_steps": 1150, "total_steps": 4305, "loss": 0.2895, "lr": 3.6704472872237786e-05, "epoch": 1.8706265256305938, "percentage": 26.71, "elapsed_time": "4:37:21", "remaining_time": "12:40:54"} +{"current_steps": 1155, "total_steps": 4305, "loss": 0.2943, "lr": 3.665974108522562e-05, "epoch": 1.8787632221318145, "percentage": 26.83, "elapsed_time": "4:38:28", "remaining_time": "12:39:27"} +{"current_steps": 1160, "total_steps": 4305, "loss": 0.3296, "lr": 3.6614735400646824e-05, "epoch": 1.886899918633035, "percentage": 26.95, "elapsed_time": "4:39:42", "remaining_time": "12:38:21"} +{"current_steps": 1165, "total_steps": 4305, "loss": 0.3082, "lr": 3.6569456558425724e-05, "epoch": 1.8950366151342555, "percentage": 27.06, "elapsed_time": "4:40:50", "remaining_time": "12:36:57"} +{"current_steps": 1170, "total_steps": 4305, "loss": 0.2904, "lr": 3.6523905302977524e-05, "epoch": 
1.9031733116354759, "percentage": 27.18, "elapsed_time": "4:42:04", "remaining_time": "12:35:47"} +{"current_steps": 1175, "total_steps": 4305, "loss": 0.3069, "lr": 3.64780823831961e-05, "epoch": 1.9113100081366965, "percentage": 27.29, "elapsed_time": "4:43:17", "remaining_time": "12:34:38"} +{"current_steps": 1180, "total_steps": 4305, "loss": 0.3244, "lr": 3.643198855244167e-05, "epoch": 1.9194467046379171, "percentage": 27.41, "elapsed_time": "4:44:28", "remaining_time": "12:33:22"} +{"current_steps": 1185, "total_steps": 4305, "loss": 0.341, "lr": 3.6385624568528424e-05, "epoch": 1.9275834011391375, "percentage": 27.53, "elapsed_time": "4:45:40", "remaining_time": "12:32:08"} +{"current_steps": 1190, "total_steps": 4305, "loss": 0.3237, "lr": 3.6338991193712045e-05, "epoch": 1.935720097640358, "percentage": 27.64, "elapsed_time": "4:46:51", "remaining_time": "12:30:54"} +{"current_steps": 1195, "total_steps": 4305, "loss": 0.276, "lr": 3.629208919467718e-05, "epoch": 1.9438567941415785, "percentage": 27.76, "elapsed_time": "4:48:04", "remaining_time": "12:29:43"} +{"current_steps": 1200, "total_steps": 4305, "loss": 0.2816, "lr": 3.624491934252487e-05, "epoch": 1.9519934906427991, "percentage": 27.87, "elapsed_time": "4:49:15", "remaining_time": "12:28:26"} +{"current_steps": 1205, "total_steps": 4305, "loss": 0.3057, "lr": 3.619748241275981e-05, "epoch": 1.9601301871440195, "percentage": 27.99, "elapsed_time": "4:50:32", "remaining_time": "12:27:27"} +{"current_steps": 1210, "total_steps": 4305, "loss": 0.3115, "lr": 3.614977918527767e-05, "epoch": 1.96826688364524, "percentage": 28.11, "elapsed_time": "4:51:45", "remaining_time": "12:26:17"} +{"current_steps": 1215, "total_steps": 4305, "loss": 0.2885, "lr": 3.610181044435221e-05, "epoch": 1.9764035801464606, "percentage": 28.22, "elapsed_time": "4:52:57", "remaining_time": "12:25:03"} +{"current_steps": 1220, "total_steps": 4305, "loss": 0.3185, "lr": 3.605357697862242e-05, "epoch": 1.9845402766476812, 
"percentage": 28.34, "elapsed_time": "4:54:08", "remaining_time": "12:23:46"} +{"current_steps": 1225, "total_steps": 4305, "loss": 0.2837, "lr": 3.6005079581079545e-05, "epoch": 1.9926769731489016, "percentage": 28.46, "elapsed_time": "4:55:23", "remaining_time": "12:22:42"} +{"current_steps": 1230, "total_steps": 4305, "loss": 0.275, "lr": 3.595631904905406e-05, "epoch": 2.0, "percentage": 28.57, "elapsed_time": "4:56:33", "remaining_time": "12:21:24"} +{"current_steps": 1235, "total_steps": 4305, "loss": 0.292, "lr": 3.590729618420255e-05, "epoch": 2.0081366965012206, "percentage": 28.69, "elapsed_time": "4:57:38", "remaining_time": "12:19:54"} +{"current_steps": 1240, "total_steps": 4305, "loss": 0.3125, "lr": 3.585801179249452e-05, "epoch": 2.016273393002441, "percentage": 28.8, "elapsed_time": "4:58:48", "remaining_time": "12:18:34"} +{"current_steps": 1245, "total_steps": 4305, "loss": 0.3104, "lr": 3.5808466684199166e-05, "epoch": 2.0244100895036614, "percentage": 28.92, "elapsed_time": "4:59:59", "remaining_time": "12:17:20"} +{"current_steps": 1250, "total_steps": 4305, "loss": 0.2769, "lr": 3.575866167387204e-05, "epoch": 2.032546786004882, "percentage": 29.04, "elapsed_time": "5:01:08", "remaining_time": "12:15:58"} +{"current_steps": 1255, "total_steps": 4305, "loss": 0.3047, "lr": 3.570859758034165e-05, "epoch": 2.0406834825061027, "percentage": 29.15, "elapsed_time": "5:02:19", "remaining_time": "12:14:43"} +{"current_steps": 1260, "total_steps": 4305, "loss": 0.2798, "lr": 3.565827522669605e-05, "epoch": 2.048820179007323, "percentage": 29.27, "elapsed_time": "5:03:36", "remaining_time": "12:13:42"} +{"current_steps": 1265, "total_steps": 4305, "loss": 0.2589, "lr": 3.5607695440269214e-05, "epoch": 2.0569568755085434, "percentage": 29.38, "elapsed_time": "5:04:47", "remaining_time": "12:12:27"} +{"current_steps": 1270, "total_steps": 4305, "loss": 0.2607, "lr": 3.555685905262751e-05, "epoch": 2.065093572009764, "percentage": 29.5, "elapsed_time": 
"5:06:07", "remaining_time": "12:11:33"} +{"current_steps": 1275, "total_steps": 4305, "loss": 0.283, "lr": 3.5505766899556026e-05, "epoch": 2.0732302685109847, "percentage": 29.62, "elapsed_time": "5:07:16", "remaining_time": "12:10:13"} +{"current_steps": 1280, "total_steps": 4305, "loss": 0.2846, "lr": 3.5454419821044786e-05, "epoch": 2.081366965012205, "percentage": 29.73, "elapsed_time": "5:08:28", "remaining_time": "12:09:01"} +{"current_steps": 1285, "total_steps": 4305, "loss": 0.2829, "lr": 3.540281866127496e-05, "epoch": 2.0895036615134255, "percentage": 29.85, "elapsed_time": "5:09:42", "remaining_time": "12:07:52"} +{"current_steps": 1290, "total_steps": 4305, "loss": 0.2986, "lr": 3.5350964268605006e-05, "epoch": 2.097640358014646, "percentage": 29.97, "elapsed_time": "5:10:56", "remaining_time": "12:06:43"} +{"current_steps": 1295, "total_steps": 4305, "loss": 0.2679, "lr": 3.5298857495556684e-05, "epoch": 2.1057770545158667, "percentage": 30.08, "elapsed_time": "5:12:08", "remaining_time": "12:05:31"} +{"current_steps": 1300, "total_steps": 4305, "loss": 0.3006, "lr": 3.524649919880108e-05, "epoch": 2.113913751017087, "percentage": 30.2, "elapsed_time": "5:13:15", "remaining_time": "12:04:05"} +{"current_steps": 1305, "total_steps": 4305, "loss": 0.2731, "lr": 3.519389023914449e-05, "epoch": 2.1220504475183075, "percentage": 30.31, "elapsed_time": "5:14:27", "remaining_time": "12:02:52"} +{"current_steps": 1310, "total_steps": 4305, "loss": 0.2927, "lr": 3.5141031481514276e-05, "epoch": 2.130187144019528, "percentage": 30.43, "elapsed_time": "5:15:39", "remaining_time": "12:01:40"} +{"current_steps": 1315, "total_steps": 4305, "loss": 0.2813, "lr": 3.508792379494468e-05, "epoch": 2.1383238405207488, "percentage": 30.55, "elapsed_time": "5:16:51", "remaining_time": "12:00:28"} +{"current_steps": 1320, "total_steps": 4305, "loss": 0.2767, "lr": 3.503456805256246e-05, "epoch": 2.146460537021969, "percentage": 30.66, "elapsed_time": "5:18:04", 
"remaining_time": "11:59:16"} +{"current_steps": 1325, "total_steps": 4305, "loss": 0.2783, "lr": 3.4980965131572616e-05, "epoch": 2.1545972335231895, "percentage": 30.78, "elapsed_time": "5:19:18", "remaining_time": "11:58:08"} +{"current_steps": 1330, "total_steps": 4305, "loss": 0.2733, "lr": 3.492711591324392e-05, "epoch": 2.16273393002441, "percentage": 30.89, "elapsed_time": "5:20:26", "remaining_time": "11:56:45"} +{"current_steps": 1335, "total_steps": 4305, "loss": 0.3088, "lr": 3.487302128289445e-05, "epoch": 2.170870626525631, "percentage": 31.01, "elapsed_time": "5:21:32", "remaining_time": "11:55:20"} +{"current_steps": 1340, "total_steps": 4305, "loss": 0.2818, "lr": 3.481868212987702e-05, "epoch": 2.179007323026851, "percentage": 31.13, "elapsed_time": "5:22:45", "remaining_time": "11:54:10"} +{"current_steps": 1345, "total_steps": 4305, "loss": 0.2733, "lr": 3.476409934756456e-05, "epoch": 2.1871440195280716, "percentage": 31.24, "elapsed_time": "5:23:57", "remaining_time": "11:52:57"} +{"current_steps": 1350, "total_steps": 4305, "loss": 0.2879, "lr": 3.470927383333544e-05, "epoch": 2.195280716029292, "percentage": 31.36, "elapsed_time": "5:25:08", "remaining_time": "11:51:41"} +{"current_steps": 1355, "total_steps": 4305, "loss": 0.3084, "lr": 3.46542064885587e-05, "epoch": 2.203417412530513, "percentage": 31.48, "elapsed_time": "5:26:20", "remaining_time": "11:50:28"} +{"current_steps": 1360, "total_steps": 4305, "loss": 0.2836, "lr": 3.459889821857926e-05, "epoch": 2.211554109031733, "percentage": 31.59, "elapsed_time": "5:27:29", "remaining_time": "11:49:09"} +{"current_steps": 1365, "total_steps": 4305, "loss": 0.3028, "lr": 3.4543349932702984e-05, "epoch": 2.2196908055329536, "percentage": 31.71, "elapsed_time": "5:28:41", "remaining_time": "11:47:56"} +{"current_steps": 1370, "total_steps": 4305, "loss": 0.2826, "lr": 3.448756254418179e-05, "epoch": 2.2278275020341742, "percentage": 31.82, "elapsed_time": "5:29:52", "remaining_time": 
"11:46:42"} +{"current_steps": 1375, "total_steps": 4305, "loss": 0.2964, "lr": 3.443153697019861e-05, "epoch": 2.2359641985353944, "percentage": 31.94, "elapsed_time": "5:31:05", "remaining_time": "11:45:31"} +{"current_steps": 1380, "total_steps": 4305, "loss": 0.294, "lr": 3.437527413185227e-05, "epoch": 2.244100895036615, "percentage": 32.06, "elapsed_time": "5:32:20", "remaining_time": "11:44:25"} +{"current_steps": 1385, "total_steps": 4305, "loss": 0.295, "lr": 3.431877495414242e-05, "epoch": 2.2522375915378356, "percentage": 32.17, "elapsed_time": "5:33:33", "remaining_time": "11:43:14"} +{"current_steps": 1390, "total_steps": 4305, "loss": 0.2872, "lr": 3.42620403659543e-05, "epoch": 2.2603742880390563, "percentage": 32.29, "elapsed_time": "5:34:42", "remaining_time": "11:41:54"} +{"current_steps": 1395, "total_steps": 4305, "loss": 0.2929, "lr": 3.420507130004341e-05, "epoch": 2.268510984540277, "percentage": 32.4, "elapsed_time": "5:35:54", "remaining_time": "11:40:42"} +{"current_steps": 1400, "total_steps": 4305, "loss": 0.2715, "lr": 3.414786869302029e-05, "epoch": 2.276647681041497, "percentage": 32.52, "elapsed_time": "5:37:03", "remaining_time": "11:39:23"} +{"current_steps": 1405, "total_steps": 4305, "loss": 0.2776, "lr": 3.4090433485334996e-05, "epoch": 2.2847843775427177, "percentage": 32.64, "elapsed_time": "5:38:16", "remaining_time": "11:38:12"} +{"current_steps": 1410, "total_steps": 4305, "loss": 0.2816, "lr": 3.403276662126173e-05, "epoch": 2.2929210740439383, "percentage": 32.75, "elapsed_time": "5:39:27", "remaining_time": "11:36:58"} +{"current_steps": 1415, "total_steps": 4305, "loss": 0.2871, "lr": 3.397486904888328e-05, "epoch": 2.3010577705451585, "percentage": 32.87, "elapsed_time": "5:40:38", "remaining_time": "11:35:44"} +{"current_steps": 1420, "total_steps": 4305, "loss": 0.2884, "lr": 3.391674172007544e-05, "epoch": 2.309194467046379, "percentage": 32.98, "elapsed_time": "5:41:50", "remaining_time": "11:34:31"} 
+{"current_steps": 1425, "total_steps": 4305, "loss": 0.298, "lr": 3.3858385590491347e-05, "epoch": 2.3173311635475997, "percentage": 33.1, "elapsed_time": "5:42:57", "remaining_time": "11:33:08"} +{"current_steps": 1430, "total_steps": 4305, "loss": 0.2653, "lr": 3.379980161954578e-05, "epoch": 2.3254678600488203, "percentage": 33.22, "elapsed_time": "5:44:11", "remaining_time": "11:31:58"} +{"current_steps": 1435, "total_steps": 4305, "loss": 0.3016, "lr": 3.3740990770399404e-05, "epoch": 2.3336045565500405, "percentage": 33.33, "elapsed_time": "5:45:22", "remaining_time": "11:30:45"} +{"current_steps": 1440, "total_steps": 4305, "loss": 0.2675, "lr": 3.368195400994289e-05, "epoch": 2.341741253051261, "percentage": 33.45, "elapsed_time": "5:46:38", "remaining_time": "11:29:39"} +{"current_steps": 1445, "total_steps": 4305, "loss": 0.2874, "lr": 3.362269230878107e-05, "epoch": 2.3498779495524817, "percentage": 33.57, "elapsed_time": "5:47:45", "remaining_time": "11:28:18"} +{"current_steps": 1450, "total_steps": 4305, "loss": 0.3099, "lr": 3.356320664121694e-05, "epoch": 2.3580146460537024, "percentage": 33.68, "elapsed_time": "5:48:54", "remaining_time": "11:26:58"} +{"current_steps": 1455, "total_steps": 4305, "loss": 0.2887, "lr": 3.350349798523566e-05, "epoch": 2.3661513425549225, "percentage": 33.8, "elapsed_time": "5:49:59", "remaining_time": "11:25:33"} +{"current_steps": 1460, "total_steps": 4305, "loss": 0.2934, "lr": 3.344356732248849e-05, "epoch": 2.374288039056143, "percentage": 33.91, "elapsed_time": "5:51:16", "remaining_time": "11:24:30"} +{"current_steps": 1465, "total_steps": 4305, "loss": 0.2705, "lr": 3.33834156382766e-05, "epoch": 2.382424735557364, "percentage": 34.03, "elapsed_time": "5:52:34", "remaining_time": "11:23:28"} +{"current_steps": 1470, "total_steps": 4305, "loss": 0.2776, "lr": 3.332304392153494e-05, "epoch": 2.3905614320585844, "percentage": 34.15, "elapsed_time": "5:53:47", "remaining_time": "11:22:18"} +{"current_steps": 1475, 
"total_steps": 4305, "loss": 0.2844, "lr": 3.326245316481591e-05, "epoch": 2.3986981285598046, "percentage": 34.26, "elapsed_time": "5:54:59", "remaining_time": "11:21:06"} +{"current_steps": 1480, "total_steps": 4305, "loss": 0.2996, "lr": 3.320164436427311e-05, "epoch": 2.406834825061025, "percentage": 34.38, "elapsed_time": "5:56:14", "remaining_time": "11:19:59"} +{"current_steps": 1485, "total_steps": 4305, "loss": 0.2823, "lr": 3.314061851964491e-05, "epoch": 2.414971521562246, "percentage": 34.49, "elapsed_time": "5:57:25", "remaining_time": "11:18:45"} +{"current_steps": 1490, "total_steps": 4305, "loss": 0.2775, "lr": 3.307937663423804e-05, "epoch": 2.423108218063466, "percentage": 34.61, "elapsed_time": "5:58:41", "remaining_time": "11:17:39"} +{"current_steps": 1495, "total_steps": 4305, "loss": 0.2775, "lr": 3.3017919714911094e-05, "epoch": 2.4312449145646866, "percentage": 34.73, "elapsed_time": "5:59:53", "remaining_time": "11:16:26"} +{"current_steps": 1500, "total_steps": 4305, "loss": 0.293, "lr": 3.295624877205796e-05, "epoch": 2.4393816110659072, "percentage": 34.84, "elapsed_time": "6:01:04", "remaining_time": "11:15:12"} +{"current_steps": 1505, "total_steps": 4305, "loss": 0.2944, "lr": 3.2894364819591224e-05, "epoch": 2.447518307567128, "percentage": 34.96, "elapsed_time": "6:02:48", "remaining_time": "11:15:00"} +{"current_steps": 1510, "total_steps": 4305, "loss": 0.3226, "lr": 3.28322688749255e-05, "epoch": 2.4556550040683485, "percentage": 35.08, "elapsed_time": "6:03:59", "remaining_time": "11:13:45"} +{"current_steps": 1515, "total_steps": 4305, "loss": 0.2759, "lr": 3.2769961958960694e-05, "epoch": 2.4637917005695686, "percentage": 35.19, "elapsed_time": "6:05:12", "remaining_time": "11:12:33"} +{"current_steps": 1520, "total_steps": 4305, "loss": 0.2875, "lr": 3.270744509606523e-05, "epoch": 2.4719283970707893, "percentage": 35.31, "elapsed_time": "6:06:24", "remaining_time": "11:11:21"} +{"current_steps": 1525, "total_steps": 4305, 
"loss": 0.2815, "lr": 3.26447193140592e-05, "epoch": 2.48006509357201, "percentage": 35.42, "elapsed_time": "6:07:32", "remaining_time": "11:09:59"} +{"current_steps": 1530, "total_steps": 4305, "loss": 0.2833, "lr": 3.2581785644197456e-05, "epoch": 2.48820179007323, "percentage": 35.54, "elapsed_time": "6:08:42", "remaining_time": "11:08:43"} +{"current_steps": 1535, "total_steps": 4305, "loss": 0.3104, "lr": 3.251864512115271e-05, "epoch": 2.4963384865744507, "percentage": 35.66, "elapsed_time": "6:09:48", "remaining_time": "11:07:21"} +{"current_steps": 1540, "total_steps": 4305, "loss": 0.2737, "lr": 3.2455298782998424e-05, "epoch": 2.5044751830756713, "percentage": 35.77, "elapsed_time": "6:11:05", "remaining_time": "11:06:17"} +{"current_steps": 1545, "total_steps": 4305, "loss": 0.256, "lr": 3.2391747671191854e-05, "epoch": 2.512611879576892, "percentage": 35.89, "elapsed_time": "6:12:15", "remaining_time": "11:05:00"} +{"current_steps": 1550, "total_steps": 4305, "loss": 0.293, "lr": 3.2327992830556846e-05, "epoch": 2.5207485760781125, "percentage": 36.0, "elapsed_time": "6:13:32", "remaining_time": "11:03:55"} +{"current_steps": 1555, "total_steps": 4305, "loss": 0.3035, "lr": 3.22640353092667e-05, "epoch": 2.5288852725793327, "percentage": 36.12, "elapsed_time": "6:14:38", "remaining_time": "11:02:32"} +{"current_steps": 1560, "total_steps": 4305, "loss": 0.2979, "lr": 3.2199876158826915e-05, "epoch": 2.5370219690805533, "percentage": 36.24, "elapsed_time": "6:15:50", "remaining_time": "11:01:21"} +{"current_steps": 1565, "total_steps": 4305, "loss": 0.2907, "lr": 3.2135516434057915e-05, "epoch": 2.545158665581774, "percentage": 36.35, "elapsed_time": "6:17:04", "remaining_time": "11:00:10"} +{"current_steps": 1570, "total_steps": 4305, "loss": 0.2756, "lr": 3.2070957193077705e-05, "epoch": 2.553295362082994, "percentage": 36.47, "elapsed_time": "6:18:18", "remaining_time": "10:59:01"} +{"current_steps": 1575, "total_steps": 4305, "loss": 0.2694, "lr": 
3.200619949728448e-05, "epoch": 2.5614320585842147, "percentage": 36.59, "elapsed_time": "6:19:28", "remaining_time": "10:57:45"} +{"current_steps": 1580, "total_steps": 4305, "loss": 0.2818, "lr": 3.194124441133916e-05, "epoch": 2.5695687550854354, "percentage": 36.7, "elapsed_time": "6:20:38", "remaining_time": "10:56:29"} +{"current_steps": 1585, "total_steps": 4305, "loss": 0.3062, "lr": 3.187609300314789e-05, "epoch": 2.577705451586656, "percentage": 36.82, "elapsed_time": "6:21:57", "remaining_time": "10:55:28"} +{"current_steps": 1590, "total_steps": 4305, "loss": 0.2891, "lr": 3.181074634384451e-05, "epoch": 2.585842148087876, "percentage": 36.93, "elapsed_time": "6:23:12", "remaining_time": "10:54:21"} +{"current_steps": 1595, "total_steps": 4305, "loss": 0.2818, "lr": 3.1745205507772876e-05, "epoch": 2.5939788445890968, "percentage": 37.05, "elapsed_time": "6:24:29", "remaining_time": "10:53:15"} +{"current_steps": 1600, "total_steps": 4305, "loss": 0.2834, "lr": 3.16794715724693e-05, "epoch": 2.6021155410903174, "percentage": 37.17, "elapsed_time": "6:25:40", "remaining_time": "10:52:02"} +{"current_steps": 1605, "total_steps": 4305, "loss": 0.2981, "lr": 3.161354561864474e-05, "epoch": 2.6102522375915376, "percentage": 37.28, "elapsed_time": "6:26:47", "remaining_time": "10:50:40"} +{"current_steps": 1610, "total_steps": 4305, "loss": 0.2871, "lr": 3.154742873016707e-05, "epoch": 2.618388934092758, "percentage": 37.4, "elapsed_time": "6:27:57", "remaining_time": "10:49:25"} +{"current_steps": 1615, "total_steps": 4305, "loss": 0.272, "lr": 3.14811219940433e-05, "epoch": 2.626525630593979, "percentage": 37.51, "elapsed_time": "6:29:14", "remaining_time": "10:48:20"} +{"current_steps": 1620, "total_steps": 4305, "loss": 0.2854, "lr": 3.141462650040161e-05, "epoch": 2.6346623270951994, "percentage": 37.63, "elapsed_time": "6:30:27", "remaining_time": "10:47:09"} +{"current_steps": 1625, "total_steps": 4305, "loss": 0.31, "lr": 3.134794334247351e-05, 
"epoch": 2.64279902359642, "percentage": 37.75, "elapsed_time": "6:31:43", "remaining_time": "10:46:02"} +{"current_steps": 1630, "total_steps": 4305, "loss": 0.2915, "lr": 3.1281073616575856e-05, "epoch": 2.6509357200976402, "percentage": 37.86, "elapsed_time": "6:32:58", "remaining_time": "10:44:54"} +{"current_steps": 1635, "total_steps": 4305, "loss": 0.2747, "lr": 3.121401842209279e-05, "epoch": 2.659072416598861, "percentage": 37.98, "elapsed_time": "6:34:11", "remaining_time": "10:43:44"} +{"current_steps": 1640, "total_steps": 4305, "loss": 0.2907, "lr": 3.114677886145768e-05, "epoch": 2.6672091131000815, "percentage": 38.1, "elapsed_time": "6:35:25", "remaining_time": "10:42:34"} +{"current_steps": 1645, "total_steps": 4305, "loss": 0.2957, "lr": 3.107935604013501e-05, "epoch": 2.6753458096013016, "percentage": 38.21, "elapsed_time": "6:36:36", "remaining_time": "10:41:19"} +{"current_steps": 1650, "total_steps": 4305, "loss": 0.3001, "lr": 3.101175106660219e-05, "epoch": 2.6834825061025223, "percentage": 38.33, "elapsed_time": "6:37:43", "remaining_time": "10:39:59"} +{"current_steps": 1655, "total_steps": 4305, "loss": 0.3009, "lr": 3.094396505233135e-05, "epoch": 2.691619202603743, "percentage": 38.44, "elapsed_time": "6:38:51", "remaining_time": "10:38:39"} +{"current_steps": 1660, "total_steps": 4305, "loss": 0.2798, "lr": 3.087599911177103e-05, "epoch": 2.6997558991049635, "percentage": 38.56, "elapsed_time": "6:40:06", "remaining_time": "10:37:31"} +{"current_steps": 1665, "total_steps": 4305, "loss": 0.3039, "lr": 3.0807854362327906e-05, "epoch": 2.707892595606184, "percentage": 38.68, "elapsed_time": "6:41:18", "remaining_time": "10:36:18"} +{"current_steps": 1670, "total_steps": 4305, "loss": 0.2666, "lr": 3.073953192434837e-05, "epoch": 2.7160292921074043, "percentage": 38.79, "elapsed_time": "6:42:30", "remaining_time": "10:35:06"} +{"current_steps": 1675, "total_steps": 4305, "loss": 0.2806, "lr": 3.067103292110017e-05, "epoch": 
2.724165988608625, "percentage": 38.91, "elapsed_time": "6:43:48", "remaining_time": "10:34:02"} +{"current_steps": 1680, "total_steps": 4305, "loss": 0.3052, "lr": 3.060235847875387e-05, "epoch": 2.7323026851098455, "percentage": 39.02, "elapsed_time": "6:45:02", "remaining_time": "10:32:52"} +{"current_steps": 1685, "total_steps": 4305, "loss": 0.2659, "lr": 3.05335097263644e-05, "epoch": 2.7404393816110657, "percentage": 39.14, "elapsed_time": "6:46:16", "remaining_time": "10:31:42"} +{"current_steps": 1690, "total_steps": 4305, "loss": 0.2674, "lr": 3.0464487795852463e-05, "epoch": 2.7485760781122863, "percentage": 39.26, "elapsed_time": "6:47:28", "remaining_time": "10:30:29"} +{"current_steps": 1695, "total_steps": 4305, "loss": 0.2894, "lr": 3.0395293821985906e-05, "epoch": 2.756712774613507, "percentage": 39.37, "elapsed_time": "6:48:38", "remaining_time": "10:29:13"} +{"current_steps": 1700, "total_steps": 4305, "loss": 0.319, "lr": 3.032592894236112e-05, "epoch": 2.7648494711147276, "percentage": 39.49, "elapsed_time": "6:49:52", "remaining_time": "10:28:04"} +{"current_steps": 1705, "total_steps": 4305, "loss": 0.256, "lr": 3.0256394297384273e-05, "epoch": 2.772986167615948, "percentage": 39.61, "elapsed_time": "6:51:10", "remaining_time": "10:27:00"} +{"current_steps": 1710, "total_steps": 4305, "loss": 0.2855, "lr": 3.0186691030252614e-05, "epoch": 2.7811228641171684, "percentage": 39.72, "elapsed_time": "6:52:19", "remaining_time": "10:25:42"} +{"current_steps": 1715, "total_steps": 4305, "loss": 0.2921, "lr": 3.0116820286935654e-05, "epoch": 2.789259560618389, "percentage": 39.84, "elapsed_time": "6:53:34", "remaining_time": "10:24:34"} +{"current_steps": 1720, "total_steps": 4305, "loss": 0.29, "lr": 3.0046783216156315e-05, "epoch": 2.7973962571196096, "percentage": 39.95, "elapsed_time": "6:54:44", "remaining_time": "10:23:19"} +{"current_steps": 1725, "total_steps": 4305, "loss": 0.2791, "lr": 2.997658096937207e-05, "epoch": 2.8055329536208298, 
"percentage": 40.07, "elapsed_time": "6:55:55", "remaining_time": "10:22:04"} +{"current_steps": 1730, "total_steps": 4305, "loss": 0.2684, "lr": 2.990621470075598e-05, "epoch": 2.8136696501220504, "percentage": 40.19, "elapsed_time": "6:57:10", "remaining_time": "10:20:56"} +{"current_steps": 1735, "total_steps": 4305, "loss": 0.2708, "lr": 2.9835685567177763e-05, "epoch": 2.821806346623271, "percentage": 40.3, "elapsed_time": "6:58:15", "remaining_time": "10:19:33"} +{"current_steps": 1740, "total_steps": 4305, "loss": 0.2788, "lr": 2.9764994728184725e-05, "epoch": 2.8299430431244916, "percentage": 40.42, "elapsed_time": "6:59:26", "remaining_time": "10:18:19"} +{"current_steps": 1745, "total_steps": 4305, "loss": 0.3008, "lr": 2.9694143345982732e-05, "epoch": 2.838079739625712, "percentage": 40.53, "elapsed_time": "7:00:39", "remaining_time": "10:17:07"} +{"current_steps": 1750, "total_steps": 4305, "loss": 0.2713, "lr": 2.9623132585417096e-05, "epoch": 2.8462164361269324, "percentage": 40.65, "elapsed_time": "7:01:51", "remaining_time": "10:15:54"} +{"current_steps": 1755, "total_steps": 4305, "loss": 0.2949, "lr": 2.9551963613953404e-05, "epoch": 2.854353132628153, "percentage": 40.77, "elapsed_time": "7:03:03", "remaining_time": "10:14:41"} +{"current_steps": 1760, "total_steps": 4305, "loss": 0.2837, "lr": 2.948063760165835e-05, "epoch": 2.862489829129373, "percentage": 40.88, "elapsed_time": "7:04:13", "remaining_time": "10:13:26"} +{"current_steps": 1765, "total_steps": 4305, "loss": 0.27, "lr": 2.9409155721180477e-05, "epoch": 2.870626525630594, "percentage": 41.0, "elapsed_time": "7:05:27", "remaining_time": "10:12:15"} +{"current_steps": 1770, "total_steps": 4305, "loss": 0.2847, "lr": 2.9337519147730918e-05, "epoch": 2.8787632221318145, "percentage": 41.11, "elapsed_time": "7:06:37", "remaining_time": "10:11:00"} +{"current_steps": 1775, "total_steps": 4305, "loss": 0.2812, "lr": 2.9265729059064054e-05, "epoch": 2.886899918633035, "percentage": 41.23, 
"elapsed_time": "7:07:43", "remaining_time": "10:09:39"} +{"current_steps": 1780, "total_steps": 4305, "loss": 0.2841, "lr": 2.9193786635458178e-05, "epoch": 2.8950366151342557, "percentage": 41.35, "elapsed_time": "7:08:50", "remaining_time": "10:08:19"} +{"current_steps": 1785, "total_steps": 4305, "loss": 0.2812, "lr": 2.912169305969605e-05, "epoch": 2.903173311635476, "percentage": 41.46, "elapsed_time": "7:10:05", "remaining_time": "10:07:11"} +{"current_steps": 1790, "total_steps": 4305, "loss": 0.2604, "lr": 2.9049449517045497e-05, "epoch": 2.9113100081366965, "percentage": 41.58, "elapsed_time": "7:11:15", "remaining_time": "10:05:55"} +{"current_steps": 1795, "total_steps": 4305, "loss": 0.2515, "lr": 2.89770571952399e-05, "epoch": 2.919446704637917, "percentage": 41.7, "elapsed_time": "7:12:20", "remaining_time": "10:04:33"} +{"current_steps": 1800, "total_steps": 4305, "loss": 0.293, "lr": 2.890451728445866e-05, "epoch": 2.9275834011391373, "percentage": 41.81, "elapsed_time": "7:13:30", "remaining_time": "10:03:18"} +{"current_steps": 1805, "total_steps": 4305, "loss": 0.2784, "lr": 2.8831830977307644e-05, "epoch": 2.935720097640358, "percentage": 41.93, "elapsed_time": "7:14:42", "remaining_time": "10:02:05"} +{"current_steps": 1810, "total_steps": 4305, "loss": 0.2881, "lr": 2.8758999468799594e-05, "epoch": 2.9438567941415785, "percentage": 42.04, "elapsed_time": "7:15:54", "remaining_time": "10:00:52"} +{"current_steps": 1815, "total_steps": 4305, "loss": 0.2855, "lr": 2.868602395633444e-05, "epoch": 2.951993490642799, "percentage": 42.16, "elapsed_time": "7:17:09", "remaining_time": "9:59:44"} +{"current_steps": 1820, "total_steps": 4305, "loss": 0.315, "lr": 2.861290563967965e-05, "epoch": 2.9601301871440198, "percentage": 42.28, "elapsed_time": "7:18:22", "remaining_time": "9:58:33"} +{"current_steps": 1825, "total_steps": 4305, "loss": 0.2935, "lr": 2.8539645720950474e-05, "epoch": 2.96826688364524, "percentage": 42.39, "elapsed_time": "7:19:35", 
"remaining_time": "9:57:22"} +{"current_steps": 1830, "total_steps": 4305, "loss": 0.3094, "lr": 2.8466245404590226e-05, "epoch": 2.9764035801464606, "percentage": 42.51, "elapsed_time": "7:20:44", "remaining_time": "9:56:05"} +{"current_steps": 1835, "total_steps": 4305, "loss": 0.2902, "lr": 2.8392705897350425e-05, "epoch": 2.984540276647681, "percentage": 42.62, "elapsed_time": "7:22:00", "remaining_time": "9:54:58"} +{"current_steps": 1840, "total_steps": 4305, "loss": 0.2751, "lr": 2.8319028408270983e-05, "epoch": 2.9926769731489014, "percentage": 42.74, "elapsed_time": "7:23:13", "remaining_time": "9:53:47"} +{"current_steps": 1845, "total_steps": 4305, "loss": 0.3069, "lr": 2.8245214148660364e-05, "epoch": 3.0, "percentage": 42.86, "elapsed_time": "7:24:22", "remaining_time": "9:52:30"} +{"current_steps": 1850, "total_steps": 4305, "loss": 0.256, "lr": 2.8171264332075588e-05, "epoch": 3.0081366965012206, "percentage": 42.97, "elapsed_time": "7:25:35", "remaining_time": "9:51:19"} +{"current_steps": 1855, "total_steps": 4305, "loss": 0.246, "lr": 2.809718017430236e-05, "epoch": 3.016273393002441, "percentage": 43.09, "elapsed_time": "7:26:45", "remaining_time": "9:50:03"} +{"current_steps": 1860, "total_steps": 4305, "loss": 0.2619, "lr": 2.8022962893335023e-05, "epoch": 3.0244100895036614, "percentage": 43.21, "elapsed_time": "7:27:57", "remaining_time": "9:48:50"} +{"current_steps": 1865, "total_steps": 4305, "loss": 0.2881, "lr": 2.7948613709356565e-05, "epoch": 3.032546786004882, "percentage": 43.32, "elapsed_time": "7:29:11", "remaining_time": "9:47:41"} +{"current_steps": 1870, "total_steps": 4305, "loss": 0.2756, "lr": 2.7874133844718557e-05, "epoch": 3.0406834825061027, "percentage": 43.44, "elapsed_time": "7:30:21", "remaining_time": "9:46:25"} +{"current_steps": 1875, "total_steps": 4305, "loss": 0.2634, "lr": 2.7799524523921038e-05, "epoch": 3.048820179007323, "percentage": 43.55, "elapsed_time": "7:31:38", "remaining_time": "9:45:19"} 
+{"current_steps": 1880, "total_steps": 4305, "loss": 0.2785, "lr": 2.77247869735924e-05, "epoch": 3.0569568755085434, "percentage": 43.67, "elapsed_time": "7:32:47", "remaining_time": "9:44:03"} +{"current_steps": 1885, "total_steps": 4305, "loss": 0.2799, "lr": 2.764992242246921e-05, "epoch": 3.065093572009764, "percentage": 43.79, "elapsed_time": "7:34:00", "remaining_time": "9:42:52"} +{"current_steps": 1890, "total_steps": 4305, "loss": 0.2628, "lr": 2.7574932101376034e-05, "epoch": 3.0732302685109847, "percentage": 43.9, "elapsed_time": "7:35:17", "remaining_time": "9:41:45"} +{"current_steps": 1895, "total_steps": 4305, "loss": 0.2829, "lr": 2.749981724320516e-05, "epoch": 3.081366965012205, "percentage": 44.02, "elapsed_time": "7:36:28", "remaining_time": "9:40:31"} +{"current_steps": 1900, "total_steps": 4305, "loss": 0.2836, "lr": 2.7424579082896357e-05, "epoch": 3.0895036615134255, "percentage": 44.13, "elapsed_time": "7:37:37", "remaining_time": "9:39:15"} +{"current_steps": 1905, "total_steps": 4305, "loss": 0.2716, "lr": 2.7349218857416587e-05, "epoch": 3.097640358014646, "percentage": 44.25, "elapsed_time": "7:38:46", "remaining_time": "9:37:58"} +{"current_steps": 1910, "total_steps": 4305, "loss": 0.2696, "lr": 2.7273737805739614e-05, "epoch": 3.1057770545158667, "percentage": 44.37, "elapsed_time": "7:39:53", "remaining_time": "9:36:40"} +{"current_steps": 1915, "total_steps": 4305, "loss": 0.2483, "lr": 2.719813716882569e-05, "epoch": 3.113913751017087, "percentage": 44.48, "elapsed_time": "7:41:07", "remaining_time": "9:35:29"} +{"current_steps": 1920, "total_steps": 4305, "loss": 0.2599, "lr": 2.7122418189601118e-05, "epoch": 3.1220504475183075, "percentage": 44.6, "elapsed_time": "7:42:18", "remaining_time": "9:34:15"} +{"current_steps": 1925, "total_steps": 4305, "loss": 0.2513, "lr": 2.7046582112937837e-05, "epoch": 3.130187144019528, "percentage": 44.72, "elapsed_time": "7:43:28", "remaining_time": "9:33:01"} +{"current_steps": 1930, 
"total_steps": 4305, "loss": 0.2647, "lr": 2.697063018563295e-05, "epoch": 3.1383238405207488, "percentage": 44.83, "elapsed_time": "7:44:42", "remaining_time": "9:31:51"} +{"current_steps": 1935, "total_steps": 4305, "loss": 0.2757, "lr": 2.6894563656388217e-05, "epoch": 3.146460537021969, "percentage": 44.95, "elapsed_time": "7:45:53", "remaining_time": "9:30:37"} +{"current_steps": 1940, "total_steps": 4305, "loss": 0.2685, "lr": 2.681838377578954e-05, "epoch": 3.1545972335231895, "percentage": 45.06, "elapsed_time": "7:47:02", "remaining_time": "9:29:21"} +{"current_steps": 1945, "total_steps": 4305, "loss": 0.2799, "lr": 2.6742091796286388e-05, "epoch": 3.16273393002441, "percentage": 45.18, "elapsed_time": "7:48:13", "remaining_time": "9:28:07"} +{"current_steps": 1950, "total_steps": 4305, "loss": 0.2777, "lr": 2.6665688972171215e-05, "epoch": 3.170870626525631, "percentage": 45.3, "elapsed_time": "7:49:29", "remaining_time": "9:26:59"} +{"current_steps": 1955, "total_steps": 4305, "loss": 0.2623, "lr": 2.658917655955884e-05, "epoch": 3.179007323026851, "percentage": 45.41, "elapsed_time": "7:50:46", "remaining_time": "9:25:53"} +{"current_steps": 1960, "total_steps": 4305, "loss": 0.2759, "lr": 2.651255581636578e-05, "epoch": 3.1871440195280716, "percentage": 45.53, "elapsed_time": "7:51:57", "remaining_time": "9:24:39"} +{"current_steps": 1965, "total_steps": 4305, "loss": 0.2798, "lr": 2.6435828002289596e-05, "epoch": 3.195280716029292, "percentage": 45.64, "elapsed_time": "7:53:07", "remaining_time": "9:23:24"} +{"current_steps": 1970, "total_steps": 4305, "loss": 0.2499, "lr": 2.6358994378788163e-05, "epoch": 3.203417412530513, "percentage": 45.76, "elapsed_time": "7:54:24", "remaining_time": "9:22:18"} +{"current_steps": 1975, "total_steps": 4305, "loss": 0.2908, "lr": 2.6282056209058936e-05, "epoch": 3.211554109031733, "percentage": 45.88, "elapsed_time": "7:55:36", "remaining_time": "9:21:05"} +{"current_steps": 1980, "total_steps": 4305, "loss": 
0.2731, "lr": 2.6205014758018176e-05, "epoch": 3.2196908055329536, "percentage": 45.99, "elapsed_time": "7:56:52", "remaining_time": "9:19:58"} +{"current_steps": 1985, "total_steps": 4305, "loss": 0.2654, "lr": 2.6127871292280165e-05, "epoch": 3.2278275020341742, "percentage": 46.11, "elapsed_time": "7:58:08", "remaining_time": "9:18:50"} +{"current_steps": 1990, "total_steps": 4305, "loss": 0.2457, "lr": 2.6050627080136376e-05, "epoch": 3.2359641985353944, "percentage": 46.23, "elapsed_time": "7:59:21", "remaining_time": "9:17:38"} +{"current_steps": 1995, "total_steps": 4305, "loss": 0.2627, "lr": 2.5973283391534615e-05, "epoch": 3.244100895036615, "percentage": 46.34, "elapsed_time": "8:00:32", "remaining_time": "9:16:24"} +{"current_steps": 2000, "total_steps": 4305, "loss": 0.2619, "lr": 2.589584149805817e-05, "epoch": 3.2522375915378356, "percentage": 46.46, "elapsed_time": "8:01:47", "remaining_time": "9:15:15"} +{"current_steps": 2005, "total_steps": 4305, "loss": 0.2546, "lr": 2.581830267290486e-05, "epoch": 3.2603742880390563, "percentage": 46.57, "elapsed_time": "8:03:02", "remaining_time": "9:14:06"} +{"current_steps": 2010, "total_steps": 4305, "loss": 0.2555, "lr": 2.574066819086613e-05, "epoch": 3.268510984540277, "percentage": 46.69, "elapsed_time": "8:04:16", "remaining_time": "9:12:55"} +{"current_steps": 2015, "total_steps": 4305, "loss": 0.2709, "lr": 2.5662939328306113e-05, "epoch": 3.276647681041497, "percentage": 46.81, "elapsed_time": "8:05:26", "remaining_time": "9:11:41"} +{"current_steps": 2020, "total_steps": 4305, "loss": 0.2671, "lr": 2.5585117363140592e-05, "epoch": 3.2847843775427177, "percentage": 46.92, "elapsed_time": "8:06:40", "remaining_time": "9:10:31"} +{"current_steps": 2025, "total_steps": 4305, "loss": 0.2777, "lr": 2.5507203574816043e-05, "epoch": 3.2929210740439383, "percentage": 47.04, "elapsed_time": "8:07:55", "remaining_time": "9:09:21"} +{"current_steps": 2030, "total_steps": 4305, "loss": 0.2754, "lr": 
2.542919924428856e-05, "epoch": 3.3010577705451585, "percentage": 47.15, "elapsed_time": "8:09:04", "remaining_time": "9:08:06"} +{"current_steps": 2035, "total_steps": 4305, "loss": 0.2747, "lr": 2.5351105654002838e-05, "epoch": 3.309194467046379, "percentage": 47.27, "elapsed_time": "8:10:15", "remaining_time": "9:06:52"} +{"current_steps": 2040, "total_steps": 4305, "loss": 0.2634, "lr": 2.527292408787104e-05, "epoch": 3.3173311635475997, "percentage": 47.39, "elapsed_time": "8:11:25", "remaining_time": "9:05:37"} +{"current_steps": 2045, "total_steps": 4305, "loss": 0.2456, "lr": 2.5194655831251712e-05, "epoch": 3.3254678600488203, "percentage": 47.5, "elapsed_time": "8:12:41", "remaining_time": "9:04:28"} +{"current_steps": 2050, "total_steps": 4305, "loss": 0.251, "lr": 2.5116302170928678e-05, "epoch": 3.3336045565500405, "percentage": 47.62, "elapsed_time": "8:13:54", "remaining_time": "9:03:17"} +{"current_steps": 2055, "total_steps": 4305, "loss": 0.2893, "lr": 2.5037864395089822e-05, "epoch": 3.341741253051261, "percentage": 47.74, "elapsed_time": "8:15:09", "remaining_time": "9:02:08"} +{"current_steps": 2060, "total_steps": 4305, "loss": 0.2669, "lr": 2.495934379330597e-05, "epoch": 3.3498779495524817, "percentage": 47.85, "elapsed_time": "8:16:22", "remaining_time": "9:00:57"} +{"current_steps": 2065, "total_steps": 4305, "loss": 0.2577, "lr": 2.4880741656509656e-05, "epoch": 3.3580146460537024, "percentage": 47.97, "elapsed_time": "8:17:26", "remaining_time": "8:59:35"} +{"current_steps": 2070, "total_steps": 4305, "loss": 0.2744, "lr": 2.4802059276973904e-05, "epoch": 3.3661513425549225, "percentage": 48.08, "elapsed_time": "8:18:44", "remaining_time": "8:58:29"} +{"current_steps": 2075, "total_steps": 4305, "loss": 0.2559, "lr": 2.4723297948290982e-05, "epoch": 3.374288039056143, "percentage": 48.2, "elapsed_time": "8:19:56", "remaining_time": "8:57:17"} +{"current_steps": 2080, "total_steps": 4305, "loss": 0.2836, "lr": 2.464445896535113e-05, 
"epoch": 3.382424735557364, "percentage": 48.32, "elapsed_time": "8:21:11", "remaining_time": "8:56:08"} +{"current_steps": 2085, "total_steps": 4305, "loss": 0.2693, "lr": 2.45655436243213e-05, "epoch": 3.3905614320585844, "percentage": 48.43, "elapsed_time": "8:22:17", "remaining_time": "8:54:49"} +{"current_steps": 2090, "total_steps": 4305, "loss": 0.2774, "lr": 2.44865532226238e-05, "epoch": 3.3986981285598046, "percentage": 48.55, "elapsed_time": "8:23:29", "remaining_time": "8:53:36"} +{"current_steps": 2095, "total_steps": 4305, "loss": 0.2874, "lr": 2.4407489058915004e-05, "epoch": 3.406834825061025, "percentage": 48.66, "elapsed_time": "8:24:37", "remaining_time": "8:52:19"} +{"current_steps": 2100, "total_steps": 4305, "loss": 0.2752, "lr": 2.4328352433063966e-05, "epoch": 3.414971521562246, "percentage": 48.78, "elapsed_time": "8:25:50", "remaining_time": "8:51:07"} +{"current_steps": 2105, "total_steps": 4305, "loss": 0.2545, "lr": 2.4249144646131083e-05, "epoch": 3.423108218063466, "percentage": 48.9, "elapsed_time": "8:26:57", "remaining_time": "8:49:50"} +{"current_steps": 2110, "total_steps": 4305, "loss": 0.2631, "lr": 2.4169867000346684e-05, "epoch": 3.4312449145646866, "percentage": 49.01, "elapsed_time": "8:28:10", "remaining_time": "8:48:38"} +{"current_steps": 2115, "total_steps": 4305, "loss": 0.2866, "lr": 2.4090520799089612e-05, "epoch": 3.4393816110659072, "percentage": 49.13, "elapsed_time": "8:29:19", "remaining_time": "8:47:22"} +{"current_steps": 2120, "total_steps": 4305, "loss": 0.2892, "lr": 2.4011107346865844e-05, "epoch": 3.447518307567128, "percentage": 49.25, "elapsed_time": "8:30:31", "remaining_time": "8:46:11"} +{"current_steps": 2125, "total_steps": 4305, "loss": 0.2499, "lr": 2.393162794928697e-05, "epoch": 3.4556550040683485, "percentage": 49.36, "elapsed_time": "8:31:45", "remaining_time": "8:45:00"} +{"current_steps": 2130, "total_steps": 4305, "loss": 0.2581, "lr": 2.385208391304879e-05, "epoch": 3.4637917005695686, 
"percentage": 49.48, "elapsed_time": "8:33:00", "remaining_time": "8:43:50"} +{"current_steps": 2135, "total_steps": 4305, "loss": 0.2717, "lr": 2.3772476545909794e-05, "epoch": 3.4719283970707893, "percentage": 49.59, "elapsed_time": "8:34:12", "remaining_time": "8:42:38"} +{"current_steps": 2140, "total_steps": 4305, "loss": 0.2883, "lr": 2.3692807156669684e-05, "epoch": 3.48006509357201, "percentage": 49.71, "elapsed_time": "8:35:25", "remaining_time": "8:41:26"} +{"current_steps": 2145, "total_steps": 4305, "loss": 0.2871, "lr": 2.3613077055147855e-05, "epoch": 3.48820179007323, "percentage": 49.83, "elapsed_time": "8:36:34", "remaining_time": "8:40:11"} +{"current_steps": 2150, "total_steps": 4305, "loss": 0.2818, "lr": 2.3533287552161833e-05, "epoch": 3.4963384865744507, "percentage": 49.94, "elapsed_time": "8:37:40", "remaining_time": "8:38:52"} +{"current_steps": 2155, "total_steps": 4305, "loss": 0.2819, "lr": 2.345343995950577e-05, "epoch": 3.5044751830756713, "percentage": 50.06, "elapsed_time": "8:38:52", "remaining_time": "8:37:40"} +{"current_steps": 2160, "total_steps": 4305, "loss": 0.2793, "lr": 2.3373535589928827e-05, "epoch": 3.512611879576892, "percentage": 50.17, "elapsed_time": "8:40:03", "remaining_time": "8:36:26"} +{"current_steps": 2165, "total_steps": 4305, "loss": 0.2645, "lr": 2.3293575757113635e-05, "epoch": 3.5207485760781125, "percentage": 50.29, "elapsed_time": "8:41:17", "remaining_time": "8:35:16"} +{"current_steps": 2170, "total_steps": 4305, "loss": 0.2769, "lr": 2.3213561775654678e-05, "epoch": 3.5288852725793327, "percentage": 50.41, "elapsed_time": "8:42:37", "remaining_time": "8:34:12"} +{"current_steps": 2175, "total_steps": 4305, "loss": 0.2679, "lr": 2.3133494961036655e-05, "epoch": 3.5370219690805533, "percentage": 50.52, "elapsed_time": "8:44:00", "remaining_time": "8:33:09"} +{"current_steps": 2180, "total_steps": 4305, "loss": 0.2951, "lr": 2.305337662961292e-05, "epoch": 3.545158665581774, "percentage": 50.64, 
"elapsed_time": "8:45:14", "remaining_time": "8:31:59"} +{"current_steps": 2185, "total_steps": 4305, "loss": 0.2528, "lr": 2.2973208098583767e-05, "epoch": 3.553295362082994, "percentage": 50.75, "elapsed_time": "8:46:29", "remaining_time": "8:30:50"} +{"current_steps": 2190, "total_steps": 4305, "loss": 0.2319, "lr": 2.2892990685974815e-05, "epoch": 3.5614320585842147, "percentage": 50.87, "elapsed_time": "8:47:48", "remaining_time": "8:29:44"} +{"current_steps": 2195, "total_steps": 4305, "loss": 0.2586, "lr": 2.2812725710615328e-05, "epoch": 3.5695687550854354, "percentage": 50.99, "elapsed_time": "8:49:07", "remaining_time": "8:28:37"} +{"current_steps": 2200, "total_steps": 4305, "loss": 0.2549, "lr": 2.2732414492116538e-05, "epoch": 3.577705451586656, "percentage": 51.1, "elapsed_time": "8:50:22", "remaining_time": "8:27:28"} +{"current_steps": 2205, "total_steps": 4305, "loss": 0.2694, "lr": 2.2652058350849955e-05, "epoch": 3.585842148087876, "percentage": 51.22, "elapsed_time": "8:51:29", "remaining_time": "8:26:10"} +{"current_steps": 2210, "total_steps": 4305, "loss": 0.2594, "lr": 2.2571658607925624e-05, "epoch": 3.5939788445890968, "percentage": 51.34, "elapsed_time": "8:52:37", "remaining_time": "8:24:54"} +{"current_steps": 2215, "total_steps": 4305, "loss": 0.2594, "lr": 2.2491216585170458e-05, "epoch": 3.6021155410903174, "percentage": 51.45, "elapsed_time": "8:53:45", "remaining_time": "8:23:38"} +{"current_steps": 2220, "total_steps": 4305, "loss": 0.2567, "lr": 2.2410733605106462e-05, "epoch": 3.6102522375915376, "percentage": 51.57, "elapsed_time": "8:54:54", "remaining_time": "8:22:22"} +{"current_steps": 2225, "total_steps": 4305, "loss": 0.2528, "lr": 2.233021099092902e-05, "epoch": 3.618388934092758, "percentage": 51.68, "elapsed_time": "8:56:07", "remaining_time": "8:21:10"} +{"current_steps": 2230, "total_steps": 4305, "loss": 0.2615, "lr": 2.224965006648512e-05, "epoch": 3.626525630593979, "percentage": 51.8, "elapsed_time": "8:57:18", 
"remaining_time": "8:19:57"} +{"current_steps": 2235, "total_steps": 4305, "loss": 0.3001, "lr": 2.2169052156251585e-05, "epoch": 3.6346623270951994, "percentage": 51.92, "elapsed_time": "8:58:30", "remaining_time": "8:18:45"} +{"current_steps": 2240, "total_steps": 4305, "loss": 0.2613, "lr": 2.2088418585313346e-05, "epoch": 3.64279902359642, "percentage": 52.03, "elapsed_time": "8:59:43", "remaining_time": "8:17:33"} +{"current_steps": 2245, "total_steps": 4305, "loss": 0.2866, "lr": 2.200775067934158e-05, "epoch": 3.6509357200976402, "percentage": 52.15, "elapsed_time": "9:00:53", "remaining_time": "8:16:19"} +{"current_steps": 2250, "total_steps": 4305, "loss": 0.2555, "lr": 2.192704976457198e-05, "epoch": 3.659072416598861, "percentage": 52.26, "elapsed_time": "9:02:09", "remaining_time": "8:15:10"} +{"current_steps": 2255, "total_steps": 4305, "loss": 0.2481, "lr": 2.1846317167782923e-05, "epoch": 3.6672091131000815, "percentage": 52.38, "elapsed_time": "9:03:22", "remaining_time": "8:13:58"} +{"current_steps": 2260, "total_steps": 4305, "loss": 0.268, "lr": 2.1765554216273652e-05, "epoch": 3.6753458096013016, "percentage": 52.5, "elapsed_time": "9:04:35", "remaining_time": "8:12:47"} +{"current_steps": 2265, "total_steps": 4305, "loss": 0.2741, "lr": 2.1684762237842466e-05, "epoch": 3.6834825061025223, "percentage": 52.61, "elapsed_time": "9:05:49", "remaining_time": "8:11:35"} +{"current_steps": 2270, "total_steps": 4305, "loss": 0.2678, "lr": 2.1603942560764884e-05, "epoch": 3.691619202603743, "percentage": 52.73, "elapsed_time": "9:06:58", "remaining_time": "8:10:20"} +{"current_steps": 2275, "total_steps": 4305, "loss": 0.245, "lr": 2.1523096513771825e-05, "epoch": 3.6997558991049635, "percentage": 52.85, "elapsed_time": "9:08:17", "remaining_time": "8:09:14"} +{"current_steps": 2280, "total_steps": 4305, "loss": 0.2688, "lr": 2.1442225426027724e-05, "epoch": 3.707892595606184, "percentage": 52.96, "elapsed_time": "9:09:24", "remaining_time": "8:07:57"} 
+{"current_steps": 2285, "total_steps": 4305, "loss": 0.2786, "lr": 2.1361330627108724e-05, "epoch": 3.7160292921074043, "percentage": 53.08, "elapsed_time": "9:10:37", "remaining_time": "8:06:46"} +{"current_steps": 2290, "total_steps": 4305, "loss": 0.2391, "lr": 2.128041344698078e-05, "epoch": 3.724165988608625, "percentage": 53.19, "elapsed_time": "9:11:48", "remaining_time": "8:05:32"} +{"current_steps": 2295, "total_steps": 4305, "loss": 0.2665, "lr": 2.1199475215977817e-05, "epoch": 3.7323026851098455, "percentage": 53.31, "elapsed_time": "9:13:02", "remaining_time": "8:04:21"} +{"current_steps": 2300, "total_steps": 4305, "loss": 0.2475, "lr": 2.1118517264779858e-05, "epoch": 3.7404393816110657, "percentage": 53.43, "elapsed_time": "9:14:10", "remaining_time": "8:03:05"} +{"current_steps": 2305, "total_steps": 4305, "loss": 0.2823, "lr": 2.103754092439112e-05, "epoch": 3.7485760781122863, "percentage": 53.54, "elapsed_time": "9:15:22", "remaining_time": "8:01:53"} +{"current_steps": 2310, "total_steps": 4305, "loss": 0.26, "lr": 2.095654752611817e-05, "epoch": 3.756712774613507, "percentage": 53.66, "elapsed_time": "9:16:34", "remaining_time": "8:00:40"} +{"current_steps": 2315, "total_steps": 4305, "loss": 0.2658, "lr": 2.087553840154801e-05, "epoch": 3.7648494711147276, "percentage": 53.77, "elapsed_time": "9:17:47", "remaining_time": "7:59:28"} +{"current_steps": 2320, "total_steps": 4305, "loss": 0.2526, "lr": 2.0794514882526196e-05, "epoch": 3.772986167615948, "percentage": 53.89, "elapsed_time": "9:18:56", "remaining_time": "7:58:14"} +{"current_steps": 2325, "total_steps": 4305, "loss": 0.2882, "lr": 2.0713478301134935e-05, "epoch": 3.7811228641171684, "percentage": 54.01, "elapsed_time": "9:20:07", "remaining_time": "7:57:00"} +{"current_steps": 2330, "total_steps": 4305, "loss": 0.2512, "lr": 2.063242998967118e-05, "epoch": 3.789259560618389, "percentage": 54.12, "elapsed_time": "9:21:17", "remaining_time": "7:55:46"} +{"current_steps": 2335, 
"total_steps": 4305, "loss": 0.2551, "lr": 2.0551371280624758e-05, "epoch": 3.7973962571196096, "percentage": 54.24, "elapsed_time": "9:22:32", "remaining_time": "7:54:36"} +{"current_steps": 2340, "total_steps": 4305, "loss": 0.2515, "lr": 2.0470303506656414e-05, "epoch": 3.8055329536208298, "percentage": 54.36, "elapsed_time": "9:23:43", "remaining_time": "7:53:23"} +{"current_steps": 2345, "total_steps": 4305, "loss": 0.2726, "lr": 2.0389228000575953e-05, "epoch": 3.8136696501220504, "percentage": 54.47, "elapsed_time": "9:24:49", "remaining_time": "7:52:05"} +{"current_steps": 2350, "total_steps": 4305, "loss": 0.2669, "lr": 2.0308146095320275e-05, "epoch": 3.821806346623271, "percentage": 54.59, "elapsed_time": "9:25:56", "remaining_time": "7:50:49"} +{"current_steps": 2355, "total_steps": 4305, "loss": 0.2663, "lr": 2.0227059123931504e-05, "epoch": 3.8299430431244916, "percentage": 54.7, "elapsed_time": "9:27:09", "remaining_time": "7:49:37"} +{"current_steps": 2360, "total_steps": 4305, "loss": 0.2594, "lr": 2.0145968419535045e-05, "epoch": 3.838079739625712, "percentage": 54.82, "elapsed_time": "9:28:19", "remaining_time": "7:48:23"} +{"current_steps": 2365, "total_steps": 4305, "loss": 0.2661, "lr": 2.0064875315317674e-05, "epoch": 3.8462164361269324, "percentage": 54.94, "elapsed_time": "9:29:35", "remaining_time": "7:47:14"} +{"current_steps": 2370, "total_steps": 4305, "loss": 0.2568, "lr": 1.998378114450565e-05, "epoch": 3.854353132628153, "percentage": 55.05, "elapsed_time": "9:30:50", "remaining_time": "7:46:04"} +{"current_steps": 2375, "total_steps": 4305, "loss": 0.2568, "lr": 1.9902687240342722e-05, "epoch": 3.862489829129373, "percentage": 55.17, "elapsed_time": "9:32:01", "remaining_time": "7:44:50"} +{"current_steps": 2380, "total_steps": 4305, "loss": 0.2573, "lr": 1.982159493606829e-05, "epoch": 3.870626525630594, "percentage": 55.28, "elapsed_time": "9:33:15", "remaining_time": "7:43:39"} +{"current_steps": 2385, "total_steps": 4305, 
"loss": 0.262, "lr": 1.9740505564895436e-05, "epoch": 3.8787632221318145, "percentage": 55.4, "elapsed_time": "9:34:27", "remaining_time": "7:42:27"} +{"current_steps": 2390, "total_steps": 4305, "loss": 0.2742, "lr": 1.9659420459989026e-05, "epoch": 3.886899918633035, "percentage": 55.52, "elapsed_time": "9:35:40", "remaining_time": "7:41:15"} +{"current_steps": 2395, "total_steps": 4305, "loss": 0.2623, "lr": 1.9578340954443784e-05, "epoch": 3.8950366151342557, "percentage": 55.63, "elapsed_time": "9:36:54", "remaining_time": "7:40:05"} +{"current_steps": 2400, "total_steps": 4305, "loss": 0.2658, "lr": 1.949726838126237e-05, "epoch": 3.903173311635476, "percentage": 55.75, "elapsed_time": "9:38:01", "remaining_time": "7:38:48"} +{"current_steps": 2405, "total_steps": 4305, "loss": 0.2623, "lr": 1.941620407333347e-05, "epoch": 3.9113100081366965, "percentage": 55.87, "elapsed_time": "9:39:07", "remaining_time": "7:37:31"} +{"current_steps": 2410, "total_steps": 4305, "loss": 0.249, "lr": 1.933514936340991e-05, "epoch": 3.919446704637917, "percentage": 55.98, "elapsed_time": "9:40:23", "remaining_time": "7:36:21"} +{"current_steps": 2415, "total_steps": 4305, "loss": 0.2733, "lr": 1.9254105584086683e-05, "epoch": 3.9275834011391373, "percentage": 56.1, "elapsed_time": "9:41:38", "remaining_time": "7:35:11"} +{"current_steps": 2420, "total_steps": 4305, "loss": 0.2653, "lr": 1.9173074067779102e-05, "epoch": 3.935720097640358, "percentage": 56.21, "elapsed_time": "9:42:56", "remaining_time": "7:34:04"} +{"current_steps": 2425, "total_steps": 4305, "loss": 0.2623, "lr": 1.9092056146700844e-05, "epoch": 3.9438567941415785, "percentage": 56.33, "elapsed_time": "9:44:06", "remaining_time": "7:32:50"} +{"current_steps": 2430, "total_steps": 4305, "loss": 0.267, "lr": 1.9011053152842087e-05, "epoch": 3.951993490642799, "percentage": 56.45, "elapsed_time": "9:45:17", "remaining_time": "7:31:37"} +{"current_steps": 2435, "total_steps": 4305, "loss": 0.2784, "lr": 
1.89300664179476e-05, "epoch": 3.9601301871440198, "percentage": 56.56, "elapsed_time": "9:46:26", "remaining_time": "7:30:21"} +{"current_steps": 2440, "total_steps": 4305, "loss": 0.2833, "lr": 1.8849097273494827e-05, "epoch": 3.96826688364524, "percentage": 56.68, "elapsed_time": "9:47:31", "remaining_time": "7:29:04"} +{"current_steps": 2445, "total_steps": 4305, "loss": 0.2637, "lr": 1.8768147050672028e-05, "epoch": 3.9764035801464606, "percentage": 56.79, "elapsed_time": "9:48:45", "remaining_time": "7:27:53"} +{"current_steps": 2450, "total_steps": 4305, "loss": 0.277, "lr": 1.8687217080356365e-05, "epoch": 3.984540276647681, "percentage": 56.91, "elapsed_time": "9:49:53", "remaining_time": "7:26:38"} +{"current_steps": 2455, "total_steps": 4305, "loss": 0.2655, "lr": 1.8606308693092035e-05, "epoch": 3.9926769731489014, "percentage": 57.03, "elapsed_time": "9:51:03", "remaining_time": "7:25:23"} +{"current_steps": 2460, "total_steps": 4305, "loss": 0.2677, "lr": 1.8525423219068423e-05, "epoch": 4.0, "percentage": 57.14, "elapsed_time": "9:52:09", "remaining_time": "7:24:07"} +{"current_steps": 2465, "total_steps": 4305, "loss": 0.2333, "lr": 1.844456198809817e-05, "epoch": 4.008136696501221, "percentage": 57.26, "elapsed_time": "9:53:23", "remaining_time": "7:22:56"} +{"current_steps": 2470, "total_steps": 4305, "loss": 0.2384, "lr": 1.8363726329595356e-05, "epoch": 4.016273393002441, "percentage": 57.38, "elapsed_time": "9:54:33", "remaining_time": "7:21:42"} +{"current_steps": 2475, "total_steps": 4305, "loss": 0.2381, "lr": 1.828291757255364e-05, "epoch": 4.024410089503662, "percentage": 57.49, "elapsed_time": "9:55:44", "remaining_time": "7:20:29"} +{"current_steps": 2480, "total_steps": 4305, "loss": 0.2623, "lr": 1.8202137045524383e-05, "epoch": 4.032546786004882, "percentage": 57.61, "elapsed_time": "9:56:50", "remaining_time": "7:19:12"} +{"current_steps": 2485, "total_steps": 4305, "loss": 0.2313, "lr": 1.812138607659486e-05, "epoch": 
4.040683482506102, "percentage": 57.72, "elapsed_time": "9:58:07", "remaining_time": "7:18:03"} +{"current_steps": 2490, "total_steps": 4305, "loss": 0.2389, "lr": 1.8040665993366355e-05, "epoch": 4.048820179007323, "percentage": 57.84, "elapsed_time": "9:59:17", "remaining_time": "7:16:49"} +{"current_steps": 2495, "total_steps": 4305, "loss": 0.2565, "lr": 1.795997812293239e-05, "epoch": 4.0569568755085434, "percentage": 57.96, "elapsed_time": "10:00:27", "remaining_time": "7:15:36"} +{"current_steps": 2500, "total_steps": 4305, "loss": 0.264, "lr": 1.7879323791856875e-05, "epoch": 4.065093572009764, "percentage": 58.07, "elapsed_time": "10:01:36", "remaining_time": "7:14:21"} +{"current_steps": 2505, "total_steps": 4305, "loss": 0.257, "lr": 1.7798704326152317e-05, "epoch": 4.073230268510985, "percentage": 58.19, "elapsed_time": "10:02:46", "remaining_time": "7:13:07"} +{"current_steps": 2510, "total_steps": 4305, "loss": 0.262, "lr": 1.7718121051258016e-05, "epoch": 4.081366965012205, "percentage": 58.3, "elapsed_time": "10:03:54", "remaining_time": "7:11:52"} +{"current_steps": 2515, "total_steps": 4305, "loss": 0.2531, "lr": 1.763757529201826e-05, "epoch": 4.089503661513426, "percentage": 58.42, "elapsed_time": "10:05:02", "remaining_time": "7:10:37"} +{"current_steps": 2520, "total_steps": 4305, "loss": 0.2438, "lr": 1.7557068372660562e-05, "epoch": 4.097640358014646, "percentage": 58.54, "elapsed_time": "10:06:14", "remaining_time": "7:09:24"} +{"current_steps": 2525, "total_steps": 4305, "loss": 0.255, "lr": 1.747660161677387e-05, "epoch": 4.105777054515866, "percentage": 58.65, "elapsed_time": "10:07:22", "remaining_time": "7:08:10"} +{"current_steps": 2530, "total_steps": 4305, "loss": 0.2581, "lr": 1.7396176347286838e-05, "epoch": 4.113913751017087, "percentage": 58.77, "elapsed_time": "10:08:36", "remaining_time": "7:06:59"} +{"current_steps": 2535, "total_steps": 4305, "loss": 0.2557, "lr": 1.7315793886446036e-05, "epoch": 4.1220504475183075, 
"percentage": 58.89, "elapsed_time": "10:09:47", "remaining_time": "7:05:46"} +{"current_steps": 2540, "total_steps": 4305, "loss": 0.2373, "lr": 1.7235455555794236e-05, "epoch": 4.130187144019528, "percentage": 59.0, "elapsed_time": "10:10:59", "remaining_time": "7:04:34"} +{"current_steps": 2545, "total_steps": 4305, "loss": 0.2257, "lr": 1.7155162676148682e-05, "epoch": 4.138323840520749, "percentage": 59.12, "elapsed_time": "10:12:16", "remaining_time": "7:03:25"} +{"current_steps": 2550, "total_steps": 4305, "loss": 0.2287, "lr": 1.707491656757936e-05, "epoch": 4.146460537021969, "percentage": 59.23, "elapsed_time": "10:13:28", "remaining_time": "7:02:12"} +{"current_steps": 2555, "total_steps": 4305, "loss": 0.2367, "lr": 1.6994718549387332e-05, "epoch": 4.15459723352319, "percentage": 59.35, "elapsed_time": "10:14:41", "remaining_time": "7:01:01"} +{"current_steps": 2560, "total_steps": 4305, "loss": 0.2657, "lr": 1.6914569940083004e-05, "epoch": 4.16273393002441, "percentage": 59.47, "elapsed_time": "10:15:54", "remaining_time": "6:59:49"} +{"current_steps": 2565, "total_steps": 4305, "loss": 0.2403, "lr": 1.6834472057364462e-05, "epoch": 4.17087062652563, "percentage": 59.58, "elapsed_time": "10:17:04", "remaining_time": "6:58:36"} +{"current_steps": 2570, "total_steps": 4305, "loss": 0.2421, "lr": 1.6754426218095827e-05, "epoch": 4.179007323026851, "percentage": 59.7, "elapsed_time": "10:18:15", "remaining_time": "6:57:23"} +{"current_steps": 2575, "total_steps": 4305, "loss": 0.2587, "lr": 1.6674433738285573e-05, "epoch": 4.187144019528072, "percentage": 59.81, "elapsed_time": "10:19:30", "remaining_time": "6:56:12"} +{"current_steps": 2580, "total_steps": 4305, "loss": 0.2499, "lr": 1.6594495933064926e-05, "epoch": 4.195280716029292, "percentage": 59.93, "elapsed_time": "10:20:44", "remaining_time": "6:55:01"} +{"current_steps": 2585, "total_steps": 4305, "loss": 0.2664, "lr": 1.6514614116666213e-05, "epoch": 4.203417412530513, "percentage": 60.05, 
"elapsed_time": "10:21:58", "remaining_time": "6:53:51"} +{"current_steps": 2590, "total_steps": 4305, "loss": 0.2525, "lr": 1.6434789602401264e-05, "epoch": 4.211554109031733, "percentage": 60.16, "elapsed_time": "10:23:10", "remaining_time": "6:52:38"} +{"current_steps": 2595, "total_steps": 4305, "loss": 0.2676, "lr": 1.6355023702639835e-05, "epoch": 4.219690805532954, "percentage": 60.28, "elapsed_time": "10:24:20", "remaining_time": "6:51:24"} +{"current_steps": 2600, "total_steps": 4305, "loss": 0.2626, "lr": 1.6275317728787995e-05, "epoch": 4.227827502034174, "percentage": 60.39, "elapsed_time": "10:25:26", "remaining_time": "6:50:09"} +{"current_steps": 2605, "total_steps": 4305, "loss": 0.2436, "lr": 1.6195672991266627e-05, "epoch": 4.235964198535394, "percentage": 60.51, "elapsed_time": "10:26:38", "remaining_time": "6:48:56"} +{"current_steps": 2610, "total_steps": 4305, "loss": 0.263, "lr": 1.6116090799489817e-05, "epoch": 4.244100895036615, "percentage": 60.63, "elapsed_time": "10:27:44", "remaining_time": "6:47:40"} +{"current_steps": 2615, "total_steps": 4305, "loss": 0.2449, "lr": 1.603657246184337e-05, "epoch": 4.252237591537836, "percentage": 60.74, "elapsed_time": "10:28:57", "remaining_time": "6:46:28"} +{"current_steps": 2620, "total_steps": 4305, "loss": 0.2555, "lr": 1.5957119285663276e-05, "epoch": 4.260374288039056, "percentage": 60.86, "elapsed_time": "10:30:08", "remaining_time": "6:45:15"} +{"current_steps": 2625, "total_steps": 4305, "loss": 0.2713, "lr": 1.5877732577214227e-05, "epoch": 4.268510984540277, "percentage": 60.98, "elapsed_time": "10:31:23", "remaining_time": "6:44:05"} +{"current_steps": 2630, "total_steps": 4305, "loss": 0.2486, "lr": 1.5798413641668152e-05, "epoch": 4.2766476810414975, "percentage": 61.09, "elapsed_time": "10:32:34", "remaining_time": "6:42:52"} +{"current_steps": 2635, "total_steps": 4305, "loss": 0.2687, "lr": 1.5719163783082735e-05, "epoch": 4.284784377542717, "percentage": 61.21, "elapsed_time": 
"10:33:46", "remaining_time": "6:41:40"} +{"current_steps": 2640, "total_steps": 4305, "loss": 0.2536, "lr": 1.563998430437999e-05, "epoch": 4.292921074043938, "percentage": 61.32, "elapsed_time": "10:34:58", "remaining_time": "6:40:28"} +{"current_steps": 2645, "total_steps": 4305, "loss": 0.2216, "lr": 1.556087650732483e-05, "epoch": 4.3010577705451585, "percentage": 61.44, "elapsed_time": "10:36:07", "remaining_time": "6:39:13"} +{"current_steps": 2650, "total_steps": 4305, "loss": 0.2596, "lr": 1.5481841692503696e-05, "epoch": 4.309194467046379, "percentage": 61.56, "elapsed_time": "10:37:17", "remaining_time": "6:38:00"} +{"current_steps": 2655, "total_steps": 4305, "loss": 0.2641, "lr": 1.5402881159303132e-05, "epoch": 4.3173311635476, "percentage": 61.67, "elapsed_time": "10:38:34", "remaining_time": "6:36:51"} +{"current_steps": 2660, "total_steps": 4305, "loss": 0.2625, "lr": 1.5323996205888444e-05, "epoch": 4.32546786004882, "percentage": 61.79, "elapsed_time": "10:39:48", "remaining_time": "6:35:40"} +{"current_steps": 2665, "total_steps": 4305, "loss": 0.2466, "lr": 1.5245188129182352e-05, "epoch": 4.333604556550041, "percentage": 61.9, "elapsed_time": "10:40:58", "remaining_time": "6:34:26"} +{"current_steps": 2670, "total_steps": 4305, "loss": 0.2356, "lr": 1.5166458224843666e-05, "epoch": 4.341741253051262, "percentage": 62.02, "elapsed_time": "10:42:08", "remaining_time": "6:33:13"} +{"current_steps": 2675, "total_steps": 4305, "loss": 0.2478, "lr": 1.5087807787246018e-05, "epoch": 4.349877949552481, "percentage": 62.14, "elapsed_time": "10:43:21", "remaining_time": "6:32:01"} +{"current_steps": 2680, "total_steps": 4305, "loss": 0.2588, "lr": 1.5009238109456519e-05, "epoch": 4.358014646053702, "percentage": 62.25, "elapsed_time": "10:44:38", "remaining_time": "6:30:52"} +{"current_steps": 2685, "total_steps": 4305, "loss": 0.2338, "lr": 1.4930750483214545e-05, "epoch": 4.3661513425549225, "percentage": 62.37, "elapsed_time": "10:45:48", 
"remaining_time": "6:29:39"} +{"current_steps": 2690, "total_steps": 4305, "loss": 0.2538, "lr": 1.485234619891049e-05, "epoch": 4.374288039056143, "percentage": 62.49, "elapsed_time": "10:46:55", "remaining_time": "6:28:23"} +{"current_steps": 2695, "total_steps": 4305, "loss": 0.2493, "lr": 1.4774026545564542e-05, "epoch": 4.382424735557364, "percentage": 62.6, "elapsed_time": "10:48:06", "remaining_time": "6:27:10"} +{"current_steps": 2700, "total_steps": 4305, "loss": 0.2634, "lr": 1.4695792810805513e-05, "epoch": 4.390561432058584, "percentage": 62.72, "elapsed_time": "10:49:18", "remaining_time": "6:25:58"} +{"current_steps": 2705, "total_steps": 4305, "loss": 0.2774, "lr": 1.4617646280849642e-05, "epoch": 4.398698128559805, "percentage": 62.83, "elapsed_time": "10:50:33", "remaining_time": "6:24:48"} +{"current_steps": 2710, "total_steps": 4305, "loss": 0.2523, "lr": 1.4539588240479465e-05, "epoch": 4.406834825061026, "percentage": 62.95, "elapsed_time": "10:51:40", "remaining_time": "6:23:33"} +{"current_steps": 2715, "total_steps": 4305, "loss": 0.2471, "lr": 1.4461619973022687e-05, "epoch": 4.414971521562245, "percentage": 63.07, "elapsed_time": "10:52:56", "remaining_time": "6:22:23"} +{"current_steps": 2720, "total_steps": 4305, "loss": 0.2458, "lr": 1.4383742760331076e-05, "epoch": 4.423108218063466, "percentage": 63.18, "elapsed_time": "10:54:09", "remaining_time": "6:21:11"} +{"current_steps": 2725, "total_steps": 4305, "loss": 0.2536, "lr": 1.4305957882759427e-05, "epoch": 4.431244914564687, "percentage": 63.3, "elapsed_time": "10:55:18", "remaining_time": "6:19:57"} +{"current_steps": 2730, "total_steps": 4305, "loss": 0.2658, "lr": 1.4228266619144453e-05, "epoch": 4.439381611065907, "percentage": 63.41, "elapsed_time": "10:56:24", "remaining_time": "6:18:41"} +{"current_steps": 2735, "total_steps": 4305, "loss": 0.229, "lr": 1.4150670246783799e-05, "epoch": 4.447518307567128, "percentage": 63.53, "elapsed_time": "10:57:37", "remaining_time": 
"6:17:30"} +{"current_steps": 2740, "total_steps": 4305, "loss": 0.2428, "lr": 1.4073170041415028e-05, "epoch": 4.4556550040683485, "percentage": 63.65, "elapsed_time": "10:58:54", "remaining_time": "6:16:20"} +{"current_steps": 2745, "total_steps": 4305, "loss": 0.2429, "lr": 1.3995767277194665e-05, "epoch": 4.463791700569569, "percentage": 63.76, "elapsed_time": "11:00:10", "remaining_time": "6:15:11"} +{"current_steps": 2750, "total_steps": 4305, "loss": 0.2338, "lr": 1.391846322667722e-05, "epoch": 4.471928397070789, "percentage": 63.88, "elapsed_time": "11:01:20", "remaining_time": "6:13:57"} +{"current_steps": 2755, "total_steps": 4305, "loss": 0.256, "lr": 1.3841259160794298e-05, "epoch": 4.480065093572009, "percentage": 64.0, "elapsed_time": "11:02:31", "remaining_time": "6:12:44"} +{"current_steps": 2760, "total_steps": 4305, "loss": 0.2492, "lr": 1.3764156348833666e-05, "epoch": 4.48820179007323, "percentage": 64.11, "elapsed_time": "11:03:41", "remaining_time": "6:11:31"} +{"current_steps": 2765, "total_steps": 4305, "loss": 0.2305, "lr": 1.3687156058418422e-05, "epoch": 4.496338486574451, "percentage": 64.23, "elapsed_time": "11:04:55", "remaining_time": "6:10:20"} +{"current_steps": 2770, "total_steps": 4305, "loss": 0.2665, "lr": 1.3610259555486152e-05, "epoch": 4.504475183075671, "percentage": 64.34, "elapsed_time": "11:06:13", "remaining_time": "6:09:11"} +{"current_steps": 2775, "total_steps": 4305, "loss": 0.275, "lr": 1.3533468104268078e-05, "epoch": 4.512611879576892, "percentage": 64.46, "elapsed_time": "11:07:23", "remaining_time": "6:07:57"} +{"current_steps": 2780, "total_steps": 4305, "loss": 0.2492, "lr": 1.3456782967268316e-05, "epoch": 4.5207485760781125, "percentage": 64.58, "elapsed_time": "11:08:29", "remaining_time": "6:06:42"} +{"current_steps": 2785, "total_steps": 4305, "loss": 0.2423, "lr": 1.3380205405243096e-05, "epoch": 4.528885272579333, "percentage": 64.69, "elapsed_time": "11:09:43", "remaining_time": "6:05:31"} 
+{"current_steps": 2790, "total_steps": 4305, "loss": 0.2357, "lr": 1.3303736677180044e-05, "epoch": 4.537021969080554, "percentage": 64.81, "elapsed_time": "11:10:52", "remaining_time": "6:04:17"} +{"current_steps": 2795, "total_steps": 4305, "loss": 0.2452, "lr": 1.322737804027749e-05, "epoch": 4.5451586655817735, "percentage": 64.92, "elapsed_time": "11:12:11", "remaining_time": "6:03:08"} +{"current_steps": 2800, "total_steps": 4305, "loss": 0.255, "lr": 1.315113074992378e-05, "epoch": 4.553295362082994, "percentage": 65.04, "elapsed_time": "11:13:21", "remaining_time": "6:01:55"} +{"current_steps": 2805, "total_steps": 4305, "loss": 0.2383, "lr": 1.3074996059676644e-05, "epoch": 4.561432058584215, "percentage": 65.16, "elapsed_time": "11:14:36", "remaining_time": "6:00:45"} +{"current_steps": 2810, "total_steps": 4305, "loss": 0.2341, "lr": 1.2998975221242596e-05, "epoch": 4.569568755085435, "percentage": 65.27, "elapsed_time": "11:15:47", "remaining_time": "5:59:32"} +{"current_steps": 2815, "total_steps": 4305, "loss": 0.2527, "lr": 1.292306948445634e-05, "epoch": 4.577705451586656, "percentage": 65.39, "elapsed_time": "11:16:56", "remaining_time": "5:58:18"} +{"current_steps": 2820, "total_steps": 4305, "loss": 0.2361, "lr": 1.2847280097260245e-05, "epoch": 4.585842148087877, "percentage": 65.51, "elapsed_time": "11:18:12", "remaining_time": "5:57:08"} +{"current_steps": 2825, "total_steps": 4305, "loss": 0.2423, "lr": 1.2771608305683798e-05, "epoch": 4.593978844589097, "percentage": 65.62, "elapsed_time": "11:19:24", "remaining_time": "5:55:56"} +{"current_steps": 2830, "total_steps": 4305, "loss": 0.2547, "lr": 1.269605535382314e-05, "epoch": 4.602115541090317, "percentage": 65.74, "elapsed_time": "11:20:34", "remaining_time": "5:54:43"} +{"current_steps": 2835, "total_steps": 4305, "loss": 0.2402, "lr": 1.2620622483820604e-05, "epoch": 4.610252237591538, "percentage": 65.85, "elapsed_time": "11:21:42", "remaining_time": "5:53:28"} +{"current_steps": 
2840, "total_steps": 4305, "loss": 0.2473, "lr": 1.2545310935844288e-05, "epoch": 4.618388934092758, "percentage": 65.97, "elapsed_time": "11:22:55", "remaining_time": "5:52:17"} +{"current_steps": 2845, "total_steps": 4305, "loss": 0.2656, "lr": 1.2470121948067693e-05, "epoch": 4.626525630593979, "percentage": 66.09, "elapsed_time": "11:24:05", "remaining_time": "5:51:03"} +{"current_steps": 2850, "total_steps": 4305, "loss": 0.241, "lr": 1.2395056756649328e-05, "epoch": 4.634662327095199, "percentage": 66.2, "elapsed_time": "11:25:15", "remaining_time": "5:49:50"} +{"current_steps": 2855, "total_steps": 4305, "loss": 0.2599, "lr": 1.2320116595712413e-05, "epoch": 4.64279902359642, "percentage": 66.32, "elapsed_time": "11:26:29", "remaining_time": "5:48:39"} +{"current_steps": 2860, "total_steps": 4305, "loss": 0.2535, "lr": 1.224530269732457e-05, "epoch": 4.650935720097641, "percentage": 66.43, "elapsed_time": "11:27:39", "remaining_time": "5:47:26"} +{"current_steps": 2865, "total_steps": 4305, "loss": 0.2532, "lr": 1.2170616291477595e-05, "epoch": 4.65907241659886, "percentage": 66.55, "elapsed_time": "11:28:49", "remaining_time": "5:46:12"} +{"current_steps": 2870, "total_steps": 4305, "loss": 0.257, "lr": 1.2096058606067205e-05, "epoch": 4.667209113100081, "percentage": 66.67, "elapsed_time": "11:30:00", "remaining_time": "5:45:00"} +{"current_steps": 2875, "total_steps": 4305, "loss": 0.2523, "lr": 1.2021630866872877e-05, "epoch": 4.675345809601302, "percentage": 66.78, "elapsed_time": "11:31:16", "remaining_time": "5:43:50"} +{"current_steps": 2880, "total_steps": 4305, "loss": 0.2564, "lr": 1.1947334297537675e-05, "epoch": 4.683482506102522, "percentage": 66.9, "elapsed_time": "11:32:31", "remaining_time": "5:42:39"} +{"current_steps": 2885, "total_steps": 4305, "loss": 0.2433, "lr": 1.1873170119548134e-05, "epoch": 4.691619202603743, "percentage": 67.02, "elapsed_time": "11:33:36", "remaining_time": "5:41:23"} +{"current_steps": 2890, "total_steps": 4305, 
"loss": 0.2656, "lr": 1.1799139552214202e-05, "epoch": 4.6997558991049635, "percentage": 67.13, "elapsed_time": "11:34:45", "remaining_time": "5:40:10"} +{"current_steps": 2895, "total_steps": 4305, "loss": 0.25, "lr": 1.1725243812649168e-05, "epoch": 4.707892595606184, "percentage": 67.25, "elapsed_time": "11:35:56", "remaining_time": "5:38:57"} +{"current_steps": 2900, "total_steps": 4305, "loss": 0.2542, "lr": 1.1651484115749647e-05, "epoch": 4.716029292107405, "percentage": 67.36, "elapsed_time": "11:37:01", "remaining_time": "5:37:41"} +{"current_steps": 2905, "total_steps": 4305, "loss": 0.2434, "lr": 1.1577861674175645e-05, "epoch": 4.724165988608625, "percentage": 67.48, "elapsed_time": "11:38:15", "remaining_time": "5:36:30"} +{"current_steps": 2910, "total_steps": 4305, "loss": 0.2636, "lr": 1.1504377698330575e-05, "epoch": 4.732302685109845, "percentage": 67.6, "elapsed_time": "11:39:29", "remaining_time": "5:35:19"} +{"current_steps": 2915, "total_steps": 4305, "loss": 0.2561, "lr": 1.1431033396341391e-05, "epoch": 4.740439381611066, "percentage": 67.71, "elapsed_time": "11:40:42", "remaining_time": "5:34:07"} +{"current_steps": 2920, "total_steps": 4305, "loss": 0.2688, "lr": 1.1357829974038703e-05, "epoch": 4.748576078112286, "percentage": 67.83, "elapsed_time": "11:41:54", "remaining_time": "5:32:55"} +{"current_steps": 2925, "total_steps": 4305, "loss": 0.2485, "lr": 1.1284768634936971e-05, "epoch": 4.756712774613507, "percentage": 67.94, "elapsed_time": "11:43:02", "remaining_time": "5:31:41"} +{"current_steps": 2930, "total_steps": 4305, "loss": 0.2424, "lr": 1.1211850580214703e-05, "epoch": 4.764849471114728, "percentage": 68.06, "elapsed_time": "11:44:17", "remaining_time": "5:30:30"} +{"current_steps": 2935, "total_steps": 4305, "loss": 0.2485, "lr": 1.1139077008694712e-05, "epoch": 4.772986167615948, "percentage": 68.18, "elapsed_time": "11:45:34", "remaining_time": "5:29:21"} +{"current_steps": 2940, "total_steps": 4305, "loss": 0.2454, "lr": 
1.1066449116824428e-05, "epoch": 4.781122864117169, "percentage": 68.29, "elapsed_time": "11:46:44", "remaining_time": "5:28:07"} +{"current_steps": 2945, "total_steps": 4305, "loss": 0.2653, "lr": 1.099396809865618e-05, "epoch": 4.7892595606183885, "percentage": 68.41, "elapsed_time": "11:47:54", "remaining_time": "5:26:54"} +{"current_steps": 2950, "total_steps": 4305, "loss": 0.2416, "lr": 1.0921635145827611e-05, "epoch": 4.797396257119609, "percentage": 68.52, "elapsed_time": "11:49:04", "remaining_time": "5:25:41"} +{"current_steps": 2955, "total_steps": 4305, "loss": 0.2495, "lr": 1.0849451447542054e-05, "epoch": 4.80553295362083, "percentage": 68.64, "elapsed_time": "11:50:15", "remaining_time": "5:24:28"} +{"current_steps": 2960, "total_steps": 4305, "loss": 0.2512, "lr": 1.0777418190549018e-05, "epoch": 4.81366965012205, "percentage": 68.76, "elapsed_time": "11:51:25", "remaining_time": "5:23:15"} +{"current_steps": 2965, "total_steps": 4305, "loss": 0.2685, "lr": 1.070553655912463e-05, "epoch": 4.821806346623271, "percentage": 68.87, "elapsed_time": "11:52:38", "remaining_time": "5:22:04"} +{"current_steps": 2970, "total_steps": 4305, "loss": 0.2639, "lr": 1.0633807735052202e-05, "epoch": 4.829943043124492, "percentage": 68.99, "elapsed_time": "11:53:49", "remaining_time": "5:20:51"} +{"current_steps": 2975, "total_steps": 4305, "loss": 0.2399, "lr": 1.056223289760278e-05, "epoch": 4.838079739625712, "percentage": 69.11, "elapsed_time": "11:55:04", "remaining_time": "5:19:40"} +{"current_steps": 2980, "total_steps": 4305, "loss": 0.2487, "lr": 1.0490813223515764e-05, "epoch": 4.846216436126932, "percentage": 69.22, "elapsed_time": "11:56:09", "remaining_time": "5:18:25"} +{"current_steps": 2985, "total_steps": 4305, "loss": 0.2585, "lr": 1.0419549886979582e-05, "epoch": 4.854353132628153, "percentage": 69.34, "elapsed_time": "11:57:17", "remaining_time": "5:17:11"} +{"current_steps": 2990, "total_steps": 4305, "loss": 0.2537, "lr": 1.0348444059612338e-05, 
"epoch": 4.862489829129373, "percentage": 69.45, "elapsed_time": "11:58:28", "remaining_time": "5:15:59"} +{"current_steps": 2995, "total_steps": 4305, "loss": 0.2613, "lr": 1.0277496910442596e-05, "epoch": 4.870626525630594, "percentage": 69.57, "elapsed_time": "11:59:38", "remaining_time": "5:14:46"} +{"current_steps": 3000, "total_steps": 4305, "loss": 0.2626, "lr": 1.0206709605890133e-05, "epoch": 4.8787632221318145, "percentage": 69.69, "elapsed_time": "12:00:41", "remaining_time": "5:13:29"} +{"current_steps": 3005, "total_steps": 4305, "loss": 0.2623, "lr": 1.0136083309746765e-05, "epoch": 4.886899918633035, "percentage": 69.8, "elapsed_time": "12:02:30", "remaining_time": "5:12:33"} +{"current_steps": 3010, "total_steps": 4305, "loss": 0.2595, "lr": 1.006561918315724e-05, "epoch": 4.895036615134256, "percentage": 69.92, "elapsed_time": "12:03:39", "remaining_time": "5:11:20"} +{"current_steps": 3015, "total_steps": 4305, "loss": 0.2513, "lr": 9.995318384600112e-06, "epoch": 4.903173311635476, "percentage": 70.03, "elapsed_time": "12:04:49", "remaining_time": "5:10:07"} +{"current_steps": 3020, "total_steps": 4305, "loss": 0.2395, "lr": 9.92518206986871e-06, "epoch": 4.911310008136697, "percentage": 70.15, "elapsed_time": "12:05:57", "remaining_time": "5:08:53"} +{"current_steps": 3025, "total_steps": 4305, "loss": 0.2584, "lr": 9.855211392052139e-06, "epoch": 4.919446704637917, "percentage": 70.27, "elapsed_time": "12:07:10", "remaining_time": "5:07:41"} +{"current_steps": 3030, "total_steps": 4305, "loss": 0.2467, "lr": 9.78540750151632e-06, "epoch": 4.927583401139137, "percentage": 70.38, "elapsed_time": "12:08:24", "remaining_time": "5:06:30"} +{"current_steps": 3035, "total_steps": 4305, "loss": 0.2668, "lr": 9.715771545885076e-06, "epoch": 4.935720097640358, "percentage": 70.5, "elapsed_time": "12:09:35", "remaining_time": "5:05:17"} +{"current_steps": 3040, "total_steps": 4305, "loss": 0.2528, "lr": 9.646304670021263e-06, "epoch": 4.9438567941415785, 
"percentage": 70.62, "elapsed_time": "12:10:48", "remaining_time": "5:04:06"} +{"current_steps": 3045, "total_steps": 4305, "loss": 0.2683, "lr": 9.577008016007956e-06, "epoch": 4.951993490642799, "percentage": 70.73, "elapsed_time": "12:11:54", "remaining_time": "5:02:51"} +{"current_steps": 3050, "total_steps": 4305, "loss": 0.2623, "lr": 9.50788272312966e-06, "epoch": 4.96013018714402, "percentage": 70.85, "elapsed_time": "12:13:06", "remaining_time": "5:01:39"} +{"current_steps": 3055, "total_steps": 4305, "loss": 0.2595, "lr": 9.43892992785358e-06, "epoch": 4.96826688364524, "percentage": 70.96, "elapsed_time": "12:14:24", "remaining_time": "5:00:29"} +{"current_steps": 3060, "total_steps": 4305, "loss": 0.2569, "lr": 9.370150763810966e-06, "epoch": 4.97640358014646, "percentage": 71.08, "elapsed_time": "12:15:26", "remaining_time": "4:59:13"} +{"current_steps": 3065, "total_steps": 4305, "loss": 0.2457, "lr": 9.301546361778424e-06, "epoch": 4.984540276647681, "percentage": 71.2, "elapsed_time": "12:16:42", "remaining_time": "4:58:02"} +{"current_steps": 3070, "total_steps": 4305, "loss": 0.273, "lr": 9.233117849659367e-06, "epoch": 4.992676973148901, "percentage": 71.31, "elapsed_time": "12:17:53", "remaining_time": "4:56:50"} +{"current_steps": 3075, "total_steps": 4305, "loss": 0.2478, "lr": 9.164866352465447e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "12:19:01", "remaining_time": "4:55:36"} +{"current_steps": 3080, "total_steps": 4305, "loss": 0.2474, "lr": 9.096792992298089e-06, "epoch": 5.008136696501221, "percentage": 71.54, "elapsed_time": "12:20:11", "remaining_time": "4:54:23"} +{"current_steps": 3085, "total_steps": 4305, "loss": 0.2405, "lr": 9.028898888330005e-06, "epoch": 5.016273393002441, "percentage": 71.66, "elapsed_time": "12:21:22", "remaining_time": "4:53:11"} +{"current_steps": 3090, "total_steps": 4305, "loss": 0.2584, "lr": 8.961185156786815e-06, "epoch": 5.024410089503662, "percentage": 71.78, "elapsed_time": "12:22:32", 
"remaining_time": "4:51:58"} +{"current_steps": 3095, "total_steps": 4305, "loss": 0.236, "lr": 8.893652910928698e-06, "epoch": 5.032546786004882, "percentage": 71.89, "elapsed_time": "12:23:44", "remaining_time": "4:50:46"} +{"current_steps": 3100, "total_steps": 4305, "loss": 0.2596, "lr": 8.826303261032072e-06, "epoch": 5.040683482506102, "percentage": 72.01, "elapsed_time": "12:24:51", "remaining_time": "4:49:32"} +{"current_steps": 3105, "total_steps": 4305, "loss": 0.245, "lr": 8.759137314371378e-06, "epoch": 5.048820179007323, "percentage": 72.13, "elapsed_time": "12:26:07", "remaining_time": "4:48:21"} +{"current_steps": 3110, "total_steps": 4305, "loss": 0.2505, "lr": 8.692156175200823e-06, "epoch": 5.0569568755085434, "percentage": 72.24, "elapsed_time": "12:27:18", "remaining_time": "4:47:08"} +{"current_steps": 3115, "total_steps": 4305, "loss": 0.2436, "lr": 8.625360944736262e-06, "epoch": 5.065093572009764, "percentage": 72.36, "elapsed_time": "12:28:35", "remaining_time": "4:45:58"} +{"current_steps": 3120, "total_steps": 4305, "loss": 0.2469, "lr": 8.558752721137089e-06, "epoch": 5.073230268510985, "percentage": 72.47, "elapsed_time": "12:29:48", "remaining_time": "4:44:46"} +{"current_steps": 3125, "total_steps": 4305, "loss": 0.2324, "lr": 8.492332599488157e-06, "epoch": 5.081366965012205, "percentage": 72.59, "elapsed_time": "12:30:54", "remaining_time": "4:43:32"} +{"current_steps": 3130, "total_steps": 4305, "loss": 0.2397, "lr": 8.42610167178183e-06, "epoch": 5.089503661513426, "percentage": 72.71, "elapsed_time": "12:32:05", "remaining_time": "4:42:20"} +{"current_steps": 3135, "total_steps": 4305, "loss": 0.2207, "lr": 8.360061026899962e-06, "epoch": 5.097640358014646, "percentage": 72.82, "elapsed_time": "12:33:22", "remaining_time": "4:41:09"} +{"current_steps": 3140, "total_steps": 4305, "loss": 0.2457, "lr": 8.294211750596035e-06, "epoch": 5.105777054515866, "percentage": 72.94, "elapsed_time": "12:34:37", "remaining_time": "4:39:58"} 
+{"current_steps": 3145, "total_steps": 4305, "loss": 0.2349, "lr": 8.228554925477306e-06, "epoch": 5.113913751017087, "percentage": 73.05, "elapsed_time": "12:35:51", "remaining_time": "4:38:47"} +{"current_steps": 3150, "total_steps": 4305, "loss": 0.2327, "lr": 8.163091630987e-06, "epoch": 5.1220504475183075, "percentage": 73.17, "elapsed_time": "12:37:05", "remaining_time": "4:37:35"} +{"current_steps": 3155, "total_steps": 4305, "loss": 0.2308, "lr": 8.097822943386563e-06, "epoch": 5.130187144019528, "percentage": 73.29, "elapsed_time": "12:38:18", "remaining_time": "4:36:24"} +{"current_steps": 3160, "total_steps": 4305, "loss": 0.2424, "lr": 8.03274993573797e-06, "epoch": 5.138323840520749, "percentage": 73.4, "elapsed_time": "12:39:23", "remaining_time": "4:35:09"} +{"current_steps": 3165, "total_steps": 4305, "loss": 0.2401, "lr": 7.96787367788609e-06, "epoch": 5.146460537021969, "percentage": 73.52, "elapsed_time": "12:40:38", "remaining_time": "4:33:58"} +{"current_steps": 3170, "total_steps": 4305, "loss": 0.2505, "lr": 7.903195236441086e-06, "epoch": 5.15459723352319, "percentage": 73.64, "elapsed_time": "12:41:49", "remaining_time": "4:32:45"} +{"current_steps": 3175, "total_steps": 4305, "loss": 0.2298, "lr": 7.838715674760874e-06, "epoch": 5.16273393002441, "percentage": 73.75, "elapsed_time": "12:43:01", "remaining_time": "4:31:33"} +{"current_steps": 3180, "total_steps": 4305, "loss": 0.2413, "lr": 7.774436052933675e-06, "epoch": 5.17087062652563, "percentage": 73.87, "elapsed_time": "12:44:11", "remaining_time": "4:30:21"} +{"current_steps": 3185, "total_steps": 4305, "loss": 0.2339, "lr": 7.710357427760541e-06, "epoch": 5.179007323026851, "percentage": 73.98, "elapsed_time": "12:45:24", "remaining_time": "4:29:09"} +{"current_steps": 3190, "total_steps": 4305, "loss": 0.2286, "lr": 7.646480852738008e-06, "epoch": 5.187144019528072, "percentage": 74.1, "elapsed_time": "12:46:34", "remaining_time": "4:27:56"} +{"current_steps": 3195, 
"total_steps": 4305, "loss": 0.2188, "lr": 7.5828073780407575e-06, "epoch": 5.195280716029292, "percentage": 74.22, "elapsed_time": "12:47:50", "remaining_time": "4:26:45"} +{"current_steps": 3200, "total_steps": 4305, "loss": 0.2315, "lr": 7.51933805050439e-06, "epoch": 5.203417412530513, "percentage": 74.33, "elapsed_time": "12:49:02", "remaining_time": "4:25:33"} +{"current_steps": 3205, "total_steps": 4305, "loss": 0.243, "lr": 7.45607391360816e-06, "epoch": 5.211554109031733, "percentage": 74.45, "elapsed_time": "12:50:05", "remaining_time": "4:24:18"} +{"current_steps": 3210, "total_steps": 4305, "loss": 0.2561, "lr": 7.393016007457858e-06, "epoch": 5.219690805532954, "percentage": 74.56, "elapsed_time": "12:51:14", "remaining_time": "4:23:05"} +{"current_steps": 3215, "total_steps": 4305, "loss": 0.2272, "lr": 7.3301653687687005e-06, "epoch": 5.227827502034174, "percentage": 74.68, "elapsed_time": "12:52:27", "remaining_time": "4:21:53"} +{"current_steps": 3220, "total_steps": 4305, "loss": 0.2431, "lr": 7.2675230308482715e-06, "epoch": 5.235964198535394, "percentage": 74.8, "elapsed_time": "12:53:41", "remaining_time": "4:20:41"} +{"current_steps": 3225, "total_steps": 4305, "loss": 0.2533, "lr": 7.205090023579575e-06, "epoch": 5.244100895036615, "percentage": 74.91, "elapsed_time": "12:54:56", "remaining_time": "4:19:30"} +{"current_steps": 3230, "total_steps": 4305, "loss": 0.2374, "lr": 7.142867373404054e-06, "epoch": 5.252237591537836, "percentage": 75.03, "elapsed_time": "12:56:07", "remaining_time": "4:18:18"} +{"current_steps": 3235, "total_steps": 4305, "loss": 0.237, "lr": 7.080856103304739e-06, "epoch": 5.260374288039056, "percentage": 75.15, "elapsed_time": "12:57:21", "remaining_time": "4:17:06"} +{"current_steps": 3240, "total_steps": 4305, "loss": 0.2432, "lr": 7.019057232789432e-06, "epoch": 5.268510984540277, "percentage": 75.26, "elapsed_time": "12:58:32", "remaining_time": "4:15:54"} +{"current_steps": 3245, "total_steps": 4305, "loss": 
0.2461, "lr": 6.95747177787393e-06, "epoch": 5.2766476810414975, "percentage": 75.38, "elapsed_time": "12:59:44", "remaining_time": "4:14:42"} +{"current_steps": 3250, "total_steps": 4305, "loss": 0.2296, "lr": 6.896100751065355e-06, "epoch": 5.284784377542717, "percentage": 75.49, "elapsed_time": "13:00:58", "remaining_time": "4:13:31"} +{"current_steps": 3255, "total_steps": 4305, "loss": 0.2316, "lr": 6.834945161345458e-06, "epoch": 5.292921074043938, "percentage": 75.61, "elapsed_time": "13:02:11", "remaining_time": "4:12:19"} +{"current_steps": 3260, "total_steps": 4305, "loss": 0.2573, "lr": 6.7740060141540735e-06, "epoch": 5.3010577705451585, "percentage": 75.73, "elapsed_time": "13:03:21", "remaining_time": "4:11:06"} +{"current_steps": 3265, "total_steps": 4305, "loss": 0.261, "lr": 6.713284311372559e-06, "epoch": 5.309194467046379, "percentage": 75.84, "elapsed_time": "13:04:33", "remaining_time": "4:09:54"} +{"current_steps": 3270, "total_steps": 4305, "loss": 0.26, "lr": 6.652781051307347e-06, "epoch": 5.3173311635476, "percentage": 75.96, "elapsed_time": "13:05:45", "remaining_time": "4:08:42"} +{"current_steps": 3275, "total_steps": 4305, "loss": 0.2335, "lr": 6.592497228673515e-06, "epoch": 5.32546786004882, "percentage": 76.07, "elapsed_time": "13:06:52", "remaining_time": "4:07:28"} +{"current_steps": 3280, "total_steps": 4305, "loss": 0.2354, "lr": 6.532433834578449e-06, "epoch": 5.333604556550041, "percentage": 76.19, "elapsed_time": "13:08:06", "remaining_time": "4:06:16"} +{"current_steps": 3285, "total_steps": 4305, "loss": 0.2479, "lr": 6.472591856505526e-06, "epoch": 5.341741253051262, "percentage": 76.31, "elapsed_time": "13:09:15", "remaining_time": "4:05:03"} +{"current_steps": 3290, "total_steps": 4305, "loss": 0.2525, "lr": 6.412972278297893e-06, "epoch": 5.349877949552481, "percentage": 76.42, "elapsed_time": "13:10:27", "remaining_time": "4:03:51"} +{"current_steps": 3295, "total_steps": 4305, "loss": 0.2289, "lr": 
6.353576080142309e-06, "epoch": 5.358014646053702, "percentage": 76.54, "elapsed_time": "13:11:38", "remaining_time": "4:02:39"} +{"current_steps": 3300, "total_steps": 4305, "loss": 0.2363, "lr": 6.294404238552994e-06, "epoch": 5.3661513425549225, "percentage": 76.66, "elapsed_time": "13:12:49", "remaining_time": "4:01:27"} +{"current_steps": 3305, "total_steps": 4305, "loss": 0.2378, "lr": 6.235457726355591e-06, "epoch": 5.374288039056143, "percentage": 76.77, "elapsed_time": "13:13:59", "remaining_time": "4:00:14"} +{"current_steps": 3310, "total_steps": 4305, "loss": 0.243, "lr": 6.176737512671182e-06, "epoch": 5.382424735557364, "percentage": 76.89, "elapsed_time": "13:15:10", "remaining_time": "3:59:02"} +{"current_steps": 3315, "total_steps": 4305, "loss": 0.2434, "lr": 6.11824456290034e-06, "epoch": 5.390561432058584, "percentage": 77.0, "elapsed_time": "13:16:25", "remaining_time": "3:57:50"} +{"current_steps": 3320, "total_steps": 4305, "loss": 0.2343, "lr": 6.05997983870727e-06, "epoch": 5.398698128559805, "percentage": 77.12, "elapsed_time": "13:17:38", "remaining_time": "3:56:38"} +{"current_steps": 3325, "total_steps": 4305, "loss": 0.2486, "lr": 6.00194429800399e-06, "epoch": 5.406834825061026, "percentage": 77.24, "elapsed_time": "13:18:51", "remaining_time": "3:55:27"} +{"current_steps": 3330, "total_steps": 4305, "loss": 0.2362, "lr": 5.944138894934582e-06, "epoch": 5.414971521562245, "percentage": 77.35, "elapsed_time": "13:20:04", "remaining_time": "3:54:15"} +{"current_steps": 3335, "total_steps": 4305, "loss": 0.2228, "lr": 5.886564579859504e-06, "epoch": 5.423108218063466, "percentage": 77.47, "elapsed_time": "13:21:11", "remaining_time": "3:53:01"} +{"current_steps": 3340, "total_steps": 4305, "loss": 0.2256, "lr": 5.829222299339969e-06, "epoch": 5.431244914564687, "percentage": 77.58, "elapsed_time": "13:22:21", "remaining_time": "3:51:49"} +{"current_steps": 3345, "total_steps": 4305, "loss": 0.2588, "lr": 5.772112996122403e-06, "epoch": 
5.439381611065907, "percentage": 77.7, "elapsed_time": "13:23:34", "remaining_time": "3:50:37"} +{"current_steps": 3350, "total_steps": 4305, "loss": 0.2226, "lr": 5.715237609122896e-06, "epoch": 5.447518307567128, "percentage": 77.82, "elapsed_time": "13:24:42", "remaining_time": "3:49:24"} +{"current_steps": 3355, "total_steps": 4305, "loss": 0.2394, "lr": 5.658597073411816e-06, "epoch": 5.4556550040683485, "percentage": 77.93, "elapsed_time": "13:25:51", "remaining_time": "3:48:11"} +{"current_steps": 3360, "total_steps": 4305, "loss": 0.2447, "lr": 5.602192320198401e-06, "epoch": 5.463791700569569, "percentage": 78.05, "elapsed_time": "13:27:05", "remaining_time": "3:46:59"} +{"current_steps": 3365, "total_steps": 4305, "loss": 0.2363, "lr": 5.546024276815467e-06, "epoch": 5.471928397070789, "percentage": 78.16, "elapsed_time": "13:28:18", "remaining_time": "3:45:47"} +{"current_steps": 3370, "total_steps": 4305, "loss": 0.2464, "lr": 5.490093866704171e-06, "epoch": 5.480065093572009, "percentage": 78.28, "elapsed_time": "13:29:25", "remaining_time": "3:44:34"} +{"current_steps": 3375, "total_steps": 4305, "loss": 0.2506, "lr": 5.434402009398798e-06, "epoch": 5.48820179007323, "percentage": 78.4, "elapsed_time": "13:30:33", "remaining_time": "3:43:21"} +{"current_steps": 3380, "total_steps": 4305, "loss": 0.2348, "lr": 5.378949620511671e-06, "epoch": 5.496338486574451, "percentage": 78.51, "elapsed_time": "13:31:37", "remaining_time": "3:42:07"} +{"current_steps": 3385, "total_steps": 4305, "loss": 0.2393, "lr": 5.3237376117180854e-06, "epoch": 5.504475183075671, "percentage": 78.63, "elapsed_time": "13:32:45", "remaining_time": "3:40:53"} +{"current_steps": 3390, "total_steps": 4305, "loss": 0.2582, "lr": 5.268766890741315e-06, "epoch": 5.512611879576892, "percentage": 78.75, "elapsed_time": "13:34:00", "remaining_time": "3:39:42"} +{"current_steps": 3395, "total_steps": 4305, "loss": 0.2594, "lr": 5.214038361337719e-06, "epoch": 5.5207485760781125, 
"percentage": 78.86, "elapsed_time": "13:35:10", "remaining_time": "3:38:30"} +{"current_steps": 3400, "total_steps": 4305, "loss": 0.259, "lr": 5.159552923281841e-06, "epoch": 5.528885272579333, "percentage": 78.98, "elapsed_time": "13:36:20", "remaining_time": "3:37:17"} +{"current_steps": 3405, "total_steps": 4305, "loss": 0.2308, "lr": 5.105311472351639e-06, "epoch": 5.537021969080554, "percentage": 79.09, "elapsed_time": "13:37:30", "remaining_time": "3:36:04"} +{"current_steps": 3410, "total_steps": 4305, "loss": 0.2351, "lr": 5.051314900313764e-06, "epoch": 5.5451586655817735, "percentage": 79.21, "elapsed_time": "13:38:43", "remaining_time": "3:34:53"} +{"current_steps": 3415, "total_steps": 4305, "loss": 0.2508, "lr": 4.997564094908878e-06, "epoch": 5.553295362082994, "percentage": 79.33, "elapsed_time": "13:39:59", "remaining_time": "3:33:42"} +{"current_steps": 3420, "total_steps": 4305, "loss": 0.2475, "lr": 4.944059939837082e-06, "epoch": 5.561432058584215, "percentage": 79.44, "elapsed_time": "13:41:13", "remaining_time": "3:32:30"} +{"current_steps": 3425, "total_steps": 4305, "loss": 0.255, "lr": 4.890803314743371e-06, "epoch": 5.569568755085435, "percentage": 79.56, "elapsed_time": "13:42:22", "remaining_time": "3:31:17"} +{"current_steps": 3430, "total_steps": 4305, "loss": 0.2306, "lr": 4.837795095203175e-06, "epoch": 5.577705451586656, "percentage": 79.67, "elapsed_time": "13:43:37", "remaining_time": "3:30:06"} +{"current_steps": 3435, "total_steps": 4305, "loss": 0.2434, "lr": 4.785036152707969e-06, "epoch": 5.585842148087877, "percentage": 79.79, "elapsed_time": "13:44:47", "remaining_time": "3:28:54"} +{"current_steps": 3440, "total_steps": 4305, "loss": 0.2432, "lr": 4.732527354650951e-06, "epoch": 5.593978844589097, "percentage": 79.91, "elapsed_time": "13:45:58", "remaining_time": "3:27:41"} +{"current_steps": 3445, "total_steps": 4305, "loss": 0.2277, "lr": 4.68026956431276e-06, "epoch": 5.602115541090317, "percentage": 80.02, 
"elapsed_time": "13:47:12", "remaining_time": "3:26:30"} +{"current_steps": 3450, "total_steps": 4305, "loss": 0.2455, "lr": 4.628263640847304e-06, "epoch": 5.610252237591538, "percentage": 80.14, "elapsed_time": "13:48:18", "remaining_time": "3:25:16"} +{"current_steps": 3455, "total_steps": 4305, "loss": 0.2356, "lr": 4.5765104392676205e-06, "epoch": 5.618388934092758, "percentage": 80.26, "elapsed_time": "13:49:32", "remaining_time": "3:24:05"} +{"current_steps": 3460, "total_steps": 4305, "loss": 0.2406, "lr": 4.525010810431825e-06, "epoch": 5.626525630593979, "percentage": 80.37, "elapsed_time": "13:50:44", "remaining_time": "3:22:52"} +{"current_steps": 3465, "total_steps": 4305, "loss": 0.2356, "lr": 4.4737656010291366e-06, "epoch": 5.634662327095199, "percentage": 80.49, "elapsed_time": "13:51:56", "remaining_time": "3:21:41"} +{"current_steps": 3470, "total_steps": 4305, "loss": 0.2356, "lr": 4.422775653565934e-06, "epoch": 5.64279902359642, "percentage": 80.6, "elapsed_time": "13:53:09", "remaining_time": "3:20:29"} +{"current_steps": 3475, "total_steps": 4305, "loss": 0.2516, "lr": 4.372041806351914e-06, "epoch": 5.650935720097641, "percentage": 80.72, "elapsed_time": "13:54:21", "remaining_time": "3:19:17"} +{"current_steps": 3480, "total_steps": 4305, "loss": 0.2519, "lr": 4.321564893486312e-06, "epoch": 5.65907241659886, "percentage": 80.84, "elapsed_time": "13:55:30", "remaining_time": "3:18:04"} +{"current_steps": 3485, "total_steps": 4305, "loss": 0.2125, "lr": 4.271345744844182e-06, "epoch": 5.667209113100081, "percentage": 80.95, "elapsed_time": "13:56:43", "remaining_time": "3:16:52"} +{"current_steps": 3490, "total_steps": 4305, "loss": 0.2415, "lr": 4.2213851860627696e-06, "epoch": 5.675345809601302, "percentage": 81.07, "elapsed_time": "13:57:57", "remaining_time": "3:15:41"} +{"current_steps": 3495, "total_steps": 4305, "loss": 0.2475, "lr": 4.171684038527914e-06, "epoch": 5.683482506102522, "percentage": 81.18, "elapsed_time": "13:59:10", 
"remaining_time": "3:14:29"} +{"current_steps": 3500, "total_steps": 4305, "loss": 0.2312, "lr": 4.12224311936056e-06, "epoch": 5.691619202603743, "percentage": 81.3, "elapsed_time": "14:00:27", "remaining_time": "3:13:18"} +{"current_steps": 3505, "total_steps": 4305, "loss": 0.2425, "lr": 4.073063241403316e-06, "epoch": 5.6997558991049635, "percentage": 81.42, "elapsed_time": "14:01:38", "remaining_time": "3:12:05"} +{"current_steps": 3510, "total_steps": 4305, "loss": 0.2474, "lr": 4.024145213207103e-06, "epoch": 5.707892595606184, "percentage": 81.53, "elapsed_time": "14:02:42", "remaining_time": "3:10:52"} +{"current_steps": 3515, "total_steps": 4305, "loss": 0.2312, "lr": 3.975489839017846e-06, "epoch": 5.716029292107405, "percentage": 81.65, "elapsed_time": "14:03:55", "remaining_time": "3:09:40"} +{"current_steps": 3520, "total_steps": 4305, "loss": 0.2422, "lr": 3.9270979187632516e-06, "epoch": 5.724165988608625, "percentage": 81.77, "elapsed_time": "14:05:10", "remaining_time": "3:08:29"} +{"current_steps": 3525, "total_steps": 4305, "loss": 0.2557, "lr": 3.878970248039678e-06, "epoch": 5.732302685109845, "percentage": 81.88, "elapsed_time": "14:06:19", "remaining_time": "3:07:16"} +{"current_steps": 3530, "total_steps": 4305, "loss": 0.2331, "lr": 3.831107618099026e-06, "epoch": 5.740439381611066, "percentage": 82.0, "elapsed_time": "14:07:29", "remaining_time": "3:06:03"} +{"current_steps": 3535, "total_steps": 4305, "loss": 0.2331, "lr": 3.7835108158357537e-06, "epoch": 5.748576078112286, "percentage": 82.11, "elapsed_time": "14:08:40", "remaining_time": "3:04:51"} +{"current_steps": 3540, "total_steps": 4305, "loss": 0.2317, "lr": 3.7361806237739264e-06, "epoch": 5.756712774613507, "percentage": 82.23, "elapsed_time": "14:09:47", "remaining_time": "3:03:38"} +{"current_steps": 3545, "total_steps": 4305, "loss": 0.2434, "lr": 3.689117820054351e-06, "epoch": 5.764849471114728, "percentage": 82.35, "elapsed_time": "14:10:59", "remaining_time": "3:02:26"} 
+{"current_steps": 3550, "total_steps": 4305, "loss": 0.2421, "lr": 3.6423231784217918e-06, "epoch": 5.772986167615948, "percentage": 82.46, "elapsed_time": "14:12:16", "remaining_time": "3:01:15"} +{"current_steps": 3555, "total_steps": 4305, "loss": 0.2419, "lr": 3.595797468212241e-06, "epoch": 5.781122864117169, "percentage": 82.58, "elapsed_time": "14:13:31", "remaining_time": "3:00:04"} +{"current_steps": 3560, "total_steps": 4305, "loss": 0.2249, "lr": 3.549541454340284e-06, "epoch": 5.7892595606183885, "percentage": 82.69, "elapsed_time": "14:14:46", "remaining_time": "2:58:52"} +{"current_steps": 3565, "total_steps": 4305, "loss": 0.2582, "lr": 3.503555897286499e-06, "epoch": 5.797396257119609, "percentage": 82.81, "elapsed_time": "14:16:01", "remaining_time": "2:57:41"} +{"current_steps": 3570, "total_steps": 4305, "loss": 0.2435, "lr": 3.4578415530849794e-06, "epoch": 5.80553295362083, "percentage": 82.93, "elapsed_time": "14:17:17", "remaining_time": "2:56:30"} +{"current_steps": 3575, "total_steps": 4305, "loss": 0.248, "lr": 3.4123991733108852e-06, "epoch": 5.81366965012205, "percentage": 83.04, "elapsed_time": "14:18:33", "remaining_time": "2:55:18"} +{"current_steps": 3580, "total_steps": 4305, "loss": 0.2333, "lr": 3.3672295050680946e-06, "epoch": 5.821806346623271, "percentage": 83.16, "elapsed_time": "14:19:46", "remaining_time": "2:54:06"} +{"current_steps": 3585, "total_steps": 4305, "loss": 0.2523, "lr": 3.322333290976936e-06, "epoch": 5.829943043124492, "percentage": 83.28, "elapsed_time": "14:20:56", "remaining_time": "2:52:54"} +{"current_steps": 3590, "total_steps": 4305, "loss": 0.2481, "lr": 3.2777112691619473e-06, "epoch": 5.838079739625712, "percentage": 83.39, "elapsed_time": "14:22:05", "remaining_time": "2:51:41"} +{"current_steps": 3595, "total_steps": 4305, "loss": 0.234, "lr": 3.233364173239766e-06, "epoch": 5.846216436126932, "percentage": 83.51, "elapsed_time": "14:23:17", "remaining_time": "2:50:29"} +{"current_steps": 3600, 
"total_steps": 4305, "loss": 0.2311, "lr": 3.189292732307052e-06, "epoch": 5.854353132628153, "percentage": 83.62, "elapsed_time": "14:24:29", "remaining_time": "2:49:17"} +{"current_steps": 3605, "total_steps": 4305, "loss": 0.2563, "lr": 3.1454976709285124e-06, "epoch": 5.862489829129373, "percentage": 83.74, "elapsed_time": "14:25:37", "remaining_time": "2:48:04"} +{"current_steps": 3610, "total_steps": 4305, "loss": 0.2352, "lr": 3.1019797091249938e-06, "epoch": 5.870626525630594, "percentage": 83.86, "elapsed_time": "14:26:49", "remaining_time": "2:46:52"} +{"current_steps": 3615, "total_steps": 4305, "loss": 0.245, "lr": 3.058739562361621e-06, "epoch": 5.8787632221318145, "percentage": 83.97, "elapsed_time": "14:28:03", "remaining_time": "2:45:41"} +{"current_steps": 3620, "total_steps": 4305, "loss": 0.2383, "lr": 3.015777941536058e-06, "epoch": 5.886899918633035, "percentage": 84.09, "elapsed_time": "14:29:12", "remaining_time": "2:44:28"} +{"current_steps": 3625, "total_steps": 4305, "loss": 0.2584, "lr": 2.973095552966805e-06, "epoch": 5.895036615134256, "percentage": 84.2, "elapsed_time": "14:30:19", "remaining_time": "2:43:15"} +{"current_steps": 3630, "total_steps": 4305, "loss": 0.2357, "lr": 2.9306930983816005e-06, "epoch": 5.903173311635476, "percentage": 84.32, "elapsed_time": "14:31:31", "remaining_time": "2:42:03"} +{"current_steps": 3635, "total_steps": 4305, "loss": 0.2496, "lr": 2.8885712749058737e-06, "epoch": 5.911310008136697, "percentage": 84.44, "elapsed_time": "14:32:44", "remaining_time": "2:40:51"} +{"current_steps": 3640, "total_steps": 4305, "loss": 0.2649, "lr": 2.8467307750512808e-06, "epoch": 5.919446704637917, "percentage": 84.55, "elapsed_time": "14:34:01", "remaining_time": "2:39:40"} +{"current_steps": 3645, "total_steps": 4305, "loss": 0.2395, "lr": 2.80517228670433e-06, "epoch": 5.927583401139137, "percentage": 84.67, "elapsed_time": "14:35:14", "remaining_time": "2:38:28"} +{"current_steps": 3650, "total_steps": 4305, 
"loss": 0.258, "lr": 2.7638964931150637e-06, "epoch": 5.935720097640358, "percentage": 84.79, "elapsed_time": "14:36:31", "remaining_time": "2:37:17"} +{"current_steps": 3655, "total_steps": 4305, "loss": 0.2477, "lr": 2.7229040728858323e-06, "epoch": 5.9438567941415785, "percentage": 84.9, "elapsed_time": "14:37:42", "remaining_time": "2:36:05"} +{"current_steps": 3660, "total_steps": 4305, "loss": 0.2427, "lr": 2.6821956999601306e-06, "epoch": 5.951993490642799, "percentage": 85.02, "elapsed_time": "14:38:52", "remaining_time": "2:34:53"} +{"current_steps": 3665, "total_steps": 4305, "loss": 0.2462, "lr": 2.641772043611521e-06, "epoch": 5.96013018714402, "percentage": 85.13, "elapsed_time": "14:39:55", "remaining_time": "2:33:39"} +{"current_steps": 3670, "total_steps": 4305, "loss": 0.2257, "lr": 2.6016337684326342e-06, "epoch": 5.96826688364524, "percentage": 85.25, "elapsed_time": "14:41:07", "remaining_time": "2:32:27"} +{"current_steps": 3675, "total_steps": 4305, "loss": 0.2502, "lr": 2.5617815343242327e-06, "epoch": 5.97640358014646, "percentage": 85.37, "elapsed_time": "14:42:14", "remaining_time": "2:31:14"} +{"current_steps": 3680, "total_steps": 4305, "loss": 0.2351, "lr": 2.522215996484374e-06, "epoch": 5.984540276647681, "percentage": 85.48, "elapsed_time": "14:43:27", "remaining_time": "2:30:02"} +{"current_steps": 3685, "total_steps": 4305, "loss": 0.226, "lr": 2.4829378053976318e-06, "epoch": 5.992676973148901, "percentage": 85.6, "elapsed_time": "14:44:43", "remaining_time": "2:28:51"} +{"current_steps": 3690, "total_steps": 4305, "loss": 0.2288, "lr": 2.4439476068243927e-06, "epoch": 6.0, "percentage": 85.71, "elapsed_time": "14:45:51", "remaining_time": "2:27:38"} +{"current_steps": 3695, "total_steps": 4305, "loss": 0.2364, "lr": 2.4052460417902613e-06, "epoch": 6.008136696501221, "percentage": 85.83, "elapsed_time": "14:47:02", "remaining_time": "2:26:26"} +{"current_steps": 3700, "total_steps": 4305, "loss": 0.2395, "lr": 
2.3668337465754985e-06, "epoch": 6.016273393002441, "percentage": 85.95, "elapsed_time": "14:48:17", "remaining_time": "2:25:14"} +{"current_steps": 3705, "total_steps": 4305, "loss": 0.228, "lr": 2.3287113527045823e-06, "epoch": 6.024410089503662, "percentage": 86.06, "elapsed_time": "14:49:35", "remaining_time": "2:24:03"} +{"current_steps": 3710, "total_steps": 4305, "loss": 0.2524, "lr": 2.2908794869358044e-06, "epoch": 6.032546786004882, "percentage": 86.18, "elapsed_time": "14:50:48", "remaining_time": "2:22:51"} +{"current_steps": 3715, "total_steps": 4305, "loss": 0.2276, "lr": 2.253338771250977e-06, "epoch": 6.040683482506102, "percentage": 86.3, "elapsed_time": "14:51:59", "remaining_time": "2:21:39"} +{"current_steps": 3720, "total_steps": 4305, "loss": 0.2298, "lr": 2.216089822845211e-06, "epoch": 6.048820179007323, "percentage": 86.41, "elapsed_time": "14:53:10", "remaining_time": "2:20:27"} +{"current_steps": 3725, "total_steps": 4305, "loss": 0.2159, "lr": 2.1791332541167497e-06, "epoch": 6.0569568755085434, "percentage": 86.53, "elapsed_time": "14:54:28", "remaining_time": "2:19:16"} +{"current_steps": 3730, "total_steps": 4305, "loss": 0.2319, "lr": 2.142469672656935e-06, "epoch": 6.065093572009764, "percentage": 86.64, "elapsed_time": "14:55:31", "remaining_time": "2:18:02"} +{"current_steps": 3735, "total_steps": 4305, "loss": 0.2221, "lr": 2.106099681240179e-06, "epoch": 6.073230268510985, "percentage": 86.76, "elapsed_time": "14:56:38", "remaining_time": "2:16:50"} +{"current_steps": 3740, "total_steps": 4305, "loss": 0.2275, "lr": 2.07002387781408e-06, "epoch": 6.081366965012205, "percentage": 86.88, "elapsed_time": "14:57:50", "remaining_time": "2:15:38"} +{"current_steps": 3745, "total_steps": 4305, "loss": 0.2604, "lr": 2.0342428554895788e-06, "epoch": 6.089503661513426, "percentage": 86.99, "elapsed_time": "14:58:58", "remaining_time": "2:14:25"} +{"current_steps": 3750, "total_steps": 4305, "loss": 0.2392, "lr": 1.998757202531223e-06, 
"epoch": 6.097640358014646, "percentage": 87.11, "elapsed_time": "15:00:06", "remaining_time": "2:13:13"} +{"current_steps": 3755, "total_steps": 4305, "loss": 0.2399, "lr": 1.9635675023474764e-06, "epoch": 6.105777054515866, "percentage": 87.22, "elapsed_time": "15:01:19", "remaining_time": "2:12:01"} +{"current_steps": 3760, "total_steps": 4305, "loss": 0.2327, "lr": 1.92867433348114e-06, "epoch": 6.113913751017087, "percentage": 87.34, "elapsed_time": "15:02:31", "remaining_time": "2:10:49"} +{"current_steps": 3765, "total_steps": 4305, "loss": 0.241, "lr": 1.8940782695998305e-06, "epoch": 6.1220504475183075, "percentage": 87.46, "elapsed_time": "15:03:42", "remaining_time": "2:09:36"} +{"current_steps": 3770, "total_steps": 4305, "loss": 0.2405, "lr": 1.859779879486565e-06, "epoch": 6.130187144019528, "percentage": 87.57, "elapsed_time": "15:04:52", "remaining_time": "2:08:24"} +{"current_steps": 3775, "total_steps": 4305, "loss": 0.2386, "lr": 1.8257797270303924e-06, "epoch": 6.138323840520749, "percentage": 87.69, "elapsed_time": "15:06:02", "remaining_time": "2:07:12"} +{"current_steps": 3780, "total_steps": 4305, "loss": 0.226, "lr": 1.792078371217132e-06, "epoch": 6.146460537021969, "percentage": 87.8, "elapsed_time": "15:07:10", "remaining_time": "2:05:59"} +{"current_steps": 3785, "total_steps": 4305, "loss": 0.2432, "lr": 1.7586763661201821e-06, "epoch": 6.15459723352319, "percentage": 87.92, "elapsed_time": "15:08:25", "remaining_time": "2:04:48"} +{"current_steps": 3790, "total_steps": 4305, "loss": 0.2421, "lr": 1.7255742608914095e-06, "epoch": 6.16273393002441, "percentage": 88.04, "elapsed_time": "15:09:37", "remaining_time": "2:03:36"} +{"current_steps": 3795, "total_steps": 4305, "loss": 0.2428, "lr": 1.6927725997521171e-06, "epoch": 6.17087062652563, "percentage": 88.15, "elapsed_time": "15:10:48", "remaining_time": "2:02:24"} +{"current_steps": 3800, "total_steps": 4305, "loss": 0.2281, "lr": 1.6602719219841135e-06, "epoch": 6.179007323026851, 
"percentage": 88.27, "elapsed_time": "15:12:02", "remaining_time": "2:01:12"} +{"current_steps": 3805, "total_steps": 4305, "loss": 0.2386, "lr": 1.6280727619208202e-06, "epoch": 6.187144019528072, "percentage": 88.39, "elapsed_time": "15:13:11", "remaining_time": "1:59:59"} +{"current_steps": 3810, "total_steps": 4305, "loss": 0.2508, "lr": 1.5961756489385117e-06, "epoch": 6.195280716029292, "percentage": 88.5, "elapsed_time": "15:14:28", "remaining_time": "1:58:48"} +{"current_steps": 3815, "total_steps": 4305, "loss": 0.2417, "lr": 1.5645811074475915e-06, "epoch": 6.203417412530513, "percentage": 88.62, "elapsed_time": "15:15:40", "remaining_time": "1:57:36"} +{"current_steps": 3820, "total_steps": 4305, "loss": 0.25, "lr": 1.533289656883985e-06, "epoch": 6.211554109031733, "percentage": 88.73, "elapsed_time": "15:16:52", "remaining_time": "1:56:24"} +{"current_steps": 3825, "total_steps": 4305, "loss": 0.2254, "lr": 1.5023018117005995e-06, "epoch": 6.219690805532954, "percentage": 88.85, "elapsed_time": "15:18:12", "remaining_time": "1:55:13"} +{"current_steps": 3830, "total_steps": 4305, "loss": 0.2439, "lr": 1.4716180813588566e-06, "epoch": 6.227827502034174, "percentage": 88.97, "elapsed_time": "15:19:23", "remaining_time": "1:54:01"} +{"current_steps": 3835, "total_steps": 4305, "loss": 0.2327, "lr": 1.44123897032032e-06, "epoch": 6.235964198535394, "percentage": 89.08, "elapsed_time": "15:20:39", "remaining_time": "1:52:49"} +{"current_steps": 3840, "total_steps": 4305, "loss": 0.2434, "lr": 1.411164978038404e-06, "epoch": 6.244100895036615, "percentage": 89.2, "elapsed_time": "15:21:44", "remaining_time": "1:51:37"} +{"current_steps": 3845, "total_steps": 4305, "loss": 0.2284, "lr": 1.3813965989501687e-06, "epoch": 6.252237591537836, "percentage": 89.31, "elapsed_time": "15:22:52", "remaining_time": "1:50:24"} +{"current_steps": 3850, "total_steps": 4305, "loss": 0.2357, "lr": 1.3519343224681758e-06, "epoch": 6.260374288039056, "percentage": 89.43, 
"elapsed_time": "15:24:10", "remaining_time": "1:49:13"} +{"current_steps": 3855, "total_steps": 4305, "loss": 0.2403, "lr": 1.3227786329724479e-06, "epoch": 6.268510984540277, "percentage": 89.55, "elapsed_time": "15:25:17", "remaining_time": "1:48:00"} +{"current_steps": 3860, "total_steps": 4305, "loss": 0.2333, "lr": 1.2939300098025177e-06, "epoch": 6.2766476810414975, "percentage": 89.66, "elapsed_time": "15:26:32", "remaining_time": "1:46:48"} +{"current_steps": 3865, "total_steps": 4305, "loss": 0.2492, "lr": 1.2653889272495223e-06, "epoch": 6.284784377542717, "percentage": 89.78, "elapsed_time": "15:27:47", "remaining_time": "1:45:37"} +{"current_steps": 3870, "total_steps": 4305, "loss": 0.2533, "lr": 1.2371558545484375e-06, "epoch": 6.292921074043938, "percentage": 89.9, "elapsed_time": "15:29:00", "remaining_time": "1:44:25"} +{"current_steps": 3875, "total_steps": 4305, "loss": 0.2353, "lr": 1.2092312558703333e-06, "epoch": 6.3010577705451585, "percentage": 90.01, "elapsed_time": "15:30:13", "remaining_time": "1:43:13"} +{"current_steps": 3880, "total_steps": 4305, "loss": 0.2338, "lr": 1.181615590314762e-06, "epoch": 6.309194467046379, "percentage": 90.13, "elapsed_time": "15:31:25", "remaining_time": "1:42:01"} +{"current_steps": 3885, "total_steps": 4305, "loss": 0.2232, "lr": 1.1543093119021976e-06, "epoch": 6.3173311635476, "percentage": 90.24, "elapsed_time": "15:32:40", "remaining_time": "1:40:49"} +{"current_steps": 3890, "total_steps": 4305, "loss": 0.2398, "lr": 1.1273128695665814e-06, "epoch": 6.32546786004882, "percentage": 90.36, "elapsed_time": "15:33:52", "remaining_time": "1:39:37"} +{"current_steps": 3895, "total_steps": 4305, "loss": 0.2381, "lr": 1.1006267071479359e-06, "epoch": 6.333604556550041, "percentage": 90.48, "elapsed_time": "15:35:00", "remaining_time": "1:38:25"} +{"current_steps": 3900, "total_steps": 4305, "loss": 0.2391, "lr": 1.074251263385071e-06, "epoch": 6.341741253051262, "percentage": 90.59, "elapsed_time": 
"15:36:12", "remaining_time": "1:37:13"} +{"current_steps": 3905, "total_steps": 4305, "loss": 0.2629, "lr": 1.0481869719083647e-06, "epoch": 6.349877949552481, "percentage": 90.71, "elapsed_time": "15:37:18", "remaining_time": "1:36:00"} +{"current_steps": 3910, "total_steps": 4305, "loss": 0.2399, "lr": 1.022434261232641e-06, "epoch": 6.358014646053702, "percentage": 90.82, "elapsed_time": "15:38:24", "remaining_time": "1:34:48"} +{"current_steps": 3915, "total_steps": 4305, "loss": 0.2414, "lr": 9.969935547501208e-07, "epoch": 6.3661513425549225, "percentage": 90.94, "elapsed_time": "15:39:31", "remaining_time": "1:33:35"} +{"current_steps": 3920, "total_steps": 4305, "loss": 0.2296, "lr": 9.718652707234667e-07, "epoch": 6.374288039056143, "percentage": 91.06, "elapsed_time": "15:40:42", "remaining_time": "1:32:23"} +{"current_steps": 3925, "total_steps": 4305, "loss": 0.2479, "lr": 9.47049822278896e-07, "epoch": 6.382424735557364, "percentage": 91.17, "elapsed_time": "15:41:54", "remaining_time": "1:31:11"} +{"current_steps": 3930, "total_steps": 4305, "loss": 0.2332, "lr": 9.225476173993941e-07, "epoch": 6.390561432058584, "percentage": 91.29, "elapsed_time": "15:43:03", "remaining_time": "1:29:59"} +{"current_steps": 3935, "total_steps": 4305, "loss": 0.2382, "lr": 8.983590589180125e-07, "epoch": 6.398698128559805, "percentage": 91.41, "elapsed_time": "15:44:15", "remaining_time": "1:28:47"} +{"current_steps": 3940, "total_steps": 4305, "loss": 0.2373, "lr": 8.744845445112337e-07, "epoch": 6.406834825061026, "percentage": 91.52, "elapsed_time": "15:45:26", "remaining_time": "1:27:35"} +{"current_steps": 3945, "total_steps": 4305, "loss": 0.2294, "lr": 8.50924466692451e-07, "epoch": 6.414971521562245, "percentage": 91.64, "elapsed_time": "15:46:38", "remaining_time": "1:26:23"} +{"current_steps": 3950, "total_steps": 4305, "loss": 0.2359, "lr": 8.276792128054967e-07, "epoch": 6.423108218063466, "percentage": 91.75, "elapsed_time": "15:47:46", "remaining_time": 
"1:25:10"} +{"current_steps": 3955, "total_steps": 4305, "loss": 0.2229, "lr": 8.047491650182815e-07, "epoch": 6.431244914564687, "percentage": 91.87, "elapsed_time": "15:49:03", "remaining_time": "1:23:59"} +{"current_steps": 3960, "total_steps": 4305, "loss": 0.2282, "lr": 7.821347003165125e-07, "epoch": 6.439381611065907, "percentage": 91.99, "elapsed_time": "15:50:15", "remaining_time": "1:22:47"} +{"current_steps": 3965, "total_steps": 4305, "loss": 0.227, "lr": 7.598361904974982e-07, "epoch": 6.447518307567128, "percentage": 92.1, "elapsed_time": "15:51:29", "remaining_time": "1:21:35"} +{"current_steps": 3970, "total_steps": 4305, "loss": 0.2259, "lr": 7.378540021640313e-07, "epoch": 6.4556550040683485, "percentage": 92.22, "elapsed_time": "15:52:37", "remaining_time": "1:20:23"} +{"current_steps": 3975, "total_steps": 4305, "loss": 0.2439, "lr": 7.161884967183552e-07, "epoch": 6.463791700569569, "percentage": 92.33, "elapsed_time": "15:53:45", "remaining_time": "1:19:10"} +{"current_steps": 3980, "total_steps": 4305, "loss": 0.242, "lr": 6.948400303562386e-07, "epoch": 6.471928397070789, "percentage": 92.45, "elapsed_time": "15:54:53", "remaining_time": "1:17:58"} +{"current_steps": 3985, "total_steps": 4305, "loss": 0.2064, "lr": 6.738089540611059e-07, "epoch": 6.480065093572009, "percentage": 92.57, "elapsed_time": "15:56:03", "remaining_time": "1:16:46"} +{"current_steps": 3990, "total_steps": 4305, "loss": 0.2223, "lr": 6.530956135982713e-07, "epoch": 6.48820179007323, "percentage": 92.68, "elapsed_time": "15:57:17", "remaining_time": "1:15:34"} +{"current_steps": 3995, "total_steps": 4305, "loss": 0.2217, "lr": 6.327003495092565e-07, "epoch": 6.496338486574451, "percentage": 92.8, "elapsed_time": "15:58:30", "remaining_time": "1:14:22"} +{"current_steps": 4000, "total_steps": 4305, "loss": 0.2104, "lr": 6.126234971061861e-07, "epoch": 6.504475183075671, "percentage": 92.92, "elapsed_time": "15:59:44", "remaining_time": "1:13:10"} +{"current_steps": 
4005, "total_steps": 4305, "loss": 0.2392, "lr": 5.928653864662815e-07, "epoch": 6.512611879576892, "percentage": 93.03, "elapsed_time": "16:00:54", "remaining_time": "1:11:58"} +{"current_steps": 4010, "total_steps": 4305, "loss": 0.2311, "lr": 5.734263424264242e-07, "epoch": 6.5207485760781125, "percentage": 93.15, "elapsed_time": "16:02:07", "remaining_time": "1:10:46"} +{"current_steps": 4015, "total_steps": 4305, "loss": 0.2341, "lr": 5.543066845778345e-07, "epoch": 6.528885272579333, "percentage": 93.26, "elapsed_time": "16:03:17", "remaining_time": "1:09:34"} +{"current_steps": 4020, "total_steps": 4305, "loss": 0.2381, "lr": 5.355067272607928e-07, "epoch": 6.537021969080554, "percentage": 93.38, "elapsed_time": "16:04:31", "remaining_time": "1:08:22"} +{"current_steps": 4025, "total_steps": 4305, "loss": 0.2159, "lr": 5.170267795594886e-07, "epoch": 6.5451586655817735, "percentage": 93.5, "elapsed_time": "16:05:45", "remaining_time": "1:07:10"} +{"current_steps": 4030, "total_steps": 4305, "loss": 0.2303, "lr": 4.988671452969329e-07, "epoch": 6.553295362082994, "percentage": 93.61, "elapsed_time": "16:06:59", "remaining_time": "1:05:59"} +{"current_steps": 4035, "total_steps": 4305, "loss": 0.2384, "lr": 4.810281230299674e-07, "epoch": 6.561432058584215, "percentage": 93.73, "elapsed_time": "16:08:10", "remaining_time": "1:04:47"} +{"current_steps": 4040, "total_steps": 4305, "loss": 0.2302, "lr": 4.6351000604434537e-07, "epoch": 6.569568755085435, "percentage": 93.84, "elapsed_time": "16:09:23", "remaining_time": "1:03:35"} +{"current_steps": 4045, "total_steps": 4305, "loss": 0.2342, "lr": 4.463130823499273e-07, "epoch": 6.577705451586656, "percentage": 93.96, "elapsed_time": "16:10:37", "remaining_time": "1:02:23"} +{"current_steps": 4050, "total_steps": 4305, "loss": 0.2393, "lr": 4.2943763467592436e-07, "epoch": 6.585842148087877, "percentage": 94.08, "elapsed_time": "16:11:46", "remaining_time": "1:01:11"} +{"current_steps": 4055, "total_steps": 4305, 
"loss": 0.2359, "lr": 4.12883940466271e-07, "epoch": 6.593978844589097, "percentage": 94.19, "elapsed_time": "16:12:57", "remaining_time": "0:59:59"} +{"current_steps": 4060, "total_steps": 4305, "loss": 0.232, "lr": 3.9665227187505097e-07, "epoch": 6.602115541090317, "percentage": 94.31, "elapsed_time": "16:14:12", "remaining_time": "0:58:47"} +{"current_steps": 4065, "total_steps": 4305, "loss": 0.2396, "lr": 3.8074289576202295e-07, "epoch": 6.610252237591538, "percentage": 94.43, "elapsed_time": "16:15:25", "remaining_time": "0:57:35"} +{"current_steps": 4070, "total_steps": 4305, "loss": 0.249, "lr": 3.6515607368824203e-07, "epoch": 6.618388934092758, "percentage": 94.54, "elapsed_time": "16:16:38", "remaining_time": "0:56:23"} +{"current_steps": 4075, "total_steps": 4305, "loss": 0.2525, "lr": 3.498920619117474e-07, "epoch": 6.626525630593979, "percentage": 94.66, "elapsed_time": "16:17:56", "remaining_time": "0:55:11"} +{"current_steps": 4080, "total_steps": 4305, "loss": 0.2161, "lr": 3.3495111138336147e-07, "epoch": 6.634662327095199, "percentage": 94.77, "elapsed_time": "16:19:08", "remaining_time": "0:53:59"} +{"current_steps": 4085, "total_steps": 4305, "loss": 0.2185, "lr": 3.203334677425529e-07, "epoch": 6.64279902359642, "percentage": 94.89, "elapsed_time": "16:20:21", "remaining_time": "0:52:47"} +{"current_steps": 4090, "total_steps": 4305, "loss": 0.2438, "lr": 3.060393713134091e-07, "epoch": 6.650935720097641, "percentage": 95.01, "elapsed_time": "16:21:36", "remaining_time": "0:51:36"} +{"current_steps": 4095, "total_steps": 4305, "loss": 0.2317, "lr": 2.920690571006768e-07, "epoch": 6.65907241659886, "percentage": 95.12, "elapsed_time": "16:22:48", "remaining_time": "0:50:24"} +{"current_steps": 4100, "total_steps": 4305, "loss": 0.2281, "lr": 2.784227547858964e-07, "epoch": 6.667209113100081, "percentage": 95.24, "elapsed_time": "16:23:56", "remaining_time": "0:49:11"} +{"current_steps": 4105, "total_steps": 4305, "loss": 0.2437, "lr": 
2.651006887236385e-07, "epoch": 6.675345809601302, "percentage": 95.35, "elapsed_time": "16:25:14", "remaining_time": "0:48:00"} +{"current_steps": 4110, "total_steps": 4305, "loss": 0.2272, "lr": 2.52103077937802e-07, "epoch": 6.683482506102522, "percentage": 95.47, "elapsed_time": "16:26:29", "remaining_time": "0:46:48"} +{"current_steps": 4115, "total_steps": 4305, "loss": 0.2228, "lr": 2.394301361180218e-07, "epoch": 6.691619202603743, "percentage": 95.59, "elapsed_time": "16:27:45", "remaining_time": "0:45:36"} +{"current_steps": 4120, "total_steps": 4305, "loss": 0.2468, "lr": 2.2708207161615147e-07, "epoch": 6.6997558991049635, "percentage": 95.7, "elapsed_time": "16:28:59", "remaining_time": "0:44:24"} +{"current_steps": 4125, "total_steps": 4305, "loss": 0.2523, "lr": 2.150590874428371e-07, "epoch": 6.707892595606184, "percentage": 95.82, "elapsed_time": "16:30:07", "remaining_time": "0:43:12"} +{"current_steps": 4130, "total_steps": 4305, "loss": 0.2271, "lr": 2.0336138126417994e-07, "epoch": 6.716029292107405, "percentage": 95.93, "elapsed_time": "16:31:18", "remaining_time": "0:42:00"} +{"current_steps": 4135, "total_steps": 4305, "loss": 0.2216, "lr": 1.9198914539849455e-07, "epoch": 6.724165988608625, "percentage": 96.05, "elapsed_time": "16:32:31", "remaining_time": "0:40:48"} +{"current_steps": 4140, "total_steps": 4305, "loss": 0.2474, "lr": 1.8094256681313593e-07, "epoch": 6.732302685109845, "percentage": 96.17, "elapsed_time": "16:33:40", "remaining_time": "0:39:36"} +{"current_steps": 4145, "total_steps": 4305, "loss": 0.2349, "lr": 1.702218271214262e-07, "epoch": 6.740439381611066, "percentage": 96.28, "elapsed_time": "16:34:46", "remaining_time": "0:38:23"} +{"current_steps": 4150, "total_steps": 4305, "loss": 0.2231, "lr": 1.598271025796816e-07, "epoch": 6.748576078112286, "percentage": 96.4, "elapsed_time": "16:35:59", "remaining_time": "0:37:11"} +{"current_steps": 4155, "total_steps": 4305, "loss": 0.2491, "lr": 1.4975856408429912e-07, 
"epoch": 6.756712774613507, "percentage": 96.52, "elapsed_time": "16:37:14", "remaining_time": "0:36:00"} +{"current_steps": 4160, "total_steps": 4305, "loss": 0.2393, "lr": 1.4001637716895445e-07, "epoch": 6.764849471114728, "percentage": 96.63, "elapsed_time": "16:38:29", "remaining_time": "0:34:48"} +{"current_steps": 4165, "total_steps": 4305, "loss": 0.2396, "lr": 1.3060070200188179e-07, "epoch": 6.772986167615948, "percentage": 96.75, "elapsed_time": "16:39:40", "remaining_time": "0:33:36"} +{"current_steps": 4170, "total_steps": 4305, "loss": 0.2237, "lr": 1.215116933832361e-07, "epoch": 6.781122864117169, "percentage": 96.86, "elapsed_time": "16:40:52", "remaining_time": "0:32:24"} +{"current_steps": 4175, "total_steps": 4305, "loss": 0.2303, "lr": 1.1274950074255053e-07, "epoch": 6.7892595606183885, "percentage": 96.98, "elapsed_time": "16:42:06", "remaining_time": "0:31:12"} +{"current_steps": 4180, "total_steps": 4305, "loss": 0.2326, "lr": 1.0431426813628298e-07, "epoch": 6.797396257119609, "percentage": 97.1, "elapsed_time": "16:43:15", "remaining_time": "0:30:00"} +{"current_steps": 4185, "total_steps": 4305, "loss": 0.2434, "lr": 9.62061342454379e-08, "epoch": 6.80553295362083, "percentage": 97.21, "elapsed_time": "16:44:30", "remaining_time": "0:28:48"} +{"current_steps": 4190, "total_steps": 4305, "loss": 0.2459, "lr": 8.842523237329925e-08, "epoch": 6.81366965012205, "percentage": 97.33, "elapsed_time": "16:45:47", "remaining_time": "0:27:36"} +{"current_steps": 4195, "total_steps": 4305, "loss": 0.2315, "lr": 8.097169044322561e-08, "epoch": 6.821806346623271, "percentage": 97.44, "elapsed_time": "16:46:56", "remaining_time": "0:26:24"} +{"current_steps": 4200, "total_steps": 4305, "loss": 0.2467, "lr": 7.38456309965585e-08, "epoch": 6.829943043124492, "percentage": 97.56, "elapsed_time": "16:48:03", "remaining_time": "0:25:12"} +{"current_steps": 4205, "total_steps": 4305, "loss": 0.2305, "lr": 6.7047171190604e-08, "epoch": 6.838079739625712, 
"percentage": 97.68, "elapsed_time": "16:49:16", "remaining_time": "0:24:00"} +{"current_steps": 4210, "total_steps": 4305, "loss": 0.2589, "lr": 6.057642279669874e-08, "epoch": 6.846216436126932, "percentage": 97.79, "elapsed_time": "16:50:27", "remaining_time": "0:22:48"} +{"current_steps": 4215, "total_steps": 4305, "loss": 0.2596, "lr": 5.4433492198386895e-08, "epoch": 6.854353132628153, "percentage": 97.91, "elapsed_time": "16:51:36", "remaining_time": "0:21:36"} +{"current_steps": 4220, "total_steps": 4305, "loss": 0.2372, "lr": 4.861848038965722e-08, "epoch": 6.862489829129373, "percentage": 98.03, "elapsed_time": "16:52:43", "remaining_time": "0:20:23"} +{"current_steps": 4225, "total_steps": 4305, "loss": 0.2522, "lr": 4.313148297328873e-08, "epoch": 6.870626525630594, "percentage": 98.14, "elapsed_time": "16:53:50", "remaining_time": "0:19:11"} +{"current_steps": 4230, "total_steps": 4305, "loss": 0.2586, "lr": 3.797259015928534e-08, "epoch": 6.8787632221318145, "percentage": 98.26, "elapsed_time": "16:54:59", "remaining_time": "0:17:59"} +{"current_steps": 4235, "total_steps": 4305, "loss": 0.2493, "lr": 3.314188676338148e-08, "epoch": 6.886899918633035, "percentage": 98.37, "elapsed_time": "16:56:15", "remaining_time": "0:16:47"} +{"current_steps": 4240, "total_steps": 4305, "loss": 0.2367, "lr": 2.863945220565434e-08, "epoch": 6.895036615134256, "percentage": 98.49, "elapsed_time": "16:57:23", "remaining_time": "0:15:35"} +{"current_steps": 4245, "total_steps": 4305, "loss": 0.2308, "lr": 2.4465360509211555e-08, "epoch": 6.903173311635476, "percentage": 98.61, "elapsed_time": "16:58:30", "remaining_time": "0:14:23"} +{"current_steps": 4250, "total_steps": 4305, "loss": 0.2441, "lr": 2.0619680298983313e-08, "epoch": 6.911310008136697, "percentage": 98.72, "elapsed_time": "16:59:39", "remaining_time": "0:13:11"} +{"current_steps": 4255, "total_steps": 4305, "loss": 0.2359, "lr": 1.7102474800592128e-08, "epoch": 6.919446704637917, "percentage": 98.84, 
"elapsed_time": "17:00:49", "remaining_time": "0:11:59"} +{"current_steps": 4260, "total_steps": 4305, "loss": 0.2511, "lr": 1.3913801839307017e-08, "epoch": 6.927583401139137, "percentage": 98.95, "elapsed_time": "17:02:03", "remaining_time": "0:10:47"} +{"current_steps": 4265, "total_steps": 4305, "loss": 0.2383, "lr": 1.105371383909759e-08, "epoch": 6.935720097640358, "percentage": 99.07, "elapsed_time": "17:03:17", "remaining_time": "0:09:35"} +{"current_steps": 4270, "total_steps": 4305, "loss": 0.2491, "lr": 8.522257821770296e-09, "epoch": 6.9438567941415785, "percentage": 99.19, "elapsed_time": "17:04:29", "remaining_time": "0:08:23"} +{"current_steps": 4275, "total_steps": 4305, "loss": 0.2203, "lr": 6.319475406200148e-09, "epoch": 6.951993490642799, "percentage": 99.3, "elapsed_time": "17:05:33", "remaining_time": "0:07:11"} +{"current_steps": 4280, "total_steps": 4305, "loss": 0.2364, "lr": 4.445402807637944e-09, "epoch": 6.96013018714402, "percentage": 99.42, "elapsed_time": "17:06:39", "remaining_time": "0:05:59"} +{"current_steps": 4285, "total_steps": 4305, "loss": 0.2456, "lr": 2.9000708371240695e-09, "epoch": 6.96826688364524, "percentage": 99.54, "elapsed_time": "17:07:51", "remaining_time": "0:04:47"} +{"current_steps": 4290, "total_steps": 4305, "loss": 0.2368, "lr": 1.6835049009755745e-09, "epoch": 6.97640358014646, "percentage": 99.65, "elapsed_time": "17:09:02", "remaining_time": "0:03:35"} +{"current_steps": 4295, "total_steps": 4305, "loss": 0.2129, "lr": 7.9572500036873e-10, "epoch": 6.984540276647681, "percentage": 99.77, "elapsed_time": "17:10:18", "remaining_time": "0:02:23"} +{"current_steps": 4300, "total_steps": 4305, "loss": 0.2423, "lr": 2.367457310170629e-10, "epoch": 6.992676973148901, "percentage": 99.88, "elapsed_time": "17:11:29", "remaining_time": "0:01:11"} +{"current_steps": 4305, "total_steps": 4305, "loss": 0.2148, "lr": 6.5762829204452095e-12, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "17:12:37", 
"remaining_time": "0:00:00"} +{"current_steps": 4305, "total_steps": 4305, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "17:13:16", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..19bdca6 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,9518 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4305, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008136696501220505, + "grad_norm": 16.090431037903322, + "learning_rate": 3.7122969837587006e-07, + "loss": 0.7715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2875712513923645, + "step": 5, + "valid_targets_mean": 4549.8, + "valid_targets_min": 252 + }, + { + "epoch": 0.01627339300244101, + "grad_norm": 13.600731709495204, + "learning_rate": 8.352668213457077e-07, + "loss": 0.7117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35614049434661865, + "step": 10, + "valid_targets_mean": 8209.8, + "valid_targets_min": 4066 + }, + { + "epoch": 0.024410089503661515, + "grad_norm": 14.14836434723303, + "learning_rate": 1.2993039443155453e-06, + "loss": 0.694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.250732421875, + "step": 15, + "valid_targets_mean": 4782.0, + "valid_targets_min": 867 + }, + { + "epoch": 0.03254678600488202, + "grad_norm": 7.548954071846173, + "learning_rate": 1.7633410672853829e-06, + "loss": 0.6943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22547389566898346, + "step": 20, + "valid_targets_mean": 5292.1, + "valid_targets_min": 1196 + }, + { + "epoch": 0.04068348250610252, + "grad_norm": 6.231235771802954, + "learning_rate": 2.2273781902552207e-06, + "loss": 0.6218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33196526765823364, + "step": 25, + "valid_targets_mean": 5366.5, + "valid_targets_min": 2694 + }, + { + "epoch": 0.04882017900732303, + "grad_norm": 
3.2220494398584676, + "learning_rate": 2.691415313225058e-06, + "loss": 0.5883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2818257212638855, + "step": 30, + "valid_targets_mean": 5397.9, + "valid_targets_min": 4217 + }, + { + "epoch": 0.05695687550854353, + "grad_norm": 1.9906473871157997, + "learning_rate": 3.155452436194896e-06, + "loss": 0.5962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29219943284988403, + "step": 35, + "valid_targets_mean": 6485.5, + "valid_targets_min": 4178 + }, + { + "epoch": 0.06509357200976404, + "grad_norm": 1.566586623976585, + "learning_rate": 3.6194895591647333e-06, + "loss": 0.5804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31874755024909973, + "step": 40, + "valid_targets_mean": 5538.4, + "valid_targets_min": 2206 + }, + { + "epoch": 0.07323026851098453, + "grad_norm": 0.8989747000335192, + "learning_rate": 4.083526682134571e-06, + "loss": 0.5337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26823461055755615, + "step": 45, + "valid_targets_mean": 8599.2, + "valid_targets_min": 2163 + }, + { + "epoch": 0.08136696501220504, + "grad_norm": 0.9319478495784349, + "learning_rate": 4.547563805104409e-06, + "loss": 0.5084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24531835317611694, + "step": 50, + "valid_targets_mean": 5544.0, + "valid_targets_min": 3675 + }, + { + "epoch": 0.08950366151342555, + "grad_norm": 0.8445321573859387, + "learning_rate": 5.011600928074246e-06, + "loss": 0.5385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24889877438545227, + "step": 55, + "valid_targets_mean": 4860.2, + "valid_targets_min": 2473 + }, + { + "epoch": 0.09764035801464606, + "grad_norm": 0.5631526521452107, + "learning_rate": 5.4756380510440845e-06, + "loss": 0.5077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919834017753601, + "step": 60, + "valid_targets_mean": 8965.6, + "valid_targets_min": 1390 + }, + { + "epoch": 0.10577705451586655, + "grad_norm": 0.723751948401996, + "learning_rate": 5.939675174013921e-06, + "loss": 0.5264, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.24049162864685059, + "step": 65, + "valid_targets_mean": 5572.6, + "valid_targets_min": 987 + }, + { + "epoch": 0.11391375101708706, + "grad_norm": 0.6686399938539643, + "learning_rate": 6.403712296983759e-06, + "loss": 0.525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2604122459888458, + "step": 70, + "valid_targets_mean": 5024.8, + "valid_targets_min": 1360 + }, + { + "epoch": 0.12205044751830757, + "grad_norm": 0.6374560834798504, + "learning_rate": 6.867749419953597e-06, + "loss": 0.4679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20338362455368042, + "step": 75, + "valid_targets_mean": 4254.9, + "valid_targets_min": 1926 + }, + { + "epoch": 0.13018714401952808, + "grad_norm": 0.5787585870311817, + "learning_rate": 7.331786542923435e-06, + "loss": 0.4843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21182265877723694, + "step": 80, + "valid_targets_mean": 6022.1, + "valid_targets_min": 2763 + }, + { + "epoch": 0.1383238405207486, + "grad_norm": 0.67321565497922, + "learning_rate": 7.795823665893271e-06, + "loss": 0.4312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23013588786125183, + "step": 85, + "valid_targets_mean": 5411.8, + "valid_targets_min": 3173 + }, + { + "epoch": 0.14646053702196907, + "grad_norm": 0.6071366032342709, + "learning_rate": 8.25986078886311e-06, + "loss": 0.4646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1693999469280243, + "step": 90, + "valid_targets_mean": 4955.9, + "valid_targets_min": 1353 + }, + { + "epoch": 0.15459723352318958, + "grad_norm": 0.5269887432627762, + "learning_rate": 8.723897911832948e-06, + "loss": 0.456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908610761165619, + "step": 95, + "valid_targets_mean": 5783.5, + "valid_targets_min": 2568 + }, + { + "epoch": 0.16273393002441008, + "grad_norm": 0.49470856135363345, + "learning_rate": 9.187935034802784e-06, + "loss": 0.4707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16176165640354156, + "step": 100, + 
"valid_targets_mean": 5736.5, + "valid_targets_min": 914 + }, + { + "epoch": 0.1708706265256306, + "grad_norm": 0.6634219296905246, + "learning_rate": 9.651972157772623e-06, + "loss": 0.4261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21228602528572083, + "step": 105, + "valid_targets_mean": 4824.8, + "valid_targets_min": 880 + }, + { + "epoch": 0.1790073230268511, + "grad_norm": 0.5503232838963362, + "learning_rate": 1.011600928074246e-05, + "loss": 0.4521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21243637800216675, + "step": 110, + "valid_targets_mean": 4871.6, + "valid_targets_min": 3180 + }, + { + "epoch": 0.1871440195280716, + "grad_norm": 0.5371175803438996, + "learning_rate": 1.0580046403712299e-05, + "loss": 0.4236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18787938356399536, + "step": 115, + "valid_targets_mean": 6630.4, + "valid_targets_min": 2886 + }, + { + "epoch": 0.19528071602929212, + "grad_norm": 0.592957013748528, + "learning_rate": 1.1044083526682134e-05, + "loss": 0.423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21313674747943878, + "step": 120, + "valid_targets_mean": 4635.9, + "valid_targets_min": 1984 + }, + { + "epoch": 0.20341741253051263, + "grad_norm": 0.5850643813081216, + "learning_rate": 1.1508120649651972e-05, + "loss": 0.441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18926499783992767, + "step": 125, + "valid_targets_mean": 3976.4, + "valid_targets_min": 2195 + }, + { + "epoch": 0.2115541090317331, + "grad_norm": 0.6154983784285487, + "learning_rate": 1.197215777262181e-05, + "loss": 0.4045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32481032609939575, + "step": 130, + "valid_targets_mean": 7336.1, + "valid_targets_min": 2261 + }, + { + "epoch": 0.21969080553295361, + "grad_norm": 0.5811910852740185, + "learning_rate": 1.2436194895591649e-05, + "loss": 0.3979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19883839786052704, + "step": 135, + "valid_targets_mean": 4926.2, + "valid_targets_min": 1380 + }, + { + "epoch": 
0.22782750203417412, + "grad_norm": 0.562023919355631, + "learning_rate": 1.2900232018561485e-05, + "loss": 0.4539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1847591996192932, + "step": 140, + "valid_targets_mean": 4817.6, + "valid_targets_min": 472 + }, + { + "epoch": 0.23596419853539463, + "grad_norm": 0.72271568902011, + "learning_rate": 1.3364269141531323e-05, + "loss": 0.4195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2751965820789337, + "step": 145, + "valid_targets_mean": 4217.8, + "valid_targets_min": 1056 + }, + { + "epoch": 0.24410089503661514, + "grad_norm": 0.3938229955385366, + "learning_rate": 1.3828306264501162e-05, + "loss": 0.3832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14872416853904724, + "step": 150, + "valid_targets_mean": 9482.1, + "valid_targets_min": 1451 + }, + { + "epoch": 0.25223759153783565, + "grad_norm": 0.49221594968351173, + "learning_rate": 1.4292343387471e-05, + "loss": 0.3907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20908868312835693, + "step": 155, + "valid_targets_mean": 6888.2, + "valid_targets_min": 976 + }, + { + "epoch": 0.26037428803905616, + "grad_norm": 0.5545201308696927, + "learning_rate": 1.4756380510440838e-05, + "loss": 0.4102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2789374887943268, + "step": 160, + "valid_targets_mean": 6530.2, + "valid_targets_min": 3557 + }, + { + "epoch": 0.26851098454027666, + "grad_norm": 0.4586839023597891, + "learning_rate": 1.5220417633410673e-05, + "loss": 0.3853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15098837018013, + "step": 165, + "valid_targets_mean": 6107.5, + "valid_targets_min": 3366 + }, + { + "epoch": 0.2766476810414972, + "grad_norm": 0.5466087287399158, + "learning_rate": 1.5684454756380513e-05, + "loss": 0.3823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18119701743125916, + "step": 170, + "valid_targets_mean": 5958.1, + "valid_targets_min": 2636 + }, + { + "epoch": 0.2847843775427177, + "grad_norm": 0.7246628057096877, + "learning_rate": 
1.614849187935035e-05, + "loss": 0.3878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18207889795303345, + "step": 175, + "valid_targets_mean": 3833.5, + "valid_targets_min": 731 + }, + { + "epoch": 0.29292107404393813, + "grad_norm": 0.538445461461904, + "learning_rate": 1.661252900232019e-05, + "loss": 0.3899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20349320769309998, + "step": 180, + "valid_targets_mean": 5779.9, + "valid_targets_min": 2408 + }, + { + "epoch": 0.30105777054515864, + "grad_norm": 0.5154736408446468, + "learning_rate": 1.7076566125290022e-05, + "loss": 0.3979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14643463492393494, + "step": 185, + "valid_targets_mean": 5368.9, + "valid_targets_min": 1130 + }, + { + "epoch": 0.30919446704637915, + "grad_norm": 0.5537361180002401, + "learning_rate": 1.7540603248259862e-05, + "loss": 0.3761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24436548352241516, + "step": 190, + "valid_targets_mean": 6028.6, + "valid_targets_min": 2328 + }, + { + "epoch": 0.31733116354759966, + "grad_norm": 0.5029342180508779, + "learning_rate": 1.80046403712297e-05, + "loss": 0.4093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1840636283159256, + "step": 195, + "valid_targets_mean": 5353.9, + "valid_targets_min": 2892 + }, + { + "epoch": 0.32546786004882017, + "grad_norm": 0.665787256003558, + "learning_rate": 1.846867749419954e-05, + "loss": 0.4148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20056715607643127, + "step": 200, + "valid_targets_mean": 4300.5, + "valid_targets_min": 2125 + }, + { + "epoch": 0.3336045565500407, + "grad_norm": 0.6052671481751055, + "learning_rate": 1.8932714617169375e-05, + "loss": 0.4007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18301618099212646, + "step": 205, + "valid_targets_mean": 4318.6, + "valid_targets_min": 1348 + }, + { + "epoch": 0.3417412530512612, + "grad_norm": 0.5974300039254363, + "learning_rate": 1.9396751740139212e-05, + "loss": 0.359, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.1635710895061493, + "step": 210, + "valid_targets_mean": 4149.1, + "valid_targets_min": 1341 + }, + { + "epoch": 0.3498779495524817, + "grad_norm": 0.5162532888002979, + "learning_rate": 1.986078886310905e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1892920732498169, + "step": 215, + "valid_targets_mean": 5807.4, + "valid_targets_min": 3465 + }, + { + "epoch": 0.3580146460537022, + "grad_norm": 0.6211875321766217, + "learning_rate": 2.0324825986078888e-05, + "loss": 0.3707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048984169960022, + "step": 220, + "valid_targets_mean": 5146.1, + "valid_targets_min": 3448 + }, + { + "epoch": 0.3661513425549227, + "grad_norm": 0.5629734837453833, + "learning_rate": 2.0788863109048725e-05, + "loss": 0.4264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20716804265975952, + "step": 225, + "valid_targets_mean": 6232.1, + "valid_targets_min": 3504 + }, + { + "epoch": 0.3742880390561432, + "grad_norm": 0.5754882541364582, + "learning_rate": 2.125290023201856e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17893140017986298, + "step": 230, + "valid_targets_mean": 5060.4, + "valid_targets_min": 1097 + }, + { + "epoch": 0.3824247355573637, + "grad_norm": 0.4803589387079019, + "learning_rate": 2.17169373549884e-05, + "loss": 0.3729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.169406920671463, + "step": 235, + "valid_targets_mean": 10265.1, + "valid_targets_min": 3654 + }, + { + "epoch": 0.39056143205858423, + "grad_norm": 0.5684141070472157, + "learning_rate": 2.2180974477958238e-05, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18647357821464539, + "step": 240, + "valid_targets_mean": 6180.6, + "valid_targets_min": 4034 + }, + { + "epoch": 0.39869812855980474, + "grad_norm": 2.1822397438079113, + "learning_rate": 2.2645011600928078e-05, + "loss": 0.3773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13844701647758484, + "step": 245, + "valid_targets_mean": 3962.9, + 
"valid_targets_min": 1115 + }, + { + "epoch": 0.40683482506102525, + "grad_norm": 0.5269077214375306, + "learning_rate": 2.3109048723897914e-05, + "loss": 0.3638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22004611790180206, + "step": 250, + "valid_targets_mean": 7061.2, + "valid_targets_min": 1798 + }, + { + "epoch": 0.4149715215622457, + "grad_norm": 0.5440786293579026, + "learning_rate": 2.357308584686775e-05, + "loss": 0.3884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17783966660499573, + "step": 255, + "valid_targets_mean": 6231.8, + "valid_targets_min": 1180 + }, + { + "epoch": 0.4231082180634662, + "grad_norm": 0.5948755408238603, + "learning_rate": 2.4037122969837587e-05, + "loss": 0.3825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17008402943611145, + "step": 260, + "valid_targets_mean": 5944.2, + "valid_targets_min": 3162 + }, + { + "epoch": 0.4312449145646867, + "grad_norm": 0.6242241542196177, + "learning_rate": 2.4501160092807427e-05, + "loss": 0.3741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24811501801013947, + "step": 265, + "valid_targets_mean": 6068.0, + "valid_targets_min": 3582 + }, + { + "epoch": 0.43938161106590723, + "grad_norm": 0.6339502257062204, + "learning_rate": 2.4965197215777264e-05, + "loss": 0.378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23643383383750916, + "step": 270, + "valid_targets_mean": 6240.1, + "valid_targets_min": 3166 + }, + { + "epoch": 0.44751830756712774, + "grad_norm": 0.620013916375109, + "learning_rate": 2.54292343387471e-05, + "loss": 0.3781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20989033579826355, + "step": 275, + "valid_targets_mean": 4992.1, + "valid_targets_min": 1974 + }, + { + "epoch": 0.45565500406834825, + "grad_norm": 0.6062383822505677, + "learning_rate": 2.589327146171694e-05, + "loss": 0.3909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.165325328707695, + "step": 280, + "valid_targets_mean": 5992.2, + "valid_targets_min": 2286 + }, + { + "epoch": 0.46379170056956875, + "grad_norm": 
0.5313593137715071, + "learning_rate": 2.6357308584686777e-05, + "loss": 0.3599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.211288183927536, + "step": 285, + "valid_targets_mean": 8111.2, + "valid_targets_min": 1743 + }, + { + "epoch": 0.47192839707078926, + "grad_norm": 0.6295506563199366, + "learning_rate": 2.6821345707656617e-05, + "loss": 0.4067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2054595947265625, + "step": 290, + "valid_targets_mean": 5166.6, + "valid_targets_min": 665 + }, + { + "epoch": 0.48006509357200977, + "grad_norm": 0.49592526684619054, + "learning_rate": 2.7285382830626453e-05, + "loss": 0.3392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16820460557937622, + "step": 295, + "valid_targets_mean": 7745.8, + "valid_targets_min": 4466 + }, + { + "epoch": 0.4882017900732303, + "grad_norm": 0.567789284281097, + "learning_rate": 2.774941995359629e-05, + "loss": 0.3658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20403246581554413, + "step": 300, + "valid_targets_mean": 5828.9, + "valid_targets_min": 3519 + }, + { + "epoch": 0.4963384865744508, + "grad_norm": 0.5994970137381144, + "learning_rate": 2.8213457076566126e-05, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17800754308700562, + "step": 305, + "valid_targets_mean": 6173.2, + "valid_targets_min": 2727 + }, + { + "epoch": 0.5044751830756713, + "grad_norm": 0.4507815805396529, + "learning_rate": 2.8677494199535966e-05, + "loss": 0.3422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.163581982254982, + "step": 310, + "valid_targets_mean": 7815.4, + "valid_targets_min": 2591 + }, + { + "epoch": 0.5126118795768918, + "grad_norm": 0.5239390399565743, + "learning_rate": 2.9141531322505803e-05, + "loss": 0.3643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17151904106140137, + "step": 315, + "valid_targets_mean": 6634.5, + "valid_targets_min": 2709 + }, + { + "epoch": 0.5207485760781123, + "grad_norm": 0.5369612515514515, + "learning_rate": 2.9605568445475643e-05, + "loss": 0.3502, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.18754036724567413, + "step": 320, + "valid_targets_mean": 7942.8, + "valid_targets_min": 2731 + }, + { + "epoch": 0.5288852725793328, + "grad_norm": 0.5793782711405452, + "learning_rate": 3.006960556844548e-05, + "loss": 0.3665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19467616081237793, + "step": 325, + "valid_targets_mean": 7150.4, + "valid_targets_min": 3200 + }, + { + "epoch": 0.5370219690805533, + "grad_norm": 0.6555376050209063, + "learning_rate": 3.053364269141532e-05, + "loss": 0.3597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22380053997039795, + "step": 330, + "valid_targets_mean": 5470.0, + "valid_targets_min": 3328 + }, + { + "epoch": 0.5451586655817738, + "grad_norm": 0.47280019324486067, + "learning_rate": 3.099767981438515e-05, + "loss": 0.353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1600561887025833, + "step": 335, + "valid_targets_mean": 7268.8, + "valid_targets_min": 1187 + }, + { + "epoch": 0.5532953620829943, + "grad_norm": 0.5881598770087844, + "learning_rate": 3.146171693735499e-05, + "loss": 0.3822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24715545773506165, + "step": 340, + "valid_targets_mean": 6853.9, + "valid_targets_min": 3075 + }, + { + "epoch": 0.5614320585842149, + "grad_norm": 0.6391589883331622, + "learning_rate": 3.1925754060324825e-05, + "loss": 0.3614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1951560229063034, + "step": 345, + "valid_targets_mean": 5299.4, + "valid_targets_min": 1246 + }, + { + "epoch": 0.5695687550854354, + "grad_norm": 0.5623585142837345, + "learning_rate": 3.2389791183294665e-05, + "loss": 0.3891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16995632648468018, + "step": 350, + "valid_targets_mean": 5440.2, + "valid_targets_min": 2508 + }, + { + "epoch": 0.5777054515866559, + "grad_norm": 0.6219299756731129, + "learning_rate": 3.2853828306264505e-05, + "loss": 0.3645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1758767068386078, + "step": 355, + 
"valid_targets_mean": 5666.1, + "valid_targets_min": 4068 + }, + { + "epoch": 0.5858421480878763, + "grad_norm": 0.5685036390421684, + "learning_rate": 3.3317865429234345e-05, + "loss": 0.3647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16862249374389648, + "step": 360, + "valid_targets_mean": 4828.4, + "valid_targets_min": 3059 + }, + { + "epoch": 0.5939788445890968, + "grad_norm": 0.4625422720060745, + "learning_rate": 3.378190255220418e-05, + "loss": 0.3712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1963481456041336, + "step": 365, + "valid_targets_mean": 7321.2, + "valid_targets_min": 2340 + }, + { + "epoch": 0.6021155410903173, + "grad_norm": 0.5470733880761669, + "learning_rate": 3.424593967517402e-05, + "loss": 0.3691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.210045725107193, + "step": 370, + "valid_targets_mean": 6877.0, + "valid_targets_min": 3491 + }, + { + "epoch": 0.6102522375915378, + "grad_norm": 0.6202487972326748, + "learning_rate": 3.470997679814386e-05, + "loss": 0.3772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17183330655097961, + "step": 375, + "valid_targets_mean": 4664.4, + "valid_targets_min": 1184 + }, + { + "epoch": 0.6183889340927583, + "grad_norm": 0.5221402160582946, + "learning_rate": 3.517401392111369e-05, + "loss": 0.3627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12995409965515137, + "step": 380, + "valid_targets_mean": 4589.5, + "valid_targets_min": 1355 + }, + { + "epoch": 0.6265256305939788, + "grad_norm": 0.5752601496871426, + "learning_rate": 3.563805104408353e-05, + "loss": 0.357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458801031112671, + "step": 385, + "valid_targets_mean": 4861.6, + "valid_targets_min": 781 + }, + { + "epoch": 0.6346623270951993, + "grad_norm": 0.49073739472195316, + "learning_rate": 3.6102088167053364e-05, + "loss": 0.3394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14129197597503662, + "step": 390, + "valid_targets_mean": 7941.0, + "valid_targets_min": 1697 + }, + { + "epoch": 
0.6427990235964198, + "grad_norm": 0.6724135988713974, + "learning_rate": 3.6566125290023204e-05, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20702053606510162, + "step": 395, + "valid_targets_mean": 5088.1, + "valid_targets_min": 3051 + }, + { + "epoch": 0.6509357200976403, + "grad_norm": 0.5361691867146933, + "learning_rate": 3.7030162412993044e-05, + "loss": 0.3382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11901885271072388, + "step": 400, + "valid_targets_mean": 4371.2, + "valid_targets_min": 667 + }, + { + "epoch": 0.6590724165988608, + "grad_norm": 0.5133700351880802, + "learning_rate": 3.7494199535962884e-05, + "loss": 0.3437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19993503391742706, + "step": 405, + "valid_targets_mean": 7043.2, + "valid_targets_min": 2275 + }, + { + "epoch": 0.6672091131000814, + "grad_norm": 0.6482819055800569, + "learning_rate": 3.795823665893272e-05, + "loss": 0.3395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17060551047325134, + "step": 410, + "valid_targets_mean": 4805.9, + "valid_targets_min": 2812 + }, + { + "epoch": 0.6753458096013019, + "grad_norm": 0.6390031583274932, + "learning_rate": 3.842227378190256e-05, + "loss": 0.3535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21496957540512085, + "step": 415, + "valid_targets_mean": 5022.5, + "valid_targets_min": 3145 + }, + { + "epoch": 0.6834825061025224, + "grad_norm": 0.5976519963434623, + "learning_rate": 3.888631090487239e-05, + "loss": 0.3517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512780427932739, + "step": 420, + "valid_targets_mean": 7279.9, + "valid_targets_min": 1433 + }, + { + "epoch": 0.6916192026037429, + "grad_norm": 0.47064787232454297, + "learning_rate": 3.935034802784223e-05, + "loss": 0.3492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13265804946422577, + "step": 425, + "valid_targets_mean": 7578.4, + "valid_targets_min": 1208 + }, + { + "epoch": 0.6997558991049634, + "grad_norm": 0.6527567618207681, + "learning_rate": 
3.981438515081207e-05, + "loss": 0.3574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1878128945827484, + "step": 430, + "valid_targets_mean": 6056.6, + "valid_targets_min": 839 + }, + { + "epoch": 0.7078925956061839, + "grad_norm": 0.5735509384032068, + "learning_rate": 3.9999940813479674e-05, + "loss": 0.3372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1770503968000412, + "step": 435, + "valid_targets_mean": 6436.2, + "valid_targets_min": 1048 + }, + { + "epoch": 0.7160292921074044, + "grad_norm": 0.6261328521415896, + "learning_rate": 3.999957911934624e-05, + "loss": 0.3515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18041738867759705, + "step": 440, + "valid_targets_mean": 4812.4, + "valid_targets_min": 978 + }, + { + "epoch": 0.7241659886086249, + "grad_norm": 0.6204622587345417, + "learning_rate": 3.9998888618418865e-05, + "loss": 0.3373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1899314522743225, + "step": 445, + "valid_targets_mean": 5510.2, + "valid_targets_min": 3204 + }, + { + "epoch": 0.7323026851098454, + "grad_norm": 3.299320631347469, + "learning_rate": 3.999786932204985e-05, + "loss": 0.3785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1819208562374115, + "step": 450, + "valid_targets_mean": 4358.6, + "valid_targets_min": 1771 + }, + { + "epoch": 0.7404393816110659, + "grad_norm": 0.5252880370409836, + "learning_rate": 3.999652124699712e-05, + "loss": 0.3475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16896438598632812, + "step": 455, + "valid_targets_mean": 5077.8, + "valid_targets_min": 1348 + }, + { + "epoch": 0.7485760781122864, + "grad_norm": 0.6698283241722841, + "learning_rate": 3.999484441542395e-05, + "loss": 0.3407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1943579763174057, + "step": 460, + "valid_targets_mean": 6660.6, + "valid_targets_min": 2219 + }, + { + "epoch": 0.7567127746135069, + "grad_norm": 0.5507163167952325, + "learning_rate": 3.999283885489861e-05, + "loss": 0.3414, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.17113232612609863, + "step": 465, + "valid_targets_mean": 5965.8, + "valid_targets_min": 821 + }, + { + "epoch": 0.7648494711147275, + "grad_norm": 0.5967720564945255, + "learning_rate": 3.999050459839389e-05, + "loss": 0.3584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18689490854740143, + "step": 470, + "valid_targets_mean": 5054.9, + "valid_targets_min": 2713 + }, + { + "epoch": 0.772986167615948, + "grad_norm": 0.47232445183793803, + "learning_rate": 3.998784168428657e-05, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15000829100608826, + "step": 475, + "valid_targets_mean": 8201.6, + "valid_targets_min": 4044 + }, + { + "epoch": 0.7811228641171685, + "grad_norm": 0.621422144424275, + "learning_rate": 3.998485015635677e-05, + "loss": 0.3432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1805095225572586, + "step": 480, + "valid_targets_mean": 5017.2, + "valid_targets_min": 2360 + }, + { + "epoch": 0.789259560618389, + "grad_norm": 0.5384507298681502, + "learning_rate": 3.998153006378727e-05, + "loss": 0.3606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14276368916034698, + "step": 485, + "valid_targets_mean": 5560.0, + "valid_targets_min": 3450 + }, + { + "epoch": 0.7973962571196095, + "grad_norm": 0.5979879136613555, + "learning_rate": 3.997788146116267e-05, + "loss": 0.3246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2316283881664276, + "step": 490, + "valid_targets_mean": 6350.8, + "valid_targets_min": 1530 + }, + { + "epoch": 0.80553295362083, + "grad_norm": 0.5101547736138818, + "learning_rate": 3.99739044084685e-05, + "loss": 0.3342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14997598528862, + "step": 495, + "valid_targets_mean": 5629.4, + "valid_targets_min": 1916 + }, + { + "epoch": 0.8136696501220505, + "grad_norm": 0.5665977624248587, + "learning_rate": 3.9969598971090225e-05, + "loss": 0.3485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16634723544120789, + "step": 500, + "valid_targets_mean": 5539.4, + "valid_targets_min": 2190 + 
}, + { + "epoch": 0.8218063466232709, + "grad_norm": 0.5440024359106904, + "learning_rate": 3.99649652198122e-05, + "loss": 0.3367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12725237011909485, + "step": 505, + "valid_targets_mean": 5113.1, + "valid_targets_min": 2708 + }, + { + "epoch": 0.8299430431244914, + "grad_norm": 0.5290865318322414, + "learning_rate": 3.9960003230816456e-05, + "loss": 0.3289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14404511451721191, + "step": 510, + "valid_targets_mean": 5173.8, + "valid_targets_min": 3136 + }, + { + "epoch": 0.8380797396257119, + "grad_norm": 0.5770982920950211, + "learning_rate": 3.9954713085681504e-05, + "loss": 0.3588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18526235222816467, + "step": 515, + "valid_targets_mean": 6702.4, + "valid_targets_min": 4946 + }, + { + "epoch": 0.8462164361269324, + "grad_norm": 0.47568964474126807, + "learning_rate": 3.994909487138096e-05, + "loss": 0.329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14243432879447937, + "step": 520, + "valid_targets_mean": 7471.1, + "valid_targets_min": 1985 + }, + { + "epoch": 0.8543531326281529, + "grad_norm": 0.5983260323228816, + "learning_rate": 3.994314868028212e-05, + "loss": 0.3711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2362203747034073, + "step": 525, + "valid_targets_mean": 6965.2, + "valid_targets_min": 5206 + }, + { + "epoch": 0.8624898291293734, + "grad_norm": 0.5211844138476033, + "learning_rate": 3.9936874610144445e-05, + "loss": 0.3396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15538473427295685, + "step": 530, + "valid_targets_mean": 5649.0, + "valid_targets_min": 2480 + }, + { + "epoch": 0.870626525630594, + "grad_norm": 0.5747838948995959, + "learning_rate": 3.993027276411793e-05, + "loss": 0.3405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14920654892921448, + "step": 535, + "valid_targets_mean": 5064.1, + "valid_targets_min": 1328 + }, + { + "epoch": 0.8787632221318145, + "grad_norm": 0.7641456693399412, + 
"learning_rate": 3.992334325074148e-05, + "loss": 0.3486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22456279397010803, + "step": 540, + "valid_targets_mean": 4989.8, + "valid_targets_min": 1908 + }, + { + "epoch": 0.886899918633035, + "grad_norm": 0.6130711548025931, + "learning_rate": 3.991608618394102e-05, + "loss": 0.3533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13783709704875946, + "step": 545, + "valid_targets_mean": 4486.8, + "valid_targets_min": 2494 + }, + { + "epoch": 0.8950366151342555, + "grad_norm": 0.6595580159116637, + "learning_rate": 3.9908501683027726e-05, + "loss": 0.3288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1897541582584381, + "step": 550, + "valid_targets_mean": 4604.0, + "valid_targets_min": 3132 + }, + { + "epoch": 0.903173311635476, + "grad_norm": 0.5968370142818512, + "learning_rate": 3.990058987269597e-05, + "loss": 0.3331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15421295166015625, + "step": 555, + "valid_targets_mean": 5379.5, + "valid_targets_min": 1269 + }, + { + "epoch": 0.9113100081366965, + "grad_norm": 0.5675265201448391, + "learning_rate": 3.9892350883021366e-05, + "loss": 0.3461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15991877019405365, + "step": 560, + "valid_targets_mean": 5252.5, + "valid_targets_min": 1873 + }, + { + "epoch": 0.919446704637917, + "grad_norm": 0.4953887074657764, + "learning_rate": 3.988378484945853e-05, + "loss": 0.3102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13573208451271057, + "step": 565, + "valid_targets_mean": 4776.0, + "valid_targets_min": 1040 + }, + { + "epoch": 0.9275834011391375, + "grad_norm": 0.514455820774763, + "learning_rate": 3.987489191283894e-05, + "loss": 0.3416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1836441606283188, + "step": 570, + "valid_targets_mean": 7845.2, + "valid_targets_min": 4051 + }, + { + "epoch": 0.935720097640358, + "grad_norm": 0.6132417120301569, + "learning_rate": 3.9865672219368574e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.20987677574157715, + "step": 575, + "valid_targets_mean": 7432.6, + "valid_targets_min": 3210 + }, + { + "epoch": 0.9438567941415785, + "grad_norm": 0.5225003165533024, + "learning_rate": 3.98561259206255e-05, + "loss": 0.3344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13004842400550842, + "step": 580, + "valid_targets_mean": 4851.6, + "valid_targets_min": 873 + }, + { + "epoch": 0.951993490642799, + "grad_norm": 0.4373690128216657, + "learning_rate": 3.984625317355743e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.152201846241951, + "step": 585, + "valid_targets_mean": 7664.9, + "valid_targets_min": 3641 + }, + { + "epoch": 0.9601301871440195, + "grad_norm": 0.47431856596663735, + "learning_rate": 3.983605414047908e-05, + "loss": 0.3609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21499404311180115, + "step": 590, + "valid_targets_mean": 7886.2, + "valid_targets_min": 2797 + }, + { + "epoch": 0.96826688364524, + "grad_norm": 0.5715947847050505, + "learning_rate": 3.982552898906956e-05, + "loss": 0.3562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20815590023994446, + "step": 595, + "valid_targets_mean": 5514.9, + "valid_targets_min": 2081 + }, + { + "epoch": 0.9764035801464606, + "grad_norm": 0.6400810920406435, + "learning_rate": 3.981467789236958e-05, + "loss": 0.3419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1813117265701294, + "step": 600, + "valid_targets_mean": 5263.9, + "valid_targets_min": 850 + }, + { + "epoch": 0.9845402766476811, + "grad_norm": 0.507069029775432, + "learning_rate": 3.98035010287786e-05, + "loss": 0.3361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15920934081077576, + "step": 605, + "valid_targets_mean": 6134.8, + "valid_targets_min": 1505 + }, + { + "epoch": 0.9926769731489016, + "grad_norm": 0.5784988986931031, + "learning_rate": 3.979199858205192e-05, + "loss": 0.3263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16542240977287292, + "step": 610, + "valid_targets_mean": 5547.0, + 
"valid_targets_min": 597 + }, + { + "epoch": 1.0, + "grad_norm": 0.907765371978957, + "learning_rate": 3.9780170741297655e-05, + "loss": 0.3348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36365807056427, + "step": 615, + "valid_targets_mean": 4512.4, + "valid_targets_min": 2120 + }, + { + "epoch": 1.0081366965012204, + "grad_norm": 0.5028868621668272, + "learning_rate": 3.976801770097361e-05, + "loss": 0.32, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1552557647228241, + "step": 620, + "valid_targets_mean": 6936.5, + "valid_targets_min": 4152 + }, + { + "epoch": 1.016273393002441, + "grad_norm": 0.5068697229571411, + "learning_rate": 3.975553966088412e-05, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16703470051288605, + "step": 625, + "valid_targets_mean": 6322.5, + "valid_targets_min": 1831 + }, + { + "epoch": 1.0244100895036614, + "grad_norm": 0.6214780240503095, + "learning_rate": 3.9742736826176706e-05, + "loss": 0.3306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19309839606285095, + "step": 630, + "valid_targets_mean": 5129.1, + "valid_targets_min": 1722 + }, + { + "epoch": 1.032546786004882, + "grad_norm": 0.5206937707201459, + "learning_rate": 3.9729609407338745e-05, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1733720600605011, + "step": 635, + "valid_targets_mean": 5940.8, + "valid_targets_min": 3375 + }, + { + "epoch": 1.0406834825061024, + "grad_norm": 0.558975152851932, + "learning_rate": 3.971615762019401e-05, + "loss": 0.3404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17723140120506287, + "step": 640, + "valid_targets_mean": 5387.9, + "valid_targets_min": 844 + }, + { + "epoch": 1.048820179007323, + "grad_norm": 0.52042070753551, + "learning_rate": 3.970238168589911e-05, + "loss": 0.3239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16863638162612915, + "step": 645, + "valid_targets_mean": 5446.0, + "valid_targets_min": 3303 + }, + { + "epoch": 1.0569568755085434, + "grad_norm": 0.52743590283002, + 
"learning_rate": 3.968828183093984e-05, + "loss": 0.3076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1578497290611267, + "step": 650, + "valid_targets_mean": 6333.2, + "valid_targets_min": 3859 + }, + { + "epoch": 1.065093572009764, + "grad_norm": 0.5709713263916416, + "learning_rate": 3.9673858287127484e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13569536805152893, + "step": 655, + "valid_targets_mean": 6296.2, + "valid_targets_min": 4446 + }, + { + "epoch": 1.0732302685109845, + "grad_norm": 0.6034080046446395, + "learning_rate": 3.965911129159501e-05, + "loss": 0.3308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1853494793176651, + "step": 660, + "valid_targets_mean": 5594.5, + "valid_targets_min": 2807 + }, + { + "epoch": 1.081366965012205, + "grad_norm": 0.6783173009548439, + "learning_rate": 3.9644041086793115e-05, + "loss": 0.3322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2208280861377716, + "step": 665, + "valid_targets_mean": 5207.0, + "valid_targets_min": 2639 + }, + { + "epoch": 1.0895036615134255, + "grad_norm": 0.7073358550878341, + "learning_rate": 3.9628647920486313e-05, + "loss": 0.3372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11236638575792313, + "step": 670, + "valid_targets_mean": 4536.8, + "valid_targets_min": 2480 + }, + { + "epoch": 1.097640358014646, + "grad_norm": 0.5573022103538503, + "learning_rate": 3.961293204574881e-05, + "loss": 0.3426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20048069953918457, + "step": 675, + "valid_targets_mean": 6317.4, + "valid_targets_min": 2446 + }, + { + "epoch": 1.1057770545158665, + "grad_norm": 0.6274818878999071, + "learning_rate": 3.959689372096034e-05, + "loss": 0.3242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19005917012691498, + "step": 680, + "valid_targets_mean": 6330.2, + "valid_targets_min": 3816 + }, + { + "epoch": 1.1139137510170871, + "grad_norm": 0.6054646269903999, + "learning_rate": 3.9580533209802e-05, + "loss": 0.3085, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.16152934730052948, + "step": 685, + "valid_targets_mean": 5820.4, + "valid_targets_min": 4284 + }, + { + "epoch": 1.1220504475183075, + "grad_norm": 0.5033565392236059, + "learning_rate": 3.9563850781251785e-05, + "loss": 0.3274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17019686102867126, + "step": 690, + "valid_targets_mean": 5422.2, + "valid_targets_min": 1910 + }, + { + "epoch": 1.1301871440195281, + "grad_norm": 0.6054316108985657, + "learning_rate": 3.954684670958027e-05, + "loss": 0.3027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1751125305891037, + "step": 695, + "valid_targets_mean": 5926.9, + "valid_targets_min": 2434 + }, + { + "epoch": 1.1383238405207485, + "grad_norm": 0.5215439424842706, + "learning_rate": 3.9529521274346036e-05, + "loss": 0.3181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1284661889076233, + "step": 700, + "valid_targets_mean": 5178.6, + "valid_targets_min": 3204 + }, + { + "epoch": 1.1464605370219692, + "grad_norm": 0.5678065554082121, + "learning_rate": 3.951187476039114e-05, + "loss": 0.3532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.209128275513649, + "step": 705, + "valid_targets_mean": 6623.4, + "valid_targets_min": 977 + }, + { + "epoch": 1.1545972335231895, + "grad_norm": 0.517064905402786, + "learning_rate": 3.9493907457836355e-05, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1468493938446045, + "step": 710, + "valid_targets_mean": 5925.8, + "valid_targets_min": 1771 + }, + { + "epoch": 1.1627339300244102, + "grad_norm": 0.5878396139579618, + "learning_rate": 3.947561966207646e-05, + "loss": 0.3283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23921839892864227, + "step": 715, + "valid_targets_mean": 7141.8, + "valid_targets_min": 4723 + }, + { + "epoch": 1.1708706265256306, + "grad_norm": 0.6620036041434723, + "learning_rate": 3.945701167377537e-05, + "loss": 0.314, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856687366962433, + "step": 720, + "valid_targets_mean": 5410.5, + 
"valid_targets_min": 1093 + }, + { + "epoch": 1.1790073230268512, + "grad_norm": 0.6212164485855235, + "learning_rate": 3.9438083798861145e-05, + "loss": 0.3013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14771541953086853, + "step": 725, + "valid_targets_mean": 3694.8, + "valid_targets_min": 1040 + }, + { + "epoch": 1.1871440195280716, + "grad_norm": 0.5515816569814562, + "learning_rate": 3.9418836348521045e-05, + "loss": 0.3499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17307670414447784, + "step": 730, + "valid_targets_mean": 5327.5, + "valid_targets_min": 3287 + }, + { + "epoch": 1.1952807160292922, + "grad_norm": 0.5104440783845825, + "learning_rate": 3.939926963919635e-05, + "loss": 0.3171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1218840628862381, + "step": 735, + "valid_targets_mean": 6123.0, + "valid_targets_min": 1751 + }, + { + "epoch": 1.2034174125305126, + "grad_norm": 0.4651258970343293, + "learning_rate": 3.9379383992577166e-05, + "loss": 0.3294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14117637276649475, + "step": 740, + "valid_targets_mean": 5320.9, + "valid_targets_min": 1282 + }, + { + "epoch": 1.211554109031733, + "grad_norm": 0.5744345931767872, + "learning_rate": 3.9359179735597174e-05, + "loss": 0.3076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1502009928226471, + "step": 745, + "valid_targets_mean": 5779.2, + "valid_targets_min": 1217 + }, + { + "epoch": 1.2196908055329536, + "grad_norm": 0.5866014778192581, + "learning_rate": 3.9338657200428215e-05, + "loss": 0.3116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15397050976753235, + "step": 750, + "valid_targets_mean": 5608.9, + "valid_targets_min": 2017 + }, + { + "epoch": 1.2278275020341742, + "grad_norm": 0.49106387973213833, + "learning_rate": 3.931781672447482e-05, + "loss": 0.3015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318609118461609, + "step": 755, + "valid_targets_mean": 5518.1, + "valid_targets_min": 1324 + }, + { + "epoch": 1.2359641985353946, + "grad_norm": 
0.535746593811322, + "learning_rate": 3.9296658650368707e-05, + "loss": 0.3423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20755848288536072, + "step": 760, + "valid_targets_mean": 6305.9, + "valid_targets_min": 2592 + }, + { + "epoch": 1.244100895036615, + "grad_norm": 0.7756294072634289, + "learning_rate": 3.927518332596313e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17277516424655914, + "step": 765, + "valid_targets_mean": 5248.5, + "valid_targets_min": 1305 + }, + { + "epoch": 1.2522375915378356, + "grad_norm": 0.4743651921197624, + "learning_rate": 3.925339110432716e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13423749804496765, + "step": 770, + "valid_targets_mean": 6439.6, + "valid_targets_min": 741 + }, + { + "epoch": 1.2603742880390563, + "grad_norm": 0.5214220787724717, + "learning_rate": 3.923128234373984e-05, + "loss": 0.3232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18879708647727966, + "step": 775, + "valid_targets_mean": 7090.8, + "valid_targets_min": 2174 + }, + { + "epoch": 1.2685109845402767, + "grad_norm": 0.5794444612757781, + "learning_rate": 3.9208857407684356e-05, + "loss": 0.3161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12169642746448517, + "step": 780, + "valid_targets_mean": 3770.6, + "valid_targets_min": 850 + }, + { + "epoch": 1.276647681041497, + "grad_norm": 0.9435889725374784, + "learning_rate": 3.918611666484205e-05, + "loss": 0.3243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18906664848327637, + "step": 785, + "valid_targets_mean": 8074.6, + "valid_targets_min": 4774 + }, + { + "epoch": 1.2847843775427177, + "grad_norm": 0.47698101778309643, + "learning_rate": 3.9163060489086305e-05, + "loss": 0.3169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1338808387517929, + "step": 790, + "valid_targets_mean": 5215.4, + "valid_targets_min": 1686 + }, + { + "epoch": 1.292921074043938, + "grad_norm": 0.5413947969675581, + "learning_rate": 3.913968925947647e-05, + "loss": 0.319, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.09044672548770905, + "step": 795, + "valid_targets_mean": 5740.1, + "valid_targets_min": 1164 + }, + { + "epoch": 1.3010577705451587, + "grad_norm": 0.4708777951982262, + "learning_rate": 3.91160033602516e-05, + "loss": 0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14017099142074585, + "step": 800, + "valid_targets_mean": 5497.0, + "valid_targets_min": 2063 + }, + { + "epoch": 1.309194467046379, + "grad_norm": 0.49062883161248416, + "learning_rate": 3.909200318082409e-05, + "loss": 0.2945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13735032081604004, + "step": 805, + "valid_targets_mean": 7376.5, + "valid_targets_min": 2311 + }, + { + "epoch": 1.3173311635475997, + "grad_norm": 0.5599551574609924, + "learning_rate": 3.906768911577337e-05, + "loss": 0.3023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14982812106609344, + "step": 810, + "valid_targets_mean": 5673.0, + "valid_targets_min": 1423 + }, + { + "epoch": 1.3254678600488201, + "grad_norm": 0.5614503755525145, + "learning_rate": 3.9043061564839325e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14781512320041656, + "step": 815, + "valid_targets_mean": 4763.9, + "valid_targets_min": 1955 + }, + { + "epoch": 1.3336045565500407, + "grad_norm": 0.4786045979460182, + "learning_rate": 3.901812093291579e-05, + "loss": 0.3153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15768522024154663, + "step": 820, + "valid_targets_mean": 6786.4, + "valid_targets_min": 2828 + }, + { + "epoch": 1.3417412530512611, + "grad_norm": 0.6560977260032284, + "learning_rate": 3.8992867630043855e-05, + "loss": 0.3185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19212278723716736, + "step": 825, + "valid_targets_mean": 6104.2, + "valid_targets_min": 2648 + }, + { + "epoch": 1.3498779495524817, + "grad_norm": 0.6879274724026825, + "learning_rate": 3.896730207140512e-05, + "loss": 0.3315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1794552505016327, + "step": 830, + 
"valid_targets_mean": 4482.6, + "valid_targets_min": 2414 + }, + { + "epoch": 1.3580146460537021, + "grad_norm": 0.5203653762780058, + "learning_rate": 3.894142467731492e-05, + "loss": 0.3268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13266748189926147, + "step": 835, + "valid_targets_mean": 5549.0, + "valid_targets_min": 1787 + }, + { + "epoch": 1.3661513425549228, + "grad_norm": 0.5135176764267754, + "learning_rate": 3.891523587321534e-05, + "loss": 0.291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13733965158462524, + "step": 840, + "valid_targets_mean": 4990.9, + "valid_targets_min": 975 + }, + { + "epoch": 1.3742880390561432, + "grad_norm": 0.550811284010829, + "learning_rate": 3.888873608966828e-05, + "loss": 0.3087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17198342084884644, + "step": 845, + "valid_targets_mean": 6360.2, + "valid_targets_min": 1126 + }, + { + "epoch": 1.3824247355573638, + "grad_norm": 0.46756550131963104, + "learning_rate": 3.886192576234836e-05, + "loss": 0.3135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16204015910625458, + "step": 850, + "valid_targets_mean": 6772.0, + "valid_targets_min": 1683 + }, + { + "epoch": 1.3905614320585842, + "grad_norm": 0.7407603671535479, + "learning_rate": 3.883480533203574e-05, + "loss": 0.3145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13542041182518005, + "step": 855, + "valid_targets_mean": 5719.1, + "valid_targets_min": 1115 + }, + { + "epoch": 1.3986981285598048, + "grad_norm": 0.49156469399841785, + "learning_rate": 3.880737524460888e-05, + "loss": 0.3135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15303273499011993, + "step": 860, + "valid_targets_mean": 5483.2, + "valid_targets_min": 1521 + }, + { + "epoch": 1.4068348250610252, + "grad_norm": 0.6176037825800013, + "learning_rate": 3.877963595103725e-05, + "loss": 0.3101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16262325644493103, + "step": 865, + "valid_targets_mean": 5055.8, + "valid_targets_min": 841 + }, + { + "epoch": 
1.4149715215622458, + "grad_norm": 0.48552964622802863, + "learning_rate": 3.875158790737383e-05, + "loss": 0.3196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07722240686416626, + "step": 870, + "valid_targets_mean": 5601.2, + "valid_targets_min": 614 + }, + { + "epoch": 1.4231082180634662, + "grad_norm": 0.5920608654664343, + "learning_rate": 3.87232315747477e-05, + "loss": 0.3416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2405494749546051, + "step": 875, + "valid_targets_mean": 5470.0, + "valid_targets_min": 3268 + }, + { + "epoch": 1.4312449145646866, + "grad_norm": 0.5377494420110379, + "learning_rate": 3.8694567419356414e-05, + "loss": 0.3561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14981283247470856, + "step": 880, + "valid_targets_mean": 6194.1, + "valid_targets_min": 4085 + }, + { + "epoch": 1.4393816110659072, + "grad_norm": 0.5103348467193803, + "learning_rate": 3.8665595912458346e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11243285983800888, + "step": 885, + "valid_targets_mean": 4186.1, + "valid_targets_min": 1883 + }, + { + "epoch": 1.4475183075671278, + "grad_norm": 0.49573235370231433, + "learning_rate": 3.863631753036492e-05, + "loss": 0.3084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15874309837818146, + "step": 890, + "valid_targets_mean": 5946.5, + "valid_targets_min": 1225 + }, + { + "epoch": 1.4556550040683482, + "grad_norm": 0.6018434178435743, + "learning_rate": 3.860673275443283e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1639241725206375, + "step": 895, + "valid_targets_mean": 4885.0, + "valid_targets_min": 576 + }, + { + "epoch": 1.4637917005695686, + "grad_norm": 0.6566529428934831, + "learning_rate": 3.857684207105606e-05, + "loss": 0.3089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16956907510757446, + "step": 900, + "valid_targets_mean": 5790.5, + "valid_targets_min": 2026 + }, + { + "epoch": 1.4719283970707893, + "grad_norm": 0.6479521917332159, + "learning_rate": 
3.854664597165795e-05, + "loss": 0.3106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18413260579109192, + "step": 905, + "valid_targets_mean": 4738.2, + "valid_targets_min": 956 + }, + { + "epoch": 1.4800650935720099, + "grad_norm": 0.6614607029109295, + "learning_rate": 3.851614495268308e-05, + "loss": 0.2903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12402161210775375, + "step": 910, + "valid_targets_mean": 6095.1, + "valid_targets_min": 1638 + }, + { + "epoch": 1.4882017900732303, + "grad_norm": 0.5271086668666808, + "learning_rate": 3.848533951558912e-05, + "loss": 0.3058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1627025306224823, + "step": 915, + "valid_targets_mean": 5300.0, + "valid_targets_min": 3448 + }, + { + "epoch": 1.4963384865744507, + "grad_norm": 0.4963158372629004, + "learning_rate": 3.845423016683856e-05, + "loss": 0.2982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14617714285850525, + "step": 920, + "valid_targets_mean": 5295.8, + "valid_targets_min": 2628 + }, + { + "epoch": 1.5044751830756713, + "grad_norm": 0.5240147926067843, + "learning_rate": 3.842281741789044e-05, + "loss": 0.3, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17961186170578003, + "step": 925, + "valid_targets_mean": 6973.5, + "valid_targets_min": 3698 + }, + { + "epoch": 1.512611879576892, + "grad_norm": 0.5416943235639524, + "learning_rate": 3.839110178519189e-05, + "loss": 0.2817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16294705867767334, + "step": 930, + "valid_targets_mean": 5591.8, + "valid_targets_min": 1248 + }, + { + "epoch": 1.5207485760781123, + "grad_norm": 0.5293043927321138, + "learning_rate": 3.835908379016966e-05, + "loss": 0.3194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18354995548725128, + "step": 935, + "valid_targets_mean": 6090.0, + "valid_targets_min": 2196 + }, + { + "epoch": 1.5288852725793327, + "grad_norm": 0.6655706480297668, + "learning_rate": 3.832676395922153e-05, + "loss": 0.3233, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.1539248526096344, + "step": 940, + "valid_targets_mean": 5697.2, + "valid_targets_min": 1431 + }, + { + "epoch": 1.5370219690805533, + "grad_norm": 0.4426293049857693, + "learning_rate": 3.82941428237077e-05, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14680266380310059, + "step": 945, + "valid_targets_mean": 7124.8, + "valid_targets_min": 2558 + }, + { + "epoch": 1.545158665581774, + "grad_norm": 0.5298814060383792, + "learning_rate": 3.826122091994198e-05, + "loss": 0.3119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19289115071296692, + "step": 950, + "valid_targets_mean": 6667.8, + "valid_targets_min": 4416 + }, + { + "epoch": 1.5532953620829943, + "grad_norm": 0.5978964272190899, + "learning_rate": 3.822799878918307e-05, + "loss": 0.2975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1660599559545517, + "step": 955, + "valid_targets_mean": 5468.9, + "valid_targets_min": 2193 + }, + { + "epoch": 1.5614320585842147, + "grad_norm": 0.561888873495537, + "learning_rate": 3.8194476977625556e-05, + "loss": 0.289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16075164079666138, + "step": 960, + "valid_targets_mean": 5366.9, + "valid_targets_min": 2111 + }, + { + "epoch": 1.5695687550854354, + "grad_norm": 0.4590821633958237, + "learning_rate": 3.8160656036391024e-05, + "loss": 0.2967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1680944561958313, + "step": 965, + "valid_targets_mean": 5881.0, + "valid_targets_min": 1981 + }, + { + "epoch": 1.577705451586656, + "grad_norm": 0.5016615802728864, + "learning_rate": 3.812653652151893e-05, + "loss": 0.3151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13341429829597473, + "step": 970, + "valid_targets_mean": 5605.9, + "valid_targets_min": 611 + }, + { + "epoch": 1.5858421480878762, + "grad_norm": 0.5543813724974704, + "learning_rate": 3.809211899395749e-05, + "loss": 0.3306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14888155460357666, + "step": 975, + "valid_targets_mean": 4731.4, + "valid_targets_min": 2004 
+ }, + { + "epoch": 1.5939788445890968, + "grad_norm": 0.6038340910652956, + "learning_rate": 3.8057404019554464e-05, + "loss": 0.3164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20221802592277527, + "step": 980, + "valid_targets_mean": 5403.4, + "valid_targets_min": 2495 + }, + { + "epoch": 1.6021155410903174, + "grad_norm": 0.5913190155057948, + "learning_rate": 3.802239216904782e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12410770356655121, + "step": 985, + "valid_targets_mean": 3962.5, + "valid_targets_min": 617 + }, + { + "epoch": 1.6102522375915378, + "grad_norm": 0.6202617073499859, + "learning_rate": 3.79870840180564e-05, + "loss": 0.3299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21062825620174408, + "step": 990, + "valid_targets_mean": 5342.4, + "valid_targets_min": 2829 + }, + { + "epoch": 1.6183889340927582, + "grad_norm": 0.42689752582314566, + "learning_rate": 3.795148014707042e-05, + "loss": 0.3248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13294441998004913, + "step": 995, + "valid_targets_mean": 6876.8, + "valid_targets_min": 3548 + }, + { + "epoch": 1.6265256305939788, + "grad_norm": 0.5097378796851677, + "learning_rate": 3.791558114144192e-05, + "loss": 0.3253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1804441213607788, + "step": 1000, + "valid_targets_mean": 5285.4, + "valid_targets_min": 492 + }, + { + "epoch": 1.6346623270951994, + "grad_norm": 0.6005808241710359, + "learning_rate": 3.7879387591375174e-05, + "loss": 0.3049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1667994111776352, + "step": 1005, + "valid_targets_mean": 4509.6, + "valid_targets_min": 1333 + }, + { + "epoch": 1.6427990235964198, + "grad_norm": 0.5231818030054007, + "learning_rate": 3.7842900091916956e-05, + "loss": 0.3145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13126087188720703, + "step": 1010, + "valid_targets_mean": 4980.8, + "valid_targets_min": 1418 + }, + { + "epoch": 1.6509357200976402, + "grad_norm": 0.5627337448855949, + 
"learning_rate": 3.7806119242946785e-05, + "loss": 0.3003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15183790028095245, + "step": 1015, + "valid_targets_mean": 4827.0, + "valid_targets_min": 1418 + }, + { + "epoch": 1.6590724165988608, + "grad_norm": 0.5414919926293886, + "learning_rate": 3.7769045649167034e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13620023429393768, + "step": 1020, + "valid_targets_mean": 5039.8, + "valid_targets_min": 1019 + }, + { + "epoch": 1.6672091131000815, + "grad_norm": 0.7328913562496312, + "learning_rate": 3.7731679920093e-05, + "loss": 0.3137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1815638542175293, + "step": 1025, + "valid_targets_mean": 5956.9, + "valid_targets_min": 3557 + }, + { + "epoch": 1.6753458096013019, + "grad_norm": 0.4817503231027874, + "learning_rate": 3.7694022670042894e-05, + "loss": 0.3071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1990923136472702, + "step": 1030, + "valid_targets_mean": 8180.1, + "valid_targets_min": 2581 + }, + { + "epoch": 1.6834825061025223, + "grad_norm": 0.5371824559449501, + "learning_rate": 3.765607451812773e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14338502287864685, + "step": 1035, + "valid_targets_mean": 6586.2, + "valid_targets_min": 2170 + }, + { + "epoch": 1.6916192026037429, + "grad_norm": 0.5238554557666614, + "learning_rate": 3.7617836088241144e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17069703340530396, + "step": 1040, + "valid_targets_mean": 7230.1, + "valid_targets_min": 4015 + }, + { + "epoch": 1.6997558991049635, + "grad_norm": 0.5512994537952781, + "learning_rate": 3.757930800904914e-05, + "loss": 0.3302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18536214530467987, + "step": 1045, + "valid_targets_mean": 5432.5, + "valid_targets_min": 2706 + }, + { + "epoch": 1.707892595606184, + "grad_norm": 0.48107844360810964, + "learning_rate": 3.754049091397976e-05, + "loss": 0.2831, + "loss_nan_ranks": 0, 
+ "loss_rank_avg": 0.14424890279769897, + "step": 1050, + "valid_targets_mean": 5978.6, + "valid_targets_min": 2666 + }, + { + "epoch": 1.7160292921074043, + "grad_norm": 0.6075024591611888, + "learning_rate": 3.7501385441212664e-05, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17810899019241333, + "step": 1055, + "valid_targets_mean": 4907.1, + "valid_targets_min": 1817 + }, + { + "epoch": 1.724165988608625, + "grad_norm": 0.5996016937895567, + "learning_rate": 3.746199223366863e-05, + "loss": 0.3143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1289026141166687, + "step": 1060, + "valid_targets_mean": 5785.2, + "valid_targets_min": 2095 + }, + { + "epoch": 1.7323026851098455, + "grad_norm": 0.5765570637174628, + "learning_rate": 3.7422311938999013e-05, + "loss": 0.3179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16623811423778534, + "step": 1065, + "valid_targets_mean": 4367.6, + "valid_targets_min": 665 + }, + { + "epoch": 1.740439381611066, + "grad_norm": 0.5940920131339937, + "learning_rate": 3.738234520957506e-05, + "loss": 0.3068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19219470024108887, + "step": 1070, + "valid_targets_mean": 6104.2, + "valid_targets_min": 1766 + }, + { + "epoch": 1.7485760781122863, + "grad_norm": 0.5700715611730844, + "learning_rate": 3.73420927024772e-05, + "loss": 0.3197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19549089670181274, + "step": 1075, + "valid_targets_mean": 5461.0, + "valid_targets_min": 1614 + }, + { + "epoch": 1.756712774613507, + "grad_norm": 0.7143078330637325, + "learning_rate": 3.730155507948426e-05, + "loss": 0.3209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13924923539161682, + "step": 1080, + "valid_targets_mean": 5168.4, + "valid_targets_min": 2990 + }, + { + "epoch": 1.7648494711147276, + "grad_norm": 0.5056362879775232, + "learning_rate": 3.726073300706256e-05, + "loss": 0.3125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2037101835012436, + "step": 1085, + "valid_targets_mean": 
7316.2, + "valid_targets_min": 4681 + }, + { + "epoch": 1.772986167615948, + "grad_norm": 0.6236146764427375, + "learning_rate": 3.721962715635495e-05, + "loss": 0.2814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10541586577892303, + "step": 1090, + "valid_targets_mean": 6140.2, + "valid_targets_min": 2053 + }, + { + "epoch": 1.7811228641171684, + "grad_norm": 0.4808526495829943, + "learning_rate": 3.7178238203169804e-05, + "loss": 0.3098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11800515651702881, + "step": 1095, + "valid_targets_mean": 5675.6, + "valid_targets_min": 3375 + }, + { + "epoch": 1.789259560618389, + "grad_norm": 0.5465172567640042, + "learning_rate": 3.7136566827969895e-05, + "loss": 0.3057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11254914849996567, + "step": 1100, + "valid_targets_mean": 4710.8, + "valid_targets_min": 3037 + }, + { + "epoch": 1.7973962571196096, + "grad_norm": 0.6709120787275277, + "learning_rate": 3.70946137158612e-05, + "loss": 0.3051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1213892251253128, + "step": 1105, + "valid_targets_mean": 4375.8, + "valid_targets_min": 767 + }, + { + "epoch": 1.80553295362083, + "grad_norm": 0.5484870209932681, + "learning_rate": 3.705237955658166e-05, + "loss": 0.3045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12435522675514221, + "step": 1110, + "valid_targets_mean": 4072.5, + "valid_targets_min": 628 + }, + { + "epoch": 1.8136696501220504, + "grad_norm": 0.5750687925217363, + "learning_rate": 3.70098650444898e-05, + "loss": 0.3174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17003116011619568, + "step": 1115, + "valid_targets_mean": 5343.2, + "valid_targets_min": 2628 + }, + { + "epoch": 1.821806346623271, + "grad_norm": 0.546589915088346, + "learning_rate": 3.6967070878553346e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13826923072338104, + "step": 1120, + "valid_targets_mean": 5285.4, + "valid_targets_min": 1237 + }, + { + "epoch": 1.8299430431244914, + 
"grad_norm": 0.4773415462163218, + "learning_rate": 3.692399776233775e-05, + "loss": 0.3012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16702383756637573, + "step": 1125, + "valid_targets_mean": 6414.5, + "valid_targets_min": 913 + }, + { + "epoch": 1.8380797396257118, + "grad_norm": 0.5328441386714764, + "learning_rate": 3.688064640399456e-05, + "loss": 0.3206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19889968633651733, + "step": 1130, + "valid_targets_mean": 5738.4, + "valid_targets_min": 2918 + }, + { + "epoch": 1.8462164361269324, + "grad_norm": 0.6976118740867715, + "learning_rate": 3.683701751624983e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18962863087654114, + "step": 1135, + "valid_targets_mean": 4923.5, + "valid_targets_min": 566 + }, + { + "epoch": 1.854353132628153, + "grad_norm": 0.5304211947920925, + "learning_rate": 3.67931118163924e-05, + "loss": 0.3114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14399157464504242, + "step": 1140, + "valid_targets_mean": 4750.4, + "valid_targets_min": 3074 + }, + { + "epoch": 1.8624898291293734, + "grad_norm": 0.5257505288021834, + "learning_rate": 3.674893002626208e-05, + "loss": 0.3281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1668822169303894, + "step": 1145, + "valid_targets_mean": 4587.6, + "valid_targets_min": 2853 + }, + { + "epoch": 1.8706265256305938, + "grad_norm": 0.5378879554200597, + "learning_rate": 3.6704472872237786e-05, + "loss": 0.2895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13983070850372314, + "step": 1150, + "valid_targets_mean": 4616.9, + "valid_targets_min": 1062 + }, + { + "epoch": 1.8787632221318145, + "grad_norm": 0.4433938061464499, + "learning_rate": 3.665974108522562e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13427463173866272, + "step": 1155, + "valid_targets_mean": 6249.2, + "valid_targets_min": 4339 + }, + { + "epoch": 1.886899918633035, + "grad_norm": 0.5206360977809323, + "learning_rate": 3.6614735400646824e-05, + "loss": 
0.3296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16344305872917175, + "step": 1160, + "valid_targets_mean": 5424.0, + "valid_targets_min": 1984 + }, + { + "epoch": 1.8950366151342555, + "grad_norm": 0.7386914345471229, + "learning_rate": 3.6569456558425724e-05, + "loss": 0.3082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1590915322303772, + "step": 1165, + "valid_targets_mean": 4894.9, + "valid_targets_min": 1686 + }, + { + "epoch": 1.9031733116354759, + "grad_norm": 0.5227919706663646, + "learning_rate": 3.6523905302977524e-05, + "loss": 0.2904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14777085185050964, + "step": 1170, + "valid_targets_mean": 5690.5, + "valid_targets_min": 2053 + }, + { + "epoch": 1.9113100081366965, + "grad_norm": 0.4725060339813615, + "learning_rate": 3.64780823831961e-05, + "loss": 0.3069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11375989019870758, + "step": 1175, + "valid_targets_mean": 5967.5, + "valid_targets_min": 2992 + }, + { + "epoch": 1.9194467046379171, + "grad_norm": 0.5531722742216661, + "learning_rate": 3.643198855244167e-05, + "loss": 0.3244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16253279149532318, + "step": 1180, + "valid_targets_mean": 4639.1, + "valid_targets_min": 536 + }, + { + "epoch": 1.9275834011391375, + "grad_norm": 0.5643086611406565, + "learning_rate": 3.6385624568528424e-05, + "loss": 0.341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2203579545021057, + "step": 1185, + "valid_targets_mean": 5916.4, + "valid_targets_min": 2444 + }, + { + "epoch": 1.935720097640358, + "grad_norm": 0.5188039070960107, + "learning_rate": 3.6338991193712045e-05, + "loss": 0.3237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15397492051124573, + "step": 1190, + "valid_targets_mean": 5445.5, + "valid_targets_min": 1156 + }, + { + "epoch": 1.9438567941415785, + "grad_norm": 0.5944670726923176, + "learning_rate": 3.629208919467718e-05, + "loss": 0.276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10873277485370636, + "step": 
1195, + "valid_targets_mean": 5434.2, + "valid_targets_min": 1056 + }, + { + "epoch": 1.9519934906427991, + "grad_norm": 0.5820905612331527, + "learning_rate": 3.624491934252487e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17353525757789612, + "step": 1200, + "valid_targets_mean": 5973.5, + "valid_targets_min": 2297 + }, + { + "epoch": 1.9601301871440195, + "grad_norm": 0.5328994536277564, + "learning_rate": 3.619748241275981e-05, + "loss": 0.3057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15726155042648315, + "step": 1205, + "valid_targets_mean": 5639.8, + "valid_targets_min": 2308 + }, + { + "epoch": 1.96826688364524, + "grad_norm": 0.5129359181321838, + "learning_rate": 3.614977918527767e-05, + "loss": 0.3115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18390318751335144, + "step": 1210, + "valid_targets_mean": 5109.2, + "valid_targets_min": 1159 + }, + { + "epoch": 1.9764035801464606, + "grad_norm": 0.5994511326540063, + "learning_rate": 3.610181044435221e-05, + "loss": 0.2885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13535848259925842, + "step": 1215, + "valid_targets_mean": 3102.2, + "valid_targets_min": 560 + }, + { + "epoch": 1.9845402766476812, + "grad_norm": 0.4870695756000568, + "learning_rate": 3.605357697862242e-05, + "loss": 0.3185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12981772422790527, + "step": 1220, + "valid_targets_mean": 5653.2, + "valid_targets_min": 1754 + }, + { + "epoch": 1.9926769731489016, + "grad_norm": 0.43943406829275067, + "learning_rate": 3.6005079581079545e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13087867200374603, + "step": 1225, + "valid_targets_mean": 5805.1, + "valid_targets_min": 1258 + }, + { + "epoch": 2.0, + "grad_norm": 0.9516475150367983, + "learning_rate": 3.595631904905406e-05, + "loss": 0.275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2887159287929535, + "step": 1230, + "valid_targets_mean": 6080.8, + "valid_targets_min": 2494 + }, + { + "epoch": 
2.0081366965012206, + "grad_norm": 0.6856718094729896, + "learning_rate": 3.590729618420255e-05, + "loss": 0.292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12657839059829712, + "step": 1235, + "valid_targets_mean": 4212.5, + "valid_targets_min": 1370 + }, + { + "epoch": 2.016273393002441, + "grad_norm": 0.558226531525447, + "learning_rate": 3.585801179249452e-05, + "loss": 0.3125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1799025982618332, + "step": 1240, + "valid_targets_mean": 7292.6, + "valid_targets_min": 3950 + }, + { + "epoch": 2.0244100895036614, + "grad_norm": 0.5075555500011202, + "learning_rate": 3.5808466684199166e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15327878296375275, + "step": 1245, + "valid_targets_mean": 6603.9, + "valid_targets_min": 2080 + }, + { + "epoch": 2.032546786004882, + "grad_norm": 0.6541052075091615, + "learning_rate": 3.575866167387204e-05, + "loss": 0.2769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10481029748916626, + "step": 1250, + "valid_targets_mean": 4328.6, + "valid_targets_min": 1036 + }, + { + "epoch": 2.0406834825061027, + "grad_norm": 0.5007902862675059, + "learning_rate": 3.570859758034165e-05, + "loss": 0.3047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11358005553483963, + "step": 1255, + "valid_targets_mean": 6206.4, + "valid_targets_min": 966 + }, + { + "epoch": 2.048820179007323, + "grad_norm": 0.4639873782398551, + "learning_rate": 3.565827522669605e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18322622776031494, + "step": 1260, + "valid_targets_mean": 7581.5, + "valid_targets_min": 1826 + }, + { + "epoch": 2.0569568755085434, + "grad_norm": 0.47229568303905795, + "learning_rate": 3.5607695440269214e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11020516604185104, + "step": 1265, + "valid_targets_mean": 5878.0, + "valid_targets_min": 743 + }, + { + "epoch": 2.065093572009764, + "grad_norm": 0.4606387724405939, + "learning_rate": 
3.555685905262751e-05, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12334157526493073, + "step": 1270, + "valid_targets_mean": 7156.1, + "valid_targets_min": 3306 + }, + { + "epoch": 2.0732302685109847, + "grad_norm": 0.5568749724181045, + "learning_rate": 3.5505766899556026e-05, + "loss": 0.283, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18671417236328125, + "step": 1275, + "valid_targets_mean": 5929.2, + "valid_targets_min": 2774 + }, + { + "epoch": 2.081366965012205, + "grad_norm": 0.5320198579082824, + "learning_rate": 3.5454419821044786e-05, + "loss": 0.2846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1624656319618225, + "step": 1280, + "valid_targets_mean": 5359.9, + "valid_targets_min": 1879 + }, + { + "epoch": 2.0895036615134255, + "grad_norm": 0.4946581187629097, + "learning_rate": 3.540281866127496e-05, + "loss": 0.2829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.113202303647995, + "step": 1285, + "valid_targets_mean": 4963.8, + "valid_targets_min": 590 + }, + { + "epoch": 2.097640358014646, + "grad_norm": 0.5314804091550819, + "learning_rate": 3.5350964268605006e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1298123598098755, + "step": 1290, + "valid_targets_mean": 5789.2, + "valid_targets_min": 524 + }, + { + "epoch": 2.1057770545158667, + "grad_norm": 0.4608248324996564, + "learning_rate": 3.5298857495556684e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09899343550205231, + "step": 1295, + "valid_targets_mean": 6090.4, + "valid_targets_min": 625 + }, + { + "epoch": 2.113913751017087, + "grad_norm": 0.6160654304320085, + "learning_rate": 3.524649919880108e-05, + "loss": 0.3006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561860293149948, + "step": 1300, + "valid_targets_mean": 5319.5, + "valid_targets_min": 2221 + }, + { + "epoch": 2.1220504475183075, + "grad_norm": 0.6034183220777789, + "learning_rate": 3.519389023914449e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.1617409586906433, + "step": 1305, + "valid_targets_mean": 4925.8, + "valid_targets_min": 1622 + }, + { + "epoch": 2.130187144019528, + "grad_norm": 0.5445584399411685, + "learning_rate": 3.5141031481514276e-05, + "loss": 0.2927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15059779584407806, + "step": 1310, + "valid_targets_mean": 4658.4, + "valid_targets_min": 2734 + }, + { + "epoch": 2.1383238405207488, + "grad_norm": 0.5409881257424901, + "learning_rate": 3.508792379494468e-05, + "loss": 0.2813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463170051574707, + "step": 1315, + "valid_targets_mean": 5791.5, + "valid_targets_min": 1064 + }, + { + "epoch": 2.146460537021969, + "grad_norm": 1.106581439320117, + "learning_rate": 3.503456805256246e-05, + "loss": 0.2767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1498536616563797, + "step": 1320, + "valid_targets_mean": 5623.2, + "valid_targets_min": 2620 + }, + { + "epoch": 2.1545972335231895, + "grad_norm": 0.4584831942256986, + "learning_rate": 3.4980965131572616e-05, + "loss": 0.2783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14147350192070007, + "step": 1325, + "valid_targets_mean": 5904.6, + "valid_targets_min": 3878 + }, + { + "epoch": 2.16273393002441, + "grad_norm": 0.4492273639183458, + "learning_rate": 3.492711591324392e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14748775959014893, + "step": 1330, + "valid_targets_mean": 5023.4, + "valid_targets_min": 2861 + }, + { + "epoch": 2.170870626525631, + "grad_norm": 0.5811998512861577, + "learning_rate": 3.487302128289445e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11501553654670715, + "step": 1335, + "valid_targets_mean": 2852.8, + "valid_targets_min": 977 + }, + { + "epoch": 2.179007323026851, + "grad_norm": 0.4764245873738506, + "learning_rate": 3.481868212987702e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1383422166109085, + "step": 1340, + "valid_targets_mean": 6408.1, + "valid_targets_min": 
2128 + }, + { + "epoch": 2.1871440195280716, + "grad_norm": 0.48558055183022075, + "learning_rate": 3.476409934756456e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1000407487154007, + "step": 1345, + "valid_targets_mean": 5105.9, + "valid_targets_min": 542 + }, + { + "epoch": 2.195280716029292, + "grad_norm": 0.5180718638517471, + "learning_rate": 3.470927383333544e-05, + "loss": 0.2879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10666623711585999, + "step": 1350, + "valid_targets_mean": 5258.9, + "valid_targets_min": 3522 + }, + { + "epoch": 2.203417412530513, + "grad_norm": 0.5400230528001939, + "learning_rate": 3.46542064885587e-05, + "loss": 0.3084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12988999485969543, + "step": 1355, + "valid_targets_mean": 4880.5, + "valid_targets_min": 573 + }, + { + "epoch": 2.211554109031733, + "grad_norm": 0.45852170946065385, + "learning_rate": 3.459889821857926e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11199237406253815, + "step": 1360, + "valid_targets_mean": 4982.0, + "valid_targets_min": 310 + }, + { + "epoch": 2.2196908055329536, + "grad_norm": 0.5665127278933965, + "learning_rate": 3.4543349932702984e-05, + "loss": 0.3028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18964138627052307, + "step": 1365, + "valid_targets_mean": 6463.2, + "valid_targets_min": 3622 + }, + { + "epoch": 2.2278275020341742, + "grad_norm": 0.5118619545403494, + "learning_rate": 3.448756254418179e-05, + "loss": 0.2826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18084275722503662, + "step": 1370, + "valid_targets_mean": 5036.5, + "valid_targets_min": 1393 + }, + { + "epoch": 2.2359641985353944, + "grad_norm": 0.4567137661572972, + "learning_rate": 3.443153697019861e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13496790826320648, + "step": 1375, + "valid_targets_mean": 6337.4, + "valid_targets_min": 2816 + }, + { + "epoch": 2.244100895036615, + "grad_norm": 0.5129895131737416, + 
"learning_rate": 3.437527413185227e-05, + "loss": 0.294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15807993710041046, + "step": 1380, + "valid_targets_mean": 6726.0, + "valid_targets_min": 1732 + }, + { + "epoch": 2.2522375915378356, + "grad_norm": 0.658861425263113, + "learning_rate": 3.431877495414242e-05, + "loss": 0.295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.119639553129673, + "step": 1385, + "valid_targets_mean": 5690.6, + "valid_targets_min": 1710 + }, + { + "epoch": 2.2603742880390563, + "grad_norm": 0.5053124575310604, + "learning_rate": 3.42620403659543e-05, + "loss": 0.2872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14893598854541779, + "step": 1390, + "valid_targets_mean": 5824.2, + "valid_targets_min": 741 + }, + { + "epoch": 2.268510984540277, + "grad_norm": 0.5549205480957295, + "learning_rate": 3.420507130004341e-05, + "loss": 0.2929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14574193954467773, + "step": 1395, + "valid_targets_mean": 5895.8, + "valid_targets_min": 3196 + }, + { + "epoch": 2.276647681041497, + "grad_norm": 0.5169854577549462, + "learning_rate": 3.414786869302029e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1294897347688675, + "step": 1400, + "valid_targets_mean": 4723.2, + "valid_targets_min": 3238 + }, + { + "epoch": 2.2847843775427177, + "grad_norm": 0.4968472508374019, + "learning_rate": 3.4090433485334996e-05, + "loss": 0.2776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13790994882583618, + "step": 1405, + "valid_targets_mean": 6752.5, + "valid_targets_min": 2943 + }, + { + "epoch": 2.2929210740439383, + "grad_norm": 0.5185289577994608, + "learning_rate": 3.403276662126173e-05, + "loss": 0.2816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724536418914795, + "step": 1410, + "valid_targets_mean": 6443.1, + "valid_targets_min": 1159 + }, + { + "epoch": 2.3010577705451585, + "grad_norm": 0.5649680792199295, + "learning_rate": 3.397486904888328e-05, + "loss": 0.2871, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.13218429684638977, + "step": 1415, + "valid_targets_mean": 4330.9, + "valid_targets_min": 2339 + }, + { + "epoch": 2.309194467046379, + "grad_norm": 0.5035857907328071, + "learning_rate": 3.391674172007544e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1316101849079132, + "step": 1420, + "valid_targets_mean": 4778.2, + "valid_targets_min": 2482 + }, + { + "epoch": 2.3173311635475997, + "grad_norm": 0.5303641033831891, + "learning_rate": 3.3858385590491347e-05, + "loss": 0.298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1569986343383789, + "step": 1425, + "valid_targets_mean": 6596.2, + "valid_targets_min": 4475 + }, + { + "epoch": 2.3254678600488203, + "grad_norm": 0.48585758657147765, + "learning_rate": 3.379980161954578e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11201968044042587, + "step": 1430, + "valid_targets_mean": 5466.8, + "valid_targets_min": 2472 + }, + { + "epoch": 2.3336045565500405, + "grad_norm": 0.4949945647060311, + "learning_rate": 3.3740990770399404e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15348434448242188, + "step": 1435, + "valid_targets_mean": 5463.9, + "valid_targets_min": 2560 + }, + { + "epoch": 2.341741253051261, + "grad_norm": 0.51532488211287, + "learning_rate": 3.368195400994289e-05, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14001132547855377, + "step": 1440, + "valid_targets_mean": 5797.4, + "valid_targets_min": 3868 + }, + { + "epoch": 2.3498779495524817, + "grad_norm": 0.4954512879924778, + "learning_rate": 3.362269230878107e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1684633195400238, + "step": 1445, + "valid_targets_mean": 5250.2, + "valid_targets_min": 484 + }, + { + "epoch": 2.3580146460537024, + "grad_norm": 0.5206552287941425, + "learning_rate": 3.356320664121694e-05, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13183483481407166, + "step": 1450, + "valid_targets_mean": 5600.0, + 
"valid_targets_min": 2037 + }, + { + "epoch": 2.3661513425549225, + "grad_norm": 0.5806830244648058, + "learning_rate": 3.350349798523566e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11899223923683167, + "step": 1455, + "valid_targets_mean": 4941.5, + "valid_targets_min": 1237 + }, + { + "epoch": 2.374288039056143, + "grad_norm": 0.4835797115209612, + "learning_rate": 3.344356732248849e-05, + "loss": 0.2934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12682092189788818, + "step": 1460, + "valid_targets_mean": 5932.6, + "valid_targets_min": 2233 + }, + { + "epoch": 2.382424735557364, + "grad_norm": 0.46420895136183626, + "learning_rate": 3.33834156382766e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1228257492184639, + "step": 1465, + "valid_targets_mean": 6454.4, + "valid_targets_min": 3734 + }, + { + "epoch": 2.3905614320585844, + "grad_norm": 0.4848046054819125, + "learning_rate": 3.332304392153494e-05, + "loss": 0.2776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13247308135032654, + "step": 1470, + "valid_targets_mean": 5843.1, + "valid_targets_min": 3532 + }, + { + "epoch": 2.3986981285598046, + "grad_norm": 0.5322468446136851, + "learning_rate": 3.326245316481591e-05, + "loss": 0.2844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14443229138851166, + "step": 1475, + "valid_targets_mean": 5665.5, + "valid_targets_min": 2645 + }, + { + "epoch": 2.406834825061025, + "grad_norm": 0.4953219209876432, + "learning_rate": 3.320164436427311e-05, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12060455232858658, + "step": 1480, + "valid_targets_mean": 5264.1, + "valid_targets_min": 3318 + }, + { + "epoch": 2.414971521562246, + "grad_norm": 0.5103794103032024, + "learning_rate": 3.314061851964491e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12996214628219604, + "step": 1485, + "valid_targets_mean": 4645.6, + "valid_targets_min": 3056 + }, + { + "epoch": 2.423108218063466, + "grad_norm": 
0.48812963792791125, + "learning_rate": 3.307937663423804e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13980984687805176, + "step": 1490, + "valid_targets_mean": 6567.1, + "valid_targets_min": 3602 + }, + { + "epoch": 2.4312449145646866, + "grad_norm": 0.6781638230036592, + "learning_rate": 3.3017919714911094e-05, + "loss": 0.2775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11503338813781738, + "step": 1495, + "valid_targets_mean": 5187.6, + "valid_targets_min": 598 + }, + { + "epoch": 2.4393816110659072, + "grad_norm": 0.494679876515722, + "learning_rate": 3.295624877205796e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12201376259326935, + "step": 1500, + "valid_targets_mean": 5504.5, + "valid_targets_min": 1291 + }, + { + "epoch": 2.447518307567128, + "grad_norm": 0.6017430889988331, + "learning_rate": 3.2894364819591224e-05, + "loss": 0.2944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.154220849275589, + "step": 1505, + "valid_targets_mean": 4390.1, + "valid_targets_min": 1214 + }, + { + "epoch": 2.4556550040683485, + "grad_norm": 0.5265681649533256, + "learning_rate": 3.28322688749255e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14010374248027802, + "step": 1510, + "valid_targets_mean": 4612.5, + "valid_targets_min": 1296 + }, + { + "epoch": 2.4637917005695686, + "grad_norm": 0.551484588945014, + "learning_rate": 3.2769961958960694e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14503279328346252, + "step": 1515, + "valid_targets_mean": 5862.9, + "valid_targets_min": 2141 + }, + { + "epoch": 2.4719283970707893, + "grad_norm": 0.5213608708077778, + "learning_rate": 3.270744509606523e-05, + "loss": 0.2875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12514446675777435, + "step": 1520, + "valid_targets_mean": 5143.2, + "valid_targets_min": 2810 + }, + { + "epoch": 2.48006509357201, + "grad_norm": 0.5609869887661976, + "learning_rate": 3.26447193140592e-05, + "loss": 0.2815, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.16480261087417603, + "step": 1525, + "valid_targets_mean": 4906.8, + "valid_targets_min": 2091 + }, + { + "epoch": 2.48820179007323, + "grad_norm": 0.45622929795341044, + "learning_rate": 3.2581785644197456e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10129719972610474, + "step": 1530, + "valid_targets_mean": 4956.2, + "valid_targets_min": 2164 + }, + { + "epoch": 2.4963384865744507, + "grad_norm": 0.5164564778909829, + "learning_rate": 3.251864512115271e-05, + "loss": 0.3104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2165476679801941, + "step": 1535, + "valid_targets_mean": 6606.1, + "valid_targets_min": 2363 + }, + { + "epoch": 2.5044751830756713, + "grad_norm": 0.3983072775801996, + "learning_rate": 3.2455298782998424e-05, + "loss": 0.2737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10032185167074203, + "step": 1540, + "valid_targets_mean": 8497.1, + "valid_targets_min": 2843 + }, + { + "epoch": 2.512611879576892, + "grad_norm": 0.5732682239726878, + "learning_rate": 3.2391747671191854e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12304198741912842, + "step": 1545, + "valid_targets_mean": 6310.2, + "valid_targets_min": 2146 + }, + { + "epoch": 2.5207485760781125, + "grad_norm": 0.48301651904457216, + "learning_rate": 3.2327992830556846e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1629238873720169, + "step": 1550, + "valid_targets_mean": 5748.8, + "valid_targets_min": 845 + }, + { + "epoch": 2.5288852725793327, + "grad_norm": 0.4455251023255143, + "learning_rate": 3.22640353092667e-05, + "loss": 0.3035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1761193573474884, + "step": 1555, + "valid_targets_mean": 5489.5, + "valid_targets_min": 1059 + }, + { + "epoch": 2.5370219690805533, + "grad_norm": 0.4809788733224243, + "learning_rate": 3.2199876158826915e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458398401737213, + "step": 1560, + 
"valid_targets_mean": 7149.8, + "valid_targets_min": 3817 + }, + { + "epoch": 2.545158665581774, + "grad_norm": 0.45307852053685194, + "learning_rate": 3.2135516434057915e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11577823013067245, + "step": 1565, + "valid_targets_mean": 5393.9, + "valid_targets_min": 2233 + }, + { + "epoch": 2.553295362082994, + "grad_norm": 0.49760950499062984, + "learning_rate": 3.2070957193077705e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12272712588310242, + "step": 1570, + "valid_targets_mean": 4861.1, + "valid_targets_min": 3252 + }, + { + "epoch": 2.5614320585842147, + "grad_norm": 0.5355275643102434, + "learning_rate": 3.200619949728448e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1525430828332901, + "step": 1575, + "valid_targets_mean": 6466.9, + "valid_targets_min": 3548 + }, + { + "epoch": 2.5695687550854354, + "grad_norm": 0.5018072667233595, + "learning_rate": 3.194124441133916e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14310288429260254, + "step": 1580, + "valid_targets_mean": 4550.6, + "valid_targets_min": 2784 + }, + { + "epoch": 2.577705451586656, + "grad_norm": 0.5173938528400699, + "learning_rate": 3.187609300314789e-05, + "loss": 0.3062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13510233163833618, + "step": 1585, + "valid_targets_mean": 5440.2, + "valid_targets_min": 923 + }, + { + "epoch": 2.585842148087876, + "grad_norm": 0.5159504691075016, + "learning_rate": 3.181074634384451e-05, + "loss": 0.2891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14067131280899048, + "step": 1590, + "valid_targets_mean": 5820.6, + "valid_targets_min": 3059 + }, + { + "epoch": 2.5939788445890968, + "grad_norm": 0.5405735538422601, + "learning_rate": 3.1745205507772876e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12036160379648209, + "step": 1595, + "valid_targets_mean": 5669.1, + "valid_targets_min": 2723 + }, + { + "epoch": 
2.6021155410903174, + "grad_norm": 0.7407500991451971, + "learning_rate": 3.16794715724693e-05, + "loss": 0.2834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15827390551567078, + "step": 1600, + "valid_targets_mean": 5583.9, + "valid_targets_min": 2685 + }, + { + "epoch": 2.6102522375915376, + "grad_norm": 0.5099598998443594, + "learning_rate": 3.161354561864474e-05, + "loss": 0.2981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440209299325943, + "step": 1605, + "valid_targets_mean": 5326.2, + "valid_targets_min": 1707 + }, + { + "epoch": 2.618388934092758, + "grad_norm": 0.5479785273711072, + "learning_rate": 3.154742873016707e-05, + "loss": 0.2871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13522969186306, + "step": 1610, + "valid_targets_mean": 5222.2, + "valid_targets_min": 1303 + }, + { + "epoch": 2.626525630593979, + "grad_norm": 0.47481378459752427, + "learning_rate": 3.14811219940433e-05, + "loss": 0.272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13894972205162048, + "step": 1615, + "valid_targets_mean": 6755.9, + "valid_targets_min": 3830 + }, + { + "epoch": 2.6346623270951994, + "grad_norm": 0.4650084315327305, + "learning_rate": 3.141462650040161e-05, + "loss": 0.2854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1224336326122284, + "step": 1620, + "valid_targets_mean": 5373.9, + "valid_targets_min": 1348 + }, + { + "epoch": 2.64279902359642, + "grad_norm": 0.5401018992903237, + "learning_rate": 3.134794334247351e-05, + "loss": 0.31, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1545572429895401, + "step": 1625, + "valid_targets_mean": 5654.0, + "valid_targets_min": 1794 + }, + { + "epoch": 2.6509357200976402, + "grad_norm": 0.48016953909282284, + "learning_rate": 3.1281073616575856e-05, + "loss": 0.2915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13257913291454315, + "step": 1630, + "valid_targets_mean": 5957.4, + "valid_targets_min": 3444 + }, + { + "epoch": 2.659072416598861, + "grad_norm": 0.5599972237868494, + "learning_rate": 
3.121401842209279e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22545486688613892, + "step": 1635, + "valid_targets_mean": 6662.0, + "valid_targets_min": 3924 + }, + { + "epoch": 2.6672091131000815, + "grad_norm": 0.5099869798156484, + "learning_rate": 3.114677886145768e-05, + "loss": 0.2907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15294325351715088, + "step": 1640, + "valid_targets_mean": 5627.6, + "valid_targets_min": 2398 + }, + { + "epoch": 2.6753458096013016, + "grad_norm": 0.5577431582136917, + "learning_rate": 3.107935604013501e-05, + "loss": 0.2957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1127748042345047, + "step": 1645, + "valid_targets_mean": 4538.1, + "valid_targets_min": 1505 + }, + { + "epoch": 2.6834825061025223, + "grad_norm": 0.5463423029256798, + "learning_rate": 3.101175106660219e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1513296216726303, + "step": 1650, + "valid_targets_mean": 5564.5, + "valid_targets_min": 1164 + }, + { + "epoch": 2.691619202603743, + "grad_norm": 0.6027625180902552, + "learning_rate": 3.094396505233135e-05, + "loss": 0.3009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1423185020685196, + "step": 1655, + "valid_targets_mean": 4589.9, + "valid_targets_min": 1015 + }, + { + "epoch": 2.6997558991049635, + "grad_norm": 0.43885668468200845, + "learning_rate": 3.087599911177103e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12957511842250824, + "step": 1660, + "valid_targets_mean": 7121.9, + "valid_targets_min": 4396 + }, + { + "epoch": 2.707892595606184, + "grad_norm": 0.5377034711492127, + "learning_rate": 3.0807854362327906e-05, + "loss": 0.3039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14502663910388947, + "step": 1665, + "valid_targets_mean": 5456.5, + "valid_targets_min": 3570 + }, + { + "epoch": 2.7160292921074043, + "grad_norm": 0.5001336173501958, + "learning_rate": 3.073953192434837e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12598839402198792, + "step": 1670, + "valid_targets_mean": 4810.9, + "valid_targets_min": 873 + }, + { + "epoch": 2.724165988608625, + "grad_norm": 0.5220789996194123, + "learning_rate": 3.067103292110017e-05, + "loss": 0.2806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14378760755062103, + "step": 1675, + "valid_targets_mean": 4979.9, + "valid_targets_min": 2252 + }, + { + "epoch": 2.7323026851098455, + "grad_norm": 0.5162691594592334, + "learning_rate": 3.060235847875387e-05, + "loss": 0.3052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1796841025352478, + "step": 1680, + "valid_targets_mean": 6276.2, + "valid_targets_min": 521 + }, + { + "epoch": 2.7404393816110657, + "grad_norm": 0.48029341615912935, + "learning_rate": 3.05335097263644e-05, + "loss": 0.2659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.125966876745224, + "step": 1685, + "valid_targets_mean": 5720.2, + "valid_targets_min": 1338 + }, + { + "epoch": 2.7485760781122863, + "grad_norm": 0.4803595848509251, + "learning_rate": 3.0464487795852463e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09926225244998932, + "step": 1690, + "valid_targets_mean": 5128.4, + "valid_targets_min": 1470 + }, + { + "epoch": 2.756712774613507, + "grad_norm": 0.5106278700225423, + "learning_rate": 3.0395293821985906e-05, + "loss": 0.2894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12691687047481537, + "step": 1695, + "valid_targets_mean": 4647.5, + "valid_targets_min": 1087 + }, + { + "epoch": 2.7648494711147276, + "grad_norm": 0.61727008379644, + "learning_rate": 3.032592894236112e-05, + "loss": 0.319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1375855952501297, + "step": 1700, + "valid_targets_mean": 4063.8, + "valid_targets_min": 1345 + }, + { + "epoch": 2.772986167615948, + "grad_norm": 0.5220802075309688, + "learning_rate": 3.0256394297384273e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1398482620716095, + "step": 1705, + "valid_targets_mean": 5660.1, + "valid_targets_min": 
3984 + }, + { + "epoch": 2.7811228641171684, + "grad_norm": 0.5238354995617224, + "learning_rate": 3.0186691030252614e-05, + "loss": 0.2855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132085919380188, + "step": 1710, + "valid_targets_mean": 5220.8, + "valid_targets_min": 801 + }, + { + "epoch": 2.789259560618389, + "grad_norm": 0.499241003810301, + "learning_rate": 3.0116820286935654e-05, + "loss": 0.2921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.177951842546463, + "step": 1715, + "valid_targets_mean": 5724.1, + "valid_targets_min": 2484 + }, + { + "epoch": 2.7973962571196096, + "grad_norm": 0.575750437435253, + "learning_rate": 3.0046783216156315e-05, + "loss": 0.29, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12942825257778168, + "step": 1720, + "valid_targets_mean": 4648.0, + "valid_targets_min": 2633 + }, + { + "epoch": 2.8055329536208298, + "grad_norm": 0.5045556071219864, + "learning_rate": 2.997658096937207e-05, + "loss": 0.2791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12483763694763184, + "step": 1725, + "valid_targets_mean": 5357.9, + "valid_targets_min": 895 + }, + { + "epoch": 2.8136696501220504, + "grad_norm": 0.684348188778152, + "learning_rate": 2.990621470075598e-05, + "loss": 0.2684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1319955587387085, + "step": 1730, + "valid_targets_mean": 5457.0, + "valid_targets_min": 1374 + }, + { + "epoch": 2.821806346623271, + "grad_norm": 0.5671325911785545, + "learning_rate": 2.9835685567177763e-05, + "loss": 0.2708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1548902690410614, + "step": 1735, + "valid_targets_mean": 5260.6, + "valid_targets_min": 956 + }, + { + "epoch": 2.8299430431244916, + "grad_norm": 0.4629636978551261, + "learning_rate": 2.9764994728184725e-05, + "loss": 0.2788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15818393230438232, + "step": 1740, + "valid_targets_mean": 6452.5, + "valid_targets_min": 3759 + }, + { + "epoch": 2.838079739625712, + "grad_norm": 0.5953069122381895, + 
"learning_rate": 2.9694143345982732e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17098405957221985, + "step": 1745, + "valid_targets_mean": 5593.5, + "valid_targets_min": 372 + }, + { + "epoch": 2.8462164361269324, + "grad_norm": 0.45172986591859304, + "learning_rate": 2.9623132585417096e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13527032732963562, + "step": 1750, + "valid_targets_mean": 6704.2, + "valid_targets_min": 1353 + }, + { + "epoch": 2.854353132628153, + "grad_norm": 0.4975648396200817, + "learning_rate": 2.9551963613953404e-05, + "loss": 0.2949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12178505957126617, + "step": 1755, + "valid_targets_mean": 6038.4, + "valid_targets_min": 4521 + }, + { + "epoch": 2.862489829129373, + "grad_norm": 0.5414519194121625, + "learning_rate": 2.948063760165835e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12081147730350494, + "step": 1760, + "valid_targets_mean": 4120.1, + "valid_targets_min": 537 + }, + { + "epoch": 2.870626525630594, + "grad_norm": 0.4366067141691184, + "learning_rate": 2.9409155721180477e-05, + "loss": 0.27, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1350255012512207, + "step": 1765, + "valid_targets_mean": 6225.4, + "valid_targets_min": 3036 + }, + { + "epoch": 2.8787632221318145, + "grad_norm": 0.5500766155760595, + "learning_rate": 2.9337519147730918e-05, + "loss": 0.2847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10548760741949081, + "step": 1770, + "valid_targets_mean": 4567.8, + "valid_targets_min": 1523 + }, + { + "epoch": 2.886899918633035, + "grad_norm": 0.4435904826881841, + "learning_rate": 2.9265729059064054e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13118746876716614, + "step": 1775, + "valid_targets_mean": 6306.4, + "valid_targets_min": 3141 + }, + { + "epoch": 2.8950366151342557, + "grad_norm": 0.7587627287786612, + "learning_rate": 2.9193786635458178e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.18454968929290771, + "step": 1780, + "valid_targets_mean": 5579.1, + "valid_targets_min": 1106 + }, + { + "epoch": 2.903173311635476, + "grad_norm": 0.4897525087855841, + "learning_rate": 2.912169305969605e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13022705912590027, + "step": 1785, + "valid_targets_mean": 6038.9, + "valid_targets_min": 3288 + }, + { + "epoch": 2.9113100081366965, + "grad_norm": 0.3896829782222166, + "learning_rate": 2.9049449517045497e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11002755165100098, + "step": 1790, + "valid_targets_mean": 6118.0, + "valid_targets_min": 2338 + }, + { + "epoch": 2.919446704637917, + "grad_norm": 0.3784633224945294, + "learning_rate": 2.89770571952399e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11330070346593857, + "step": 1795, + "valid_targets_mean": 8400.2, + "valid_targets_min": 4155 + }, + { + "epoch": 2.9275834011391373, + "grad_norm": 0.49491264882028485, + "learning_rate": 2.890451728445866e-05, + "loss": 0.293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18684348464012146, + "step": 1800, + "valid_targets_mean": 6696.6, + "valid_targets_min": 3861 + }, + { + "epoch": 2.935720097640358, + "grad_norm": 0.47181247576364993, + "learning_rate": 2.8831830977307644e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16414770483970642, + "step": 1805, + "valid_targets_mean": 6644.9, + "valid_targets_min": 1462 + }, + { + "epoch": 2.9438567941415785, + "grad_norm": 0.5155396891551823, + "learning_rate": 2.8758999468799594e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15796388685703278, + "step": 1810, + "valid_targets_mean": 4846.5, + "valid_targets_min": 1632 + }, + { + "epoch": 2.951993490642799, + "grad_norm": 0.4514689977307621, + "learning_rate": 2.868602395633444e-05, + "loss": 0.2855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19608153402805328, + "step": 1815, + "valid_targets_mean": 
7106.8, + "valid_targets_min": 3883 + }, + { + "epoch": 2.9601301871440198, + "grad_norm": 0.5846021256028041, + "learning_rate": 2.861290563967965e-05, + "loss": 0.315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0958099514245987, + "step": 1820, + "valid_targets_mean": 2989.9, + "valid_targets_min": 1378 + }, + { + "epoch": 2.96826688364524, + "grad_norm": 0.4539261325479636, + "learning_rate": 2.8539645720950474e-05, + "loss": 0.2935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15887710452079773, + "step": 1825, + "valid_targets_mean": 5430.4, + "valid_targets_min": 2053 + }, + { + "epoch": 2.9764035801464606, + "grad_norm": 0.5167204311608852, + "learning_rate": 2.8466245404590226e-05, + "loss": 0.3094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17045152187347412, + "step": 1830, + "valid_targets_mean": 6415.8, + "valid_targets_min": 2274 + }, + { + "epoch": 2.984540276647681, + "grad_norm": 0.4910802890132026, + "learning_rate": 2.8392705897350425e-05, + "loss": 0.2902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15125080943107605, + "step": 1835, + "valid_targets_mean": 5811.2, + "valid_targets_min": 2158 + }, + { + "epoch": 2.9926769731489014, + "grad_norm": 0.46505504808407744, + "learning_rate": 2.8319028408270983e-05, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15869152545928955, + "step": 1840, + "valid_targets_mean": 7832.1, + "valid_targets_min": 3792 + }, + { + "epoch": 3.0, + "grad_norm": 0.7018093322068503, + "learning_rate": 2.8245214148660364e-05, + "loss": 0.3069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32103702425956726, + "step": 1845, + "valid_targets_mean": 5602.2, + "valid_targets_min": 2942 + }, + { + "epoch": 3.0081366965012206, + "grad_norm": 0.557545342235143, + "learning_rate": 2.8171264332075588e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12069088220596313, + "step": 1850, + "valid_targets_mean": 4822.1, + "valid_targets_min": 3071 + }, + { + "epoch": 3.016273393002441, + "grad_norm": 
0.5055444788767136, + "learning_rate": 2.809718017430236e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1308608502149582, + "step": 1855, + "valid_targets_mean": 5950.2, + "valid_targets_min": 2493 + }, + { + "epoch": 3.0244100895036614, + "grad_norm": 0.564974769389222, + "learning_rate": 2.8022962893335023e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13714183866977692, + "step": 1860, + "valid_targets_mean": 4790.5, + "valid_targets_min": 1710 + }, + { + "epoch": 3.032546786004882, + "grad_norm": 0.5368918165884716, + "learning_rate": 2.7948613709356565e-05, + "loss": 0.2881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11848818510770798, + "step": 1865, + "valid_targets_mean": 5709.6, + "valid_targets_min": 1869 + }, + { + "epoch": 3.0406834825061027, + "grad_norm": 0.6202460762235937, + "learning_rate": 2.7874133844718557e-05, + "loss": 0.2756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15099003911018372, + "step": 1870, + "valid_targets_mean": 5946.1, + "valid_targets_min": 3788 + }, + { + "epoch": 3.048820179007323, + "grad_norm": 0.47945587720363, + "learning_rate": 2.7799524523921038e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14982934296131134, + "step": 1875, + "valid_targets_mean": 4646.1, + "valid_targets_min": 1010 + }, + { + "epoch": 3.0569568755085434, + "grad_norm": 0.5981553209252303, + "learning_rate": 2.77247869735924e-05, + "loss": 0.2785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13525862991809845, + "step": 1880, + "valid_targets_mean": 5607.5, + "valid_targets_min": 3276 + }, + { + "epoch": 3.065093572009764, + "grad_norm": 0.5197959262649877, + "learning_rate": 2.764992242246921e-05, + "loss": 0.2799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12702880799770355, + "step": 1885, + "valid_targets_mean": 5396.0, + "valid_targets_min": 2859 + }, + { + "epoch": 3.0732302685109847, + "grad_norm": 0.4896279271559394, + "learning_rate": 2.7574932101376034e-05, + "loss": 0.2628, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.10663357377052307, + "step": 1890, + "valid_targets_mean": 5472.4, + "valid_targets_min": 2781 + }, + { + "epoch": 3.081366965012205, + "grad_norm": 0.6197885493966871, + "learning_rate": 2.749981724320516e-05, + "loss": 0.2829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14642459154129028, + "step": 1895, + "valid_targets_mean": 4172.6, + "valid_targets_min": 867 + }, + { + "epoch": 3.0895036615134255, + "grad_norm": 0.4909453144584929, + "learning_rate": 2.7424579082896357e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1485298126935959, + "step": 1900, + "valid_targets_mean": 5413.5, + "valid_targets_min": 2571 + }, + { + "epoch": 3.097640358014646, + "grad_norm": 0.4913841836660842, + "learning_rate": 2.7349218857416587e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16140612959861755, + "step": 1905, + "valid_targets_mean": 6672.6, + "valid_targets_min": 1381 + }, + { + "epoch": 3.1057770545158667, + "grad_norm": 0.5286751495746139, + "learning_rate": 2.7273737805739614e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09970603883266449, + "step": 1910, + "valid_targets_mean": 4983.8, + "valid_targets_min": 926 + }, + { + "epoch": 3.113913751017087, + "grad_norm": 0.4922716591309361, + "learning_rate": 2.719813716882569e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1322004795074463, + "step": 1915, + "valid_targets_mean": 7345.9, + "valid_targets_min": 4037 + }, + { + "epoch": 3.1220504475183075, + "grad_norm": 0.4932161067767006, + "learning_rate": 2.7122418189601118e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12052470445632935, + "step": 1920, + "valid_targets_mean": 5224.1, + "valid_targets_min": 3445 + }, + { + "epoch": 3.130187144019528, + "grad_norm": 0.49943776929619677, + "learning_rate": 2.7046582112937837e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13334211707115173, + "step": 1925, + 
"valid_targets_mean": 7433.6, + "valid_targets_min": 1974 + }, + { + "epoch": 3.1383238405207488, + "grad_norm": 0.44357138068663077, + "learning_rate": 2.697063018563295e-05, + "loss": 0.2647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11824844032526016, + "step": 1930, + "valid_targets_mean": 6402.1, + "valid_targets_min": 3582 + }, + { + "epoch": 3.146460537021969, + "grad_norm": 0.6216470540080042, + "learning_rate": 2.6894563656388217e-05, + "loss": 0.2757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16654358804225922, + "step": 1935, + "valid_targets_mean": 4445.5, + "valid_targets_min": 652 + }, + { + "epoch": 3.1545972335231895, + "grad_norm": 0.5203431916047961, + "learning_rate": 2.681838377578954e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13338005542755127, + "step": 1940, + "valid_targets_mean": 6740.8, + "valid_targets_min": 3477 + }, + { + "epoch": 3.16273393002441, + "grad_norm": 0.548730057587269, + "learning_rate": 2.6742091796286388e-05, + "loss": 0.2799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14711609482765198, + "step": 1945, + "valid_targets_mean": 5364.9, + "valid_targets_min": 1328 + }, + { + "epoch": 3.170870626525631, + "grad_norm": 0.5303267033016945, + "learning_rate": 2.6665688972171215e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1701548993587494, + "step": 1950, + "valid_targets_mean": 5727.0, + "valid_targets_min": 4151 + }, + { + "epoch": 3.179007323026851, + "grad_norm": 0.5676758975951339, + "learning_rate": 2.658917655955884e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15539327263832092, + "step": 1955, + "valid_targets_mean": 4853.1, + "valid_targets_min": 2899 + }, + { + "epoch": 3.1871440195280716, + "grad_norm": 0.4484115372218612, + "learning_rate": 2.651255581636578e-05, + "loss": 0.2759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16180385649204254, + "step": 1960, + "valid_targets_mean": 7725.1, + "valid_targets_min": 4420 + }, + { + "epoch": 
3.195280716029292, + "grad_norm": 0.5105638327238713, + "learning_rate": 2.6435828002289596e-05, + "loss": 0.2798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12511882185935974, + "step": 1965, + "valid_targets_mean": 5482.6, + "valid_targets_min": 2480 + }, + { + "epoch": 3.203417412530513, + "grad_norm": 0.45443106401153505, + "learning_rate": 2.6358994378788163e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1377660185098648, + "step": 1970, + "valid_targets_mean": 6621.2, + "valid_targets_min": 3045 + }, + { + "epoch": 3.211554109031733, + "grad_norm": 0.5569701717755269, + "learning_rate": 2.6282056209058936e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12813882529735565, + "step": 1975, + "valid_targets_mean": 4760.6, + "valid_targets_min": 1870 + }, + { + "epoch": 3.2196908055329536, + "grad_norm": 0.5403643934008627, + "learning_rate": 2.6205014758018176e-05, + "loss": 0.2731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.126073956489563, + "step": 1980, + "valid_targets_mean": 5044.4, + "valid_targets_min": 3035 + }, + { + "epoch": 3.2278275020341742, + "grad_norm": 0.4746581189512608, + "learning_rate": 2.6127871292280165e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11351381242275238, + "step": 1985, + "valid_targets_mean": 6290.9, + "valid_targets_min": 1744 + }, + { + "epoch": 3.2359641985353944, + "grad_norm": 0.5001109013797744, + "learning_rate": 2.6050627080136376e-05, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1430266797542572, + "step": 1990, + "valid_targets_mean": 4843.6, + "valid_targets_min": 2324 + }, + { + "epoch": 3.244100895036615, + "grad_norm": 0.4495739039013548, + "learning_rate": 2.5973283391534615e-05, + "loss": 0.2627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09228505194187164, + "step": 1995, + "valid_targets_mean": 4507.0, + "valid_targets_min": 2149 + }, + { + "epoch": 3.2522375915378356, + "grad_norm": 0.48980120047593206, + "learning_rate": 
2.589584149805817e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13319309055805206, + "step": 2000, + "valid_targets_mean": 7194.9, + "valid_targets_min": 977 + }, + { + "epoch": 3.2603742880390563, + "grad_norm": 0.4812607915960625, + "learning_rate": 2.581830267290486e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1082778349518776, + "step": 2005, + "valid_targets_mean": 6961.8, + "valid_targets_min": 3516 + }, + { + "epoch": 3.268510984540277, + "grad_norm": 0.570112545383423, + "learning_rate": 2.574066819086613e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.134844571352005, + "step": 2010, + "valid_targets_mean": 4588.6, + "valid_targets_min": 1036 + }, + { + "epoch": 3.276647681041497, + "grad_norm": 0.4362815156888107, + "learning_rate": 2.5662939328306113e-05, + "loss": 0.2709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16294899582862854, + "step": 2015, + "valid_targets_mean": 8057.6, + "valid_targets_min": 2510 + }, + { + "epoch": 3.2847843775427177, + "grad_norm": 0.4089077124565424, + "learning_rate": 2.5585117363140592e-05, + "loss": 0.2671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12175396829843521, + "step": 2020, + "valid_targets_mean": 6774.1, + "valid_targets_min": 3456 + }, + { + "epoch": 3.2929210740439383, + "grad_norm": 0.502187014362583, + "learning_rate": 2.5507203574816043e-05, + "loss": 0.2777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16614243388175964, + "step": 2025, + "valid_targets_mean": 6135.8, + "valid_targets_min": 2329 + }, + { + "epoch": 3.3010577705451585, + "grad_norm": 0.4730402208775795, + "learning_rate": 2.542919924428856e-05, + "loss": 0.2754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16762784123420715, + "step": 2030, + "valid_targets_mean": 5869.8, + "valid_targets_min": 1504 + }, + { + "epoch": 3.309194467046379, + "grad_norm": 0.5339111572346003, + "learning_rate": 2.5351105654002838e-05, + "loss": 0.2747, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.1370839774608612, + "step": 2035, + "valid_targets_mean": 5234.9, + "valid_targets_min": 2162 + }, + { + "epoch": 3.3173311635475997, + "grad_norm": 0.6413302863152066, + "learning_rate": 2.527292408787104e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16920380294322968, + "step": 2040, + "valid_targets_mean": 4609.8, + "valid_targets_min": 1040 + }, + { + "epoch": 3.3254678600488203, + "grad_norm": 0.470230236442957, + "learning_rate": 2.5194655831251712e-05, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12207140028476715, + "step": 2045, + "valid_targets_mean": 5943.4, + "valid_targets_min": 2111 + }, + { + "epoch": 3.3336045565500405, + "grad_norm": 0.48456012812970206, + "learning_rate": 2.5116302170928678e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11639226973056793, + "step": 2050, + "valid_targets_mean": 5307.0, + "valid_targets_min": 784 + }, + { + "epoch": 3.341741253051261, + "grad_norm": 0.5523702500052676, + "learning_rate": 2.5037864395089822e-05, + "loss": 0.2893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.106050044298172, + "step": 2055, + "valid_targets_mean": 4106.2, + "valid_targets_min": 1187 + }, + { + "epoch": 3.3498779495524817, + "grad_norm": 0.6268239379014566, + "learning_rate": 2.495934379330597e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13571971654891968, + "step": 2060, + "valid_targets_mean": 4496.1, + "valid_targets_min": 1237 + }, + { + "epoch": 3.3580146460537024, + "grad_norm": 0.5211118227275444, + "learning_rate": 2.4880741656509656e-05, + "loss": 0.2577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18967539072036743, + "step": 2065, + "valid_targets_mean": 5973.8, + "valid_targets_min": 2874 + }, + { + "epoch": 3.3661513425549225, + "grad_norm": 0.4867184690553686, + "learning_rate": 2.4802059276973904e-05, + "loss": 0.2744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08832257986068726, + "step": 2070, + "valid_targets_mean": 6020.0, + 
"valid_targets_min": 2571 + }, + { + "epoch": 3.374288039056143, + "grad_norm": 0.40231974560194717, + "learning_rate": 2.4723297948290982e-05, + "loss": 0.2559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13233518600463867, + "step": 2075, + "valid_targets_mean": 9070.0, + "valid_targets_min": 3459 + }, + { + "epoch": 3.382424735557364, + "grad_norm": 0.5154449145165141, + "learning_rate": 2.464445896535113e-05, + "loss": 0.2836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1818242222070694, + "step": 2080, + "valid_targets_mean": 6717.2, + "valid_targets_min": 4298 + }, + { + "epoch": 3.3905614320585844, + "grad_norm": 0.5379336133652549, + "learning_rate": 2.45655436243213e-05, + "loss": 0.2693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12027017772197723, + "step": 2085, + "valid_targets_mean": 5752.2, + "valid_targets_min": 3219 + }, + { + "epoch": 3.3986981285598046, + "grad_norm": 0.4175221191336141, + "learning_rate": 2.44865532226238e-05, + "loss": 0.2774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1881314218044281, + "step": 2090, + "valid_targets_mean": 8001.6, + "valid_targets_min": 2705 + }, + { + "epoch": 3.406834825061025, + "grad_norm": 0.5961468554541282, + "learning_rate": 2.4407489058915004e-05, + "loss": 0.2874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1391357183456421, + "step": 2095, + "valid_targets_mean": 4966.1, + "valid_targets_min": 3157 + }, + { + "epoch": 3.414971521562246, + "grad_norm": 0.4943897075326212, + "learning_rate": 2.4328352433063966e-05, + "loss": 0.2752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14456993341445923, + "step": 2100, + "valid_targets_mean": 7371.6, + "valid_targets_min": 4929 + }, + { + "epoch": 3.423108218063466, + "grad_norm": 0.4798546004908442, + "learning_rate": 2.4249144646131083e-05, + "loss": 0.2545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12103509902954102, + "step": 2105, + "valid_targets_mean": 5275.5, + "valid_targets_min": 2062 + }, + { + "epoch": 3.4312449145646866, + "grad_norm": 
0.6024466670234377, + "learning_rate": 2.4169867000346684e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1351066529750824, + "step": 2110, + "valid_targets_mean": 4428.4, + "valid_targets_min": 1064 + }, + { + "epoch": 3.4393816110659072, + "grad_norm": 0.5447078874142987, + "learning_rate": 2.4090520799089612e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.157949760556221, + "step": 2115, + "valid_targets_mean": 5712.9, + "valid_targets_min": 2100 + }, + { + "epoch": 3.447518307567128, + "grad_norm": 0.5293454552675316, + "learning_rate": 2.4011107346865844e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19082076847553253, + "step": 2120, + "valid_targets_mean": 6204.5, + "valid_targets_min": 3697 + }, + { + "epoch": 3.4556550040683485, + "grad_norm": 0.496357535543296, + "learning_rate": 2.393162794928697e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1595294177532196, + "step": 2125, + "valid_targets_mean": 7122.4, + "valid_targets_min": 2914 + }, + { + "epoch": 3.4637917005695686, + "grad_norm": 0.5390530304141364, + "learning_rate": 2.385208391304879e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13380271196365356, + "step": 2130, + "valid_targets_mean": 5964.4, + "valid_targets_min": 4317 + }, + { + "epoch": 3.4719283970707893, + "grad_norm": 0.48671301947421164, + "learning_rate": 2.3772476545909794e-05, + "loss": 0.2717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0984102338552475, + "step": 2135, + "valid_targets_mean": 5276.6, + "valid_targets_min": 2773 + }, + { + "epoch": 3.48006509357201, + "grad_norm": 0.5168171595907421, + "learning_rate": 2.3692807156669684e-05, + "loss": 0.2883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10761836171150208, + "step": 2140, + "valid_targets_mean": 5127.4, + "valid_targets_min": 1005 + }, + { + "epoch": 3.48820179007323, + "grad_norm": 0.45917243865127705, + "learning_rate": 2.3613077055147855e-05, + "loss": 0.2871, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.11678306758403778, + "step": 2145, + "valid_targets_mean": 6023.4, + "valid_targets_min": 1403 + }, + { + "epoch": 3.4963384865744507, + "grad_norm": 1.0802408916637594, + "learning_rate": 2.3533287552161833e-05, + "loss": 0.2818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12424206733703613, + "step": 2150, + "valid_targets_mean": 4101.6, + "valid_targets_min": 1208 + }, + { + "epoch": 3.5044751830756713, + "grad_norm": 0.9725147310489535, + "learning_rate": 2.345343995950577e-05, + "loss": 0.2819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11299556493759155, + "step": 2155, + "valid_targets_mean": 5489.8, + "valid_targets_min": 2994 + }, + { + "epoch": 3.512611879576892, + "grad_norm": 0.5277612614291434, + "learning_rate": 2.3373535589928827e-05, + "loss": 0.2793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14234156906604767, + "step": 2160, + "valid_targets_mean": 5168.0, + "valid_targets_min": 1637 + }, + { + "epoch": 3.5207485760781125, + "grad_norm": 0.4577806901348409, + "learning_rate": 2.3293575757113635e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11869750916957855, + "step": 2165, + "valid_targets_mean": 5705.0, + "valid_targets_min": 3465 + }, + { + "epoch": 3.5288852725793327, + "grad_norm": 0.48813053590763633, + "learning_rate": 2.3213561775654678e-05, + "loss": 0.2769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13192936778068542, + "step": 2170, + "valid_targets_mean": 5435.0, + "valid_targets_min": 2039 + }, + { + "epoch": 3.5370219690805533, + "grad_norm": 0.5417864760687305, + "learning_rate": 2.3133494961036655e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14656861126422882, + "step": 2175, + "valid_targets_mean": 4186.5, + "valid_targets_min": 3036 + }, + { + "epoch": 3.545158665581774, + "grad_norm": 0.5079700581274689, + "learning_rate": 2.305337662961292e-05, + "loss": 0.2951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15438416600227356, + "step": 2180, 
+ "valid_targets_mean": 5634.9, + "valid_targets_min": 1514 + }, + { + "epoch": 3.553295362082994, + "grad_norm": 0.4302507514212832, + "learning_rate": 2.2973208098583767e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1305883228778839, + "step": 2185, + "valid_targets_mean": 7573.4, + "valid_targets_min": 3714 + }, + { + "epoch": 3.5614320585842147, + "grad_norm": 0.4784814526634792, + "learning_rate": 2.2892990685974815e-05, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13934920728206635, + "step": 2190, + "valid_targets_mean": 6661.2, + "valid_targets_min": 2725 + }, + { + "epoch": 3.5695687550854354, + "grad_norm": 0.5218194964987156, + "learning_rate": 2.2812725710615328e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1036933958530426, + "step": 2195, + "valid_targets_mean": 4396.5, + "valid_targets_min": 530 + }, + { + "epoch": 3.577705451586656, + "grad_norm": 0.3915528153922062, + "learning_rate": 2.2732414492116538e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11413315683603287, + "step": 2200, + "valid_targets_mean": 7739.1, + "valid_targets_min": 988 + }, + { + "epoch": 3.585842148087876, + "grad_norm": 0.5474376964212345, + "learning_rate": 2.2652058350849955e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12832045555114746, + "step": 2205, + "valid_targets_mean": 4460.6, + "valid_targets_min": 844 + }, + { + "epoch": 3.5939788445890968, + "grad_norm": 0.4513603679012204, + "learning_rate": 2.2571658607925624e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11507508158683777, + "step": 2210, + "valid_targets_mean": 6023.5, + "valid_targets_min": 2711 + }, + { + "epoch": 3.6021155410903174, + "grad_norm": 0.4358199903498031, + "learning_rate": 2.2491216585170458e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08925238251686096, + "step": 2215, + "valid_targets_mean": 5539.4, + "valid_targets_min": 841 + }, + { + "epoch": 
3.6102522375915376, + "grad_norm": 0.5033142192460704, + "learning_rate": 2.2410733605106462e-05, + "loss": 0.2567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07945795357227325, + "step": 2220, + "valid_targets_mean": 4576.6, + "valid_targets_min": 1937 + }, + { + "epoch": 3.618388934092758, + "grad_norm": 0.5295724882881188, + "learning_rate": 2.233021099092902e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1273106187582016, + "step": 2225, + "valid_targets_mean": 5181.6, + "valid_targets_min": 3580 + }, + { + "epoch": 3.626525630593979, + "grad_norm": 0.49909534425266827, + "learning_rate": 2.224965006648512e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11837124824523926, + "step": 2230, + "valid_targets_mean": 6526.1, + "valid_targets_min": 956 + }, + { + "epoch": 3.6346623270951994, + "grad_norm": 0.48550502453666, + "learning_rate": 2.2169052156251585e-05, + "loss": 0.3001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13036608695983887, + "step": 2235, + "valid_targets_mean": 5403.1, + "valid_targets_min": 1572 + }, + { + "epoch": 3.64279902359642, + "grad_norm": 0.6877569373231779, + "learning_rate": 2.2088418585313346e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15400430560112, + "step": 2240, + "valid_targets_mean": 4920.0, + "valid_targets_min": 1879 + }, + { + "epoch": 3.6509357200976402, + "grad_norm": 0.5449931803254913, + "learning_rate": 2.200775067934158e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11476185917854309, + "step": 2245, + "valid_targets_mean": 4997.9, + "valid_targets_min": 1732 + }, + { + "epoch": 3.659072416598861, + "grad_norm": 0.4525544723604396, + "learning_rate": 2.192704976457198e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1380799114704132, + "step": 2250, + "valid_targets_mean": 6882.5, + "valid_targets_min": 4428 + }, + { + "epoch": 3.6672091131000815, + "grad_norm": 0.5174034196077824, + "learning_rate": 
2.1846317167782923e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.152031809091568, + "step": 2255, + "valid_targets_mean": 5447.2, + "valid_targets_min": 2463 + }, + { + "epoch": 3.6753458096013016, + "grad_norm": 0.44137473000448374, + "learning_rate": 2.1765554216273652e-05, + "loss": 0.268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1166839748620987, + "step": 2260, + "valid_targets_mean": 6610.4, + "valid_targets_min": 3051 + }, + { + "epoch": 3.6834825061025223, + "grad_norm": 0.5023783889425373, + "learning_rate": 2.1684762237842466e-05, + "loss": 0.2741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20356138050556183, + "step": 2265, + "valid_targets_mean": 6716.1, + "valid_targets_min": 2851 + }, + { + "epoch": 3.691619202603743, + "grad_norm": 0.5649370975103999, + "learning_rate": 2.1603942560764884e-05, + "loss": 0.2678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14867046475410461, + "step": 2270, + "valid_targets_mean": 4637.0, + "valid_targets_min": 1356 + }, + { + "epoch": 3.6997558991049635, + "grad_norm": 0.43128962161492657, + "learning_rate": 2.1523096513771825e-05, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11670452356338501, + "step": 2275, + "valid_targets_mean": 6769.8, + "valid_targets_min": 3135 + }, + { + "epoch": 3.707892595606184, + "grad_norm": 0.6107884039191125, + "learning_rate": 2.1442225426027724e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622416079044342, + "step": 2280, + "valid_targets_mean": 4730.1, + "valid_targets_min": 2457 + }, + { + "epoch": 3.7160292921074043, + "grad_norm": 0.42009628659466286, + "learning_rate": 2.1361330627108724e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14188794791698456, + "step": 2285, + "valid_targets_mean": 8693.0, + "valid_targets_min": 3303 + }, + { + "epoch": 3.724165988608625, + "grad_norm": 0.4593106062963763, + "learning_rate": 2.128041344698078e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.10363125801086426, + "step": 2290, + "valid_targets_mean": 4878.5, + "valid_targets_min": 1828 + }, + { + "epoch": 3.7323026851098455, + "grad_norm": 0.43814883641270597, + "learning_rate": 2.1199475215977817e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10538579523563385, + "step": 2295, + "valid_targets_mean": 5957.1, + "valid_targets_min": 2916 + }, + { + "epoch": 3.7404393816110657, + "grad_norm": 0.5103737751157224, + "learning_rate": 2.1118517264779858e-05, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12744760513305664, + "step": 2300, + "valid_targets_mean": 4476.2, + "valid_targets_min": 1892 + }, + { + "epoch": 3.7485760781122863, + "grad_norm": 0.47819552478708083, + "learning_rate": 2.103754092439112e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11515910178422928, + "step": 2305, + "valid_targets_mean": 6143.5, + "valid_targets_min": 1626 + }, + { + "epoch": 3.756712774613507, + "grad_norm": 0.5649450823382121, + "learning_rate": 2.095654752611817e-05, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15299847722053528, + "step": 2310, + "valid_targets_mean": 5482.8, + "valid_targets_min": 624 + }, + { + "epoch": 3.7648494711147276, + "grad_norm": 0.43149118197396086, + "learning_rate": 2.087553840154801e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12518690526485443, + "step": 2315, + "valid_targets_mean": 6021.2, + "valid_targets_min": 2664 + }, + { + "epoch": 3.772986167615948, + "grad_norm": 0.4891027718138667, + "learning_rate": 2.0794514882526196e-05, + "loss": 0.2526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11703081429004669, + "step": 2320, + "valid_targets_mean": 4964.6, + "valid_targets_min": 397 + }, + { + "epoch": 3.7811228641171684, + "grad_norm": 0.48601703764578974, + "learning_rate": 2.0713478301134935e-05, + "loss": 0.2882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13128621876239777, + "step": 2325, + "valid_targets_mean": 
6682.8, + "valid_targets_min": 2997 + }, + { + "epoch": 3.789259560618389, + "grad_norm": 0.5447061578281931, + "learning_rate": 2.063242998967118e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12155985832214355, + "step": 2330, + "valid_targets_mean": 4736.2, + "valid_targets_min": 2174 + }, + { + "epoch": 3.7973962571196096, + "grad_norm": 0.45017085915069277, + "learning_rate": 2.0551371280624758e-05, + "loss": 0.2551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.158024862408638, + "step": 2335, + "valid_targets_mean": 8097.1, + "valid_targets_min": 4594 + }, + { + "epoch": 3.8055329536208298, + "grad_norm": 0.48647898978382154, + "learning_rate": 2.0470303506656414e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12568457424640656, + "step": 2340, + "valid_targets_mean": 4161.4, + "valid_targets_min": 611 + }, + { + "epoch": 3.8136696501220504, + "grad_norm": 0.5446656209319005, + "learning_rate": 2.0389228000575953e-05, + "loss": 0.2726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16627874970436096, + "step": 2345, + "valid_targets_mean": 5223.8, + "valid_targets_min": 3621 + }, + { + "epoch": 3.821806346623271, + "grad_norm": 0.5378101789966692, + "learning_rate": 2.0308146095320275e-05, + "loss": 0.2669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11316706240177155, + "step": 2350, + "valid_targets_mean": 5668.1, + "valid_targets_min": 2857 + }, + { + "epoch": 3.8299430431244916, + "grad_norm": 0.4704455377437423, + "learning_rate": 2.0227059123931504e-05, + "loss": 0.2663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14419454336166382, + "step": 2355, + "valid_targets_mean": 6657.2, + "valid_targets_min": 2752 + }, + { + "epoch": 3.838079739625712, + "grad_norm": 0.4545925318504704, + "learning_rate": 2.0145968419535045e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0803401917219162, + "step": 2360, + "valid_targets_mean": 5352.0, + "valid_targets_min": 2022 + }, + { + "epoch": 3.8462164361269324, + 
"grad_norm": 0.5261750953052919, + "learning_rate": 2.0064875315317674e-05, + "loss": 0.2661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1119522899389267, + "step": 2365, + "valid_targets_mean": 5081.0, + "valid_targets_min": 1380 + }, + { + "epoch": 3.854353132628153, + "grad_norm": 0.5977456077727262, + "learning_rate": 1.998378114450565e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10629862546920776, + "step": 2370, + "valid_targets_mean": 4967.9, + "valid_targets_min": 559 + }, + { + "epoch": 3.862489829129373, + "grad_norm": 0.6008964888102649, + "learning_rate": 1.9902687240342722e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11401569843292236, + "step": 2375, + "valid_targets_mean": 4733.9, + "valid_targets_min": 2100 + }, + { + "epoch": 3.870626525630594, + "grad_norm": 0.5127247595415064, + "learning_rate": 1.982159493606829e-05, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1426711082458496, + "step": 2380, + "valid_targets_mean": 5931.9, + "valid_targets_min": 4307 + }, + { + "epoch": 3.8787632221318145, + "grad_norm": 0.5231212605718677, + "learning_rate": 1.9740505564895436e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15910865366458893, + "step": 2385, + "valid_targets_mean": 5790.6, + "valid_targets_min": 3166 + }, + { + "epoch": 3.886899918633035, + "grad_norm": 0.5112833558456881, + "learning_rate": 1.9659420459989026e-05, + "loss": 0.2742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12127886712551117, + "step": 2390, + "valid_targets_mean": 4869.2, + "valid_targets_min": 3034 + }, + { + "epoch": 3.8950366151342557, + "grad_norm": 0.4557114664235136, + "learning_rate": 1.9578340954443784e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132114976644516, + "step": 2395, + "valid_targets_mean": 6727.5, + "valid_targets_min": 360 + }, + { + "epoch": 3.903173311635476, + "grad_norm": 0.5394796995535039, + "learning_rate": 1.949726838126237e-05, + "loss": 
0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15240266919136047, + "step": 2400, + "valid_targets_mean": 5102.9, + "valid_targets_min": 2064 + }, + { + "epoch": 3.9113100081366965, + "grad_norm": 0.5486897951018997, + "learning_rate": 1.941620407333347e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12607380747795105, + "step": 2405, + "valid_targets_mean": 4510.5, + "valid_targets_min": 1648 + }, + { + "epoch": 3.919446704637917, + "grad_norm": 0.42322274272575794, + "learning_rate": 1.933514936340991e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06517419964075089, + "step": 2410, + "valid_targets_mean": 5701.9, + "valid_targets_min": 1451 + }, + { + "epoch": 3.9275834011391373, + "grad_norm": 0.49141277897204666, + "learning_rate": 1.9254105584086683e-05, + "loss": 0.2733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12552395462989807, + "step": 2415, + "valid_targets_mean": 5657.6, + "valid_targets_min": 3092 + }, + { + "epoch": 3.935720097640358, + "grad_norm": 0.5126538411225738, + "learning_rate": 1.9173074067779102e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13129568099975586, + "step": 2420, + "valid_targets_mean": 5424.1, + "valid_targets_min": 2023 + }, + { + "epoch": 3.9438567941415785, + "grad_norm": 0.4865930267161227, + "learning_rate": 1.9092056146700844e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12925077974796295, + "step": 2425, + "valid_targets_mean": 5372.9, + "valid_targets_min": 3511 + }, + { + "epoch": 3.951993490642799, + "grad_norm": 0.5288677950325463, + "learning_rate": 1.9011053152842087e-05, + "loss": 0.267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1401764452457428, + "step": 2430, + "valid_targets_mean": 5477.1, + "valid_targets_min": 958 + }, + { + "epoch": 3.9601301871440198, + "grad_norm": 0.5471093288120334, + "learning_rate": 1.89300664179476e-05, + "loss": 0.2784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1709902137517929, + "step": 
2435, + "valid_targets_mean": 5307.8, + "valid_targets_min": 1418 + }, + { + "epoch": 3.96826688364524, + "grad_norm": 0.6064125532966921, + "learning_rate": 1.8849097273494827e-05, + "loss": 0.2833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1471061408519745, + "step": 2440, + "valid_targets_mean": 5405.4, + "valid_targets_min": 582 + }, + { + "epoch": 3.9764035801464606, + "grad_norm": 0.41587136888702764, + "learning_rate": 1.8768147050672028e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09928697347640991, + "step": 2445, + "valid_targets_mean": 6103.5, + "valid_targets_min": 1364 + }, + { + "epoch": 3.984540276647681, + "grad_norm": 0.5513050609197007, + "learning_rate": 1.8687217080356365e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12455247342586517, + "step": 2450, + "valid_targets_mean": 3710.2, + "valid_targets_min": 468 + }, + { + "epoch": 3.9926769731489014, + "grad_norm": 0.5016492346972834, + "learning_rate": 1.8606308693092035e-05, + "loss": 0.2655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10865034908056259, + "step": 2455, + "valid_targets_mean": 4436.8, + "valid_targets_min": 2339 + }, + { + "epoch": 4.0, + "grad_norm": 0.6629905256300082, + "learning_rate": 1.8525423219068423e-05, + "loss": 0.2677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2965284585952759, + "step": 2460, + "valid_targets_mean": 5677.5, + "valid_targets_min": 2822 + }, + { + "epoch": 4.008136696501221, + "grad_norm": 0.5129517123763623, + "learning_rate": 1.844456198809817e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10877451300621033, + "step": 2465, + "valid_targets_mean": 5123.9, + "valid_targets_min": 2568 + }, + { + "epoch": 4.016273393002441, + "grad_norm": 0.473163436849981, + "learning_rate": 1.8363726329595356e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14932730793952942, + "step": 2470, + "valid_targets_mean": 7151.4, + "valid_targets_min": 3539 + }, + { + "epoch": 
4.024410089503662, + "grad_norm": 0.4124240574592047, + "learning_rate": 1.828291757255364e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11770130693912506, + "step": 2475, + "valid_targets_mean": 8133.2, + "valid_targets_min": 3713 + }, + { + "epoch": 4.032546786004882, + "grad_norm": 0.6586399627241981, + "learning_rate": 1.8202137045524383e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13058783113956451, + "step": 2480, + "valid_targets_mean": 6915.6, + "valid_targets_min": 4175 + }, + { + "epoch": 4.040683482506102, + "grad_norm": 0.5513424168769842, + "learning_rate": 1.812138607659486e-05, + "loss": 0.2313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12268248945474625, + "step": 2485, + "valid_targets_mean": 5776.4, + "valid_targets_min": 2434 + }, + { + "epoch": 4.048820179007323, + "grad_norm": 0.5252916649047769, + "learning_rate": 1.8040665993366355e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13886849582195282, + "step": 2490, + "valid_targets_mean": 5557.5, + "valid_targets_min": 2120 + }, + { + "epoch": 4.0569568755085434, + "grad_norm": 0.5060252786002666, + "learning_rate": 1.795997812293239e-05, + "loss": 0.2565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20440292358398438, + "step": 2495, + "valid_targets_mean": 8090.6, + "valid_targets_min": 3649 + }, + { + "epoch": 4.065093572009764, + "grad_norm": 0.5304099045808806, + "learning_rate": 1.7879323791856875e-05, + "loss": 0.264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11923650652170181, + "step": 2500, + "valid_targets_mean": 5128.9, + "valid_targets_min": 3118 + }, + { + "epoch": 4.073230268510985, + "grad_norm": 0.5848838368779085, + "learning_rate": 1.7798704326152317e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19044384360313416, + "step": 2505, + "valid_targets_mean": 6830.1, + "valid_targets_min": 4289 + }, + { + "epoch": 4.081366965012205, + "grad_norm": 0.5705122461346422, + "learning_rate": 
1.7718121051258016e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14008468389511108, + "step": 2510, + "valid_targets_mean": 5542.6, + "valid_targets_min": 2108 + }, + { + "epoch": 4.089503661513426, + "grad_norm": 0.441927710137963, + "learning_rate": 1.763757529201826e-05, + "loss": 0.2531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12376945465803146, + "step": 2515, + "valid_targets_mean": 6966.4, + "valid_targets_min": 1952 + }, + { + "epoch": 4.097640358014646, + "grad_norm": 0.7043747925369948, + "learning_rate": 1.7557068372660562e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12836799025535583, + "step": 2520, + "valid_targets_mean": 4607.2, + "valid_targets_min": 2518 + }, + { + "epoch": 4.105777054515866, + "grad_norm": 0.5410005564912586, + "learning_rate": 1.747660161677387e-05, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15527786314487457, + "step": 2525, + "valid_targets_mean": 6187.5, + "valid_targets_min": 1744 + }, + { + "epoch": 4.113913751017087, + "grad_norm": 0.4912342111176402, + "learning_rate": 1.7396176347286838e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1411733329296112, + "step": 2530, + "valid_targets_mean": 6837.1, + "valid_targets_min": 2828 + }, + { + "epoch": 4.1220504475183075, + "grad_norm": 0.5865774973750087, + "learning_rate": 1.7315793886446036e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1618509590625763, + "step": 2535, + "valid_targets_mean": 5910.0, + "valid_targets_min": 3286 + }, + { + "epoch": 4.130187144019528, + "grad_norm": 0.626903542457007, + "learning_rate": 1.7235455555794236e-05, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13324236869812012, + "step": 2540, + "valid_targets_mean": 5587.8, + "valid_targets_min": 2551 + }, + { + "epoch": 4.138323840520749, + "grad_norm": 0.5795141806915454, + "learning_rate": 1.7155162676148682e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12815946340560913, + "step": 2545, + "valid_targets_mean": 5687.1, + "valid_targets_min": 1514 + }, + { + "epoch": 4.146460537021969, + "grad_norm": 0.5206487840907027, + "learning_rate": 1.707491656757936e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14685989916324615, + "step": 2550, + "valid_targets_mean": 6729.9, + "valid_targets_min": 1309 + }, + { + "epoch": 4.15459723352319, + "grad_norm": 0.5418721063841692, + "learning_rate": 1.6994718549387332e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11401748657226562, + "step": 2555, + "valid_targets_mean": 4553.0, + "valid_targets_min": 914 + }, + { + "epoch": 4.16273393002441, + "grad_norm": 0.5304635472704533, + "learning_rate": 1.6914569940083004e-05, + "loss": 0.2657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15515920519828796, + "step": 2560, + "valid_targets_mean": 5698.4, + "valid_targets_min": 3076 + }, + { + "epoch": 4.17087062652563, + "grad_norm": 0.5577644314206343, + "learning_rate": 1.6834472057364462e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1251552402973175, + "step": 2565, + "valid_targets_mean": 5167.2, + "valid_targets_min": 897 + }, + { + "epoch": 4.179007323026851, + "grad_norm": 0.516551191316929, + "learning_rate": 1.6754426218095827e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13127167522907257, + "step": 2570, + "valid_targets_mean": 5867.4, + "valid_targets_min": 3286 + }, + { + "epoch": 4.187144019528072, + "grad_norm": 0.4789758761612466, + "learning_rate": 1.6674433738285573e-05, + "loss": 0.2587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1123281717300415, + "step": 2575, + "valid_targets_mean": 6434.2, + "valid_targets_min": 3024 + }, + { + "epoch": 4.195280716029292, + "grad_norm": 0.5647313559781841, + "learning_rate": 1.6594495933064926e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16273194551467896, + "step": 2580, + "valid_targets_mean": 5906.6, + 
"valid_targets_min": 3636 + }, + { + "epoch": 4.203417412530513, + "grad_norm": 0.4660706331602136, + "learning_rate": 1.6514614116666213e-05, + "loss": 0.2664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11347354203462601, + "step": 2585, + "valid_targets_mean": 7309.0, + "valid_targets_min": 2153 + }, + { + "epoch": 4.211554109031733, + "grad_norm": 0.5253962224050552, + "learning_rate": 1.6434789602401264e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410287618637085, + "step": 2590, + "valid_targets_mean": 6679.8, + "valid_targets_min": 1391 + }, + { + "epoch": 4.219690805532954, + "grad_norm": 0.5081317554456144, + "learning_rate": 1.6355023702639835e-05, + "loss": 0.2676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10498687624931335, + "step": 2595, + "valid_targets_mean": 5955.5, + "valid_targets_min": 1427 + }, + { + "epoch": 4.227827502034174, + "grad_norm": 0.5133995179689695, + "learning_rate": 1.6275317728787995e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15336143970489502, + "step": 2600, + "valid_targets_mean": 6639.5, + "valid_targets_min": 3253 + }, + { + "epoch": 4.235964198535394, + "grad_norm": 0.6025044099778876, + "learning_rate": 1.6195672991266627e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540364921092987, + "step": 2605, + "valid_targets_mean": 5138.1, + "valid_targets_min": 1887 + }, + { + "epoch": 4.244100895036615, + "grad_norm": 0.5322557851182275, + "learning_rate": 1.6116090799489817e-05, + "loss": 0.263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13185200095176697, + "step": 2610, + "valid_targets_mean": 6066.1, + "valid_targets_min": 3914 + }, + { + "epoch": 4.252237591537836, + "grad_norm": 0.5441188482256656, + "learning_rate": 1.603657246184337e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12901723384857178, + "step": 2615, + "valid_targets_mean": 4693.8, + "valid_targets_min": 1555 + }, + { + "epoch": 4.260374288039056, + "grad_norm": 
0.5849782251122515, + "learning_rate": 1.5957119285663276e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11500156670808792, + "step": 2620, + "valid_targets_mean": 5282.9, + "valid_targets_min": 2751 + }, + { + "epoch": 4.268510984540277, + "grad_norm": 0.48316160524922036, + "learning_rate": 1.5877732577214227e-05, + "loss": 0.2713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12493854761123657, + "step": 2625, + "valid_targets_mean": 6715.5, + "valid_targets_min": 2447 + }, + { + "epoch": 4.2766476810414975, + "grad_norm": 0.5053057310492522, + "learning_rate": 1.5798413641668152e-05, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10976533591747284, + "step": 2630, + "valid_targets_mean": 5234.4, + "valid_targets_min": 3316 + }, + { + "epoch": 4.284784377542717, + "grad_norm": 0.5519671887282555, + "learning_rate": 1.5719163783082735e-05, + "loss": 0.2687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11516216397285461, + "step": 2635, + "valid_targets_mean": 4224.4, + "valid_targets_min": 2866 + }, + { + "epoch": 4.292921074043938, + "grad_norm": 0.5225037254497872, + "learning_rate": 1.563998430437999e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17195287346839905, + "step": 2640, + "valid_targets_mean": 5706.8, + "valid_targets_min": 3850 + }, + { + "epoch": 4.3010577705451585, + "grad_norm": 0.598135661385332, + "learning_rate": 1.556087650732483e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13336588442325592, + "step": 2645, + "valid_targets_mean": 4468.4, + "valid_targets_min": 3203 + }, + { + "epoch": 4.309194467046379, + "grad_norm": 0.5569556750382165, + "learning_rate": 1.5481841692503696e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275118738412857, + "step": 2650, + "valid_targets_mean": 6468.5, + "valid_targets_min": 2076 + }, + { + "epoch": 4.3173311635476, + "grad_norm": 0.5748124791778696, + "learning_rate": 1.5402881159303132e-05, + "loss": 0.2641, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.13305258750915527, + "step": 2655, + "valid_targets_mean": 5509.2, + "valid_targets_min": 4168 + }, + { + "epoch": 4.32546786004882, + "grad_norm": 0.5867859868511631, + "learning_rate": 1.5323996205888444e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10594667494297028, + "step": 2660, + "valid_targets_mean": 4375.5, + "valid_targets_min": 1215 + }, + { + "epoch": 4.333604556550041, + "grad_norm": 0.5536999755590897, + "learning_rate": 1.5245188129182352e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15136541426181793, + "step": 2665, + "valid_targets_mean": 6034.4, + "valid_targets_min": 2955 + }, + { + "epoch": 4.341741253051262, + "grad_norm": 0.45096926773078744, + "learning_rate": 1.5166458224843666e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15225692093372345, + "step": 2670, + "valid_targets_mean": 7230.6, + "valid_targets_min": 4046 + }, + { + "epoch": 4.349877949552481, + "grad_norm": 0.4876695137087219, + "learning_rate": 1.5087807787246018e-05, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1192513108253479, + "step": 2675, + "valid_targets_mean": 6170.1, + "valid_targets_min": 4160 + }, + { + "epoch": 4.358014646053702, + "grad_norm": 0.518906460408022, + "learning_rate": 1.5009238109456519e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11651073396205902, + "step": 2680, + "valid_targets_mean": 5396.5, + "valid_targets_min": 2022 + }, + { + "epoch": 4.3661513425549225, + "grad_norm": 0.49929632927019785, + "learning_rate": 1.4930750483214545e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12936654686927795, + "step": 2685, + "valid_targets_mean": 6563.0, + "valid_targets_min": 3114 + }, + { + "epoch": 4.374288039056143, + "grad_norm": 0.6156782440298237, + "learning_rate": 1.485234619891049e-05, + "loss": 0.2538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11831729114055634, + "step": 2690, + 
"valid_targets_mean": 3777.8, + "valid_targets_min": 1211 + }, + { + "epoch": 4.382424735557364, + "grad_norm": 0.49361613525157977, + "learning_rate": 1.4774026545564542e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13980375230312347, + "step": 2695, + "valid_targets_mean": 6734.5, + "valid_targets_min": 3579 + }, + { + "epoch": 4.390561432058584, + "grad_norm": 0.4945912218477993, + "learning_rate": 1.4695792810805513e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11305113881826401, + "step": 2700, + "valid_targets_mean": 4944.9, + "valid_targets_min": 2712 + }, + { + "epoch": 4.398698128559805, + "grad_norm": 0.5756325553240135, + "learning_rate": 1.4617646280849642e-05, + "loss": 0.2774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12662610411643982, + "step": 2705, + "valid_targets_mean": 4155.1, + "valid_targets_min": 2861 + }, + { + "epoch": 4.406834825061026, + "grad_norm": 0.4985205743712053, + "learning_rate": 1.4539588240479465e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12237317860126495, + "step": 2710, + "valid_targets_mean": 6090.8, + "valid_targets_min": 4559 + }, + { + "epoch": 4.414971521562245, + "grad_norm": 0.5203081116851938, + "learning_rate": 1.4461619973022687e-05, + "loss": 0.2471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1582365483045578, + "step": 2715, + "valid_targets_mean": 5534.2, + "valid_targets_min": 2713 + }, + { + "epoch": 4.423108218063466, + "grad_norm": 0.5071779116892461, + "learning_rate": 1.4383742760331076e-05, + "loss": 0.2458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10871212184429169, + "step": 2720, + "valid_targets_mean": 5417.2, + "valid_targets_min": 1808 + }, + { + "epoch": 4.431244914564687, + "grad_norm": 0.5122608576322747, + "learning_rate": 1.4305957882759427e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09927636384963989, + "step": 2725, + "valid_targets_mean": 4650.1, + "valid_targets_min": 544 + }, + { + "epoch": 
4.439381611065907, + "grad_norm": 0.6181215239682092, + "learning_rate": 1.4228266619144453e-05, + "loss": 0.2658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1790265142917633, + "step": 2730, + "valid_targets_mean": 5120.4, + "valid_targets_min": 2997 + }, + { + "epoch": 4.447518307567128, + "grad_norm": 0.568948631389522, + "learning_rate": 1.4150670246783799e-05, + "loss": 0.229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12507446110248566, + "step": 2735, + "valid_targets_mean": 4760.6, + "valid_targets_min": 2546 + }, + { + "epoch": 4.4556550040683485, + "grad_norm": 0.49435152766723117, + "learning_rate": 1.4073170041415028e-05, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1444876343011856, + "step": 2740, + "valid_targets_mean": 6215.4, + "valid_targets_min": 3236 + }, + { + "epoch": 4.463791700569569, + "grad_norm": 0.6181766282098673, + "learning_rate": 1.3995767277194665e-05, + "loss": 0.2429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10405126214027405, + "step": 2745, + "valid_targets_mean": 4468.4, + "valid_targets_min": 724 + }, + { + "epoch": 4.471928397070789, + "grad_norm": 0.41154847544209594, + "learning_rate": 1.391846322667722e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0906953513622284, + "step": 2750, + "valid_targets_mean": 5218.9, + "valid_targets_min": 2520 + }, + { + "epoch": 4.480065093572009, + "grad_norm": 0.6572132993720595, + "learning_rate": 1.3841259160794298e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12475261837244034, + "step": 2755, + "valid_targets_mean": 6810.9, + "valid_targets_min": 2340 + }, + { + "epoch": 4.48820179007323, + "grad_norm": 0.5336969527360339, + "learning_rate": 1.3764156348833666e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12281182408332825, + "step": 2760, + "valid_targets_mean": 6573.4, + "valid_targets_min": 984 + }, + { + "epoch": 4.496338486574451, + "grad_norm": 0.568863776583933, + "learning_rate": 
1.3687156058418422e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1258392333984375, + "step": 2765, + "valid_targets_mean": 4216.4, + "valid_targets_min": 2142 + }, + { + "epoch": 4.504475183075671, + "grad_norm": 0.5202434927147002, + "learning_rate": 1.3610259555486152e-05, + "loss": 0.2665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14846715331077576, + "step": 2770, + "valid_targets_mean": 5878.0, + "valid_targets_min": 880 + }, + { + "epoch": 4.512611879576892, + "grad_norm": 0.620131546040906, + "learning_rate": 1.3533468104268078e-05, + "loss": 0.275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1723940074443817, + "step": 2775, + "valid_targets_mean": 5155.9, + "valid_targets_min": 902 + }, + { + "epoch": 4.5207485760781125, + "grad_norm": 0.49196921841926167, + "learning_rate": 1.3456782967268316e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12150443345308304, + "step": 2780, + "valid_targets_mean": 5932.6, + "valid_targets_min": 3864 + }, + { + "epoch": 4.528885272579333, + "grad_norm": 0.4638624677398862, + "learning_rate": 1.3380205405243096e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09334471076726913, + "step": 2785, + "valid_targets_mean": 5831.0, + "valid_targets_min": 2304 + }, + { + "epoch": 4.537021969080554, + "grad_norm": 0.582037329244977, + "learning_rate": 1.3303736677180044e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17545725405216217, + "step": 2790, + "valid_targets_mean": 5931.8, + "valid_targets_min": 1744 + }, + { + "epoch": 4.5451586655817735, + "grad_norm": 0.40849974141197115, + "learning_rate": 1.322737804027749e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11623605340719223, + "step": 2795, + "valid_targets_mean": 8913.4, + "valid_targets_min": 3525 + }, + { + "epoch": 4.553295362082994, + "grad_norm": 0.5932308533815444, + "learning_rate": 1.315113074992378e-05, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11075286567211151, + "step": 2800, + "valid_targets_mean": 6780.4, + "valid_targets_min": 4594 + }, + { + "epoch": 4.561432058584215, + "grad_norm": 0.5613236786122735, + "learning_rate": 1.3074996059676644e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12261034548282623, + "step": 2805, + "valid_targets_mean": 6014.5, + "valid_targets_min": 1412 + }, + { + "epoch": 4.569568755085435, + "grad_norm": 0.6145107752409859, + "learning_rate": 1.2998975221242596e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11969676613807678, + "step": 2810, + "valid_targets_mean": 6176.0, + "valid_targets_min": 2517 + }, + { + "epoch": 4.577705451586656, + "grad_norm": 0.5116817887496089, + "learning_rate": 1.292306948445634e-05, + "loss": 0.2527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09205292165279388, + "step": 2815, + "valid_targets_mean": 4062.6, + "valid_targets_min": 1865 + }, + { + "epoch": 4.585842148087877, + "grad_norm": 0.5032628834115105, + "learning_rate": 1.2847280097260245e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10667150467634201, + "step": 2820, + "valid_targets_mean": 5223.2, + "valid_targets_min": 1628 + }, + { + "epoch": 4.593978844589097, + "grad_norm": 0.519138895357275, + "learning_rate": 1.2771608305683798e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1109437346458435, + "step": 2825, + "valid_targets_mean": 5241.1, + "valid_targets_min": 2067 + }, + { + "epoch": 4.602115541090317, + "grad_norm": 0.525394243588212, + "learning_rate": 1.269605535382314e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16636374592781067, + "step": 2830, + "valid_targets_mean": 6438.2, + "valid_targets_min": 558 + }, + { + "epoch": 4.610252237591538, + "grad_norm": 0.5191821565520898, + "learning_rate": 1.2620622483820604e-05, + "loss": 0.2402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11650275439023972, + "step": 2835, + "valid_targets_mean": 4864.6, + 
"valid_targets_min": 524 + }, + { + "epoch": 4.618388934092758, + "grad_norm": 0.5627143798214106, + "learning_rate": 1.2545310935844288e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09677013009786606, + "step": 2840, + "valid_targets_mean": 3457.5, + "valid_targets_min": 360 + }, + { + "epoch": 4.626525630593979, + "grad_norm": 0.5874185551809977, + "learning_rate": 1.2470121948067693e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1323344111442566, + "step": 2845, + "valid_targets_mean": 5520.2, + "valid_targets_min": 2752 + }, + { + "epoch": 4.634662327095199, + "grad_norm": 0.5568858672026162, + "learning_rate": 1.2395056756649328e-05, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13221968710422516, + "step": 2850, + "valid_targets_mean": 5042.0, + "valid_targets_min": 1374 + }, + { + "epoch": 4.64279902359642, + "grad_norm": 0.4669743898337823, + "learning_rate": 1.2320116595712413e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10005086660385132, + "step": 2855, + "valid_targets_mean": 6354.0, + "valid_targets_min": 2977 + }, + { + "epoch": 4.650935720097641, + "grad_norm": 0.890197194306011, + "learning_rate": 1.224530269732457e-05, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11468639224767685, + "step": 2860, + "valid_targets_mean": 4317.4, + "valid_targets_min": 1349 + }, + { + "epoch": 4.65907241659886, + "grad_norm": 0.523673019189796, + "learning_rate": 1.2170616291477595e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10656925290822983, + "step": 2865, + "valid_targets_mean": 5904.4, + "valid_targets_min": 513 + }, + { + "epoch": 4.667209113100081, + "grad_norm": 0.5080582692191867, + "learning_rate": 1.2096058606067205e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10707581788301468, + "step": 2870, + "valid_targets_mean": 4963.8, + "valid_targets_min": 2731 + }, + { + "epoch": 4.675345809601302, + "grad_norm": 
0.5988092562512479, + "learning_rate": 1.2021630866872877e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1406228244304657, + "step": 2875, + "valid_targets_mean": 5334.5, + "valid_targets_min": 1531 + }, + { + "epoch": 4.683482506102522, + "grad_norm": 0.41508039808193575, + "learning_rate": 1.1947334297537675e-05, + "loss": 0.2564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07547567784786224, + "step": 2880, + "valid_targets_mean": 6389.6, + "valid_targets_min": 1338 + }, + { + "epoch": 4.691619202603743, + "grad_norm": 0.4920978401058176, + "learning_rate": 1.1873170119548134e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12869910895824432, + "step": 2885, + "valid_targets_mean": 6174.5, + "valid_targets_min": 1998 + }, + { + "epoch": 4.6997558991049635, + "grad_norm": 0.5167942635637724, + "learning_rate": 1.1799139552214202e-05, + "loss": 0.2656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16343358159065247, + "step": 2890, + "valid_targets_mean": 5750.2, + "valid_targets_min": 3412 + }, + { + "epoch": 4.707892595606184, + "grad_norm": 0.6239170640519179, + "learning_rate": 1.1725243812649168e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14894859492778778, + "step": 2895, + "valid_targets_mean": 5554.4, + "valid_targets_min": 4602 + }, + { + "epoch": 4.716029292107405, + "grad_norm": 0.5652543437239146, + "learning_rate": 1.1651484115749647e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13217860460281372, + "step": 2900, + "valid_targets_mean": 4814.0, + "valid_targets_min": 1908 + }, + { + "epoch": 4.724165988608625, + "grad_norm": 0.43613616950750617, + "learning_rate": 1.1577861674175645e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11000243574380875, + "step": 2905, + "valid_targets_mean": 6706.0, + "valid_targets_min": 2946 + }, + { + "epoch": 4.732302685109845, + "grad_norm": 0.5312582849880375, + "learning_rate": 1.1504377698330575e-05, + "loss": 0.2636, 
+ "loss_nan_ranks": 0, + "loss_rank_avg": 0.10568075627088547, + "step": 2910, + "valid_targets_mean": 4921.6, + "valid_targets_min": 2460 + }, + { + "epoch": 4.740439381611066, + "grad_norm": 0.47078207010849765, + "learning_rate": 1.1431033396341391e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1127002015709877, + "step": 2915, + "valid_targets_mean": 6926.8, + "valid_targets_min": 1427 + }, + { + "epoch": 4.748576078112286, + "grad_norm": 0.5215888340298842, + "learning_rate": 1.1357829974038703e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1564989537000656, + "step": 2920, + "valid_targets_mean": 6785.0, + "valid_targets_min": 3423 + }, + { + "epoch": 4.756712774613507, + "grad_norm": 0.5470344569640518, + "learning_rate": 1.1284768634936971e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.187900573015213, + "step": 2925, + "valid_targets_mean": 6071.8, + "valid_targets_min": 2555 + }, + { + "epoch": 4.764849471114728, + "grad_norm": 0.5184922299861338, + "learning_rate": 1.1211850580214703e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10915215313434601, + "step": 2930, + "valid_targets_mean": 5272.6, + "valid_targets_min": 774 + }, + { + "epoch": 4.772986167615948, + "grad_norm": 0.5260960542301046, + "learning_rate": 1.1139077008694712e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16236786544322968, + "step": 2935, + "valid_targets_mean": 6286.5, + "valid_targets_min": 1737 + }, + { + "epoch": 4.781122864117169, + "grad_norm": 0.5220847375110784, + "learning_rate": 1.1066449116824428e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12581384181976318, + "step": 2940, + "valid_targets_mean": 5605.1, + "valid_targets_min": 4296 + }, + { + "epoch": 4.7892595606183885, + "grad_norm": 0.48555117830549954, + "learning_rate": 1.099396809865618e-05, + "loss": 0.2653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10319238156080246, + "step": 2945, + 
"valid_targets_mean": 5697.5, + "valid_targets_min": 1837 + }, + { + "epoch": 4.797396257119609, + "grad_norm": 0.5192267689139981, + "learning_rate": 1.0921635145827611e-05, + "loss": 0.2416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1321655809879303, + "step": 2950, + "valid_targets_mean": 6476.0, + "valid_targets_min": 3020 + }, + { + "epoch": 4.80553295362083, + "grad_norm": 0.5076930593639264, + "learning_rate": 1.0849451447542054e-05, + "loss": 0.2495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11374587565660477, + "step": 2955, + "valid_targets_mean": 6396.4, + "valid_targets_min": 1826 + }, + { + "epoch": 4.81366965012205, + "grad_norm": 0.6351691391627748, + "learning_rate": 1.0777418190549018e-05, + "loss": 0.2512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2092050015926361, + "step": 2960, + "valid_targets_mean": 4927.0, + "valid_targets_min": 803 + }, + { + "epoch": 4.821806346623271, + "grad_norm": 0.55668226084915, + "learning_rate": 1.070553655912463e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11320999264717102, + "step": 2965, + "valid_targets_mean": 4125.8, + "valid_targets_min": 1163 + }, + { + "epoch": 4.829943043124492, + "grad_norm": 0.4950231660068158, + "learning_rate": 1.0633807735052202e-05, + "loss": 0.2639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11982996016740799, + "step": 2970, + "valid_targets_mean": 4853.8, + "valid_targets_min": 2324 + }, + { + "epoch": 4.838079739625712, + "grad_norm": 0.7573897869396419, + "learning_rate": 1.056223289760278e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10986106842756271, + "step": 2975, + "valid_targets_mean": 6308.2, + "valid_targets_min": 2719 + }, + { + "epoch": 4.846216436126932, + "grad_norm": 0.5238872739771269, + "learning_rate": 1.0490813223515764e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12110009789466858, + "step": 2980, + "valid_targets_mean": 6574.9, + "valid_targets_min": 3849 + }, + { + "epoch": 
4.854353132628153, + "grad_norm": 0.6054997927877389, + "learning_rate": 1.0419549886979582e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14355552196502686, + "step": 2985, + "valid_targets_mean": 4339.4, + "valid_targets_min": 947 + }, + { + "epoch": 4.862489829129373, + "grad_norm": 0.5891493401006247, + "learning_rate": 1.0348444059612338e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12982431054115295, + "step": 2990, + "valid_targets_mean": 4962.1, + "valid_targets_min": 3174 + }, + { + "epoch": 4.870626525630594, + "grad_norm": 0.49481010220886834, + "learning_rate": 1.0277496910442596e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11110454797744751, + "step": 2995, + "valid_targets_mean": 5967.2, + "valid_targets_min": 767 + }, + { + "epoch": 4.8787632221318145, + "grad_norm": 0.45375461974264514, + "learning_rate": 1.0206709605890133e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1309911608695984, + "step": 3000, + "valid_targets_mean": 5787.9, + "valid_targets_min": 3290 + }, + { + "epoch": 4.886899918633035, + "grad_norm": 0.4717289745698522, + "learning_rate": 1.0136083309746765e-05, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18679049611091614, + "step": 3005, + "valid_targets_mean": 8518.8, + "valid_targets_min": 5241 + }, + { + "epoch": 4.895036615134256, + "grad_norm": 0.4826417818565528, + "learning_rate": 1.006561918315724e-05, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14399048686027527, + "step": 3010, + "valid_targets_mean": 6302.8, + "valid_targets_min": 4959 + }, + { + "epoch": 4.903173311635476, + "grad_norm": 0.626033134101394, + "learning_rate": 9.995318384600112e-06, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13014422357082367, + "step": 3015, + "valid_targets_mean": 6003.1, + "valid_targets_min": 1225 + }, + { + "epoch": 4.911310008136697, + "grad_norm": 0.530085231028136, + "learning_rate": 
9.92518206986871e-06, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09500178694725037, + "step": 3020, + "valid_targets_mean": 4791.5, + "valid_targets_min": 524 + }, + { + "epoch": 4.919446704637917, + "grad_norm": 0.645773429216433, + "learning_rate": 9.855211392052139e-06, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1401991993188858, + "step": 3025, + "valid_targets_mean": 4193.1, + "valid_targets_min": 2546 + }, + { + "epoch": 4.927583401139137, + "grad_norm": 0.5337804157823813, + "learning_rate": 9.78540750151632e-06, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13427186012268066, + "step": 3030, + "valid_targets_mean": 5285.6, + "valid_targets_min": 1577 + }, + { + "epoch": 4.935720097640358, + "grad_norm": 1.9302688171029854, + "learning_rate": 9.715771545885076e-06, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14041340351104736, + "step": 3035, + "valid_targets_mean": 7417.6, + "valid_targets_min": 3274 + }, + { + "epoch": 4.9438567941415785, + "grad_norm": 0.5343935268176057, + "learning_rate": 9.646304670021263e-06, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1480150669813156, + "step": 3040, + "valid_targets_mean": 6581.8, + "valid_targets_min": 1927 + }, + { + "epoch": 4.951993490642799, + "grad_norm": 0.569875399697325, + "learning_rate": 9.577008016007956e-06, + "loss": 0.2683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17220965027809143, + "step": 3045, + "valid_targets_mean": 5431.8, + "valid_targets_min": 1040 + }, + { + "epoch": 4.96013018714402, + "grad_norm": 0.47006455437543826, + "learning_rate": 9.50788272312966e-06, + "loss": 0.2623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1282365322113037, + "step": 3050, + "valid_targets_mean": 6441.4, + "valid_targets_min": 427 + }, + { + "epoch": 4.96826688364524, + "grad_norm": 0.5349848814276844, + "learning_rate": 9.43892992785358e-06, + "loss": 0.2595, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12551423907279968, + "step": 3055, + "valid_targets_mean": 5614.6, + "valid_targets_min": 2311 + }, + { + "epoch": 4.97640358014646, + "grad_norm": 0.5428356503095221, + "learning_rate": 9.370150763810966e-06, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10237240791320801, + "step": 3060, + "valid_targets_mean": 4330.6, + "valid_targets_min": 2059 + }, + { + "epoch": 4.984540276647681, + "grad_norm": 0.48029744452868756, + "learning_rate": 9.301546361778424e-06, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13645213842391968, + "step": 3065, + "valid_targets_mean": 8541.2, + "valid_targets_min": 2287 + }, + { + "epoch": 4.992676973148901, + "grad_norm": 0.6660157157795131, + "learning_rate": 9.233117849659367e-06, + "loss": 0.273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12218661606311798, + "step": 3070, + "valid_targets_mean": 5242.0, + "valid_targets_min": 3400 + }, + { + "epoch": 5.0, + "grad_norm": 0.6496835877212901, + "learning_rate": 9.164866352465447e-06, + "loss": 0.2478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23714704811573029, + "step": 3075, + "valid_targets_mean": 6213.5, + "valid_targets_min": 4269 + }, + { + "epoch": 5.008136696501221, + "grad_norm": 0.5523377269177591, + "learning_rate": 9.096792992298089e-06, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14559800922870636, + "step": 3080, + "valid_targets_mean": 5338.9, + "valid_targets_min": 1860 + }, + { + "epoch": 5.016273393002441, + "grad_norm": 0.48621978067676924, + "learning_rate": 9.028898888330005e-06, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11603275686502457, + "step": 3085, + "valid_targets_mean": 7716.8, + "valid_targets_min": 4160 + }, + { + "epoch": 5.024410089503662, + "grad_norm": 0.4888228209250331, + "learning_rate": 8.961185156786815e-06, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15556365251541138, + "step": 3090, + "valid_targets_mean": 7306.2, + "valid_targets_min": 1730 + }, 
+ { + "epoch": 5.032546786004882, + "grad_norm": 0.5604592596720499, + "learning_rate": 8.893652910928698e-06, + "loss": 0.236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09606099128723145, + "step": 3095, + "valid_targets_mean": 4868.5, + "valid_targets_min": 867 + }, + { + "epoch": 5.040683482506102, + "grad_norm": 0.5312703952374581, + "learning_rate": 8.826303261032072e-06, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14954786002635956, + "step": 3100, + "valid_targets_mean": 5175.0, + "valid_targets_min": 3054 + }, + { + "epoch": 5.048820179007323, + "grad_norm": 0.49989048406424097, + "learning_rate": 8.759137314371378e-06, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10760124027729034, + "step": 3105, + "valid_targets_mean": 6618.8, + "valid_targets_min": 2164 + }, + { + "epoch": 5.0569568755085434, + "grad_norm": 0.7886032990488385, + "learning_rate": 8.692156175200823e-06, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1214192658662796, + "step": 3110, + "valid_targets_mean": 6027.0, + "valid_targets_min": 2787 + }, + { + "epoch": 5.065093572009764, + "grad_norm": 0.48898341530084205, + "learning_rate": 8.625360944736262e-06, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11569772660732269, + "step": 3115, + "valid_targets_mean": 6272.9, + "valid_targets_min": 372 + }, + { + "epoch": 5.073230268510985, + "grad_norm": 0.5956493423805724, + "learning_rate": 8.558752721137089e-06, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1290283203125, + "step": 3120, + "valid_targets_mean": 5507.6, + "valid_targets_min": 3024 + }, + { + "epoch": 5.081366965012205, + "grad_norm": 0.5660632743523386, + "learning_rate": 8.492332599488157e-06, + "loss": 0.2324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10170029103755951, + "step": 3125, + "valid_targets_mean": 4242.0, + "valid_targets_min": 541 + }, + { + "epoch": 5.089503661513426, + "grad_norm": 0.5887922517647676, + "learning_rate": 
8.42610167178183e-06, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15197445452213287, + "step": 3130, + "valid_targets_mean": 5382.2, + "valid_targets_min": 3204 + }, + { + "epoch": 5.097640358014646, + "grad_norm": 0.487011491561986, + "learning_rate": 8.360061026899962e-06, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12325992435216904, + "step": 3135, + "valid_targets_mean": 8065.1, + "valid_targets_min": 3117 + }, + { + "epoch": 5.105777054515866, + "grad_norm": 0.5182417975398911, + "learning_rate": 8.294211750596035e-06, + "loss": 0.2457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09405682235956192, + "step": 3140, + "valid_targets_mean": 5815.0, + "valid_targets_min": 466 + }, + { + "epoch": 5.113913751017087, + "grad_norm": 0.5322887135274532, + "learning_rate": 8.228554925477306e-06, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11751969158649445, + "step": 3145, + "valid_targets_mean": 6338.4, + "valid_targets_min": 2067 + }, + { + "epoch": 5.1220504475183075, + "grad_norm": 0.5012197400585588, + "learning_rate": 8.163091630987e-06, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16712604463100433, + "step": 3150, + "valid_targets_mean": 6505.2, + "valid_targets_min": 2195 + }, + { + "epoch": 5.130187144019528, + "grad_norm": 0.5774573904412683, + "learning_rate": 8.097822943386563e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14454790949821472, + "step": 3155, + "valid_targets_mean": 5446.6, + "valid_targets_min": 2004 + }, + { + "epoch": 5.138323840520749, + "grad_norm": 0.5165060277624411, + "learning_rate": 8.03274993573797e-06, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14901450276374817, + "step": 3160, + "valid_targets_mean": 6276.4, + "valid_targets_min": 3350 + }, + { + "epoch": 5.146460537021969, + "grad_norm": 0.4804753447195192, + "learning_rate": 7.96787367788609e-06, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11782047897577286, + "step": 3165, + "valid_targets_mean": 6550.2, + "valid_targets_min": 1879 + }, + { + "epoch": 5.15459723352319, + "grad_norm": 0.6535091078691466, + "learning_rate": 7.903195236441086e-06, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11725549399852753, + "step": 3170, + "valid_targets_mean": 6698.9, + "valid_targets_min": 1744 + }, + { + "epoch": 5.16273393002441, + "grad_norm": 0.4259935959590437, + "learning_rate": 7.838715674760874e-06, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0984138548374176, + "step": 3175, + "valid_targets_mean": 7291.5, + "valid_targets_min": 2757 + }, + { + "epoch": 5.17087062652563, + "grad_norm": 0.5362331748478609, + "learning_rate": 7.774436052933675e-06, + "loss": 0.2413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1301015317440033, + "step": 3180, + "valid_targets_mean": 5733.1, + "valid_targets_min": 2704 + }, + { + "epoch": 5.179007323026851, + "grad_norm": 0.4742304640079011, + "learning_rate": 7.710357427760541e-06, + "loss": 0.2339, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11534355580806732, + "step": 3185, + "valid_targets_mean": 7236.4, + "valid_targets_min": 897 + }, + { + "epoch": 5.187144019528072, + "grad_norm": 0.5344847333949452, + "learning_rate": 7.646480852738008e-06, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11348524689674377, + "step": 3190, + "valid_targets_mean": 6580.0, + "valid_targets_min": 1862 + }, + { + "epoch": 5.195280716029292, + "grad_norm": 0.5127549344284401, + "learning_rate": 7.5828073780407575e-06, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12163542211055756, + "step": 3195, + "valid_targets_mean": 7134.1, + "valid_targets_min": 4067 + }, + { + "epoch": 5.203417412530513, + "grad_norm": 0.43286097887389563, + "learning_rate": 7.51933805050439e-06, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11376143991947174, + "step": 3200, + "valid_targets_mean": 9556.1, + "valid_targets_min": 
3383 + }, + { + "epoch": 5.211554109031733, + "grad_norm": 0.574180003978773, + "learning_rate": 7.45607391360816e-06, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0861620083451271, + "step": 3205, + "valid_targets_mean": 3932.6, + "valid_targets_min": 1533 + }, + { + "epoch": 5.219690805532954, + "grad_norm": 0.6356603119387289, + "learning_rate": 7.393016007457858e-06, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19337713718414307, + "step": 3210, + "valid_targets_mean": 6120.9, + "valid_targets_min": 3426 + }, + { + "epoch": 5.227827502034174, + "grad_norm": 0.7725829783966397, + "learning_rate": 7.3301653687687005e-06, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1038031131029129, + "step": 3215, + "valid_targets_mean": 5630.2, + "valid_targets_min": 1724 + }, + { + "epoch": 5.235964198535394, + "grad_norm": 0.6539324695436758, + "learning_rate": 7.2675230308482715e-06, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12459734082221985, + "step": 3220, + "valid_targets_mean": 7373.1, + "valid_targets_min": 4269 + }, + { + "epoch": 5.244100895036615, + "grad_norm": 0.5030860212842273, + "learning_rate": 7.205090023579575e-06, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12467215210199356, + "step": 3225, + "valid_targets_mean": 6925.0, + "valid_targets_min": 2743 + }, + { + "epoch": 5.252237591537836, + "grad_norm": 0.5831950972097935, + "learning_rate": 7.142867373404054e-06, + "loss": 0.2374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.120590440928936, + "step": 3230, + "valid_targets_mean": 5793.6, + "valid_targets_min": 1388 + }, + { + "epoch": 5.260374288039056, + "grad_norm": 0.550231410324282, + "learning_rate": 7.080856103304739e-06, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12391173839569092, + "step": 3235, + "valid_targets_mean": 5339.4, + "valid_targets_min": 3466 + }, + { + "epoch": 5.268510984540277, + "grad_norm": 0.5759308774766279, + 
"learning_rate": 7.019057232789432e-06, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11659073829650879, + "step": 3240, + "valid_targets_mean": 4783.8, + "valid_targets_min": 1721 + }, + { + "epoch": 5.2766476810414975, + "grad_norm": 0.6119869770255949, + "learning_rate": 6.95747177787393e-06, + "loss": 0.2461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12066857516765594, + "step": 3245, + "valid_targets_mean": 5504.8, + "valid_targets_min": 2771 + }, + { + "epoch": 5.284784377542717, + "grad_norm": 0.4599465517768359, + "learning_rate": 6.896100751065355e-06, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10617942363023758, + "step": 3250, + "valid_targets_mean": 6689.1, + "valid_targets_min": 4081 + }, + { + "epoch": 5.292921074043938, + "grad_norm": 0.6272025768366799, + "learning_rate": 6.834945161345458e-06, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12755145132541656, + "step": 3255, + "valid_targets_mean": 5025.0, + "valid_targets_min": 1015 + }, + { + "epoch": 5.3010577705451585, + "grad_norm": 0.6127725285042429, + "learning_rate": 6.7740060141540735e-06, + "loss": 0.2573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13596072793006897, + "step": 3260, + "valid_targets_mean": 3870.8, + "valid_targets_min": 614 + }, + { + "epoch": 5.309194467046379, + "grad_norm": 0.6393405669196937, + "learning_rate": 6.713284311372559e-06, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13608285784721375, + "step": 3265, + "valid_targets_mean": 3831.1, + "valid_targets_min": 1395 + }, + { + "epoch": 5.3173311635476, + "grad_norm": 0.519005266223286, + "learning_rate": 6.652781051307347e-06, + "loss": 0.26, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09571771323680878, + "step": 3270, + "valid_targets_mean": 5871.8, + "valid_targets_min": 1771 + }, + { + "epoch": 5.32546786004882, + "grad_norm": 0.4801756872745154, + "learning_rate": 6.592497228673515e-06, + "loss": 0.2335, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.09945647418498993, + "step": 3275, + "valid_targets_mean": 5828.0, + "valid_targets_min": 3637 + }, + { + "epoch": 5.333604556550041, + "grad_norm": 0.6255645015728781, + "learning_rate": 6.532433834578449e-06, + "loss": 0.2354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1372753381729126, + "step": 3280, + "valid_targets_mean": 5747.9, + "valid_targets_min": 2363 + }, + { + "epoch": 5.341741253051262, + "grad_norm": 0.5792091744981441, + "learning_rate": 6.472591856505526e-06, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12719324231147766, + "step": 3285, + "valid_targets_mean": 5969.4, + "valid_targets_min": 3258 + }, + { + "epoch": 5.349877949552481, + "grad_norm": 0.46470017871854946, + "learning_rate": 6.412972278297893e-06, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0882813110947609, + "step": 3290, + "valid_targets_mean": 6402.1, + "valid_targets_min": 774 + }, + { + "epoch": 5.358014646053702, + "grad_norm": 0.521223858319776, + "learning_rate": 6.353576080142309e-06, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11632373183965683, + "step": 3295, + "valid_targets_mean": 6959.1, + "valid_targets_min": 5246 + }, + { + "epoch": 5.3661513425549225, + "grad_norm": 0.5413214801922716, + "learning_rate": 6.294404238552994e-06, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13462717831134796, + "step": 3300, + "valid_targets_mean": 6453.6, + "valid_targets_min": 3165 + }, + { + "epoch": 5.374288039056143, + "grad_norm": 0.5573148190013232, + "learning_rate": 6.235457726355591e-06, + "loss": 0.2378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08801619708538055, + "step": 3305, + "valid_targets_mean": 4524.6, + "valid_targets_min": 903 + }, + { + "epoch": 5.382424735557364, + "grad_norm": 0.606307196821786, + "learning_rate": 6.176737512671182e-06, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13813389837741852, + "step": 3310, + "valid_targets_mean": 4672.2, + 
"valid_targets_min": 1451 + }, + { + "epoch": 5.390561432058584, + "grad_norm": 0.6132589856336323, + "learning_rate": 6.11824456290034e-06, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1388852894306183, + "step": 3315, + "valid_targets_mean": 7884.9, + "valid_targets_min": 3424 + }, + { + "epoch": 5.398698128559805, + "grad_norm": 0.5924080709083809, + "learning_rate": 6.05997983870727e-06, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17288537323474884, + "step": 3320, + "valid_targets_mean": 4974.5, + "valid_targets_min": 1145 + }, + { + "epoch": 5.406834825061026, + "grad_norm": 0.5457228907298186, + "learning_rate": 6.00194429800399e-06, + "loss": 0.2486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13619005680084229, + "step": 3325, + "valid_targets_mean": 5901.8, + "valid_targets_min": 1958 + }, + { + "epoch": 5.414971521562245, + "grad_norm": 0.5079754475219553, + "learning_rate": 5.944138894934582e-06, + "loss": 0.2362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13231110572814941, + "step": 3330, + "valid_targets_mean": 4886.6, + "valid_targets_min": 1461 + }, + { + "epoch": 5.423108218063466, + "grad_norm": 0.5492790566828493, + "learning_rate": 5.886564579859504e-06, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10987076908349991, + "step": 3335, + "valid_targets_mean": 4042.6, + "valid_targets_min": 397 + }, + { + "epoch": 5.431244914564687, + "grad_norm": 0.5765503149421513, + "learning_rate": 5.829222299339969e-06, + "loss": 0.2256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09734175354242325, + "step": 3340, + "valid_targets_mean": 4017.5, + "valid_targets_min": 687 + }, + { + "epoch": 5.439381611065907, + "grad_norm": 0.4945201441893148, + "learning_rate": 5.772112996122403e-06, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12293386459350586, + "step": 3345, + "valid_targets_mean": 6276.8, + "valid_targets_min": 1669 + }, + { + "epoch": 5.447518307567128, + "grad_norm": 
0.5241675442790266, + "learning_rate": 5.715237609122896e-06, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13944748044013977, + "step": 3350, + "valid_targets_mean": 6314.0, + "valid_targets_min": 4246 + }, + { + "epoch": 5.4556550040683485, + "grad_norm": 0.5086698627055527, + "learning_rate": 5.658597073411816e-06, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10946042835712433, + "step": 3355, + "valid_targets_mean": 4947.4, + "valid_targets_min": 803 + }, + { + "epoch": 5.463791700569569, + "grad_norm": 0.5087935856854946, + "learning_rate": 5.602192320198401e-06, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11571265012025833, + "step": 3360, + "valid_targets_mean": 4712.5, + "valid_targets_min": 768 + }, + { + "epoch": 5.471928397070789, + "grad_norm": 0.6209480221847946, + "learning_rate": 5.546024276815467e-06, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12471382319927216, + "step": 3365, + "valid_targets_mean": 6229.0, + "valid_targets_min": 2774 + }, + { + "epoch": 5.480065093572009, + "grad_norm": 0.5740469197183198, + "learning_rate": 5.490093866704171e-06, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15511798858642578, + "step": 3370, + "valid_targets_mean": 5492.1, + "valid_targets_min": 2163 + }, + { + "epoch": 5.48820179007323, + "grad_norm": 0.5005163631534619, + "learning_rate": 5.434402009398798e-06, + "loss": 0.2506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10742124915122986, + "step": 3375, + "valid_targets_mean": 5065.9, + "valid_targets_min": 1052 + }, + { + "epoch": 5.496338486574451, + "grad_norm": 0.5671784567412589, + "learning_rate": 5.378949620511671e-06, + "loss": 0.2348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11315690726041794, + "step": 3380, + "valid_targets_mean": 3717.4, + "valid_targets_min": 1470 + }, + { + "epoch": 5.504475183075671, + "grad_norm": 0.4360799056624353, + "learning_rate": 5.3237376117180854e-06, + "loss": 0.2393, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.0977911725640297, + "step": 3385, + "valid_targets_mean": 6910.8, + "valid_targets_min": 3082 + }, + { + "epoch": 5.512611879576892, + "grad_norm": 0.5142900343224198, + "learning_rate": 5.268766890741315e-06, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13378457725048065, + "step": 3390, + "valid_targets_mean": 6389.2, + "valid_targets_min": 3445 + }, + { + "epoch": 5.5207485760781125, + "grad_norm": 0.5602434630005119, + "learning_rate": 5.214038361337719e-06, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10973970592021942, + "step": 3395, + "valid_targets_mean": 4815.2, + "valid_targets_min": 1484 + }, + { + "epoch": 5.528885272579333, + "grad_norm": 0.5342812643462996, + "learning_rate": 5.159552923281841e-06, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0758785754442215, + "step": 3400, + "valid_targets_mean": 4529.4, + "valid_targets_min": 2360 + }, + { + "epoch": 5.537021969080554, + "grad_norm": 0.5716216749186573, + "learning_rate": 5.105311472351639e-06, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.121833935379982, + "step": 3405, + "valid_targets_mean": 5706.9, + "valid_targets_min": 4080 + }, + { + "epoch": 5.5451586655817735, + "grad_norm": 0.43346386249748464, + "learning_rate": 5.051314900313764e-06, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07879453897476196, + "step": 3410, + "valid_targets_mean": 5555.4, + "valid_targets_min": 3770 + }, + { + "epoch": 5.553295362082994, + "grad_norm": 0.49340156082331843, + "learning_rate": 4.997564094908878e-06, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12145587801933289, + "step": 3415, + "valid_targets_mean": 6155.6, + "valid_targets_min": 2387 + }, + { + "epoch": 5.561432058584215, + "grad_norm": 0.47890012005963234, + "learning_rate": 4.944059939837082e-06, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12839338183403015, + "step": 3420, + 
"valid_targets_mean": 6536.0, + "valid_targets_min": 1825 + }, + { + "epoch": 5.569568755085435, + "grad_norm": 0.4903125361830746, + "learning_rate": 4.890803314743371e-06, + "loss": 0.255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11269734799861908, + "step": 3425, + "valid_targets_mean": 5621.2, + "valid_targets_min": 3511 + }, + { + "epoch": 5.577705451586656, + "grad_norm": 0.5283918720724791, + "learning_rate": 4.837795095203175e-06, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10433046519756317, + "step": 3430, + "valid_targets_mean": 5621.9, + "valid_targets_min": 2251 + }, + { + "epoch": 5.585842148087877, + "grad_norm": 0.6026011579728354, + "learning_rate": 4.785036152707969e-06, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12681570649147034, + "step": 3435, + "valid_targets_mean": 4668.2, + "valid_targets_min": 2544 + }, + { + "epoch": 5.593978844589097, + "grad_norm": 0.6061374193583238, + "learning_rate": 4.732527354650951e-06, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13143488764762878, + "step": 3440, + "valid_targets_mean": 4703.0, + "valid_targets_min": 1974 + }, + { + "epoch": 5.602115541090317, + "grad_norm": 0.5221796158770693, + "learning_rate": 4.68026956431276e-06, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.149415522813797, + "step": 3445, + "valid_targets_mean": 6412.0, + "valid_targets_min": 4008 + }, + { + "epoch": 5.610252237591538, + "grad_norm": 0.576901307893142, + "learning_rate": 4.628263640847304e-06, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12277019023895264, + "step": 3450, + "valid_targets_mean": 5465.6, + "valid_targets_min": 3759 + }, + { + "epoch": 5.618388934092758, + "grad_norm": 0.5641514509002485, + "learning_rate": 4.5765104392676205e-06, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13894778490066528, + "step": 3455, + "valid_targets_mean": 5405.1, + "valid_targets_min": 1513 + }, + { + "epoch": 
5.626525630593979, + "grad_norm": 0.7340535896041918, + "learning_rate": 4.525010810431825e-06, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13835415244102478, + "step": 3460, + "valid_targets_mean": 3785.0, + "valid_targets_min": 2039 + }, + { + "epoch": 5.634662327095199, + "grad_norm": 0.5034917359869234, + "learning_rate": 4.4737656010291366e-06, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1569593846797943, + "step": 3465, + "valid_targets_mean": 8556.2, + "valid_targets_min": 4927 + }, + { + "epoch": 5.64279902359642, + "grad_norm": 0.6255736017643779, + "learning_rate": 4.422775653565934e-06, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10940764844417572, + "step": 3470, + "valid_targets_mean": 4768.1, + "valid_targets_min": 1338 + }, + { + "epoch": 5.650935720097641, + "grad_norm": 0.46222130787565185, + "learning_rate": 4.372041806351914e-06, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12370267510414124, + "step": 3475, + "valid_targets_mean": 6039.4, + "valid_targets_min": 2221 + }, + { + "epoch": 5.65907241659886, + "grad_norm": 0.7151061983633159, + "learning_rate": 4.321564893486312e-06, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10197927057743073, + "step": 3480, + "valid_targets_mean": 5656.9, + "valid_targets_min": 3191 + }, + { + "epoch": 5.667209113100081, + "grad_norm": 0.4612178221303647, + "learning_rate": 4.271345744844182e-06, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07878731191158295, + "step": 3485, + "valid_targets_mean": 5026.6, + "valid_targets_min": 781 + }, + { + "epoch": 5.675345809601302, + "grad_norm": 0.5034775896468006, + "learning_rate": 4.2213851860627696e-06, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11451518535614014, + "step": 3490, + "valid_targets_mean": 5065.0, + "valid_targets_min": 3912 + }, + { + "epoch": 5.683482506102522, + "grad_norm": 0.48370829828670214, + "learning_rate": 
4.171684038527914e-06, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1177678257226944, + "step": 3495, + "valid_targets_mean": 5450.1, + "valid_targets_min": 2203 + }, + { + "epoch": 5.691619202603743, + "grad_norm": 0.4915476071311832, + "learning_rate": 4.12224311936056e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11996840685606003, + "step": 3500, + "valid_targets_mean": 7179.9, + "valid_targets_min": 4365 + }, + { + "epoch": 5.6997558991049635, + "grad_norm": 0.5337016634168994, + "learning_rate": 4.073063241403316e-06, + "loss": 0.2425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12122653424739838, + "step": 3505, + "valid_targets_mean": 5070.0, + "valid_targets_min": 784 + }, + { + "epoch": 5.707892595606184, + "grad_norm": 0.5853690423707902, + "learning_rate": 4.024145213207103e-06, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10927228629589081, + "step": 3510, + "valid_targets_mean": 4184.2, + "valid_targets_min": 867 + }, + { + "epoch": 5.716029292107405, + "grad_norm": 0.5353891362994679, + "learning_rate": 3.975489839017846e-06, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12216495722532272, + "step": 3515, + "valid_targets_mean": 5225.1, + "valid_targets_min": 949 + }, + { + "epoch": 5.724165988608625, + "grad_norm": 0.9481430336242733, + "learning_rate": 3.9270979187632516e-06, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12332406640052795, + "step": 3520, + "valid_targets_mean": 5509.9, + "valid_targets_min": 2682 + }, + { + "epoch": 5.732302685109845, + "grad_norm": 0.47893060639696544, + "learning_rate": 3.878970248039678e-06, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1404319554567337, + "step": 3525, + "valid_targets_mean": 7358.9, + "valid_targets_min": 3809 + }, + { + "epoch": 5.740439381611066, + "grad_norm": 0.509875821596104, + "learning_rate": 3.831107618099026e-06, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.19203639030456543, + "step": 3530, + "valid_targets_mean": 6786.8, + "valid_targets_min": 2958 + }, + { + "epoch": 5.748576078112286, + "grad_norm": 0.47911534628300395, + "learning_rate": 3.7835108158357537e-06, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12184299528598785, + "step": 3535, + "valid_targets_mean": 7290.6, + "valid_targets_min": 3489 + }, + { + "epoch": 5.756712774613507, + "grad_norm": 0.45486474848191916, + "learning_rate": 3.7361806237739264e-06, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08215008676052094, + "step": 3540, + "valid_targets_mean": 5637.9, + "valid_targets_min": 1196 + }, + { + "epoch": 5.764849471114728, + "grad_norm": 0.5210595475392571, + "learning_rate": 3.689117820054351e-06, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08964404463768005, + "step": 3545, + "valid_targets_mean": 5758.0, + "valid_targets_min": 2964 + }, + { + "epoch": 5.772986167615948, + "grad_norm": 0.5267608109326574, + "learning_rate": 3.6423231784217918e-06, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14031247794628143, + "step": 3550, + "valid_targets_mean": 6369.0, + "valid_targets_min": 1512 + }, + { + "epoch": 5.781122864117169, + "grad_norm": 0.5326050285350579, + "learning_rate": 3.595797468212241e-06, + "loss": 0.2419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10683193802833557, + "step": 3555, + "valid_targets_mean": 4920.4, + "valid_targets_min": 806 + }, + { + "epoch": 5.7892595606183885, + "grad_norm": 0.47240098718567736, + "learning_rate": 3.549541454340284e-06, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07187937200069427, + "step": 3560, + "valid_targets_mean": 4559.9, + "valid_targets_min": 556 + }, + { + "epoch": 5.797396257119609, + "grad_norm": 0.5523118981586366, + "learning_rate": 3.503555897286499e-06, + "loss": 0.2582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1383015513420105, + "step": 3565, + "valid_targets_mean": 6232.8, + 
"valid_targets_min": 3199 + }, + { + "epoch": 5.80553295362083, + "grad_norm": 0.47382486470602847, + "learning_rate": 3.4578415530849794e-06, + "loss": 0.2435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10226757824420929, + "step": 3570, + "valid_targets_mean": 6907.6, + "valid_targets_min": 2369 + }, + { + "epoch": 5.81366965012205, + "grad_norm": 0.559498744815589, + "learning_rate": 3.4123991733108852e-06, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11900554597377777, + "step": 3575, + "valid_targets_mean": 4755.1, + "valid_targets_min": 1019 + }, + { + "epoch": 5.821806346623271, + "grad_norm": 0.5114120203304382, + "learning_rate": 3.3672295050680946e-06, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14657077193260193, + "step": 3580, + "valid_targets_mean": 6173.2, + "valid_targets_min": 3078 + }, + { + "epoch": 5.829943043124492, + "grad_norm": 0.9817994961771758, + "learning_rate": 3.322333290976936e-06, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13037019968032837, + "step": 3585, + "valid_targets_mean": 4806.9, + "valid_targets_min": 948 + }, + { + "epoch": 5.838079739625712, + "grad_norm": 0.49960877842083473, + "learning_rate": 3.2777112691619473e-06, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11504438519477844, + "step": 3590, + "valid_targets_mean": 5366.2, + "valid_targets_min": 3196 + }, + { + "epoch": 5.846216436126932, + "grad_norm": 0.5060382703438006, + "learning_rate": 3.233364173239766e-06, + "loss": 0.234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11748774349689484, + "step": 3595, + "valid_targets_mean": 5511.2, + "valid_targets_min": 2098 + }, + { + "epoch": 5.854353132628153, + "grad_norm": 0.5646071765030719, + "learning_rate": 3.189292732307052e-06, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11946381628513336, + "step": 3600, + "valid_targets_mean": 5497.6, + "valid_targets_min": 3401 + }, + { + "epoch": 5.862489829129373, + "grad_norm": 
1.9085216141047994, + "learning_rate": 3.1454976709285124e-06, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12498481571674347, + "step": 3605, + "valid_targets_mean": 5483.4, + "valid_targets_min": 756 + }, + { + "epoch": 5.870626525630594, + "grad_norm": 0.6119847782021685, + "learning_rate": 3.1019797091249938e-06, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1357029676437378, + "step": 3610, + "valid_targets_mean": 4750.2, + "valid_targets_min": 1614 + }, + { + "epoch": 5.8787632221318145, + "grad_norm": 0.5952452103733551, + "learning_rate": 3.058739562361621e-06, + "loss": 0.245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12497454881668091, + "step": 3615, + "valid_targets_mean": 4917.1, + "valid_targets_min": 3716 + }, + { + "epoch": 5.886899918633035, + "grad_norm": 0.5378260436755556, + "learning_rate": 3.015777941536058e-06, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10903813689947128, + "step": 3620, + "valid_targets_mean": 4843.9, + "valid_targets_min": 2199 + }, + { + "epoch": 5.895036615134256, + "grad_norm": 0.5888457289118504, + "learning_rate": 2.973095552966805e-06, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13942526280879974, + "step": 3625, + "valid_targets_mean": 5965.9, + "valid_targets_min": 3408 + }, + { + "epoch": 5.903173311635476, + "grad_norm": 0.5111041219072542, + "learning_rate": 2.9306930983816005e-06, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15496578812599182, + "step": 3630, + "valid_targets_mean": 6038.0, + "valid_targets_min": 1693 + }, + { + "epoch": 5.911310008136697, + "grad_norm": 0.5794364971608068, + "learning_rate": 2.8885712749058737e-06, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1413809061050415, + "step": 3635, + "valid_targets_mean": 5439.6, + "valid_targets_min": 1362 + }, + { + "epoch": 5.919446704637917, + "grad_norm": 0.498196696749723, + "learning_rate": 2.8467307750512808e-06, + "loss": 0.2649, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.14829573035240173, + "step": 3640, + "valid_targets_mean": 7339.1, + "valid_targets_min": 1391 + }, + { + "epoch": 5.927583401139137, + "grad_norm": 0.5485087375601867, + "learning_rate": 2.80517228670433e-06, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12239962071180344, + "step": 3645, + "valid_targets_mean": 5950.1, + "valid_targets_min": 1544 + }, + { + "epoch": 5.935720097640358, + "grad_norm": 0.9852713752535961, + "learning_rate": 2.7638964931150637e-06, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13210991024971008, + "step": 3650, + "valid_targets_mean": 5893.9, + "valid_targets_min": 1385 + }, + { + "epoch": 5.9438567941415785, + "grad_norm": 0.5176163129021438, + "learning_rate": 2.7229040728858323e-06, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10251633077859879, + "step": 3655, + "valid_targets_mean": 5866.8, + "valid_targets_min": 2058 + }, + { + "epoch": 5.951993490642799, + "grad_norm": 0.553870107905142, + "learning_rate": 2.6821956999601306e-06, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11761283129453659, + "step": 3660, + "valid_targets_mean": 4549.0, + "valid_targets_min": 504 + }, + { + "epoch": 5.96013018714402, + "grad_norm": 0.5622284508277662, + "learning_rate": 2.641772043611521e-06, + "loss": 0.2462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11832987517118454, + "step": 3665, + "valid_targets_mean": 5334.1, + "valid_targets_min": 1997 + }, + { + "epoch": 5.96826688364524, + "grad_norm": 0.6002412417162024, + "learning_rate": 2.6016337684326342e-06, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1252046376466751, + "step": 3670, + "valid_targets_mean": 5856.5, + "valid_targets_min": 670 + }, + { + "epoch": 5.97640358014646, + "grad_norm": 0.5820259812808503, + "learning_rate": 2.5617815343242327e-06, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15230754017829895, + "step": 3675, + 
"valid_targets_mean": 5588.6, + "valid_targets_min": 1374 + }, + { + "epoch": 5.984540276647681, + "grad_norm": 0.5006926676913205, + "learning_rate": 2.522215996484374e-06, + "loss": 0.2351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10319880396127701, + "step": 3680, + "valid_targets_mean": 5475.5, + "valid_targets_min": 2095 + }, + { + "epoch": 5.992676973148901, + "grad_norm": 0.4818010996292355, + "learning_rate": 2.4829378053976318e-06, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12096649408340454, + "step": 3685, + "valid_targets_mean": 6174.9, + "valid_targets_min": 3864 + }, + { + "epoch": 6.0, + "grad_norm": 0.7286610952593501, + "learning_rate": 2.4439476068243927e-06, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2138330638408661, + "step": 3690, + "valid_targets_mean": 5464.5, + "valid_targets_min": 1291 + }, + { + "epoch": 6.008136696501221, + "grad_norm": 0.5858367728890088, + "learning_rate": 2.4052460417902613e-06, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14344994723796844, + "step": 3695, + "valid_targets_mean": 5086.1, + "valid_targets_min": 1582 + }, + { + "epoch": 6.016273393002441, + "grad_norm": 0.5202481454299519, + "learning_rate": 2.3668337465754985e-06, + "loss": 0.2395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10779225826263428, + "step": 3700, + "valid_targets_mean": 6387.1, + "valid_targets_min": 2931 + }, + { + "epoch": 6.024410089503662, + "grad_norm": 0.45889546479866966, + "learning_rate": 2.3287113527045823e-06, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1384791135787964, + "step": 3705, + "valid_targets_mean": 8278.0, + "valid_targets_min": 3366 + }, + { + "epoch": 6.032546786004882, + "grad_norm": 0.5089628731085755, + "learning_rate": 2.2908794869358044e-06, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15829752385616302, + "step": 3710, + "valid_targets_mean": 6261.4, + "valid_targets_min": 2457 + }, + { + "epoch": 6.040683482506102, + 
"grad_norm": 0.5462143117486035, + "learning_rate": 2.253338771250977e-06, + "loss": 0.2276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08127269148826599, + "step": 3715, + "valid_targets_mean": 3890.1, + "valid_targets_min": 1056 + }, + { + "epoch": 6.048820179007323, + "grad_norm": 0.5707430580005115, + "learning_rate": 2.216089822845211e-06, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09576968103647232, + "step": 3720, + "valid_targets_mean": 4583.1, + "valid_targets_min": 606 + }, + { + "epoch": 6.0569568755085434, + "grad_norm": 0.4608492749702116, + "learning_rate": 2.1791332541167497e-06, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0854940190911293, + "step": 3725, + "valid_targets_mean": 5545.5, + "valid_targets_min": 1019 + }, + { + "epoch": 6.065093572009764, + "grad_norm": 0.5002751592382064, + "learning_rate": 2.142469672656935e-06, + "loss": 0.2319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10318467766046524, + "step": 3730, + "valid_targets_mean": 6957.6, + "valid_targets_min": 3822 + }, + { + "epoch": 6.073230268510985, + "grad_norm": 0.570462395210286, + "learning_rate": 2.106099681240179e-06, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09837909042835236, + "step": 3735, + "valid_targets_mean": 4815.5, + "valid_targets_min": 1144 + }, + { + "epoch": 6.081366965012205, + "grad_norm": 0.5408066637162211, + "learning_rate": 2.07002387781408e-06, + "loss": 0.2275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15398940443992615, + "step": 3740, + "valid_targets_mean": 5805.0, + "valid_targets_min": 3641 + }, + { + "epoch": 6.089503661513426, + "grad_norm": 0.6160653619825817, + "learning_rate": 2.0342428554895788e-06, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10584094375371933, + "step": 3745, + "valid_targets_mean": 4334.1, + "valid_targets_min": 767 + }, + { + "epoch": 6.097640358014646, + "grad_norm": 0.48826990351762456, + "learning_rate": 1.998757202531223e-06, + "loss": 
0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09239885210990906, + "step": 3750, + "valid_targets_mean": 5868.6, + "valid_targets_min": 391 + }, + { + "epoch": 6.105777054515866, + "grad_norm": 0.6379627065382744, + "learning_rate": 1.9635675023474764e-06, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11344724893569946, + "step": 3755, + "valid_targets_mean": 5121.8, + "valid_targets_min": 2171 + }, + { + "epoch": 6.113913751017087, + "grad_norm": 0.5446797282952438, + "learning_rate": 1.92867433348114e-06, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09264282137155533, + "step": 3760, + "valid_targets_mean": 4778.0, + "valid_targets_min": 1570 + }, + { + "epoch": 6.1220504475183075, + "grad_norm": 0.626815073545696, + "learning_rate": 1.8940782695998305e-06, + "loss": 0.241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10249383747577667, + "step": 3765, + "valid_targets_mean": 5221.0, + "valid_targets_min": 3398 + }, + { + "epoch": 6.130187144019528, + "grad_norm": 0.5214124493423383, + "learning_rate": 1.859779879486565e-06, + "loss": 0.2405, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13012221455574036, + "step": 3770, + "valid_targets_mean": 6414.4, + "valid_targets_min": 947 + }, + { + "epoch": 6.138323840520749, + "grad_norm": 0.4894257977488121, + "learning_rate": 1.8257797270303924e-06, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08547305315732956, + "step": 3775, + "valid_targets_mean": 5550.0, + "valid_targets_min": 1010 + }, + { + "epoch": 6.146460537021969, + "grad_norm": 0.4914282914782947, + "learning_rate": 1.792078371217132e-06, + "loss": 0.226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1106870174407959, + "step": 3780, + "valid_targets_mean": 6550.1, + "valid_targets_min": 2920 + }, + { + "epoch": 6.15459723352319, + "grad_norm": 0.5426971061042444, + "learning_rate": 1.7586763661201821e-06, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0976264476776123, + "step": 3785, + 
"valid_targets_mean": 5254.1, + "valid_targets_min": 1979 + }, + { + "epoch": 6.16273393002441, + "grad_norm": 0.5986618193157306, + "learning_rate": 1.7255742608914095e-06, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14822298288345337, + "step": 3790, + "valid_targets_mean": 5250.0, + "valid_targets_min": 1139 + }, + { + "epoch": 6.17087062652563, + "grad_norm": 0.5924847870251507, + "learning_rate": 1.6927725997521171e-06, + "loss": 0.2428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11596965044736862, + "step": 3795, + "valid_targets_mean": 5393.9, + "valid_targets_min": 3392 + }, + { + "epoch": 6.179007323026851, + "grad_norm": 0.5014703652224826, + "learning_rate": 1.6602719219841135e-06, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09786193817853928, + "step": 3800, + "valid_targets_mean": 7391.9, + "valid_targets_min": 3280 + }, + { + "epoch": 6.187144019528072, + "grad_norm": 0.5273515456482646, + "learning_rate": 1.6280727619208202e-06, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12879054248332977, + "step": 3805, + "valid_targets_mean": 5833.8, + "valid_targets_min": 1693 + }, + { + "epoch": 6.195280716029292, + "grad_norm": 0.5379631945457531, + "learning_rate": 1.5961756489385117e-06, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13405945897102356, + "step": 3810, + "valid_targets_mean": 6324.8, + "valid_targets_min": 3480 + }, + { + "epoch": 6.203417412530513, + "grad_norm": 0.5506482251419262, + "learning_rate": 1.5645811074475915e-06, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1008005291223526, + "step": 3815, + "valid_targets_mean": 5211.1, + "valid_targets_min": 2095 + }, + { + "epoch": 6.211554109031733, + "grad_norm": 0.5525468218833477, + "learning_rate": 1.533289656883985e-06, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12161102890968323, + "step": 3820, + "valid_targets_mean": 5366.6, + "valid_targets_min": 1201 + }, + { + "epoch": 
6.219690805532954, + "grad_norm": 1.3205892043820247, + "learning_rate": 1.5023018117005995e-06, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13536399602890015, + "step": 3825, + "valid_targets_mean": 5411.5, + "valid_targets_min": 976 + }, + { + "epoch": 6.227827502034174, + "grad_norm": 0.6399965745417111, + "learning_rate": 1.4716180813588566e-06, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14973969757556915, + "step": 3830, + "valid_targets_mean": 5368.0, + "valid_targets_min": 3462 + }, + { + "epoch": 6.235964198535394, + "grad_norm": 0.5609556474306848, + "learning_rate": 1.44123897032032e-06, + "loss": 0.2327, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13443996012210846, + "step": 3835, + "valid_targets_mean": 5981.2, + "valid_targets_min": 3215 + }, + { + "epoch": 6.244100895036615, + "grad_norm": 0.6385228076191484, + "learning_rate": 1.411164978038404e-06, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1237209141254425, + "step": 3840, + "valid_targets_mean": 4783.6, + "valid_targets_min": 1112 + }, + { + "epoch": 6.252237591537836, + "grad_norm": 0.6555481944926224, + "learning_rate": 1.3813965989501687e-06, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10206277668476105, + "step": 3845, + "valid_targets_mean": 6029.4, + "valid_targets_min": 1648 + }, + { + "epoch": 6.260374288039056, + "grad_norm": 0.5440883938396228, + "learning_rate": 1.3519343224681758e-06, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13689102232456207, + "step": 3850, + "valid_targets_mean": 5527.1, + "valid_targets_min": 2357 + }, + { + "epoch": 6.268510984540277, + "grad_norm": 0.5738449854156686, + "learning_rate": 1.3227786329724479e-06, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13260340690612793, + "step": 3855, + "valid_targets_mean": 4806.5, + "valid_targets_min": 1773 + }, + { + "epoch": 6.2766476810414975, + "grad_norm": 0.5551324266480604, + "learning_rate": 
1.2939300098025177e-06, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14535200595855713, + "step": 3860, + "valid_targets_mean": 5923.5, + "valid_targets_min": 919 + }, + { + "epoch": 6.284784377542717, + "grad_norm": 0.6073163759364055, + "learning_rate": 1.2653889272495223e-06, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10163906216621399, + "step": 3865, + "valid_targets_mean": 4454.5, + "valid_targets_min": 427 + }, + { + "epoch": 6.292921074043938, + "grad_norm": 0.6023482154559401, + "learning_rate": 1.2371558545484375e-06, + "loss": 0.2533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13912144303321838, + "step": 3870, + "valid_targets_mean": 4576.5, + "valid_targets_min": 2053 + }, + { + "epoch": 6.3010577705451585, + "grad_norm": 0.4614699781457518, + "learning_rate": 1.2092312558703333e-06, + "loss": 0.2353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09365392476320267, + "step": 3875, + "valid_targets_mean": 5213.4, + "valid_targets_min": 2221 + }, + { + "epoch": 6.309194467046379, + "grad_norm": 0.5158867835539471, + "learning_rate": 1.181615590314762e-06, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12160620093345642, + "step": 3880, + "valid_targets_mean": 6346.5, + "valid_targets_min": 3619 + }, + { + "epoch": 6.3173311635476, + "grad_norm": 2.2609058090585687, + "learning_rate": 1.1543093119021976e-06, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13036061823368073, + "step": 3885, + "valid_targets_mean": 4674.4, + "valid_targets_min": 1355 + }, + { + "epoch": 6.32546786004882, + "grad_norm": 0.6835770950079834, + "learning_rate": 1.1273128695665814e-06, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16928444802761078, + "step": 3890, + "valid_targets_mean": 5820.9, + "valid_targets_min": 1498 + }, + { + "epoch": 6.333604556550041, + "grad_norm": 0.5664875984145479, + "learning_rate": 1.1006267071479359e-06, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.10270114988088608, + "step": 3895, + "valid_targets_mean": 5303.8, + "valid_targets_min": 1183 + }, + { + "epoch": 6.341741253051262, + "grad_norm": 0.4578194482636973, + "learning_rate": 1.074251263385071e-06, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08202557265758514, + "step": 3900, + "valid_targets_mean": 6897.5, + "valid_targets_min": 2742 + }, + { + "epoch": 6.349877949552481, + "grad_norm": 0.5720561944484039, + "learning_rate": 1.0481869719083647e-06, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12599898874759674, + "step": 3905, + "valid_targets_mean": 4984.6, + "valid_targets_min": 1067 + }, + { + "epoch": 6.358014646053702, + "grad_norm": 0.5346739530338187, + "learning_rate": 1.022434261232641e-06, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18678709864616394, + "step": 3910, + "valid_targets_mean": 7899.0, + "valid_targets_min": 2439 + }, + { + "epoch": 6.3661513425549225, + "grad_norm": 0.5245267855591524, + "learning_rate": 9.969935547501208e-07, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1010551005601883, + "step": 3915, + "valid_targets_mean": 5726.0, + "valid_targets_min": 2426 + }, + { + "epoch": 6.374288039056143, + "grad_norm": 0.577988469761867, + "learning_rate": 9.718652707234667e-07, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15215085446834564, + "step": 3920, + "valid_targets_mean": 6691.8, + "valid_targets_min": 741 + }, + { + "epoch": 6.382424735557364, + "grad_norm": 0.5313322523199454, + "learning_rate": 9.47049822278896e-07, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1224609836935997, + "step": 3925, + "valid_targets_mean": 7647.4, + "valid_targets_min": 3864 + }, + { + "epoch": 6.390561432058584, + "grad_norm": 0.5506328179742512, + "learning_rate": 9.225476173993941e-07, + "loss": 0.2332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11494912952184677, + "step": 3930, + "valid_targets_mean": 5041.4, + "valid_targets_min": 
1844 + }, + { + "epoch": 6.398698128559805, + "grad_norm": 0.6221746878029165, + "learning_rate": 8.983590589180125e-07, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13699518144130707, + "step": 3935, + "valid_targets_mean": 4982.8, + "valid_targets_min": 596 + }, + { + "epoch": 6.406834825061026, + "grad_norm": 0.5322907918953855, + "learning_rate": 8.744845445112337e-07, + "loss": 0.2373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14416161179542542, + "step": 3940, + "valid_targets_mean": 6786.8, + "valid_targets_min": 3528 + }, + { + "epoch": 6.414971521562245, + "grad_norm": 0.6032735858410141, + "learning_rate": 8.50924466692451e-07, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12138237059116364, + "step": 3945, + "valid_targets_mean": 5486.4, + "valid_targets_min": 1091 + }, + { + "epoch": 6.423108218063466, + "grad_norm": 0.6020791332482403, + "learning_rate": 8.276792128054967e-07, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11796577274799347, + "step": 3950, + "valid_targets_mean": 4668.9, + "valid_targets_min": 2761 + }, + { + "epoch": 6.431244914564687, + "grad_norm": 0.550955973668879, + "learning_rate": 8.047491650182815e-07, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14780160784721375, + "step": 3955, + "valid_targets_mean": 5403.9, + "valid_targets_min": 1514 + }, + { + "epoch": 6.439381611065907, + "grad_norm": 0.5899929467979808, + "learning_rate": 7.821347003165125e-07, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13076281547546387, + "step": 3960, + "valid_targets_mean": 5458.2, + "valid_targets_min": 715 + }, + { + "epoch": 6.447518307567128, + "grad_norm": 0.46432249628629235, + "learning_rate": 7.598361904974982e-07, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.103657066822052, + "step": 3965, + "valid_targets_mean": 5980.2, + "valid_targets_min": 4230 + }, + { + "epoch": 6.4556550040683485, + "grad_norm": 0.7371030234438611, + 
"learning_rate": 7.378540021640313e-07, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07392923533916473, + "step": 3970, + "valid_targets_mean": 4101.2, + "valid_targets_min": 756 + }, + { + "epoch": 6.463791700569569, + "grad_norm": 0.5682466131911814, + "learning_rate": 7.161884967183552e-07, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1610821634531021, + "step": 3975, + "valid_targets_mean": 6404.6, + "valid_targets_min": 2554 + }, + { + "epoch": 6.471928397070789, + "grad_norm": 0.5442753436375526, + "learning_rate": 6.948400303562386e-07, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13828691840171814, + "step": 3980, + "valid_targets_mean": 6081.0, + "valid_targets_min": 1105 + }, + { + "epoch": 6.480065093572009, + "grad_norm": 0.5290599896063467, + "learning_rate": 6.738089540611059e-07, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12780803442001343, + "step": 3985, + "valid_targets_mean": 5326.5, + "valid_targets_min": 3083 + }, + { + "epoch": 6.48820179007323, + "grad_norm": 0.4829545021115107, + "learning_rate": 6.530956135982713e-07, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07991525530815125, + "step": 3990, + "valid_targets_mean": 5208.4, + "valid_targets_min": 988 + }, + { + "epoch": 6.496338486574451, + "grad_norm": 0.4941367346868645, + "learning_rate": 6.327003495092565e-07, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09314847737550735, + "step": 3995, + "valid_targets_mean": 6371.6, + "valid_targets_min": 3403 + }, + { + "epoch": 6.504475183075671, + "grad_norm": 0.603169711820912, + "learning_rate": 6.126234971061861e-07, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14228929579257965, + "step": 4000, + "valid_targets_mean": 5264.0, + "valid_targets_min": 542 + }, + { + "epoch": 6.512611879576892, + "grad_norm": 0.5710471143580141, + "learning_rate": 5.928653864662815e-07, + "loss": 0.2392, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.15178368985652924, + "step": 4005, + "valid_targets_mean": 5708.6, + "valid_targets_min": 3584 + }, + { + "epoch": 6.5207485760781125, + "grad_norm": 0.4842323415856468, + "learning_rate": 5.734263424264242e-07, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0898006483912468, + "step": 4010, + "valid_targets_mean": 5642.4, + "valid_targets_min": 1473 + }, + { + "epoch": 6.528885272579333, + "grad_norm": 0.5617774998939432, + "learning_rate": 5.543066845778345e-07, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14239703118801117, + "step": 4015, + "valid_targets_mean": 4555.8, + "valid_targets_min": 1628 + }, + { + "epoch": 6.537021969080554, + "grad_norm": 0.5072944150418117, + "learning_rate": 5.355067272607928e-07, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11048190295696259, + "step": 4020, + "valid_targets_mean": 7035.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.5451586655817735, + "grad_norm": 0.4954909171386829, + "learning_rate": 5.170267795594886e-07, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11649087071418762, + "step": 4025, + "valid_targets_mean": 6345.4, + "valid_targets_min": 2927 + }, + { + "epoch": 6.553295362082994, + "grad_norm": 0.6103860123458417, + "learning_rate": 4.988671452969329e-07, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10858224332332611, + "step": 4030, + "valid_targets_mean": 4031.6, + "valid_targets_min": 2584 + }, + { + "epoch": 6.561432058584215, + "grad_norm": 0.539076716156704, + "learning_rate": 4.810281230299674e-07, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10688619315624237, + "step": 4035, + "valid_targets_mean": 4906.5, + "valid_targets_min": 1837 + }, + { + "epoch": 6.569568755085435, + "grad_norm": 0.5451844058939661, + "learning_rate": 4.6351000604434537e-07, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13301129639148712, + "step": 4040, + "valid_targets_mean": 6030.0, + 
"valid_targets_min": 3338 + }, + { + "epoch": 6.577705451586656, + "grad_norm": 0.6364692773603006, + "learning_rate": 4.463130823499273e-07, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1468641757965088, + "step": 4045, + "valid_targets_mean": 5677.6, + "valid_targets_min": 560 + }, + { + "epoch": 6.585842148087877, + "grad_norm": 0.5318256719237091, + "learning_rate": 4.2943763467592436e-07, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10516038537025452, + "step": 4050, + "valid_targets_mean": 4711.4, + "valid_targets_min": 1877 + }, + { + "epoch": 6.593978844589097, + "grad_norm": 0.5509512683634988, + "learning_rate": 4.12883940466271e-07, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10421756654977798, + "step": 4055, + "valid_targets_mean": 4614.4, + "valid_targets_min": 2460 + }, + { + "epoch": 6.602115541090317, + "grad_norm": 0.5200751439970589, + "learning_rate": 3.9665227187505097e-07, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10616647452116013, + "step": 4060, + "valid_targets_mean": 4184.4, + "valid_targets_min": 2193 + }, + { + "epoch": 6.610252237591538, + "grad_norm": 0.5747208042923473, + "learning_rate": 3.8074289576202295e-07, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11532352864742279, + "step": 4065, + "valid_targets_mean": 4391.8, + "valid_targets_min": 2875 + }, + { + "epoch": 6.618388934092758, + "grad_norm": 0.5442269160669613, + "learning_rate": 3.6515607368824203e-07, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14139285683631897, + "step": 4070, + "valid_targets_mean": 5408.9, + "valid_targets_min": 1305 + }, + { + "epoch": 6.626525630593979, + "grad_norm": 0.6566336791778115, + "learning_rate": 3.498920619117474e-07, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10617846250534058, + "step": 4075, + "valid_targets_mean": 4429.5, + "valid_targets_min": 1337 + }, + { + "epoch": 6.634662327095199, + "grad_norm": 
0.5122404519465269, + "learning_rate": 3.3495111138336147e-07, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10197494924068451, + "step": 4080, + "valid_targets_mean": 4975.5, + "valid_targets_min": 1614 + }, + { + "epoch": 6.64279902359642, + "grad_norm": 0.6028082911064025, + "learning_rate": 3.203334677425529e-07, + "loss": 0.2185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09473268687725067, + "step": 4085, + "valid_targets_mean": 3829.1, + "valid_targets_min": 1555 + }, + { + "epoch": 6.650935720097641, + "grad_norm": 0.5205649448698142, + "learning_rate": 3.060393713134091e-07, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1434570699930191, + "step": 4090, + "valid_targets_mean": 6809.5, + "valid_targets_min": 1572 + }, + { + "epoch": 6.65907241659886, + "grad_norm": 0.5601018905652814, + "learning_rate": 2.920690571006768e-07, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12937568128108978, + "step": 4095, + "valid_targets_mean": 7661.0, + "valid_targets_min": 1771 + }, + { + "epoch": 6.667209113100081, + "grad_norm": 0.6361282605163243, + "learning_rate": 2.784227547858964e-07, + "loss": 0.2281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0899709165096283, + "step": 4100, + "valid_targets_mean": 4337.1, + "valid_targets_min": 484 + }, + { + "epoch": 6.675345809601302, + "grad_norm": 0.5508211250858578, + "learning_rate": 2.651006887236385e-07, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09255590289831161, + "step": 4105, + "valid_targets_mean": 5757.0, + "valid_targets_min": 3592 + }, + { + "epoch": 6.683482506102522, + "grad_norm": 0.5929185382963317, + "learning_rate": 2.52103077937802e-07, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11462150514125824, + "step": 4110, + "valid_targets_mean": 8654.9, + "valid_targets_min": 687 + }, + { + "epoch": 6.691619202603743, + "grad_norm": 0.5406995619531852, + "learning_rate": 2.394301361180218e-07, + "loss": 0.2228, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.09833990037441254, + "step": 4115, + "valid_targets_mean": 5543.6, + "valid_targets_min": 1684 + }, + { + "epoch": 6.6997558991049635, + "grad_norm": 0.5202765802597007, + "learning_rate": 2.2708207161615147e-07, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12440069019794464, + "step": 4120, + "valid_targets_mean": 6339.9, + "valid_targets_min": 2935 + }, + { + "epoch": 6.707892595606184, + "grad_norm": 0.5477566339180919, + "learning_rate": 2.150590874428371e-07, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16028626263141632, + "step": 4125, + "valid_targets_mean": 6068.5, + "valid_targets_min": 665 + }, + { + "epoch": 6.716029292107405, + "grad_norm": 0.5263469114319956, + "learning_rate": 2.0336138126417994e-07, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12802526354789734, + "step": 4130, + "valid_targets_mean": 6818.8, + "valid_targets_min": 3843 + }, + { + "epoch": 6.724165988608625, + "grad_norm": 0.6239937148070356, + "learning_rate": 1.9198914539849455e-07, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10577556490898132, + "step": 4135, + "valid_targets_mean": 4284.1, + "valid_targets_min": 524 + }, + { + "epoch": 6.732302685109845, + "grad_norm": 0.5786354008011593, + "learning_rate": 1.8094256681313593e-07, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17910592257976532, + "step": 4140, + "valid_targets_mean": 5484.6, + "valid_targets_min": 1652 + }, + { + "epoch": 6.740439381611066, + "grad_norm": 0.6573268588625598, + "learning_rate": 1.702218271214262e-07, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12595227360725403, + "step": 4145, + "valid_targets_mean": 4625.1, + "valid_targets_min": 1444 + }, + { + "epoch": 6.748576078112286, + "grad_norm": 0.4184562575542007, + "learning_rate": 1.598271025796816e-07, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09041139483451843, + "step": 4150, + 
"valid_targets_mean": 5593.0, + "valid_targets_min": 1619 + }, + { + "epoch": 6.756712774613507, + "grad_norm": 0.49775516096636063, + "learning_rate": 1.4975856408429912e-07, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12942853569984436, + "step": 4155, + "valid_targets_mean": 4617.5, + "valid_targets_min": 1893 + }, + { + "epoch": 6.764849471114728, + "grad_norm": 0.5158152468576911, + "learning_rate": 1.4001637716895445e-07, + "loss": 0.2393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12505996227264404, + "step": 4160, + "valid_targets_mean": 5568.4, + "valid_targets_min": 2868 + }, + { + "epoch": 6.772986167615948, + "grad_norm": 0.45862764208578194, + "learning_rate": 1.3060070200188179e-07, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09306715428829193, + "step": 4165, + "valid_targets_mean": 7481.6, + "valid_targets_min": 1395 + }, + { + "epoch": 6.781122864117169, + "grad_norm": 0.5484801191580874, + "learning_rate": 1.215116933832361e-07, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0996013730764389, + "step": 4170, + "valid_targets_mean": 5064.8, + "valid_targets_min": 1288 + }, + { + "epoch": 6.7892595606183885, + "grad_norm": 0.5432376350068135, + "learning_rate": 1.1274950074255053e-07, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14020952582359314, + "step": 4175, + "valid_targets_mean": 5912.4, + "valid_targets_min": 3887 + }, + { + "epoch": 6.797396257119609, + "grad_norm": 0.63592955162879, + "learning_rate": 1.0431426813628298e-07, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14750230312347412, + "step": 4180, + "valid_targets_mean": 6155.6, + "valid_targets_min": 2960 + }, + { + "epoch": 6.80553295362083, + "grad_norm": 0.5502827308580879, + "learning_rate": 9.62061342454379e-08, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08104512095451355, + "step": 4185, + "valid_targets_mean": 4188.5, + "valid_targets_min": 1325 + }, + { + "epoch": 
6.81366965012205, + "grad_norm": 0.5325444503557673, + "learning_rate": 8.842523237329925e-08, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12346133589744568, + "step": 4190, + "valid_targets_mean": 6763.8, + "valid_targets_min": 1068 + }, + { + "epoch": 6.821806346623271, + "grad_norm": 0.546512190706496, + "learning_rate": 8.097169044322561e-08, + "loss": 0.2315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1239863932132721, + "step": 4195, + "valid_targets_mean": 5937.0, + "valid_targets_min": 2620 + }, + { + "epoch": 6.829943043124492, + "grad_norm": 0.5906532640459007, + "learning_rate": 7.38456309965585e-08, + "loss": 0.2467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12631988525390625, + "step": 4200, + "valid_targets_mean": 5960.2, + "valid_targets_min": 2942 + }, + { + "epoch": 6.838079739625712, + "grad_norm": 0.6953061985356663, + "learning_rate": 6.7047171190604e-08, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13684195280075073, + "step": 4205, + "valid_targets_mean": 4888.6, + "valid_targets_min": 2983 + }, + { + "epoch": 6.846216436126932, + "grad_norm": 0.5425415272735113, + "learning_rate": 6.057642279669874e-08, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10478918999433517, + "step": 4210, + "valid_targets_mean": 4543.4, + "valid_targets_min": 1180 + }, + { + "epoch": 6.854353132628153, + "grad_norm": 0.48291552518428543, + "learning_rate": 5.4433492198386895e-08, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16383056342601776, + "step": 4215, + "valid_targets_mean": 7361.4, + "valid_targets_min": 4819 + }, + { + "epoch": 6.862489829129373, + "grad_norm": 0.5655144988352875, + "learning_rate": 4.861848038965722e-08, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1411811113357544, + "step": 4220, + "valid_targets_mean": 5598.1, + "valid_targets_min": 2990 + }, + { + "epoch": 6.870626525630594, + "grad_norm": 0.477678998506917, + "learning_rate": 
4.313148297328873e-08, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14228369295597076, + "step": 4225, + "valid_targets_mean": 6497.6, + "valid_targets_min": 3097 + }, + { + "epoch": 6.8787632221318145, + "grad_norm": 0.581502670933742, + "learning_rate": 3.797259015928534e-08, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15068471431732178, + "step": 4230, + "valid_targets_mean": 4712.4, + "valid_targets_min": 2505 + }, + { + "epoch": 6.886899918633035, + "grad_norm": 0.4756280191630647, + "learning_rate": 3.314188676338148e-08, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1089138463139534, + "step": 4235, + "valid_targets_mean": 5755.9, + "valid_targets_min": 2892 + }, + { + "epoch": 6.895036615134256, + "grad_norm": 0.5398206094881605, + "learning_rate": 2.863945220565434e-08, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11508828401565552, + "step": 4240, + "valid_targets_mean": 5286.5, + "valid_targets_min": 2360 + }, + { + "epoch": 6.903173311635476, + "grad_norm": 0.5717657037216582, + "learning_rate": 2.4465360509211555e-08, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13908590376377106, + "step": 4245, + "valid_targets_mean": 5737.0, + "valid_targets_min": 2518 + }, + { + "epoch": 6.911310008136697, + "grad_norm": 0.5554985915270165, + "learning_rate": 2.0619680298983313e-08, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11254111677408218, + "step": 4250, + "valid_targets_mean": 4905.9, + "valid_targets_min": 792 + }, + { + "epoch": 6.919446704637917, + "grad_norm": 0.5218369286912145, + "learning_rate": 1.7102474800592128e-08, + "loss": 0.2359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13761496543884277, + "step": 4255, + "valid_targets_mean": 5413.0, + "valid_targets_min": 2422 + }, + { + "epoch": 6.927583401139137, + "grad_norm": 0.5381548129002338, + "learning_rate": 1.3913801839307017e-08, + "loss": 0.2511, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.14477401971817017, + "step": 4260, + "valid_targets_mean": 6205.5, + "valid_targets_min": 558 + }, + { + "epoch": 6.935720097640358, + "grad_norm": 0.44713106150780874, + "learning_rate": 1.105371383909759e-08, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10858777165412903, + "step": 4265, + "valid_targets_mean": 8534.0, + "valid_targets_min": 1522 + }, + { + "epoch": 6.9438567941415785, + "grad_norm": 0.5274514347290213, + "learning_rate": 8.522257821770296e-09, + "loss": 0.2491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1236865222454071, + "step": 4270, + "valid_targets_mean": 6043.4, + "valid_targets_min": 3334 + }, + { + "epoch": 6.951993490642799, + "grad_norm": 0.5801773088235224, + "learning_rate": 6.319475406200148e-09, + "loss": 0.2203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11447873711585999, + "step": 4275, + "valid_targets_mean": 4326.0, + "valid_targets_min": 2843 + }, + { + "epoch": 6.96013018714402, + "grad_norm": 0.5965479996719061, + "learning_rate": 4.445402807637944e-09, + "loss": 0.2364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13095706701278687, + "step": 4280, + "valid_targets_mean": 5125.6, + "valid_targets_min": 3083 + }, + { + "epoch": 6.96826688364524, + "grad_norm": 0.596073208594123, + "learning_rate": 2.9000708371240695e-09, + "loss": 0.2456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12374962121248245, + "step": 4285, + "valid_targets_mean": 4898.1, + "valid_targets_min": 1390 + }, + { + "epoch": 6.97640358014646, + "grad_norm": 0.547455008958649, + "learning_rate": 1.6835049009755745e-09, + "loss": 0.2368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13343475759029388, + "step": 4290, + "valid_targets_mean": 6155.8, + "valid_targets_min": 1121 + }, + { + "epoch": 6.984540276647681, + "grad_norm": 0.5017858705960277, + "learning_rate": 7.9572500036873e-10, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11443660408258438, + "step": 4295, + "valid_targets_mean": 7366.5, + "valid_targets_min": 
3418 + }, + { + "epoch": 6.992676973148901, + "grad_norm": 0.5649590473030836, + "learning_rate": 2.367457310170629e-10, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.168892502784729, + "step": 4300, + "valid_targets_mean": 6009.1, + "valid_targets_min": 3140 + }, + { + "epoch": 7.0, + "grad_norm": 0.6042592466198581, + "learning_rate": 6.5762829204452095e-12, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17104065418243408, + "step": 4305, + "valid_targets_mean": 7242.6, + "valid_targets_min": 3287 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17104065418243408, + "step": 4305, + "total_flos": 2.6783135993776046e+18, + "train_loss": 0.2845736916749181, + "train_runtime": 62000.2676, + "train_samples_per_second": 1.109, + "train_steps_per_second": 0.069, + "valid_targets_mean": 7242.6, + "valid_targets_min": 3287 + } + ], + "logging_steps": 5, + "max_steps": 4305, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.6783135993776046e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..9545139 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0cdca66ca4fb0f1ceadab1ec7500629a6ad4dedfafbb2d9c5b044ead00aa58c +size 8721 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..555b21f Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833