commit f75e54170cc66f0e8e9afbd3cc5448f496f97cd9 Author: ModelHub XC Date: Wed May 20 16:40:33 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: open-thoughts/OpenThinker-Agent-v1-SFT Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..5ee0a6f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,56 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs 
diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..4b8c145 --- /dev/null +++ b/README.md @@ -0,0 +1,121 @@ +--- +base_model: +- Qwen/Qwen3-8B +datasets: +- OpenThoughts-Agent-v1-SFT +- OpenThoughts-Agent-v1-RL +library_name: transformers +license: apache-2.0 +model-index: +- name: OpenThinker-Agent-v1 + results: [] +pipeline_tag: text-generation +tags: +- agents +- terminal +- code +- software-engineering +--- + +

+ +

+ +

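+[Project](https://www.openthoughts.ai/blog/agent) | +[SFT dataset](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-SFT) | +[RL dataset](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-RL) | +[SFT model](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1-SFT) | +[RL model](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1)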

+ + +# OpenThinker-Agent-v1-SFT + +**OpenThoughts-Agent** is an open-source effort to curate the best datasets for training agents. Our first release includes [datasets](https://huggingface.co/collections/open-thoughts/openthinker-agent), [models](https://huggingface.co/collections/open-thoughts/openthinker-agent) and our [research codebase](https://github.com/open-thoughts/OpenThoughts-Agent). + +[OpenThinker-Agent-v1](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1) is a model trained for agentic tasks such as **Terminal-Bench 2.0** and **SWE-Bench**. + +The [OpenThinker-Agent-v1](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1) model is post-trained from [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B). +It is SFT-ed on the [OpenThoughts-Agent-v1-SFT](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-SFT) dataset, then RL-ed on the [OpenThoughts-Agent-v1-RL](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-RL) dataset. + +This [OpenThinker-Agent-v1-SFT](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1-SFT) model is the model after the SFT stage. For the model after both SFT and RL stages, see [OpenThinker-Agent-v1](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1). + +- **Homepage:** https://www.openthoughts.ai/blog/agent +- **Repository:** https://github.com/open-thoughts/OpenThoughts-Agent + + +# OpenThinker-Agent-v1 Model Performance + +Our [OpenThinker-Agent-v1](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1) model is the state-of-the-art model at its scale on agent benchmarks. 
+ +| Model | Harness | Terminal-Bench 2.0 | SWE-Bench Verified | OpenThoughts-TB-Dev | +| ----------------------------------------------------------------------------------------------- | ------- | ------------------ | --------- | ------------------- | +| [Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) | Terminus-2 | 0.0 | 0.7 | 5.7 | +| **[OpenThinker-Agent-v1](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1)** | Terminus-2 | 4.9 | 15.7 | 17.3 | +| [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) | Terminus-2 | 1.9 | 5.7 | 10.2 | +| [Qwen/Qwen3-Coder-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct) | OpenHands | 10.1 | 49.2 | 24.5 | + + +# Data + +We built [OpenThinker-Agent-v1](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1) in two stages: **supervised fine-tuning**, followed by **reinforcement learning**. +Each stage required its own data pipeline – RL tasks (instructions, environments, and verifiers) and SFT traces from strong teacher agents completing tasks. + +[OpenThoughts-Agent-v1-SFT](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-SFT) is an SFT trace dataset containing approximately **15,200 traces** drawn from two different data sources we curate: +- **nl2bash**: Simple synthetically generated tasks where the agent has to format shell commands effectively +- **InferredBugs**: A set of bugs in C# and Java collected by Microsoft that we turned into tasks + +[OpenThoughts-Agent-v1-RL](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-RL) is an RL dataset containing ~720 tasks drawn from the **nl2bash verified** dataset. + +To stabilize training, we built a three-stage filtration pipeline that prunes tasks before they ever hit the learner: + +1. Bad verifiers filter: drop tasks with flaky or excessively slow verifiers. +2. Environment stability: remove tasks whose containers take too long to build or tear down. 
+Optional difficulty filter: discard tasks that even a strong model (GPT-5 Codex) cannot solve in a single pass. + + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 16 +- total_train_batch_size: 16 +- total_eval_batch_size: 128 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 7.0 + +### Framework versions + +- Transformers 4.56.0 +- Pytorch 2.9.0+cu128 +- Datasets 4.4.1 +- Tokenizers 0.22.1 + + +# Links +- 🌐 [OpenThoughts-Agent project page](https://open-thoughts.ai/blog/agent) +- 💻 [OpenThoughts-Agent GitHub repository](https://github.com/open-thoughts/OpenThoughts-Agent) +- 🧠 [OpenThoughts-Agent-v1-SFT dataset](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-SFT) +- 🧠 [OpenThoughts-Agent-v1-RL dataset](https://huggingface.co/datasets/open-thoughts/OpenThoughts-Agent-v1-RL) +- 🧠 [OpenThoughts-TB-dev dataset](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TB-dev) +- 🤖 [OpenThinker-Agent-v1 model](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1) +- 🤖 [OpenThinker-Agent-v1-SFT model](https://huggingface.co/open-thoughts/OpenThinker-Agent-v1-SFT) --> this model + + +# Citation +``` +@misc{openthoughts-agent, + author = {Team, OpenThoughts-Agent}, + month = Dec, + title = {{OpenThoughts-Agent}}, + howpublished = {https://www.open-thoughts.ai/blog/agent}, + year = {2025} +} +``` \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..b54f913 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,28 @@ +{ + "": 151668, + "": 151658, + "": 151666, + "": 151667, + "": 151657, + "": 151665, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + 
"<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..31e0fd3 --- /dev/null +++ b/all_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 0.0036400978834181994, + "achieved_tflops_per_gpu_theoretical": 918.1221365250786, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05594291538000107, + "mfu_percent": 0.0002572507338104735, + "mfu_percent_theoretical": 64.88495664488188, + "total_flos": 1429285307547648.0, + "train_loss": 0.14208543798229403, + "train_runtime": 24540.64, + "train_samples_per_second": 4.338, + "train_steps_per_second": 0.271, + "valid_targets_mean": 2843.0, + "valid_targets_min": 791 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..01be9b3 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,89 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{- messages[0].content + '\n\n' }} + {%- endif %} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + 
'<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if message.content is string %} + {%- set content = message.content %} + {%- else %} + {%- set content = '' %} + {%- endif %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {%- if loop.last or (not loop.last and reasoning_content) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {%- if tool_call.arguments is string %} 
+ {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '}\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..d04042c --- /dev/null +++ b/config.json @@ -0,0 +1,68 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + 
"num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.56.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..eff07c5 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.56.0" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..4b1c853 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10460e9df3549213011f77b86cba4ba0d5deebccc86f5ffa246c1c1bb951e17 +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..f3d26cd --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8290a63e42ded9e76f9cfbcf74533bb585329c1da90e147d7aa8c1552d51e0 +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..f3aafc8 --- /dev/null +++ 
b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb5ed70b6025af1129537ba9391c0d0fd01522b687a7aeea8bd62e73d0b9cf4e +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..ba43a44 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381f95ed592c48537271c4703142cfab38ddff1ac4a3591371b911a8b34ac748 +size 1580230264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ba886c0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,407 @@ +{ + "metadata": { + "total_parameters": 308224, + "total_size": 16381470720 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": 
"model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + 
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", 
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_norm.weight": 
"model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + 
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..fe39b34 --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": null, + "training_start": null, + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "Qwen/Qwen3-8B", + "dataset_name": "penfever/GLM-4.6-nl2bash-verified-32eps-32k,penfever/GLM-4.6-inferredbugs-32eps-65k", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/DCAgent2/nl2bash-bugsseq/blob/main/config.json", + "wandb_link": "https://wandb.ai/dogml/dc-agent/runs/nl2bash-bugsseq_Qwen3-8B", + "traces_location_s3": null +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 
+1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..cd71f61 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4 +size 11422654 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..e9dc937 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,240 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + 
"content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 32768, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..31e0fd3 --- /dev/null +++ b/train_results.json @@ -0,0 +1,16 @@ +{ + "achieved_tflops_per_gpu": 0.0036400978834181994, + "achieved_tflops_per_gpu_theoretical": 918.1221365250786, + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05594291538000107, + "mfu_percent": 
0.0002572507338104735, + "mfu_percent_theoretical": 64.88495664488188, + "total_flos": 1429285307547648.0, + "train_loss": 0.14208543798229403, + "train_runtime": 24540.64, + "train_samples_per_second": 4.338, + "train_steps_per_second": 0.271, + "valid_targets_mean": 2843.0, + "valid_targets_min": 791 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..aa94880 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1332 @@ +{"current_steps": 5, "total_steps": 6657, "loss": 0.8123, "lr": 2.4024024024024026e-07, "epoch": 0.005257623554153523, "percentage": 0.08, "elapsed_time": "0:00:24", "remaining_time": "9:11:13"} +{"current_steps": 10, "total_steps": 6657, "loss": 0.8146, "lr": 5.405405405405406e-07, "epoch": 0.010515247108307046, "percentage": 0.15, "elapsed_time": "0:00:38", "remaining_time": "7:04:58"} +{"current_steps": 15, "total_steps": 6657, "loss": 0.7847, "lr": 8.40840840840841e-07, "epoch": 0.015772870662460567, "percentage": 0.23, "elapsed_time": "0:00:52", "remaining_time": "6:27:20"} +{"current_steps": 20, "total_steps": 6657, "loss": 0.772, "lr": 1.1411411411411411e-06, "epoch": 0.02103049421661409, "percentage": 0.3, "elapsed_time": "0:01:06", "remaining_time": "6:05:33"} +{"current_steps": 25, "total_steps": 6657, "loss": 0.7447, "lr": 1.4414414414414416e-06, "epoch": 0.026288117770767613, "percentage": 0.38, "elapsed_time": "0:01:21", "remaining_time": "5:58:24"} +{"current_steps": 30, "total_steps": 6657, "loss": 0.7118, "lr": 1.7417417417417418e-06, "epoch": 0.031545741324921134, "percentage": 0.45, "elapsed_time": "0:01:34", "remaining_time": "5:47:56"} +{"current_steps": 35, "total_steps": 6657, "loss": 0.6655, "lr": 2.0420420420420424e-06, "epoch": 0.03680336487907466, "percentage": 0.53, "elapsed_time": "0:01:47", "remaining_time": "5:39:24"} +{"current_steps": 40, "total_steps": 6657, "loss": 0.6395, "lr": 2.3423423423423424e-06, "epoch": 0.04206098843322818, "percentage": 0.6, 
"elapsed_time": "0:02:01", "remaining_time": "5:33:37"} +{"current_steps": 45, "total_steps": 6657, "loss": 0.5812, "lr": 2.642642642642643e-06, "epoch": 0.0473186119873817, "percentage": 0.68, "elapsed_time": "0:02:13", "remaining_time": "5:27:48"} +{"current_steps": 50, "total_steps": 6657, "loss": 0.5305, "lr": 2.942942942942943e-06, "epoch": 0.052576235541535225, "percentage": 0.75, "elapsed_time": "0:02:27", "remaining_time": "5:25:32"} +{"current_steps": 55, "total_steps": 6657, "loss": 0.4958, "lr": 3.2432432432432437e-06, "epoch": 0.05783385909568875, "percentage": 0.83, "elapsed_time": "0:02:42", "remaining_time": "5:24:45"} +{"current_steps": 60, "total_steps": 6657, "loss": 0.4717, "lr": 3.5435435435435437e-06, "epoch": 0.06309148264984227, "percentage": 0.9, "elapsed_time": "0:02:56", "remaining_time": "5:23:00"} +{"current_steps": 65, "total_steps": 6657, "loss": 0.4494, "lr": 3.843843843843844e-06, "epoch": 0.0683491062039958, "percentage": 0.98, "elapsed_time": "0:03:09", "remaining_time": "5:20:02"} +{"current_steps": 70, "total_steps": 6657, "loss": 0.4231, "lr": 4.1441441441441446e-06, "epoch": 0.07360672975814932, "percentage": 1.05, "elapsed_time": "0:03:22", "remaining_time": "5:17:53"} +{"current_steps": 75, "total_steps": 6657, "loss": 0.4086, "lr": 4.444444444444444e-06, "epoch": 0.07886435331230283, "percentage": 1.13, "elapsed_time": "0:03:36", "remaining_time": "5:16:27"} +{"current_steps": 80, "total_steps": 6657, "loss": 0.4084, "lr": 4.7447447447447454e-06, "epoch": 0.08412197686645637, "percentage": 1.2, "elapsed_time": "0:03:50", "remaining_time": "5:15:40"} +{"current_steps": 85, "total_steps": 6657, "loss": 0.3979, "lr": 5.045045045045045e-06, "epoch": 0.08937960042060988, "percentage": 1.28, "elapsed_time": "0:04:04", "remaining_time": "5:15:15"} +{"current_steps": 90, "total_steps": 6657, "loss": 0.3871, "lr": 5.345345345345346e-06, "epoch": 0.0946372239747634, "percentage": 1.35, "elapsed_time": "0:04:18", "remaining_time": 
"5:14:04"} +{"current_steps": 95, "total_steps": 6657, "loss": 0.3651, "lr": 5.645645645645647e-06, "epoch": 0.09989484752891693, "percentage": 1.43, "elapsed_time": "0:04:32", "remaining_time": "5:13:50"} +{"current_steps": 100, "total_steps": 6657, "loss": 0.3606, "lr": 5.945945945945947e-06, "epoch": 0.10515247108307045, "percentage": 1.5, "elapsed_time": "0:04:47", "remaining_time": "5:14:10"} +{"current_steps": 105, "total_steps": 6657, "loss": 0.3616, "lr": 6.246246246246247e-06, "epoch": 0.11041009463722397, "percentage": 1.58, "elapsed_time": "0:05:03", "remaining_time": "5:15:13"} +{"current_steps": 110, "total_steps": 6657, "loss": 0.3525, "lr": 6.546546546546547e-06, "epoch": 0.1156677181913775, "percentage": 1.65, "elapsed_time": "0:05:17", "remaining_time": "5:15:00"} +{"current_steps": 115, "total_steps": 6657, "loss": 0.3401, "lr": 6.846846846846848e-06, "epoch": 0.12092534174553102, "percentage": 1.73, "elapsed_time": "0:05:30", "remaining_time": "5:13:45"} +{"current_steps": 120, "total_steps": 6657, "loss": 0.3306, "lr": 7.147147147147148e-06, "epoch": 0.12618296529968454, "percentage": 1.8, "elapsed_time": "0:05:43", "remaining_time": "5:11:59"} +{"current_steps": 125, "total_steps": 6657, "loss": 0.3261, "lr": 7.447447447447448e-06, "epoch": 0.13144058885383805, "percentage": 1.88, "elapsed_time": "0:05:57", "remaining_time": "5:11:19"} +{"current_steps": 130, "total_steps": 6657, "loss": 0.3199, "lr": 7.747747747747749e-06, "epoch": 0.1366982124079916, "percentage": 1.95, "elapsed_time": "0:06:10", "remaining_time": "5:10:26"} +{"current_steps": 135, "total_steps": 6657, "loss": 0.3176, "lr": 8.048048048048048e-06, "epoch": 0.14195583596214512, "percentage": 2.03, "elapsed_time": "0:06:26", "remaining_time": "5:11:07"} +{"current_steps": 140, "total_steps": 6657, "loss": 0.3202, "lr": 8.348348348348348e-06, "epoch": 0.14721345951629863, "percentage": 2.1, "elapsed_time": "0:06:40", "remaining_time": "5:10:42"} +{"current_steps": 145, 
"total_steps": 6657, "loss": 0.3065, "lr": 8.64864864864865e-06, "epoch": 0.15247108307045215, "percentage": 2.18, "elapsed_time": "0:06:53", "remaining_time": "5:09:42"} +{"current_steps": 150, "total_steps": 6657, "loss": 0.3157, "lr": 8.94894894894895e-06, "epoch": 0.15772870662460567, "percentage": 2.25, "elapsed_time": "0:07:06", "remaining_time": "5:08:42"} +{"current_steps": 155, "total_steps": 6657, "loss": 0.3099, "lr": 9.24924924924925e-06, "epoch": 0.16298633017875921, "percentage": 2.33, "elapsed_time": "0:07:20", "remaining_time": "5:08:17"} +{"current_steps": 160, "total_steps": 6657, "loss": 0.2996, "lr": 9.54954954954955e-06, "epoch": 0.16824395373291273, "percentage": 2.4, "elapsed_time": "0:07:33", "remaining_time": "5:07:09"} +{"current_steps": 165, "total_steps": 6657, "loss": 0.2971, "lr": 9.849849849849851e-06, "epoch": 0.17350157728706625, "percentage": 2.48, "elapsed_time": "0:07:46", "remaining_time": "5:06:13"} +{"current_steps": 170, "total_steps": 6657, "loss": 0.285, "lr": 1.015015015015015e-05, "epoch": 0.17875920084121977, "percentage": 2.55, "elapsed_time": "0:07:59", "remaining_time": "5:05:04"} +{"current_steps": 175, "total_steps": 6657, "loss": 0.2909, "lr": 1.0450450450450452e-05, "epoch": 0.18401682439537329, "percentage": 2.63, "elapsed_time": "0:08:13", "remaining_time": "5:04:22"} +{"current_steps": 180, "total_steps": 6657, "loss": 0.2916, "lr": 1.0750750750750751e-05, "epoch": 0.1892744479495268, "percentage": 2.7, "elapsed_time": "0:08:26", "remaining_time": "5:03:47"} +{"current_steps": 185, "total_steps": 6657, "loss": 0.2937, "lr": 1.1051051051051051e-05, "epoch": 0.19453207150368035, "percentage": 2.78, "elapsed_time": "0:08:39", "remaining_time": "5:02:56"} +{"current_steps": 190, "total_steps": 6657, "loss": 0.2923, "lr": 1.1351351351351352e-05, "epoch": 0.19978969505783387, "percentage": 2.85, "elapsed_time": "0:08:53", "remaining_time": "5:02:25"} +{"current_steps": 195, "total_steps": 6657, "loss": 0.2952, "lr": 
1.1651651651651652e-05, "epoch": 0.20504731861198738, "percentage": 2.93, "elapsed_time": "0:09:06", "remaining_time": "5:01:47"} +{"current_steps": 200, "total_steps": 6657, "loss": 0.2827, "lr": 1.1951951951951951e-05, "epoch": 0.2103049421661409, "percentage": 3.0, "elapsed_time": "0:09:19", "remaining_time": "5:01:05"} +{"current_steps": 205, "total_steps": 6657, "loss": 0.2786, "lr": 1.2252252252252253e-05, "epoch": 0.21556256572029442, "percentage": 3.08, "elapsed_time": "0:10:34", "remaining_time": "5:32:50"} +{"current_steps": 210, "total_steps": 6657, "loss": 0.2986, "lr": 1.2552552552552552e-05, "epoch": 0.22082018927444794, "percentage": 3.15, "elapsed_time": "0:10:47", "remaining_time": "5:31:29"} +{"current_steps": 215, "total_steps": 6657, "loss": 0.3021, "lr": 1.2852852852852854e-05, "epoch": 0.22607781282860148, "percentage": 3.23, "elapsed_time": "0:11:03", "remaining_time": "5:31:10"} +{"current_steps": 220, "total_steps": 6657, "loss": 0.2764, "lr": 1.3153153153153155e-05, "epoch": 0.231335436382755, "percentage": 3.3, "elapsed_time": "0:11:16", "remaining_time": "5:29:43"} +{"current_steps": 225, "total_steps": 6657, "loss": 0.2812, "lr": 1.3453453453453456e-05, "epoch": 0.23659305993690852, "percentage": 3.38, "elapsed_time": "0:11:30", "remaining_time": "5:28:52"} +{"current_steps": 230, "total_steps": 6657, "loss": 0.2858, "lr": 1.3753753753753756e-05, "epoch": 0.24185068349106204, "percentage": 3.46, "elapsed_time": "0:11:43", "remaining_time": "5:27:43"} +{"current_steps": 235, "total_steps": 6657, "loss": 0.274, "lr": 1.4054054054054055e-05, "epoch": 0.24710830704521555, "percentage": 3.53, "elapsed_time": "0:11:56", "remaining_time": "5:26:19"} +{"current_steps": 240, "total_steps": 6657, "loss": 0.2672, "lr": 1.4354354354354357e-05, "epoch": 0.25236593059936907, "percentage": 3.61, "elapsed_time": "0:12:08", "remaining_time": "5:24:50"} +{"current_steps": 245, "total_steps": 6657, "loss": 0.2662, "lr": 1.4654654654654656e-05, "epoch": 
0.2576235541535226, "percentage": 3.68, "elapsed_time": "0:12:21", "remaining_time": "5:23:38"} +{"current_steps": 250, "total_steps": 6657, "loss": 0.2838, "lr": 1.4954954954954957e-05, "epoch": 0.2628811777076761, "percentage": 3.76, "elapsed_time": "0:12:36", "remaining_time": "5:23:07"} +{"current_steps": 255, "total_steps": 6657, "loss": 0.2736, "lr": 1.5255255255255257e-05, "epoch": 0.26813880126182965, "percentage": 3.83, "elapsed_time": "0:12:49", "remaining_time": "5:22:02"} +{"current_steps": 260, "total_steps": 6657, "loss": 0.2668, "lr": 1.555555555555556e-05, "epoch": 0.2733964248159832, "percentage": 3.91, "elapsed_time": "0:13:03", "remaining_time": "5:21:08"} +{"current_steps": 265, "total_steps": 6657, "loss": 0.2701, "lr": 1.5855855855855858e-05, "epoch": 0.2786540483701367, "percentage": 3.98, "elapsed_time": "0:13:16", "remaining_time": "5:20:10"} +{"current_steps": 270, "total_steps": 6657, "loss": 0.2672, "lr": 1.6156156156156157e-05, "epoch": 0.28391167192429023, "percentage": 4.06, "elapsed_time": "0:13:31", "remaining_time": "5:19:49"} +{"current_steps": 275, "total_steps": 6657, "loss": 0.2696, "lr": 1.6456456456456457e-05, "epoch": 0.2891692954784437, "percentage": 4.13, "elapsed_time": "0:13:44", "remaining_time": "5:19:00"} +{"current_steps": 280, "total_steps": 6657, "loss": 0.2626, "lr": 1.6756756756756757e-05, "epoch": 0.29442691903259727, "percentage": 4.21, "elapsed_time": "0:13:58", "remaining_time": "5:18:11"} +{"current_steps": 285, "total_steps": 6657, "loss": 0.2593, "lr": 1.705705705705706e-05, "epoch": 0.2996845425867508, "percentage": 4.28, "elapsed_time": "0:14:12", "remaining_time": "5:17:32"} +{"current_steps": 290, "total_steps": 6657, "loss": 0.2504, "lr": 1.735735735735736e-05, "epoch": 0.3049421661409043, "percentage": 4.36, "elapsed_time": "0:14:25", "remaining_time": "5:16:33"} +{"current_steps": 295, "total_steps": 6657, "loss": 0.2586, "lr": 1.765765765765766e-05, "epoch": 0.31019978969505785, "percentage": 4.43, 
"elapsed_time": "0:14:39", "remaining_time": "5:16:11"} +{"current_steps": 300, "total_steps": 6657, "loss": 0.2598, "lr": 1.795795795795796e-05, "epoch": 0.31545741324921134, "percentage": 4.51, "elapsed_time": "0:14:54", "remaining_time": "5:15:50"} +{"current_steps": 305, "total_steps": 6657, "loss": 0.2544, "lr": 1.8258258258258258e-05, "epoch": 0.3207150368033649, "percentage": 4.58, "elapsed_time": "0:15:06", "remaining_time": "5:14:39"} +{"current_steps": 310, "total_steps": 6657, "loss": 0.2599, "lr": 1.855855855855856e-05, "epoch": 0.32597266035751843, "percentage": 4.66, "elapsed_time": "0:15:18", "remaining_time": "5:13:34"} +{"current_steps": 315, "total_steps": 6657, "loss": 0.2621, "lr": 1.885885885885886e-05, "epoch": 0.3312302839116719, "percentage": 4.73, "elapsed_time": "0:15:32", "remaining_time": "5:12:50"} +{"current_steps": 320, "total_steps": 6657, "loss": 0.2508, "lr": 1.915915915915916e-05, "epoch": 0.33648790746582546, "percentage": 4.81, "elapsed_time": "0:15:46", "remaining_time": "5:12:14"} +{"current_steps": 325, "total_steps": 6657, "loss": 0.261, "lr": 1.9459459459459463e-05, "epoch": 0.34174553101997895, "percentage": 4.88, "elapsed_time": "0:15:58", "remaining_time": "5:11:20"} +{"current_steps": 330, "total_steps": 6657, "loss": 0.2581, "lr": 1.9759759759759763e-05, "epoch": 0.3470031545741325, "percentage": 4.96, "elapsed_time": "0:16:11", "remaining_time": "5:10:28"} +{"current_steps": 335, "total_steps": 6657, "loss": 0.2696, "lr": 2.0060060060060062e-05, "epoch": 0.352260778128286, "percentage": 5.03, "elapsed_time": "0:16:23", "remaining_time": "5:09:26"} +{"current_steps": 340, "total_steps": 6657, "loss": 0.2601, "lr": 2.0360360360360362e-05, "epoch": 0.35751840168243953, "percentage": 5.11, "elapsed_time": "0:16:37", "remaining_time": "5:08:55"} +{"current_steps": 345, "total_steps": 6657, "loss": 0.2719, "lr": 2.066066066066066e-05, "epoch": 0.3627760252365931, "percentage": 5.18, "elapsed_time": "0:16:51", 
"remaining_time": "5:08:19"} +{"current_steps": 350, "total_steps": 6657, "loss": 0.2619, "lr": 2.0960960960960964e-05, "epoch": 0.36803364879074657, "percentage": 5.26, "elapsed_time": "0:17:06", "remaining_time": "5:08:24"} +{"current_steps": 355, "total_steps": 6657, "loss": 0.257, "lr": 2.1261261261261264e-05, "epoch": 0.3732912723449001, "percentage": 5.33, "elapsed_time": "0:17:19", "remaining_time": "5:07:38"} +{"current_steps": 360, "total_steps": 6657, "loss": 0.256, "lr": 2.1561561561561564e-05, "epoch": 0.3785488958990536, "percentage": 5.41, "elapsed_time": "0:17:32", "remaining_time": "5:06:48"} +{"current_steps": 365, "total_steps": 6657, "loss": 0.2487, "lr": 2.1861861861861863e-05, "epoch": 0.38380651945320715, "percentage": 5.48, "elapsed_time": "0:17:45", "remaining_time": "5:06:15"} +{"current_steps": 370, "total_steps": 6657, "loss": 0.2629, "lr": 2.2162162162162163e-05, "epoch": 0.3890641430073607, "percentage": 5.56, "elapsed_time": "0:17:59", "remaining_time": "5:05:39"} +{"current_steps": 375, "total_steps": 6657, "loss": 0.2502, "lr": 2.2462462462462466e-05, "epoch": 0.3943217665615142, "percentage": 5.63, "elapsed_time": "0:18:12", "remaining_time": "5:04:59"} +{"current_steps": 380, "total_steps": 6657, "loss": 0.2447, "lr": 2.2762762762762765e-05, "epoch": 0.39957939011566773, "percentage": 5.71, "elapsed_time": "0:18:25", "remaining_time": "5:04:22"} +{"current_steps": 385, "total_steps": 6657, "loss": 0.238, "lr": 2.3063063063063065e-05, "epoch": 0.4048370136698212, "percentage": 5.78, "elapsed_time": "0:18:38", "remaining_time": "5:03:33"} +{"current_steps": 390, "total_steps": 6657, "loss": 0.2637, "lr": 2.3363363363363364e-05, "epoch": 0.41009463722397477, "percentage": 5.86, "elapsed_time": "0:18:51", "remaining_time": "5:03:03"} +{"current_steps": 395, "total_steps": 6657, "loss": 0.2404, "lr": 2.3663663663663664e-05, "epoch": 0.4153522607781283, "percentage": 5.93, "elapsed_time": "0:19:04", "remaining_time": "5:02:19"} 
+{"current_steps": 400, "total_steps": 6657, "loss": 0.2501, "lr": 2.3963963963963967e-05, "epoch": 0.4206098843322818, "percentage": 6.01, "elapsed_time": "0:19:21", "remaining_time": "5:02:41"} +{"current_steps": 405, "total_steps": 6657, "loss": 0.2278, "lr": 2.4264264264264267e-05, "epoch": 0.42586750788643535, "percentage": 6.08, "elapsed_time": "0:20:35", "remaining_time": "5:17:54"} +{"current_steps": 410, "total_steps": 6657, "loss": 0.2372, "lr": 2.4564564564564566e-05, "epoch": 0.43112513144058884, "percentage": 6.16, "elapsed_time": "0:20:48", "remaining_time": "5:17:00"} +{"current_steps": 415, "total_steps": 6657, "loss": 0.2443, "lr": 2.4864864864864866e-05, "epoch": 0.4363827549947424, "percentage": 6.23, "elapsed_time": "0:21:00", "remaining_time": "5:16:00"} +{"current_steps": 420, "total_steps": 6657, "loss": 0.248, "lr": 2.5165165165165165e-05, "epoch": 0.4416403785488959, "percentage": 6.31, "elapsed_time": "0:21:14", "remaining_time": "5:15:19"} +{"current_steps": 425, "total_steps": 6657, "loss": 0.243, "lr": 2.5465465465465465e-05, "epoch": 0.4468980021030494, "percentage": 6.38, "elapsed_time": "0:21:27", "remaining_time": "5:14:39"} +{"current_steps": 430, "total_steps": 6657, "loss": 0.2464, "lr": 2.5765765765765768e-05, "epoch": 0.45215562565720296, "percentage": 6.46, "elapsed_time": "0:21:40", "remaining_time": "5:13:58"} +{"current_steps": 435, "total_steps": 6657, "loss": 0.2522, "lr": 2.6066066066066067e-05, "epoch": 0.45741324921135645, "percentage": 6.53, "elapsed_time": "0:21:53", "remaining_time": "5:13:11"} +{"current_steps": 440, "total_steps": 6657, "loss": 0.2528, "lr": 2.6366366366366367e-05, "epoch": 0.46267087276551, "percentage": 6.61, "elapsed_time": "0:22:06", "remaining_time": "5:12:28"} +{"current_steps": 445, "total_steps": 6657, "loss": 0.265, "lr": 2.6666666666666667e-05, "epoch": 0.4679284963196635, "percentage": 6.68, "elapsed_time": "0:22:31", "remaining_time": "5:14:24"} +{"current_steps": 450, "total_steps": 
6657, "loss": 0.1961, "lr": 2.6966966966966966e-05, "epoch": 0.47318611987381703, "percentage": 6.76, "elapsed_time": "0:22:51", "remaining_time": "5:15:14"} +{"current_steps": 455, "total_steps": 6657, "loss": 0.1718, "lr": 2.726726726726727e-05, "epoch": 0.4784437434279706, "percentage": 6.83, "elapsed_time": "0:23:12", "remaining_time": "5:16:25"} +{"current_steps": 460, "total_steps": 6657, "loss": 0.193, "lr": 2.756756756756757e-05, "epoch": 0.48370136698212407, "percentage": 6.91, "elapsed_time": "0:23:32", "remaining_time": "5:17:02"} +{"current_steps": 465, "total_steps": 6657, "loss": 0.1612, "lr": 2.786786786786787e-05, "epoch": 0.4889589905362776, "percentage": 6.99, "elapsed_time": "0:23:56", "remaining_time": "5:18:47"} +{"current_steps": 470, "total_steps": 6657, "loss": 0.1958, "lr": 2.8168168168168168e-05, "epoch": 0.4942166140904311, "percentage": 7.06, "elapsed_time": "0:24:15", "remaining_time": "5:19:20"} +{"current_steps": 475, "total_steps": 6657, "loss": 0.2507, "lr": 2.8468468468468467e-05, "epoch": 0.49947423764458465, "percentage": 7.14, "elapsed_time": "0:24:39", "remaining_time": "5:20:58"} +{"current_steps": 480, "total_steps": 6657, "loss": 0.1682, "lr": 2.8768768768768774e-05, "epoch": 0.5047318611987381, "percentage": 7.21, "elapsed_time": "0:24:58", "remaining_time": "5:21:21"} +{"current_steps": 485, "total_steps": 6657, "loss": 0.1773, "lr": 2.9069069069069073e-05, "epoch": 0.5099894847528917, "percentage": 7.29, "elapsed_time": "0:25:21", "remaining_time": "5:22:45"} +{"current_steps": 490, "total_steps": 6657, "loss": 0.1879, "lr": 2.9369369369369373e-05, "epoch": 0.5152471083070452, "percentage": 7.36, "elapsed_time": "0:25:50", "remaining_time": "5:25:18"} +{"current_steps": 495, "total_steps": 6657, "loss": 0.1497, "lr": 2.9669669669669673e-05, "epoch": 0.5205047318611987, "percentage": 7.44, "elapsed_time": "0:26:15", "remaining_time": "5:26:47"} +{"current_steps": 500, "total_steps": 6657, "loss": 0.1552, "lr": 
2.9969969969969976e-05, "epoch": 0.5257623554153522, "percentage": 7.51, "elapsed_time": "0:26:40", "remaining_time": "5:28:22"} +{"current_steps": 505, "total_steps": 6657, "loss": 0.1684, "lr": 3.0270270270270275e-05, "epoch": 0.5310199789695058, "percentage": 7.59, "elapsed_time": "0:27:05", "remaining_time": "5:30:07"} +{"current_steps": 510, "total_steps": 6657, "loss": 0.1847, "lr": 3.0570570570570575e-05, "epoch": 0.5362776025236593, "percentage": 7.66, "elapsed_time": "0:27:27", "remaining_time": "5:31:00"} +{"current_steps": 515, "total_steps": 6657, "loss": 0.1521, "lr": 3.0870870870870874e-05, "epoch": 0.5415352260778128, "percentage": 7.74, "elapsed_time": "0:27:50", "remaining_time": "5:32:05"} +{"current_steps": 520, "total_steps": 6657, "loss": 0.1439, "lr": 3.1171171171171174e-05, "epoch": 0.5467928496319664, "percentage": 7.81, "elapsed_time": "0:28:09", "remaining_time": "5:32:15"} +{"current_steps": 525, "total_steps": 6657, "loss": 0.1312, "lr": 3.1471471471471473e-05, "epoch": 0.5520504731861199, "percentage": 7.89, "elapsed_time": "0:28:28", "remaining_time": "5:32:31"} +{"current_steps": 530, "total_steps": 6657, "loss": 0.174, "lr": 3.177177177177177e-05, "epoch": 0.5573080967402734, "percentage": 7.96, "elapsed_time": "0:28:46", "remaining_time": "5:32:41"} +{"current_steps": 535, "total_steps": 6657, "loss": 0.1653, "lr": 3.207207207207207e-05, "epoch": 0.562565720294427, "percentage": 8.04, "elapsed_time": "0:29:13", "remaining_time": "5:34:26"} +{"current_steps": 540, "total_steps": 6657, "loss": 0.1651, "lr": 3.237237237237238e-05, "epoch": 0.5678233438485805, "percentage": 8.11, "elapsed_time": "0:29:32", "remaining_time": "5:34:41"} +{"current_steps": 545, "total_steps": 6657, "loss": 0.161, "lr": 3.267267267267268e-05, "epoch": 0.573080967402734, "percentage": 8.19, "elapsed_time": "0:29:54", "remaining_time": "5:35:21"} +{"current_steps": 550, "total_steps": 6657, "loss": 0.2085, "lr": 3.297297297297298e-05, "epoch": 
0.5783385909568874, "percentage": 8.26, "elapsed_time": "0:30:18", "remaining_time": "5:36:28"} +{"current_steps": 555, "total_steps": 6657, "loss": 0.1379, "lr": 3.327327327327328e-05, "epoch": 0.583596214511041, "percentage": 8.34, "elapsed_time": "0:30:35", "remaining_time": "5:36:19"} +{"current_steps": 560, "total_steps": 6657, "loss": 0.193, "lr": 3.357357357357358e-05, "epoch": 0.5888538380651945, "percentage": 8.41, "elapsed_time": "0:30:53", "remaining_time": "5:36:21"} +{"current_steps": 565, "total_steps": 6657, "loss": 0.1771, "lr": 3.387387387387388e-05, "epoch": 0.594111461619348, "percentage": 8.49, "elapsed_time": "0:31:21", "remaining_time": "5:38:05"} +{"current_steps": 570, "total_steps": 6657, "loss": 0.1446, "lr": 3.4174174174174176e-05, "epoch": 0.5993690851735016, "percentage": 8.56, "elapsed_time": "0:31:44", "remaining_time": "5:39:02"} +{"current_steps": 575, "total_steps": 6657, "loss": 0.2978, "lr": 3.4474474474474476e-05, "epoch": 0.6046267087276551, "percentage": 8.64, "elapsed_time": "0:32:13", "remaining_time": "5:40:55"} +{"current_steps": 580, "total_steps": 6657, "loss": 0.2646, "lr": 3.4774774774774776e-05, "epoch": 0.6098843322818086, "percentage": 8.71, "elapsed_time": "0:32:36", "remaining_time": "5:41:34"} +{"current_steps": 585, "total_steps": 6657, "loss": 0.1573, "lr": 3.5075075075075075e-05, "epoch": 0.6151419558359621, "percentage": 8.79, "elapsed_time": "0:32:59", "remaining_time": "5:42:22"} +{"current_steps": 590, "total_steps": 6657, "loss": 0.2494, "lr": 3.5375375375375375e-05, "epoch": 0.6203995793901157, "percentage": 8.86, "elapsed_time": "0:33:30", "remaining_time": "5:44:30"} +{"current_steps": 595, "total_steps": 6657, "loss": 0.161, "lr": 3.567567567567568e-05, "epoch": 0.6256572029442692, "percentage": 8.94, "elapsed_time": "0:33:56", "remaining_time": "5:45:44"} +{"current_steps": 600, "total_steps": 6657, "loss": 0.1352, "lr": 3.597597597597598e-05, "epoch": 0.6309148264984227, "percentage": 9.01, 
"elapsed_time": "0:34:21", "remaining_time": "5:46:51"} +{"current_steps": 605, "total_steps": 6657, "loss": 0.1543, "lr": 3.627627627627628e-05, "epoch": 0.6361724500525763, "percentage": 9.09, "elapsed_time": "0:35:40", "remaining_time": "5:56:54"} +{"current_steps": 610, "total_steps": 6657, "loss": 0.1377, "lr": 3.657657657657658e-05, "epoch": 0.6414300736067298, "percentage": 9.16, "elapsed_time": "0:36:02", "remaining_time": "5:57:15"} +{"current_steps": 615, "total_steps": 6657, "loss": 0.1438, "lr": 3.687687687687688e-05, "epoch": 0.6466876971608833, "percentage": 9.24, "elapsed_time": "0:36:27", "remaining_time": "5:58:10"} +{"current_steps": 620, "total_steps": 6657, "loss": 0.1308, "lr": 3.717717717717718e-05, "epoch": 0.6519453207150369, "percentage": 9.31, "elapsed_time": "0:36:44", "remaining_time": "5:57:48"} +{"current_steps": 625, "total_steps": 6657, "loss": 0.139, "lr": 3.747747747747748e-05, "epoch": 0.6572029442691903, "percentage": 9.39, "elapsed_time": "0:37:01", "remaining_time": "5:57:19"} +{"current_steps": 630, "total_steps": 6657, "loss": 0.1201, "lr": 3.777777777777778e-05, "epoch": 0.6624605678233438, "percentage": 9.46, "elapsed_time": "0:37:20", "remaining_time": "5:57:15"} +{"current_steps": 635, "total_steps": 6657, "loss": 0.2007, "lr": 3.807807807807808e-05, "epoch": 0.6677181913774973, "percentage": 9.54, "elapsed_time": "0:37:53", "remaining_time": "5:59:17"} +{"current_steps": 640, "total_steps": 6657, "loss": 0.121, "lr": 3.837837837837838e-05, "epoch": 0.6729758149316509, "percentage": 9.61, "elapsed_time": "0:38:11", "remaining_time": "5:59:03"} +{"current_steps": 645, "total_steps": 6657, "loss": 0.1253, "lr": 3.8678678678678684e-05, "epoch": 0.6782334384858044, "percentage": 9.69, "elapsed_time": "0:38:29", "remaining_time": "5:58:48"} +{"current_steps": 650, "total_steps": 6657, "loss": 0.1263, "lr": 3.897897897897898e-05, "epoch": 0.6834910620399579, "percentage": 9.76, "elapsed_time": "0:38:48", "remaining_time": 
"5:58:34"} +{"current_steps": 655, "total_steps": 6657, "loss": 0.172, "lr": 3.927927927927928e-05, "epoch": 0.6887486855941115, "percentage": 9.84, "elapsed_time": "0:39:19", "remaining_time": "6:00:20"} +{"current_steps": 660, "total_steps": 6657, "loss": 0.1546, "lr": 3.957957957957958e-05, "epoch": 0.694006309148265, "percentage": 9.91, "elapsed_time": "0:39:41", "remaining_time": "6:00:41"} +{"current_steps": 665, "total_steps": 6657, "loss": 0.1793, "lr": 3.987987987987988e-05, "epoch": 0.6992639327024185, "percentage": 9.99, "elapsed_time": "0:40:06", "remaining_time": "6:01:26"} +{"current_steps": 670, "total_steps": 6657, "loss": 0.1507, "lr": 3.9999975251805184e-05, "epoch": 0.704521556256572, "percentage": 10.06, "elapsed_time": "0:40:27", "remaining_time": "6:01:29"} +{"current_steps": 675, "total_steps": 6657, "loss": 0.1733, "lr": 3.9999824013058675e-05, "epoch": 0.7097791798107256, "percentage": 10.14, "elapsed_time": "0:40:52", "remaining_time": "6:02:11"} +{"current_steps": 680, "total_steps": 6657, "loss": 0.1819, "lr": 3.99995352856012e-05, "epoch": 0.7150368033648791, "percentage": 10.21, "elapsed_time": "0:41:17", "remaining_time": "6:02:53"} +{"current_steps": 685, "total_steps": 6657, "loss": 0.1487, "lr": 3.999910907141761e-05, "epoch": 0.7202944269190326, "percentage": 10.29, "elapsed_time": "0:41:37", "remaining_time": "6:02:52"} +{"current_steps": 690, "total_steps": 6657, "loss": 0.169, "lr": 3.9998545373437924e-05, "epoch": 0.7255520504731862, "percentage": 10.37, "elapsed_time": "0:41:55", "remaining_time": "6:02:30"} +{"current_steps": 695, "total_steps": 6657, "loss": 0.1534, "lr": 3.999784419553728e-05, "epoch": 0.7308096740273397, "percentage": 10.44, "elapsed_time": "0:42:18", "remaining_time": "6:02:56"} +{"current_steps": 700, "total_steps": 6657, "loss": 0.1385, "lr": 3.9997005542535916e-05, "epoch": 0.7360672975814931, "percentage": 10.52, "elapsed_time": "0:42:47", "remaining_time": "6:04:06"} +{"current_steps": 705, 
"total_steps": 6657, "loss": 0.1251, "lr": 3.9996029420199154e-05, "epoch": 0.7413249211356467, "percentage": 10.59, "elapsed_time": "0:43:07", "remaining_time": "6:04:04"} +{"current_steps": 710, "total_steps": 6657, "loss": 0.1189, "lr": 3.9994915835237336e-05, "epoch": 0.7465825446898002, "percentage": 10.67, "elapsed_time": "0:43:36", "remaining_time": "6:05:12"} +{"current_steps": 715, "total_steps": 6657, "loss": 0.1325, "lr": 3.999366479530581e-05, "epoch": 0.7518401682439537, "percentage": 10.74, "elapsed_time": "0:43:54", "remaining_time": "6:04:54"} +{"current_steps": 720, "total_steps": 6657, "loss": 0.1602, "lr": 3.999227630900483e-05, "epoch": 0.7570977917981072, "percentage": 10.82, "elapsed_time": "0:44:28", "remaining_time": "6:06:46"} +{"current_steps": 725, "total_steps": 6657, "loss": 0.136, "lr": 3.9990750385879554e-05, "epoch": 0.7623554153522608, "percentage": 10.89, "elapsed_time": "0:44:47", "remaining_time": "6:06:27"} +{"current_steps": 730, "total_steps": 6657, "loss": 0.1516, "lr": 3.998908703641993e-05, "epoch": 0.7676130389064143, "percentage": 10.97, "elapsed_time": "0:45:05", "remaining_time": "6:06:03"} +{"current_steps": 735, "total_steps": 6657, "loss": 0.1138, "lr": 3.9987286272060644e-05, "epoch": 0.7728706624605678, "percentage": 11.04, "elapsed_time": "0:45:26", "remaining_time": "6:06:08"} +{"current_steps": 740, "total_steps": 6657, "loss": 0.1468, "lr": 3.998534810518104e-05, "epoch": 0.7781282860147214, "percentage": 11.12, "elapsed_time": "0:45:52", "remaining_time": "6:06:50"} +{"current_steps": 745, "total_steps": 6657, "loss": 0.1357, "lr": 3.998327254910504e-05, "epoch": 0.7833859095688749, "percentage": 11.19, "elapsed_time": "0:46:09", "remaining_time": "6:06:21"} +{"current_steps": 750, "total_steps": 6657, "loss": 0.1249, "lr": 3.998105961810105e-05, "epoch": 0.7886435331230284, "percentage": 11.27, "elapsed_time": "0:46:30", "remaining_time": "6:06:16"} +{"current_steps": 755, "total_steps": 6657, "loss": 0.1386, 
"lr": 3.997870932738187e-05, "epoch": 0.7939011566771819, "percentage": 11.34, "elapsed_time": "0:46:49", "remaining_time": "6:06:04"} +{"current_steps": 760, "total_steps": 6657, "loss": 0.1434, "lr": 3.997622169310454e-05, "epoch": 0.7991587802313355, "percentage": 11.42, "elapsed_time": "0:47:09", "remaining_time": "6:05:51"} +{"current_steps": 765, "total_steps": 6657, "loss": 0.1552, "lr": 3.9973596732370296e-05, "epoch": 0.804416403785489, "percentage": 11.49, "elapsed_time": "0:47:35", "remaining_time": "6:06:36"} +{"current_steps": 770, "total_steps": 6657, "loss": 0.1389, "lr": 3.997083446322443e-05, "epoch": 0.8096740273396424, "percentage": 11.57, "elapsed_time": "0:47:57", "remaining_time": "6:06:38"} +{"current_steps": 775, "total_steps": 6657, "loss": 0.1555, "lr": 3.9967934904656145e-05, "epoch": 0.814931650893796, "percentage": 11.64, "elapsed_time": "0:48:14", "remaining_time": "6:06:05"} +{"current_steps": 780, "total_steps": 6657, "loss": 0.1601, "lr": 3.9964898076598445e-05, "epoch": 0.8201892744479495, "percentage": 11.72, "elapsed_time": "0:48:40", "remaining_time": "6:06:44"} +{"current_steps": 785, "total_steps": 6657, "loss": 0.1309, "lr": 3.996172399992799e-05, "epoch": 0.825446898002103, "percentage": 11.79, "elapsed_time": "0:48:58", "remaining_time": "6:06:20"} +{"current_steps": 790, "total_steps": 6657, "loss": 0.1123, "lr": 3.995841269646496e-05, "epoch": 0.8307045215562566, "percentage": 11.87, "elapsed_time": "0:49:16", "remaining_time": "6:05:55"} +{"current_steps": 795, "total_steps": 6657, "loss": 0.134, "lr": 3.995496418897291e-05, "epoch": 0.8359621451104101, "percentage": 11.94, "elapsed_time": "0:49:34", "remaining_time": "6:05:29"} +{"current_steps": 800, "total_steps": 6657, "loss": 0.1691, "lr": 3.995137850115856e-05, "epoch": 0.8412197686645636, "percentage": 12.02, "elapsed_time": "0:50:03", "remaining_time": "6:06:32"} +{"current_steps": 805, "total_steps": 6657, "loss": 0.1125, "lr": 3.994765565767174e-05, "epoch": 
0.8464773922187171, "percentage": 12.09, "elapsed_time": "0:51:20", "remaining_time": "6:13:15"} +{"current_steps": 810, "total_steps": 6657, "loss": 0.1261, "lr": 3.9943795684105104e-05, "epoch": 0.8517350157728707, "percentage": 12.17, "elapsed_time": "0:51:51", "remaining_time": "6:14:23"} +{"current_steps": 815, "total_steps": 6657, "loss": 0.1292, "lr": 3.993979860699403e-05, "epoch": 0.8569926393270242, "percentage": 12.24, "elapsed_time": "0:52:12", "remaining_time": "6:14:14"} +{"current_steps": 820, "total_steps": 6657, "loss": 0.1077, "lr": 3.993566445381641e-05, "epoch": 0.8622502628811777, "percentage": 12.32, "elapsed_time": "0:52:31", "remaining_time": "6:13:53"} +{"current_steps": 825, "total_steps": 6657, "loss": 0.1396, "lr": 3.9931393252992454e-05, "epoch": 0.8675078864353313, "percentage": 12.39, "elapsed_time": "0:52:51", "remaining_time": "6:13:40"} +{"current_steps": 830, "total_steps": 6657, "loss": 0.1173, "lr": 3.992698503388453e-05, "epoch": 0.8727655099894848, "percentage": 12.47, "elapsed_time": "0:53:14", "remaining_time": "6:13:45"} +{"current_steps": 835, "total_steps": 6657, "loss": 0.1599, "lr": 3.992243982679691e-05, "epoch": 0.8780231335436383, "percentage": 12.54, "elapsed_time": "0:53:40", "remaining_time": "6:14:12"} +{"current_steps": 840, "total_steps": 6657, "loss": 0.1485, "lr": 3.991775766297562e-05, "epoch": 0.8832807570977917, "percentage": 12.62, "elapsed_time": "0:54:00", "remaining_time": "6:13:59"} +{"current_steps": 845, "total_steps": 6657, "loss": 0.126, "lr": 3.991293857460815e-05, "epoch": 0.8885383806519453, "percentage": 12.69, "elapsed_time": "0:54:19", "remaining_time": "6:13:36"} +{"current_steps": 850, "total_steps": 6657, "loss": 0.1404, "lr": 3.9907982594823326e-05, "epoch": 0.8937960042060988, "percentage": 12.77, "elapsed_time": "0:54:36", "remaining_time": "6:13:03"} +{"current_steps": 855, "total_steps": 6657, "loss": 0.1274, "lr": 3.9902889757691e-05, "epoch": 0.8990536277602523, "percentage": 
12.84, "elapsed_time": "0:54:55", "remaining_time": "6:12:40"} +{"current_steps": 860, "total_steps": 6657, "loss": 0.1373, "lr": 3.9897660098221866e-05, "epoch": 0.9043112513144059, "percentage": 12.92, "elapsed_time": "0:55:11", "remaining_time": "6:11:59"} +{"current_steps": 865, "total_steps": 6657, "loss": 0.1645, "lr": 3.98922936523672e-05, "epoch": 0.9095688748685594, "percentage": 12.99, "elapsed_time": "0:55:37", "remaining_time": "6:12:25"} +{"current_steps": 870, "total_steps": 6657, "loss": 0.1694, "lr": 3.9886790457018604e-05, "epoch": 0.9148264984227129, "percentage": 13.07, "elapsed_time": "0:56:01", "remaining_time": "6:12:41"} +{"current_steps": 875, "total_steps": 6657, "loss": 0.1383, "lr": 3.9881150550007776e-05, "epoch": 0.9200841219768665, "percentage": 13.14, "elapsed_time": "0:56:27", "remaining_time": "6:13:06"} +{"current_steps": 880, "total_steps": 6657, "loss": 0.1413, "lr": 3.987537397010624e-05, "epoch": 0.92534174553102, "percentage": 13.22, "elapsed_time": "0:56:49", "remaining_time": "6:13:03"} +{"current_steps": 885, "total_steps": 6657, "loss": 0.1253, "lr": 3.9869460757025064e-05, "epoch": 0.9305993690851735, "percentage": 13.29, "elapsed_time": "0:57:06", "remaining_time": "6:12:29"} +{"current_steps": 890, "total_steps": 6657, "loss": 0.1256, "lr": 3.9863410951414616e-05, "epoch": 0.935856992639327, "percentage": 13.37, "elapsed_time": "0:57:23", "remaining_time": "6:11:51"} +{"current_steps": 895, "total_steps": 6657, "loss": 0.1259, "lr": 3.985722459486425e-05, "epoch": 0.9411146161934806, "percentage": 13.44, "elapsed_time": "0:57:40", "remaining_time": "6:11:18"} +{"current_steps": 900, "total_steps": 6657, "loss": 0.1517, "lr": 3.985090172990206e-05, "epoch": 0.9463722397476341, "percentage": 13.52, "elapsed_time": "0:57:57", "remaining_time": "6:10:44"} +{"current_steps": 905, "total_steps": 6657, "loss": 0.1169, "lr": 3.984444239999455e-05, "epoch": 0.9516298633017876, "percentage": 13.59, "elapsed_time": "0:58:15", 
"remaining_time": "6:10:13"} +{"current_steps": 910, "total_steps": 6657, "loss": 0.124, "lr": 3.9837846649546354e-05, "epoch": 0.9568874868559412, "percentage": 13.67, "elapsed_time": "0:58:34", "remaining_time": "6:09:54"} +{"current_steps": 915, "total_steps": 6657, "loss": 0.1406, "lr": 3.9831114523899945e-05, "epoch": 0.9621451104100947, "percentage": 13.74, "elapsed_time": "0:58:52", "remaining_time": "6:09:26"} +{"current_steps": 920, "total_steps": 6657, "loss": 0.1106, "lr": 3.982424606933529e-05, "epoch": 0.9674027339642481, "percentage": 13.82, "elapsed_time": "0:59:19", "remaining_time": "6:09:58"} +{"current_steps": 925, "total_steps": 6657, "loss": 0.1193, "lr": 3.981724133306954e-05, "epoch": 0.9726603575184016, "percentage": 13.9, "elapsed_time": "0:59:41", "remaining_time": "6:09:51"} +{"current_steps": 930, "total_steps": 6657, "loss": 0.1248, "lr": 3.981010036325674e-05, "epoch": 0.9779179810725552, "percentage": 13.97, "elapsed_time": "1:00:06", "remaining_time": "6:10:11"} +{"current_steps": 935, "total_steps": 6657, "loss": 0.237, "lr": 3.980282320898746e-05, "epoch": 0.9831756046267087, "percentage": 14.05, "elapsed_time": "1:00:49", "remaining_time": "6:12:12"} +{"current_steps": 940, "total_steps": 6657, "loss": 0.1155, "lr": 3.9795409920288456e-05, "epoch": 0.9884332281808622, "percentage": 14.12, "elapsed_time": "1:01:06", "remaining_time": "6:11:40"} +{"current_steps": 945, "total_steps": 6657, "loss": 0.1349, "lr": 3.978786054812236e-05, "epoch": 0.9936908517350158, "percentage": 14.2, "elapsed_time": "1:01:30", "remaining_time": "6:11:47"} +{"current_steps": 950, "total_steps": 6657, "loss": 0.1584, "lr": 3.9780175144387304e-05, "epoch": 0.9989484752891693, "percentage": 14.27, "elapsed_time": "1:01:55", "remaining_time": "6:12:00"} +{"current_steps": 955, "total_steps": 6657, "loss": 0.262, "lr": 3.977235376191656e-05, "epoch": 1.0042060988433228, "percentage": 14.35, "elapsed_time": "1:02:08", "remaining_time": "6:10:59"} 
+{"current_steps": 960, "total_steps": 6657, "loss": 0.258, "lr": 3.9764396454478195e-05, "epoch": 1.0094637223974763, "percentage": 14.42, "elapsed_time": "1:02:19", "remaining_time": "6:09:52"} +{"current_steps": 965, "total_steps": 6657, "loss": 0.2547, "lr": 3.975630327677468e-05, "epoch": 1.0147213459516298, "percentage": 14.5, "elapsed_time": "1:02:31", "remaining_time": "6:08:50"} +{"current_steps": 970, "total_steps": 6657, "loss": 0.2499, "lr": 3.974807428444254e-05, "epoch": 1.0199789695057835, "percentage": 14.57, "elapsed_time": "1:02:43", "remaining_time": "6:07:45"} +{"current_steps": 975, "total_steps": 6657, "loss": 0.2455, "lr": 3.973970953405195e-05, "epoch": 1.025236593059937, "percentage": 14.65, "elapsed_time": "1:02:56", "remaining_time": "6:06:46"} +{"current_steps": 980, "total_steps": 6657, "loss": 0.2525, "lr": 3.9731209083106354e-05, "epoch": 1.0304942166140905, "percentage": 14.72, "elapsed_time": "1:03:07", "remaining_time": "6:05:41"} +{"current_steps": 985, "total_steps": 6657, "loss": 0.237, "lr": 3.972257299004206e-05, "epoch": 1.035751840168244, "percentage": 14.8, "elapsed_time": "1:03:19", "remaining_time": "6:04:38"} +{"current_steps": 990, "total_steps": 6657, "loss": 0.2523, "lr": 3.9713801314227867e-05, "epoch": 1.0410094637223974, "percentage": 14.87, "elapsed_time": "1:03:31", "remaining_time": "6:03:37"} +{"current_steps": 995, "total_steps": 6657, "loss": 0.2396, "lr": 3.9704894115964615e-05, "epoch": 1.046267087276551, "percentage": 14.95, "elapsed_time": "1:03:42", "remaining_time": "6:02:31"} +{"current_steps": 1000, "total_steps": 6657, "loss": 0.2427, "lr": 3.9695851456484805e-05, "epoch": 1.0515247108307044, "percentage": 15.02, "elapsed_time": "1:03:54", "remaining_time": "6:01:33"} +{"current_steps": 1005, "total_steps": 6657, "loss": 0.231, "lr": 3.968667339795218e-05, "epoch": 1.0567823343848581, "percentage": 15.1, "elapsed_time": "1:05:07", "remaining_time": "6:06:15"} +{"current_steps": 1010, "total_steps": 
6657, "loss": 0.2492, "lr": 3.9677360003461246e-05, "epoch": 1.0620399579390116, "percentage": 15.17, "elapsed_time": "1:05:20", "remaining_time": "6:05:18"} +{"current_steps": 1015, "total_steps": 6657, "loss": 0.2329, "lr": 3.966791133703691e-05, "epoch": 1.0672975814931651, "percentage": 15.25, "elapsed_time": "1:05:31", "remaining_time": "6:04:15"} +{"current_steps": 1020, "total_steps": 6657, "loss": 0.2243, "lr": 3.965832746363397e-05, "epoch": 1.0725552050473186, "percentage": 15.32, "elapsed_time": "1:05:43", "remaining_time": "6:03:12"} +{"current_steps": 1025, "total_steps": 6657, "loss": 0.2219, "lr": 3.964860844913676e-05, "epoch": 1.077812828601472, "percentage": 15.4, "elapsed_time": "1:05:55", "remaining_time": "6:02:12"} +{"current_steps": 1030, "total_steps": 6657, "loss": 0.2361, "lr": 3.9638754360358585e-05, "epoch": 1.0830704521556256, "percentage": 15.47, "elapsed_time": "1:06:07", "remaining_time": "6:01:15"} +{"current_steps": 1035, "total_steps": 6657, "loss": 0.2451, "lr": 3.962876526504134e-05, "epoch": 1.088328075709779, "percentage": 15.55, "elapsed_time": "1:06:19", "remaining_time": "6:00:15"} +{"current_steps": 1040, "total_steps": 6657, "loss": 0.2426, "lr": 3.961864123185502e-05, "epoch": 1.0935856992639328, "percentage": 15.62, "elapsed_time": "1:06:32", "remaining_time": "5:59:22"} +{"current_steps": 1045, "total_steps": 6657, "loss": 0.2355, "lr": 3.9608382330397265e-05, "epoch": 1.0988433228180863, "percentage": 15.7, "elapsed_time": "1:06:44", "remaining_time": "5:58:27"} +{"current_steps": 1050, "total_steps": 6657, "loss": 0.227, "lr": 3.959798863119284e-05, "epoch": 1.1041009463722398, "percentage": 15.77, "elapsed_time": "1:06:56", "remaining_time": "5:57:28"} +{"current_steps": 1055, "total_steps": 6657, "loss": 0.2387, "lr": 3.9587460205693194e-05, "epoch": 1.1093585699263933, "percentage": 15.85, "elapsed_time": "1:07:10", "remaining_time": "5:56:42"} +{"current_steps": 1060, "total_steps": 6657, "loss": 0.2443, "lr": 
3.9576797126275945e-05, "epoch": 1.1146161934805467, "percentage": 15.92, "elapsed_time": "1:07:24", "remaining_time": "5:55:56"} +{"current_steps": 1065, "total_steps": 6657, "loss": 0.2286, "lr": 3.9565999466244384e-05, "epoch": 1.1198738170347002, "percentage": 16.0, "elapsed_time": "1:07:36", "remaining_time": "5:55:01"} +{"current_steps": 1070, "total_steps": 6657, "loss": 0.2265, "lr": 3.955506729982699e-05, "epoch": 1.125131440588854, "percentage": 16.07, "elapsed_time": "1:07:48", "remaining_time": "5:54:02"} +{"current_steps": 1075, "total_steps": 6657, "loss": 0.2243, "lr": 3.9544000702176896e-05, "epoch": 1.1303890641430074, "percentage": 16.15, "elapsed_time": "1:08:00", "remaining_time": "5:53:06"} +{"current_steps": 1080, "total_steps": 6657, "loss": 0.2239, "lr": 3.953279974937139e-05, "epoch": 1.135646687697161, "percentage": 16.22, "elapsed_time": "1:08:12", "remaining_time": "5:52:11"} +{"current_steps": 1085, "total_steps": 6657, "loss": 0.2259, "lr": 3.9521464518411356e-05, "epoch": 1.1409043112513144, "percentage": 16.3, "elapsed_time": "1:08:24", "remaining_time": "5:51:18"} +{"current_steps": 1090, "total_steps": 6657, "loss": 0.2309, "lr": 3.950999508722082e-05, "epoch": 1.146161934805468, "percentage": 16.37, "elapsed_time": "1:08:37", "remaining_time": "5:50:31"} +{"current_steps": 1095, "total_steps": 6657, "loss": 0.2193, "lr": 3.9498391534646325e-05, "epoch": 1.1514195583596214, "percentage": 16.45, "elapsed_time": "1:08:49", "remaining_time": "5:49:38"} +{"current_steps": 1100, "total_steps": 6657, "loss": 0.2294, "lr": 3.948665394045646e-05, "epoch": 1.1566771819137749, "percentage": 16.52, "elapsed_time": "1:09:01", "remaining_time": "5:48:42"} +{"current_steps": 1105, "total_steps": 6657, "loss": 0.2306, "lr": 3.9474782385341255e-05, "epoch": 1.1619348054679284, "percentage": 16.6, "elapsed_time": "1:09:13", "remaining_time": "5:47:47"} +{"current_steps": 1110, "total_steps": 6657, "loss": 0.2241, "lr": 3.9462776950911684e-05, 
"epoch": 1.167192429022082, "percentage": 16.67, "elapsed_time": "1:09:25", "remaining_time": "5:46:55"} +{"current_steps": 1115, "total_steps": 6657, "loss": 0.2177, "lr": 3.9450637719699046e-05, "epoch": 1.1724500525762356, "percentage": 16.75, "elapsed_time": "1:09:36", "remaining_time": "5:46:01"} +{"current_steps": 1120, "total_steps": 6657, "loss": 0.2177, "lr": 3.9438364775154436e-05, "epoch": 1.177707676130389, "percentage": 16.82, "elapsed_time": "1:09:48", "remaining_time": "5:45:06"} +{"current_steps": 1125, "total_steps": 6657, "loss": 0.2171, "lr": 3.942595820164818e-05, "epoch": 1.1829652996845426, "percentage": 16.9, "elapsed_time": "1:10:00", "remaining_time": "5:44:13"} +{"current_steps": 1130, "total_steps": 6657, "loss": 0.2222, "lr": 3.94134180844692e-05, "epoch": 1.188222923238696, "percentage": 16.97, "elapsed_time": "1:10:12", "remaining_time": "5:43:21"} +{"current_steps": 1135, "total_steps": 6657, "loss": 0.2186, "lr": 3.940074450982449e-05, "epoch": 1.1934805467928495, "percentage": 17.05, "elapsed_time": "1:10:23", "remaining_time": "5:42:28"} +{"current_steps": 1140, "total_steps": 6657, "loss": 0.2212, "lr": 3.93879375648385e-05, "epoch": 1.1987381703470033, "percentage": 17.12, "elapsed_time": "1:10:35", "remaining_time": "5:41:37"} +{"current_steps": 1145, "total_steps": 6657, "loss": 0.2237, "lr": 3.9374997337552496e-05, "epoch": 1.2039957939011567, "percentage": 17.2, "elapsed_time": "1:10:47", "remaining_time": "5:40:47"} +{"current_steps": 1150, "total_steps": 6657, "loss": 0.2267, "lr": 3.936192391692404e-05, "epoch": 1.2092534174553102, "percentage": 17.28, "elapsed_time": "1:10:59", "remaining_time": "5:39:56"} +{"current_steps": 1155, "total_steps": 6657, "loss": 0.2148, "lr": 3.9348717392826306e-05, "epoch": 1.2145110410094637, "percentage": 17.35, "elapsed_time": "1:11:12", "remaining_time": "5:39:10"} +{"current_steps": 1160, "total_steps": 6657, "loss": 0.2335, "lr": 3.933537785604748e-05, "epoch": 1.2197686645636172, 
"percentage": 17.43, "elapsed_time": "1:11:24", "remaining_time": "5:38:22"} +{"current_steps": 1165, "total_steps": 6657, "loss": 0.2184, "lr": 3.932190539829018e-05, "epoch": 1.2250262881177707, "percentage": 17.5, "elapsed_time": "1:11:37", "remaining_time": "5:37:40"} +{"current_steps": 1170, "total_steps": 6657, "loss": 0.2337, "lr": 3.9308300112170735e-05, "epoch": 1.2302839116719242, "percentage": 17.58, "elapsed_time": "1:11:49", "remaining_time": "5:36:51"} +{"current_steps": 1175, "total_steps": 6657, "loss": 0.2159, "lr": 3.929456209121865e-05, "epoch": 1.235541535226078, "percentage": 17.65, "elapsed_time": "1:12:02", "remaining_time": "5:36:05"} +{"current_steps": 1180, "total_steps": 6657, "loss": 0.2225, "lr": 3.928069142987589e-05, "epoch": 1.2407991587802314, "percentage": 17.73, "elapsed_time": "1:12:14", "remaining_time": "5:35:17"} +{"current_steps": 1185, "total_steps": 6657, "loss": 0.2242, "lr": 3.926668822349625e-05, "epoch": 1.2460567823343849, "percentage": 17.8, "elapsed_time": "1:12:26", "remaining_time": "5:34:30"} +{"current_steps": 1190, "total_steps": 6657, "loss": 0.209, "lr": 3.925255256834474e-05, "epoch": 1.2513144058885384, "percentage": 17.88, "elapsed_time": "1:12:37", "remaining_time": "5:33:40"} +{"current_steps": 1195, "total_steps": 6657, "loss": 0.214, "lr": 3.923828456159685e-05, "epoch": 1.2565720294426919, "percentage": 17.95, "elapsed_time": "1:12:49", "remaining_time": "5:32:52"} +{"current_steps": 1200, "total_steps": 6657, "loss": 0.2192, "lr": 3.922388430133793e-05, "epoch": 1.2618296529968454, "percentage": 18.03, "elapsed_time": "1:13:02", "remaining_time": "5:32:08"} +{"current_steps": 1205, "total_steps": 6657, "loss": 0.2232, "lr": 3.9209351886562535e-05, "epoch": 1.267087276550999, "percentage": 18.1, "elapsed_time": "1:14:10", "remaining_time": "5:35:37"} +{"current_steps": 1210, "total_steps": 6657, "loss": 0.218, "lr": 3.919468741717367e-05, "epoch": 1.2723449001051526, "percentage": 18.18, 
"elapsed_time": "1:14:22", "remaining_time": "5:34:50"} +{"current_steps": 1215, "total_steps": 6657, "loss": 0.2142, "lr": 3.9179890993982186e-05, "epoch": 1.277602523659306, "percentage": 18.25, "elapsed_time": "1:14:35", "remaining_time": "5:34:03"} +{"current_steps": 1220, "total_steps": 6657, "loss": 0.2199, "lr": 3.916496271870603e-05, "epoch": 1.2828601472134595, "percentage": 18.33, "elapsed_time": "1:14:48", "remaining_time": "5:33:21"} +{"current_steps": 1225, "total_steps": 6657, "loss": 0.2139, "lr": 3.914990269396957e-05, "epoch": 1.288117770767613, "percentage": 18.4, "elapsed_time": "1:14:59", "remaining_time": "5:32:32"} +{"current_steps": 1230, "total_steps": 6657, "loss": 0.2105, "lr": 3.913471102330288e-05, "epoch": 1.2933753943217665, "percentage": 18.48, "elapsed_time": "1:15:11", "remaining_time": "5:31:45"} +{"current_steps": 1235, "total_steps": 6657, "loss": 0.2149, "lr": 3.911938781114105e-05, "epoch": 1.29863301787592, "percentage": 18.55, "elapsed_time": "1:15:24", "remaining_time": "5:31:01"} +{"current_steps": 1240, "total_steps": 6657, "loss": 0.2012, "lr": 3.910393316282345e-05, "epoch": 1.3038906414300735, "percentage": 18.63, "elapsed_time": "1:15:35", "remaining_time": "5:30:13"} +{"current_steps": 1245, "total_steps": 6657, "loss": 0.2148, "lr": 3.9088347184592974e-05, "epoch": 1.3091482649842272, "percentage": 18.7, "elapsed_time": "1:15:48", "remaining_time": "5:29:31"} +{"current_steps": 1250, "total_steps": 6657, "loss": 0.2115, "lr": 3.907262998359539e-05, "epoch": 1.3144058885383807, "percentage": 18.78, "elapsed_time": "1:16:01", "remaining_time": "5:28:49"} +{"current_steps": 1255, "total_steps": 6657, "loss": 0.2065, "lr": 3.905678166787852e-05, "epoch": 1.3196635120925342, "percentage": 18.85, "elapsed_time": "1:16:12", "remaining_time": "5:28:01"} +{"current_steps": 1260, "total_steps": 6657, "loss": 0.2153, "lr": 3.9040802346391555e-05, "epoch": 1.3249211356466877, "percentage": 18.93, "elapsed_time": "1:16:23", 
"remaining_time": "5:27:13"} +{"current_steps": 1265, "total_steps": 6657, "loss": 0.2145, "lr": 3.902469212898427e-05, "epoch": 1.3301787592008412, "percentage": 19.0, "elapsed_time": "1:16:35", "remaining_time": "5:26:28"} +{"current_steps": 1270, "total_steps": 6657, "loss": 0.2134, "lr": 3.900845112640631e-05, "epoch": 1.3354363827549949, "percentage": 19.08, "elapsed_time": "1:16:47", "remaining_time": "5:25:44"} +{"current_steps": 1275, "total_steps": 6657, "loss": 0.2175, "lr": 3.8992079450306355e-05, "epoch": 1.3406940063091484, "percentage": 19.15, "elapsed_time": "1:16:59", "remaining_time": "5:24:58"} +{"current_steps": 1280, "total_steps": 6657, "loss": 0.2083, "lr": 3.897557721323145e-05, "epoch": 1.3459516298633019, "percentage": 19.23, "elapsed_time": "1:17:10", "remaining_time": "5:24:13"} +{"current_steps": 1285, "total_steps": 6657, "loss": 0.2242, "lr": 3.895894452862614e-05, "epoch": 1.3512092534174553, "percentage": 19.3, "elapsed_time": "1:17:22", "remaining_time": "5:23:27"} +{"current_steps": 1290, "total_steps": 6657, "loss": 0.2124, "lr": 3.894218151083176e-05, "epoch": 1.3564668769716088, "percentage": 19.38, "elapsed_time": "1:17:34", "remaining_time": "5:22:43"} +{"current_steps": 1295, "total_steps": 6657, "loss": 0.2299, "lr": 3.892528827508562e-05, "epoch": 1.3617245005257623, "percentage": 19.45, "elapsed_time": "1:17:46", "remaining_time": "5:22:01"} +{"current_steps": 1300, "total_steps": 6657, "loss": 0.2106, "lr": 3.890826493752018e-05, "epoch": 1.3669821240799158, "percentage": 19.53, "elapsed_time": "1:18:00", "remaining_time": "5:21:26"} +{"current_steps": 1305, "total_steps": 6657, "loss": 0.2175, "lr": 3.8891111615162314e-05, "epoch": 1.3722397476340693, "percentage": 19.6, "elapsed_time": "1:18:12", "remaining_time": "5:20:43"} +{"current_steps": 1310, "total_steps": 6657, "loss": 0.2099, "lr": 3.8873828425932486e-05, "epoch": 1.3774973711882228, "percentage": 19.68, "elapsed_time": "1:18:23", "remaining_time": "5:19:59"} 
+{"current_steps": 1315, "total_steps": 6657, "loss": 0.2125, "lr": 3.8856415488643885e-05, "epoch": 1.3827549947423765, "percentage": 19.75, "elapsed_time": "1:18:35", "remaining_time": "5:19:17"} +{"current_steps": 1320, "total_steps": 6657, "loss": 0.2165, "lr": 3.88388729230017e-05, "epoch": 1.38801261829653, "percentage": 19.83, "elapsed_time": "1:18:47", "remaining_time": "5:18:35"} +{"current_steps": 1325, "total_steps": 6657, "loss": 0.2074, "lr": 3.8821200849602215e-05, "epoch": 1.3932702418506835, "percentage": 19.9, "elapsed_time": "1:18:59", "remaining_time": "5:17:52"} +{"current_steps": 1330, "total_steps": 6657, "loss": 0.2046, "lr": 3.880339938993204e-05, "epoch": 1.398527865404837, "percentage": 19.98, "elapsed_time": "1:19:11", "remaining_time": "5:17:12"} +{"current_steps": 1335, "total_steps": 6657, "loss": 0.1995, "lr": 3.878546866636724e-05, "epoch": 1.4037854889589905, "percentage": 20.05, "elapsed_time": "1:19:23", "remaining_time": "5:16:28"} +{"current_steps": 1340, "total_steps": 6657, "loss": 0.2218, "lr": 3.876740880217248e-05, "epoch": 1.4090431125131442, "percentage": 20.13, "elapsed_time": "1:19:35", "remaining_time": "5:15:46"} +{"current_steps": 1345, "total_steps": 6657, "loss": 0.2084, "lr": 3.874921992150026e-05, "epoch": 1.4143007360672977, "percentage": 20.2, "elapsed_time": "1:19:46", "remaining_time": "5:15:05"} +{"current_steps": 1350, "total_steps": 6657, "loss": 0.201, "lr": 3.873090214938994e-05, "epoch": 1.4195583596214512, "percentage": 20.28, "elapsed_time": "1:20:01", "remaining_time": "5:14:36"} +{"current_steps": 1355, "total_steps": 6657, "loss": 0.2009, "lr": 3.871245561176698e-05, "epoch": 1.4248159831756047, "percentage": 20.35, "elapsed_time": "1:20:16", "remaining_time": "5:14:07"} +{"current_steps": 1360, "total_steps": 6657, "loss": 0.1913, "lr": 3.869388043544204e-05, "epoch": 1.4300736067297581, "percentage": 20.43, "elapsed_time": "1:20:28", "remaining_time": "5:13:24"} +{"current_steps": 1365, 
"total_steps": 6657, "loss": 0.2093, "lr": 3.8675176748110076e-05, "epoch": 1.4353312302839116, "percentage": 20.5, "elapsed_time": "1:20:39", "remaining_time": "5:12:42"} +{"current_steps": 1370, "total_steps": 6657, "loss": 0.208, "lr": 3.865634467834953e-05, "epoch": 1.4405888538380651, "percentage": 20.58, "elapsed_time": "1:20:51", "remaining_time": "5:12:03"} +{"current_steps": 1375, "total_steps": 6657, "loss": 0.2063, "lr": 3.863738435562139e-05, "epoch": 1.4458464773922186, "percentage": 20.65, "elapsed_time": "1:21:03", "remaining_time": "5:11:24"} +{"current_steps": 1380, "total_steps": 6657, "loss": 0.2082, "lr": 3.8618295910268316e-05, "epoch": 1.4511041009463723, "percentage": 20.73, "elapsed_time": "1:21:16", "remaining_time": "5:10:46"} +{"current_steps": 1385, "total_steps": 6657, "loss": 0.2089, "lr": 3.859907947351374e-05, "epoch": 1.4563617245005258, "percentage": 20.81, "elapsed_time": "1:21:28", "remaining_time": "5:10:07"} +{"current_steps": 1390, "total_steps": 6657, "loss": 0.2149, "lr": 3.8579735177460994e-05, "epoch": 1.4616193480546793, "percentage": 20.88, "elapsed_time": "1:21:39", "remaining_time": "5:09:26"} +{"current_steps": 1395, "total_steps": 6657, "loss": 0.1806, "lr": 3.856026315509236e-05, "epoch": 1.4668769716088328, "percentage": 20.96, "elapsed_time": "1:22:00", "remaining_time": "5:09:20"} +{"current_steps": 1400, "total_steps": 6657, "loss": 0.1364, "lr": 3.8540663540268175e-05, "epoch": 1.4721345951629863, "percentage": 21.03, "elapsed_time": "1:22:20", "remaining_time": "5:09:10"} +{"current_steps": 1405, "total_steps": 6657, "loss": 0.1175, "lr": 3.852093646772592e-05, "epoch": 1.4773922187171398, "percentage": 21.11, "elapsed_time": "1:23:38", "remaining_time": "5:12:40"} +{"current_steps": 1410, "total_steps": 6657, "loss": 0.1378, "lr": 3.850108207307927e-05, "epoch": 1.4826498422712935, "percentage": 21.18, "elapsed_time": "1:23:55", "remaining_time": "5:12:19"} +{"current_steps": 1415, "total_steps": 6657, 
"loss": 0.1152, "lr": 3.848110049281719e-05, "epoch": 1.487907465825447, "percentage": 21.26, "elapsed_time": "1:24:18", "remaining_time": "5:12:18"} +{"current_steps": 1420, "total_steps": 6657, "loss": 0.123, "lr": 3.846099186430297e-05, "epoch": 1.4931650893796005, "percentage": 21.33, "elapsed_time": "1:24:36", "remaining_time": "5:12:00"} +{"current_steps": 1425, "total_steps": 6657, "loss": 0.1913, "lr": 3.8440756325773296e-05, "epoch": 1.498422712933754, "percentage": 21.41, "elapsed_time": "1:24:56", "remaining_time": "5:11:53"} +{"current_steps": 1430, "total_steps": 6657, "loss": 0.1188, "lr": 3.84203940163373e-05, "epoch": 1.5036803364879074, "percentage": 21.48, "elapsed_time": "1:25:15", "remaining_time": "5:11:38"} +{"current_steps": 1435, "total_steps": 6657, "loss": 0.1426, "lr": 3.83999050759756e-05, "epoch": 1.508937960042061, "percentage": 21.56, "elapsed_time": "1:25:37", "remaining_time": "5:11:34"} +{"current_steps": 1440, "total_steps": 6657, "loss": 0.1438, "lr": 3.837928964553933e-05, "epoch": 1.5141955835962144, "percentage": 21.63, "elapsed_time": "1:26:04", "remaining_time": "5:11:49"} +{"current_steps": 1445, "total_steps": 6657, "loss": 0.1117, "lr": 3.835854786674918e-05, "epoch": 1.519453207150368, "percentage": 21.71, "elapsed_time": "1:26:26", "remaining_time": "5:11:46"} +{"current_steps": 1450, "total_steps": 6657, "loss": 0.118, "lr": 3.8337679882194443e-05, "epoch": 1.5247108307045214, "percentage": 21.78, "elapsed_time": "1:26:50", "remaining_time": "5:11:49"} +{"current_steps": 1455, "total_steps": 6657, "loss": 0.1176, "lr": 3.8316685835331984e-05, "epoch": 1.5299684542586751, "percentage": 21.86, "elapsed_time": "1:27:12", "remaining_time": "5:11:49"} +{"current_steps": 1460, "total_steps": 6657, "loss": 0.1598, "lr": 3.8295565870485295e-05, "epoch": 1.5352260778128286, "percentage": 21.93, "elapsed_time": "1:27:35", "remaining_time": "5:11:46"} +{"current_steps": 1465, "total_steps": 6657, "loss": 0.1087, "lr": 
3.827432013284349e-05, "epoch": 1.540483701366982, "percentage": 22.01, "elapsed_time": "1:27:51", "remaining_time": "5:11:21"} +{"current_steps": 1470, "total_steps": 6657, "loss": 0.1268, "lr": 3.825294876846031e-05, "epoch": 1.5457413249211358, "percentage": 22.08, "elapsed_time": "1:28:12", "remaining_time": "5:11:16"} +{"current_steps": 1475, "total_steps": 6657, "loss": 0.1, "lr": 3.823145192425313e-05, "epoch": 1.5509989484752893, "percentage": 22.16, "elapsed_time": "1:28:30", "remaining_time": "5:10:55"} +{"current_steps": 1480, "total_steps": 6657, "loss": 0.1341, "lr": 3.8209829748001894e-05, "epoch": 1.5562565720294428, "percentage": 22.23, "elapsed_time": "1:28:47", "remaining_time": "5:10:34"} +{"current_steps": 1485, "total_steps": 6657, "loss": 0.1313, "lr": 3.8188082388348186e-05, "epoch": 1.5615141955835963, "percentage": 22.31, "elapsed_time": "1:29:10", "remaining_time": "5:10:36"} +{"current_steps": 1490, "total_steps": 6657, "loss": 0.1358, "lr": 3.816620999479413e-05, "epoch": 1.5667718191377498, "percentage": 22.38, "elapsed_time": "1:29:30", "remaining_time": "5:10:22"} +{"current_steps": 1495, "total_steps": 6657, "loss": 0.1369, "lr": 3.8144212717701424e-05, "epoch": 1.5720294426919033, "percentage": 22.46, "elapsed_time": "1:29:49", "remaining_time": "5:10:10"} +{"current_steps": 1500, "total_steps": 6657, "loss": 0.1754, "lr": 3.812209070829025e-05, "epoch": 1.5772870662460567, "percentage": 22.53, "elapsed_time": "1:30:12", "remaining_time": "5:10:07"} +{"current_steps": 1505, "total_steps": 6657, "loss": 0.109, "lr": 3.809984411863828e-05, "epoch": 1.5825446898002102, "percentage": 22.61, "elapsed_time": "1:30:27", "remaining_time": "5:09:40"} +{"current_steps": 1510, "total_steps": 6657, "loss": 0.1376, "lr": 3.80774731016796e-05, "epoch": 1.5878023133543637, "percentage": 22.68, "elapsed_time": "1:30:43", "remaining_time": "5:09:13"} +{"current_steps": 1515, "total_steps": 6657, "loss": 0.1563, "lr": 3.805497781120369e-05, "epoch": 
1.5930599369085172, "percentage": 22.76, "elapsed_time": "1:31:06", "remaining_time": "5:09:13"} +{"current_steps": 1520, "total_steps": 6657, "loss": 0.1299, "lr": 3.8032358401854315e-05, "epoch": 1.598317560462671, "percentage": 22.83, "elapsed_time": "1:31:32", "remaining_time": "5:09:22"} +{"current_steps": 1525, "total_steps": 6657, "loss": 0.2038, "lr": 3.800961502912854e-05, "epoch": 1.6035751840168244, "percentage": 22.91, "elapsed_time": "1:31:56", "remaining_time": "5:09:23"} +{"current_steps": 1530, "total_steps": 6657, "loss": 0.2697, "lr": 3.798674784937557e-05, "epoch": 1.608832807570978, "percentage": 22.98, "elapsed_time": "1:32:20", "remaining_time": "5:09:26"} +{"current_steps": 1535, "total_steps": 6657, "loss": 0.1382, "lr": 3.7963757019795756e-05, "epoch": 1.6140904311251314, "percentage": 23.06, "elapsed_time": "1:32:42", "remaining_time": "5:09:20"} +{"current_steps": 1540, "total_steps": 6657, "loss": 0.1541, "lr": 3.794064269843946e-05, "epoch": 1.619348054679285, "percentage": 23.13, "elapsed_time": "1:33:05", "remaining_time": "5:09:20"} +{"current_steps": 1545, "total_steps": 6657, "loss": 0.1986, "lr": 3.791740504420599e-05, "epoch": 1.6246056782334386, "percentage": 23.21, "elapsed_time": "1:33:34", "remaining_time": "5:09:37"} +{"current_steps": 1550, "total_steps": 6657, "loss": 0.1106, "lr": 3.789404421684251e-05, "epoch": 1.629863301787592, "percentage": 23.28, "elapsed_time": "1:33:57", "remaining_time": "5:09:33"} +{"current_steps": 1555, "total_steps": 6657, "loss": 0.123, "lr": 3.787056037694293e-05, "epoch": 1.6351209253417456, "percentage": 23.36, "elapsed_time": "1:34:15", "remaining_time": "5:09:15"} +{"current_steps": 1560, "total_steps": 6657, "loss": 0.1213, "lr": 3.784695368594682e-05, "epoch": 1.640378548895899, "percentage": 23.43, "elapsed_time": "1:34:35", "remaining_time": "5:09:03"} +{"current_steps": 1565, "total_steps": 6657, "loss": 0.1114, "lr": 3.782322430613828e-05, "epoch": 1.6456361724500526, "percentage": 
23.51, "elapsed_time": "1:34:58", "remaining_time": "5:09:01"} +{"current_steps": 1570, "total_steps": 6657, "loss": 0.1121, "lr": 3.779937240064484e-05, "epoch": 1.650893796004206, "percentage": 23.58, "elapsed_time": "1:35:14", "remaining_time": "5:08:35"} +{"current_steps": 1575, "total_steps": 6657, "loss": 0.1158, "lr": 3.777539813343634e-05, "epoch": 1.6561514195583595, "percentage": 23.66, "elapsed_time": "1:35:29", "remaining_time": "5:08:07"} +{"current_steps": 1580, "total_steps": 6657, "loss": 0.0989, "lr": 3.7751301669323776e-05, "epoch": 1.661409043112513, "percentage": 23.73, "elapsed_time": "1:35:46", "remaining_time": "5:07:44"} +{"current_steps": 1585, "total_steps": 6657, "loss": 0.1739, "lr": 3.772708317395818e-05, "epoch": 1.6666666666666665, "percentage": 23.81, "elapsed_time": "1:36:17", "remaining_time": "5:08:06"} +{"current_steps": 1590, "total_steps": 6657, "loss": 0.091, "lr": 3.770274281382952e-05, "epoch": 1.6719242902208202, "percentage": 23.88, "elapsed_time": "1:36:31", "remaining_time": "5:07:37"} +{"current_steps": 1595, "total_steps": 6657, "loss": 0.1056, "lr": 3.767828075626551e-05, "epoch": 1.6771819137749737, "percentage": 23.96, "elapsed_time": "1:36:50", "remaining_time": "5:07:19"} +{"current_steps": 1600, "total_steps": 6657, "loss": 0.1, "lr": 3.7653697169430456e-05, "epoch": 1.6824395373291272, "percentage": 24.03, "elapsed_time": "1:37:06", "remaining_time": "5:06:54"} +{"current_steps": 1605, "total_steps": 6657, "loss": 0.1216, "lr": 3.762899222232413e-05, "epoch": 1.687697160883281, "percentage": 24.11, "elapsed_time": "1:38:27", "remaining_time": "5:09:54"} +{"current_steps": 1610, "total_steps": 6657, "loss": 0.1492, "lr": 3.760416608478061e-05, "epoch": 1.6929547844374344, "percentage": 24.19, "elapsed_time": "1:38:53", "remaining_time": "5:10:00"} +{"current_steps": 1615, "total_steps": 6657, "loss": 0.1422, "lr": 3.7579218927467044e-05, "epoch": 1.698212407991588, "percentage": 24.26, "elapsed_time": "1:39:17", 
"remaining_time": "5:09:59"} +{"current_steps": 1620, "total_steps": 6657, "loss": 0.1321, "lr": 3.7554150921882596e-05, "epoch": 1.7034700315457414, "percentage": 24.34, "elapsed_time": "1:39:35", "remaining_time": "5:09:39"} +{"current_steps": 1625, "total_steps": 6657, "loss": 0.1404, "lr": 3.752896224035716e-05, "epoch": 1.7087276550998949, "percentage": 24.41, "elapsed_time": "1:39:58", "remaining_time": "5:09:33"} +{"current_steps": 1630, "total_steps": 6657, "loss": 0.1587, "lr": 3.750365305605024e-05, "epoch": 1.7139852786540484, "percentage": 24.49, "elapsed_time": "1:40:23", "remaining_time": "5:09:36"} +{"current_steps": 1635, "total_steps": 6657, "loss": 0.1191, "lr": 3.7478223542949704e-05, "epoch": 1.7192429022082019, "percentage": 24.56, "elapsed_time": "1:40:41", "remaining_time": "5:09:16"} +{"current_steps": 1640, "total_steps": 6657, "loss": 0.145, "lr": 3.745267387587065e-05, "epoch": 1.7245005257623554, "percentage": 24.64, "elapsed_time": "1:40:58", "remaining_time": "5:08:53"} +{"current_steps": 1645, "total_steps": 6657, "loss": 0.1284, "lr": 3.742700423045416e-05, "epoch": 1.7297581493165088, "percentage": 24.71, "elapsed_time": "1:41:20", "remaining_time": "5:08:45"} +{"current_steps": 1650, "total_steps": 6657, "loss": 0.1154, "lr": 3.7401214783166116e-05, "epoch": 1.7350157728706623, "percentage": 24.79, "elapsed_time": "1:41:42", "remaining_time": "5:08:38"} +{"current_steps": 1655, "total_steps": 6657, "loss": 0.104, "lr": 3.737530571129596e-05, "epoch": 1.7402733964248158, "percentage": 24.86, "elapsed_time": "1:42:04", "remaining_time": "5:08:31"} +{"current_steps": 1660, "total_steps": 6657, "loss": 0.1063, "lr": 3.734927719295551e-05, "epoch": 1.7455310199789695, "percentage": 24.94, "elapsed_time": "1:42:32", "remaining_time": "5:08:40"} +{"current_steps": 1665, "total_steps": 6657, "loss": 0.1079, "lr": 3.732312940707772e-05, "epoch": 1.750788643533123, "percentage": 25.01, "elapsed_time": "1:42:49", "remaining_time": "5:08:18"} 
+{"current_steps": 1670, "total_steps": 6657, "loss": 0.1234, "lr": 3.729686253341543e-05, "epoch": 1.7560462670872765, "percentage": 25.09, "elapsed_time": "1:43:17", "remaining_time": "5:08:27"} +{"current_steps": 1675, "total_steps": 6657, "loss": 0.1224, "lr": 3.7270476752540163e-05, "epoch": 1.7613038906414302, "percentage": 25.16, "elapsed_time": "1:43:39", "remaining_time": "5:08:17"} +{"current_steps": 1680, "total_steps": 6657, "loss": 0.1303, "lr": 3.724397224584086e-05, "epoch": 1.7665615141955837, "percentage": 25.24, "elapsed_time": "1:43:56", "remaining_time": "5:07:55"} +{"current_steps": 1685, "total_steps": 6657, "loss": 0.097, "lr": 3.7217349195522656e-05, "epoch": 1.7718191377497372, "percentage": 25.31, "elapsed_time": "1:44:16", "remaining_time": "5:07:41"} +{"current_steps": 1690, "total_steps": 6657, "loss": 0.1191, "lr": 3.7190607784605604e-05, "epoch": 1.7770767613038907, "percentage": 25.39, "elapsed_time": "1:44:37", "remaining_time": "5:07:30"} +{"current_steps": 1695, "total_steps": 6657, "loss": 0.1201, "lr": 3.716374819692341e-05, "epoch": 1.7823343848580442, "percentage": 25.46, "elapsed_time": "1:44:57", "remaining_time": "5:07:14"} +{"current_steps": 1700, "total_steps": 6657, "loss": 0.0947, "lr": 3.713677061712223e-05, "epoch": 1.7875920084121977, "percentage": 25.54, "elapsed_time": "1:45:14", "remaining_time": "5:06:51"} +{"current_steps": 1705, "total_steps": 6657, "loss": 0.122, "lr": 3.7109675230659316e-05, "epoch": 1.7928496319663512, "percentage": 25.61, "elapsed_time": "1:45:34", "remaining_time": "5:06:37"} +{"current_steps": 1710, "total_steps": 6657, "loss": 0.1173, "lr": 3.7082462223801784e-05, "epoch": 1.7981072555205047, "percentage": 25.69, "elapsed_time": "1:45:51", "remaining_time": "5:06:14"} +{"current_steps": 1715, "total_steps": 6657, "loss": 0.1397, "lr": 3.7055131783625364e-05, "epoch": 1.8033648790746581, "percentage": 25.76, "elapsed_time": "1:46:14", "remaining_time": "5:06:08"} +{"current_steps": 1720, 
"total_steps": 6657, "loss": 0.1155, "lr": 3.702768409801304e-05, "epoch": 1.8086225026288116, "percentage": 25.84, "elapsed_time": "1:46:37", "remaining_time": "5:06:02"} +{"current_steps": 1725, "total_steps": 6657, "loss": 0.1315, "lr": 3.700011935565384e-05, "epoch": 1.8138801261829653, "percentage": 25.91, "elapsed_time": "1:46:52", "remaining_time": "5:05:34"} +{"current_steps": 1730, "total_steps": 6657, "loss": 0.1229, "lr": 3.697243774604145e-05, "epoch": 1.8191377497371188, "percentage": 25.99, "elapsed_time": "1:47:16", "remaining_time": "5:05:30"} +{"current_steps": 1735, "total_steps": 6657, "loss": 0.1248, "lr": 3.6944639459473e-05, "epoch": 1.8243953732912723, "percentage": 26.06, "elapsed_time": "1:47:33", "remaining_time": "5:05:07"} +{"current_steps": 1740, "total_steps": 6657, "loss": 0.105, "lr": 3.69167246870477e-05, "epoch": 1.8296529968454258, "percentage": 26.14, "elapsed_time": "1:47:50", "remaining_time": "5:04:45"} +{"current_steps": 1745, "total_steps": 6657, "loss": 0.1101, "lr": 3.6888693620665546e-05, "epoch": 1.8349106203995795, "percentage": 26.21, "elapsed_time": "1:48:05", "remaining_time": "5:04:16"} +{"current_steps": 1750, "total_steps": 6657, "loss": 0.142, "lr": 3.686054645302598e-05, "epoch": 1.840168243953733, "percentage": 26.29, "elapsed_time": "1:48:34", "remaining_time": "5:04:25"} +{"current_steps": 1755, "total_steps": 6657, "loss": 0.1021, "lr": 3.6832283377626603e-05, "epoch": 1.8454258675078865, "percentage": 26.36, "elapsed_time": "1:48:50", "remaining_time": "5:04:00"} +{"current_steps": 1760, "total_steps": 6657, "loss": 0.1087, "lr": 3.680390458876182e-05, "epoch": 1.85068349106204, "percentage": 26.44, "elapsed_time": "1:49:18", "remaining_time": "5:04:08"} +{"current_steps": 1765, "total_steps": 6657, "loss": 0.1095, "lr": 3.67754102815215e-05, "epoch": 1.8559411146161935, "percentage": 26.51, "elapsed_time": "1:49:38", "remaining_time": "5:03:52"} +{"current_steps": 1770, "total_steps": 6657, "loss": 0.0937, 
"lr": 3.6746800651789636e-05, "epoch": 1.861198738170347, "percentage": 26.59, "elapsed_time": "1:49:55", "remaining_time": "5:03:30"} +{"current_steps": 1775, "total_steps": 6657, "loss": 0.1095, "lr": 3.671807589624302e-05, "epoch": 1.8664563617245005, "percentage": 26.66, "elapsed_time": "1:50:13", "remaining_time": "5:03:09"} +{"current_steps": 1780, "total_steps": 6657, "loss": 0.11, "lr": 3.6689236212349865e-05, "epoch": 1.871713985278654, "percentage": 26.74, "elapsed_time": "1:50:30", "remaining_time": "5:02:47"} +{"current_steps": 1785, "total_steps": 6657, "loss": 0.1323, "lr": 3.6660281798368485e-05, "epoch": 1.8769716088328074, "percentage": 26.81, "elapsed_time": "1:50:58", "remaining_time": "5:02:53"} +{"current_steps": 1790, "total_steps": 6657, "loss": 0.1361, "lr": 3.663121285334586e-05, "epoch": 1.882229232386961, "percentage": 26.89, "elapsed_time": "1:51:17", "remaining_time": "5:02:36"} +{"current_steps": 1795, "total_steps": 6657, "loss": 0.0894, "lr": 3.660202957711635e-05, "epoch": 1.8874868559411146, "percentage": 26.96, "elapsed_time": "1:51:34", "remaining_time": "5:02:13"} +{"current_steps": 1800, "total_steps": 6657, "loss": 0.1363, "lr": 3.657273217030026e-05, "epoch": 1.8927444794952681, "percentage": 27.04, "elapsed_time": "1:51:52", "remaining_time": "5:01:52"} +{"current_steps": 1805, "total_steps": 6657, "loss": 0.1089, "lr": 3.654332083430252e-05, "epoch": 1.8980021030494216, "percentage": 27.11, "elapsed_time": "1:53:10", "remaining_time": "5:04:14"} +{"current_steps": 1810, "total_steps": 6657, "loss": 0.115, "lr": 3.651379577131121e-05, "epoch": 1.9032597266035753, "percentage": 27.19, "elapsed_time": "1:53:26", "remaining_time": "5:03:47"} +{"current_steps": 1815, "total_steps": 6657, "loss": 0.1294, "lr": 3.648415718429629e-05, "epoch": 1.9085173501577288, "percentage": 27.26, "elapsed_time": "1:53:48", "remaining_time": "5:03:37"} +{"current_steps": 1820, "total_steps": 6657, "loss": 0.1635, "lr": 3.6454405277008087e-05, 
"epoch": 1.9137749737118823, "percentage": 27.34, "elapsed_time": "1:54:13", "remaining_time": "5:03:35"} +{"current_steps": 1825, "total_steps": 6657, "loss": 0.1227, "lr": 3.6424540253975985e-05, "epoch": 1.9190325972660358, "percentage": 27.41, "elapsed_time": "1:54:39", "remaining_time": "5:03:34"} +{"current_steps": 1830, "total_steps": 6657, "loss": 0.1162, "lr": 3.6394562320506955e-05, "epoch": 1.9242902208201893, "percentage": 27.49, "elapsed_time": "1:54:59", "remaining_time": "5:03:18"} +{"current_steps": 1835, "total_steps": 6657, "loss": 0.1086, "lr": 3.636447168268419e-05, "epoch": 1.9295478443743428, "percentage": 27.56, "elapsed_time": "1:55:15", "remaining_time": "5:02:52"} +{"current_steps": 1840, "total_steps": 6657, "loss": 0.1108, "lr": 3.633426854736566e-05, "epoch": 1.9348054679284963, "percentage": 27.64, "elapsed_time": "1:55:31", "remaining_time": "5:02:24"} +{"current_steps": 1845, "total_steps": 6657, "loss": 0.0971, "lr": 3.6303953122182695e-05, "epoch": 1.9400630914826498, "percentage": 27.72, "elapsed_time": "1:55:46", "remaining_time": "5:01:57"} +{"current_steps": 1850, "total_steps": 6657, "loss": 0.1365, "lr": 3.6273525615538564e-05, "epoch": 1.9453207150368033, "percentage": 27.79, "elapsed_time": "1:56:02", "remaining_time": "5:01:30"} +{"current_steps": 1855, "total_steps": 6657, "loss": 0.0989, "lr": 3.6242986236607046e-05, "epoch": 1.9505783385909568, "percentage": 27.87, "elapsed_time": "1:56:18", "remaining_time": "5:01:05"} +{"current_steps": 1860, "total_steps": 6657, "loss": 0.109, "lr": 3.6212335195330976e-05, "epoch": 1.9558359621451105, "percentage": 27.94, "elapsed_time": "1:56:36", "remaining_time": "5:00:44"} +{"current_steps": 1865, "total_steps": 6657, "loss": 0.1203, "lr": 3.618157270242082e-05, "epoch": 1.961093585699264, "percentage": 28.02, "elapsed_time": "1:56:53", "remaining_time": "5:00:20"} +{"current_steps": 1870, "total_steps": 6657, "loss": 0.1023, "lr": 3.615069896935321e-05, "epoch": 
1.9663512092534174, "percentage": 28.09, "elapsed_time": "1:57:18", "remaining_time": "5:00:17"} +{"current_steps": 1875, "total_steps": 6657, "loss": 0.0999, "lr": 3.6119714208369506e-05, "epoch": 1.971608832807571, "percentage": 28.17, "elapsed_time": "1:57:38", "remaining_time": "5:00:01"} +{"current_steps": 1880, "total_steps": 6657, "loss": 0.1057, "lr": 3.608861863247432e-05, "epoch": 1.9768664563617246, "percentage": 28.24, "elapsed_time": "1:58:02", "remaining_time": "4:59:56"} +{"current_steps": 1885, "total_steps": 6657, "loss": 0.1915, "lr": 3.6057412455434075e-05, "epoch": 1.9821240799158781, "percentage": 28.32, "elapsed_time": "1:58:39", "remaining_time": "5:00:22"} +{"current_steps": 1890, "total_steps": 6657, "loss": 0.1217, "lr": 3.6026095891775494e-05, "epoch": 1.9873817034700316, "percentage": 28.39, "elapsed_time": "1:58:59", "remaining_time": "5:00:07"} +{"current_steps": 1895, "total_steps": 6657, "loss": 0.1161, "lr": 3.5994669156784184e-05, "epoch": 1.9926393270241851, "percentage": 28.47, "elapsed_time": "1:59:22", "remaining_time": "4:59:57"} +{"current_steps": 1900, "total_steps": 6657, "loss": 0.1142, "lr": 3.5963132466503107e-05, "epoch": 1.9978969505783386, "percentage": 28.54, "elapsed_time": "1:59:40", "remaining_time": "4:59:37"} +{"current_steps": 1905, "total_steps": 6657, "loss": 0.2091, "lr": 3.593148603773111e-05, "epoch": 2.003154574132492, "percentage": 28.62, "elapsed_time": "1:59:59", "remaining_time": "4:59:18"} +{"current_steps": 1910, "total_steps": 6657, "loss": 0.214, "lr": 3.5899730088021455e-05, "epoch": 2.0084121976866456, "percentage": 28.69, "elapsed_time": "2:00:10", "remaining_time": "4:58:40"} +{"current_steps": 1915, "total_steps": 6657, "loss": 0.204, "lr": 3.586786483568028e-05, "epoch": 2.013669821240799, "percentage": 28.77, "elapsed_time": "2:00:22", "remaining_time": "4:58:04"} +{"current_steps": 1920, "total_steps": 6657, "loss": 0.2023, "lr": 3.583589049976514e-05, "epoch": 2.0189274447949526, 
"percentage": 28.84, "elapsed_time": "2:00:34", "remaining_time": "4:57:28"} +{"current_steps": 1925, "total_steps": 6657, "loss": 0.2009, "lr": 3.580380730008348e-05, "epoch": 2.024185068349106, "percentage": 28.92, "elapsed_time": "2:00:47", "remaining_time": "4:56:54"} +{"current_steps": 1930, "total_steps": 6657, "loss": 0.1992, "lr": 3.577161545719113e-05, "epoch": 2.0294426919032595, "percentage": 28.99, "elapsed_time": "2:00:58", "remaining_time": "4:56:17"} +{"current_steps": 1935, "total_steps": 6657, "loss": 0.2045, "lr": 3.573931519239079e-05, "epoch": 2.034700315457413, "percentage": 29.07, "elapsed_time": "2:01:10", "remaining_time": "4:55:42"} +{"current_steps": 1940, "total_steps": 6657, "loss": 0.1926, "lr": 3.5706906727730496e-05, "epoch": 2.039957939011567, "percentage": 29.14, "elapsed_time": "2:01:22", "remaining_time": "4:55:05"} +{"current_steps": 1945, "total_steps": 6657, "loss": 0.2106, "lr": 3.567439028600211e-05, "epoch": 2.0452155625657205, "percentage": 29.22, "elapsed_time": "2:01:33", "remaining_time": "4:54:29"} +{"current_steps": 1950, "total_steps": 6657, "loss": 0.2001, "lr": 3.564176609073979e-05, "epoch": 2.050473186119874, "percentage": 29.29, "elapsed_time": "2:01:45", "remaining_time": "4:53:54"} +{"current_steps": 1955, "total_steps": 6657, "loss": 0.1937, "lr": 3.5609034366218426e-05, "epoch": 2.0557308096740274, "percentage": 29.37, "elapsed_time": "2:01:57", "remaining_time": "4:53:20"} +{"current_steps": 1960, "total_steps": 6657, "loss": 0.2066, "lr": 3.5576195337452146e-05, "epoch": 2.060988433228181, "percentage": 29.44, "elapsed_time": "2:02:10", "remaining_time": "4:52:47"} +{"current_steps": 1965, "total_steps": 6657, "loss": 0.195, "lr": 3.55432492301927e-05, "epoch": 2.0662460567823344, "percentage": 29.52, "elapsed_time": "2:02:22", "remaining_time": "4:52:11"} +{"current_steps": 1970, "total_steps": 6657, "loss": 0.1871, "lr": 3.551019627092799e-05, "epoch": 2.071503680336488, "percentage": 29.59, 
"elapsed_time": "2:02:33", "remaining_time": "4:51:35"} +{"current_steps": 1975, "total_steps": 6657, "loss": 0.1843, "lr": 3.547703668688044e-05, "epoch": 2.0767613038906414, "percentage": 29.67, "elapsed_time": "2:02:45", "remaining_time": "4:50:59"} +{"current_steps": 1980, "total_steps": 6657, "loss": 0.194, "lr": 3.544377070600549e-05, "epoch": 2.082018927444795, "percentage": 29.74, "elapsed_time": "2:02:57", "remaining_time": "4:50:26"} +{"current_steps": 1985, "total_steps": 6657, "loss": 0.2055, "lr": 3.541039855699e-05, "epoch": 2.0872765509989484, "percentage": 29.82, "elapsed_time": "2:03:09", "remaining_time": "4:49:52"} +{"current_steps": 1990, "total_steps": 6657, "loss": 0.2055, "lr": 3.537692046925065e-05, "epoch": 2.092534174553102, "percentage": 29.89, "elapsed_time": "2:03:22", "remaining_time": "4:49:20"} +{"current_steps": 1995, "total_steps": 6657, "loss": 0.1966, "lr": 3.534333667293244e-05, "epoch": 2.0977917981072554, "percentage": 29.97, "elapsed_time": "2:03:34", "remaining_time": "4:48:46"} +{"current_steps": 2000, "total_steps": 6657, "loss": 0.1966, "lr": 3.5309647398907056e-05, "epoch": 2.103049421661409, "percentage": 30.04, "elapsed_time": "2:03:46", "remaining_time": "4:48:13"} +{"current_steps": 2005, "total_steps": 6657, "loss": 0.1944, "lr": 3.527585287877125e-05, "epoch": 2.108307045215563, "percentage": 30.12, "elapsed_time": "2:04:56", "remaining_time": "4:49:52"} +{"current_steps": 2010, "total_steps": 6657, "loss": 0.2029, "lr": 3.5241953344845345e-05, "epoch": 2.1135646687697163, "percentage": 30.19, "elapsed_time": "2:05:10", "remaining_time": "4:49:24"} +{"current_steps": 2015, "total_steps": 6657, "loss": 0.1947, "lr": 3.520794903017153e-05, "epoch": 2.1188222923238698, "percentage": 30.27, "elapsed_time": "2:05:22", "remaining_time": "4:48:50"} +{"current_steps": 2020, "total_steps": 6657, "loss": 0.1879, "lr": 3.517384016851235e-05, "epoch": 2.1240799158780233, "percentage": 30.34, "elapsed_time": "2:05:34", 
"remaining_time": "4:48:15"} +{"current_steps": 2025, "total_steps": 6657, "loss": 0.1892, "lr": 3.513962699434903e-05, "epoch": 2.1293375394321767, "percentage": 30.42, "elapsed_time": "2:05:46", "remaining_time": "4:47:41"} +{"current_steps": 2030, "total_steps": 6657, "loss": 0.1872, "lr": 3.5105309742879894e-05, "epoch": 2.1345951629863302, "percentage": 30.49, "elapsed_time": "2:05:58", "remaining_time": "4:47:07"} +{"current_steps": 2035, "total_steps": 6657, "loss": 0.1917, "lr": 3.507088865001876e-05, "epoch": 2.1398527865404837, "percentage": 30.57, "elapsed_time": "2:06:10", "remaining_time": "4:46:33"} +{"current_steps": 2040, "total_steps": 6657, "loss": 0.1996, "lr": 3.5036363952393296e-05, "epoch": 2.145110410094637, "percentage": 30.64, "elapsed_time": "2:06:24", "remaining_time": "4:46:04"} +{"current_steps": 2045, "total_steps": 6657, "loss": 0.1809, "lr": 3.500173588734339e-05, "epoch": 2.1503680336487907, "percentage": 30.72, "elapsed_time": "2:06:36", "remaining_time": "4:45:31"} +{"current_steps": 2050, "total_steps": 6657, "loss": 0.1897, "lr": 3.4967004692919555e-05, "epoch": 2.155625657202944, "percentage": 30.79, "elapsed_time": "2:06:47", "remaining_time": "4:44:57"} +{"current_steps": 2055, "total_steps": 6657, "loss": 0.1923, "lr": 3.4932170607881226e-05, "epoch": 2.1608832807570977, "percentage": 30.87, "elapsed_time": "2:06:59", "remaining_time": "4:44:23"} +{"current_steps": 2060, "total_steps": 6657, "loss": 0.1936, "lr": 3.4897233871695205e-05, "epoch": 2.166140904311251, "percentage": 30.94, "elapsed_time": "2:07:11", "remaining_time": "4:43:50"} +{"current_steps": 2065, "total_steps": 6657, "loss": 0.1865, "lr": 3.4862194724533934e-05, "epoch": 2.1713985278654047, "percentage": 31.02, "elapsed_time": "2:07:23", "remaining_time": "4:43:17"} +{"current_steps": 2070, "total_steps": 6657, "loss": 0.1825, "lr": 3.4827053407273894e-05, "epoch": 2.176656151419558, "percentage": 31.1, "elapsed_time": "2:07:34", "remaining_time": 
"4:42:42"} +{"current_steps": 2075, "total_steps": 6657, "loss": 0.1749, "lr": 3.4791810161493935e-05, "epoch": 2.181913774973712, "percentage": 31.17, "elapsed_time": "2:07:46", "remaining_time": "4:42:08"} +{"current_steps": 2080, "total_steps": 6657, "loss": 0.1916, "lr": 3.47564652294736e-05, "epoch": 2.1871713985278656, "percentage": 31.25, "elapsed_time": "2:07:58", "remaining_time": "4:41:36"} +{"current_steps": 2085, "total_steps": 6657, "loss": 0.186, "lr": 3.472101885419149e-05, "epoch": 2.192429022082019, "percentage": 31.32, "elapsed_time": "2:08:10", "remaining_time": "4:41:02"} +{"current_steps": 2090, "total_steps": 6657, "loss": 0.1867, "lr": 3.468547127932358e-05, "epoch": 2.1976866456361726, "percentage": 31.4, "elapsed_time": "2:08:21", "remaining_time": "4:40:29"} +{"current_steps": 2095, "total_steps": 6657, "loss": 0.1875, "lr": 3.4649822749241525e-05, "epoch": 2.202944269190326, "percentage": 31.47, "elapsed_time": "2:08:33", "remaining_time": "4:39:56"} +{"current_steps": 2100, "total_steps": 6657, "loss": 0.2002, "lr": 3.4614073509011e-05, "epoch": 2.2082018927444795, "percentage": 31.55, "elapsed_time": "2:08:45", "remaining_time": "4:39:24"} +{"current_steps": 2105, "total_steps": 6657, "loss": 0.1772, "lr": 3.4578223804390026e-05, "epoch": 2.213459516298633, "percentage": 31.62, "elapsed_time": "2:08:57", "remaining_time": "4:38:51"} +{"current_steps": 2110, "total_steps": 6657, "loss": 0.1985, "lr": 3.454227388182725e-05, "epoch": 2.2187171398527865, "percentage": 31.7, "elapsed_time": "2:09:10", "remaining_time": "4:38:21"} +{"current_steps": 2115, "total_steps": 6657, "loss": 0.1907, "lr": 3.450622398846026e-05, "epoch": 2.22397476340694, "percentage": 31.77, "elapsed_time": "2:09:23", "remaining_time": "4:37:52"} +{"current_steps": 2120, "total_steps": 6657, "loss": 0.2048, "lr": 3.447007437211392e-05, "epoch": 2.2292323869610935, "percentage": 31.85, "elapsed_time": "2:09:35", "remaining_time": "4:37:21"} +{"current_steps": 2125, 
"total_steps": 6657, "loss": 0.1787, "lr": 3.443382528129862e-05, "epoch": 2.234490010515247, "percentage": 31.92, "elapsed_time": "2:09:48", "remaining_time": "4:36:49"} +{"current_steps": 2130, "total_steps": 6657, "loss": 0.1885, "lr": 3.4397476965208604e-05, "epoch": 2.2397476340694005, "percentage": 32.0, "elapsed_time": "2:09:59", "remaining_time": "4:36:17"} +{"current_steps": 2135, "total_steps": 6657, "loss": 0.1893, "lr": 3.43610296737202e-05, "epoch": 2.245005257623554, "percentage": 32.07, "elapsed_time": "2:10:11", "remaining_time": "4:35:45"} +{"current_steps": 2140, "total_steps": 6657, "loss": 0.1816, "lr": 3.432448365739019e-05, "epoch": 2.250262881177708, "percentage": 32.15, "elapsed_time": "2:10:23", "remaining_time": "4:35:12"} +{"current_steps": 2145, "total_steps": 6657, "loss": 0.1822, "lr": 3.4287839167454016e-05, "epoch": 2.2555205047318614, "percentage": 32.22, "elapsed_time": "2:10:34", "remaining_time": "4:34:39"} +{"current_steps": 2150, "total_steps": 6657, "loss": 0.1887, "lr": 3.4251096455824076e-05, "epoch": 2.260778128286015, "percentage": 32.3, "elapsed_time": "2:10:47", "remaining_time": "4:34:09"} +{"current_steps": 2155, "total_steps": 6657, "loss": 0.1876, "lr": 3.421425577508799e-05, "epoch": 2.2660357518401684, "percentage": 32.37, "elapsed_time": "2:10:58", "remaining_time": "4:33:37"} +{"current_steps": 2160, "total_steps": 6657, "loss": 0.1921, "lr": 3.417731737850687e-05, "epoch": 2.271293375394322, "percentage": 32.45, "elapsed_time": "2:11:10", "remaining_time": "4:33:05"} +{"current_steps": 2165, "total_steps": 6657, "loss": 0.1785, "lr": 3.4140281520013595e-05, "epoch": 2.2765509989484753, "percentage": 32.52, "elapsed_time": "2:11:22", "remaining_time": "4:32:35"} +{"current_steps": 2170, "total_steps": 6657, "loss": 0.1896, "lr": 3.4103148454211017e-05, "epoch": 2.281808622502629, "percentage": 32.6, "elapsed_time": "2:11:34", "remaining_time": "4:32:04"} +{"current_steps": 2175, "total_steps": 6657, "loss": 
0.1889, "lr": 3.4065918436370244e-05, "epoch": 2.2870662460567823, "percentage": 32.67, "elapsed_time": "2:11:47", "remaining_time": "4:31:34"} +{"current_steps": 2180, "total_steps": 6657, "loss": 0.18, "lr": 3.402859172242889e-05, "epoch": 2.292323869610936, "percentage": 32.75, "elapsed_time": "2:11:59", "remaining_time": "4:31:03"} +{"current_steps": 2185, "total_steps": 6657, "loss": 0.1876, "lr": 3.399116856898931e-05, "epoch": 2.2975814931650893, "percentage": 32.82, "elapsed_time": "2:12:11", "remaining_time": "4:30:32"} +{"current_steps": 2190, "total_steps": 6657, "loss": 0.171, "lr": 3.395364923331681e-05, "epoch": 2.302839116719243, "percentage": 32.9, "elapsed_time": "2:12:22", "remaining_time": "4:30:01"} +{"current_steps": 2195, "total_steps": 6657, "loss": 0.1792, "lr": 3.391603397333793e-05, "epoch": 2.3080967402733963, "percentage": 32.97, "elapsed_time": "2:12:35", "remaining_time": "4:29:31"} +{"current_steps": 2200, "total_steps": 6657, "loss": 0.1888, "lr": 3.387832304763861e-05, "epoch": 2.3133543638275498, "percentage": 33.05, "elapsed_time": "2:12:48", "remaining_time": "4:29:03"} +{"current_steps": 2205, "total_steps": 6657, "loss": 0.1755, "lr": 3.384051671546247e-05, "epoch": 2.3186119873817033, "percentage": 33.12, "elapsed_time": "2:13:58", "remaining_time": "4:30:30"} +{"current_steps": 2210, "total_steps": 6657, "loss": 0.1872, "lr": 3.380261523670899e-05, "epoch": 2.3238696109358568, "percentage": 33.2, "elapsed_time": "2:14:09", "remaining_time": "4:29:58"} +{"current_steps": 2215, "total_steps": 6657, "loss": 0.1828, "lr": 3.376461887193173e-05, "epoch": 2.3291272344900107, "percentage": 33.27, "elapsed_time": "2:14:21", "remaining_time": "4:29:26"} +{"current_steps": 2220, "total_steps": 6657, "loss": 0.1883, "lr": 3.372652788233656e-05, "epoch": 2.334384858044164, "percentage": 33.35, "elapsed_time": "2:14:33", "remaining_time": "4:28:56"} +{"current_steps": 2225, "total_steps": 6657, "loss": 0.1854, "lr": 3.368834252977982e-05, 
"epoch": 2.3396424815983177, "percentage": 33.42, "elapsed_time": "2:14:45", "remaining_time": "4:28:26"} +{"current_steps": 2230, "total_steps": 6657, "loss": 0.1815, "lr": 3.3650063076766586e-05, "epoch": 2.344900105152471, "percentage": 33.5, "elapsed_time": "2:14:57", "remaining_time": "4:27:54"} +{"current_steps": 2235, "total_steps": 6657, "loss": 0.1898, "lr": 3.3611689786448786e-05, "epoch": 2.3501577287066246, "percentage": 33.57, "elapsed_time": "2:15:09", "remaining_time": "4:27:24"} +{"current_steps": 2240, "total_steps": 6657, "loss": 0.1861, "lr": 3.357322292262346e-05, "epoch": 2.355415352260778, "percentage": 33.65, "elapsed_time": "2:15:20", "remaining_time": "4:26:53"} +{"current_steps": 2245, "total_steps": 6657, "loss": 0.1929, "lr": 3.353466274973092e-05, "epoch": 2.3606729758149316, "percentage": 33.72, "elapsed_time": "2:15:32", "remaining_time": "4:26:22"} +{"current_steps": 2250, "total_steps": 6657, "loss": 0.1885, "lr": 3.3496009532852907e-05, "epoch": 2.365930599369085, "percentage": 33.8, "elapsed_time": "2:15:47", "remaining_time": "4:25:57"} +{"current_steps": 2255, "total_steps": 6657, "loss": 0.1872, "lr": 3.345726353771082e-05, "epoch": 2.3711882229232386, "percentage": 33.87, "elapsed_time": "2:15:59", "remaining_time": "4:25:27"} +{"current_steps": 2260, "total_steps": 6657, "loss": 0.176, "lr": 3.341842503066384e-05, "epoch": 2.376445846477392, "percentage": 33.95, "elapsed_time": "2:16:10", "remaining_time": "4:24:55"} +{"current_steps": 2265, "total_steps": 6657, "loss": 0.1839, "lr": 3.3379494278707136e-05, "epoch": 2.3817034700315456, "percentage": 34.02, "elapsed_time": "2:16:22", "remaining_time": "4:24:26"} +{"current_steps": 2270, "total_steps": 6657, "loss": 0.1845, "lr": 3.334047154947e-05, "epoch": 2.386961093585699, "percentage": 34.1, "elapsed_time": "2:16:34", "remaining_time": "4:23:56"} +{"current_steps": 2275, "total_steps": 6657, "loss": 0.18, "lr": 3.330135711121404e-05, "epoch": 2.392218717139853, 
"percentage": 34.17, "elapsed_time": "2:16:45", "remaining_time": "4:23:25"} +{"current_steps": 2280, "total_steps": 6657, "loss": 0.1737, "lr": 3.32621512328313e-05, "epoch": 2.3974763406940065, "percentage": 34.25, "elapsed_time": "2:16:57", "remaining_time": "4:22:55"} +{"current_steps": 2285, "total_steps": 6657, "loss": 0.1725, "lr": 3.3222854183842434e-05, "epoch": 2.40273396424816, "percentage": 34.32, "elapsed_time": "2:17:09", "remaining_time": "4:22:25"} +{"current_steps": 2290, "total_steps": 6657, "loss": 0.1944, "lr": 3.318346623439486e-05, "epoch": 2.4079915878023135, "percentage": 34.4, "elapsed_time": "2:17:21", "remaining_time": "4:21:55"} +{"current_steps": 2295, "total_steps": 6657, "loss": 0.1805, "lr": 3.314398765526087e-05, "epoch": 2.413249211356467, "percentage": 34.47, "elapsed_time": "2:17:32", "remaining_time": "4:21:25"} +{"current_steps": 2300, "total_steps": 6657, "loss": 0.1714, "lr": 3.310441871783581e-05, "epoch": 2.4185068349106205, "percentage": 34.55, "elapsed_time": "2:17:46", "remaining_time": "4:20:59"} +{"current_steps": 2305, "total_steps": 6657, "loss": 0.1755, "lr": 3.3064759694136165e-05, "epoch": 2.423764458464774, "percentage": 34.63, "elapsed_time": "2:18:02", "remaining_time": "4:20:38"} +{"current_steps": 2310, "total_steps": 6657, "loss": 0.1666, "lr": 3.302501085679776e-05, "epoch": 2.4290220820189274, "percentage": 34.7, "elapsed_time": "2:18:14", "remaining_time": "4:20:08"} +{"current_steps": 2315, "total_steps": 6657, "loss": 0.1806, "lr": 3.29851724790738e-05, "epoch": 2.434279705573081, "percentage": 34.78, "elapsed_time": "2:18:25", "remaining_time": "4:19:38"} +{"current_steps": 2320, "total_steps": 6657, "loss": 0.1764, "lr": 3.294524483483306e-05, "epoch": 2.4395373291272344, "percentage": 34.85, "elapsed_time": "2:18:37", "remaining_time": "4:19:08"} +{"current_steps": 2325, "total_steps": 6657, "loss": 0.1843, "lr": 3.290522819855799e-05, "epoch": 2.444794952681388, "percentage": 34.93, "elapsed_time": 
"2:18:50", "remaining_time": "4:18:41"} +{"current_steps": 2330, "total_steps": 6657, "loss": 0.1782, "lr": 3.2865122845342776e-05, "epoch": 2.4500525762355414, "percentage": 35.0, "elapsed_time": "2:19:02", "remaining_time": "4:18:12"} +{"current_steps": 2335, "total_steps": 6657, "loss": 0.1793, "lr": 3.282492905089151e-05, "epoch": 2.455310199789695, "percentage": 35.08, "elapsed_time": "2:19:14", "remaining_time": "4:17:43"} +{"current_steps": 2340, "total_steps": 6657, "loss": 0.1827, "lr": 3.2784647091516285e-05, "epoch": 2.4605678233438484, "percentage": 35.15, "elapsed_time": "2:19:25", "remaining_time": "4:17:13"} +{"current_steps": 2345, "total_steps": 6657, "loss": 0.1733, "lr": 3.274427724413527e-05, "epoch": 2.465825446898002, "percentage": 35.23, "elapsed_time": "2:19:46", "remaining_time": "4:17:00"} +{"current_steps": 2350, "total_steps": 6657, "loss": 0.1181, "lr": 3.270381978627081e-05, "epoch": 2.471083070452156, "percentage": 35.3, "elapsed_time": "2:20:04", "remaining_time": "4:16:43"} +{"current_steps": 2355, "total_steps": 6657, "loss": 0.1067, "lr": 3.266327499604755e-05, "epoch": 2.4763406940063093, "percentage": 35.38, "elapsed_time": "2:20:25", "remaining_time": "4:16:31"} +{"current_steps": 2360, "total_steps": 6657, "loss": 0.1084, "lr": 3.262264315219049e-05, "epoch": 2.481598317560463, "percentage": 35.45, "elapsed_time": "2:20:42", "remaining_time": "4:16:11"} +{"current_steps": 2365, "total_steps": 6657, "loss": 0.1013, "lr": 3.258192453402306e-05, "epoch": 2.4868559411146163, "percentage": 35.53, "elapsed_time": "2:20:59", "remaining_time": "4:15:52"} +{"current_steps": 2370, "total_steps": 6657, "loss": 0.1067, "lr": 3.254111942146526e-05, "epoch": 2.4921135646687698, "percentage": 35.6, "elapsed_time": "2:21:22", "remaining_time": "4:15:42"} +{"current_steps": 2375, "total_steps": 6657, "loss": 0.156, "lr": 3.2500228095031677e-05, "epoch": 2.4973711882229233, "percentage": 35.68, "elapsed_time": "2:21:42", "remaining_time": 
"4:15:29"} +{"current_steps": 2380, "total_steps": 6657, "loss": 0.118, "lr": 3.2459250835829553e-05, "epoch": 2.5026288117770767, "percentage": 35.75, "elapsed_time": "2:22:01", "remaining_time": "4:15:13"} +{"current_steps": 2385, "total_steps": 6657, "loss": 0.1154, "lr": 3.241818792555692e-05, "epoch": 2.5078864353312302, "percentage": 35.83, "elapsed_time": "2:22:18", "remaining_time": "4:14:53"} +{"current_steps": 2390, "total_steps": 6657, "loss": 0.1431, "lr": 3.2377039646500565e-05, "epoch": 2.5131440588853837, "percentage": 35.9, "elapsed_time": "2:22:50", "remaining_time": "4:15:00"} +{"current_steps": 2395, "total_steps": 6657, "loss": 0.0944, "lr": 3.2335806281534195e-05, "epoch": 2.518401682439537, "percentage": 35.98, "elapsed_time": "2:23:11", "remaining_time": "4:14:49"} +{"current_steps": 2400, "total_steps": 6657, "loss": 0.0988, "lr": 3.229448811411639e-05, "epoch": 2.5236593059936907, "percentage": 36.05, "elapsed_time": "2:23:35", "remaining_time": "4:14:41"} +{"current_steps": 2405, "total_steps": 6657, "loss": 0.1014, "lr": 3.225308542828874e-05, "epoch": 2.5289169295478446, "percentage": 36.13, "elapsed_time": "2:24:55", "remaining_time": "4:16:12"} +{"current_steps": 2410, "total_steps": 6657, "loss": 0.1491, "lr": 3.221159850867385e-05, "epoch": 2.534174553101998, "percentage": 36.2, "elapsed_time": "2:25:17", "remaining_time": "4:16:02"} +{"current_steps": 2415, "total_steps": 6657, "loss": 0.0921, "lr": 3.217002764047338e-05, "epoch": 2.5394321766561516, "percentage": 36.28, "elapsed_time": "2:25:33", "remaining_time": "4:15:40"} +{"current_steps": 2420, "total_steps": 6657, "loss": 0.1129, "lr": 3.212837310946609e-05, "epoch": 2.544689800210305, "percentage": 36.35, "elapsed_time": "2:25:55", "remaining_time": "4:15:29"} +{"current_steps": 2425, "total_steps": 6657, "loss": 0.0851, "lr": 3.20866352020059e-05, "epoch": 2.5499474237644586, "percentage": 36.43, "elapsed_time": "2:26:12", "remaining_time": "4:15:08"} +{"current_steps": 
2430, "total_steps": 6657, "loss": 0.1112, "lr": 3.204481420501989e-05, "epoch": 2.555205047318612, "percentage": 36.5, "elapsed_time": "2:26:29", "remaining_time": "4:14:50"} +{"current_steps": 2435, "total_steps": 6657, "loss": 0.1183, "lr": 3.200291040600632e-05, "epoch": 2.5604626708727656, "percentage": 36.58, "elapsed_time": "2:26:51", "remaining_time": "4:14:38"} +{"current_steps": 2440, "total_steps": 6657, "loss": 0.1145, "lr": 3.196092409303272e-05, "epoch": 2.565720294426919, "percentage": 36.65, "elapsed_time": "2:27:12", "remaining_time": "4:14:25"} +{"current_steps": 2445, "total_steps": 6657, "loss": 0.1165, "lr": 3.1918855554733804e-05, "epoch": 2.5709779179810726, "percentage": 36.73, "elapsed_time": "2:27:29", "remaining_time": "4:14:04"} +{"current_steps": 2450, "total_steps": 6657, "loss": 0.1602, "lr": 3.187670508030959e-05, "epoch": 2.576235541535226, "percentage": 36.8, "elapsed_time": "2:27:54", "remaining_time": "4:13:59"} +{"current_steps": 2455, "total_steps": 6657, "loss": 0.0959, "lr": 3.183447295952334e-05, "epoch": 2.5814931650893795, "percentage": 36.88, "elapsed_time": "2:28:10", "remaining_time": "4:13:37"} +{"current_steps": 2460, "total_steps": 6657, "loss": 0.1151, "lr": 3.1792159482699606e-05, "epoch": 2.586750788643533, "percentage": 36.95, "elapsed_time": "2:28:26", "remaining_time": "4:13:14"} +{"current_steps": 2465, "total_steps": 6657, "loss": 0.1332, "lr": 3.174976494072222e-05, "epoch": 2.5920084121976865, "percentage": 37.03, "elapsed_time": "2:28:43", "remaining_time": "4:12:56"} +{"current_steps": 2470, "total_steps": 6657, "loss": 0.1213, "lr": 3.170728962503227e-05, "epoch": 2.59726603575184, "percentage": 37.1, "elapsed_time": "2:29:15", "remaining_time": "4:13:00"} +{"current_steps": 2475, "total_steps": 6657, "loss": 0.1468, "lr": 3.1664733827626174e-05, "epoch": 2.6025236593059935, "percentage": 37.18, "elapsed_time": "2:29:36", "remaining_time": "4:12:47"} +{"current_steps": 2480, "total_steps": 6657, "loss": 
0.2823, "lr": 3.1622097841053574e-05, "epoch": 2.607781282860147, "percentage": 37.25, "elapsed_time": "2:30:03", "remaining_time": "4:12:43"} +{"current_steps": 2485, "total_steps": 6657, "loss": 0.1153, "lr": 3.15793819584154e-05, "epoch": 2.6130389064143005, "percentage": 37.33, "elapsed_time": "2:30:23", "remaining_time": "4:12:29"} +{"current_steps": 2490, "total_steps": 6657, "loss": 0.1154, "lr": 3.1536586473361815e-05, "epoch": 2.6182965299684544, "percentage": 37.4, "elapsed_time": "2:30:42", "remaining_time": "4:12:12"} +{"current_steps": 2495, "total_steps": 6657, "loss": 0.2104, "lr": 3.149371168009022e-05, "epoch": 2.623554153522608, "percentage": 37.48, "elapsed_time": "2:31:15", "remaining_time": "4:12:19"} +{"current_steps": 2500, "total_steps": 6657, "loss": 0.0852, "lr": 3.145075787334319e-05, "epoch": 2.6288117770767614, "percentage": 37.55, "elapsed_time": "2:31:35", "remaining_time": "4:12:04"} +{"current_steps": 2505, "total_steps": 6657, "loss": 0.1143, "lr": 3.140772534840652e-05, "epoch": 2.634069400630915, "percentage": 37.63, "elapsed_time": "2:31:57", "remaining_time": "4:11:52"} +{"current_steps": 2510, "total_steps": 6657, "loss": 0.1145, "lr": 3.1364614401107126e-05, "epoch": 2.6393270241850684, "percentage": 37.7, "elapsed_time": "2:32:17", "remaining_time": "4:11:36"} +{"current_steps": 2515, "total_steps": 6657, "loss": 0.0944, "lr": 3.1321425327811044e-05, "epoch": 2.644584647739222, "percentage": 37.78, "elapsed_time": "2:32:40", "remaining_time": "4:11:26"} +{"current_steps": 2520, "total_steps": 6657, "loss": 0.0951, "lr": 3.127815842542138e-05, "epoch": 2.6498422712933754, "percentage": 37.85, "elapsed_time": "2:32:56", "remaining_time": "4:11:05"} +{"current_steps": 2525, "total_steps": 6657, "loss": 0.0996, "lr": 3.1234813991376296e-05, "epoch": 2.655099894847529, "percentage": 37.93, "elapsed_time": "2:33:12", "remaining_time": "4:10:42"} +{"current_steps": 2530, "total_steps": 6657, "loss": 0.0923, "lr": 
3.119139232364693e-05, "epoch": 2.6603575184016823, "percentage": 38.01, "elapsed_time": "2:33:28", "remaining_time": "4:10:20"} +{"current_steps": 2535, "total_steps": 6657, "loss": 0.1472, "lr": 3.1147893720735356e-05, "epoch": 2.665615141955836, "percentage": 38.08, "elapsed_time": "2:33:54", "remaining_time": "4:10:15"} +{"current_steps": 2540, "total_steps": 6657, "loss": 0.0895, "lr": 3.110431848167255e-05, "epoch": 2.6708727655099898, "percentage": 38.16, "elapsed_time": "2:34:14", "remaining_time": "4:10:00"} +{"current_steps": 2545, "total_steps": 6657, "loss": 0.0907, "lr": 3.106066690601633e-05, "epoch": 2.6761303890641432, "percentage": 38.23, "elapsed_time": "2:34:32", "remaining_time": "4:09:42"} +{"current_steps": 2550, "total_steps": 6657, "loss": 0.0861, "lr": 3.101693929384927e-05, "epoch": 2.6813880126182967, "percentage": 38.31, "elapsed_time": "2:34:48", "remaining_time": "4:09:20"} +{"current_steps": 2555, "total_steps": 6657, "loss": 0.0914, "lr": 3.097313594577667e-05, "epoch": 2.6866456361724502, "percentage": 38.38, "elapsed_time": "2:35:09", "remaining_time": "4:09:06"} +{"current_steps": 2560, "total_steps": 6657, "loss": 0.1479, "lr": 3.092925716292447e-05, "epoch": 2.6919032597266037, "percentage": 38.46, "elapsed_time": "2:35:40", "remaining_time": "4:09:07"} +{"current_steps": 2565, "total_steps": 6657, "loss": 0.1203, "lr": 3.088530324693719e-05, "epoch": 2.697160883280757, "percentage": 38.53, "elapsed_time": "2:36:04", "remaining_time": "4:08:58"} +{"current_steps": 2570, "total_steps": 6657, "loss": 0.1055, "lr": 3.0841274499975855e-05, "epoch": 2.7024185068349107, "percentage": 38.61, "elapsed_time": "2:36:21", "remaining_time": "4:08:39"} +{"current_steps": 2575, "total_steps": 6657, "loss": 0.1288, "lr": 3.079717122471591e-05, "epoch": 2.707676130389064, "percentage": 38.68, "elapsed_time": "2:36:40", "remaining_time": "4:08:21"} +{"current_steps": 2580, "total_steps": 6657, "loss": 0.147, "lr": 3.075299372434515e-05, "epoch": 
2.7129337539432177, "percentage": 38.76, "elapsed_time": "2:37:09", "remaining_time": "4:08:21"} +{"current_steps": 2585, "total_steps": 6657, "loss": 0.1035, "lr": 3.0708742302561606e-05, "epoch": 2.718191377497371, "percentage": 38.83, "elapsed_time": "2:37:26", "remaining_time": "4:08:00"} +{"current_steps": 2590, "total_steps": 6657, "loss": 0.1293, "lr": 3.066441726357153e-05, "epoch": 2.7234490010515247, "percentage": 38.91, "elapsed_time": "2:37:44", "remaining_time": "4:07:42"} +{"current_steps": 2595, "total_steps": 6657, "loss": 0.1137, "lr": 3.062001891208721e-05, "epoch": 2.728706624605678, "percentage": 38.98, "elapsed_time": "2:38:03", "remaining_time": "4:07:24"} +{"current_steps": 2600, "total_steps": 6657, "loss": 0.0947, "lr": 3.0575547553324944e-05, "epoch": 2.7339642481598316, "percentage": 39.06, "elapsed_time": "2:38:23", "remaining_time": "4:07:09"} +{"current_steps": 2605, "total_steps": 6657, "loss": 0.1022, "lr": 3.053100349300291e-05, "epoch": 2.739221871713985, "percentage": 39.13, "elapsed_time": "2:39:51", "remaining_time": "4:08:38"} +{"current_steps": 2610, "total_steps": 6657, "loss": 0.0927, "lr": 3.0486387037339074e-05, "epoch": 2.7444794952681386, "percentage": 39.21, "elapsed_time": "2:40:18", "remaining_time": "4:08:34"} +{"current_steps": 2615, "total_steps": 6657, "loss": 0.093, "lr": 3.0441698493049078e-05, "epoch": 2.749737118822292, "percentage": 39.28, "elapsed_time": "2:40:36", "remaining_time": "4:08:15"} +{"current_steps": 2620, "total_steps": 6657, "loss": 0.1042, "lr": 3.0396938167344153e-05, "epoch": 2.7549947423764456, "percentage": 39.36, "elapsed_time": "2:40:59", "remaining_time": "4:08:03"} +{"current_steps": 2625, "total_steps": 6657, "loss": 0.1114, "lr": 3.0352106367928974e-05, "epoch": 2.7602523659305995, "percentage": 39.43, "elapsed_time": "2:41:25", "remaining_time": "4:07:56"} +{"current_steps": 2630, "total_steps": 6657, "loss": 0.1102, "lr": 3.030720340299957e-05, "epoch": 2.765509989484753, 
"percentage": 39.51, "elapsed_time": "2:41:42", "remaining_time": "4:07:36"} +{"current_steps": 2635, "total_steps": 6657, "loss": 0.0954, "lr": 3.0262229581241197e-05, "epoch": 2.7707676130389065, "percentage": 39.58, "elapsed_time": "2:41:59", "remaining_time": "4:07:16"} +{"current_steps": 2640, "total_steps": 6657, "loss": 0.0983, "lr": 3.0217185211826218e-05, "epoch": 2.77602523659306, "percentage": 39.66, "elapsed_time": "2:42:19", "remaining_time": "4:06:59"} +{"current_steps": 2645, "total_steps": 6657, "loss": 0.1108, "lr": 3.0172070604411957e-05, "epoch": 2.7812828601472135, "percentage": 39.73, "elapsed_time": "2:42:44", "remaining_time": "4:06:50"} +{"current_steps": 2650, "total_steps": 6657, "loss": 0.0822, "lr": 3.0126886069138623e-05, "epoch": 2.786540483701367, "percentage": 39.81, "elapsed_time": "2:43:01", "remaining_time": "4:06:29"} +{"current_steps": 2655, "total_steps": 6657, "loss": 0.1071, "lr": 3.0081631916627114e-05, "epoch": 2.7917981072555205, "percentage": 39.88, "elapsed_time": "2:43:18", "remaining_time": "4:06:09"} +{"current_steps": 2660, "total_steps": 6657, "loss": 0.1015, "lr": 3.003630845797693e-05, "epoch": 2.797055730809674, "percentage": 39.96, "elapsed_time": "2:43:38", "remaining_time": "4:05:52"} +{"current_steps": 2665, "total_steps": 6657, "loss": 0.1201, "lr": 2.9990916004763996e-05, "epoch": 2.8023133543638274, "percentage": 40.03, "elapsed_time": "2:44:01", "remaining_time": "4:05:41"} +{"current_steps": 2670, "total_steps": 6657, "loss": 0.1032, "lr": 2.9945454869038562e-05, "epoch": 2.807570977917981, "percentage": 40.11, "elapsed_time": "2:44:24", "remaining_time": "4:05:30"} +{"current_steps": 2675, "total_steps": 6657, "loss": 0.1106, "lr": 2.9899925363323022e-05, "epoch": 2.812828601472135, "percentage": 40.18, "elapsed_time": "2:44:39", "remaining_time": "4:05:07"} +{"current_steps": 2680, "total_steps": 6657, "loss": 0.1079, "lr": 2.9854327800609775e-05, "epoch": 2.8180862250262884, "percentage": 40.26, 
"elapsed_time": "2:45:01", "remaining_time": "4:04:52"} +{"current_steps": 2685, "total_steps": 6657, "loss": 0.1118, "lr": 2.98086624943591e-05, "epoch": 2.823343848580442, "percentage": 40.33, "elapsed_time": "2:45:20", "remaining_time": "4:04:35"} +{"current_steps": 2690, "total_steps": 6657, "loss": 0.1015, "lr": 2.976292975849696e-05, "epoch": 2.8286014721345953, "percentage": 40.41, "elapsed_time": "2:45:38", "remaining_time": "4:04:16"} +{"current_steps": 2695, "total_steps": 6657, "loss": 0.0965, "lr": 2.9717129907412857e-05, "epoch": 2.833859095688749, "percentage": 40.48, "elapsed_time": "2:45:52", "remaining_time": "4:03:52"} +{"current_steps": 2700, "total_steps": 6657, "loss": 0.1192, "lr": 2.9671263255957697e-05, "epoch": 2.8391167192429023, "percentage": 40.56, "elapsed_time": "2:46:20", "remaining_time": "4:03:46"} +{"current_steps": 2705, "total_steps": 6657, "loss": 0.0914, "lr": 2.9625330119441584e-05, "epoch": 2.844374342797056, "percentage": 40.63, "elapsed_time": "2:46:37", "remaining_time": "4:03:26"} +{"current_steps": 2710, "total_steps": 6657, "loss": 0.0922, "lr": 2.957933081363169e-05, "epoch": 2.8496319663512093, "percentage": 40.71, "elapsed_time": "2:47:00", "remaining_time": "4:03:14"} +{"current_steps": 2715, "total_steps": 6657, "loss": 0.1032, "lr": 2.953326565475006e-05, "epoch": 2.854889589905363, "percentage": 40.78, "elapsed_time": "2:47:25", "remaining_time": "4:03:04"} +{"current_steps": 2720, "total_steps": 6657, "loss": 0.0851, "lr": 2.9487134959471445e-05, "epoch": 2.8601472134595163, "percentage": 40.86, "elapsed_time": "2:47:42", "remaining_time": "4:02:44"} +{"current_steps": 2725, "total_steps": 6657, "loss": 0.0845, "lr": 2.944093904492113e-05, "epoch": 2.8654048370136698, "percentage": 40.93, "elapsed_time": "2:47:59", "remaining_time": "4:02:23"} +{"current_steps": 2730, "total_steps": 6657, "loss": 0.1054, "lr": 2.9394678228672737e-05, "epoch": 2.8706624605678233, "percentage": 41.01, "elapsed_time": "2:48:17", 
"remaining_time": "4:02:05"} +{"current_steps": 2735, "total_steps": 6657, "loss": 0.1182, "lr": 2.9348352828746076e-05, "epoch": 2.8759200841219767, "percentage": 41.08, "elapsed_time": "2:48:45", "remaining_time": "4:02:00"} +{"current_steps": 2740, "total_steps": 6657, "loss": 0.0971, "lr": 2.9301963163604916e-05, "epoch": 2.8811777076761302, "percentage": 41.16, "elapsed_time": "2:49:02", "remaining_time": "4:01:39"} +{"current_steps": 2745, "total_steps": 6657, "loss": 0.104, "lr": 2.925550955215483e-05, "epoch": 2.8864353312302837, "percentage": 41.23, "elapsed_time": "2:49:22", "remaining_time": "4:01:22"} +{"current_steps": 2750, "total_steps": 6657, "loss": 0.1209, "lr": 2.9208992313740993e-05, "epoch": 2.891692954784437, "percentage": 41.31, "elapsed_time": "2:49:39", "remaining_time": "4:01:02"} +{"current_steps": 2755, "total_steps": 6657, "loss": 0.0953, "lr": 2.916241176814596e-05, "epoch": 2.8969505783385907, "percentage": 41.39, "elapsed_time": "2:49:55", "remaining_time": "4:00:39"} +{"current_steps": 2760, "total_steps": 6657, "loss": 0.1037, "lr": 2.9115768235587526e-05, "epoch": 2.9022082018927446, "percentage": 41.46, "elapsed_time": "2:50:12", "remaining_time": "4:00:19"} +{"current_steps": 2765, "total_steps": 6657, "loss": 0.1053, "lr": 2.9069062036716454e-05, "epoch": 2.907465825446898, "percentage": 41.54, "elapsed_time": "2:50:31", "remaining_time": "4:00:02"} +{"current_steps": 2770, "total_steps": 6657, "loss": 0.1407, "lr": 2.9022293492614334e-05, "epoch": 2.9127234490010516, "percentage": 41.61, "elapsed_time": "2:50:57", "remaining_time": "3:59:53"} +{"current_steps": 2775, "total_steps": 6657, "loss": 0.1217, "lr": 2.8975462924791334e-05, "epoch": 2.917981072555205, "percentage": 41.69, "elapsed_time": "2:51:24", "remaining_time": "3:59:46"} +{"current_steps": 2780, "total_steps": 6657, "loss": 0.0979, "lr": 2.892857065518401e-05, "epoch": 2.9232386961093586, "percentage": 41.76, "elapsed_time": "2:51:43", "remaining_time": 
"3:59:29"} +{"current_steps": 2785, "total_steps": 6657, "loss": 0.0985, "lr": 2.8881617006153072e-05, "epoch": 2.928496319663512, "percentage": 41.84, "elapsed_time": "2:51:59", "remaining_time": "3:59:07"} +{"current_steps": 2790, "total_steps": 6657, "loss": 0.096, "lr": 2.8834602300481207e-05, "epoch": 2.9337539432176656, "percentage": 41.91, "elapsed_time": "2:52:15", "remaining_time": "3:58:45"} +{"current_steps": 2795, "total_steps": 6657, "loss": 0.0872, "lr": 2.878752686137082e-05, "epoch": 2.939011566771819, "percentage": 41.99, "elapsed_time": "2:52:30", "remaining_time": "3:58:22"} +{"current_steps": 2800, "total_steps": 6657, "loss": 0.1209, "lr": 2.874039101244183e-05, "epoch": 2.9442691903259726, "percentage": 42.06, "elapsed_time": "2:52:46", "remaining_time": "3:57:59"} +{"current_steps": 2805, "total_steps": 6657, "loss": 0.0869, "lr": 2.869319507772944e-05, "epoch": 2.949526813880126, "percentage": 42.14, "elapsed_time": "2:54:04", "remaining_time": "3:59:03"} +{"current_steps": 2810, "total_steps": 6657, "loss": 0.0979, "lr": 2.864593938168192e-05, "epoch": 2.9547844374342795, "percentage": 42.21, "elapsed_time": "2:54:20", "remaining_time": "3:58:41"} +{"current_steps": 2815, "total_steps": 6657, "loss": 0.0978, "lr": 2.8598624249158367e-05, "epoch": 2.9600420609884335, "percentage": 42.29, "elapsed_time": "2:54:39", "remaining_time": "3:58:22"} +{"current_steps": 2820, "total_steps": 6657, "loss": 0.0992, "lr": 2.855125000542647e-05, "epoch": 2.965299684542587, "percentage": 42.36, "elapsed_time": "2:55:03", "remaining_time": "3:58:10"} +{"current_steps": 2825, "total_steps": 6657, "loss": 0.0855, "lr": 2.8503816976160278e-05, "epoch": 2.9705573080967405, "percentage": 42.44, "elapsed_time": "2:55:22", "remaining_time": "3:57:53"} +{"current_steps": 2830, "total_steps": 6657, "loss": 0.0936, "lr": 2.8456325487437966e-05, "epoch": 2.975814931650894, "percentage": 42.51, "elapsed_time": "2:55:43", "remaining_time": "3:57:38"} +{"current_steps": 
2835, "total_steps": 6657, "loss": 0.1435, "lr": 2.8408775865739578e-05, "epoch": 2.9810725552050474, "percentage": 42.59, "elapsed_time": "2:56:20", "remaining_time": "3:57:43"} +{"current_steps": 2840, "total_steps": 6657, "loss": 0.1388, "lr": 2.8361168437944817e-05, "epoch": 2.986330178759201, "percentage": 42.66, "elapsed_time": "2:56:45", "remaining_time": "3:57:34"} +{"current_steps": 2845, "total_steps": 6657, "loss": 0.0995, "lr": 2.8313503531330738e-05, "epoch": 2.9915878023133544, "percentage": 42.74, "elapsed_time": "2:57:06", "remaining_time": "3:57:17"} +{"current_steps": 2850, "total_steps": 6657, "loss": 0.0915, "lr": 2.826578147356956e-05, "epoch": 2.996845425867508, "percentage": 42.81, "elapsed_time": "2:57:23", "remaining_time": "3:56:57"} +{"current_steps": 2855, "total_steps": 6657, "loss": 0.1717, "lr": 2.8218002592726384e-05, "epoch": 3.0021030494216614, "percentage": 42.89, "elapsed_time": "2:57:45", "remaining_time": "3:56:43"} +{"current_steps": 2860, "total_steps": 6657, "loss": 0.1962, "lr": 2.8170167217256934e-05, "epoch": 3.007360672975815, "percentage": 42.96, "elapsed_time": "2:57:57", "remaining_time": "3:56:15"} +{"current_steps": 2865, "total_steps": 6657, "loss": 0.1772, "lr": 2.8122275676005304e-05, "epoch": 3.0126182965299684, "percentage": 43.04, "elapsed_time": "2:58:09", "remaining_time": "3:55:48"} +{"current_steps": 2870, "total_steps": 6657, "loss": 0.1752, "lr": 2.807432829820171e-05, "epoch": 3.017875920084122, "percentage": 43.11, "elapsed_time": "2:58:21", "remaining_time": "3:55:20"} +{"current_steps": 2875, "total_steps": 6657, "loss": 0.1745, "lr": 2.8026325413460215e-05, "epoch": 3.0231335436382754, "percentage": 43.19, "elapsed_time": "2:58:33", "remaining_time": "3:54:54"} +{"current_steps": 2880, "total_steps": 6657, "loss": 0.1726, "lr": 2.7978267351776448e-05, "epoch": 3.028391167192429, "percentage": 43.26, "elapsed_time": "2:58:45", "remaining_time": "3:54:26"} +{"current_steps": 2885, "total_steps": 6657, 
"loss": 0.1807, "lr": 2.7930154443525377e-05, "epoch": 3.0336487907465823, "percentage": 43.34, "elapsed_time": "2:58:57", "remaining_time": "3:53:58"} +{"current_steps": 2890, "total_steps": 6657, "loss": 0.1618, "lr": 2.7881987019458992e-05, "epoch": 3.0389064143007363, "percentage": 43.41, "elapsed_time": "2:59:09", "remaining_time": "3:53:30"} +{"current_steps": 2895, "total_steps": 6657, "loss": 0.1839, "lr": 2.7833765410704062e-05, "epoch": 3.0441640378548898, "percentage": 43.49, "elapsed_time": "2:59:20", "remaining_time": "3:53:03"} +{"current_steps": 2900, "total_steps": 6657, "loss": 0.1688, "lr": 2.778548994875984e-05, "epoch": 3.0494216614090432, "percentage": 43.56, "elapsed_time": "2:59:32", "remaining_time": "3:52:35"} +{"current_steps": 2905, "total_steps": 6657, "loss": 0.1715, "lr": 2.7737160965495794e-05, "epoch": 3.0546792849631967, "percentage": 43.64, "elapsed_time": "2:59:44", "remaining_time": "3:52:09"} +{"current_steps": 2910, "total_steps": 6657, "loss": 0.1747, "lr": 2.768877879314935e-05, "epoch": 3.0599369085173502, "percentage": 43.71, "elapsed_time": "2:59:57", "remaining_time": "3:51:42"} +{"current_steps": 2915, "total_steps": 6657, "loss": 0.1753, "lr": 2.7640343764323535e-05, "epoch": 3.0651945320715037, "percentage": 43.79, "elapsed_time": "3:00:09", "remaining_time": "3:51:15"} +{"current_steps": 2920, "total_steps": 6657, "loss": 0.1568, "lr": 2.7591856211984783e-05, "epoch": 3.070452155625657, "percentage": 43.86, "elapsed_time": "3:00:20", "remaining_time": "3:50:48"} +{"current_steps": 2925, "total_steps": 6657, "loss": 0.1607, "lr": 2.7543316469460565e-05, "epoch": 3.0757097791798107, "percentage": 43.94, "elapsed_time": "3:00:32", "remaining_time": "3:50:20"} +{"current_steps": 2930, "total_steps": 6657, "loss": 0.1675, "lr": 2.7494724870437147e-05, "epoch": 3.080967402733964, "percentage": 44.01, "elapsed_time": "3:00:44", "remaining_time": "3:49:54"} +{"current_steps": 2935, "total_steps": 6657, "loss": 0.1702, "lr": 
2.7446081748957306e-05, "epoch": 3.0862250262881177, "percentage": 44.09, "elapsed_time": "3:00:56", "remaining_time": "3:49:27"} +{"current_steps": 2940, "total_steps": 6657, "loss": 0.1759, "lr": 2.7397387439417963e-05, "epoch": 3.091482649842271, "percentage": 44.16, "elapsed_time": "3:01:09", "remaining_time": "3:49:02"} +{"current_steps": 2945, "total_steps": 6657, "loss": 0.1696, "lr": 2.7348642276567973e-05, "epoch": 3.0967402733964247, "percentage": 44.24, "elapsed_time": "3:01:20", "remaining_time": "3:48:34"} +{"current_steps": 2950, "total_steps": 6657, "loss": 0.1727, "lr": 2.729984659550576e-05, "epoch": 3.101997896950578, "percentage": 44.31, "elapsed_time": "3:01:33", "remaining_time": "3:48:09"} +{"current_steps": 2955, "total_steps": 6657, "loss": 0.1645, "lr": 2.7251000731677035e-05, "epoch": 3.107255520504732, "percentage": 44.39, "elapsed_time": "3:01:47", "remaining_time": "3:47:44"} +{"current_steps": 2960, "total_steps": 6657, "loss": 0.1731, "lr": 2.72021050208725e-05, "epoch": 3.1125131440588856, "percentage": 44.46, "elapsed_time": "3:02:01", "remaining_time": "3:47:21"} +{"current_steps": 2965, "total_steps": 6657, "loss": 0.1712, "lr": 2.715315979922552e-05, "epoch": 3.117770767613039, "percentage": 44.54, "elapsed_time": "3:02:14", "remaining_time": "3:46:55"} +{"current_steps": 2970, "total_steps": 6657, "loss": 0.1568, "lr": 2.7104165403209843e-05, "epoch": 3.1230283911671926, "percentage": 44.61, "elapsed_time": "3:02:25", "remaining_time": "3:46:27"} +{"current_steps": 2975, "total_steps": 6657, "loss": 0.1641, "lr": 2.7055122169637224e-05, "epoch": 3.128286014721346, "percentage": 44.69, "elapsed_time": "3:02:37", "remaining_time": "3:46:01"} +{"current_steps": 2980, "total_steps": 6657, "loss": 0.1616, "lr": 2.7006030435655205e-05, "epoch": 3.1335436382754995, "percentage": 44.76, "elapsed_time": "3:02:49", "remaining_time": "3:45:34"} +{"current_steps": 2985, "total_steps": 6657, "loss": 0.1622, "lr": 2.6956890538744703e-05, 
"epoch": 3.138801261829653, "percentage": 44.84, "elapsed_time": "3:03:01", "remaining_time": "3:45:08"} +{"current_steps": 2990, "total_steps": 6657, "loss": 0.1738, "lr": 2.6907702816717742e-05, "epoch": 3.1440588853838065, "percentage": 44.92, "elapsed_time": "3:03:15", "remaining_time": "3:44:44"} +{"current_steps": 2995, "total_steps": 6657, "loss": 0.1583, "lr": 2.685846760771513e-05, "epoch": 3.14931650893796, "percentage": 44.99, "elapsed_time": "3:03:27", "remaining_time": "3:44:18"} +{"current_steps": 3000, "total_steps": 6657, "loss": 0.1642, "lr": 2.6809185250204113e-05, "epoch": 3.1545741324921135, "percentage": 45.07, "elapsed_time": "3:03:38", "remaining_time": "3:43:51"} +{"current_steps": 3005, "total_steps": 6657, "loss": 0.1619, "lr": 2.6759856082976066e-05, "epoch": 3.159831756046267, "percentage": 45.14, "elapsed_time": "3:04:50", "remaining_time": "3:44:38"} +{"current_steps": 3010, "total_steps": 6657, "loss": 0.1655, "lr": 2.6710480445144145e-05, "epoch": 3.1650893796004205, "percentage": 45.22, "elapsed_time": "3:05:02", "remaining_time": "3:44:12"} +{"current_steps": 3015, "total_steps": 6657, "loss": 0.1631, "lr": 2.666105867614099e-05, "epoch": 3.170347003154574, "percentage": 45.29, "elapsed_time": "3:05:14", "remaining_time": "3:43:46"} +{"current_steps": 3020, "total_steps": 6657, "loss": 0.1594, "lr": 2.6611591115716345e-05, "epoch": 3.1756046267087275, "percentage": 45.37, "elapsed_time": "3:05:26", "remaining_time": "3:43:19"} +{"current_steps": 3025, "total_steps": 6657, "loss": 0.1499, "lr": 2.6562078103934755e-05, "epoch": 3.1808622502628814, "percentage": 45.44, "elapsed_time": "3:05:37", "remaining_time": "3:42:52"} +{"current_steps": 3030, "total_steps": 6657, "loss": 0.162, "lr": 2.6512519981173238e-05, "epoch": 3.186119873817035, "percentage": 45.52, "elapsed_time": "3:05:49", "remaining_time": "3:42:26"} +{"current_steps": 3035, "total_steps": 6657, "loss": 0.1631, "lr": 2.64629170881189e-05, "epoch": 3.1913774973711884, 
"percentage": 45.59, "elapsed_time": "3:06:01", "remaining_time": "3:42:00"} +{"current_steps": 3040, "total_steps": 6657, "loss": 0.1584, "lr": 2.641326976576664e-05, "epoch": 3.196635120925342, "percentage": 45.67, "elapsed_time": "3:06:13", "remaining_time": "3:41:34"} +{"current_steps": 3045, "total_steps": 6657, "loss": 0.158, "lr": 2.6363578355416772e-05, "epoch": 3.2018927444794953, "percentage": 45.74, "elapsed_time": "3:06:25", "remaining_time": "3:41:07"} +{"current_steps": 3050, "total_steps": 6657, "loss": 0.167, "lr": 2.6313843198672712e-05, "epoch": 3.207150368033649, "percentage": 45.82, "elapsed_time": "3:06:36", "remaining_time": "3:40:41"} +{"current_steps": 3055, "total_steps": 6657, "loss": 0.1591, "lr": 2.6264064637438585e-05, "epoch": 3.2124079915878023, "percentage": 45.89, "elapsed_time": "3:06:48", "remaining_time": "3:40:15"} +{"current_steps": 3060, "total_steps": 6657, "loss": 0.1689, "lr": 2.6214243013916915e-05, "epoch": 3.217665615141956, "percentage": 45.97, "elapsed_time": "3:07:01", "remaining_time": "3:39:51"} +{"current_steps": 3065, "total_steps": 6657, "loss": 0.1678, "lr": 2.616437867060627e-05, "epoch": 3.2229232386961093, "percentage": 46.04, "elapsed_time": "3:07:15", "remaining_time": "3:39:27"} +{"current_steps": 3070, "total_steps": 6657, "loss": 0.1714, "lr": 2.6114471950298853e-05, "epoch": 3.228180862250263, "percentage": 46.12, "elapsed_time": "3:07:27", "remaining_time": "3:39:01"} +{"current_steps": 3075, "total_steps": 6657, "loss": 0.1563, "lr": 2.6064523196078248e-05, "epoch": 3.2334384858044163, "percentage": 46.19, "elapsed_time": "3:07:39", "remaining_time": "3:38:35"} +{"current_steps": 3080, "total_steps": 6657, "loss": 0.1572, "lr": 2.6014532751316937e-05, "epoch": 3.2386961093585698, "percentage": 46.27, "elapsed_time": "3:07:51", "remaining_time": "3:38:10"} +{"current_steps": 3085, "total_steps": 6657, "loss": 0.1675, "lr": 2.5964500959674057e-05, "epoch": 3.2439537329127233, "percentage": 46.34, 
"elapsed_time": "3:08:03", "remaining_time": "3:37:44"} +{"current_steps": 3090, "total_steps": 6657, "loss": 0.1557, "lr": 2.5914428165092956e-05, "epoch": 3.249211356466877, "percentage": 46.42, "elapsed_time": "3:08:15", "remaining_time": "3:37:18"} +{"current_steps": 3095, "total_steps": 6657, "loss": 0.1558, "lr": 2.5864314711798856e-05, "epoch": 3.2544689800210307, "percentage": 46.49, "elapsed_time": "3:08:26", "remaining_time": "3:36:52"} +{"current_steps": 3100, "total_steps": 6657, "loss": 0.1606, "lr": 2.5814160944296495e-05, "epoch": 3.259726603575184, "percentage": 46.57, "elapsed_time": "3:08:39", "remaining_time": "3:36:28"} +{"current_steps": 3105, "total_steps": 6657, "loss": 0.1623, "lr": 2.5763967207367752e-05, "epoch": 3.2649842271293377, "percentage": 46.64, "elapsed_time": "3:08:51", "remaining_time": "3:36:02"} +{"current_steps": 3110, "total_steps": 6657, "loss": 0.1639, "lr": 2.5713733846069272e-05, "epoch": 3.270241850683491, "percentage": 46.72, "elapsed_time": "3:09:03", "remaining_time": "3:35:37"} +{"current_steps": 3115, "total_steps": 6657, "loss": 0.1549, "lr": 2.56634612057301e-05, "epoch": 3.2754994742376446, "percentage": 46.79, "elapsed_time": "3:09:15", "remaining_time": "3:35:11"} +{"current_steps": 3120, "total_steps": 6657, "loss": 0.1641, "lr": 2.561314963194929e-05, "epoch": 3.280757097791798, "percentage": 46.87, "elapsed_time": "3:09:27", "remaining_time": "3:34:47"} +{"current_steps": 3125, "total_steps": 6657, "loss": 0.1565, "lr": 2.556279947059358e-05, "epoch": 3.2860147213459516, "percentage": 46.94, "elapsed_time": "3:09:40", "remaining_time": "3:34:22"} +{"current_steps": 3130, "total_steps": 6657, "loss": 0.161, "lr": 2.551241106779494e-05, "epoch": 3.291272344900105, "percentage": 47.02, "elapsed_time": "3:09:52", "remaining_time": "3:33:57"} +{"current_steps": 3135, "total_steps": 6657, "loss": 0.157, "lr": 2.5461984769948244e-05, "epoch": 3.2965299684542586, "percentage": 47.09, "elapsed_time": "3:10:04", 
"remaining_time": "3:33:31"} +{"current_steps": 3140, "total_steps": 6657, "loss": 0.1534, "lr": 2.5411520923708874e-05, "epoch": 3.301787592008412, "percentage": 47.17, "elapsed_time": "3:10:15", "remaining_time": "3:33:06"} +{"current_steps": 3145, "total_steps": 6657, "loss": 0.1502, "lr": 2.536101987599036e-05, "epoch": 3.3070452155625656, "percentage": 47.24, "elapsed_time": "3:10:28", "remaining_time": "3:32:42"} +{"current_steps": 3150, "total_steps": 6657, "loss": 0.1635, "lr": 2.5310481973961935e-05, "epoch": 3.312302839116719, "percentage": 47.32, "elapsed_time": "3:10:40", "remaining_time": "3:32:17"} +{"current_steps": 3155, "total_steps": 6657, "loss": 0.1543, "lr": 2.5259907565046217e-05, "epoch": 3.3175604626708726, "percentage": 47.39, "elapsed_time": "3:10:53", "remaining_time": "3:31:52"} +{"current_steps": 3160, "total_steps": 6657, "loss": 0.1579, "lr": 2.5209296996916774e-05, "epoch": 3.322818086225026, "percentage": 47.47, "elapsed_time": "3:11:04", "remaining_time": "3:31:27"} +{"current_steps": 3165, "total_steps": 6657, "loss": 0.1596, "lr": 2.5158650617495753e-05, "epoch": 3.32807570977918, "percentage": 47.54, "elapsed_time": "3:11:15", "remaining_time": "3:31:01"} +{"current_steps": 3170, "total_steps": 6657, "loss": 0.161, "lr": 2.5107968774951504e-05, "epoch": 3.3333333333333335, "percentage": 47.62, "elapsed_time": "3:11:27", "remaining_time": "3:30:36"} +{"current_steps": 3175, "total_steps": 6657, "loss": 0.1579, "lr": 2.5057251817696138e-05, "epoch": 3.338590956887487, "percentage": 47.69, "elapsed_time": "3:11:40", "remaining_time": "3:30:12"} +{"current_steps": 3180, "total_steps": 6657, "loss": 0.1597, "lr": 2.5006500094383176e-05, "epoch": 3.3438485804416405, "percentage": 47.77, "elapsed_time": "3:11:51", "remaining_time": "3:29:46"} +{"current_steps": 3185, "total_steps": 6657, "loss": 0.1601, "lr": 2.4955713953905155e-05, "epoch": 3.349106203995794, "percentage": 47.84, "elapsed_time": "3:12:03", "remaining_time": "3:29:21"} 
+{"current_steps": 3190, "total_steps": 6657, "loss": 0.1555, "lr": 2.490489374539118e-05, "epoch": 3.3543638275499474, "percentage": 47.92, "elapsed_time": "3:12:14", "remaining_time": "3:28:56"} +{"current_steps": 3195, "total_steps": 6657, "loss": 0.1681, "lr": 2.4854039818204577e-05, "epoch": 3.359621451104101, "percentage": 47.99, "elapsed_time": "3:12:26", "remaining_time": "3:28:31"} +{"current_steps": 3200, "total_steps": 6657, "loss": 0.1617, "lr": 2.480315252194047e-05, "epoch": 3.3648790746582544, "percentage": 48.07, "elapsed_time": "3:12:38", "remaining_time": "3:28:06"} +{"current_steps": 3205, "total_steps": 6657, "loss": 0.1632, "lr": 2.4752232206423387e-05, "epoch": 3.370136698212408, "percentage": 48.14, "elapsed_time": "3:13:48", "remaining_time": "3:28:44"} +{"current_steps": 3210, "total_steps": 6657, "loss": 0.1577, "lr": 2.4701279221704812e-05, "epoch": 3.3753943217665614, "percentage": 48.22, "elapsed_time": "3:14:00", "remaining_time": "3:28:19"} +{"current_steps": 3215, "total_steps": 6657, "loss": 0.149, "lr": 2.4650293918060845e-05, "epoch": 3.380651945320715, "percentage": 48.3, "elapsed_time": "3:14:12", "remaining_time": "3:27:54"} +{"current_steps": 3220, "total_steps": 6657, "loss": 0.1568, "lr": 2.4599276645989763e-05, "epoch": 3.3859095688748684, "percentage": 48.37, "elapsed_time": "3:14:23", "remaining_time": "3:27:30"} +{"current_steps": 3225, "total_steps": 6657, "loss": 0.1622, "lr": 2.4548227756209593e-05, "epoch": 3.3911671924290223, "percentage": 48.45, "elapsed_time": "3:14:35", "remaining_time": "3:27:05"} +{"current_steps": 3230, "total_steps": 6657, "loss": 0.153, "lr": 2.4497147599655726e-05, "epoch": 3.396424815983176, "percentage": 48.52, "elapsed_time": "3:14:47", "remaining_time": "3:26:40"} +{"current_steps": 3235, "total_steps": 6657, "loss": 0.1557, "lr": 2.44460365274785e-05, "epoch": 3.4016824395373293, "percentage": 48.6, "elapsed_time": "3:14:59", "remaining_time": "3:26:15"} +{"current_steps": 3240, 
"total_steps": 6657, "loss": 0.1633, "lr": 2.4394894891040774e-05, "epoch": 3.406940063091483, "percentage": 48.67, "elapsed_time": "3:15:11", "remaining_time": "3:25:51"} +{"current_steps": 3245, "total_steps": 6657, "loss": 0.1605, "lr": 2.434372304191553e-05, "epoch": 3.4121976866456363, "percentage": 48.75, "elapsed_time": "3:15:23", "remaining_time": "3:25:26"} +{"current_steps": 3250, "total_steps": 6657, "loss": 0.1541, "lr": 2.4292521331883432e-05, "epoch": 3.4174553101997898, "percentage": 48.82, "elapsed_time": "3:15:35", "remaining_time": "3:25:02"} +{"current_steps": 3255, "total_steps": 6657, "loss": 0.153, "lr": 2.4241290112930448e-05, "epoch": 3.4227129337539433, "percentage": 48.9, "elapsed_time": "3:15:53", "remaining_time": "3:24:44"} +{"current_steps": 3260, "total_steps": 6657, "loss": 0.1425, "lr": 2.4190029737245368e-05, "epoch": 3.4279705573080967, "percentage": 48.97, "elapsed_time": "3:16:05", "remaining_time": "3:24:19"} +{"current_steps": 3265, "total_steps": 6657, "loss": 0.1561, "lr": 2.4138740557217462e-05, "epoch": 3.4332281808622502, "percentage": 49.05, "elapsed_time": "3:16:16", "remaining_time": "3:23:54"} +{"current_steps": 3270, "total_steps": 6657, "loss": 0.1528, "lr": 2.4087422925433988e-05, "epoch": 3.4384858044164037, "percentage": 49.12, "elapsed_time": "3:16:28", "remaining_time": "3:23:29"} +{"current_steps": 3275, "total_steps": 6657, "loss": 0.1557, "lr": 2.4036077194677803e-05, "epoch": 3.443743427970557, "percentage": 49.2, "elapsed_time": "3:16:40", "remaining_time": "3:23:06"} +{"current_steps": 3280, "total_steps": 6657, "loss": 0.1599, "lr": 2.3984703717924932e-05, "epoch": 3.4490010515247107, "percentage": 49.27, "elapsed_time": "3:16:52", "remaining_time": "3:22:42"} +{"current_steps": 3285, "total_steps": 6657, "loss": 0.1494, "lr": 2.3933302848342127e-05, "epoch": 3.454258675078864, "percentage": 49.35, "elapsed_time": "3:17:04", "remaining_time": "3:22:18"} +{"current_steps": 3290, "total_steps": 6657, 
"loss": 0.1607, "lr": 2.388187493928447e-05, "epoch": 3.4595162986330177, "percentage": 49.42, "elapsed_time": "3:17:16", "remaining_time": "3:21:53"} +{"current_steps": 3295, "total_steps": 6657, "loss": 0.1424, "lr": 2.3830420344292922e-05, "epoch": 3.464773922187171, "percentage": 49.5, "elapsed_time": "3:17:30", "remaining_time": "3:21:31"} +{"current_steps": 3300, "total_steps": 6657, "loss": 0.1226, "lr": 2.377893941709189e-05, "epoch": 3.470031545741325, "percentage": 49.57, "elapsed_time": "3:17:53", "remaining_time": "3:21:18"} +{"current_steps": 3305, "total_steps": 6657, "loss": 0.0918, "lr": 2.3727432511586802e-05, "epoch": 3.4752891692954786, "percentage": 49.65, "elapsed_time": "3:18:15", "remaining_time": "3:21:04"} +{"current_steps": 3310, "total_steps": 6657, "loss": 0.0882, "lr": 2.3675899981861675e-05, "epoch": 3.480546792849632, "percentage": 49.72, "elapsed_time": "3:18:31", "remaining_time": "3:20:44"} +{"current_steps": 3315, "total_steps": 6657, "loss": 0.1009, "lr": 2.362434218217668e-05, "epoch": 3.4858044164037856, "percentage": 49.8, "elapsed_time": "3:18:48", "remaining_time": "3:20:26"} +{"current_steps": 3320, "total_steps": 6657, "loss": 0.0924, "lr": 2.3572759466965706e-05, "epoch": 3.491062039957939, "percentage": 49.87, "elapsed_time": "3:19:10", "remaining_time": "3:20:11"} +{"current_steps": 3325, "total_steps": 6657, "loss": 0.13, "lr": 2.3521152190833934e-05, "epoch": 3.4963196635120926, "percentage": 49.95, "elapsed_time": "3:19:31", "remaining_time": "3:19:56"} +{"current_steps": 3330, "total_steps": 6657, "loss": 0.1134, "lr": 2.346952070855537e-05, "epoch": 3.501577287066246, "percentage": 50.02, "elapsed_time": "3:19:51", "remaining_time": "3:19:40"} +{"current_steps": 3335, "total_steps": 6657, "loss": 0.1034, "lr": 2.3417865375070433e-05, "epoch": 3.5068349106203995, "percentage": 50.1, "elapsed_time": "3:20:09", "remaining_time": "3:19:22"} +{"current_steps": 3340, "total_steps": 6657, "loss": 0.1374, "lr": 
2.336618654548352e-05, "epoch": 3.512092534174553, "percentage": 50.17, "elapsed_time": "3:20:40", "remaining_time": "3:19:17"} +{"current_steps": 3345, "total_steps": 6657, "loss": 0.0734, "lr": 2.331448457506053e-05, "epoch": 3.5173501577287065, "percentage": 50.25, "elapsed_time": "3:21:01", "remaining_time": "3:19:02"} +{"current_steps": 3350, "total_steps": 6657, "loss": 0.091, "lr": 2.326275981922645e-05, "epoch": 3.52260778128286, "percentage": 50.32, "elapsed_time": "3:21:25", "remaining_time": "3:18:50"} +{"current_steps": 3355, "total_steps": 6657, "loss": 0.0907, "lr": 2.3211012633562923e-05, "epoch": 3.527865404837014, "percentage": 50.4, "elapsed_time": "3:21:48", "remaining_time": "3:18:37"} +{"current_steps": 3360, "total_steps": 6657, "loss": 0.1328, "lr": 2.3159243373805764e-05, "epoch": 3.5331230283911674, "percentage": 50.47, "elapsed_time": "3:22:11", "remaining_time": "3:18:24"} +{"current_steps": 3365, "total_steps": 6657, "loss": 0.0851, "lr": 2.3107452395842542e-05, "epoch": 3.538380651945321, "percentage": 50.55, "elapsed_time": "3:22:27", "remaining_time": "3:18:04"} +{"current_steps": 3370, "total_steps": 6657, "loss": 0.0913, "lr": 2.3055640055710132e-05, "epoch": 3.5436382754994744, "percentage": 50.62, "elapsed_time": "3:22:50", "remaining_time": "3:17:50"} +{"current_steps": 3375, "total_steps": 6657, "loss": 0.0841, "lr": 2.3003806709592268e-05, "epoch": 3.548895899053628, "percentage": 50.7, "elapsed_time": "3:23:06", "remaining_time": "3:17:31"} +{"current_steps": 3380, "total_steps": 6657, "loss": 0.0928, "lr": 2.295195271381707e-05, "epoch": 3.5541535226077814, "percentage": 50.77, "elapsed_time": "3:23:24", "remaining_time": "3:17:12"} +{"current_steps": 3385, "total_steps": 6657, "loss": 0.0957, "lr": 2.290007842485463e-05, "epoch": 3.559411146161935, "percentage": 50.85, "elapsed_time": "3:23:41", "remaining_time": "3:16:53"} +{"current_steps": 3390, "total_steps": 6657, "loss": 0.1074, "lr": 2.2848184199314546e-05, "epoch": 
3.5646687697160884, "percentage": 50.92, "elapsed_time": "3:24:07", "remaining_time": "3:16:43"} +{"current_steps": 3395, "total_steps": 6657, "loss": 0.1029, "lr": 2.2796270393943472e-05, "epoch": 3.569926393270242, "percentage": 51.0, "elapsed_time": "3:24:23", "remaining_time": "3:16:23"} +{"current_steps": 3400, "total_steps": 6657, "loss": 0.1275, "lr": 2.274433736562264e-05, "epoch": 3.5751840168243953, "percentage": 51.07, "elapsed_time": "3:24:49", "remaining_time": "3:16:12"} +{"current_steps": 3405, "total_steps": 6657, "loss": 0.105, "lr": 2.2692385471365465e-05, "epoch": 3.580441640378549, "percentage": 51.15, "elapsed_time": "3:26:02", "remaining_time": "3:16:46"} +{"current_steps": 3410, "total_steps": 6657, "loss": 0.0876, "lr": 2.264041506831503e-05, "epoch": 3.5856992639327023, "percentage": 51.22, "elapsed_time": "3:26:17", "remaining_time": "3:16:25"} +{"current_steps": 3415, "total_steps": 6657, "loss": 0.1273, "lr": 2.258842651374166e-05, "epoch": 3.590956887486856, "percentage": 51.3, "elapsed_time": "3:26:35", "remaining_time": "3:16:07"} +{"current_steps": 3420, "total_steps": 6657, "loss": 0.1072, "lr": 2.2536420165040478e-05, "epoch": 3.5962145110410093, "percentage": 51.37, "elapsed_time": "3:27:07", "remaining_time": "3:16:02"} +{"current_steps": 3425, "total_steps": 6657, "loss": 0.1004, "lr": 2.248439637972892e-05, "epoch": 3.601472134595163, "percentage": 51.45, "elapsed_time": "3:27:23", "remaining_time": "3:15:42"} +{"current_steps": 3430, "total_steps": 6657, "loss": 0.2917, "lr": 2.2432355515444284e-05, "epoch": 3.6067297581493163, "percentage": 51.52, "elapsed_time": "3:27:54", "remaining_time": "3:15:36"} +{"current_steps": 3435, "total_steps": 6657, "loss": 0.1039, "lr": 2.2380297929941296e-05, "epoch": 3.61198738170347, "percentage": 51.6, "elapsed_time": "3:28:15", "remaining_time": "3:15:20"} +{"current_steps": 3440, "total_steps": 6657, "loss": 0.1036, "lr": 2.2328223981089613e-05, "epoch": 3.6172450052576237, "percentage": 
51.67, "elapsed_time": "3:28:34", "remaining_time": "3:15:03"} +{"current_steps": 3445, "total_steps": 6657, "loss": 0.1906, "lr": 2.2276134026871393e-05, "epoch": 3.622502628811777, "percentage": 51.75, "elapsed_time": "3:29:07", "remaining_time": "3:14:58"} +{"current_steps": 3450, "total_steps": 6657, "loss": 0.0842, "lr": 2.222402842537882e-05, "epoch": 3.6277602523659307, "percentage": 51.83, "elapsed_time": "3:29:25", "remaining_time": "3:14:40"} +{"current_steps": 3455, "total_steps": 6657, "loss": 0.1021, "lr": 2.2171907534811652e-05, "epoch": 3.633017875920084, "percentage": 51.9, "elapsed_time": "3:29:48", "remaining_time": "3:14:26"} +{"current_steps": 3460, "total_steps": 6657, "loss": 0.0976, "lr": 2.2119771713474732e-05, "epoch": 3.6382754994742377, "percentage": 51.98, "elapsed_time": "3:30:07", "remaining_time": "3:14:09"} +{"current_steps": 3465, "total_steps": 6657, "loss": 0.086, "lr": 2.2067621319775564e-05, "epoch": 3.643533123028391, "percentage": 52.05, "elapsed_time": "3:30:31", "remaining_time": "3:13:56"} +{"current_steps": 3470, "total_steps": 6657, "loss": 0.0868, "lr": 2.201545671222183e-05, "epoch": 3.6487907465825447, "percentage": 52.13, "elapsed_time": "3:30:48", "remaining_time": "3:13:37"} +{"current_steps": 3475, "total_steps": 6657, "loss": 0.0865, "lr": 2.1963278249418894e-05, "epoch": 3.654048370136698, "percentage": 52.2, "elapsed_time": "3:31:03", "remaining_time": "3:13:15"} +{"current_steps": 3480, "total_steps": 6657, "loss": 0.0832, "lr": 2.191108629006742e-05, "epoch": 3.6593059936908516, "percentage": 52.28, "elapsed_time": "3:31:19", "remaining_time": "3:12:55"} +{"current_steps": 3485, "total_steps": 6657, "loss": 0.0913, "lr": 2.1858881192960814e-05, "epoch": 3.664563617245005, "percentage": 52.35, "elapsed_time": "3:31:41", "remaining_time": "3:12:40"} +{"current_steps": 3490, "total_steps": 6657, "loss": 0.1208, "lr": 2.180666331698281e-05, "epoch": 3.669821240799159, "percentage": 52.43, "elapsed_time": 
"3:32:06", "remaining_time": "3:12:28"} +{"current_steps": 3495, "total_steps": 6657, "loss": 0.0729, "lr": 2.1754433021104985e-05, "epoch": 3.6750788643533125, "percentage": 52.5, "elapsed_time": "3:32:23", "remaining_time": "3:12:09"} +{"current_steps": 3500, "total_steps": 6657, "loss": 0.0832, "lr": 2.170219066438431e-05, "epoch": 3.680336487907466, "percentage": 52.58, "elapsed_time": "3:32:40", "remaining_time": "3:11:49"} +{"current_steps": 3505, "total_steps": 6657, "loss": 0.0772, "lr": 2.164993660596065e-05, "epoch": 3.6855941114616195, "percentage": 52.65, "elapsed_time": "3:32:58", "remaining_time": "3:11:31"} +{"current_steps": 3510, "total_steps": 6657, "loss": 0.1306, "lr": 2.1597671205054326e-05, "epoch": 3.690851735015773, "percentage": 52.73, "elapsed_time": "3:33:31", "remaining_time": "3:11:26"} +{"current_steps": 3515, "total_steps": 6657, "loss": 0.097, "lr": 2.1545394820963637e-05, "epoch": 3.6961093585699265, "percentage": 52.8, "elapsed_time": "3:33:55", "remaining_time": "3:11:13"} +{"current_steps": 3520, "total_steps": 6657, "loss": 0.104, "lr": 2.149310781306237e-05, "epoch": 3.70136698212408, "percentage": 52.88, "elapsed_time": "3:34:13", "remaining_time": "3:10:54"} +{"current_steps": 3525, "total_steps": 6657, "loss": 0.0952, "lr": 2.1440810540797354e-05, "epoch": 3.7066246056782335, "percentage": 52.95, "elapsed_time": "3:34:30", "remaining_time": "3:10:35"} +{"current_steps": 3530, "total_steps": 6657, "loss": 0.1391, "lr": 2.1388503363685985e-05, "epoch": 3.711882229232387, "percentage": 53.03, "elapsed_time": "3:34:59", "remaining_time": "3:10:26"} +{"current_steps": 3535, "total_steps": 6657, "loss": 0.0949, "lr": 2.133618664131374e-05, "epoch": 3.7171398527865405, "percentage": 53.1, "elapsed_time": "3:35:17", "remaining_time": "3:10:08"} +{"current_steps": 3540, "total_steps": 6657, "loss": 0.1215, "lr": 2.1283860733331722e-05, "epoch": 3.722397476340694, "percentage": 53.18, "elapsed_time": "3:35:36", "remaining_time": 
"3:09:50"} +{"current_steps": 3545, "total_steps": 6657, "loss": 0.0908, "lr": 2.123152599945417e-05, "epoch": 3.7276550998948474, "percentage": 53.25, "elapsed_time": "3:35:52", "remaining_time": "3:09:30"} +{"current_steps": 3550, "total_steps": 6657, "loss": 0.0924, "lr": 2.1179182799456024e-05, "epoch": 3.732912723449001, "percentage": 53.33, "elapsed_time": "3:36:14", "remaining_time": "3:09:15"} +{"current_steps": 3555, "total_steps": 6657, "loss": 0.098, "lr": 2.112683149317039e-05, "epoch": 3.7381703470031544, "percentage": 53.4, "elapsed_time": "3:36:42", "remaining_time": "3:09:05"} +{"current_steps": 3560, "total_steps": 6657, "loss": 0.0765, "lr": 2.1074472440486118e-05, "epoch": 3.743427970557308, "percentage": 53.48, "elapsed_time": "3:37:10", "remaining_time": "3:08:55"} +{"current_steps": 3565, "total_steps": 6657, "loss": 0.0768, "lr": 2.102210600134531e-05, "epoch": 3.7486855941114614, "percentage": 53.55, "elapsed_time": "3:37:27", "remaining_time": "3:08:36"} +{"current_steps": 3570, "total_steps": 6657, "loss": 0.0882, "lr": 2.096973253574084e-05, "epoch": 3.753943217665615, "percentage": 53.63, "elapsed_time": "3:37:45", "remaining_time": "3:08:17"} +{"current_steps": 3575, "total_steps": 6657, "loss": 0.1056, "lr": 2.09173524037139e-05, "epoch": 3.759200841219769, "percentage": 53.7, "elapsed_time": "3:38:16", "remaining_time": "3:08:10"} +{"current_steps": 3580, "total_steps": 6657, "loss": 0.0999, "lr": 2.0864965965351495e-05, "epoch": 3.7644584647739223, "percentage": 53.78, "elapsed_time": "3:38:33", "remaining_time": "3:07:51"} +{"current_steps": 3585, "total_steps": 6657, "loss": 0.09, "lr": 2.081257358078398e-05, "epoch": 3.769716088328076, "percentage": 53.85, "elapsed_time": "3:38:50", "remaining_time": "3:07:31"} +{"current_steps": 3590, "total_steps": 6657, "loss": 0.0861, "lr": 2.0760175610182613e-05, "epoch": 3.7749737118822293, "percentage": 53.93, "elapsed_time": "3:39:10", "remaining_time": "3:07:15"} +{"current_steps": 3595, 
"total_steps": 6657, "loss": 0.0999, "lr": 2.0707772413757016e-05, "epoch": 3.780231335436383, "percentage": 54.0, "elapsed_time": "3:39:35", "remaining_time": "3:07:02"} +{"current_steps": 3600, "total_steps": 6657, "loss": 0.0718, "lr": 2.0655364351752763e-05, "epoch": 3.7854889589905363, "percentage": 54.08, "elapsed_time": "3:39:52", "remaining_time": "3:06:42"} +{"current_steps": 3605, "total_steps": 6657, "loss": 0.0918, "lr": 2.060295178444887e-05, "epoch": 3.7907465825446898, "percentage": 54.15, "elapsed_time": "3:41:07", "remaining_time": "3:07:12"} +{"current_steps": 3610, "total_steps": 6657, "loss": 0.092, "lr": 2.055053507215533e-05, "epoch": 3.7960042060988433, "percentage": 54.23, "elapsed_time": "3:41:26", "remaining_time": "3:06:54"} +{"current_steps": 3615, "total_steps": 6657, "loss": 0.1009, "lr": 2.049811457521061e-05, "epoch": 3.8012618296529967, "percentage": 54.3, "elapsed_time": "3:41:50", "remaining_time": "3:06:40"} +{"current_steps": 3620, "total_steps": 6657, "loss": 0.0913, "lr": 2.0445690653979216e-05, "epoch": 3.8065194532071502, "percentage": 54.38, "elapsed_time": "3:42:13", "remaining_time": "3:06:26"} +{"current_steps": 3625, "total_steps": 6657, "loss": 0.0947, "lr": 2.039326366884919e-05, "epoch": 3.8117770767613037, "percentage": 54.45, "elapsed_time": "3:42:29", "remaining_time": "3:06:05"} +{"current_steps": 3630, "total_steps": 6657, "loss": 0.0866, "lr": 2.034083398022963e-05, "epoch": 3.8170347003154577, "percentage": 54.53, "elapsed_time": "3:42:45", "remaining_time": "3:05:45"} +{"current_steps": 3635, "total_steps": 6657, "loss": 0.1117, "lr": 2.028840194854822e-05, "epoch": 3.822292323869611, "percentage": 54.6, "elapsed_time": "3:43:09", "remaining_time": "3:05:31"} +{"current_steps": 3640, "total_steps": 6657, "loss": 0.0883, "lr": 2.0235967934248756e-05, "epoch": 3.8275499474237646, "percentage": 54.68, "elapsed_time": "3:43:26", "remaining_time": "3:05:11"} +{"current_steps": 3645, "total_steps": 6657, "loss": 
0.0819, "lr": 2.018353229778867e-05, "epoch": 3.832807570977918, "percentage": 54.75, "elapsed_time": "3:43:42", "remaining_time": "3:04:51"} +{"current_steps": 3650, "total_steps": 6657, "loss": 0.1024, "lr": 2.0131095399636522e-05, "epoch": 3.8380651945320716, "percentage": 54.83, "elapsed_time": "3:44:08", "remaining_time": "3:04:39"} +{"current_steps": 3655, "total_steps": 6657, "loss": 0.0963, "lr": 2.0078657600269573e-05, "epoch": 3.843322818086225, "percentage": 54.9, "elapsed_time": "3:44:27", "remaining_time": "3:04:21"} +{"current_steps": 3660, "total_steps": 6657, "loss": 0.0745, "lr": 2.0026219260171262e-05, "epoch": 3.8485804416403786, "percentage": 54.98, "elapsed_time": "3:44:47", "remaining_time": "3:04:04"} +{"current_steps": 3665, "total_steps": 6657, "loss": 0.0883, "lr": 1.9973780739828748e-05, "epoch": 3.853838065194532, "percentage": 55.05, "elapsed_time": "3:45:13", "remaining_time": "3:03:51"} +{"current_steps": 3670, "total_steps": 6657, "loss": 0.0806, "lr": 1.9921342399730433e-05, "epoch": 3.8590956887486856, "percentage": 55.13, "elapsed_time": "3:45:31", "remaining_time": "3:03:33"} +{"current_steps": 3675, "total_steps": 6657, "loss": 0.0717, "lr": 1.9868904600363485e-05, "epoch": 3.864353312302839, "percentage": 55.21, "elapsed_time": "3:45:48", "remaining_time": "3:03:13"} +{"current_steps": 3680, "total_steps": 6657, "loss": 0.0955, "lr": 1.9816467702211342e-05, "epoch": 3.8696109358569926, "percentage": 55.28, "elapsed_time": "3:46:06", "remaining_time": "3:02:55"} +{"current_steps": 3685, "total_steps": 6657, "loss": 0.1015, "lr": 1.9764032065751248e-05, "epoch": 3.874868559411146, "percentage": 55.36, "elapsed_time": "3:46:32", "remaining_time": "3:02:42"} +{"current_steps": 3690, "total_steps": 6657, "loss": 0.0896, "lr": 1.971159805145178e-05, "epoch": 3.8801261829652995, "percentage": 55.43, "elapsed_time": "3:46:52", "remaining_time": "3:02:25"} +{"current_steps": 3695, "total_steps": 6657, "loss": 0.0961, "lr": 
1.965916601977038e-05, "epoch": 3.885383806519453, "percentage": 55.51, "elapsed_time": "3:47:11", "remaining_time": "3:02:07"} +{"current_steps": 3700, "total_steps": 6657, "loss": 0.1091, "lr": 1.9606736331150812e-05, "epoch": 3.8906414300736065, "percentage": 55.58, "elapsed_time": "3:47:29", "remaining_time": "3:01:48"} +{"current_steps": 3705, "total_steps": 6657, "loss": 0.0793, "lr": 1.9554309346020784e-05, "epoch": 3.89589905362776, "percentage": 55.66, "elapsed_time": "3:47:44", "remaining_time": "3:01:27"} +{"current_steps": 3710, "total_steps": 6657, "loss": 0.0942, "lr": 1.9501885424789394e-05, "epoch": 3.9011566771819135, "percentage": 55.73, "elapsed_time": "3:48:01", "remaining_time": "3:01:07"} +{"current_steps": 3715, "total_steps": 6657, "loss": 0.0802, "lr": 1.9449464927844677e-05, "epoch": 3.9064143007360674, "percentage": 55.81, "elapsed_time": "3:48:18", "remaining_time": "3:00:48"} +{"current_steps": 3720, "total_steps": 6657, "loss": 0.1272, "lr": 1.939704821555113e-05, "epoch": 3.911671924290221, "percentage": 55.88, "elapsed_time": "3:48:43", "remaining_time": "3:00:34"} +{"current_steps": 3725, "total_steps": 6657, "loss": 0.1179, "lr": 1.9344635648247244e-05, "epoch": 3.9169295478443744, "percentage": 55.96, "elapsed_time": "3:49:13", "remaining_time": "3:00:25"} +{"current_steps": 3730, "total_steps": 6657, "loss": 0.0873, "lr": 1.9292227586242994e-05, "epoch": 3.922187171398528, "percentage": 56.03, "elapsed_time": "3:49:32", "remaining_time": "3:00:07"} +{"current_steps": 3735, "total_steps": 6657, "loss": 0.0862, "lr": 1.9239824389817397e-05, "epoch": 3.9274447949526814, "percentage": 56.11, "elapsed_time": "3:49:48", "remaining_time": "2:59:47"} +{"current_steps": 3740, "total_steps": 6657, "loss": 0.0867, "lr": 1.9187426419216026e-05, "epoch": 3.932702418506835, "percentage": 56.18, "elapsed_time": "3:50:05", "remaining_time": "2:59:27"} +{"current_steps": 3745, "total_steps": 6657, "loss": 0.0777, "lr": 1.9135034034648515e-05, 
"epoch": 3.9379600420609884, "percentage": 56.26, "elapsed_time": "3:50:20", "remaining_time": "2:59:06"} +{"current_steps": 3750, "total_steps": 6657, "loss": 0.1056, "lr": 1.90826475962861e-05, "epoch": 3.943217665615142, "percentage": 56.33, "elapsed_time": "3:50:35", "remaining_time": "2:58:45"} +{"current_steps": 3755, "total_steps": 6657, "loss": 0.0763, "lr": 1.9030267464259164e-05, "epoch": 3.9484752891692954, "percentage": 56.41, "elapsed_time": "3:50:52", "remaining_time": "2:58:25"} +{"current_steps": 3760, "total_steps": 6657, "loss": 0.081, "lr": 1.8977893998654692e-05, "epoch": 3.953732912723449, "percentage": 56.48, "elapsed_time": "3:51:08", "remaining_time": "2:58:05"} +{"current_steps": 3765, "total_steps": 6657, "loss": 0.0839, "lr": 1.8925527559513886e-05, "epoch": 3.958990536277603, "percentage": 56.56, "elapsed_time": "3:51:27", "remaining_time": "2:57:47"} +{"current_steps": 3770, "total_steps": 6657, "loss": 0.0897, "lr": 1.8873168506829614e-05, "epoch": 3.9642481598317563, "percentage": 56.63, "elapsed_time": "3:51:49", "remaining_time": "2:57:31"} +{"current_steps": 3775, "total_steps": 6657, "loss": 0.0773, "lr": 1.882081720054398e-05, "epoch": 3.9695057833859098, "percentage": 56.71, "elapsed_time": "3:52:09", "remaining_time": "2:57:14"} +{"current_steps": 3780, "total_steps": 6657, "loss": 0.0856, "lr": 1.876847400054583e-05, "epoch": 3.9747634069400632, "percentage": 56.78, "elapsed_time": "3:52:29", "remaining_time": "2:56:57"} +{"current_steps": 3785, "total_steps": 6657, "loss": 0.1051, "lr": 1.8716139266668288e-05, "epoch": 3.9800210304942167, "percentage": 56.86, "elapsed_time": "3:53:03", "remaining_time": "2:56:50"} +{"current_steps": 3790, "total_steps": 6657, "loss": 0.1526, "lr": 1.8663813358686267e-05, "epoch": 3.9852786540483702, "percentage": 56.93, "elapsed_time": "3:53:34", "remaining_time": "2:56:41"} +{"current_steps": 3795, "total_steps": 6657, "loss": 0.0855, "lr": 1.8611496636314025e-05, "epoch": 
3.9905362776025237, "percentage": 57.01, "elapsed_time": "3:53:54", "remaining_time": "2:56:24"} +{"current_steps": 3800, "total_steps": 6657, "loss": 0.0809, "lr": 1.8559189459202653e-05, "epoch": 3.995793901156677, "percentage": 57.08, "elapsed_time": "3:54:11", "remaining_time": "2:56:04"} +{"current_steps": 3805, "total_steps": 6657, "loss": 0.1319, "lr": 1.8506892186937636e-05, "epoch": 4.001051524710831, "percentage": 57.16, "elapsed_time": "3:55:33", "remaining_time": "2:56:33"} +{"current_steps": 3810, "total_steps": 6657, "loss": 0.1762, "lr": 1.845460517903637e-05, "epoch": 4.006309148264984, "percentage": 57.23, "elapsed_time": "3:55:44", "remaining_time": "2:56:09"} +{"current_steps": 3815, "total_steps": 6657, "loss": 0.1594, "lr": 1.8402328794945678e-05, "epoch": 4.011566771819138, "percentage": 57.31, "elapsed_time": "3:55:56", "remaining_time": "2:55:46"} +{"current_steps": 3820, "total_steps": 6657, "loss": 0.1582, "lr": 1.8350063394039352e-05, "epoch": 4.016824395373291, "percentage": 57.38, "elapsed_time": "3:56:08", "remaining_time": "2:55:22"} +{"current_steps": 3825, "total_steps": 6657, "loss": 0.1471, "lr": 1.82978093356157e-05, "epoch": 4.022082018927445, "percentage": 57.46, "elapsed_time": "3:56:20", "remaining_time": "2:54:59"} +{"current_steps": 3830, "total_steps": 6657, "loss": 0.1544, "lr": 1.824556697889502e-05, "epoch": 4.027339642481598, "percentage": 57.53, "elapsed_time": "3:56:33", "remaining_time": "2:54:36"} +{"current_steps": 3835, "total_steps": 6657, "loss": 0.1589, "lr": 1.8193336683017197e-05, "epoch": 4.032597266035752, "percentage": 57.61, "elapsed_time": "3:56:44", "remaining_time": "2:54:12"} +{"current_steps": 3840, "total_steps": 6657, "loss": 0.1416, "lr": 1.8141118807039193e-05, "epoch": 4.037854889589905, "percentage": 57.68, "elapsed_time": "3:56:56", "remaining_time": "2:53:49"} +{"current_steps": 3845, "total_steps": 6657, "loss": 0.1644, "lr": 1.8088913709932582e-05, "epoch": 4.043112513144059, "percentage": 
57.76, "elapsed_time": "3:57:08", "remaining_time": "2:53:25"} +{"current_steps": 3850, "total_steps": 6657, "loss": 0.1429, "lr": 1.8036721750581106e-05, "epoch": 4.048370136698212, "percentage": 57.83, "elapsed_time": "3:57:19", "remaining_time": "2:53:01"} +{"current_steps": 3855, "total_steps": 6657, "loss": 0.1521, "lr": 1.7984543287778185e-05, "epoch": 4.053627760252366, "percentage": 57.91, "elapsed_time": "3:57:31", "remaining_time": "2:52:38"} +{"current_steps": 3860, "total_steps": 6657, "loss": 0.1534, "lr": 1.7932378680224443e-05, "epoch": 4.058885383806519, "percentage": 57.98, "elapsed_time": "3:57:44", "remaining_time": "2:52:16"} +{"current_steps": 3865, "total_steps": 6657, "loss": 0.1511, "lr": 1.7880228286525275e-05, "epoch": 4.064143007360673, "percentage": 58.06, "elapsed_time": "3:57:56", "remaining_time": "2:51:53"} +{"current_steps": 3870, "total_steps": 6657, "loss": 0.1434, "lr": 1.782809246518836e-05, "epoch": 4.069400630914826, "percentage": 58.13, "elapsed_time": "3:58:08", "remaining_time": "2:51:29"} +{"current_steps": 3875, "total_steps": 6657, "loss": 0.1401, "lr": 1.7775971574621186e-05, "epoch": 4.0746582544689804, "percentage": 58.21, "elapsed_time": "3:58:19", "remaining_time": "2:51:06"} +{"current_steps": 3880, "total_steps": 6657, "loss": 0.1341, "lr": 1.772386597312861e-05, "epoch": 4.079915878023134, "percentage": 58.28, "elapsed_time": "3:58:31", "remaining_time": "2:50:43"} +{"current_steps": 3885, "total_steps": 6657, "loss": 0.1565, "lr": 1.7671776018910397e-05, "epoch": 4.085173501577287, "percentage": 58.36, "elapsed_time": "3:58:43", "remaining_time": "2:50:20"} +{"current_steps": 3890, "total_steps": 6657, "loss": 0.1563, "lr": 1.761970207005871e-05, "epoch": 4.090431125131441, "percentage": 58.43, "elapsed_time": "3:58:56", "remaining_time": "2:49:57"} +{"current_steps": 3895, "total_steps": 6657, "loss": 0.1493, "lr": 1.756764448455572e-05, "epoch": 4.095688748685594, "percentage": 58.51, "elapsed_time": 
"3:59:08", "remaining_time": "2:49:34"} +{"current_steps": 3900, "total_steps": 6657, "loss": 0.1454, "lr": 1.7515603620271087e-05, "epoch": 4.100946372239748, "percentage": 58.58, "elapsed_time": "3:59:21", "remaining_time": "2:49:12"} +{"current_steps": 3905, "total_steps": 6657, "loss": 0.1464, "lr": 1.7463579834959525e-05, "epoch": 4.106203995793901, "percentage": 58.66, "elapsed_time": "3:59:34", "remaining_time": "2:48:50"} +{"current_steps": 3910, "total_steps": 6657, "loss": 0.1477, "lr": 1.7411573486258343e-05, "epoch": 4.111461619348055, "percentage": 58.74, "elapsed_time": "3:59:49", "remaining_time": "2:48:29"} +{"current_steps": 3915, "total_steps": 6657, "loss": 0.1451, "lr": 1.735958493168498e-05, "epoch": 4.116719242902208, "percentage": 58.81, "elapsed_time": "4:00:01", "remaining_time": "2:48:06"} +{"current_steps": 3920, "total_steps": 6657, "loss": 0.1349, "lr": 1.730761452863454e-05, "epoch": 4.121976866456362, "percentage": 58.89, "elapsed_time": "4:00:13", "remaining_time": "2:47:43"} +{"current_steps": 3925, "total_steps": 6657, "loss": 0.1407, "lr": 1.7255662634377365e-05, "epoch": 4.127234490010515, "percentage": 58.96, "elapsed_time": "4:00:24", "remaining_time": "2:47:20"} +{"current_steps": 3930, "total_steps": 6657, "loss": 0.1364, "lr": 1.720372960605654e-05, "epoch": 4.132492113564669, "percentage": 59.04, "elapsed_time": "4:00:36", "remaining_time": "2:46:57"} +{"current_steps": 3935, "total_steps": 6657, "loss": 0.1453, "lr": 1.715181580068546e-05, "epoch": 4.137749737118822, "percentage": 59.11, "elapsed_time": "4:00:48", "remaining_time": "2:46:34"} +{"current_steps": 3940, "total_steps": 6657, "loss": 0.1485, "lr": 1.7099921575145372e-05, "epoch": 4.143007360672976, "percentage": 59.19, "elapsed_time": "4:01:02", "remaining_time": "2:46:13"} +{"current_steps": 3945, "total_steps": 6657, "loss": 0.1401, "lr": 1.7048047286182945e-05, "epoch": 4.148264984227129, "percentage": 59.26, "elapsed_time": "4:01:14", "remaining_time": 
"2:45:50"} +{"current_steps": 3950, "total_steps": 6657, "loss": 0.1407, "lr": 1.6996193290407742e-05, "epoch": 4.153522607781283, "percentage": 59.34, "elapsed_time": "4:01:26", "remaining_time": "2:45:27"} +{"current_steps": 3955, "total_steps": 6657, "loss": 0.1392, "lr": 1.694435994428987e-05, "epoch": 4.158780231335436, "percentage": 59.41, "elapsed_time": "4:01:38", "remaining_time": "2:45:04"} +{"current_steps": 3960, "total_steps": 6657, "loss": 0.148, "lr": 1.6892547604157464e-05, "epoch": 4.16403785488959, "percentage": 59.49, "elapsed_time": "4:01:50", "remaining_time": "2:44:42"} +{"current_steps": 3965, "total_steps": 6657, "loss": 0.1372, "lr": 1.6840756626194242e-05, "epoch": 4.169295478443743, "percentage": 59.56, "elapsed_time": "4:02:02", "remaining_time": "2:44:19"} +{"current_steps": 3970, "total_steps": 6657, "loss": 0.1355, "lr": 1.678898736643708e-05, "epoch": 4.174553101997897, "percentage": 59.64, "elapsed_time": "4:02:13", "remaining_time": "2:43:56"} +{"current_steps": 3975, "total_steps": 6657, "loss": 0.1257, "lr": 1.6737240180773554e-05, "epoch": 4.17981072555205, "percentage": 59.71, "elapsed_time": "4:02:24", "remaining_time": "2:43:33"} +{"current_steps": 3980, "total_steps": 6657, "loss": 0.1419, "lr": 1.6685515424939478e-05, "epoch": 4.185068349106204, "percentage": 59.79, "elapsed_time": "4:02:36", "remaining_time": "2:43:11"} +{"current_steps": 3985, "total_steps": 6657, "loss": 0.1334, "lr": 1.6633813454516486e-05, "epoch": 4.190325972660357, "percentage": 59.86, "elapsed_time": "4:02:48", "remaining_time": "2:42:48"} +{"current_steps": 3990, "total_steps": 6657, "loss": 0.1362, "lr": 1.658213462492957e-05, "epoch": 4.195583596214511, "percentage": 59.94, "elapsed_time": "4:03:00", "remaining_time": "2:42:25"} +{"current_steps": 3995, "total_steps": 6657, "loss": 0.1378, "lr": 1.6530479291444636e-05, "epoch": 4.200841219768664, "percentage": 60.01, "elapsed_time": "4:03:12", "remaining_time": "2:42:03"} +{"current_steps": 4000, 
"total_steps": 6657, "loss": 0.1368, "lr": 1.6478847809166066e-05, "epoch": 4.206098843322818, "percentage": 60.09, "elapsed_time": "4:03:23", "remaining_time": "2:41:40"} +{"current_steps": 4005, "total_steps": 6657, "loss": 0.1412, "lr": 1.64272405330343e-05, "epoch": 4.211356466876971, "percentage": 60.16, "elapsed_time": "4:04:36", "remaining_time": "2:41:58"} +{"current_steps": 4010, "total_steps": 6657, "loss": 0.1413, "lr": 1.6375657817823323e-05, "epoch": 4.216614090431126, "percentage": 60.24, "elapsed_time": "4:04:49", "remaining_time": "2:41:36"} +{"current_steps": 4015, "total_steps": 6657, "loss": 0.1452, "lr": 1.6324100018138328e-05, "epoch": 4.221871713985279, "percentage": 60.31, "elapsed_time": "4:05:02", "remaining_time": "2:41:14"} +{"current_steps": 4020, "total_steps": 6657, "loss": 0.1491, "lr": 1.6272567488413204e-05, "epoch": 4.2271293375394325, "percentage": 60.39, "elapsed_time": "4:05:15", "remaining_time": "2:40:52"} +{"current_steps": 4025, "total_steps": 6657, "loss": 0.1359, "lr": 1.6221060582908115e-05, "epoch": 4.232386961093586, "percentage": 60.46, "elapsed_time": "4:05:27", "remaining_time": "2:40:30"} +{"current_steps": 4030, "total_steps": 6657, "loss": 0.13, "lr": 1.616957965570708e-05, "epoch": 4.2376445846477395, "percentage": 60.54, "elapsed_time": "4:05:39", "remaining_time": "2:40:07"} +{"current_steps": 4035, "total_steps": 6657, "loss": 0.1457, "lr": 1.6118125060715534e-05, "epoch": 4.242902208201893, "percentage": 60.61, "elapsed_time": "4:05:51", "remaining_time": "2:39:45"} +{"current_steps": 4040, "total_steps": 6657, "loss": 0.1366, "lr": 1.6066697151657876e-05, "epoch": 4.2481598317560465, "percentage": 60.69, "elapsed_time": "4:06:03", "remaining_time": "2:39:23"} +{"current_steps": 4045, "total_steps": 6657, "loss": 0.1294, "lr": 1.601529628207508e-05, "epoch": 4.2534174553102, "percentage": 60.76, "elapsed_time": "4:06:14", "remaining_time": "2:39:00"} +{"current_steps": 4050, "total_steps": 6657, "loss": 
0.133, "lr": 1.5963922805322204e-05, "epoch": 4.2586750788643535, "percentage": 60.84, "elapsed_time": "4:06:25", "remaining_time": "2:38:37"} +{"current_steps": 4055, "total_steps": 6657, "loss": 0.1407, "lr": 1.5912577074566016e-05, "epoch": 4.263932702418507, "percentage": 60.91, "elapsed_time": "4:06:38", "remaining_time": "2:38:15"} +{"current_steps": 4060, "total_steps": 6657, "loss": 0.1409, "lr": 1.5861259442782548e-05, "epoch": 4.2691903259726605, "percentage": 60.99, "elapsed_time": "4:06:50", "remaining_time": "2:37:53"} +{"current_steps": 4065, "total_steps": 6657, "loss": 0.1366, "lr": 1.580997026275464e-05, "epoch": 4.274447949526814, "percentage": 61.06, "elapsed_time": "4:07:02", "remaining_time": "2:37:31"} +{"current_steps": 4070, "total_steps": 6657, "loss": 0.1342, "lr": 1.5758709887069562e-05, "epoch": 4.279705573080967, "percentage": 61.14, "elapsed_time": "4:07:14", "remaining_time": "2:37:09"} +{"current_steps": 4075, "total_steps": 6657, "loss": 0.1416, "lr": 1.570747866811658e-05, "epoch": 4.284963196635121, "percentage": 61.21, "elapsed_time": "4:07:27", "remaining_time": "2:36:47"} +{"current_steps": 4080, "total_steps": 6657, "loss": 0.1368, "lr": 1.5656276958084478e-05, "epoch": 4.290220820189274, "percentage": 61.29, "elapsed_time": "4:07:39", "remaining_time": "2:36:25"} +{"current_steps": 4085, "total_steps": 6657, "loss": 0.1363, "lr": 1.560510510895923e-05, "epoch": 4.295478443743428, "percentage": 61.36, "elapsed_time": "4:07:50", "remaining_time": "2:36:03"} +{"current_steps": 4090, "total_steps": 6657, "loss": 0.1319, "lr": 1.5553963472521506e-05, "epoch": 4.300736067297581, "percentage": 61.44, "elapsed_time": "4:08:02", "remaining_time": "2:35:40"} +{"current_steps": 4095, "total_steps": 6657, "loss": 0.1259, "lr": 1.5502852400344277e-05, "epoch": 4.305993690851735, "percentage": 61.51, "elapsed_time": "4:08:14", "remaining_time": "2:35:18"} +{"current_steps": 4100, "total_steps": 6657, "loss": 0.1395, "lr": 
1.545177224379041e-05, "epoch": 4.311251314405888, "percentage": 61.59, "elapsed_time": "4:08:27", "remaining_time": "2:34:57"} +{"current_steps": 4105, "total_steps": 6657, "loss": 0.1381, "lr": 1.5400723354010244e-05, "epoch": 4.316508937960042, "percentage": 61.66, "elapsed_time": "4:08:39", "remaining_time": "2:34:35"} +{"current_steps": 4110, "total_steps": 6657, "loss": 0.1338, "lr": 1.5349706081939158e-05, "epoch": 4.321766561514195, "percentage": 61.74, "elapsed_time": "4:08:51", "remaining_time": "2:34:12"} +{"current_steps": 4115, "total_steps": 6657, "loss": 0.1357, "lr": 1.5298720778295195e-05, "epoch": 4.327024185068349, "percentage": 61.81, "elapsed_time": "4:09:02", "remaining_time": "2:33:50"} +{"current_steps": 4120, "total_steps": 6657, "loss": 0.1401, "lr": 1.5247767793576625e-05, "epoch": 4.332281808622502, "percentage": 61.89, "elapsed_time": "4:09:14", "remaining_time": "2:33:28"} +{"current_steps": 4125, "total_steps": 6657, "loss": 0.1335, "lr": 1.519684747805953e-05, "epoch": 4.337539432176656, "percentage": 61.96, "elapsed_time": "4:09:26", "remaining_time": "2:33:06"} +{"current_steps": 4130, "total_steps": 6657, "loss": 0.1348, "lr": 1.5145960181795421e-05, "epoch": 4.342797055730809, "percentage": 62.04, "elapsed_time": "4:09:38", "remaining_time": "2:32:44"} +{"current_steps": 4135, "total_steps": 6657, "loss": 0.1378, "lr": 1.509510625460883e-05, "epoch": 4.348054679284963, "percentage": 62.12, "elapsed_time": "4:09:49", "remaining_time": "2:32:22"} +{"current_steps": 4140, "total_steps": 6657, "loss": 0.1319, "lr": 1.5044286046094851e-05, "epoch": 4.353312302839116, "percentage": 62.19, "elapsed_time": "4:10:01", "remaining_time": "2:32:00"} +{"current_steps": 4145, "total_steps": 6657, "loss": 0.1431, "lr": 1.4993499905616823e-05, "epoch": 4.358569926393271, "percentage": 62.27, "elapsed_time": "4:10:12", "remaining_time": "2:31:38"} +{"current_steps": 4150, "total_steps": 6657, "loss": 0.1427, "lr": 1.494274818230387e-05, "epoch": 
4.363827549947424, "percentage": 62.34, "elapsed_time": "4:10:25", "remaining_time": "2:31:16"} +{"current_steps": 4155, "total_steps": 6657, "loss": 0.1368, "lr": 1.4892031225048503e-05, "epoch": 4.369085173501578, "percentage": 62.42, "elapsed_time": "4:10:39", "remaining_time": "2:30:56"} +{"current_steps": 4160, "total_steps": 6657, "loss": 0.1403, "lr": 1.4841349382504247e-05, "epoch": 4.374342797055731, "percentage": 62.49, "elapsed_time": "4:10:51", "remaining_time": "2:30:34"} +{"current_steps": 4165, "total_steps": 6657, "loss": 0.1267, "lr": 1.4790703003083236e-05, "epoch": 4.379600420609885, "percentage": 62.57, "elapsed_time": "4:11:02", "remaining_time": "2:30:12"} +{"current_steps": 4170, "total_steps": 6657, "loss": 0.1312, "lr": 1.4740092434953793e-05, "epoch": 4.384858044164038, "percentage": 62.64, "elapsed_time": "4:11:14", "remaining_time": "2:29:50"} +{"current_steps": 4175, "total_steps": 6657, "loss": 0.1394, "lr": 1.4689518026038065e-05, "epoch": 4.390115667718192, "percentage": 62.72, "elapsed_time": "4:11:26", "remaining_time": "2:29:28"} +{"current_steps": 4180, "total_steps": 6657, "loss": 0.132, "lr": 1.4638980124009649e-05, "epoch": 4.395373291272345, "percentage": 62.79, "elapsed_time": "4:11:38", "remaining_time": "2:29:06"} +{"current_steps": 4185, "total_steps": 6657, "loss": 0.1279, "lr": 1.458847907629113e-05, "epoch": 4.400630914826499, "percentage": 62.87, "elapsed_time": "4:11:49", "remaining_time": "2:28:45"} +{"current_steps": 4190, "total_steps": 6657, "loss": 0.1369, "lr": 1.4538015230051761e-05, "epoch": 4.405888538380652, "percentage": 62.94, "elapsed_time": "4:12:01", "remaining_time": "2:28:23"} +{"current_steps": 4195, "total_steps": 6657, "loss": 0.1446, "lr": 1.4487588932205072e-05, "epoch": 4.411146161934806, "percentage": 63.02, "elapsed_time": "4:12:14", "remaining_time": "2:28:02"} +{"current_steps": 4200, "total_steps": 6657, "loss": 0.1286, "lr": 1.4437200529406425e-05, "epoch": 4.416403785488959, 
"percentage": 63.09, "elapsed_time": "4:12:26", "remaining_time": "2:27:40"} +{"current_steps": 4205, "total_steps": 6657, "loss": 0.1388, "lr": 1.4386850368050706e-05, "epoch": 4.421661409043113, "percentage": 63.17, "elapsed_time": "4:13:39", "remaining_time": "2:27:54"} +{"current_steps": 4210, "total_steps": 6657, "loss": 0.1186, "lr": 1.433653879426991e-05, "epoch": 4.426919032597266, "percentage": 63.24, "elapsed_time": "4:13:54", "remaining_time": "2:27:34"} +{"current_steps": 4215, "total_steps": 6657, "loss": 0.1311, "lr": 1.4286266153930733e-05, "epoch": 4.4321766561514195, "percentage": 63.32, "elapsed_time": "4:14:05", "remaining_time": "2:27:12"} +{"current_steps": 4220, "total_steps": 6657, "loss": 0.1306, "lr": 1.4236032792632251e-05, "epoch": 4.437434279705573, "percentage": 63.39, "elapsed_time": "4:14:17", "remaining_time": "2:26:50"} +{"current_steps": 4225, "total_steps": 6657, "loss": 0.1355, "lr": 1.4185839055703511e-05, "epoch": 4.4426919032597265, "percentage": 63.47, "elapsed_time": "4:14:29", "remaining_time": "2:26:29"} +{"current_steps": 4230, "total_steps": 6657, "loss": 0.1317, "lr": 1.4135685288201151e-05, "epoch": 4.44794952681388, "percentage": 63.54, "elapsed_time": "4:14:41", "remaining_time": "2:26:08"} +{"current_steps": 4235, "total_steps": 6657, "loss": 0.1355, "lr": 1.4085571834907046e-05, "epoch": 4.4532071503680335, "percentage": 63.62, "elapsed_time": "4:14:54", "remaining_time": "2:25:46"} +{"current_steps": 4240, "total_steps": 6657, "loss": 0.1349, "lr": 1.4035499040325946e-05, "epoch": 4.458464773922187, "percentage": 63.69, "elapsed_time": "4:15:05", "remaining_time": "2:25:25"} +{"current_steps": 4245, "total_steps": 6657, "loss": 0.1296, "lr": 1.3985467248683064e-05, "epoch": 4.4637223974763405, "percentage": 63.77, "elapsed_time": "4:15:19", "remaining_time": "2:25:04"} +{"current_steps": 4250, "total_steps": 6657, "loss": 0.1083, "lr": 1.3935476803921755e-05, "epoch": 4.468980021030494, "percentage": 63.84, 
"elapsed_time": "4:15:41", "remaining_time": "2:24:48"} +{"current_steps": 4255, "total_steps": 6657, "loss": 0.0839, "lr": 1.3885528049701148e-05, "epoch": 4.4742376445846475, "percentage": 63.92, "elapsed_time": "4:16:01", "remaining_time": "2:24:31"} +{"current_steps": 4260, "total_steps": 6657, "loss": 0.0788, "lr": 1.3835621329393738e-05, "epoch": 4.479495268138801, "percentage": 63.99, "elapsed_time": "4:16:20", "remaining_time": "2:24:13"} +{"current_steps": 4265, "total_steps": 6657, "loss": 0.0967, "lr": 1.3785756986083091e-05, "epoch": 4.484752891692954, "percentage": 64.07, "elapsed_time": "4:16:37", "remaining_time": "2:23:55"} +{"current_steps": 4270, "total_steps": 6657, "loss": 0.0779, "lr": 1.3735935362561419e-05, "epoch": 4.490010515247108, "percentage": 64.14, "elapsed_time": "4:16:59", "remaining_time": "2:23:39"} +{"current_steps": 4275, "total_steps": 6657, "loss": 0.1084, "lr": 1.3686156801327293e-05, "epoch": 4.495268138801261, "percentage": 64.22, "elapsed_time": "4:17:18", "remaining_time": "2:23:22"} +{"current_steps": 4280, "total_steps": 6657, "loss": 0.1138, "lr": 1.3636421644583231e-05, "epoch": 4.500525762355416, "percentage": 64.29, "elapsed_time": "4:17:39", "remaining_time": "2:23:05"} +{"current_steps": 4285, "total_steps": 6657, "loss": 0.0859, "lr": 1.3586730234233367e-05, "epoch": 4.505783385909568, "percentage": 64.37, "elapsed_time": "4:17:56", "remaining_time": "2:22:47"} +{"current_steps": 4290, "total_steps": 6657, "loss": 0.1196, "lr": 1.3537082911881106e-05, "epoch": 4.511041009463723, "percentage": 64.44, "elapsed_time": "4:18:23", "remaining_time": "2:22:33"} +{"current_steps": 4295, "total_steps": 6657, "loss": 0.0742, "lr": 1.3487480018826772e-05, "epoch": 4.516298633017876, "percentage": 64.52, "elapsed_time": "4:18:48", "remaining_time": "2:22:19"} +{"current_steps": 4300, "total_steps": 6657, "loss": 0.0812, "lr": 1.343792189606525e-05, "epoch": 4.52155625657203, "percentage": 64.59, "elapsed_time": "4:19:12", 
"remaining_time": "2:22:05"} +{"current_steps": 4305, "total_steps": 6657, "loss": 0.081, "lr": 1.338840888428366e-05, "epoch": 4.526813880126183, "percentage": 64.67, "elapsed_time": "4:19:32", "remaining_time": "2:21:47"} +{"current_steps": 4310, "total_steps": 6657, "loss": 0.095, "lr": 1.3338941323859023e-05, "epoch": 4.532071503680337, "percentage": 64.74, "elapsed_time": "4:19:57", "remaining_time": "2:21:33"} +{"current_steps": 4315, "total_steps": 6657, "loss": 0.1014, "lr": 1.3289519554855858e-05, "epoch": 4.53732912723449, "percentage": 64.82, "elapsed_time": "4:20:15", "remaining_time": "2:21:15"} +{"current_steps": 4320, "total_steps": 6657, "loss": 0.0797, "lr": 1.3240143917023938e-05, "epoch": 4.542586750788644, "percentage": 64.89, "elapsed_time": "4:20:36", "remaining_time": "2:20:59"} +{"current_steps": 4325, "total_steps": 6657, "loss": 0.0756, "lr": 1.3190814749795893e-05, "epoch": 4.547844374342797, "percentage": 64.97, "elapsed_time": "4:20:54", "remaining_time": "2:20:40"} +{"current_steps": 4330, "total_steps": 6657, "loss": 0.0798, "lr": 1.3141532392284873e-05, "epoch": 4.553101997896951, "percentage": 65.04, "elapsed_time": "4:21:11", "remaining_time": "2:20:21"} +{"current_steps": 4335, "total_steps": 6657, "loss": 0.0903, "lr": 1.3092297183282261e-05, "epoch": 4.558359621451104, "percentage": 65.12, "elapsed_time": "4:21:27", "remaining_time": "2:20:02"} +{"current_steps": 4340, "total_steps": 6657, "loss": 0.0864, "lr": 1.3043109461255305e-05, "epoch": 4.563617245005258, "percentage": 65.19, "elapsed_time": "4:21:52", "remaining_time": "2:19:48"} +{"current_steps": 4345, "total_steps": 6657, "loss": 0.0917, "lr": 1.29939695643448e-05, "epoch": 4.568874868559411, "percentage": 65.27, "elapsed_time": "4:22:09", "remaining_time": "2:19:29"} +{"current_steps": 4350, "total_steps": 6657, "loss": 0.099, "lr": 1.2944877830362777e-05, "epoch": 4.574132492113565, "percentage": 65.34, "elapsed_time": "4:22:35", "remaining_time": "2:19:15"} 
+{"current_steps": 4355, "total_steps": 6657, "loss": 0.1143, "lr": 1.289583459679017e-05, "epoch": 4.579390115667718, "percentage": 65.42, "elapsed_time": "4:22:52", "remaining_time": "2:18:57"} +{"current_steps": 4360, "total_steps": 6657, "loss": 0.0753, "lr": 1.2846840200774484e-05, "epoch": 4.584647739221872, "percentage": 65.49, "elapsed_time": "4:23:07", "remaining_time": "2:18:37"} +{"current_steps": 4365, "total_steps": 6657, "loss": 0.1135, "lr": 1.2797894979127503e-05, "epoch": 4.589905362776025, "percentage": 65.57, "elapsed_time": "4:23:25", "remaining_time": "2:18:19"} +{"current_steps": 4370, "total_steps": 6657, "loss": 0.0975, "lr": 1.2748999268322977e-05, "epoch": 4.595162986330179, "percentage": 65.65, "elapsed_time": "4:23:52", "remaining_time": "2:18:05"} +{"current_steps": 4375, "total_steps": 6657, "loss": 0.0838, "lr": 1.2700153404494247e-05, "epoch": 4.600420609884332, "percentage": 65.72, "elapsed_time": "4:24:14", "remaining_time": "2:17:49"} +{"current_steps": 4380, "total_steps": 6657, "loss": 0.2514, "lr": 1.2651357723432027e-05, "epoch": 4.605678233438486, "percentage": 65.8, "elapsed_time": "4:24:42", "remaining_time": "2:17:36"} +{"current_steps": 4385, "total_steps": 6657, "loss": 0.113, "lr": 1.2602612560582044e-05, "epoch": 4.610935856992639, "percentage": 65.87, "elapsed_time": "4:25:00", "remaining_time": "2:17:18"} +{"current_steps": 4390, "total_steps": 6657, "loss": 0.0956, "lr": 1.2553918251042701e-05, "epoch": 4.616193480546793, "percentage": 65.95, "elapsed_time": "4:25:24", "remaining_time": "2:17:03"} +{"current_steps": 4395, "total_steps": 6657, "loss": 0.1788, "lr": 1.2505275129562851e-05, "epoch": 4.621451104100946, "percentage": 66.02, "elapsed_time": "4:25:56", "remaining_time": "2:16:52"} +{"current_steps": 4400, "total_steps": 6657, "loss": 0.0821, "lr": 1.2456683530539446e-05, "epoch": 4.6267087276550996, "percentage": 66.1, "elapsed_time": "4:26:15", "remaining_time": "2:16:34"} +{"current_steps": 4405, 
"total_steps": 6657, "loss": 0.0837, "lr": 1.2408143788015225e-05, "epoch": 4.631966351209253, "percentage": 66.17, "elapsed_time": "4:27:35", "remaining_time": "2:16:48"} +{"current_steps": 4410, "total_steps": 6657, "loss": 0.0858, "lr": 1.2359656235676468e-05, "epoch": 4.6372239747634065, "percentage": 66.25, "elapsed_time": "4:27:55", "remaining_time": "2:16:30"} +{"current_steps": 4415, "total_steps": 6657, "loss": 0.0783, "lr": 1.231122120685066e-05, "epoch": 4.642481598317561, "percentage": 66.32, "elapsed_time": "4:28:13", "remaining_time": "2:16:12"} +{"current_steps": 4420, "total_steps": 6657, "loss": 0.0796, "lr": 1.2262839034504208e-05, "epoch": 4.6477392218717135, "percentage": 66.4, "elapsed_time": "4:28:36", "remaining_time": "2:15:56"} +{"current_steps": 4425, "total_steps": 6657, "loss": 0.0732, "lr": 1.2214510051240164e-05, "epoch": 4.652996845425868, "percentage": 66.47, "elapsed_time": "4:28:51", "remaining_time": "2:15:36"} +{"current_steps": 4430, "total_steps": 6657, "loss": 0.0789, "lr": 1.2166234589295951e-05, "epoch": 4.658254468980021, "percentage": 66.55, "elapsed_time": "4:29:06", "remaining_time": "2:15:17"} +{"current_steps": 4435, "total_steps": 6657, "loss": 0.0716, "lr": 1.2118012980541013e-05, "epoch": 4.663512092534175, "percentage": 66.62, "elapsed_time": "4:29:23", "remaining_time": "2:14:58"} +{"current_steps": 4440, "total_steps": 6657, "loss": 0.1116, "lr": 1.2069845556474626e-05, "epoch": 4.668769716088328, "percentage": 66.7, "elapsed_time": "4:29:54", "remaining_time": "2:14:46"} +{"current_steps": 4445, "total_steps": 6657, "loss": 0.0665, "lr": 1.2021732648223553e-05, "epoch": 4.674027339642482, "percentage": 66.77, "elapsed_time": "4:30:11", "remaining_time": "2:14:27"} +{"current_steps": 4450, "total_steps": 6657, "loss": 0.0711, "lr": 1.1973674586539791e-05, "epoch": 4.679284963196635, "percentage": 66.85, "elapsed_time": "4:30:27", "remaining_time": "2:14:08"} +{"current_steps": 4455, "total_steps": 6657, "loss": 
0.0688, "lr": 1.1925671701798292e-05, "epoch": 4.684542586750789, "percentage": 66.92, "elapsed_time": "4:30:46", "remaining_time": "2:13:50"} +{"current_steps": 4460, "total_steps": 6657, "loss": 0.1146, "lr": 1.1877724323994704e-05, "epoch": 4.689800210304942, "percentage": 67.0, "elapsed_time": "4:31:19", "remaining_time": "2:13:39"} +{"current_steps": 4465, "total_steps": 6657, "loss": 0.0798, "lr": 1.1829832782743074e-05, "epoch": 4.695057833859096, "percentage": 67.07, "elapsed_time": "4:31:37", "remaining_time": "2:13:21"} +{"current_steps": 4470, "total_steps": 6657, "loss": 0.1033, "lr": 1.178199740727362e-05, "epoch": 4.700315457413249, "percentage": 67.15, "elapsed_time": "4:32:00", "remaining_time": "2:13:05"} +{"current_steps": 4475, "total_steps": 6657, "loss": 0.0781, "lr": 1.1734218526430446e-05, "epoch": 4.705573080967403, "percentage": 67.22, "elapsed_time": "4:32:17", "remaining_time": "2:12:46"} +{"current_steps": 4480, "total_steps": 6657, "loss": 0.1061, "lr": 1.1686496468669269e-05, "epoch": 4.710830704521556, "percentage": 67.3, "elapsed_time": "4:32:45", "remaining_time": "2:12:32"} +{"current_steps": 4485, "total_steps": 6657, "loss": 0.1041, "lr": 1.1638831562055191e-05, "epoch": 4.71608832807571, "percentage": 67.37, "elapsed_time": "4:33:04", "remaining_time": "2:12:14"} +{"current_steps": 4490, "total_steps": 6657, "loss": 0.106, "lr": 1.1591224134260425e-05, "epoch": 4.721345951629863, "percentage": 67.45, "elapsed_time": "4:33:23", "remaining_time": "2:11:56"} +{"current_steps": 4495, "total_steps": 6657, "loss": 0.0805, "lr": 1.1543674512562037e-05, "epoch": 4.726603575184017, "percentage": 67.52, "elapsed_time": "4:33:39", "remaining_time": "2:11:37"} +{"current_steps": 4500, "total_steps": 6657, "loss": 0.0859, "lr": 1.1496183023839729e-05, "epoch": 4.73186119873817, "percentage": 67.6, "elapsed_time": "4:34:01", "remaining_time": "2:11:20"} +{"current_steps": 4505, "total_steps": 6657, "loss": 0.0838, "lr": 1.144874999457354e-05, 
"epoch": 4.737118822292324, "percentage": 67.67, "elapsed_time": "4:34:29", "remaining_time": "2:11:07"} +{"current_steps": 4510, "total_steps": 6657, "loss": 0.0701, "lr": 1.1401375750841637e-05, "epoch": 4.742376445846477, "percentage": 67.75, "elapsed_time": "4:34:52", "remaining_time": "2:10:51"} +{"current_steps": 4515, "total_steps": 6657, "loss": 0.0664, "lr": 1.1354060618318086e-05, "epoch": 4.747634069400631, "percentage": 67.82, "elapsed_time": "4:35:14", "remaining_time": "2:10:34"} +{"current_steps": 4520, "total_steps": 6657, "loss": 0.0789, "lr": 1.1306804922270568e-05, "epoch": 4.752891692954784, "percentage": 67.9, "elapsed_time": "4:35:32", "remaining_time": "2:10:16"} +{"current_steps": 4525, "total_steps": 6657, "loss": 0.0955, "lr": 1.1259608987558175e-05, "epoch": 4.758149316508938, "percentage": 67.97, "elapsed_time": "4:36:03", "remaining_time": "2:10:04"} +{"current_steps": 4530, "total_steps": 6657, "loss": 0.0905, "lr": 1.1212473138629187e-05, "epoch": 4.763406940063091, "percentage": 68.05, "elapsed_time": "4:36:21", "remaining_time": "2:09:45"} +{"current_steps": 4535, "total_steps": 6657, "loss": 0.0822, "lr": 1.1165397699518797e-05, "epoch": 4.768664563617245, "percentage": 68.12, "elapsed_time": "4:36:37", "remaining_time": "2:09:26"} +{"current_steps": 4540, "total_steps": 6657, "loss": 0.0725, "lr": 1.1118382993846933e-05, "epoch": 4.773922187171398, "percentage": 68.2, "elapsed_time": "4:36:57", "remaining_time": "2:09:08"} +{"current_steps": 4545, "total_steps": 6657, "loss": 0.0891, "lr": 1.1071429344816003e-05, "epoch": 4.779179810725552, "percentage": 68.27, "elapsed_time": "4:37:22", "remaining_time": "2:08:53"} +{"current_steps": 4550, "total_steps": 6657, "loss": 0.0684, "lr": 1.102453707520867e-05, "epoch": 4.784437434279706, "percentage": 68.35, "elapsed_time": "4:37:38", "remaining_time": "2:08:34"} +{"current_steps": 4555, "total_steps": 6657, "loss": 0.0822, "lr": 1.0977706507385673e-05, "epoch": 4.789695057833859, 
"percentage": 68.42, "elapsed_time": "4:37:57", "remaining_time": "2:08:15"} +{"current_steps": 4560, "total_steps": 6657, "loss": 0.0774, "lr": 1.0930937963283554e-05, "epoch": 4.794952681388013, "percentage": 68.5, "elapsed_time": "4:38:16", "remaining_time": "2:07:58"} +{"current_steps": 4565, "total_steps": 6657, "loss": 0.0794, "lr": 1.088423176441248e-05, "epoch": 4.8002103049421665, "percentage": 68.57, "elapsed_time": "4:38:37", "remaining_time": "2:07:41"} +{"current_steps": 4570, "total_steps": 6657, "loss": 0.0944, "lr": 1.0837588231854044e-05, "epoch": 4.80546792849632, "percentage": 68.65, "elapsed_time": "4:39:02", "remaining_time": "2:07:25"} +{"current_steps": 4575, "total_steps": 6657, "loss": 0.0727, "lr": 1.0791007686259019e-05, "epoch": 4.8107255520504735, "percentage": 68.72, "elapsed_time": "4:39:17", "remaining_time": "2:07:06"} +{"current_steps": 4580, "total_steps": 6657, "loss": 0.0839, "lr": 1.0744490447845172e-05, "epoch": 4.815983175604627, "percentage": 68.8, "elapsed_time": "4:39:34", "remaining_time": "2:06:46"} +{"current_steps": 4585, "total_steps": 6657, "loss": 0.0991, "lr": 1.0698036836395084e-05, "epoch": 4.8212407991587805, "percentage": 68.87, "elapsed_time": "4:39:58", "remaining_time": "2:06:31"} +{"current_steps": 4590, "total_steps": 6657, "loss": 0.0787, "lr": 1.0651647171253936e-05, "epoch": 4.826498422712934, "percentage": 68.95, "elapsed_time": "4:40:15", "remaining_time": "2:06:12"} +{"current_steps": 4595, "total_steps": 6657, "loss": 0.0699, "lr": 1.0605321771327267e-05, "epoch": 4.831756046267087, "percentage": 69.03, "elapsed_time": "4:40:31", "remaining_time": "2:05:53"} +{"current_steps": 4600, "total_steps": 6657, "loss": 0.0847, "lr": 1.0559060955078873e-05, "epoch": 4.837013669821241, "percentage": 69.1, "elapsed_time": "4:40:52", "remaining_time": "2:05:35"} +{"current_steps": 4605, "total_steps": 6657, "loss": 0.0933, "lr": 1.0512865040528558e-05, "epoch": 4.842271293375394, "percentage": 69.18, 
"elapsed_time": "4:42:19", "remaining_time": "2:05:48"} +{"current_steps": 4610, "total_steps": 6657, "loss": 0.064, "lr": 1.0466734345249946e-05, "epoch": 4.847528916929548, "percentage": 69.25, "elapsed_time": "4:42:35", "remaining_time": "2:05:28"} +{"current_steps": 4615, "total_steps": 6657, "loss": 0.0816, "lr": 1.0420669186368311e-05, "epoch": 4.852786540483701, "percentage": 69.33, "elapsed_time": "4:43:04", "remaining_time": "2:05:15"} +{"current_steps": 4620, "total_steps": 6657, "loss": 0.0725, "lr": 1.0374669880558419e-05, "epoch": 4.858044164037855, "percentage": 69.4, "elapsed_time": "4:43:23", "remaining_time": "2:04:56"} +{"current_steps": 4625, "total_steps": 6657, "loss": 0.0616, "lr": 1.0328736744042311e-05, "epoch": 4.863301787592008, "percentage": 69.48, "elapsed_time": "4:43:40", "remaining_time": "2:04:37"} +{"current_steps": 4630, "total_steps": 6657, "loss": 0.0878, "lr": 1.0282870092587144e-05, "epoch": 4.868559411146162, "percentage": 69.55, "elapsed_time": "4:43:58", "remaining_time": "2:04:19"} +{"current_steps": 4635, "total_steps": 6657, "loss": 0.0686, "lr": 1.023707024150305e-05, "epoch": 4.873817034700315, "percentage": 69.63, "elapsed_time": "4:44:20", "remaining_time": "2:04:02"} +{"current_steps": 4640, "total_steps": 6657, "loss": 0.1021, "lr": 1.0191337505640905e-05, "epoch": 4.879074658254469, "percentage": 69.7, "elapsed_time": "4:44:44", "remaining_time": "2:03:46"} +{"current_steps": 4645, "total_steps": 6657, "loss": 0.0899, "lr": 1.0145672199390226e-05, "epoch": 4.884332281808622, "percentage": 69.78, "elapsed_time": "4:45:03", "remaining_time": "2:03:28"} +{"current_steps": 4650, "total_steps": 6657, "loss": 0.0918, "lr": 1.010007463667699e-05, "epoch": 4.889589905362776, "percentage": 69.85, "elapsed_time": "4:45:21", "remaining_time": "2:03:09"} +{"current_steps": 4655, "total_steps": 6657, "loss": 0.0692, "lr": 1.0054545130961441e-05, "epoch": 4.894847528916929, "percentage": 69.93, "elapsed_time": "4:45:36", 
"remaining_time": "2:02:50"} +{"current_steps": 4660, "total_steps": 6657, "loss": 0.0789, "lr": 1.0009083995236009e-05, "epoch": 4.900105152471083, "percentage": 70.0, "elapsed_time": "4:45:54", "remaining_time": "2:02:31"} +{"current_steps": 4665, "total_steps": 6657, "loss": 0.0772, "lr": 9.963691542023079e-06, "epoch": 4.905362776025236, "percentage": 70.08, "elapsed_time": "4:46:09", "remaining_time": "2:02:11"} +{"current_steps": 4670, "total_steps": 6657, "loss": 0.1058, "lr": 9.918368083372884e-06, "epoch": 4.91062039957939, "percentage": 70.15, "elapsed_time": "4:46:33", "remaining_time": "2:01:55"} +{"current_steps": 4675, "total_steps": 6657, "loss": 0.1031, "lr": 9.87311393086138e-06, "epoch": 4.915878023133543, "percentage": 70.23, "elapsed_time": "4:47:01", "remaining_time": "2:01:41"} +{"current_steps": 4680, "total_steps": 6657, "loss": 0.0837, "lr": 9.827929395588048e-06, "epoch": 4.921135646687697, "percentage": 70.3, "elapsed_time": "4:47:24", "remaining_time": "2:01:24"} +{"current_steps": 4685, "total_steps": 6657, "loss": 0.0771, "lr": 9.782814788173787e-06, "epoch": 4.926393270241851, "percentage": 70.38, "elapsed_time": "4:47:41", "remaining_time": "2:01:05"} +{"current_steps": 4690, "total_steps": 6657, "loss": 0.0789, "lr": 9.737770418758808e-06, "epoch": 4.931650893796004, "percentage": 70.45, "elapsed_time": "4:47:57", "remaining_time": "2:00:46"} +{"current_steps": 4695, "total_steps": 6657, "loss": 0.0677, "lr": 9.692796597000438e-06, "epoch": 4.936908517350158, "percentage": 70.53, "elapsed_time": "4:48:12", "remaining_time": "2:00:26"} +{"current_steps": 4700, "total_steps": 6657, "loss": 0.0846, "lr": 9.64789363207103e-06, "epoch": 4.942166140904312, "percentage": 70.6, "elapsed_time": "4:48:28", "remaining_time": "2:00:06"} +{"current_steps": 4705, "total_steps": 6657, "loss": 0.0765, "lr": 9.603061832655847e-06, "epoch": 4.947423764458465, "percentage": 70.68, "elapsed_time": "4:48:44", "remaining_time": "1:59:47"} 
+{"current_steps": 4710, "total_steps": 6657, "loss": 0.0721, "lr": 9.55830150695093e-06, "epoch": 4.952681388012619, "percentage": 70.75, "elapsed_time": "4:49:00", "remaining_time": "1:59:28"} +{"current_steps": 4715, "total_steps": 6657, "loss": 0.0725, "lr": 9.513612962660935e-06, "epoch": 4.957939011566772, "percentage": 70.83, "elapsed_time": "4:49:18", "remaining_time": "1:59:09"} +{"current_steps": 4720, "total_steps": 6657, "loss": 0.08, "lr": 9.468996506997093e-06, "epoch": 4.963196635120926, "percentage": 70.9, "elapsed_time": "4:49:36", "remaining_time": "1:58:51"} +{"current_steps": 4725, "total_steps": 6657, "loss": 0.0697, "lr": 9.424452446675059e-06, "epoch": 4.968454258675079, "percentage": 70.98, "elapsed_time": "4:50:01", "remaining_time": "1:58:35"} +{"current_steps": 4730, "total_steps": 6657, "loss": 0.0704, "lr": 9.379981087912795e-06, "epoch": 4.9737118822292326, "percentage": 71.05, "elapsed_time": "4:50:21", "remaining_time": "1:58:17"} +{"current_steps": 4735, "total_steps": 6657, "loss": 0.0828, "lr": 9.33558273642848e-06, "epoch": 4.978969505783386, "percentage": 71.13, "elapsed_time": "4:50:50", "remaining_time": "1:58:03"} +{"current_steps": 4740, "total_steps": 6657, "loss": 0.1595, "lr": 9.291257697438393e-06, "epoch": 4.9842271293375395, "percentage": 71.2, "elapsed_time": "4:51:25", "remaining_time": "1:57:51"} +{"current_steps": 4745, "total_steps": 6657, "loss": 0.0759, "lr": 9.247006275654861e-06, "epoch": 4.989484752891693, "percentage": 71.28, "elapsed_time": "4:51:46", "remaining_time": "1:57:34"} +{"current_steps": 4750, "total_steps": 6657, "loss": 0.0709, "lr": 9.202828775284101e-06, "epoch": 4.9947423764458465, "percentage": 71.35, "elapsed_time": "4:52:03", "remaining_time": "1:57:15"} +{"current_steps": 4755, "total_steps": 6657, "loss": 0.1046, "lr": 9.158725500024148e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "4:52:27", "remaining_time": "1:56:58"} +{"current_steps": 4760, "total_steps": 6657, "loss": 
0.158, "lr": 9.114696753062816e-06, "epoch": 5.0052576235541535, "percentage": 71.5, "elapsed_time": "4:52:39", "remaining_time": "1:56:37"} +{"current_steps": 4765, "total_steps": 6657, "loss": 0.1411, "lr": 9.07074283707554e-06, "epoch": 5.010515247108307, "percentage": 71.58, "elapsed_time": "4:52:50", "remaining_time": "1:56:16"} +{"current_steps": 4770, "total_steps": 6657, "loss": 0.1383, "lr": 9.026864054223337e-06, "epoch": 5.0157728706624605, "percentage": 71.65, "elapsed_time": "4:53:03", "remaining_time": "1:55:55"} +{"current_steps": 4775, "total_steps": 6657, "loss": 0.1289, "lr": 8.98306070615073e-06, "epoch": 5.021030494216614, "percentage": 71.73, "elapsed_time": "4:53:14", "remaining_time": "1:55:34"} +{"current_steps": 4780, "total_steps": 6657, "loss": 0.1422, "lr": 8.93933309398368e-06, "epoch": 5.0262881177707674, "percentage": 71.8, "elapsed_time": "4:53:27", "remaining_time": "1:55:14"} +{"current_steps": 4785, "total_steps": 6657, "loss": 0.1382, "lr": 8.89568151832745e-06, "epoch": 5.031545741324921, "percentage": 71.88, "elapsed_time": "4:53:39", "remaining_time": "1:54:53"} +{"current_steps": 4790, "total_steps": 6657, "loss": 0.1244, "lr": 8.852106279264643e-06, "epoch": 5.036803364879074, "percentage": 71.95, "elapsed_time": "4:53:50", "remaining_time": "1:54:31"} +{"current_steps": 4795, "total_steps": 6657, "loss": 0.1441, "lr": 8.808607676353074e-06, "epoch": 5.042060988433228, "percentage": 72.03, "elapsed_time": "4:54:02", "remaining_time": "1:54:10"} +{"current_steps": 4800, "total_steps": 6657, "loss": 0.1275, "lr": 8.765186008623706e-06, "epoch": 5.047318611987381, "percentage": 72.1, "elapsed_time": "4:54:13", "remaining_time": "1:53:49"} +{"current_steps": 4805, "total_steps": 6657, "loss": 0.133, "lr": 8.721841574578617e-06, "epoch": 5.052576235541535, "percentage": 72.18, "elapsed_time": "4:55:25", "remaining_time": "1:53:51"} +{"current_steps": 4810, "total_steps": 6657, "loss": 0.1313, "lr": 8.678574672188963e-06, "epoch": 
5.057833859095688, "percentage": 72.25, "elapsed_time": "4:55:38", "remaining_time": "1:53:31"} +{"current_steps": 4815, "total_steps": 6657, "loss": 0.133, "lr": 8.635385598892881e-06, "epoch": 5.063091482649842, "percentage": 72.33, "elapsed_time": "4:55:50", "remaining_time": "1:53:10"} +{"current_steps": 4820, "total_steps": 6657, "loss": 0.1244, "lr": 8.592274651593482e-06, "epoch": 5.068349106203995, "percentage": 72.4, "elapsed_time": "4:56:01", "remaining_time": "1:52:49"} +{"current_steps": 4825, "total_steps": 6657, "loss": 0.1225, "lr": 8.549242126656814e-06, "epoch": 5.07360672975815, "percentage": 72.48, "elapsed_time": "4:56:13", "remaining_time": "1:52:28"} +{"current_steps": 4830, "total_steps": 6657, "loss": 0.1195, "lr": 8.506288319909793e-06, "epoch": 5.078864353312303, "percentage": 72.56, "elapsed_time": "4:56:25", "remaining_time": "1:52:07"} +{"current_steps": 4835, "total_steps": 6657, "loss": 0.1336, "lr": 8.463413526638186e-06, "epoch": 5.084121976866457, "percentage": 72.63, "elapsed_time": "4:56:37", "remaining_time": "1:51:46"} +{"current_steps": 4840, "total_steps": 6657, "loss": 0.1362, "lr": 8.420618041584604e-06, "epoch": 5.08937960042061, "percentage": 72.71, "elapsed_time": "4:56:50", "remaining_time": "1:51:26"} +{"current_steps": 4845, "total_steps": 6657, "loss": 0.1341, "lr": 8.377902158946427e-06, "epoch": 5.094637223974764, "percentage": 72.78, "elapsed_time": "4:57:02", "remaining_time": "1:51:05"} +{"current_steps": 4850, "total_steps": 6657, "loss": 0.1225, "lr": 8.335266172373832e-06, "epoch": 5.099894847528917, "percentage": 72.86, "elapsed_time": "4:57:15", "remaining_time": "1:50:45"} +{"current_steps": 4855, "total_steps": 6657, "loss": 0.1255, "lr": 8.292710374967737e-06, "epoch": 5.105152471083071, "percentage": 72.93, "elapsed_time": "4:57:28", "remaining_time": "1:50:24"} +{"current_steps": 4860, "total_steps": 6657, "loss": 0.1332, "lr": 8.250235059277792e-06, "epoch": 5.110410094637224, "percentage": 73.01, 
"elapsed_time": "4:57:42", "remaining_time": "1:50:04"} +{"current_steps": 4865, "total_steps": 6657, "loss": 0.1236, "lr": 8.207840517300398e-06, "epoch": 5.115667718191378, "percentage": 73.08, "elapsed_time": "4:57:55", "remaining_time": "1:49:44"} +{"current_steps": 4870, "total_steps": 6657, "loss": 0.1237, "lr": 8.165527040476666e-06, "epoch": 5.120925341745531, "percentage": 73.16, "elapsed_time": "4:58:07", "remaining_time": "1:49:23"} +{"current_steps": 4875, "total_steps": 6657, "loss": 0.117, "lr": 8.123294919690413e-06, "epoch": 5.126182965299685, "percentage": 73.23, "elapsed_time": "4:58:18", "remaining_time": "1:49:02"} +{"current_steps": 4880, "total_steps": 6657, "loss": 0.1251, "lr": 8.081144445266201e-06, "epoch": 5.131440588853838, "percentage": 73.31, "elapsed_time": "4:58:30", "remaining_time": "1:48:42"} +{"current_steps": 4885, "total_steps": 6657, "loss": 0.1234, "lr": 8.039075906967293e-06, "epoch": 5.136698212407992, "percentage": 73.38, "elapsed_time": "4:58:42", "remaining_time": "1:48:21"} +{"current_steps": 4890, "total_steps": 6657, "loss": 0.1277, "lr": 7.99708959399368e-06, "epoch": 5.141955835962145, "percentage": 73.46, "elapsed_time": "4:58:56", "remaining_time": "1:48:01"} +{"current_steps": 4895, "total_steps": 6657, "loss": 0.1293, "lr": 7.955185794980117e-06, "epoch": 5.147213459516299, "percentage": 73.53, "elapsed_time": "4:59:08", "remaining_time": "1:47:40"} +{"current_steps": 4900, "total_steps": 6657, "loss": 0.1208, "lr": 7.913364797994111e-06, "epoch": 5.152471083070452, "percentage": 73.61, "elapsed_time": "4:59:20", "remaining_time": "1:47:20"} +{"current_steps": 4905, "total_steps": 6657, "loss": 0.1253, "lr": 7.871626890533917e-06, "epoch": 5.157728706624606, "percentage": 73.68, "elapsed_time": "4:59:32", "remaining_time": "1:46:59"} +{"current_steps": 4910, "total_steps": 6657, "loss": 0.1249, "lr": 7.829972359526626e-06, "epoch": 5.162986330178759, "percentage": 73.76, "elapsed_time": "4:59:44", 
"remaining_time": "1:46:38"} +{"current_steps": 4915, "total_steps": 6657, "loss": 0.1167, "lr": 7.788401491326155e-06, "epoch": 5.168243953732913, "percentage": 73.83, "elapsed_time": "4:59:55", "remaining_time": "1:46:18"} +{"current_steps": 4920, "total_steps": 6657, "loss": 0.1196, "lr": 7.746914571711264e-06, "epoch": 5.173501577287066, "percentage": 73.91, "elapsed_time": "5:00:07", "remaining_time": "1:45:57"} +{"current_steps": 4925, "total_steps": 6657, "loss": 0.1099, "lr": 7.705511885883612e-06, "epoch": 5.1787592008412195, "percentage": 73.98, "elapsed_time": "5:00:18", "remaining_time": "1:45:36"} +{"current_steps": 4930, "total_steps": 6657, "loss": 0.124, "lr": 7.664193718465814e-06, "epoch": 5.184016824395373, "percentage": 74.06, "elapsed_time": "5:00:30", "remaining_time": "1:45:16"} +{"current_steps": 4935, "total_steps": 6657, "loss": 0.1144, "lr": 7.622960353499438e-06, "epoch": 5.1892744479495265, "percentage": 74.13, "elapsed_time": "5:00:42", "remaining_time": "1:44:55"} +{"current_steps": 4940, "total_steps": 6657, "loss": 0.1196, "lr": 7.581812074443084e-06, "epoch": 5.19453207150368, "percentage": 74.21, "elapsed_time": "5:00:54", "remaining_time": "1:44:35"} +{"current_steps": 4945, "total_steps": 6657, "loss": 0.1195, "lr": 7.5407491641704464e-06, "epoch": 5.1997896950578335, "percentage": 74.28, "elapsed_time": "5:01:06", "remaining_time": "1:44:14"} +{"current_steps": 4950, "total_steps": 6657, "loss": 0.1213, "lr": 7.499771904968332e-06, "epoch": 5.205047318611987, "percentage": 74.36, "elapsed_time": "5:01:17", "remaining_time": "1:43:54"} +{"current_steps": 4955, "total_steps": 6657, "loss": 0.1195, "lr": 7.45888057853474e-06, "epoch": 5.2103049421661405, "percentage": 74.43, "elapsed_time": "5:01:29", "remaining_time": "1:43:33"} +{"current_steps": 4960, "total_steps": 6657, "loss": 0.1193, "lr": 7.418075465976944e-06, "epoch": 5.215562565720294, "percentage": 74.51, "elapsed_time": "5:01:42", "remaining_time": "1:43:13"} 
+{"current_steps": 4965, "total_steps": 6657, "loss": 0.1269, "lr": 7.3773568478095184e-06, "epoch": 5.220820189274448, "percentage": 74.58, "elapsed_time": "5:01:54", "remaining_time": "1:42:53"} +{"current_steps": 4970, "total_steps": 6657, "loss": 0.1347, "lr": 7.336725003952456e-06, "epoch": 5.226077812828602, "percentage": 74.66, "elapsed_time": "5:02:08", "remaining_time": "1:42:33"} +{"current_steps": 4975, "total_steps": 6657, "loss": 0.1142, "lr": 7.296180213729196e-06, "epoch": 5.231335436382755, "percentage": 74.73, "elapsed_time": "5:02:19", "remaining_time": "1:42:12"} +{"current_steps": 4980, "total_steps": 6657, "loss": 0.1179, "lr": 7.255722755864734e-06, "epoch": 5.236593059936909, "percentage": 74.81, "elapsed_time": "5:02:32", "remaining_time": "1:41:52"} +{"current_steps": 4985, "total_steps": 6657, "loss": 0.1216, "lr": 7.21535290848372e-06, "epoch": 5.241850683491062, "percentage": 74.88, "elapsed_time": "5:02:44", "remaining_time": "1:41:32"} +{"current_steps": 4990, "total_steps": 6657, "loss": 0.1179, "lr": 7.175070949108496e-06, "epoch": 5.247108307045216, "percentage": 74.96, "elapsed_time": "5:02:56", "remaining_time": "1:41:12"} +{"current_steps": 4995, "total_steps": 6657, "loss": 0.1126, "lr": 7.1348771546572315e-06, "epoch": 5.252365930599369, "percentage": 75.03, "elapsed_time": "5:03:07", "remaining_time": "1:40:51"} +{"current_steps": 5000, "total_steps": 6657, "loss": 0.1122, "lr": 7.09477180144202e-06, "epoch": 5.257623554153523, "percentage": 75.11, "elapsed_time": "5:03:19", "remaining_time": "1:40:31"} +{"current_steps": 5005, "total_steps": 6657, "loss": 0.126, "lr": 7.054755165166945e-06, "epoch": 5.262881177707676, "percentage": 75.18, "elapsed_time": "5:04:32", "remaining_time": "1:40:31"} +{"current_steps": 5010, "total_steps": 6657, "loss": 0.1205, "lr": 7.014827520926206e-06, "epoch": 5.26813880126183, "percentage": 75.26, "elapsed_time": "5:04:44", "remaining_time": "1:40:10"} +{"current_steps": 5015, "total_steps": 
6657, "loss": 0.1192, "lr": 6.9749891432022505e-06, "epoch": 5.273396424815983, "percentage": 75.33, "elapsed_time": "5:04:56", "remaining_time": "1:39:50"} +{"current_steps": 5020, "total_steps": 6657, "loss": 0.1158, "lr": 6.935240305863844e-06, "epoch": 5.278654048370137, "percentage": 75.41, "elapsed_time": "5:05:08", "remaining_time": "1:39:30"} +{"current_steps": 5025, "total_steps": 6657, "loss": 0.124, "lr": 6.895581282164201e-06, "epoch": 5.28391167192429, "percentage": 75.48, "elapsed_time": "5:05:21", "remaining_time": "1:39:10"} +{"current_steps": 5030, "total_steps": 6657, "loss": 0.1212, "lr": 6.856012344739138e-06, "epoch": 5.289169295478444, "percentage": 75.56, "elapsed_time": "5:05:33", "remaining_time": "1:38:50"} +{"current_steps": 5035, "total_steps": 6657, "loss": 0.1171, "lr": 6.816533765605144e-06, "epoch": 5.294426919032597, "percentage": 75.63, "elapsed_time": "5:05:45", "remaining_time": "1:38:29"} +{"current_steps": 5040, "total_steps": 6657, "loss": 0.1155, "lr": 6.7771458161575685e-06, "epoch": 5.299684542586751, "percentage": 75.71, "elapsed_time": "5:05:57", "remaining_time": "1:38:09"} +{"current_steps": 5045, "total_steps": 6657, "loss": 0.1137, "lr": 6.737848767168709e-06, "epoch": 5.304942166140904, "percentage": 75.78, "elapsed_time": "5:06:08", "remaining_time": "1:37:49"} +{"current_steps": 5050, "total_steps": 6657, "loss": 0.1176, "lr": 6.698642888785965e-06, "epoch": 5.310199789695058, "percentage": 75.86, "elapsed_time": "5:06:21", "remaining_time": "1:37:29"} +{"current_steps": 5055, "total_steps": 6657, "loss": 0.1232, "lr": 6.659528450530006e-06, "epoch": 5.315457413249211, "percentage": 75.94, "elapsed_time": "5:06:34", "remaining_time": "1:37:09"} +{"current_steps": 5060, "total_steps": 6657, "loss": 0.1137, "lr": 6.6205057212928755e-06, "epoch": 5.320715036803365, "percentage": 76.01, "elapsed_time": "5:06:46", "remaining_time": "1:36:49"} +{"current_steps": 5065, "total_steps": 6657, "loss": 0.1185, "lr": 
6.5815749693361645e-06, "epoch": 5.325972660357518, "percentage": 76.09, "elapsed_time": "5:06:57", "remaining_time": "1:36:28"} +{"current_steps": 5070, "total_steps": 6657, "loss": 0.1204, "lr": 6.542736462289188e-06, "epoch": 5.331230283911672, "percentage": 76.16, "elapsed_time": "5:07:09", "remaining_time": "1:36:08"} +{"current_steps": 5075, "total_steps": 6657, "loss": 0.1151, "lr": 6.503990467147101e-06, "epoch": 5.336487907465825, "percentage": 76.24, "elapsed_time": "5:07:21", "remaining_time": "1:35:48"} +{"current_steps": 5080, "total_steps": 6657, "loss": 0.1196, "lr": 6.465337250269086e-06, "epoch": 5.341745531019979, "percentage": 76.31, "elapsed_time": "5:07:32", "remaining_time": "1:35:28"} +{"current_steps": 5085, "total_steps": 6657, "loss": 0.1181, "lr": 6.426777077376538e-06, "epoch": 5.347003154574132, "percentage": 76.39, "elapsed_time": "5:07:44", "remaining_time": "1:35:08"} +{"current_steps": 5090, "total_steps": 6657, "loss": 0.1169, "lr": 6.388310213551223e-06, "epoch": 5.352260778128286, "percentage": 76.46, "elapsed_time": "5:07:56", "remaining_time": "1:34:48"} +{"current_steps": 5095, "total_steps": 6657, "loss": 0.1186, "lr": 6.349936923233422e-06, "epoch": 5.357518401682439, "percentage": 76.54, "elapsed_time": "5:08:07", "remaining_time": "1:34:27"} +{"current_steps": 5100, "total_steps": 6657, "loss": 0.1282, "lr": 6.311657470220178e-06, "epoch": 5.3627760252365935, "percentage": 76.61, "elapsed_time": "5:08:20", "remaining_time": "1:34:07"} +{"current_steps": 5105, "total_steps": 6657, "loss": 0.1177, "lr": 6.273472117663446e-06, "epoch": 5.368033648790747, "percentage": 76.69, "elapsed_time": "5:08:34", "remaining_time": "1:33:48"} +{"current_steps": 5110, "total_steps": 6657, "loss": 0.1187, "lr": 6.2353811280682715e-06, "epoch": 5.3732912723449004, "percentage": 76.76, "elapsed_time": "5:08:46", "remaining_time": "1:33:28"} +{"current_steps": 5115, "total_steps": 6657, "loss": 0.1158, "lr": 6.19738476329101e-06, "epoch": 
5.378548895899054, "percentage": 76.84, "elapsed_time": "5:08:57", "remaining_time": "1:33:08"} +{"current_steps": 5120, "total_steps": 6657, "loss": 0.112, "lr": 6.159483284537533e-06, "epoch": 5.383806519453207, "percentage": 76.91, "elapsed_time": "5:09:09", "remaining_time": "1:32:48"} +{"current_steps": 5125, "total_steps": 6657, "loss": 0.1204, "lr": 6.121676952361395e-06, "epoch": 5.389064143007361, "percentage": 76.99, "elapsed_time": "5:09:21", "remaining_time": "1:32:28"} +{"current_steps": 5130, "total_steps": 6657, "loss": 0.112, "lr": 6.083966026662076e-06, "epoch": 5.394321766561514, "percentage": 77.06, "elapsed_time": "5:09:33", "remaining_time": "1:32:08"} +{"current_steps": 5135, "total_steps": 6657, "loss": 0.113, "lr": 6.046350766683194e-06, "epoch": 5.399579390115668, "percentage": 77.14, "elapsed_time": "5:09:45", "remaining_time": "1:31:48"} +{"current_steps": 5140, "total_steps": 6657, "loss": 0.1186, "lr": 6.0088314310107e-06, "epoch": 5.404837013669821, "percentage": 77.21, "elapsed_time": "5:09:56", "remaining_time": "1:31:28"} +{"current_steps": 5145, "total_steps": 6657, "loss": 0.1229, "lr": 5.9714082775711115e-06, "epoch": 5.410094637223975, "percentage": 77.29, "elapsed_time": "5:10:08", "remaining_time": "1:31:08"} +{"current_steps": 5150, "total_steps": 6657, "loss": 0.1136, "lr": 5.934081563629764e-06, "epoch": 5.415352260778128, "percentage": 77.36, "elapsed_time": "5:10:20", "remaining_time": "1:30:48"} +{"current_steps": 5155, "total_steps": 6657, "loss": 0.1217, "lr": 5.896851545788987e-06, "epoch": 5.420609884332282, "percentage": 77.44, "elapsed_time": "5:10:35", "remaining_time": "1:30:29"} +{"current_steps": 5160, "total_steps": 6657, "loss": 0.1091, "lr": 5.859718479986407e-06, "epoch": 5.425867507886435, "percentage": 77.51, "elapsed_time": "5:10:50", "remaining_time": "1:30:10"} +{"current_steps": 5165, "total_steps": 6657, "loss": 0.1099, "lr": 5.822682621493132e-06, "epoch": 5.431125131440589, "percentage": 77.59, 
"elapsed_time": "5:11:01", "remaining_time": "1:29:50"} +{"current_steps": 5170, "total_steps": 6657, "loss": 0.1141, "lr": 5.7857442249120155e-06, "epoch": 5.436382754994742, "percentage": 77.66, "elapsed_time": "5:11:13", "remaining_time": "1:29:30"} +{"current_steps": 5175, "total_steps": 6657, "loss": 0.1187, "lr": 5.748903544175934e-06, "epoch": 5.441640378548896, "percentage": 77.74, "elapsed_time": "5:11:25", "remaining_time": "1:29:11"} +{"current_steps": 5180, "total_steps": 6657, "loss": 0.1151, "lr": 5.712160832545992e-06, "epoch": 5.446898002103049, "percentage": 77.81, "elapsed_time": "5:11:37", "remaining_time": "1:28:51"} +{"current_steps": 5185, "total_steps": 6657, "loss": 0.1178, "lr": 5.675516342609811e-06, "epoch": 5.452155625657203, "percentage": 77.89, "elapsed_time": "5:11:49", "remaining_time": "1:28:31"} +{"current_steps": 5190, "total_steps": 6657, "loss": 0.1153, "lr": 5.638970326279802e-06, "epoch": 5.457413249211356, "percentage": 77.96, "elapsed_time": "5:12:01", "remaining_time": "1:28:11"} +{"current_steps": 5195, "total_steps": 6657, "loss": 0.1148, "lr": 5.602523034791407e-06, "epoch": 5.46267087276551, "percentage": 78.04, "elapsed_time": "5:12:13", "remaining_time": "1:27:52"} +{"current_steps": 5200, "total_steps": 6657, "loss": 0.1071, "lr": 5.566174718701378e-06, "epoch": 5.467928496319663, "percentage": 78.11, "elapsed_time": "5:12:36", "remaining_time": "1:27:35"} +{"current_steps": 5205, "total_steps": 6657, "loss": 0.0757, "lr": 5.529925627886079e-06, "epoch": 5.473186119873817, "percentage": 78.19, "elapsed_time": "5:13:52", "remaining_time": "1:27:33"} +{"current_steps": 5210, "total_steps": 6657, "loss": 0.0722, "lr": 5.493776011539749e-06, "epoch": 5.47844374342797, "percentage": 78.26, "elapsed_time": "5:14:12", "remaining_time": "1:27:15"} +{"current_steps": 5215, "total_steps": 6657, "loss": 0.0886, "lr": 5.457726118172761e-06, "epoch": 5.483701366982124, "percentage": 78.34, "elapsed_time": "5:14:29", 
"remaining_time": "1:26:57"} +{"current_steps": 5220, "total_steps": 6657, "loss": 0.0718, "lr": 5.421776195609982e-06, "epoch": 5.488958990536277, "percentage": 78.41, "elapsed_time": "5:14:52", "remaining_time": "1:26:40"} +{"current_steps": 5225, "total_steps": 6657, "loss": 0.0872, "lr": 5.385926490989e-06, "epoch": 5.494216614090431, "percentage": 78.49, "elapsed_time": "5:15:09", "remaining_time": "1:26:22"} +{"current_steps": 5230, "total_steps": 6657, "loss": 0.1184, "lr": 5.350177250758479e-06, "epoch": 5.499474237644584, "percentage": 78.56, "elapsed_time": "5:15:32", "remaining_time": "1:26:05"} +{"current_steps": 5235, "total_steps": 6657, "loss": 0.0779, "lr": 5.314528720676424e-06, "epoch": 5.504731861198739, "percentage": 78.64, "elapsed_time": "5:15:49", "remaining_time": "1:25:47"} +{"current_steps": 5240, "total_steps": 6657, "loss": 0.0837, "lr": 5.2789811458085085e-06, "epoch": 5.509989484752892, "percentage": 78.71, "elapsed_time": "5:16:11", "remaining_time": "1:25:30"} +{"current_steps": 5245, "total_steps": 6657, "loss": 0.0968, "lr": 5.243534770526404e-06, "epoch": 5.515247108307046, "percentage": 78.79, "elapsed_time": "5:16:38", "remaining_time": "1:25:14"} +{"current_steps": 5250, "total_steps": 6657, "loss": 0.0717, "lr": 5.208189838506074e-06, "epoch": 5.520504731861199, "percentage": 78.86, "elapsed_time": "5:17:01", "remaining_time": "1:24:57"} +{"current_steps": 5255, "total_steps": 6657, "loss": 0.0757, "lr": 5.172946592726109e-06, "epoch": 5.5257623554153525, "percentage": 78.94, "elapsed_time": "5:17:24", "remaining_time": "1:24:40"} +{"current_steps": 5260, "total_steps": 6657, "loss": 0.082, "lr": 5.137805275466072e-06, "epoch": 5.531019978969506, "percentage": 79.01, "elapsed_time": "5:17:48", "remaining_time": "1:24:24"} +{"current_steps": 5265, "total_steps": 6657, "loss": 0.0988, "lr": 5.1027661283048036e-06, "epoch": 5.5362776025236595, "percentage": 79.09, "elapsed_time": "5:18:09", "remaining_time": "1:24:06"} 
+{"current_steps": 5270, "total_steps": 6657, "loss": 0.0718, "lr": 5.067829392118775e-06, "epoch": 5.541535226077813, "percentage": 79.16, "elapsed_time": "5:18:30", "remaining_time": "1:23:49"} +{"current_steps": 5275, "total_steps": 6657, "loss": 0.0682, "lr": 5.03299530708045e-06, "epoch": 5.5467928496319665, "percentage": 79.24, "elapsed_time": "5:18:47", "remaining_time": "1:23:31"} +{"current_steps": 5280, "total_steps": 6657, "loss": 0.0636, "lr": 4.998264112656617e-06, "epoch": 5.55205047318612, "percentage": 79.32, "elapsed_time": "5:19:04", "remaining_time": "1:23:12"} +{"current_steps": 5285, "total_steps": 6657, "loss": 0.0861, "lr": 4.963636047606712e-06, "epoch": 5.5573080967402735, "percentage": 79.39, "elapsed_time": "5:19:21", "remaining_time": "1:22:54"} +{"current_steps": 5290, "total_steps": 6657, "loss": 0.0827, "lr": 4.929111349981244e-06, "epoch": 5.562565720294427, "percentage": 79.47, "elapsed_time": "5:19:46", "remaining_time": "1:22:38"} +{"current_steps": 5295, "total_steps": 6657, "loss": 0.0831, "lr": 4.894690257120114e-06, "epoch": 5.5678233438485805, "percentage": 79.54, "elapsed_time": "5:20:04", "remaining_time": "1:22:19"} +{"current_steps": 5300, "total_steps": 6657, "loss": 0.0806, "lr": 4.860373005650985e-06, "epoch": 5.573080967402734, "percentage": 79.62, "elapsed_time": "5:20:24", "remaining_time": "1:22:02"} +{"current_steps": 5305, "total_steps": 6657, "loss": 0.1142, "lr": 4.826159831487656e-06, "epoch": 5.578338590956887, "percentage": 79.69, "elapsed_time": "5:20:46", "remaining_time": "1:21:45"} +{"current_steps": 5310, "total_steps": 6657, "loss": 0.0676, "lr": 4.792050969828474e-06, "epoch": 5.583596214511041, "percentage": 79.77, "elapsed_time": "5:21:02", "remaining_time": "1:21:26"} +{"current_steps": 5315, "total_steps": 6657, "loss": 0.0968, "lr": 4.758046655154664e-06, "epoch": 5.588853838065194, "percentage": 79.84, "elapsed_time": "5:21:18", "remaining_time": "1:21:07"} +{"current_steps": 5320, 
"total_steps": 6657, "loss": 0.091, "lr": 4.72414712122875e-06, "epoch": 5.594111461619348, "percentage": 79.92, "elapsed_time": "5:21:44", "remaining_time": "1:20:51"} +{"current_steps": 5325, "total_steps": 6657, "loss": 0.0744, "lr": 4.690352601092954e-06, "epoch": 5.599369085173501, "percentage": 79.99, "elapsed_time": "5:22:06", "remaining_time": "1:20:34"} +{"current_steps": 5330, "total_steps": 6657, "loss": 0.1864, "lr": 4.656663327067563e-06, "epoch": 5.604626708727655, "percentage": 80.07, "elapsed_time": "5:22:33", "remaining_time": "1:20:18"} +{"current_steps": 5335, "total_steps": 6657, "loss": 0.1678, "lr": 4.623079530749355e-06, "epoch": 5.609884332281808, "percentage": 80.14, "elapsed_time": "5:22:54", "remaining_time": "1:20:00"} +{"current_steps": 5340, "total_steps": 6657, "loss": 0.0816, "lr": 4.589601443010012e-06, "epoch": 5.615141955835962, "percentage": 80.22, "elapsed_time": "5:23:16", "remaining_time": "1:19:43"} +{"current_steps": 5345, "total_steps": 6657, "loss": 0.1564, "lr": 4.55622929399451e-06, "epoch": 5.620399579390115, "percentage": 80.29, "elapsed_time": "5:23:45", "remaining_time": "1:19:28"} +{"current_steps": 5350, "total_steps": 6657, "loss": 0.0894, "lr": 4.522963313119564e-06, "epoch": 5.625657202944269, "percentage": 80.37, "elapsed_time": "5:24:09", "remaining_time": "1:19:11"} +{"current_steps": 5355, "total_steps": 6657, "loss": 0.0741, "lr": 4.48980372907202e-06, "epoch": 5.630914826498422, "percentage": 80.44, "elapsed_time": "5:24:31", "remaining_time": "1:18:54"} +{"current_steps": 5360, "total_steps": 6657, "loss": 0.083, "lr": 4.456750769807303e-06, "epoch": 5.636172450052577, "percentage": 80.52, "elapsed_time": "5:24:50", "remaining_time": "1:18:36"} +{"current_steps": 5365, "total_steps": 6657, "loss": 0.0703, "lr": 4.4238046625478635e-06, "epoch": 5.641430073606729, "percentage": 80.59, "elapsed_time": "5:25:09", "remaining_time": "1:18:18"} +{"current_steps": 5370, "total_steps": 6657, "loss": 0.0747, "lr": 
4.390965633781579e-06, "epoch": 5.646687697160884, "percentage": 80.67, "elapsed_time": "5:25:33", "remaining_time": "1:18:01"} +{"current_steps": 5375, "total_steps": 6657, "loss": 0.0653, "lr": 4.358233909260215e-06, "epoch": 5.651945320715037, "percentage": 80.74, "elapsed_time": "5:25:48", "remaining_time": "1:17:42"} +{"current_steps": 5380, "total_steps": 6657, "loss": 0.0716, "lr": 4.3256097139978934e-06, "epoch": 5.657202944269191, "percentage": 80.82, "elapsed_time": "5:26:03", "remaining_time": "1:17:23"} +{"current_steps": 5385, "total_steps": 6657, "loss": 0.0611, "lr": 4.293093272269513e-06, "epoch": 5.662460567823344, "percentage": 80.89, "elapsed_time": "5:26:20", "remaining_time": "1:17:05"} +{"current_steps": 5390, "total_steps": 6657, "loss": 0.1103, "lr": 4.260684807609217e-06, "epoch": 5.667718191377498, "percentage": 80.97, "elapsed_time": "5:26:51", "remaining_time": "1:16:49"} +{"current_steps": 5395, "total_steps": 6657, "loss": 0.0585, "lr": 4.22838454280887e-06, "epoch": 5.672975814931651, "percentage": 81.04, "elapsed_time": "5:27:07", "remaining_time": "1:16:31"} +{"current_steps": 5400, "total_steps": 6657, "loss": 0.0612, "lr": 4.196192699916528e-06, "epoch": 5.678233438485805, "percentage": 81.12, "elapsed_time": "5:27:24", "remaining_time": "1:16:12"} +{"current_steps": 5405, "total_steps": 6657, "loss": 0.0648, "lr": 4.164109500234865e-06, "epoch": 5.683491062039958, "percentage": 81.19, "elapsed_time": "5:28:34", "remaining_time": "1:16:06"} +{"current_steps": 5410, "total_steps": 6657, "loss": 0.0898, "lr": 4.1321351643197235e-06, "epoch": 5.688748685594112, "percentage": 81.27, "elapsed_time": "5:29:04", "remaining_time": "1:15:51"} +{"current_steps": 5415, "total_steps": 6657, "loss": 0.0803, "lr": 4.100269911978549e-06, "epoch": 5.694006309148265, "percentage": 81.34, "elapsed_time": "5:29:25", "remaining_time": "1:15:33"} +{"current_steps": 5420, "total_steps": 6657, "loss": 0.0977, "lr": 4.068513962268892e-06, "epoch": 
5.699263932702419, "percentage": 81.42, "elapsed_time": "5:29:49", "remaining_time": "1:15:16"} +{"current_steps": 5425, "total_steps": 6657, "loss": 0.0728, "lr": 4.036867533496895e-06, "epoch": 5.704521556256572, "percentage": 81.49, "elapsed_time": "5:30:08", "remaining_time": "1:14:58"} +{"current_steps": 5430, "total_steps": 6657, "loss": 0.0887, "lr": 4.00533084321582e-06, "epoch": 5.709779179810726, "percentage": 81.57, "elapsed_time": "5:30:31", "remaining_time": "1:14:41"} +{"current_steps": 5435, "total_steps": 6657, "loss": 0.1038, "lr": 3.9739041082245114e-06, "epoch": 5.715036803364879, "percentage": 81.64, "elapsed_time": "5:30:55", "remaining_time": "1:14:24"} +{"current_steps": 5440, "total_steps": 6657, "loss": 0.0799, "lr": 3.942587544565932e-06, "epoch": 5.720294426919033, "percentage": 81.72, "elapsed_time": "5:31:13", "remaining_time": "1:14:05"} +{"current_steps": 5445, "total_steps": 6657, "loss": 0.0895, "lr": 3.9113813675256816e-06, "epoch": 5.725552050473186, "percentage": 81.79, "elapsed_time": "5:31:30", "remaining_time": "1:13:47"} +{"current_steps": 5450, "total_steps": 6657, "loss": 0.0778, "lr": 3.8802857916305006e-06, "epoch": 5.7308096740273395, "percentage": 81.87, "elapsed_time": "5:31:52", "remaining_time": "1:13:29"} +{"current_steps": 5455, "total_steps": 6657, "loss": 0.0771, "lr": 3.849301030646797e-06, "epoch": 5.736067297581493, "percentage": 81.94, "elapsed_time": "5:32:19", "remaining_time": "1:13:13"} +{"current_steps": 5460, "total_steps": 6657, "loss": 0.0643, "lr": 3.818427297579186e-06, "epoch": 5.7413249211356465, "percentage": 82.02, "elapsed_time": "5:32:37", "remaining_time": "1:12:55"} +{"current_steps": 5465, "total_steps": 6657, "loss": 0.0625, "lr": 3.787664804669027e-06, "epoch": 5.7465825446898, "percentage": 82.09, "elapsed_time": "5:33:04", "remaining_time": "1:12:38"} +{"current_steps": 5470, "total_steps": 6657, "loss": 0.0723, "lr": 3.7570137633929647e-06, "epoch": 5.7518401682439535, "percentage": 
82.17, "elapsed_time": "5:33:21", "remaining_time": "1:12:20"} +{"current_steps": 5475, "total_steps": 6657, "loss": 0.089, "lr": 3.7264743844614424e-06, "epoch": 5.757097791798107, "percentage": 82.24, "elapsed_time": "5:33:54", "remaining_time": "1:12:05"} +{"current_steps": 5480, "total_steps": 6657, "loss": 0.0761, "lr": 3.6960468778173097e-06, "epoch": 5.7623554153522605, "percentage": 82.32, "elapsed_time": "5:34:11", "remaining_time": "1:11:46"} +{"current_steps": 5485, "total_steps": 6657, "loss": 0.0854, "lr": 3.665731452634347e-06, "epoch": 5.767613038906414, "percentage": 82.39, "elapsed_time": "5:34:28", "remaining_time": "1:11:28"} +{"current_steps": 5490, "total_steps": 6657, "loss": 0.0598, "lr": 3.6355283173158153e-06, "epoch": 5.7728706624605675, "percentage": 82.47, "elapsed_time": "5:34:48", "remaining_time": "1:11:10"} +{"current_steps": 5495, "total_steps": 6657, "loss": 0.0791, "lr": 3.6054376794930467e-06, "epoch": 5.778128286014722, "percentage": 82.54, "elapsed_time": "5:35:13", "remaining_time": "1:10:53"} +{"current_steps": 5500, "total_steps": 6657, "loss": 0.0695, "lr": 3.5754597460240216e-06, "epoch": 5.783385909568874, "percentage": 82.62, "elapsed_time": "5:35:29", "remaining_time": "1:10:34"} +{"current_steps": 5505, "total_steps": 6657, "loss": 0.0671, "lr": 3.5455947229919185e-06, "epoch": 5.788643533123029, "percentage": 82.69, "elapsed_time": "5:35:47", "remaining_time": "1:10:16"} +{"current_steps": 5510, "total_steps": 6657, "loss": 0.0684, "lr": 3.515842815703716e-06, "epoch": 5.793901156677181, "percentage": 82.77, "elapsed_time": "5:36:06", "remaining_time": "1:09:57"} +{"current_steps": 5515, "total_steps": 6657, "loss": 0.0745, "lr": 3.4862042286887943e-06, "epoch": 5.799158780231336, "percentage": 82.85, "elapsed_time": "5:36:23", "remaining_time": "1:09:39"} +{"current_steps": 5520, "total_steps": 6657, "loss": 0.083, "lr": 3.456679165697494e-06, "epoch": 5.804416403785489, "percentage": 82.92, "elapsed_time": 
"5:36:49", "remaining_time": "1:09:22"} +{"current_steps": 5525, "total_steps": 6657, "loss": 0.0715, "lr": 3.427267829699741e-06, "epoch": 5.809674027339643, "percentage": 83.0, "elapsed_time": "5:37:08", "remaining_time": "1:09:04"} +{"current_steps": 5530, "total_steps": 6657, "loss": 0.0804, "lr": 3.3979704228836586e-06, "epoch": 5.814931650893796, "percentage": 83.07, "elapsed_time": "5:37:24", "remaining_time": "1:08:45"} +{"current_steps": 5535, "total_steps": 6657, "loss": 0.088, "lr": 3.3687871466541424e-06, "epoch": 5.82018927444795, "percentage": 83.15, "elapsed_time": "5:37:49", "remaining_time": "1:08:28"} +{"current_steps": 5540, "total_steps": 6657, "loss": 0.0716, "lr": 3.339718201631521e-06, "epoch": 5.825446898002103, "percentage": 83.22, "elapsed_time": "5:38:05", "remaining_time": "1:08:10"} +{"current_steps": 5545, "total_steps": 6657, "loss": 0.0591, "lr": 3.3107637876501352e-06, "epoch": 5.830704521556257, "percentage": 83.3, "elapsed_time": "5:38:22", "remaining_time": "1:07:51"} +{"current_steps": 5550, "total_steps": 6657, "loss": 0.0713, "lr": 3.2819241037569838e-06, "epoch": 5.83596214511041, "percentage": 83.37, "elapsed_time": "5:38:38", "remaining_time": "1:07:32"} +{"current_steps": 5555, "total_steps": 6657, "loss": 0.0957, "lr": 3.253199348210372e-06, "epoch": 5.841219768664564, "percentage": 83.45, "elapsed_time": "5:39:07", "remaining_time": "1:07:16"} +{"current_steps": 5560, "total_steps": 6657, "loss": 0.0576, "lr": 3.2245897184785103e-06, "epoch": 5.846477392218717, "percentage": 83.52, "elapsed_time": "5:39:22", "remaining_time": "1:06:57"} +{"current_steps": 5565, "total_steps": 6657, "loss": 0.0723, "lr": 3.1960954112381825e-06, "epoch": 5.851735015772871, "percentage": 83.6, "elapsed_time": "5:39:51", "remaining_time": "1:06:41"} +{"current_steps": 5570, "total_steps": 6657, "loss": 0.0696, "lr": 3.1677166223733934e-06, "epoch": 5.856992639327024, "percentage": 83.67, "elapsed_time": "5:40:10", "remaining_time": 
"1:06:23"} +{"current_steps": 5575, "total_steps": 6657, "loss": 0.0571, "lr": 3.1394535469740273e-06, "epoch": 5.862250262881178, "percentage": 83.75, "elapsed_time": "5:40:27", "remaining_time": "1:06:04"} +{"current_steps": 5580, "total_steps": 6657, "loss": 0.0786, "lr": 3.111306379334462e-06, "epoch": 5.867507886435331, "percentage": 83.82, "elapsed_time": "5:40:46", "remaining_time": "1:05:46"} +{"current_steps": 5585, "total_steps": 6657, "loss": 0.0642, "lr": 3.083275312952301e-06, "epoch": 5.872765509989485, "percentage": 83.9, "elapsed_time": "5:41:07", "remaining_time": "1:05:28"} +{"current_steps": 5590, "total_steps": 6657, "loss": 0.0953, "lr": 3.055360540527006e-06, "epoch": 5.878023133543638, "percentage": 83.97, "elapsed_time": "5:41:31", "remaining_time": "1:05:11"} +{"current_steps": 5595, "total_steps": 6657, "loss": 0.0888, "lr": 3.0275622539585556e-06, "epoch": 5.883280757097792, "percentage": 84.05, "elapsed_time": "5:41:50", "remaining_time": "1:04:53"} +{"current_steps": 5600, "total_steps": 6657, "loss": 0.0674, "lr": 2.999880644346165e-06, "epoch": 5.888538380651945, "percentage": 84.12, "elapsed_time": "5:42:07", "remaining_time": "1:04:34"} +{"current_steps": 5605, "total_steps": 6657, "loss": 0.0757, "lr": 2.9723159019869597e-06, "epoch": 5.893796004206099, "percentage": 84.2, "elapsed_time": "5:43:23", "remaining_time": "1:04:27"} +{"current_steps": 5610, "total_steps": 6657, "loss": 0.0693, "lr": 2.9448682163746413e-06, "epoch": 5.899053627760252, "percentage": 84.27, "elapsed_time": "5:43:40", "remaining_time": "1:04:08"} +{"current_steps": 5615, "total_steps": 6657, "loss": 0.0699, "lr": 2.917537776198216e-06, "epoch": 5.904311251314406, "percentage": 84.35, "elapsed_time": "5:43:55", "remaining_time": "1:03:49"} +{"current_steps": 5620, "total_steps": 6657, "loss": 0.0932, "lr": 2.8903247693406932e-06, "epoch": 5.909568874868559, "percentage": 84.42, "elapsed_time": "5:44:20", "remaining_time": "1:03:32"} +{"current_steps": 5625, 
"total_steps": 6657, "loss": 0.0951, "lr": 2.863229382877777e-06, "epoch": 5.914826498422713, "percentage": 84.5, "elapsed_time": "5:44:43", "remaining_time": "1:03:14"} +{"current_steps": 5630, "total_steps": 6657, "loss": 0.076, "lr": 2.8362518030765904e-06, "epoch": 5.920084121976867, "percentage": 84.57, "elapsed_time": "5:45:08", "remaining_time": "1:02:57"} +{"current_steps": 5635, "total_steps": 6657, "loss": 0.0763, "lr": 2.8093922153944065e-06, "epoch": 5.9253417455310196, "percentage": 84.65, "elapsed_time": "5:45:28", "remaining_time": "1:02:39"} +{"current_steps": 5640, "total_steps": 6657, "loss": 0.0686, "lr": 2.782650804477347e-06, "epoch": 5.930599369085174, "percentage": 84.72, "elapsed_time": "5:45:44", "remaining_time": "1:02:20"} +{"current_steps": 5645, "total_steps": 6657, "loss": 0.0652, "lr": 2.7560277541591427e-06, "epoch": 5.9358569926393265, "percentage": 84.8, "elapsed_time": "5:45:59", "remaining_time": "1:02:01"} +{"current_steps": 5650, "total_steps": 6657, "loss": 0.0655, "lr": 2.7295232474598445e-06, "epoch": 5.941114616193481, "percentage": 84.87, "elapsed_time": "5:46:15", "remaining_time": "1:01:42"} +{"current_steps": 5655, "total_steps": 6657, "loss": 0.0798, "lr": 2.703137466584571e-06, "epoch": 5.946372239747634, "percentage": 84.95, "elapsed_time": "5:46:31", "remaining_time": "1:01:23"} +{"current_steps": 5660, "total_steps": 6657, "loss": 0.0645, "lr": 2.6768705929222827e-06, "epoch": 5.951629863301788, "percentage": 85.02, "elapsed_time": "5:46:47", "remaining_time": "1:01:05"} +{"current_steps": 5665, "total_steps": 6657, "loss": 0.0643, "lr": 2.6507228070444922e-06, "epoch": 5.956887486855941, "percentage": 85.1, "elapsed_time": "5:47:05", "remaining_time": "1:00:46"} +{"current_steps": 5670, "total_steps": 6657, "loss": 0.0765, "lr": 2.6246942887040416e-06, "epoch": 5.962145110410095, "percentage": 85.17, "elapsed_time": "5:47:22", "remaining_time": "1:00:28"} +{"current_steps": 5675, "total_steps": 6657, "loss": 
0.0616, "lr": 2.5987852168338922e-06, "epoch": 5.967402733964248, "percentage": 85.25, "elapsed_time": "5:47:48", "remaining_time": "1:00:11"} +{"current_steps": 5680, "total_steps": 6657, "loss": 0.0646, "lr": 2.5729957695458454e-06, "epoch": 5.972660357518402, "percentage": 85.32, "elapsed_time": "5:48:07", "remaining_time": "0:59:52"} +{"current_steps": 5685, "total_steps": 6657, "loss": 0.0692, "lr": 2.5473261241293547e-06, "epoch": 5.977917981072555, "percentage": 85.4, "elapsed_time": "5:48:31", "remaining_time": "0:59:35"} +{"current_steps": 5690, "total_steps": 6657, "loss": 0.1572, "lr": 2.521776457050302e-06, "epoch": 5.983175604626709, "percentage": 85.47, "elapsed_time": "5:49:12", "remaining_time": "0:59:20"} +{"current_steps": 5695, "total_steps": 6657, "loss": 0.0632, "lr": 2.4963469439497703e-06, "epoch": 5.988433228180862, "percentage": 85.55, "elapsed_time": "5:49:28", "remaining_time": "0:59:02"} +{"current_steps": 5700, "total_steps": 6657, "loss": 0.0734, "lr": 2.4710377596428404e-06, "epoch": 5.993690851735016, "percentage": 85.62, "elapsed_time": "5:49:51", "remaining_time": "0:58:44"} +{"current_steps": 5705, "total_steps": 6657, "loss": 0.0968, "lr": 2.4458490781174084e-06, "epoch": 5.998948475289169, "percentage": 85.7, "elapsed_time": "5:50:14", "remaining_time": "0:58:26"} +{"current_steps": 5710, "total_steps": 6657, "loss": 0.1304, "lr": 2.4207810725329583e-06, "epoch": 6.004206098843323, "percentage": 85.77, "elapsed_time": "5:50:27", "remaining_time": "0:58:07"} +{"current_steps": 5715, "total_steps": 6657, "loss": 0.1315, "lr": 2.395833915219401e-06, "epoch": 6.009463722397476, "percentage": 85.85, "elapsed_time": "5:50:38", "remaining_time": "0:57:47"} +{"current_steps": 5720, "total_steps": 6657, "loss": 0.1261, "lr": 2.3710077776758713e-06, "epoch": 6.01472134595163, "percentage": 85.92, "elapsed_time": "5:50:51", "remaining_time": "0:57:28"} +{"current_steps": 5725, "total_steps": 6657, "loss": 0.1207, "lr": 
2.3463028305695447e-06, "epoch": 6.019978969505783, "percentage": 86.0, "elapsed_time": "5:51:02", "remaining_time": "0:57:08"} +{"current_steps": 5730, "total_steps": 6657, "loss": 0.1244, "lr": 2.3217192437344925e-06, "epoch": 6.025236593059937, "percentage": 86.07, "elapsed_time": "5:51:15", "remaining_time": "0:56:49"} +{"current_steps": 5735, "total_steps": 6657, "loss": 0.1315, "lr": 2.2972571861704784e-06, "epoch": 6.03049421661409, "percentage": 86.15, "elapsed_time": "5:51:26", "remaining_time": "0:56:30"} +{"current_steps": 5740, "total_steps": 6657, "loss": 0.1153, "lr": 2.2729168260418224e-06, "epoch": 6.035751840168244, "percentage": 86.23, "elapsed_time": "5:51:38", "remaining_time": "0:56:10"} +{"current_steps": 5745, "total_steps": 6657, "loss": 0.1312, "lr": 2.2486983306762332e-06, "epoch": 6.041009463722397, "percentage": 86.3, "elapsed_time": "5:51:50", "remaining_time": "0:55:51"} +{"current_steps": 5750, "total_steps": 6657, "loss": 0.1168, "lr": 2.224601866563665e-06, "epoch": 6.046267087276551, "percentage": 86.38, "elapsed_time": "5:52:01", "remaining_time": "0:55:31"} +{"current_steps": 5755, "total_steps": 6657, "loss": 0.1251, "lr": 2.2006275993551563e-06, "epoch": 6.051524710830704, "percentage": 86.45, "elapsed_time": "5:52:14", "remaining_time": "0:55:12"} +{"current_steps": 5760, "total_steps": 6657, "loss": 0.1132, "lr": 2.176775693861719e-06, "epoch": 6.056782334384858, "percentage": 86.53, "elapsed_time": "5:52:26", "remaining_time": "0:54:53"} +{"current_steps": 5765, "total_steps": 6657, "loss": 0.1291, "lr": 2.1530463140531886e-06, "epoch": 6.062039957939011, "percentage": 86.6, "elapsed_time": "5:52:39", "remaining_time": "0:54:33"} +{"current_steps": 5770, "total_steps": 6657, "loss": 0.113, "lr": 2.129439623057077e-06, "epoch": 6.067297581493165, "percentage": 86.68, "elapsed_time": "5:52:50", "remaining_time": "0:54:14"} +{"current_steps": 5775, "total_steps": 6657, "loss": 0.1138, "lr": 2.105955783157498e-06, "epoch": 
6.072555205047319, "percentage": 86.75, "elapsed_time": "5:53:02", "remaining_time": "0:53:55"} +{"current_steps": 5780, "total_steps": 6657, "loss": 0.1054, "lr": 2.0825949557940174e-06, "epoch": 6.0778128286014725, "percentage": 86.83, "elapsed_time": "5:53:13", "remaining_time": "0:53:35"} +{"current_steps": 5785, "total_steps": 6657, "loss": 0.1194, "lr": 2.059357301560547e-06, "epoch": 6.083070452155626, "percentage": 86.9, "elapsed_time": "5:53:26", "remaining_time": "0:53:16"} +{"current_steps": 5790, "total_steps": 6657, "loss": 0.124, "lr": 2.036242980204244e-06, "epoch": 6.0883280757097795, "percentage": 86.98, "elapsed_time": "5:53:37", "remaining_time": "0:52:57"} +{"current_steps": 5795, "total_steps": 6657, "loss": 0.1277, "lr": 2.0132521506244294e-06, "epoch": 6.093585699263933, "percentage": 87.05, "elapsed_time": "5:53:50", "remaining_time": "0:52:38"} +{"current_steps": 5800, "total_steps": 6657, "loss": 0.1158, "lr": 1.9903849708714664e-06, "epoch": 6.0988433228180865, "percentage": 87.13, "elapsed_time": "5:54:03", "remaining_time": "0:52:18"} +{"current_steps": 5805, "total_steps": 6657, "loss": 0.1121, "lr": 1.967641598145684e-06, "epoch": 6.10410094637224, "percentage": 87.2, "elapsed_time": "5:55:10", "remaining_time": "0:52:07"} +{"current_steps": 5810, "total_steps": 6657, "loss": 0.1175, "lr": 1.9450221887963194e-06, "epoch": 6.1093585699263935, "percentage": 87.28, "elapsed_time": "5:55:24", "remaining_time": "0:51:48"} +{"current_steps": 5815, "total_steps": 6657, "loss": 0.1219, "lr": 1.922526898320407e-06, "epoch": 6.114616193480547, "percentage": 87.35, "elapsed_time": "5:55:38", "remaining_time": "0:51:29"} +{"current_steps": 5820, "total_steps": 6657, "loss": 0.1109, "lr": 1.900155881361727e-06, "epoch": 6.1198738170347005, "percentage": 87.43, "elapsed_time": "5:55:50", "remaining_time": "0:51:10"} +{"current_steps": 5825, "total_steps": 6657, "loss": 0.1081, "lr": 1.8779092917097564e-06, "epoch": 6.125131440588854, "percentage": 
87.5, "elapsed_time": "5:56:02", "remaining_time": "0:50:51"} +{"current_steps": 5830, "total_steps": 6657, "loss": 0.1112, "lr": 1.85578728229858e-06, "epoch": 6.130389064143007, "percentage": 87.58, "elapsed_time": "5:56:14", "remaining_time": "0:50:31"} +{"current_steps": 5835, "total_steps": 6657, "loss": 0.1133, "lr": 1.8337900052058732e-06, "epoch": 6.135646687697161, "percentage": 87.65, "elapsed_time": "5:56:26", "remaining_time": "0:50:12"} +{"current_steps": 5840, "total_steps": 6657, "loss": 0.1186, "lr": 1.811917611651821e-06, "epoch": 6.140904311251314, "percentage": 87.73, "elapsed_time": "5:56:38", "remaining_time": "0:49:53"} +{"current_steps": 5845, "total_steps": 6657, "loss": 0.1184, "lr": 1.7901702519981068e-06, "epoch": 6.146161934805468, "percentage": 87.8, "elapsed_time": "5:56:51", "remaining_time": "0:49:34"} +{"current_steps": 5850, "total_steps": 6657, "loss": 0.1094, "lr": 1.7685480757468765e-06, "epoch": 6.151419558359621, "percentage": 87.88, "elapsed_time": "5:57:04", "remaining_time": "0:49:15"} +{"current_steps": 5855, "total_steps": 6657, "loss": 0.1169, "lr": 1.7470512315396894e-06, "epoch": 6.156677181913775, "percentage": 87.95, "elapsed_time": "5:57:15", "remaining_time": "0:48:56"} +{"current_steps": 5860, "total_steps": 6657, "loss": 0.1146, "lr": 1.7256798671565111e-06, "epoch": 6.161934805467928, "percentage": 88.03, "elapsed_time": "5:57:27", "remaining_time": "0:48:36"} +{"current_steps": 5865, "total_steps": 6657, "loss": 0.107, "lr": 1.7044341295147116e-06, "epoch": 6.167192429022082, "percentage": 88.1, "elapsed_time": "5:57:39", "remaining_time": "0:48:17"} +{"current_steps": 5870, "total_steps": 6657, "loss": 0.1095, "lr": 1.683314164668024e-06, "epoch": 6.172450052576235, "percentage": 88.18, "elapsed_time": "5:57:51", "remaining_time": "0:47:58"} +{"current_steps": 5875, "total_steps": 6657, "loss": 0.1042, "lr": 1.6623201178055603e-06, "epoch": 6.177707676130389, "percentage": 88.25, "elapsed_time": "5:58:02", 
"remaining_time": "0:47:39"} +{"current_steps": 5880, "total_steps": 6657, "loss": 0.108, "lr": 1.6414521332508183e-06, "epoch": 6.182965299684542, "percentage": 88.33, "elapsed_time": "5:58:14", "remaining_time": "0:47:20"} +{"current_steps": 5885, "total_steps": 6657, "loss": 0.1074, "lr": 1.6207103544606795e-06, "epoch": 6.188222923238696, "percentage": 88.4, "elapsed_time": "5:58:26", "remaining_time": "0:47:01"} +{"current_steps": 5890, "total_steps": 6657, "loss": 0.1065, "lr": 1.6000949240244047e-06, "epoch": 6.193480546792849, "percentage": 88.48, "elapsed_time": "5:58:37", "remaining_time": "0:46:42"} +{"current_steps": 5895, "total_steps": 6657, "loss": 0.11, "lr": 1.5796059836626998e-06, "epoch": 6.198738170347003, "percentage": 88.55, "elapsed_time": "5:58:49", "remaining_time": "0:46:22"} +{"current_steps": 5900, "total_steps": 6657, "loss": 0.1085, "lr": 1.5592436742267048e-06, "epoch": 6.203995793901156, "percentage": 88.63, "elapsed_time": "5:59:01", "remaining_time": "0:46:03"} +{"current_steps": 5905, "total_steps": 6657, "loss": 0.1138, "lr": 1.5390081356970331e-06, "epoch": 6.20925341745531, "percentage": 88.7, "elapsed_time": "5:59:13", "remaining_time": "0:45:44"} +{"current_steps": 5910, "total_steps": 6657, "loss": 0.1084, "lr": 1.5188995071828117e-06, "epoch": 6.214511041009464, "percentage": 88.78, "elapsed_time": "5:59:26", "remaining_time": "0:45:25"} +{"current_steps": 5915, "total_steps": 6657, "loss": 0.1175, "lr": 1.498917926920731e-06, "epoch": 6.219768664563618, "percentage": 88.85, "elapsed_time": "5:59:38", "remaining_time": "0:45:06"} +{"current_steps": 5920, "total_steps": 6657, "loss": 0.1132, "lr": 1.4790635322740855e-06, "epoch": 6.225026288117771, "percentage": 88.93, "elapsed_time": "5:59:51", "remaining_time": "0:44:48"} +{"current_steps": 5925, "total_steps": 6657, "loss": 0.1154, "lr": 1.4593364597318305e-06, "epoch": 6.230283911671925, "percentage": 89.0, "elapsed_time": "6:00:03", "remaining_time": "0:44:28"} 
+{"current_steps": 5930, "total_steps": 6657, "loss": 0.1063, "lr": 1.4397368449076443e-06, "epoch": 6.235541535226078, "percentage": 89.08, "elapsed_time": "6:00:16", "remaining_time": "0:44:10"} +{"current_steps": 5935, "total_steps": 6657, "loss": 0.109, "lr": 1.4202648225390103e-06, "epoch": 6.240799158780232, "percentage": 89.15, "elapsed_time": "6:00:27", "remaining_time": "0:43:51"} +{"current_steps": 5940, "total_steps": 6657, "loss": 0.1106, "lr": 1.4009205264862646e-06, "epoch": 6.246056782334385, "percentage": 89.23, "elapsed_time": "6:00:39", "remaining_time": "0:43:32"} +{"current_steps": 5945, "total_steps": 6657, "loss": 0.103, "lr": 1.3817040897316903e-06, "epoch": 6.251314405888539, "percentage": 89.3, "elapsed_time": "6:00:50", "remaining_time": "0:43:13"} +{"current_steps": 5950, "total_steps": 6657, "loss": 0.1058, "lr": 1.362615644378611e-06, "epoch": 6.256572029442692, "percentage": 89.38, "elapsed_time": "6:01:02", "remaining_time": "0:42:53"} +{"current_steps": 5955, "total_steps": 6657, "loss": 0.1102, "lr": 1.3436553216504721e-06, "epoch": 6.261829652996846, "percentage": 89.45, "elapsed_time": "6:01:14", "remaining_time": "0:42:35"} +{"current_steps": 5960, "total_steps": 6657, "loss": 0.1118, "lr": 1.324823251889924e-06, "epoch": 6.267087276550999, "percentage": 89.53, "elapsed_time": "6:01:26", "remaining_time": "0:42:16"} +{"current_steps": 5965, "total_steps": 6657, "loss": 0.1112, "lr": 1.3061195645579661e-06, "epoch": 6.2723449001051526, "percentage": 89.6, "elapsed_time": "6:01:38", "remaining_time": "0:41:57"} +{"current_steps": 5970, "total_steps": 6657, "loss": 0.107, "lr": 1.2875443882330218e-06, "epoch": 6.277602523659306, "percentage": 89.68, "elapsed_time": "6:01:50", "remaining_time": "0:41:38"} +{"current_steps": 5975, "total_steps": 6657, "loss": 0.1148, "lr": 1.269097850610066e-06, "epoch": 6.2828601472134595, "percentage": 89.76, "elapsed_time": "6:02:03", "remaining_time": "0:41:19"} +{"current_steps": 5980, 
"total_steps": 6657, "loss": 0.1075, "lr": 1.250780078499747e-06, "epoch": 6.288117770767613, "percentage": 89.83, "elapsed_time": "6:02:15", "remaining_time": "0:41:00"} +{"current_steps": 5985, "total_steps": 6657, "loss": 0.1072, "lr": 1.2325911978275196e-06, "epoch": 6.2933753943217665, "percentage": 89.91, "elapsed_time": "6:02:26", "remaining_time": "0:40:41"} +{"current_steps": 5990, "total_steps": 6657, "loss": 0.1062, "lr": 1.214531333632769e-06, "epoch": 6.29863301787592, "percentage": 89.98, "elapsed_time": "6:02:39", "remaining_time": "0:40:22"} +{"current_steps": 5995, "total_steps": 6657, "loss": 0.1029, "lr": 1.1966006100679596e-06, "epoch": 6.3038906414300735, "percentage": 90.06, "elapsed_time": "6:02:50", "remaining_time": "0:40:04"} +{"current_steps": 6000, "total_steps": 6657, "loss": 0.1098, "lr": 1.1787991503977846e-06, "epoch": 6.309148264984227, "percentage": 90.13, "elapsed_time": "6:03:03", "remaining_time": "0:39:45"} +{"current_steps": 6005, "total_steps": 6657, "loss": 0.1131, "lr": 1.1611270769983051e-06, "epoch": 6.3144058885383805, "percentage": 90.21, "elapsed_time": "6:04:22", "remaining_time": "0:39:33"} +{"current_steps": 6010, "total_steps": 6657, "loss": 0.1023, "lr": 1.143584511356115e-06, "epoch": 6.319663512092534, "percentage": 90.28, "elapsed_time": "6:04:33", "remaining_time": "0:39:14"} +{"current_steps": 6015, "total_steps": 6657, "loss": 0.1092, "lr": 1.1261715740675205e-06, "epoch": 6.3249211356466875, "percentage": 90.36, "elapsed_time": "6:04:45", "remaining_time": "0:38:55"} +{"current_steps": 6020, "total_steps": 6657, "loss": 0.1101, "lr": 1.108888384837683e-06, "epoch": 6.330178759200841, "percentage": 90.43, "elapsed_time": "6:04:57", "remaining_time": "0:38:37"} +{"current_steps": 6025, "total_steps": 6657, "loss": 0.1068, "lr": 1.0917350624798262e-06, "epoch": 6.335436382754994, "percentage": 90.51, "elapsed_time": "6:05:09", "remaining_time": "0:38:18"} +{"current_steps": 6030, "total_steps": 6657, "loss": 
0.1118, "lr": 1.07471172491439e-06, "epoch": 6.340694006309148, "percentage": 90.58, "elapsed_time": "6:05:21", "remaining_time": "0:37:59"} +{"current_steps": 6035, "total_steps": 6657, "loss": 0.1047, "lr": 1.0578184891682408e-06, "epoch": 6.345951629863301, "percentage": 90.66, "elapsed_time": "6:05:32", "remaining_time": "0:37:40"} +{"current_steps": 6040, "total_steps": 6657, "loss": 0.1097, "lr": 1.041055471373864e-06, "epoch": 6.351209253417455, "percentage": 90.73, "elapsed_time": "6:05:44", "remaining_time": "0:37:21"} +{"current_steps": 6045, "total_steps": 6657, "loss": 0.1032, "lr": 1.0244227867685597e-06, "epoch": 6.356466876971609, "percentage": 90.81, "elapsed_time": "6:05:56", "remaining_time": "0:37:02"} +{"current_steps": 6050, "total_steps": 6657, "loss": 0.121, "lr": 1.0079205496936484e-06, "epoch": 6.361724500525763, "percentage": 90.88, "elapsed_time": "6:06:08", "remaining_time": "0:36:44"} +{"current_steps": 6055, "total_steps": 6657, "loss": 0.1065, "lr": 9.915488735936995e-07, "epoch": 6.366982124079916, "percentage": 90.96, "elapsed_time": "6:06:22", "remaining_time": "0:36:25"} +{"current_steps": 6060, "total_steps": 6657, "loss": 0.1102, "lr": 9.753078710157316e-07, "epoch": 6.37223974763407, "percentage": 91.03, "elapsed_time": "6:06:34", "remaining_time": "0:36:06"} +{"current_steps": 6065, "total_steps": 6657, "loss": 0.1068, "lr": 9.59197653608448e-07, "epoch": 6.377497371188223, "percentage": 91.11, "elapsed_time": "6:06:45", "remaining_time": "0:35:47"} +{"current_steps": 6070, "total_steps": 6657, "loss": 0.1024, "lr": 9.432183321214805e-07, "epoch": 6.382754994742377, "percentage": 91.18, "elapsed_time": "6:06:57", "remaining_time": "0:35:29"} +{"current_steps": 6075, "total_steps": 6657, "loss": 0.11, "lr": 9.273700164046162e-07, "epoch": 6.38801261829653, "percentage": 91.26, "elapsed_time": "6:07:09", "remaining_time": "0:35:10"} +{"current_steps": 6080, "total_steps": 6657, "loss": 0.1019, "lr": 9.11652815407027e-07, 
"epoch": 6.393270241850684, "percentage": 91.33, "elapsed_time": "6:07:21", "remaining_time": "0:34:51"} +{"current_steps": 6085, "total_steps": 6657, "loss": 0.1033, "lr": 8.960668371765569e-07, "epoch": 6.398527865404837, "percentage": 91.41, "elapsed_time": "6:07:33", "remaining_time": "0:34:33"} +{"current_steps": 6090, "total_steps": 6657, "loss": 0.1074, "lr": 8.806121888589492e-07, "epoch": 6.403785488958991, "percentage": 91.48, "elapsed_time": "6:07:44", "remaining_time": "0:34:14"} +{"current_steps": 6095, "total_steps": 6657, "loss": 0.1148, "lr": 8.652889766971229e-07, "epoch": 6.409043112513144, "percentage": 91.56, "elapsed_time": "6:07:56", "remaining_time": "0:33:55"} +{"current_steps": 6100, "total_steps": 6657, "loss": 0.107, "lr": 8.500973060304374e-07, "epoch": 6.414300736067298, "percentage": 91.63, "elapsed_time": "6:08:08", "remaining_time": "0:33:36"} +{"current_steps": 6105, "total_steps": 6657, "loss": 0.1038, "lr": 8.350372812939778e-07, "epoch": 6.419558359621451, "percentage": 91.71, "elapsed_time": "6:08:23", "remaining_time": "0:33:18"} +{"current_steps": 6110, "total_steps": 6657, "loss": 0.1047, "lr": 8.201090060178174e-07, "epoch": 6.424815983175605, "percentage": 91.78, "elapsed_time": "6:08:38", "remaining_time": "0:33:00"} +{"current_steps": 6115, "total_steps": 6657, "loss": 0.0963, "lr": 8.053125828263297e-07, "epoch": 6.430073606729758, "percentage": 91.86, "elapsed_time": "6:08:49", "remaining_time": "0:32:41"} +{"current_steps": 6120, "total_steps": 6657, "loss": 0.1071, "lr": 7.906481134374688e-07, "epoch": 6.435331230283912, "percentage": 91.93, "elapsed_time": "6:09:00", "remaining_time": "0:32:22"} +{"current_steps": 6125, "total_steps": 6657, "loss": 0.1089, "lr": 7.761156986620677e-07, "epoch": 6.440588853838065, "percentage": 92.01, "elapsed_time": "6:09:13", "remaining_time": "0:32:04"} +{"current_steps": 6130, "total_steps": 6657, "loss": 0.1066, "lr": 7.617154384031545e-07, "epoch": 6.445846477392219, 
"percentage": 92.08, "elapsed_time": "6:09:25", "remaining_time": "0:31:45"} +{"current_steps": 6135, "total_steps": 6657, "loss": 0.1074, "lr": 7.474474316552638e-07, "epoch": 6.451104100946372, "percentage": 92.16, "elapsed_time": "6:09:37", "remaining_time": "0:31:26"} +{"current_steps": 6140, "total_steps": 6657, "loss": 0.1031, "lr": 7.33311776503749e-07, "epoch": 6.456361724500526, "percentage": 92.23, "elapsed_time": "6:09:49", "remaining_time": "0:31:08"} +{"current_steps": 6145, "total_steps": 6657, "loss": 0.1066, "lr": 7.193085701241175e-07, "epoch": 6.461619348054679, "percentage": 92.31, "elapsed_time": "6:10:01", "remaining_time": "0:30:49"} +{"current_steps": 6150, "total_steps": 6657, "loss": 0.1009, "lr": 7.054379087813568e-07, "epoch": 6.466876971608833, "percentage": 92.38, "elapsed_time": "6:10:21", "remaining_time": "0:30:31"} +{"current_steps": 6155, "total_steps": 6657, "loss": 0.0786, "lr": 6.916998878292691e-07, "epoch": 6.472134595162986, "percentage": 92.46, "elapsed_time": "6:10:41", "remaining_time": "0:30:14"} +{"current_steps": 6160, "total_steps": 6657, "loss": 0.0683, "lr": 6.780946017098289e-07, "epoch": 6.4773922187171395, "percentage": 92.53, "elapsed_time": "6:11:01", "remaining_time": "0:29:56"} +{"current_steps": 6165, "total_steps": 6657, "loss": 0.0853, "lr": 6.646221439525225e-07, "epoch": 6.482649842271293, "percentage": 92.61, "elapsed_time": "6:11:18", "remaining_time": "0:29:37"} +{"current_steps": 6170, "total_steps": 6657, "loss": 0.0697, "lr": 6.512826071737021e-07, "epoch": 6.4879074658254465, "percentage": 92.68, "elapsed_time": "6:11:40", "remaining_time": "0:29:20"} +{"current_steps": 6175, "total_steps": 6657, "loss": 0.0723, "lr": 6.380760830759669e-07, "epoch": 6.4931650893796, "percentage": 92.76, "elapsed_time": "6:11:58", "remaining_time": "0:29:02"} +{"current_steps": 6180, "total_steps": 6657, "loss": 0.1223, "lr": 6.250026624475092e-07, "epoch": 6.498422712933754, "percentage": 92.83, "elapsed_time": 
"6:12:19", "remaining_time": "0:28:44"} +{"current_steps": 6185, "total_steps": 6657, "loss": 0.0732, "lr": 6.12062435161509e-07, "epoch": 6.503680336487907, "percentage": 92.91, "elapsed_time": "6:12:37", "remaining_time": "0:28:26"} +{"current_steps": 6190, "total_steps": 6657, "loss": 0.0869, "lr": 5.992554901755121e-07, "epoch": 6.508937960042061, "percentage": 92.98, "elapsed_time": "6:12:59", "remaining_time": "0:28:08"} +{"current_steps": 6195, "total_steps": 6657, "loss": 0.0938, "lr": 5.865819155308039e-07, "epoch": 6.514195583596215, "percentage": 93.06, "elapsed_time": "6:13:26", "remaining_time": "0:27:50"} +{"current_steps": 6200, "total_steps": 6657, "loss": 0.0675, "lr": 5.740417983518253e-07, "epoch": 6.519453207150368, "percentage": 93.14, "elapsed_time": "6:13:48", "remaining_time": "0:27:33"} +{"current_steps": 6205, "total_steps": 6657, "loss": 0.0725, "lr": 5.61635224845567e-07, "epoch": 6.524710830704522, "percentage": 93.21, "elapsed_time": "6:15:05", "remaining_time": "0:27:19"} +{"current_steps": 6210, "total_steps": 6657, "loss": 0.07, "lr": 5.493622803009602e-07, "epoch": 6.529968454258675, "percentage": 93.29, "elapsed_time": "6:15:28", "remaining_time": "0:27:01"} +{"current_steps": 6215, "total_steps": 6657, "loss": 0.105, "lr": 5.372230490883246e-07, "epoch": 6.535226077812829, "percentage": 93.36, "elapsed_time": "6:15:50", "remaining_time": "0:26:43"} +{"current_steps": 6220, "total_steps": 6657, "loss": 0.0636, "lr": 5.252176146587484e-07, "epoch": 6.540483701366982, "percentage": 93.44, "elapsed_time": "6:16:06", "remaining_time": "0:26:25"} +{"current_steps": 6225, "total_steps": 6657, "loss": 0.0734, "lr": 5.133460595435447e-07, "epoch": 6.545741324921136, "percentage": 93.51, "elapsed_time": "6:16:28", "remaining_time": "0:26:07"} +{"current_steps": 6230, "total_steps": 6657, "loss": 0.0581, "lr": 5.016084653536756e-07, "epoch": 6.550998948475289, "percentage": 93.59, "elapsed_time": "6:16:46", "remaining_time": "0:25:49"} 
+{"current_steps": 6235, "total_steps": 6657, "loss": 0.0814, "lr": 4.900049127791851e-07, "epoch": 6.556256572029443, "percentage": 93.66, "elapsed_time": "6:17:03", "remaining_time": "0:25:31"} +{"current_steps": 6240, "total_steps": 6657, "loss": 0.0758, "lr": 4.785354815886445e-07, "epoch": 6.561514195583596, "percentage": 93.74, "elapsed_time": "6:17:26", "remaining_time": "0:25:13"} +{"current_steps": 6245, "total_steps": 6657, "loss": 0.0811, "lr": 4.6720025062862106e-07, "epoch": 6.56677181913775, "percentage": 93.81, "elapsed_time": "6:17:46", "remaining_time": "0:24:55"} +{"current_steps": 6250, "total_steps": 6657, "loss": 0.0812, "lr": 4.559992978231087e-07, "epoch": 6.572029442691903, "percentage": 93.89, "elapsed_time": "6:18:05", "remaining_time": "0:24:37"} +{"current_steps": 6255, "total_steps": 6657, "loss": 0.1107, "lr": 4.4493270017301305e-07, "epoch": 6.577287066246057, "percentage": 93.96, "elapsed_time": "6:18:28", "remaining_time": "0:24:19"} +{"current_steps": 6260, "total_steps": 6657, "loss": 0.0637, "lr": 4.340005337556186e-07, "epoch": 6.58254468980021, "percentage": 94.04, "elapsed_time": "6:18:43", "remaining_time": "0:24:01"} +{"current_steps": 6265, "total_steps": 6657, "loss": 0.0847, "lr": 4.232028737240623e-07, "epoch": 6.587802313354364, "percentage": 94.11, "elapsed_time": "6:18:59", "remaining_time": "0:23:42"} +{"current_steps": 6270, "total_steps": 6657, "loss": 0.0913, "lr": 4.125397943068099e-07, "epoch": 6.593059936908517, "percentage": 94.19, "elapsed_time": "6:19:22", "remaining_time": "0:23:24"} +{"current_steps": 6275, "total_steps": 6657, "loss": 0.0797, "lr": 4.0201136880716027e-07, "epoch": 6.598317560462671, "percentage": 94.26, "elapsed_time": "6:19:48", "remaining_time": "0:23:07"} +{"current_steps": 6280, "total_steps": 6657, "loss": 0.1396, "lr": 3.9161766960273517e-07, "epoch": 6.603575184016824, "percentage": 94.34, "elapsed_time": "6:20:12", "remaining_time": "0:22:49"} +{"current_steps": 6285, 
"total_steps": 6657, "loss": 0.2018, "lr": 3.8135876814497927e-07, "epoch": 6.608832807570978, "percentage": 94.41, "elapsed_time": "6:20:36", "remaining_time": "0:22:31"} +{"current_steps": 6290, "total_steps": 6657, "loss": 0.0825, "lr": 3.7123473495866314e-07, "epoch": 6.614090431125131, "percentage": 94.49, "elapsed_time": "6:20:57", "remaining_time": "0:22:13"} +{"current_steps": 6295, "total_steps": 6657, "loss": 0.0984, "lr": 3.61245639641421e-07, "epoch": 6.619348054679285, "percentage": 94.56, "elapsed_time": "6:21:21", "remaining_time": "0:21:55"} +{"current_steps": 6300, "total_steps": 6657, "loss": 0.1422, "lr": 3.513915508632448e-07, "epoch": 6.624605678233438, "percentage": 94.64, "elapsed_time": "6:21:50", "remaining_time": "0:21:38"} +{"current_steps": 6305, "total_steps": 6657, "loss": 0.0709, "lr": 3.4167253636602893e-07, "epoch": 6.629863301787592, "percentage": 94.71, "elapsed_time": "6:22:13", "remaining_time": "0:21:20"} +{"current_steps": 6310, "total_steps": 6657, "loss": 0.0756, "lr": 3.3208866296310147e-07, "epoch": 6.635120925341745, "percentage": 94.79, "elapsed_time": "6:22:31", "remaining_time": "0:21:02"} +{"current_steps": 6315, "total_steps": 6657, "loss": 0.0728, "lr": 3.2263999653876057e-07, "epoch": 6.6403785488958995, "percentage": 94.86, "elapsed_time": "6:22:51", "remaining_time": "0:20:44"} +{"current_steps": 6320, "total_steps": 6657, "loss": 0.0672, "lr": 3.133266020478254e-07, "epoch": 6.645636172450052, "percentage": 94.94, "elapsed_time": "6:23:14", "remaining_time": "0:20:26"} +{"current_steps": 6325, "total_steps": 6657, "loss": 0.0665, "lr": 3.0414854351519476e-07, "epoch": 6.6508937960042065, "percentage": 95.01, "elapsed_time": "6:23:30", "remaining_time": "0:20:07"} +{"current_steps": 6330, "total_steps": 6657, "loss": 0.0699, "lr": 2.951058840353893e-07, "epoch": 6.65615141955836, "percentage": 95.09, "elapsed_time": "6:23:45", "remaining_time": "0:19:49"} +{"current_steps": 6335, "total_steps": 6657, "loss": 
0.0579, "lr": 2.861986857721388e-07, "epoch": 6.6614090431125135, "percentage": 95.16, "elapsed_time": "6:24:02", "remaining_time": "0:19:31"} +{"current_steps": 6340, "total_steps": 6657, "loss": 0.11, "lr": 2.7742700995794457e-07, "epoch": 6.666666666666667, "percentage": 95.24, "elapsed_time": "6:24:33", "remaining_time": "0:19:13"} +{"current_steps": 6345, "total_steps": 6657, "loss": 0.0524, "lr": 2.687909168936509e-07, "epoch": 6.6719242902208205, "percentage": 95.31, "elapsed_time": "6:24:47", "remaining_time": "0:18:55"} +{"current_steps": 6350, "total_steps": 6657, "loss": 0.0605, "lr": 2.6029046594805206e-07, "epoch": 6.677181913774974, "percentage": 95.39, "elapsed_time": "6:25:06", "remaining_time": "0:18:37"} +{"current_steps": 6355, "total_steps": 6657, "loss": 0.0596, "lr": 2.519257155574617e-07, "epoch": 6.682439537329127, "percentage": 95.46, "elapsed_time": "6:25:22", "remaining_time": "0:18:18"} +{"current_steps": 6360, "total_steps": 6657, "loss": 0.0738, "lr": 2.436967232253218e-07, "epoch": 6.687697160883281, "percentage": 95.54, "elapsed_time": "6:25:47", "remaining_time": "0:18:00"} +{"current_steps": 6365, "total_steps": 6657, "loss": 0.0895, "lr": 2.3560354552180976e-07, "epoch": 6.692954784437434, "percentage": 95.61, "elapsed_time": "6:26:13", "remaining_time": "0:17:43"} +{"current_steps": 6370, "total_steps": 6657, "loss": 0.0908, "lr": 2.27646238083441e-07, "epoch": 6.698212407991588, "percentage": 95.69, "elapsed_time": "6:26:37", "remaining_time": "0:17:25"} +{"current_steps": 6375, "total_steps": 6657, "loss": 0.075, "lr": 2.1982485561269805e-07, "epoch": 6.703470031545741, "percentage": 95.76, "elapsed_time": "6:26:55", "remaining_time": "0:17:06"} +{"current_steps": 6380, "total_steps": 6657, "loss": 0.0837, "lr": 2.1213945187763764e-07, "epoch": 6.708727655099895, "percentage": 95.84, "elapsed_time": "6:27:17", "remaining_time": "0:16:48"} +{"current_steps": 6385, "total_steps": 6657, "loss": 0.1014, "lr": 
2.0459007971154632e-07, "epoch": 6.713985278654048, "percentage": 95.91, "elapsed_time": "6:27:43", "remaining_time": "0:16:31"} +{"current_steps": 6390, "total_steps": 6657, "loss": 0.0736, "lr": 1.9717679101254549e-07, "epoch": 6.719242902208202, "percentage": 95.99, "elapsed_time": "6:28:01", "remaining_time": "0:16:12"} +{"current_steps": 6395, "total_steps": 6657, "loss": 0.09, "lr": 1.898996367432604e-07, "epoch": 6.724500525762355, "percentage": 96.06, "elapsed_time": "6:28:18", "remaining_time": "0:15:54"} +{"current_steps": 6400, "total_steps": 6657, "loss": 0.0751, "lr": 1.8275866693046263e-07, "epoch": 6.729758149316509, "percentage": 96.14, "elapsed_time": "6:28:40", "remaining_time": "0:15:36"} +{"current_steps": 6405, "total_steps": 6657, "loss": 0.0722, "lr": 1.7575393066471714e-07, "epoch": 6.735015772870662, "percentage": 96.21, "elapsed_time": "6:29:58", "remaining_time": "0:15:20"} +{"current_steps": 6410, "total_steps": 6657, "loss": 0.0645, "lr": 1.6888547610005802e-07, "epoch": 6.740273396424816, "percentage": 96.29, "elapsed_time": "6:30:20", "remaining_time": "0:15:02"} +{"current_steps": 6415, "total_steps": 6657, "loss": 0.0631, "lr": 1.6215335045364656e-07, "epoch": 6.745531019978969, "percentage": 96.36, "elapsed_time": "6:30:48", "remaining_time": "0:14:44"} +{"current_steps": 6420, "total_steps": 6657, "loss": 0.0673, "lr": 1.5555760000545595e-07, "epoch": 6.750788643533123, "percentage": 96.44, "elapsed_time": "6:31:06", "remaining_time": "0:14:26"} +{"current_steps": 6425, "total_steps": 6657, "loss": 0.0792, "lr": 1.4909827009794486e-07, "epoch": 6.756046267087276, "percentage": 96.51, "elapsed_time": "6:31:33", "remaining_time": "0:14:08"} +{"current_steps": 6430, "total_steps": 6657, "loss": 0.077, "lr": 1.4277540513575328e-07, "epoch": 6.76130389064143, "percentage": 96.59, "elapsed_time": "6:31:55", "remaining_time": "0:13:50"} +{"current_steps": 6435, "total_steps": 6657, "loss": 0.0848, "lr": 1.3658904858538936e-07, "epoch": 
6.766561514195583, "percentage": 96.67, "elapsed_time": "6:32:12", "remaining_time": "0:13:31"} +{"current_steps": 6440, "total_steps": 6657, "loss": 0.0595, "lr": 1.3053924297493858e-07, "epoch": 6.771819137749737, "percentage": 96.74, "elapsed_time": "6:32:32", "remaining_time": "0:13:13"} +{"current_steps": 6445, "total_steps": 6657, "loss": 0.0726, "lr": 1.2462602989376404e-07, "epoch": 6.77707676130389, "percentage": 96.82, "elapsed_time": "6:32:53", "remaining_time": "0:12:55"} +{"current_steps": 6450, "total_steps": 6657, "loss": 0.0709, "lr": 1.1884944999222658e-07, "epoch": 6.782334384858045, "percentage": 96.89, "elapsed_time": "6:33:13", "remaining_time": "0:12:37"} +{"current_steps": 6455, "total_steps": 6657, "loss": 0.0574, "lr": 1.1320954298140063e-07, "epoch": 6.787592008412197, "percentage": 96.97, "elapsed_time": "6:33:30", "remaining_time": "0:12:18"} +{"current_steps": 6460, "total_steps": 6657, "loss": 0.0715, "lr": 1.0770634763280552e-07, "epoch": 6.792849631966352, "percentage": 97.04, "elapsed_time": "6:33:50", "remaining_time": "0:12:00"} +{"current_steps": 6465, "total_steps": 6657, "loss": 0.0704, "lr": 1.023399017781368e-07, "epoch": 6.798107255520505, "percentage": 97.12, "elapsed_time": "6:34:07", "remaining_time": "0:11:42"} +{"current_steps": 6470, "total_steps": 6657, "loss": 0.0828, "lr": 9.711024230900423e-08, "epoch": 6.803364879074659, "percentage": 97.19, "elapsed_time": "6:34:30", "remaining_time": "0:11:24"} +{"current_steps": 6475, "total_steps": 6657, "loss": 0.0693, "lr": 9.201740517668089e-08, "epoch": 6.808622502628812, "percentage": 97.27, "elapsed_time": "6:34:53", "remaining_time": "0:11:05"} +{"current_steps": 6480, "total_steps": 6657, "loss": 0.0767, "lr": 8.706142539185447e-08, "epoch": 6.813880126182966, "percentage": 97.34, "elapsed_time": "6:35:08", "remaining_time": "0:10:47"} +{"current_steps": 6485, "total_steps": 6657, "loss": 0.0744, "lr": 8.224233702438966e-08, "epoch": 6.819137749737119, "percentage": 
97.42, "elapsed_time": "6:35:32", "remaining_time": "0:10:29"} +{"current_steps": 6490, "total_steps": 6657, "loss": 0.0775, "lr": 7.756017320309283e-08, "epoch": 6.8243953732912725, "percentage": 97.49, "elapsed_time": "6:35:49", "remaining_time": "0:10:11"} +{"current_steps": 6495, "total_steps": 6657, "loss": 0.0638, "lr": 7.301496611547665e-08, "epoch": 6.829652996845426, "percentage": 97.57, "elapsed_time": "6:36:07", "remaining_time": "0:09:52"} +{"current_steps": 6500, "total_steps": 6657, "loss": 0.0653, "lr": 6.86067470075491e-08, "epoch": 6.8349106203995795, "percentage": 97.64, "elapsed_time": "6:36:21", "remaining_time": "0:09:34"} +{"current_steps": 6505, "total_steps": 6657, "loss": 0.0896, "lr": 6.433554618359816e-08, "epoch": 6.840168243953733, "percentage": 97.72, "elapsed_time": "6:36:50", "remaining_time": "0:09:16"} +{"current_steps": 6510, "total_steps": 6657, "loss": 0.061, "lr": 6.020139300597638e-08, "epoch": 6.8454258675078865, "percentage": 97.79, "elapsed_time": "6:37:06", "remaining_time": "0:08:58"} +{"current_steps": 6515, "total_steps": 6657, "loss": 0.0671, "lr": 5.620431589490105e-08, "epoch": 6.85068349106204, "percentage": 97.87, "elapsed_time": "6:37:35", "remaining_time": "0:08:39"} +{"current_steps": 6520, "total_steps": 6657, "loss": 0.0679, "lr": 5.234434232826324e-08, "epoch": 6.8559411146161935, "percentage": 97.94, "elapsed_time": "6:37:54", "remaining_time": "0:08:21"} +{"current_steps": 6525, "total_steps": 6657, "loss": 0.0568, "lr": 4.862149884143907e-08, "epoch": 6.861198738170347, "percentage": 98.02, "elapsed_time": "6:38:11", "remaining_time": "0:08:03"} +{"current_steps": 6530, "total_steps": 6657, "loss": 0.0686, "lr": 4.503581102709875e-08, "epoch": 6.8664563617245005, "percentage": 98.09, "elapsed_time": "6:38:29", "remaining_time": "0:07:45"} +{"current_steps": 6535, "total_steps": 6657, "loss": 0.0704, "lr": 4.1587303535040035e-08, "epoch": 6.871713985278654, "percentage": 98.17, "elapsed_time": "6:38:46", 
"remaining_time": "0:07:26"} +{"current_steps": 6540, "total_steps": 6657, "loss": 0.0868, "lr": 3.827600007201282e-08, "epoch": 6.8769716088328074, "percentage": 98.24, "elapsed_time": "6:39:14", "remaining_time": "0:07:08"} +{"current_steps": 6545, "total_steps": 6657, "loss": 0.0898, "lr": 3.510192340156149e-08, "epoch": 6.882229232386961, "percentage": 98.32, "elapsed_time": "6:39:33", "remaining_time": "0:06:50"} +{"current_steps": 6550, "total_steps": 6657, "loss": 0.0517, "lr": 3.20650953438606e-08, "epoch": 6.887486855941114, "percentage": 98.39, "elapsed_time": "6:39:50", "remaining_time": "0:06:31"} +{"current_steps": 6555, "total_steps": 6657, "loss": 0.0847, "lr": 2.9165536775574987e-08, "epoch": 6.892744479495268, "percentage": 98.47, "elapsed_time": "6:40:08", "remaining_time": "0:06:13"} +{"current_steps": 6560, "total_steps": 6657, "loss": 0.0665, "lr": 2.6403267629706575e-08, "epoch": 6.898002103049421, "percentage": 98.54, "elapsed_time": "6:40:24", "remaining_time": "0:05:55"} +{"current_steps": 6565, "total_steps": 6657, "loss": 0.067, "lr": 2.3778306895467785e-08, "epoch": 6.903259726603575, "percentage": 98.62, "elapsed_time": "6:40:40", "remaining_time": "0:05:36"} +{"current_steps": 6570, "total_steps": 6657, "loss": 0.0793, "lr": 2.1290672618135e-08, "epoch": 6.908517350157728, "percentage": 98.69, "elapsed_time": "6:41:02", "remaining_time": "0:05:18"} +{"current_steps": 6575, "total_steps": 6657, "loss": 0.104, "lr": 1.8940381898946424e-08, "epoch": 6.913774973711882, "percentage": 98.77, "elapsed_time": "6:41:27", "remaining_time": "0:05:00"} +{"current_steps": 6580, "total_steps": 6657, "loss": 0.0759, "lr": 1.6727450894959973e-08, "epoch": 6.919032597266035, "percentage": 98.84, "elapsed_time": "6:41:53", "remaining_time": "0:04:42"} +{"current_steps": 6585, "total_steps": 6657, "loss": 0.07, "lr": 1.4651894818966671e-08, "epoch": 6.92429022082019, "percentage": 98.92, "elapsed_time": "6:42:12", "remaining_time": "0:04:23"} 
+{"current_steps": 6590, "total_steps": 6657, "loss": 0.0661, "lr": 1.2713727939364096e-08, "epoch": 6.929547844374342, "percentage": 98.99, "elapsed_time": "6:42:29", "remaining_time": "0:04:05"} +{"current_steps": 6595, "total_steps": 6657, "loss": 0.0654, "lr": 1.091296358007643e-08, "epoch": 6.934805467928497, "percentage": 99.07, "elapsed_time": "6:42:44", "remaining_time": "0:03:47"} +{"current_steps": 6600, "total_steps": 6657, "loss": 0.0555, "lr": 9.249614120450113e-09, "epoch": 6.94006309148265, "percentage": 99.14, "elapsed_time": "6:42:59", "remaining_time": "0:03:28"} +{"current_steps": 6605, "total_steps": 6657, "loss": 0.0812, "lr": 7.723690995171673e-09, "epoch": 6.945320715036804, "percentage": 99.22, "elapsed_time": "6:44:11", "remaining_time": "0:03:10"} +{"current_steps": 6610, "total_steps": 6657, "loss": 0.0607, "lr": 6.335204694196684e-09, "epoch": 6.950578338590957, "percentage": 99.29, "elapsed_time": "6:44:28", "remaining_time": "0:02:52"} +{"current_steps": 6615, "total_steps": 6657, "loss": 0.0632, "lr": 5.084164762667598e-09, "epoch": 6.955835962145111, "percentage": 99.37, "elapsed_time": "6:44:46", "remaining_time": "0:02:34"} +{"current_steps": 6620, "total_steps": 6657, "loss": 0.0718, "lr": 3.970579800853802e-09, "epoch": 6.961093585699264, "percentage": 99.44, "elapsed_time": "6:45:02", "remaining_time": "0:02:15"} +{"current_steps": 6625, "total_steps": 6657, "loss": 0.0631, "lr": 2.9944574640894398e-09, "epoch": 6.966351209253418, "percentage": 99.52, "elapsed_time": "6:45:28", "remaining_time": "0:01:57"} +{"current_steps": 6630, "total_steps": 6657, "loss": 0.0609, "lr": 2.1558044627267847e-09, "epoch": 6.971608832807571, "percentage": 99.59, "elapsed_time": "6:45:47", "remaining_time": "0:01:39"} +{"current_steps": 6635, "total_steps": 6657, "loss": 0.0627, "lr": 1.4546265620785094e-09, "epoch": 6.976866456361725, "percentage": 99.67, "elapsed_time": "6:46:12", "remaining_time": "0:01:20"} +{"current_steps": 6640, 
"total_steps": 6657, "loss": 0.1401, "lr": 8.909285823910374e-10, "epoch": 6.982124079915878, "percentage": 99.74, "elapsed_time": "6:46:48", "remaining_time": "0:01:02"} +{"current_steps": 6645, "total_steps": 6657, "loss": 0.0778, "lr": 4.647143988067981e-10, "epoch": 6.987381703470032, "percentage": 99.82, "elapsed_time": "6:47:09", "remaining_time": "0:00:44"} +{"current_steps": 6650, "total_steps": 6657, "loss": 0.0699, "lr": 1.7598694132869853e-10, "epoch": 6.992639327024185, "percentage": 99.89, "elapsed_time": "6:47:32", "remaining_time": "0:00:25"} +{"current_steps": 6655, "total_steps": 6657, "loss": 0.0692, "lr": 2.474819481568247e-11, "epoch": 6.997896950578339, "percentage": 99.97, "elapsed_time": "6:47:50", "remaining_time": "0:00:07"} +{"current_steps": 6657, "total_steps": 6657, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "6:48:58", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..3e43dbe --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,14688 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 6657, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005257623554153523, + "grad_norm": 14.338418407480448, + "learning_rate": 2.4024024024024026e-07, + "loss": 0.8123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7896162271499634, + "step": 5, + "valid_targets_mean": 1423.4, + "valid_targets_min": 715 + }, + { + "epoch": 0.010515247108307046, + "grad_norm": 15.250363398872196, + "learning_rate": 5.405405405405406e-07, + "loss": 0.8146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.8242740035057068, + "step": 10, + "valid_targets_mean": 1462.5, + "valid_targets_min": 612 + }, + { + "epoch": 0.015772870662460567, + "grad_norm": 13.170887774620747, + "learning_rate": 8.40840840840841e-07, + "loss": 0.7847, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.749550461769104, + "step": 15, + "valid_targets_mean": 1514.8, + "valid_targets_min": 714 + }, + { + "epoch": 0.02103049421661409, + "grad_norm": 12.021580563192558, + "learning_rate": 1.1411411411411411e-06, + "loss": 0.772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7531605958938599, + "step": 20, + "valid_targets_mean": 1450.1, + "valid_targets_min": 754 + }, + { + "epoch": 0.026288117770767613, + "grad_norm": 9.812753263810011, + "learning_rate": 1.4414414414414416e-06, + "loss": 0.7447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7402019500732422, + "step": 25, + "valid_targets_mean": 1548.3, + "valid_targets_min": 718 + }, + { + "epoch": 0.031545741324921134, + "grad_norm": 7.151766176611262, + "learning_rate": 1.7417417417417418e-06, + "loss": 0.7118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6922894716262817, + "step": 30, + "valid_targets_mean": 1539.9, + "valid_targets_min": 731 + }, + { + "epoch": 0.03680336487907466, + "grad_norm": 6.414810551517701, + "learning_rate": 2.0420420420420424e-06, + "loss": 0.6655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6497060656547546, + "step": 35, + "valid_targets_mean": 1543.9, + "valid_targets_min": 725 + }, + { + "epoch": 0.04206098843322818, + "grad_norm": 5.524276666499735, + "learning_rate": 2.3423423423423424e-06, + "loss": 0.6395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6292667984962463, + "step": 40, + "valid_targets_mean": 1371.5, + "valid_targets_min": 610 + }, + { + "epoch": 0.0473186119873817, + "grad_norm": 4.309382783812248, + "learning_rate": 2.642642642642643e-06, + "loss": 0.5812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5641481876373291, + "step": 45, + "valid_targets_mean": 1560.6, + "valid_targets_min": 720 + }, + { + "epoch": 0.052576235541535225, + "grad_norm": 3.977345905029791, + "learning_rate": 2.942942942942943e-06, + "loss": 0.5305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4955446720123291, + "step": 50, + "valid_targets_mean": 
1439.9, + "valid_targets_min": 765 + }, + { + "epoch": 0.05783385909568875, + "grad_norm": 2.2191077624395343, + "learning_rate": 3.2432432432432437e-06, + "loss": 0.4958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5143522024154663, + "step": 55, + "valid_targets_mean": 1455.1, + "valid_targets_min": 593 + }, + { + "epoch": 0.06309148264984227, + "grad_norm": 1.699779263330342, + "learning_rate": 3.5435435435435437e-06, + "loss": 0.4717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4495570659637451, + "step": 60, + "valid_targets_mean": 1540.4, + "valid_targets_min": 704 + }, + { + "epoch": 0.0683491062039958, + "grad_norm": 1.558544874011996, + "learning_rate": 3.843843843843844e-06, + "loss": 0.4494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.42444512248039246, + "step": 65, + "valid_targets_mean": 1387.2, + "valid_targets_min": 722 + }, + { + "epoch": 0.07360672975814932, + "grad_norm": 1.230255486271066, + "learning_rate": 4.1441441441441446e-06, + "loss": 0.4231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3952111601829529, + "step": 70, + "valid_targets_mean": 1581.9, + "valid_targets_min": 865 + }, + { + "epoch": 0.07886435331230283, + "grad_norm": 1.1773183195088508, + "learning_rate": 4.444444444444444e-06, + "loss": 0.4086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4091920256614685, + "step": 75, + "valid_targets_mean": 1635.6, + "valid_targets_min": 797 + }, + { + "epoch": 0.08412197686645637, + "grad_norm": 1.212376527573756, + "learning_rate": 4.7447447447447454e-06, + "loss": 0.4084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4411184787750244, + "step": 80, + "valid_targets_mean": 1542.4, + "valid_targets_min": 728 + }, + { + "epoch": 0.08937960042060988, + "grad_norm": 1.1207442958501526, + "learning_rate": 5.045045045045045e-06, + "loss": 0.3979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39498135447502136, + "step": 85, + "valid_targets_mean": 1879.2, + "valid_targets_min": 683 + }, + { + "epoch": 0.0946372239747634, + "grad_norm": 
1.2193312077699985, + "learning_rate": 5.345345345345346e-06, + "loss": 0.3871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39774513244628906, + "step": 90, + "valid_targets_mean": 1409.0, + "valid_targets_min": 706 + }, + { + "epoch": 0.09989484752891693, + "grad_norm": 1.1973976745204826, + "learning_rate": 5.645645645645647e-06, + "loss": 0.3651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36110740900039673, + "step": 95, + "valid_targets_mean": 1377.8, + "valid_targets_min": 625 + }, + { + "epoch": 0.10515247108307045, + "grad_norm": 0.9711829227364538, + "learning_rate": 5.945945945945947e-06, + "loss": 0.3606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36348843574523926, + "step": 100, + "valid_targets_mean": 1811.9, + "valid_targets_min": 790 + }, + { + "epoch": 0.11041009463722397, + "grad_norm": 1.0436048206811175, + "learning_rate": 6.246246246246247e-06, + "loss": 0.3616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3778885304927826, + "step": 105, + "valid_targets_mean": 1814.3, + "valid_targets_min": 684 + }, + { + "epoch": 0.1156677181913775, + "grad_norm": 1.0480485575994163, + "learning_rate": 6.546546546546547e-06, + "loss": 0.3525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.327975332736969, + "step": 110, + "valid_targets_mean": 1493.4, + "valid_targets_min": 739 + }, + { + "epoch": 0.12092534174553102, + "grad_norm": 1.077406110311962, + "learning_rate": 6.846846846846848e-06, + "loss": 0.3401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32423490285873413, + "step": 115, + "valid_targets_mean": 1456.2, + "valid_targets_min": 657 + }, + { + "epoch": 0.12618296529968454, + "grad_norm": 1.0059803201811535, + "learning_rate": 7.147147147147148e-06, + "loss": 0.3306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32918429374694824, + "step": 120, + "valid_targets_mean": 1340.2, + "valid_targets_min": 783 + }, + { + "epoch": 0.13144058885383805, + "grad_norm": 1.0566785983665758, + "learning_rate": 7.447447447447448e-06, + "loss": 0.3261, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.3207254409790039, + "step": 125, + "valid_targets_mean": 1569.0, + "valid_targets_min": 734 + }, + { + "epoch": 0.1366982124079916, + "grad_norm": 0.982078723172686, + "learning_rate": 7.747747747747749e-06, + "loss": 0.3199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31849318742752075, + "step": 130, + "valid_targets_mean": 1595.7, + "valid_targets_min": 678 + }, + { + "epoch": 0.14195583596214512, + "grad_norm": 1.0044175903244321, + "learning_rate": 8.048048048048048e-06, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31595051288604736, + "step": 135, + "valid_targets_mean": 1801.3, + "valid_targets_min": 843 + }, + { + "epoch": 0.14721345951629863, + "grad_norm": 1.0412575758676315, + "learning_rate": 8.348348348348348e-06, + "loss": 0.3202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32682791352272034, + "step": 140, + "valid_targets_mean": 1652.4, + "valid_targets_min": 782 + }, + { + "epoch": 0.15247108307045215, + "grad_norm": 1.208059994485327, + "learning_rate": 8.64864864864865e-06, + "loss": 0.3065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33247053623199463, + "step": 145, + "valid_targets_mean": 1684.6, + "valid_targets_min": 957 + }, + { + "epoch": 0.15772870662460567, + "grad_norm": 1.064399334326978, + "learning_rate": 8.94894894894895e-06, + "loss": 0.3157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32549673318862915, + "step": 150, + "valid_targets_mean": 1482.9, + "valid_targets_min": 713 + }, + { + "epoch": 0.16298633017875921, + "grad_norm": 1.024962358387954, + "learning_rate": 9.24924924924925e-06, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3149445652961731, + "step": 155, + "valid_targets_mean": 1710.1, + "valid_targets_min": 726 + }, + { + "epoch": 0.16824395373291273, + "grad_norm": 0.9678110198452141, + "learning_rate": 9.54954954954955e-06, + "loss": 0.2996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3004312217235565, + "step": 160, + "valid_targets_mean": 
1638.9, + "valid_targets_min": 1147 + }, + { + "epoch": 0.17350157728706625, + "grad_norm": 1.0120192177417784, + "learning_rate": 9.849849849849851e-06, + "loss": 0.2971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3038880228996277, + "step": 165, + "valid_targets_mean": 1525.7, + "valid_targets_min": 700 + }, + { + "epoch": 0.17875920084121977, + "grad_norm": 1.1147511433571193, + "learning_rate": 1.015015015015015e-05, + "loss": 0.285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.268440842628479, + "step": 170, + "valid_targets_mean": 1255.4, + "valid_targets_min": 640 + }, + { + "epoch": 0.18401682439537329, + "grad_norm": 1.055664621119984, + "learning_rate": 1.0450450450450452e-05, + "loss": 0.2909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28668320178985596, + "step": 175, + "valid_targets_mean": 1618.7, + "valid_targets_min": 968 + }, + { + "epoch": 0.1892744479495268, + "grad_norm": 1.045906383931406, + "learning_rate": 1.0750750750750751e-05, + "loss": 0.2916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.275592178106308, + "step": 180, + "valid_targets_mean": 1466.6, + "valid_targets_min": 724 + }, + { + "epoch": 0.19453207150368035, + "grad_norm": 0.9773273807006373, + "learning_rate": 1.1051051051051051e-05, + "loss": 0.2937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29187920689582825, + "step": 185, + "valid_targets_mean": 1455.1, + "valid_targets_min": 657 + }, + { + "epoch": 0.19978969505783387, + "grad_norm": 1.2727714967466175, + "learning_rate": 1.1351351351351352e-05, + "loss": 0.2923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3005208373069763, + "step": 190, + "valid_targets_mean": 1309.6, + "valid_targets_min": 746 + }, + { + "epoch": 0.20504731861198738, + "grad_norm": 1.1244664228985333, + "learning_rate": 1.1651651651651652e-05, + "loss": 0.2952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3339601755142212, + "step": 195, + "valid_targets_mean": 1437.6, + "valid_targets_min": 692 + }, + { + "epoch": 0.2103049421661409, + "grad_norm": 
1.0742134062044455, + "learning_rate": 1.1951951951951951e-05, + "loss": 0.2827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.260494202375412, + "step": 200, + "valid_targets_mean": 1344.7, + "valid_targets_min": 776 + }, + { + "epoch": 0.21556256572029442, + "grad_norm": 1.0009127030529608, + "learning_rate": 1.2252252252252253e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27542197704315186, + "step": 205, + "valid_targets_mean": 1535.1, + "valid_targets_min": 703 + }, + { + "epoch": 0.22082018927444794, + "grad_norm": 1.0352483052750439, + "learning_rate": 1.2552552552552552e-05, + "loss": 0.2986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2706538438796997, + "step": 210, + "valid_targets_mean": 1439.4, + "valid_targets_min": 573 + }, + { + "epoch": 0.22607781282860148, + "grad_norm": 0.9767573807189796, + "learning_rate": 1.2852852852852854e-05, + "loss": 0.3021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3644488453865051, + "step": 215, + "valid_targets_mean": 1967.9, + "valid_targets_min": 734 + }, + { + "epoch": 0.231335436382755, + "grad_norm": 0.9537088929251228, + "learning_rate": 1.3153153153153155e-05, + "loss": 0.2764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2756945788860321, + "step": 220, + "valid_targets_mean": 1489.4, + "valid_targets_min": 607 + }, + { + "epoch": 0.23659305993690852, + "grad_norm": 0.9169366781867578, + "learning_rate": 1.3453453453453456e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2865472733974457, + "step": 225, + "valid_targets_mean": 1777.6, + "valid_targets_min": 746 + }, + { + "epoch": 0.24185068349106204, + "grad_norm": 1.4480786530892165, + "learning_rate": 1.3753753753753756e-05, + "loss": 0.2858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30552157759666443, + "step": 230, + "valid_targets_mean": 1648.7, + "valid_targets_min": 676 + }, + { + "epoch": 0.24710830704521555, + "grad_norm": 1.028602809234246, + "learning_rate": 1.4054054054054055e-05, + "loss": 0.274, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.258034348487854, + "step": 235, + "valid_targets_mean": 1432.2, + "valid_targets_min": 780 + }, + { + "epoch": 0.25236593059936907, + "grad_norm": 0.9618845035216391, + "learning_rate": 1.4354354354354357e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26978522539138794, + "step": 240, + "valid_targets_mean": 1549.0, + "valid_targets_min": 677 + }, + { + "epoch": 0.2576235541535226, + "grad_norm": 1.1533463384468676, + "learning_rate": 1.4654654654654656e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26131436228752136, + "step": 245, + "valid_targets_mean": 1158.9, + "valid_targets_min": 737 + }, + { + "epoch": 0.2628811777076761, + "grad_norm": 0.9892775002286349, + "learning_rate": 1.4954954954954957e-05, + "loss": 0.2838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30346208810806274, + "step": 250, + "valid_targets_mean": 1459.0, + "valid_targets_min": 726 + }, + { + "epoch": 0.26813880126182965, + "grad_norm": 0.8754555049354074, + "learning_rate": 1.5255255255255257e-05, + "loss": 0.2736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2807902693748474, + "step": 255, + "valid_targets_mean": 1715.9, + "valid_targets_min": 786 + }, + { + "epoch": 0.2733964248159832, + "grad_norm": 0.959347461436203, + "learning_rate": 1.555555555555556e-05, + "loss": 0.2668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2573663890361786, + "step": 260, + "valid_targets_mean": 1492.1, + "valid_targets_min": 944 + }, + { + "epoch": 0.2786540483701367, + "grad_norm": 1.0059380552130364, + "learning_rate": 1.5855855855855858e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515960931777954, + "step": 265, + "valid_targets_mean": 1427.2, + "valid_targets_min": 963 + }, + { + "epoch": 0.28391167192429023, + "grad_norm": 1.0219411210537412, + "learning_rate": 1.6156156156156157e-05, + "loss": 0.2672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2408115416765213, + "step": 270, + 
"valid_targets_mean": 1435.9, + "valid_targets_min": 740 + }, + { + "epoch": 0.2891692954784437, + "grad_norm": 0.9020434518290471, + "learning_rate": 1.6456456456456457e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702721655368805, + "step": 275, + "valid_targets_mean": 1745.8, + "valid_targets_min": 727 + }, + { + "epoch": 0.29442691903259727, + "grad_norm": 1.0211125239673158, + "learning_rate": 1.6756756756756757e-05, + "loss": 0.2626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27518072724342346, + "step": 280, + "valid_targets_mean": 1522.8, + "valid_targets_min": 636 + }, + { + "epoch": 0.2996845425867508, + "grad_norm": 1.0789002466783877, + "learning_rate": 1.705705705705706e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558009624481201, + "step": 285, + "valid_targets_mean": 1510.6, + "valid_targets_min": 684 + }, + { + "epoch": 0.3049421661409043, + "grad_norm": 0.9665450503881389, + "learning_rate": 1.735735735735736e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27275264263153076, + "step": 290, + "valid_targets_mean": 1509.7, + "valid_targets_min": 798 + }, + { + "epoch": 0.31019978969505785, + "grad_norm": 1.1483786872452868, + "learning_rate": 1.765765765765766e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25014638900756836, + "step": 295, + "valid_targets_mean": 1489.8, + "valid_targets_min": 661 + }, + { + "epoch": 0.31545741324921134, + "grad_norm": 1.0398642224210262, + "learning_rate": 1.795795795795796e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2589448392391205, + "step": 300, + "valid_targets_mean": 1510.6, + "valid_targets_min": 665 + }, + { + "epoch": 0.3207150368033649, + "grad_norm": 1.1747477187677065, + "learning_rate": 1.8258258258258258e-05, + "loss": 0.2544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27208995819091797, + "step": 305, + "valid_targets_mean": 1534.2, + "valid_targets_min": 692 + }, + { + "epoch": 
0.32597266035751843, + "grad_norm": 1.210151289764671, + "learning_rate": 1.855855855855856e-05, + "loss": 0.2599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26101744174957275, + "step": 310, + "valid_targets_mean": 1480.6, + "valid_targets_min": 632 + }, + { + "epoch": 0.3312302839116719, + "grad_norm": 1.1167145455105136, + "learning_rate": 1.885885885885886e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.276511549949646, + "step": 315, + "valid_targets_mean": 1504.4, + "valid_targets_min": 922 + }, + { + "epoch": 0.33648790746582546, + "grad_norm": 0.9304470939793734, + "learning_rate": 1.915915915915916e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24150532484054565, + "step": 320, + "valid_targets_mean": 1624.8, + "valid_targets_min": 710 + }, + { + "epoch": 0.34174553101997895, + "grad_norm": 0.9920499590419062, + "learning_rate": 1.9459459459459463e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2353840470314026, + "step": 325, + "valid_targets_mean": 1320.1, + "valid_targets_min": 589 + }, + { + "epoch": 0.3470031545741325, + "grad_norm": 1.2160498593989866, + "learning_rate": 1.9759759759759763e-05, + "loss": 0.2581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27118825912475586, + "step": 330, + "valid_targets_mean": 1348.9, + "valid_targets_min": 750 + }, + { + "epoch": 0.352260778128286, + "grad_norm": 1.0827711833942575, + "learning_rate": 2.0060060060060062e-05, + "loss": 0.2696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597997188568115, + "step": 335, + "valid_targets_mean": 1333.6, + "valid_targets_min": 641 + }, + { + "epoch": 0.35751840168243953, + "grad_norm": 0.9895783250955589, + "learning_rate": 2.0360360360360362e-05, + "loss": 0.2601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2682701349258423, + "step": 340, + "valid_targets_mean": 1649.7, + "valid_targets_min": 701 + }, + { + "epoch": 0.3627760252365931, + "grad_norm": 0.9057614789606002, + "learning_rate": 
2.066066066066066e-05, + "loss": 0.2719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23891566693782806, + "step": 345, + "valid_targets_mean": 1819.2, + "valid_targets_min": 1219 + }, + { + "epoch": 0.36803364879074657, + "grad_norm": 0.889783285793925, + "learning_rate": 2.0960960960960964e-05, + "loss": 0.2619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2716279625892639, + "step": 350, + "valid_targets_mean": 1603.2, + "valid_targets_min": 770 + }, + { + "epoch": 0.3732912723449001, + "grad_norm": 0.9536373019208444, + "learning_rate": 2.1261261261261264e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24380457401275635, + "step": 355, + "valid_targets_mean": 1434.4, + "valid_targets_min": 646 + }, + { + "epoch": 0.3785488958990536, + "grad_norm": 1.115443184798519, + "learning_rate": 2.1561561561561564e-05, + "loss": 0.256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26643645763397217, + "step": 360, + "valid_targets_mean": 1250.6, + "valid_targets_min": 723 + }, + { + "epoch": 0.38380651945320715, + "grad_norm": 0.9355379407440322, + "learning_rate": 2.1861861861861863e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23333057761192322, + "step": 365, + "valid_targets_mean": 1461.3, + "valid_targets_min": 808 + }, + { + "epoch": 0.3890641430073607, + "grad_norm": 1.077924971950134, + "learning_rate": 2.2162162162162163e-05, + "loss": 0.2629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515951991081238, + "step": 370, + "valid_targets_mean": 1299.6, + "valid_targets_min": 688 + }, + { + "epoch": 0.3943217665615142, + "grad_norm": 0.9940302187277178, + "learning_rate": 2.2462462462462466e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508964240550995, + "step": 375, + "valid_targets_mean": 1337.1, + "valid_targets_min": 821 + }, + { + "epoch": 0.39957939011566773, + "grad_norm": 0.9542256345373819, + "learning_rate": 2.2762762762762765e-05, + "loss": 0.2447, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.23581913113594055, + "step": 380, + "valid_targets_mean": 1472.1, + "valid_targets_min": 849 + }, + { + "epoch": 0.4048370136698212, + "grad_norm": 1.1979823419315119, + "learning_rate": 2.3063063063063065e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23929482698440552, + "step": 385, + "valid_targets_mean": 1464.6, + "valid_targets_min": 781 + }, + { + "epoch": 0.41009463722397477, + "grad_norm": 1.1127158719166208, + "learning_rate": 2.3363363363363364e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24247753620147705, + "step": 390, + "valid_targets_mean": 1378.8, + "valid_targets_min": 522 + }, + { + "epoch": 0.4153522607781283, + "grad_norm": 0.9956267557503373, + "learning_rate": 2.3663663663663664e-05, + "loss": 0.2404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.212764710187912, + "step": 395, + "valid_targets_mean": 1256.2, + "valid_targets_min": 694 + }, + { + "epoch": 0.4206098843322818, + "grad_norm": 0.8406949700595693, + "learning_rate": 2.3963963963963967e-05, + "loss": 0.2501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2643095850944519, + "step": 400, + "valid_targets_mean": 2054.5, + "valid_targets_min": 868 + }, + { + "epoch": 0.42586750788643535, + "grad_norm": 1.0208095852397516, + "learning_rate": 2.4264264264264267e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21735705435276031, + "step": 405, + "valid_targets_mean": 1743.0, + "valid_targets_min": 682 + }, + { + "epoch": 0.43112513144058884, + "grad_norm": 0.9853785993337534, + "learning_rate": 2.4564564564564566e-05, + "loss": 0.2372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26856714487075806, + "step": 410, + "valid_targets_mean": 1531.3, + "valid_targets_min": 797 + }, + { + "epoch": 0.4363827549947424, + "grad_norm": 0.8454632207154943, + "learning_rate": 2.4864864864864866e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24718914926052094, + "step": 415, + "valid_targets_mean": 1780.8, + 
"valid_targets_min": 639 + }, + { + "epoch": 0.4416403785488959, + "grad_norm": 1.3887975257786067, + "learning_rate": 2.5165165165165165e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24860824644565582, + "step": 420, + "valid_targets_mean": 1485.3, + "valid_targets_min": 920 + }, + { + "epoch": 0.4468980021030494, + "grad_norm": 0.9491452435962007, + "learning_rate": 2.5465465465465465e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24007216095924377, + "step": 425, + "valid_targets_mean": 1408.1, + "valid_targets_min": 874 + }, + { + "epoch": 0.45215562565720296, + "grad_norm": 0.8953373435850912, + "learning_rate": 2.5765765765765768e-05, + "loss": 0.2464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24175237119197845, + "step": 430, + "valid_targets_mean": 1382.9, + "valid_targets_min": 557 + }, + { + "epoch": 0.45741324921135645, + "grad_norm": 1.214865138105168, + "learning_rate": 2.6066066066066067e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2626429498195648, + "step": 435, + "valid_targets_mean": 1290.4, + "valid_targets_min": 645 + }, + { + "epoch": 0.46267087276551, + "grad_norm": 0.9553001424874544, + "learning_rate": 2.6366366366366367e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25079411268234253, + "step": 440, + "valid_targets_mean": 1449.4, + "valid_targets_min": 393 + }, + { + "epoch": 0.4679284963196635, + "grad_norm": 0.7781379429239883, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2495959997177124, + "step": 445, + "valid_targets_mean": 2912.0, + "valid_targets_min": 458 + }, + { + "epoch": 0.47318611987381703, + "grad_norm": 0.6840617027205644, + "learning_rate": 2.6966966966966966e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17593586444854736, + "step": 450, + "valid_targets_mean": 3155.2, + "valid_targets_min": 1491 + }, + { + "epoch": 0.4784437434279706, + "grad_norm": 
0.5786177388731288, + "learning_rate": 2.726726726726727e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1732887625694275, + "step": 455, + "valid_targets_mean": 3495.2, + "valid_targets_min": 2068 + }, + { + "epoch": 0.48370136698212407, + "grad_norm": 1.0771543738683813, + "learning_rate": 2.756756756756757e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16611456871032715, + "step": 460, + "valid_targets_mean": 3671.3, + "valid_targets_min": 2425 + }, + { + "epoch": 0.4889589905362776, + "grad_norm": 0.583602213376592, + "learning_rate": 2.786786786786787e-05, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16041299700737, + "step": 465, + "valid_targets_mean": 3131.7, + "valid_targets_min": 1198 + }, + { + "epoch": 0.4942166140904311, + "grad_norm": 1.3152094414132671, + "learning_rate": 2.8168168168168168e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2620253264904022, + "step": 470, + "valid_targets_mean": 1250.6, + "valid_targets_min": 663 + }, + { + "epoch": 0.49947423764458465, + "grad_norm": 0.6807191146727155, + "learning_rate": 2.8468468468468467e-05, + "loss": 0.2507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17434732615947723, + "step": 475, + "valid_targets_mean": 3237.3, + "valid_targets_min": 954 + }, + { + "epoch": 0.5047318611987381, + "grad_norm": 0.6089177015775961, + "learning_rate": 2.8768768768768774e-05, + "loss": 0.1682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19791068136692047, + "step": 480, + "valid_targets_mean": 3020.2, + "valid_targets_min": 781 + }, + { + "epoch": 0.5099894847528917, + "grad_norm": 0.4706237346681839, + "learning_rate": 2.9069069069069073e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12845684587955475, + "step": 485, + "valid_targets_mean": 3357.7, + "valid_targets_min": 799 + }, + { + "epoch": 0.5152471083070452, + "grad_norm": 0.4312586193953129, + "learning_rate": 2.9369369369369373e-05, + "loss": 0.1879, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.130380317568779, + "step": 490, + "valid_targets_mean": 4872.1, + "valid_targets_min": 2802 + }, + { + "epoch": 0.5205047318611987, + "grad_norm": 0.5202579147947507, + "learning_rate": 2.9669669669669673e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13587605953216553, + "step": 495, + "valid_targets_mean": 2990.6, + "valid_targets_min": 574 + }, + { + "epoch": 0.5257623554153522, + "grad_norm": 0.5227023766325937, + "learning_rate": 2.9969969969969976e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1409936547279358, + "step": 500, + "valid_targets_mean": 3588.3, + "valid_targets_min": 813 + }, + { + "epoch": 0.5310199789695058, + "grad_norm": 0.73045070317564, + "learning_rate": 3.0270270270270275e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.213816300034523, + "step": 505, + "valid_targets_mean": 2007.3, + "valid_targets_min": 593 + }, + { + "epoch": 0.5362776025236593, + "grad_norm": 0.5094046552569875, + "learning_rate": 3.0570570570570575e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14146271347999573, + "step": 510, + "valid_targets_mean": 3185.9, + "valid_targets_min": 1185 + }, + { + "epoch": 0.5415352260778128, + "grad_norm": 0.7568508998245875, + "learning_rate": 3.0870870870870874e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19519582390785217, + "step": 515, + "valid_targets_mean": 1841.9, + "valid_targets_min": 376 + }, + { + "epoch": 0.5467928496319664, + "grad_norm": 0.4641975005934539, + "learning_rate": 3.1171171171171174e-05, + "loss": 0.1439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10629108548164368, + "step": 520, + "valid_targets_mean": 2904.6, + "valid_targets_min": 910 + }, + { + "epoch": 0.5520504731861199, + "grad_norm": 0.5461483785001882, + "learning_rate": 3.1471471471471473e-05, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12796208262443542, + "step": 525, + 
"valid_targets_mean": 3779.6, + "valid_targets_min": 2798 + }, + { + "epoch": 0.5573080967402734, + "grad_norm": 0.5588226619732147, + "learning_rate": 3.177177177177177e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15388840436935425, + "step": 530, + "valid_targets_mean": 2611.1, + "valid_targets_min": 471 + }, + { + "epoch": 0.562565720294427, + "grad_norm": 0.5957845529610092, + "learning_rate": 3.207207207207207e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.170301616191864, + "step": 535, + "valid_targets_mean": 2535.8, + "valid_targets_min": 652 + }, + { + "epoch": 0.5678233438485805, + "grad_norm": 0.5388537314765058, + "learning_rate": 3.237237237237238e-05, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16233664751052856, + "step": 540, + "valid_targets_mean": 3190.8, + "valid_targets_min": 474 + }, + { + "epoch": 0.573080967402734, + "grad_norm": 0.7842135944261199, + "learning_rate": 3.267267267267268e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1310577094554901, + "step": 545, + "valid_targets_mean": 3511.1, + "valid_targets_min": 640 + }, + { + "epoch": 0.5783385909568874, + "grad_norm": 0.45798139346184796, + "learning_rate": 3.297297297297298e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12865862250328064, + "step": 550, + "valid_targets_mean": 3521.9, + "valid_targets_min": 902 + }, + { + "epoch": 0.583596214511041, + "grad_norm": 0.5444764260919418, + "learning_rate": 3.327327327327328e-05, + "loss": 0.1379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14510411024093628, + "step": 555, + "valid_targets_mean": 3062.9, + "valid_targets_min": 1568 + }, + { + "epoch": 0.5888538380651945, + "grad_norm": 1.04660911026327, + "learning_rate": 3.357357357357358e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559956908226013, + "step": 560, + "valid_targets_mean": 1179.9, + "valid_targets_min": 534 + }, + { + "epoch": 0.594111461619348, + 
"grad_norm": 0.7171985572053283, + "learning_rate": 3.387387387387388e-05, + "loss": 0.1771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18237915635108948, + "step": 565, + "valid_targets_mean": 2067.3, + "valid_targets_min": 548 + }, + { + "epoch": 0.5993690851735016, + "grad_norm": 0.5433556965958147, + "learning_rate": 3.4174174174174176e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11692153662443161, + "step": 570, + "valid_targets_mean": 2839.8, + "valid_targets_min": 664 + }, + { + "epoch": 0.6046267087276551, + "grad_norm": 0.7251220527202389, + "learning_rate": 3.4474474474474476e-05, + "loss": 0.2978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3964594006538391, + "step": 575, + "valid_targets_mean": 2888.0, + "valid_targets_min": 878 + }, + { + "epoch": 0.6098843322818086, + "grad_norm": 0.6391538888404741, + "learning_rate": 3.4774774774774776e-05, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17046722769737244, + "step": 580, + "valid_targets_mean": 2609.9, + "valid_targets_min": 756 + }, + { + "epoch": 0.6151419558359621, + "grad_norm": 0.47641357301754167, + "learning_rate": 3.5075075075075075e-05, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12254441529512405, + "step": 585, + "valid_targets_mean": 3436.7, + "valid_targets_min": 1968 + }, + { + "epoch": 0.6203995793901157, + "grad_norm": 0.7891340904704162, + "learning_rate": 3.5375375375375375e-05, + "loss": 0.2494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4564532935619354, + "step": 590, + "valid_targets_mean": 2644.9, + "valid_targets_min": 1153 + }, + { + "epoch": 0.6256572029442692, + "grad_norm": 0.3660416250318796, + "learning_rate": 3.567567567567568e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11844614893198013, + "step": 595, + "valid_targets_mean": 4109.6, + "valid_targets_min": 1010 + }, + { + "epoch": 0.6309148264984227, + "grad_norm": 0.4193625327298725, + "learning_rate": 3.597597597597598e-05, + "loss": 
0.1352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13238012790679932, + "step": 600, + "valid_targets_mean": 4150.4, + "valid_targets_min": 789 + }, + { + "epoch": 0.6361724500525763, + "grad_norm": 0.4545358997390923, + "learning_rate": 3.627627627627628e-05, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16790561378002167, + "step": 605, + "valid_targets_mean": 3907.9, + "valid_targets_min": 784 + }, + { + "epoch": 0.6414300736067298, + "grad_norm": 0.5238318432896075, + "learning_rate": 3.657657657657658e-05, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1229844018816948, + "step": 610, + "valid_targets_mean": 2426.2, + "valid_targets_min": 488 + }, + { + "epoch": 0.6466876971608833, + "grad_norm": 0.5894750567460497, + "learning_rate": 3.687687687687688e-05, + "loss": 0.1438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15686744451522827, + "step": 615, + "valid_targets_mean": 3060.2, + "valid_targets_min": 774 + }, + { + "epoch": 0.6519453207150369, + "grad_norm": 0.5707030448482538, + "learning_rate": 3.717717717717718e-05, + "loss": 0.1308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1290627419948578, + "step": 620, + "valid_targets_mean": 2013.9, + "valid_targets_min": 712 + }, + { + "epoch": 0.6572029442691903, + "grad_norm": 0.5570257691076398, + "learning_rate": 3.747747747747748e-05, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11516053229570389, + "step": 625, + "valid_targets_mean": 3445.8, + "valid_targets_min": 1770 + }, + { + "epoch": 0.6624605678233438, + "grad_norm": 0.4271290599062706, + "learning_rate": 3.777777777777778e-05, + "loss": 0.1201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11819294840097427, + "step": 630, + "valid_targets_mean": 3535.2, + "valid_targets_min": 1761 + }, + { + "epoch": 0.6677181913774973, + "grad_norm": 0.38302613544250624, + "learning_rate": 3.807807807807808e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09289303421974182, + "step": 635, + 
"valid_targets_mean": 3596.6, + "valid_targets_min": 772 + }, + { + "epoch": 0.6729758149316509, + "grad_norm": 0.5465024682283733, + "learning_rate": 3.837837837837838e-05, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12863999605178833, + "step": 640, + "valid_targets_mean": 3249.3, + "valid_targets_min": 722 + }, + { + "epoch": 0.6782334384858044, + "grad_norm": 0.436044478357065, + "learning_rate": 3.8678678678678684e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10554418712854385, + "step": 645, + "valid_targets_mean": 3712.8, + "valid_targets_min": 2535 + }, + { + "epoch": 0.6834910620399579, + "grad_norm": 0.47689707142391047, + "learning_rate": 3.897897897897898e-05, + "loss": 0.1263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12995974719524384, + "step": 650, + "valid_targets_mean": 3262.0, + "valid_targets_min": 948 + }, + { + "epoch": 0.6887486855941115, + "grad_norm": 0.7118294827025267, + "learning_rate": 3.927927927927928e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569500803947449, + "step": 655, + "valid_targets_mean": 1491.4, + "valid_targets_min": 490 + }, + { + "epoch": 0.694006309148265, + "grad_norm": 0.41210337778599393, + "learning_rate": 3.957957957957958e-05, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13101404905319214, + "step": 660, + "valid_targets_mean": 3696.4, + "valid_targets_min": 1196 + }, + { + "epoch": 0.6992639327024185, + "grad_norm": 0.5867759477767424, + "learning_rate": 3.987987987987988e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17820844054222107, + "step": 665, + "valid_targets_mean": 1915.5, + "valid_targets_min": 575 + }, + { + "epoch": 0.704521556256572, + "grad_norm": 0.49597583572323783, + "learning_rate": 3.9999975251805184e-05, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13559472560882568, + "step": 670, + "valid_targets_mean": 2728.7, + "valid_targets_min": 524 + }, + { + "epoch": 
0.7097791798107256, + "grad_norm": 0.5231404790443446, + "learning_rate": 3.9999824013058675e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14908647537231445, + "step": 675, + "valid_targets_mean": 2827.4, + "valid_targets_min": 693 + }, + { + "epoch": 0.7150368033648791, + "grad_norm": 0.5497626230412085, + "learning_rate": 3.99995352856012e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12418384104967117, + "step": 680, + "valid_targets_mean": 2377.9, + "valid_targets_min": 803 + }, + { + "epoch": 0.7202944269190326, + "grad_norm": 0.527548434551724, + "learning_rate": 3.999910907141761e-05, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1570894569158554, + "step": 685, + "valid_targets_mean": 2733.9, + "valid_targets_min": 764 + }, + { + "epoch": 0.7255520504731862, + "grad_norm": 0.6018463628202171, + "learning_rate": 3.9998545373437924e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13867342472076416, + "step": 690, + "valid_targets_mean": 3892.4, + "valid_targets_min": 2393 + }, + { + "epoch": 0.7308096740273397, + "grad_norm": 0.43425828709088526, + "learning_rate": 3.999784419553728e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12710916996002197, + "step": 695, + "valid_targets_mean": 3477.7, + "valid_targets_min": 804 + }, + { + "epoch": 0.7360672975814931, + "grad_norm": 0.38137677229642736, + "learning_rate": 3.9997005542535916e-05, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13690686225891113, + "step": 700, + "valid_targets_mean": 4753.2, + "valid_targets_min": 1549 + }, + { + "epoch": 0.7413249211356467, + "grad_norm": 0.516289151825183, + "learning_rate": 3.9996029420199154e-05, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13586783409118652, + "step": 705, + "valid_targets_mean": 2988.9, + "valid_targets_min": 596 + }, + { + "epoch": 0.7465825446898002, + "grad_norm": 0.31228545003568126, + "learning_rate": 
3.9994915835237336e-05, + "loss": 0.1189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09548960626125336, + "step": 710, + "valid_targets_mean": 4565.8, + "valid_targets_min": 896 + }, + { + "epoch": 0.7518401682439537, + "grad_norm": 0.3945771727525488, + "learning_rate": 3.999366479530581e-05, + "loss": 0.1325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11906930804252625, + "step": 715, + "valid_targets_mean": 3763.9, + "valid_targets_min": 2696 + }, + { + "epoch": 0.7570977917981072, + "grad_norm": 0.7247875055180788, + "learning_rate": 3.999227630900483e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1936461329460144, + "step": 720, + "valid_targets_mean": 1522.7, + "valid_targets_min": 203 + }, + { + "epoch": 0.7623554153522608, + "grad_norm": 0.3900335889448843, + "learning_rate": 3.9990750385879554e-05, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14570674300193787, + "step": 725, + "valid_targets_mean": 3974.7, + "valid_targets_min": 2443 + }, + { + "epoch": 0.7676130389064143, + "grad_norm": 0.6361138958968884, + "learning_rate": 3.998908703641993e-05, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14177639782428741, + "step": 730, + "valid_targets_mean": 1714.2, + "valid_targets_min": 716 + }, + { + "epoch": 0.7728706624605678, + "grad_norm": 0.4223784054438746, + "learning_rate": 3.9987286272060644e-05, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1300775706768036, + "step": 735, + "valid_targets_mean": 3683.6, + "valid_targets_min": 905 + }, + { + "epoch": 0.7781282860147214, + "grad_norm": 0.4459119486952276, + "learning_rate": 3.998534810518104e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15969018638134003, + "step": 740, + "valid_targets_mean": 2942.3, + "valid_targets_min": 657 + }, + { + "epoch": 0.7833859095688749, + "grad_norm": 0.4291063688699222, + "learning_rate": 3.998327254910504e-05, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.1203104555606842, + "step": 745, + "valid_targets_mean": 3584.6, + "valid_targets_min": 1744 + }, + { + "epoch": 0.7886435331230284, + "grad_norm": 0.581547670567422, + "learning_rate": 3.998105961810105e-05, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1824052333831787, + "step": 750, + "valid_targets_mean": 2055.4, + "valid_targets_min": 627 + }, + { + "epoch": 0.7939011566771819, + "grad_norm": 0.5621362218057954, + "learning_rate": 3.997870932738187e-05, + "loss": 0.1386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14463567733764648, + "step": 755, + "valid_targets_mean": 2057.7, + "valid_targets_min": 727 + }, + { + "epoch": 0.7991587802313355, + "grad_norm": 0.5485179695009602, + "learning_rate": 3.997622169310454e-05, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16229714453220367, + "step": 760, + "valid_targets_mean": 2091.9, + "valid_targets_min": 613 + }, + { + "epoch": 0.804416403785489, + "grad_norm": 0.4239666751101835, + "learning_rate": 3.9973596732370296e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12120245397090912, + "step": 765, + "valid_targets_mean": 3179.4, + "valid_targets_min": 958 + }, + { + "epoch": 0.8096740273396424, + "grad_norm": 0.5585633426392805, + "learning_rate": 3.997083446322443e-05, + "loss": 0.1389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13518524169921875, + "step": 770, + "valid_targets_mean": 2118.0, + "valid_targets_min": 495 + }, + { + "epoch": 0.814931650893796, + "grad_norm": 0.518219996419831, + "learning_rate": 3.9967934904656145e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13838447630405426, + "step": 775, + "valid_targets_mean": 2350.0, + "valid_targets_min": 759 + }, + { + "epoch": 0.8201892744479495, + "grad_norm": 0.6963817974170337, + "learning_rate": 3.9964898076598445e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22630974650382996, + "step": 780, + "valid_targets_mean": 1728.8, + "valid_targets_min": 521 + 
}, + { + "epoch": 0.825446898002103, + "grad_norm": 0.44668396794098764, + "learning_rate": 3.996172399992799e-05, + "loss": 0.1309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15021365880966187, + "step": 785, + "valid_targets_mean": 3473.8, + "valid_targets_min": 1032 + }, + { + "epoch": 0.8307045215562566, + "grad_norm": 0.3900514362810737, + "learning_rate": 3.995841269646496e-05, + "loss": 0.1123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09701497852802277, + "step": 790, + "valid_targets_mean": 3574.1, + "valid_targets_min": 841 + }, + { + "epoch": 0.8359621451104101, + "grad_norm": 0.4004405418682501, + "learning_rate": 3.995496418897291e-05, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1217922791838646, + "step": 795, + "valid_targets_mean": 2909.7, + "valid_targets_min": 927 + }, + { + "epoch": 0.8412197686645636, + "grad_norm": 0.4705994456418147, + "learning_rate": 3.995137850115856e-05, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14017799496650696, + "step": 800, + "valid_targets_mean": 3022.2, + "valid_targets_min": 935 + }, + { + "epoch": 0.8464773922187171, + "grad_norm": 0.3801804840642882, + "learning_rate": 3.994765565767174e-05, + "loss": 0.1125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10342194885015488, + "step": 805, + "valid_targets_mean": 2978.2, + "valid_targets_min": 647 + }, + { + "epoch": 0.8517350157728707, + "grad_norm": 0.3497300756384891, + "learning_rate": 3.9943795684105104e-05, + "loss": 0.1261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10366655886173248, + "step": 810, + "valid_targets_mean": 4808.4, + "valid_targets_min": 976 + }, + { + "epoch": 0.8569926393270242, + "grad_norm": 0.36306185739960156, + "learning_rate": 3.993979860699403e-05, + "loss": 0.1292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11242718994617462, + "step": 815, + "valid_targets_mean": 4306.9, + "valid_targets_min": 995 + }, + { + "epoch": 0.8622502628811777, + "grad_norm": 0.3413807088839418, + "learning_rate": 
3.993566445381641e-05, + "loss": 0.1077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10108646750450134, + "step": 820, + "valid_targets_mean": 4484.6, + "valid_targets_min": 949 + }, + { + "epoch": 0.8675078864353313, + "grad_norm": 0.4415224606461962, + "learning_rate": 3.9931393252992454e-05, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1572403758764267, + "step": 825, + "valid_targets_mean": 4246.4, + "valid_targets_min": 2724 + }, + { + "epoch": 0.8727655099894848, + "grad_norm": 0.3995550173486753, + "learning_rate": 3.992698503388453e-05, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10369531810283661, + "step": 830, + "valid_targets_mean": 2882.8, + "valid_targets_min": 424 + }, + { + "epoch": 0.8780231335436383, + "grad_norm": 0.41211727578638874, + "learning_rate": 3.992243982679691e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14552940428256989, + "step": 835, + "valid_targets_mean": 2827.2, + "valid_targets_min": 531 + }, + { + "epoch": 0.8832807570977917, + "grad_norm": 0.3939868535260792, + "learning_rate": 3.991775766297562e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10490452498197556, + "step": 840, + "valid_targets_mean": 3046.0, + "valid_targets_min": 855 + }, + { + "epoch": 0.8885383806519453, + "grad_norm": 0.44835264512341344, + "learning_rate": 3.991293857460815e-05, + "loss": 0.126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20958760380744934, + "step": 845, + "valid_targets_mean": 2666.1, + "valid_targets_min": 722 + }, + { + "epoch": 0.8937960042060988, + "grad_norm": 0.3936824512794017, + "learning_rate": 3.9907982594823326e-05, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12392207235097885, + "step": 850, + "valid_targets_mean": 3699.2, + "valid_targets_min": 1251 + }, + { + "epoch": 0.8990536277602523, + "grad_norm": 0.41845825991827296, + "learning_rate": 3.9902889757691e-05, + "loss": 0.1274, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.12586721777915955, + "step": 855, + "valid_targets_mean": 2607.3, + "valid_targets_min": 632 + }, + { + "epoch": 0.9043112513144059, + "grad_norm": 0.5454890157347261, + "learning_rate": 3.9897660098221866e-05, + "loss": 0.1373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14049319922924042, + "step": 860, + "valid_targets_mean": 1881.4, + "valid_targets_min": 595 + }, + { + "epoch": 0.9095688748685594, + "grad_norm": 0.6780503481583131, + "learning_rate": 3.98922936523672e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21052402257919312, + "step": 865, + "valid_targets_mean": 1679.2, + "valid_targets_min": 520 + }, + { + "epoch": 0.9148264984227129, + "grad_norm": 0.3689833882584315, + "learning_rate": 3.9886790457018604e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11735684424638748, + "step": 870, + "valid_targets_mean": 3019.8, + "valid_targets_min": 593 + }, + { + "epoch": 0.9200841219768665, + "grad_norm": 0.37814435403471575, + "learning_rate": 3.9881150550007776e-05, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10002461820840836, + "step": 875, + "valid_targets_mean": 3745.3, + "valid_targets_min": 1041 + }, + { + "epoch": 0.92534174553102, + "grad_norm": 0.4516740938999742, + "learning_rate": 3.987537397010624e-05, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13232296705245972, + "step": 880, + "valid_targets_mean": 3255.9, + "valid_targets_min": 676 + }, + { + "epoch": 0.9305993690851735, + "grad_norm": 0.47744791805814685, + "learning_rate": 3.9869460757025064e-05, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13045115768909454, + "step": 885, + "valid_targets_mean": 2534.0, + "valid_targets_min": 616 + }, + { + "epoch": 0.935856992639327, + "grad_norm": 0.3906362600554991, + "learning_rate": 3.9863410951414616e-05, + "loss": 0.1256, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11122287809848785, + "step": 890, + "valid_targets_mean": 2999.9, + "valid_targets_min": 
541 + }, + { + "epoch": 0.9411146161934806, + "grad_norm": 0.8413867252612346, + "learning_rate": 3.985722459486425e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752592921257019, + "step": 895, + "valid_targets_mean": 1254.4, + "valid_targets_min": 598 + }, + { + "epoch": 0.9463722397476341, + "grad_norm": 0.4227153957069782, + "learning_rate": 3.985090172990206e-05, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1223062127828598, + "step": 900, + "valid_targets_mean": 3019.1, + "valid_targets_min": 950 + }, + { + "epoch": 0.9516298633017876, + "grad_norm": 0.4870668105829589, + "learning_rate": 3.984444239999455e-05, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14176815748214722, + "step": 905, + "valid_targets_mean": 2472.1, + "valid_targets_min": 680 + }, + { + "epoch": 0.9568874868559412, + "grad_norm": 0.4285690495389545, + "learning_rate": 3.9837846649546354e-05, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1291240155696869, + "step": 910, + "valid_targets_mean": 3184.0, + "valid_targets_min": 774 + }, + { + "epoch": 0.9621451104100947, + "grad_norm": 0.37519230718292484, + "learning_rate": 3.9831114523899945e-05, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12980321049690247, + "step": 915, + "valid_targets_mean": 4097.8, + "valid_targets_min": 1474 + }, + { + "epoch": 0.9674027339642481, + "grad_norm": 0.27810477043487897, + "learning_rate": 3.982424606933529e-05, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09453219175338745, + "step": 920, + "valid_targets_mean": 5379.9, + "valid_targets_min": 3677 + }, + { + "epoch": 0.9726603575184016, + "grad_norm": 0.3434991396480145, + "learning_rate": 3.981724133306954e-05, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11569681763648987, + "step": 925, + "valid_targets_mean": 3455.8, + "valid_targets_min": 675 + }, + { + "epoch": 0.9779179810725552, + "grad_norm": 0.37513775382439024, + 
"learning_rate": 3.981010036325674e-05, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12736296653747559, + "step": 930, + "valid_targets_mean": 3888.9, + "valid_targets_min": 795 + }, + { + "epoch": 0.9831756046267087, + "grad_norm": 0.465838240040823, + "learning_rate": 3.980282320898746e-05, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2295304238796234, + "step": 935, + "valid_targets_mean": 2937.6, + "valid_targets_min": 605 + }, + { + "epoch": 0.9884332281808622, + "grad_norm": 0.40326534583786583, + "learning_rate": 3.9795409920288456e-05, + "loss": 0.1155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11375357955694199, + "step": 940, + "valid_targets_mean": 3050.1, + "valid_targets_min": 941 + }, + { + "epoch": 0.9936908517350158, + "grad_norm": 0.409678281306497, + "learning_rate": 3.978786054812236e-05, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12136652320623398, + "step": 945, + "valid_targets_mean": 2817.5, + "valid_targets_min": 878 + }, + { + "epoch": 0.9989484752891693, + "grad_norm": 0.5472768786848023, + "learning_rate": 3.9780175144387304e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.258529394865036, + "step": 950, + "valid_targets_mean": 2429.1, + "valid_targets_min": 955 + }, + { + "epoch": 1.0042060988433228, + "grad_norm": 0.8869798751645458, + "learning_rate": 3.977235376191656e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2949359714984894, + "step": 955, + "valid_targets_mean": 1576.1, + "valid_targets_min": 722 + }, + { + "epoch": 1.0094637223974763, + "grad_norm": 0.878915468864671, + "learning_rate": 3.9764396454478195e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25835198163986206, + "step": 960, + "valid_targets_mean": 1537.1, + "valid_targets_min": 725 + }, + { + "epoch": 1.0147213459516298, + "grad_norm": 0.841572126611415, + "learning_rate": 3.975630327677468e-05, + "loss": 0.2547, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.26211017370224, + "step": 965, + "valid_targets_mean": 1576.7, + "valid_targets_min": 874 + }, + { + "epoch": 1.0199789695057835, + "grad_norm": 0.9031581455432894, + "learning_rate": 3.974807428444254e-05, + "loss": 0.2499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25593310594558716, + "step": 970, + "valid_targets_mean": 1409.7, + "valid_targets_min": 615 + }, + { + "epoch": 1.025236593059937, + "grad_norm": 0.7879468893368036, + "learning_rate": 3.973970953405195e-05, + "loss": 0.2455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508862018585205, + "step": 975, + "valid_targets_mean": 1581.3, + "valid_targets_min": 715 + }, + { + "epoch": 1.0304942166140905, + "grad_norm": 0.823604647249457, + "learning_rate": 3.9731209083106354e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29526305198669434, + "step": 980, + "valid_targets_mean": 1765.4, + "valid_targets_min": 625 + }, + { + "epoch": 1.035751840168244, + "grad_norm": 0.788289075689827, + "learning_rate": 3.972257299004206e-05, + "loss": 0.237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23224273324012756, + "step": 985, + "valid_targets_mean": 1440.2, + "valid_targets_min": 761 + }, + { + "epoch": 1.0410094637223974, + "grad_norm": 0.7228954505128617, + "learning_rate": 3.9713801314227867e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32342734932899475, + "step": 990, + "valid_targets_mean": 1589.9, + "valid_targets_min": 703 + }, + { + "epoch": 1.046267087276551, + "grad_norm": 0.8104565386470577, + "learning_rate": 3.9704894115964615e-05, + "loss": 0.2396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25293219089508057, + "step": 995, + "valid_targets_mean": 1544.3, + "valid_targets_min": 733 + }, + { + "epoch": 1.0515247108307044, + "grad_norm": 0.7039867278511459, + "learning_rate": 3.9695851456484805e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25906291604042053, + "step": 1000, + "valid_targets_mean": 1915.3, + "valid_targets_min": 964 + }, 
+ { + "epoch": 1.0567823343848581, + "grad_norm": 0.8153535915545935, + "learning_rate": 3.968667339795218e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24466818571090698, + "step": 1005, + "valid_targets_mean": 1516.1, + "valid_targets_min": 815 + }, + { + "epoch": 1.0620399579390116, + "grad_norm": 0.7463168942397396, + "learning_rate": 3.9677360003461246e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24177688360214233, + "step": 1010, + "valid_targets_mean": 1713.7, + "valid_targets_min": 1027 + }, + { + "epoch": 1.0672975814931651, + "grad_norm": 0.7794388914992689, + "learning_rate": 3.966791133703691e-05, + "loss": 0.2329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.230307936668396, + "step": 1015, + "valid_targets_mean": 1325.3, + "valid_targets_min": 570 + }, + { + "epoch": 1.0725552050473186, + "grad_norm": 0.9178047961430227, + "learning_rate": 3.965832746363397e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23218250274658203, + "step": 1020, + "valid_targets_mean": 1349.9, + "valid_targets_min": 548 + }, + { + "epoch": 1.077812828601472, + "grad_norm": 0.7978419498391828, + "learning_rate": 3.964860844913676e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2218141257762909, + "step": 1025, + "valid_targets_mean": 1355.5, + "valid_targets_min": 682 + }, + { + "epoch": 1.0830704521556256, + "grad_norm": 0.7540474715565492, + "learning_rate": 3.9638754360358585e-05, + "loss": 0.2361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23646913468837738, + "step": 1030, + "valid_targets_mean": 1716.7, + "valid_targets_min": 911 + }, + { + "epoch": 1.088328075709779, + "grad_norm": 1.1981264195677659, + "learning_rate": 3.962876526504134e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23529523611068726, + "step": 1035, + "valid_targets_mean": 1278.3, + "valid_targets_min": 597 + }, + { + "epoch": 1.0935856992639328, + "grad_norm": 0.8387090304069986, + "learning_rate": 
3.961864123185502e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2465025931596756, + "step": 1040, + "valid_targets_mean": 1576.7, + "valid_targets_min": 772 + }, + { + "epoch": 1.0988433228180863, + "grad_norm": 0.8455985020405359, + "learning_rate": 3.9608382330397265e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23955386877059937, + "step": 1045, + "valid_targets_mean": 1733.6, + "valid_targets_min": 940 + }, + { + "epoch": 1.1041009463722398, + "grad_norm": 0.7676754925673644, + "learning_rate": 3.959798863119284e-05, + "loss": 0.227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20279844105243683, + "step": 1050, + "valid_targets_mean": 1361.2, + "valid_targets_min": 785 + }, + { + "epoch": 1.1093585699263933, + "grad_norm": 0.7740129324176807, + "learning_rate": 3.9587460205693194e-05, + "loss": 0.2387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2391810417175293, + "step": 1055, + "valid_targets_mean": 1457.7, + "valid_targets_min": 617 + }, + { + "epoch": 1.1146161934805467, + "grad_norm": 0.88745715053675, + "learning_rate": 3.9576797126275945e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24747875332832336, + "step": 1060, + "valid_targets_mean": 1281.0, + "valid_targets_min": 648 + }, + { + "epoch": 1.1198738170347002, + "grad_norm": 0.7629569780602973, + "learning_rate": 3.9565999466244384e-05, + "loss": 0.2286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22490420937538147, + "step": 1065, + "valid_targets_mean": 1466.2, + "valid_targets_min": 634 + }, + { + "epoch": 1.125131440588854, + "grad_norm": 0.7914740246261301, + "learning_rate": 3.955506729982699e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23777303099632263, + "step": 1070, + "valid_targets_mean": 1515.4, + "valid_targets_min": 774 + }, + { + "epoch": 1.1303890641430074, + "grad_norm": 0.6700069466657289, + "learning_rate": 3.9544000702176896e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.23064503073692322, + "step": 1075, + "valid_targets_mean": 1811.3, + "valid_targets_min": 563 + }, + { + "epoch": 1.135646687697161, + "grad_norm": 0.8263907754698312, + "learning_rate": 3.953279974937139e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23290985822677612, + "step": 1080, + "valid_targets_mean": 1495.8, + "valid_targets_min": 837 + }, + { + "epoch": 1.1409043112513144, + "grad_norm": 0.7433952417861344, + "learning_rate": 3.9521464518411356e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22422298789024353, + "step": 1085, + "valid_targets_mean": 1653.8, + "valid_targets_min": 803 + }, + { + "epoch": 1.146161934805468, + "grad_norm": 0.7292712211069998, + "learning_rate": 3.950999508722082e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19926020503044128, + "step": 1090, + "valid_targets_mean": 1380.4, + "valid_targets_min": 797 + }, + { + "epoch": 1.1514195583596214, + "grad_norm": 0.7409594041378071, + "learning_rate": 3.9498391534646325e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21704162657260895, + "step": 1095, + "valid_targets_mean": 1600.2, + "valid_targets_min": 646 + }, + { + "epoch": 1.1566771819137749, + "grad_norm": 0.8349230880619002, + "learning_rate": 3.948665394045646e-05, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23841853439807892, + "step": 1100, + "valid_targets_mean": 1447.8, + "valid_targets_min": 591 + }, + { + "epoch": 1.1619348054679284, + "grad_norm": 0.7517080365308488, + "learning_rate": 3.9474782385341255e-05, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518262267112732, + "step": 1105, + "valid_targets_mean": 1590.8, + "valid_targets_min": 796 + }, + { + "epoch": 1.167192429022082, + "grad_norm": 0.7595939144991899, + "learning_rate": 3.9462776950911684e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22226208448410034, + "step": 1110, + "valid_targets_mean": 1486.0, + 
"valid_targets_min": 848 + }, + { + "epoch": 1.1724500525762356, + "grad_norm": 0.81977601249161, + "learning_rate": 3.9450637719699046e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20779165625572205, + "step": 1115, + "valid_targets_mean": 1233.6, + "valid_targets_min": 665 + }, + { + "epoch": 1.177707676130389, + "grad_norm": 0.7504295904684078, + "learning_rate": 3.9438364775154436e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20952507853507996, + "step": 1120, + "valid_targets_mean": 1408.1, + "valid_targets_min": 652 + }, + { + "epoch": 1.1829652996845426, + "grad_norm": 0.7313239983187266, + "learning_rate": 3.942595820164818e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542307674884796, + "step": 1125, + "valid_targets_mean": 1747.6, + "valid_targets_min": 662 + }, + { + "epoch": 1.188222923238696, + "grad_norm": 0.7761150837666538, + "learning_rate": 3.94134180844692e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2359396070241928, + "step": 1130, + "valid_targets_mean": 1415.1, + "valid_targets_min": 746 + }, + { + "epoch": 1.1934805467928495, + "grad_norm": 0.8509889976913751, + "learning_rate": 3.940074450982449e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22314172983169556, + "step": 1135, + "valid_targets_mean": 1241.3, + "valid_targets_min": 794 + }, + { + "epoch": 1.1987381703470033, + "grad_norm": 0.7953065055183305, + "learning_rate": 3.93879375648385e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2226504534482956, + "step": 1140, + "valid_targets_mean": 1566.1, + "valid_targets_min": 773 + }, + { + "epoch": 1.2039957939011567, + "grad_norm": 0.7779219592633028, + "learning_rate": 3.9374997337552496e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2328607589006424, + "step": 1145, + "valid_targets_mean": 1655.8, + "valid_targets_min": 741 + }, + { + "epoch": 1.2092534174553102, + "grad_norm": 
0.9610600652279467, + "learning_rate": 3.936192391692404e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20716674625873566, + "step": 1150, + "valid_targets_mean": 1325.1, + "valid_targets_min": 801 + }, + { + "epoch": 1.2145110410094637, + "grad_norm": 0.7570031091587086, + "learning_rate": 3.9348717392826306e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24780115485191345, + "step": 1155, + "valid_targets_mean": 1859.9, + "valid_targets_min": 634 + }, + { + "epoch": 1.2197686645636172, + "grad_norm": 0.7619735134338017, + "learning_rate": 3.933537785604748e-05, + "loss": 0.2335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24546000361442566, + "step": 1160, + "valid_targets_mean": 1715.2, + "valid_targets_min": 690 + }, + { + "epoch": 1.2250262881177707, + "grad_norm": 0.7201024650567306, + "learning_rate": 3.932190539829018e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22544372081756592, + "step": 1165, + "valid_targets_mean": 1776.1, + "valid_targets_min": 718 + }, + { + "epoch": 1.2302839116719242, + "grad_norm": 0.7263537229376624, + "learning_rate": 3.9308300112170735e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19575223326683044, + "step": 1170, + "valid_targets_mean": 1405.6, + "valid_targets_min": 685 + }, + { + "epoch": 1.235541535226078, + "grad_norm": 0.8579122640367931, + "learning_rate": 3.929456209121865e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21605601906776428, + "step": 1175, + "valid_targets_mean": 1445.6, + "valid_targets_min": 744 + }, + { + "epoch": 1.2407991587802314, + "grad_norm": 0.8132894541239973, + "learning_rate": 3.928069142987589e-05, + "loss": 0.2225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2238626778125763, + "step": 1180, + "valid_targets_mean": 1320.9, + "valid_targets_min": 650 + }, + { + "epoch": 1.2460567823343849, + "grad_norm": 0.8264251778813105, + "learning_rate": 3.926668822349625e-05, + "loss": 0.2242, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.23769277334213257, + "step": 1185, + "valid_targets_mean": 1461.5, + "valid_targets_min": 677 + }, + { + "epoch": 1.2513144058885384, + "grad_norm": 0.8544823460426906, + "learning_rate": 3.925255256834474e-05, + "loss": 0.209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21869561076164246, + "step": 1190, + "valid_targets_mean": 1330.2, + "valid_targets_min": 808 + }, + { + "epoch": 1.2565720294426919, + "grad_norm": 0.8468093850705543, + "learning_rate": 3.923828456159685e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21936550736427307, + "step": 1195, + "valid_targets_mean": 1413.0, + "valid_targets_min": 727 + }, + { + "epoch": 1.2618296529968454, + "grad_norm": 0.7583120584384649, + "learning_rate": 3.922388430133793e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21635094285011292, + "step": 1200, + "valid_targets_mean": 1483.2, + "valid_targets_min": 907 + }, + { + "epoch": 1.267087276550999, + "grad_norm": 0.8225959765510902, + "learning_rate": 3.9209351886562535e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23493990302085876, + "step": 1205, + "valid_targets_mean": 1516.4, + "valid_targets_min": 669 + }, + { + "epoch": 1.2723449001051526, + "grad_norm": 0.7504836784879231, + "learning_rate": 3.919468741717367e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20039993524551392, + "step": 1210, + "valid_targets_mean": 1541.4, + "valid_targets_min": 750 + }, + { + "epoch": 1.277602523659306, + "grad_norm": 0.8511947034089659, + "learning_rate": 3.9179890993982186e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2272759974002838, + "step": 1215, + "valid_targets_mean": 1610.9, + "valid_targets_min": 736 + }, + { + "epoch": 1.2828601472134595, + "grad_norm": 0.832455549139663, + "learning_rate": 3.916496271870603e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24332864582538605, + "step": 1220, + 
"valid_targets_mean": 1571.8, + "valid_targets_min": 694 + }, + { + "epoch": 1.288117770767613, + "grad_norm": 0.7044386170169681, + "learning_rate": 3.914990269396957e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20751051604747772, + "step": 1225, + "valid_targets_mean": 1560.8, + "valid_targets_min": 806 + }, + { + "epoch": 1.2933753943217665, + "grad_norm": 0.8464901864200083, + "learning_rate": 3.913471102330288e-05, + "loss": 0.2105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21428729593753815, + "step": 1230, + "valid_targets_mean": 1341.3, + "valid_targets_min": 797 + }, + { + "epoch": 1.29863301787592, + "grad_norm": 0.7188817616548374, + "learning_rate": 3.911938781114105e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19710156321525574, + "step": 1235, + "valid_targets_mean": 1448.0, + "valid_targets_min": 698 + }, + { + "epoch": 1.3038906414300735, + "grad_norm": 0.8802628923750944, + "learning_rate": 3.910393316282345e-05, + "loss": 0.2012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20695164799690247, + "step": 1240, + "valid_targets_mean": 1379.1, + "valid_targets_min": 712 + }, + { + "epoch": 1.3091482649842272, + "grad_norm": 0.7828172742417011, + "learning_rate": 3.9088347184592974e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2368948757648468, + "step": 1245, + "valid_targets_mean": 1699.4, + "valid_targets_min": 605 + }, + { + "epoch": 1.3144058885383807, + "grad_norm": 0.7538652735215456, + "learning_rate": 3.907262998359539e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2101295441389084, + "step": 1250, + "valid_targets_mean": 1718.9, + "valid_targets_min": 907 + }, + { + "epoch": 1.3196635120925342, + "grad_norm": 0.7619261257748762, + "learning_rate": 3.905678166787852e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22127626836299896, + "step": 1255, + "valid_targets_mean": 1481.1, + "valid_targets_min": 723 + }, + { + "epoch": 
1.3249211356466877, + "grad_norm": 0.7763306255618148, + "learning_rate": 3.9040802346391555e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2114461064338684, + "step": 1260, + "valid_targets_mean": 1562.9, + "valid_targets_min": 855 + }, + { + "epoch": 1.3301787592008412, + "grad_norm": 0.7539205539627757, + "learning_rate": 3.902469212898427e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2315191626548767, + "step": 1265, + "valid_targets_mean": 1583.2, + "valid_targets_min": 700 + }, + { + "epoch": 1.3354363827549949, + "grad_norm": 0.6737336971557036, + "learning_rate": 3.900845112640631e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20278924703598022, + "step": 1270, + "valid_targets_mean": 1613.6, + "valid_targets_min": 798 + }, + { + "epoch": 1.3406940063091484, + "grad_norm": 0.8831041851437693, + "learning_rate": 3.8992079450306355e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22584810853004456, + "step": 1275, + "valid_targets_mean": 1279.4, + "valid_targets_min": 635 + }, + { + "epoch": 1.3459516298633019, + "grad_norm": 0.7802323334115124, + "learning_rate": 3.897557721323145e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21834111213684082, + "step": 1280, + "valid_targets_mean": 1321.2, + "valid_targets_min": 244 + }, + { + "epoch": 1.3512092534174553, + "grad_norm": 0.8212012968134834, + "learning_rate": 3.895894452862614e-05, + "loss": 0.2242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22549492120742798, + "step": 1285, + "valid_targets_mean": 1293.1, + "valid_targets_min": 714 + }, + { + "epoch": 1.3564668769716088, + "grad_norm": 0.7917399078575781, + "learning_rate": 3.894218151083176e-05, + "loss": 0.2124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20146942138671875, + "step": 1290, + "valid_targets_mean": 1459.4, + "valid_targets_min": 585 + }, + { + "epoch": 1.3617245005257623, + "grad_norm": 0.7930453318217562, + "learning_rate": 
3.892528827508562e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23862096667289734, + "step": 1295, + "valid_targets_mean": 1683.5, + "valid_targets_min": 1048 + }, + { + "epoch": 1.3669821240799158, + "grad_norm": 0.6839803408958032, + "learning_rate": 3.890826493752018e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19739103317260742, + "step": 1300, + "valid_targets_mean": 1473.7, + "valid_targets_min": 657 + }, + { + "epoch": 1.3722397476340693, + "grad_norm": 0.7396865012322326, + "learning_rate": 3.8891111615162314e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19474495947360992, + "step": 1305, + "valid_targets_mean": 1436.2, + "valid_targets_min": 672 + }, + { + "epoch": 1.3774973711882228, + "grad_norm": 0.7133519470993522, + "learning_rate": 3.8873828425932486e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21923986077308655, + "step": 1310, + "valid_targets_mean": 1605.6, + "valid_targets_min": 691 + }, + { + "epoch": 1.3827549947423765, + "grad_norm": 0.9644653064060893, + "learning_rate": 3.8856415488643885e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21021606028079987, + "step": 1315, + "valid_targets_mean": 1340.1, + "valid_targets_min": 732 + }, + { + "epoch": 1.38801261829653, + "grad_norm": 0.76524442768024, + "learning_rate": 3.88388729230017e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2201576828956604, + "step": 1320, + "valid_targets_mean": 1546.2, + "valid_targets_min": 634 + }, + { + "epoch": 1.3932702418506835, + "grad_norm": 0.6325782587542048, + "learning_rate": 3.8821200849602215e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2096240073442459, + "step": 1325, + "valid_targets_mean": 1788.0, + "valid_targets_min": 682 + }, + { + "epoch": 1.398527865404837, + "grad_norm": 0.7552381469693424, + "learning_rate": 3.880339938993204e-05, + "loss": 0.2046, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.21048465371131897, + "step": 1330, + "valid_targets_mean": 1405.9, + "valid_targets_min": 596 + }, + { + "epoch": 1.4037854889589905, + "grad_norm": 0.9598153253569314, + "learning_rate": 3.878546866636724e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21250438690185547, + "step": 1335, + "valid_targets_mean": 1576.6, + "valid_targets_min": 687 + }, + { + "epoch": 1.4090431125131442, + "grad_norm": 0.7623950015764874, + "learning_rate": 3.876740880217248e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20306065678596497, + "step": 1340, + "valid_targets_mean": 1605.6, + "valid_targets_min": 820 + }, + { + "epoch": 1.4143007360672977, + "grad_norm": 0.6868132325157823, + "learning_rate": 3.874921992150026e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20207521319389343, + "step": 1345, + "valid_targets_mean": 1734.6, + "valid_targets_min": 799 + }, + { + "epoch": 1.4195583596214512, + "grad_norm": 0.7260563750450043, + "learning_rate": 3.873090214938994e-05, + "loss": 0.201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21663132309913635, + "step": 1350, + "valid_targets_mean": 1587.2, + "valid_targets_min": 1047 + }, + { + "epoch": 1.4248159831756047, + "grad_norm": 0.7455453060468663, + "learning_rate": 3.871245561176698e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22021172940731049, + "step": 1355, + "valid_targets_mean": 1558.6, + "valid_targets_min": 741 + }, + { + "epoch": 1.4300736067297581, + "grad_norm": 0.7186863025208695, + "learning_rate": 3.869388043544204e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20322775840759277, + "step": 1360, + "valid_targets_mean": 1574.2, + "valid_targets_min": 720 + }, + { + "epoch": 1.4353312302839116, + "grad_norm": 0.7749435575552408, + "learning_rate": 3.8675176748110076e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20808576047420502, + "step": 1365, + "valid_targets_mean": 1458.4, + 
"valid_targets_min": 935 + }, + { + "epoch": 1.4405888538380651, + "grad_norm": 0.7473803573920585, + "learning_rate": 3.865634467834953e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22573482990264893, + "step": 1370, + "valid_targets_mean": 1713.8, + "valid_targets_min": 684 + }, + { + "epoch": 1.4458464773922186, + "grad_norm": 0.8618415659674591, + "learning_rate": 3.863738435562139e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1910347044467926, + "step": 1375, + "valid_targets_mean": 1463.6, + "valid_targets_min": 847 + }, + { + "epoch": 1.4511041009463723, + "grad_norm": 0.8170056621857298, + "learning_rate": 3.8618295910268316e-05, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20702435076236725, + "step": 1380, + "valid_targets_mean": 1666.8, + "valid_targets_min": 1280 + }, + { + "epoch": 1.4563617245005258, + "grad_norm": 0.7023515013028406, + "learning_rate": 3.859907947351374e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21213456988334656, + "step": 1385, + "valid_targets_mean": 1611.5, + "valid_targets_min": 589 + }, + { + "epoch": 1.4616193480546793, + "grad_norm": 0.6669175601038381, + "learning_rate": 3.8579735177460994e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2246095836162567, + "step": 1390, + "valid_targets_mean": 1812.6, + "valid_targets_min": 918 + }, + { + "epoch": 1.4668769716088328, + "grad_norm": 0.5769109450987918, + "learning_rate": 3.856026315509236e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12236759811639786, + "step": 1395, + "valid_targets_mean": 3127.1, + "valid_targets_min": 543 + }, + { + "epoch": 1.4721345951629863, + "grad_norm": 0.38077630807651636, + "learning_rate": 3.8540663540268175e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11780562996864319, + "step": 1400, + "valid_targets_mean": 3549.7, + "valid_targets_min": 1024 + }, + { + "epoch": 1.4773922187171398, + "grad_norm": 
0.35371933040544784, + "learning_rate": 3.852093646772592e-05, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0848422721028328, + "step": 1405, + "valid_targets_mean": 3607.6, + "valid_targets_min": 2777 + }, + { + "epoch": 1.4826498422712935, + "grad_norm": 0.4340521275453412, + "learning_rate": 3.850108207307927e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14629028737545013, + "step": 1410, + "valid_targets_mean": 3433.5, + "valid_targets_min": 1095 + }, + { + "epoch": 1.487907465825447, + "grad_norm": 0.43194201643869023, + "learning_rate": 3.848110049281719e-05, + "loss": 0.1152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1387070119380951, + "step": 1415, + "valid_targets_mean": 2446.1, + "valid_targets_min": 560 + }, + { + "epoch": 1.4931650893796005, + "grad_norm": 0.4587451848968233, + "learning_rate": 3.846099186430297e-05, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13479407131671906, + "step": 1420, + "valid_targets_mean": 2739.4, + "valid_targets_min": 663 + }, + { + "epoch": 1.498422712933754, + "grad_norm": 0.7533969406924347, + "learning_rate": 3.8440756325773296e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19724354147911072, + "step": 1425, + "valid_targets_mean": 1537.3, + "valid_targets_min": 535 + }, + { + "epoch": 1.5036803364879074, + "grad_norm": 0.4415590187724359, + "learning_rate": 3.84203940163373e-05, + "loss": 0.1188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11975380778312683, + "step": 1430, + "valid_targets_mean": 3057.1, + "valid_targets_min": 811 + }, + { + "epoch": 1.508937960042061, + "grad_norm": 0.532020727469553, + "learning_rate": 3.83999050759756e-05, + "loss": 0.1426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1732349991798401, + "step": 1435, + "valid_targets_mean": 2232.5, + "valid_targets_min": 883 + }, + { + "epoch": 1.5141955835962144, + "grad_norm": 0.2684342128074378, + "learning_rate": 3.837928964553933e-05, + "loss": 0.1438, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.08347544074058533, + "step": 1440, + "valid_targets_mean": 5158.4, + "valid_targets_min": 3285 + }, + { + "epoch": 1.519453207150368, + "grad_norm": 0.5168820362972649, + "learning_rate": 3.835854786674918e-05, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09910877048969269, + "step": 1445, + "valid_targets_mean": 3384.3, + "valid_targets_min": 502 + }, + { + "epoch": 1.5247108307045214, + "grad_norm": 0.40504850236739504, + "learning_rate": 3.8337679882194443e-05, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1263246089220047, + "step": 1450, + "valid_targets_mean": 3542.2, + "valid_targets_min": 773 + }, + { + "epoch": 1.5299684542586751, + "grad_norm": 0.5621595744994061, + "learning_rate": 3.8316685835331984e-05, + "loss": 0.1176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13412566483020782, + "step": 1455, + "valid_targets_mean": 1996.5, + "valid_targets_min": 492 + }, + { + "epoch": 1.5352260778128286, + "grad_norm": 0.3082212312592102, + "learning_rate": 3.8295565870485295e-05, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0963553711771965, + "step": 1460, + "valid_targets_mean": 4156.0, + "valid_targets_min": 1961 + }, + { + "epoch": 1.540483701366982, + "grad_norm": 0.33766667602063294, + "learning_rate": 3.827432013284349e-05, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11290542781352997, + "step": 1465, + "valid_targets_mean": 3535.2, + "valid_targets_min": 1049 + }, + { + "epoch": 1.5457413249211358, + "grad_norm": 0.34872520309057936, + "learning_rate": 3.825294876846031e-05, + "loss": 0.1268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0966203510761261, + "step": 1470, + "valid_targets_mean": 2985.7, + "valid_targets_min": 841 + }, + { + "epoch": 1.5509989484752893, + "grad_norm": 0.367393613733167, + "learning_rate": 3.823145192425313e-05, + "loss": 0.1, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1047491729259491, + "step": 1475, + 
"valid_targets_mean": 3023.9, + "valid_targets_min": 720 + }, + { + "epoch": 1.5562565720294428, + "grad_norm": 0.6604084615314401, + "learning_rate": 3.8209829748001894e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13880418241024017, + "step": 1480, + "valid_targets_mean": 1272.2, + "valid_targets_min": 515 + }, + { + "epoch": 1.5615141955835963, + "grad_norm": 0.4529268402014723, + "learning_rate": 3.8188082388348186e-05, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11649923026561737, + "step": 1485, + "valid_targets_mean": 2375.1, + "valid_targets_min": 589 + }, + { + "epoch": 1.5667718191377498, + "grad_norm": 0.4727596426144311, + "learning_rate": 3.816620999479413e-05, + "loss": 0.1358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13613563776016235, + "step": 1490, + "valid_targets_mean": 3174.8, + "valid_targets_min": 1086 + }, + { + "epoch": 1.5720294426919033, + "grad_norm": 0.4114679133248355, + "learning_rate": 3.8144212717701424e-05, + "loss": 0.1369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14915388822555542, + "step": 1495, + "valid_targets_mean": 2903.8, + "valid_targets_min": 511 + }, + { + "epoch": 1.5772870662460567, + "grad_norm": 0.4556729145812952, + "learning_rate": 3.812209070829025e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12058291584253311, + "step": 1500, + "valid_targets_mean": 3491.4, + "valid_targets_min": 714 + }, + { + "epoch": 1.5825446898002102, + "grad_norm": 0.38678974999687793, + "learning_rate": 3.809984411863828e-05, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11663035303354263, + "step": 1505, + "valid_targets_mean": 3222.4, + "valid_targets_min": 669 + }, + { + "epoch": 1.5878023133543637, + "grad_norm": 0.6161590845052197, + "learning_rate": 3.80774731016796e-05, + "loss": 0.1376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14975017309188843, + "step": 1510, + "valid_targets_mean": 1370.8, + "valid_targets_min": 598 + }, + { + "epoch": 
1.5930599369085172, + "grad_norm": 0.5755669440762874, + "learning_rate": 3.805497781120369e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16455599665641785, + "step": 1515, + "valid_targets_mean": 1732.1, + "valid_targets_min": 323 + }, + { + "epoch": 1.598317560462671, + "grad_norm": 0.34685314319658894, + "learning_rate": 3.8032358401854315e-05, + "loss": 0.1299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11952777206897736, + "step": 1520, + "valid_targets_mean": 3875.1, + "valid_targets_min": 2827 + }, + { + "epoch": 1.6035751840168244, + "grad_norm": 0.4769783021761849, + "learning_rate": 3.800961502912854e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30816492438316345, + "step": 1525, + "valid_targets_mean": 3032.8, + "valid_targets_min": 652 + }, + { + "epoch": 1.608832807570978, + "grad_norm": 0.4270619949322412, + "learning_rate": 3.798674784937557e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11716529726982117, + "step": 1530, + "valid_targets_mean": 3728.1, + "valid_targets_min": 1932 + }, + { + "epoch": 1.6140904311251314, + "grad_norm": 0.5293141403191116, + "learning_rate": 3.7963757019795756e-05, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14936235547065735, + "step": 1535, + "valid_targets_mean": 2361.8, + "valid_targets_min": 820 + }, + { + "epoch": 1.619348054679285, + "grad_norm": 0.5621738528140149, + "learning_rate": 3.794064269843946e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263014554977417, + "step": 1540, + "valid_targets_mean": 2741.1, + "valid_targets_min": 727 + }, + { + "epoch": 1.6246056782334386, + "grad_norm": 0.3137718007315352, + "learning_rate": 3.791740504420599e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09868137538433075, + "step": 1545, + "valid_targets_mean": 4440.7, + "valid_targets_min": 636 + }, + { + "epoch": 1.629863301787592, + "grad_norm": 0.36903820234668044, + "learning_rate": 
3.789404421684251e-05, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16915813088417053, + "step": 1550, + "valid_targets_mean": 4360.9, + "valid_targets_min": 1142 + }, + { + "epoch": 1.6351209253417456, + "grad_norm": 0.3761959595802902, + "learning_rate": 3.787056037694293e-05, + "loss": 0.123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13259699940681458, + "step": 1555, + "valid_targets_mean": 4034.8, + "valid_targets_min": 513 + }, + { + "epoch": 1.640378548895899, + "grad_norm": 0.3027133757228649, + "learning_rate": 3.784695368594682e-05, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08935047686100006, + "step": 1560, + "valid_targets_mean": 3534.8, + "valid_targets_min": 970 + }, + { + "epoch": 1.6456361724500526, + "grad_norm": 0.32760623697392594, + "learning_rate": 3.782322430613828e-05, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10545256733894348, + "step": 1565, + "valid_targets_mean": 3806.2, + "valid_targets_min": 748 + }, + { + "epoch": 1.650893796004206, + "grad_norm": 0.3884116537234466, + "learning_rate": 3.779937240064484e-05, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12552164494991302, + "step": 1570, + "valid_targets_mean": 3536.1, + "valid_targets_min": 1873 + }, + { + "epoch": 1.6561514195583595, + "grad_norm": 0.4868761468099754, + "learning_rate": 3.777539813343634e-05, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13782522082328796, + "step": 1575, + "valid_targets_mean": 2808.6, + "valid_targets_min": 1939 + }, + { + "epoch": 1.661409043112513, + "grad_norm": 0.4119111639303862, + "learning_rate": 3.7751301669323776e-05, + "loss": 0.0989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1060827299952507, + "step": 1580, + "valid_targets_mean": 2556.4, + "valid_targets_min": 519 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.4784602722823778, + "learning_rate": 3.772708317395818e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.14642825722694397, + "step": 1585, + "valid_targets_mean": 2305.1, + "valid_targets_min": 645 + }, + { + "epoch": 1.6719242902208202, + "grad_norm": 0.39746257174216554, + "learning_rate": 3.770274281382952e-05, + "loss": 0.091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09272044897079468, + "step": 1590, + "valid_targets_mean": 2936.2, + "valid_targets_min": 472 + }, + { + "epoch": 1.6771819137749737, + "grad_norm": 0.3944235961948414, + "learning_rate": 3.767828075626551e-05, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09672108292579651, + "step": 1595, + "valid_targets_mean": 2775.9, + "valid_targets_min": 616 + }, + { + "epoch": 1.6824395373291272, + "grad_norm": 0.37014373898998776, + "learning_rate": 3.7653697169430456e-05, + "loss": 0.1, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10415723919868469, + "step": 1600, + "valid_targets_mean": 3847.4, + "valid_targets_min": 792 + }, + { + "epoch": 1.687697160883281, + "grad_norm": 0.4384129522035598, + "learning_rate": 3.762899222232413e-05, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18789973855018616, + "step": 1605, + "valid_targets_mean": 2382.8, + "valid_targets_min": 792 + }, + { + "epoch": 1.6929547844374344, + "grad_norm": 0.3349104796076761, + "learning_rate": 3.760416608478061e-05, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09439995139837265, + "step": 1610, + "valid_targets_mean": 3367.9, + "valid_targets_min": 729 + }, + { + "epoch": 1.698212407991588, + "grad_norm": 0.5162761958781051, + "learning_rate": 3.7579218927467044e-05, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13445918262004852, + "step": 1615, + "valid_targets_mean": 1583.1, + "valid_targets_min": 612 + }, + { + "epoch": 1.7034700315457414, + "grad_norm": 0.6725188940138219, + "learning_rate": 3.7554150921882596e-05, + "loss": 0.1321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184066504240036, + "step": 1620, + "valid_targets_mean": 1594.0, + "valid_targets_min": 
664 + }, + { + "epoch": 1.7087276550998949, + "grad_norm": 0.4638219043357721, + "learning_rate": 3.752896224035716e-05, + "loss": 0.1404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14246805012226105, + "step": 1625, + "valid_targets_mean": 1982.2, + "valid_targets_min": 542 + }, + { + "epoch": 1.7139852786540484, + "grad_norm": 0.3837933566986813, + "learning_rate": 3.750365305605024e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10711023211479187, + "step": 1630, + "valid_targets_mean": 3892.2, + "valid_targets_min": 2440 + }, + { + "epoch": 1.7192429022082019, + "grad_norm": 0.4168973977243463, + "learning_rate": 3.7478223542949704e-05, + "loss": 0.1191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1168304830789566, + "step": 1635, + "valid_targets_mean": 2770.6, + "valid_targets_min": 629 + }, + { + "epoch": 1.7245005257623554, + "grad_norm": 0.4833222803482408, + "learning_rate": 3.745267387587065e-05, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1150302141904831, + "step": 1640, + "valid_targets_mean": 3325.8, + "valid_targets_min": 755 + }, + { + "epoch": 1.7297581493165088, + "grad_norm": 0.42439987683019614, + "learning_rate": 3.742700423045416e-05, + "loss": 0.1284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11056456714868546, + "step": 1645, + "valid_targets_mean": 1951.9, + "valid_targets_min": 485 + }, + { + "epoch": 1.7350157728706623, + "grad_norm": 0.45306162880433826, + "learning_rate": 3.7401214783166116e-05, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15018393099308014, + "step": 1650, + "valid_targets_mean": 3283.8, + "valid_targets_min": 605 + }, + { + "epoch": 1.7402733964248158, + "grad_norm": 0.31371898272324233, + "learning_rate": 3.737530571129596e-05, + "loss": 0.104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09127908945083618, + "step": 1655, + "valid_targets_mean": 4175.6, + "valid_targets_min": 848 + }, + { + "epoch": 1.7455310199789695, + "grad_norm": 0.32930513404044526, + 
"learning_rate": 3.734927719295551e-05, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09676264226436615, + "step": 1660, + "valid_targets_mean": 3676.2, + "valid_targets_min": 884 + }, + { + "epoch": 1.750788643533123, + "grad_norm": 0.34745180374694185, + "learning_rate": 3.732312940707772e-05, + "loss": 0.1079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10899867117404938, + "step": 1665, + "valid_targets_mean": 3627.8, + "valid_targets_min": 905 + }, + { + "epoch": 1.7560462670872765, + "grad_norm": 0.36018071265294194, + "learning_rate": 3.729686253341543e-05, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1398889720439911, + "step": 1670, + "valid_targets_mean": 2844.2, + "valid_targets_min": 808 + }, + { + "epoch": 1.7613038906414302, + "grad_norm": 0.41573199528414245, + "learning_rate": 3.7270476752540163e-05, + "loss": 0.1224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12121666967868805, + "step": 1675, + "valid_targets_mean": 3541.2, + "valid_targets_min": 1814 + }, + { + "epoch": 1.7665615141955837, + "grad_norm": 0.45247583140007264, + "learning_rate": 3.724397224584086e-05, + "loss": 0.1303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15463131666183472, + "step": 1680, + "valid_targets_mean": 2889.8, + "valid_targets_min": 931 + }, + { + "epoch": 1.7718191377497372, + "grad_norm": 0.3435107076206861, + "learning_rate": 3.7217349195522656e-05, + "loss": 0.097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09973950684070587, + "step": 1685, + "valid_targets_mean": 3737.8, + "valid_targets_min": 2695 + }, + { + "epoch": 1.7770767613038907, + "grad_norm": 0.4872381123497153, + "learning_rate": 3.7190607784605604e-05, + "loss": 0.1191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385570913553238, + "step": 1690, + "valid_targets_mean": 1918.2, + "valid_targets_min": 664 + }, + { + "epoch": 1.7823343848580442, + "grad_norm": 0.35320585242071134, + "learning_rate": 3.716374819692341e-05, + "loss": 0.1201, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.10413797199726105, + "step": 1695, + "valid_targets_mean": 3263.6, + "valid_targets_min": 1584 + }, + { + "epoch": 1.7875920084121977, + "grad_norm": 0.3438644233389937, + "learning_rate": 3.713677061712223e-05, + "loss": 0.0947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10569068044424057, + "step": 1700, + "valid_targets_mean": 3247.5, + "valid_targets_min": 1003 + }, + { + "epoch": 1.7928496319663512, + "grad_norm": 0.36334760630074076, + "learning_rate": 3.7109675230659316e-05, + "loss": 0.122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10258142650127411, + "step": 1705, + "valid_targets_mean": 2887.5, + "valid_targets_min": 531 + }, + { + "epoch": 1.7981072555205047, + "grad_norm": 0.38849288393029024, + "learning_rate": 3.7082462223801784e-05, + "loss": 0.1173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10438039898872375, + "step": 1710, + "valid_targets_mean": 3249.1, + "valid_targets_min": 1003 + }, + { + "epoch": 1.8033648790746581, + "grad_norm": 0.4553296580473414, + "learning_rate": 3.7055131783625364e-05, + "loss": 0.1397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11888349801301956, + "step": 1715, + "valid_targets_mean": 2108.2, + "valid_targets_min": 597 + }, + { + "epoch": 1.8086225026288116, + "grad_norm": 0.39704942824372713, + "learning_rate": 3.702768409801304e-05, + "loss": 0.1155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10550612956285477, + "step": 1720, + "valid_targets_mean": 2457.1, + "valid_targets_min": 665 + }, + { + "epoch": 1.8138801261829653, + "grad_norm": 0.5298613398346509, + "learning_rate": 3.700011935565384e-05, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1246199756860733, + "step": 1725, + "valid_targets_mean": 1751.3, + "valid_targets_min": 829 + }, + { + "epoch": 1.8191377497371188, + "grad_norm": 0.4953597273119919, + "learning_rate": 3.697243774604145e-05, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12704262137413025, + "step": 1730, + "valid_targets_mean": 
1958.6, + "valid_targets_min": 650 + }, + { + "epoch": 1.8243953732912723, + "grad_norm": 0.45842367827210895, + "learning_rate": 3.6944639459473e-05, + "loss": 0.1248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12169969081878662, + "step": 1735, + "valid_targets_mean": 2192.6, + "valid_targets_min": 688 + }, + { + "epoch": 1.8296529968454258, + "grad_norm": 0.293515666490087, + "learning_rate": 3.69167246870477e-05, + "loss": 0.105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07428356260061264, + "step": 1740, + "valid_targets_mean": 3516.8, + "valid_targets_min": 1836 + }, + { + "epoch": 1.8349106203995795, + "grad_norm": 0.3157149440919976, + "learning_rate": 3.6888693620665546e-05, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07667044550180435, + "step": 1745, + "valid_targets_mean": 3454.2, + "valid_targets_min": 2543 + }, + { + "epoch": 1.840168243953733, + "grad_norm": 0.4153518110935024, + "learning_rate": 3.686054645302598e-05, + "loss": 0.142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11578220129013062, + "step": 1750, + "valid_targets_mean": 2119.8, + "valid_targets_min": 446 + }, + { + "epoch": 1.8454258675078865, + "grad_norm": 0.45207660433666463, + "learning_rate": 3.6832283377626603e-05, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10898630321025848, + "step": 1755, + "valid_targets_mean": 2944.4, + "valid_targets_min": 681 + }, + { + "epoch": 1.85068349106204, + "grad_norm": 0.36201037251499957, + "learning_rate": 3.680390458876182e-05, + "loss": 0.1087, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12806929647922516, + "step": 1760, + "valid_targets_mean": 3749.1, + "valid_targets_min": 555 + }, + { + "epoch": 1.8559411146161935, + "grad_norm": 0.3132004258024364, + "learning_rate": 3.67754102815215e-05, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09528759121894836, + "step": 1765, + "valid_targets_mean": 3300.1, + "valid_targets_min": 484 + }, + { + "epoch": 1.861198738170347, + "grad_norm": 
0.29773599774518333, + "learning_rate": 3.6746800651789636e-05, + "loss": 0.0937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08347194641828537, + "step": 1770, + "valid_targets_mean": 4140.2, + "valid_targets_min": 861 + }, + { + "epoch": 1.8664563617245005, + "grad_norm": 0.4243554271166663, + "learning_rate": 3.671807589624302e-05, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1536114364862442, + "step": 1775, + "valid_targets_mean": 3947.6, + "valid_targets_min": 2020 + }, + { + "epoch": 1.871713985278654, + "grad_norm": 0.34987506489471215, + "learning_rate": 3.6689236212349865e-05, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11188700795173645, + "step": 1780, + "valid_targets_mean": 3724.9, + "valid_targets_min": 559 + }, + { + "epoch": 1.8769716088328074, + "grad_norm": 0.4332686685219863, + "learning_rate": 3.6660281798368485e-05, + "loss": 0.1323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1156931072473526, + "step": 1785, + "valid_targets_mean": 3279.9, + "valid_targets_min": 689 + }, + { + "epoch": 1.882229232386961, + "grad_norm": 0.4806651190458711, + "learning_rate": 3.663121285334586e-05, + "loss": 0.1361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24918416142463684, + "step": 1790, + "valid_targets_mean": 2807.4, + "valid_targets_min": 616 + }, + { + "epoch": 1.8874868559411146, + "grad_norm": 0.4214766257021576, + "learning_rate": 3.660202957711635e-05, + "loss": 0.0894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11336281895637512, + "step": 1795, + "valid_targets_mean": 2360.1, + "valid_targets_min": 602 + }, + { + "epoch": 1.8927444794952681, + "grad_norm": 0.34895621187632125, + "learning_rate": 3.657273217030026e-05, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11209186166524887, + "step": 1800, + "valid_targets_mean": 3388.2, + "valid_targets_min": 723 + }, + { + "epoch": 1.8980021030494216, + "grad_norm": 0.3762665012315893, + "learning_rate": 3.654332083430252e-05, + "loss": 0.1089, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.11618360131978989, + "step": 1805, + "valid_targets_mean": 3025.7, + "valid_targets_min": 596 + }, + { + "epoch": 1.9032597266035753, + "grad_norm": 0.38840186150542616, + "learning_rate": 3.651379577131121e-05, + "loss": 0.115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10022513568401337, + "step": 1810, + "valid_targets_mean": 3426.3, + "valid_targets_min": 743 + }, + { + "epoch": 1.9085173501577288, + "grad_norm": 0.4766747024321835, + "learning_rate": 3.648415718429629e-05, + "loss": 0.1294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14906629920005798, + "step": 1815, + "valid_targets_mean": 2339.8, + "valid_targets_min": 706 + }, + { + "epoch": 1.9137749737118823, + "grad_norm": 0.4337561229887625, + "learning_rate": 3.6454405277008087e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729247272014618, + "step": 1820, + "valid_targets_mean": 2480.9, + "valid_targets_min": 710 + }, + { + "epoch": 1.9190325972660358, + "grad_norm": 0.42775200454161955, + "learning_rate": 3.6424540253975985e-05, + "loss": 0.1227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10183486342430115, + "step": 1825, + "valid_targets_mean": 2543.4, + "valid_targets_min": 647 + }, + { + "epoch": 1.9242902208201893, + "grad_norm": 0.5433460772419119, + "learning_rate": 3.6394562320506955e-05, + "loss": 0.1162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1285993456840515, + "step": 1830, + "valid_targets_mean": 1684.0, + "valid_targets_min": 716 + }, + { + "epoch": 1.9295478443743428, + "grad_norm": 0.37589814485703194, + "learning_rate": 3.636447168268419e-05, + "loss": 0.1086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11380413174629211, + "step": 1835, + "valid_targets_mean": 2971.1, + "valid_targets_min": 542 + }, + { + "epoch": 1.9348054679284963, + "grad_norm": 0.43694253835512376, + "learning_rate": 3.633426854736566e-05, + "loss": 0.1108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1216128021478653, + "step": 1840, + 
"valid_targets_mean": 2640.1, + "valid_targets_min": 832 + }, + { + "epoch": 1.9400630914826498, + "grad_norm": 0.5351434455802149, + "learning_rate": 3.6303953122182695e-05, + "loss": 0.0971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10800480842590332, + "step": 1845, + "valid_targets_mean": 2104.2, + "valid_targets_min": 635 + }, + { + "epoch": 1.9453207150368033, + "grad_norm": 0.40867408871935174, + "learning_rate": 3.6273525615538564e-05, + "loss": 0.1365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09918899834156036, + "step": 1850, + "valid_targets_mean": 2595.4, + "valid_targets_min": 756 + }, + { + "epoch": 1.9505783385909568, + "grad_norm": 0.4380264597384522, + "learning_rate": 3.6242986236607046e-05, + "loss": 0.0989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10596967488527298, + "step": 1855, + "valid_targets_mean": 2202.8, + "valid_targets_min": 704 + }, + { + "epoch": 1.9558359621451105, + "grad_norm": 0.36890437184137304, + "learning_rate": 3.6212335195330976e-05, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09210898727178574, + "step": 1860, + "valid_targets_mean": 2810.8, + "valid_targets_min": 529 + }, + { + "epoch": 1.961093585699264, + "grad_norm": 0.4697428864373329, + "learning_rate": 3.618157270242082e-05, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13422542810440063, + "step": 1865, + "valid_targets_mean": 3550.4, + "valid_targets_min": 2017 + }, + { + "epoch": 1.9663512092534174, + "grad_norm": 0.259548983566761, + "learning_rate": 3.615069896935321e-05, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08674701303243637, + "step": 1870, + "valid_targets_mean": 4882.1, + "valid_targets_min": 516 + }, + { + "epoch": 1.971608832807571, + "grad_norm": 0.32231575299830506, + "learning_rate": 3.6119714208369506e-05, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10211837291717529, + "step": 1875, + "valid_targets_mean": 3580.6, + "valid_targets_min": 775 + }, + { + "epoch": 
1.9768664563617246, + "grad_norm": 0.2994321725246202, + "learning_rate": 3.608861863247432e-05, + "loss": 0.1057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09767977893352509, + "step": 1880, + "valid_targets_mean": 3815.4, + "valid_targets_min": 605 + }, + { + "epoch": 1.9821240799158781, + "grad_norm": 0.47456842792846704, + "learning_rate": 3.6057412455434075e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2623363733291626, + "step": 1885, + "valid_targets_mean": 2739.1, + "valid_targets_min": 1473 + }, + { + "epoch": 1.9873817034700316, + "grad_norm": 0.3347442976860358, + "learning_rate": 3.6026095891775494e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10281935334205627, + "step": 1890, + "valid_targets_mean": 3168.9, + "valid_targets_min": 527 + }, + { + "epoch": 1.9926393270241851, + "grad_norm": 0.3646648981550504, + "learning_rate": 3.5994669156784184e-05, + "loss": 0.1161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11803923547267914, + "step": 1895, + "valid_targets_mean": 2708.9, + "valid_targets_min": 717 + }, + { + "epoch": 1.9978969505783386, + "grad_norm": 0.43282634780454476, + "learning_rate": 3.5963132466503107e-05, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1694352924823761, + "step": 1900, + "valid_targets_mean": 2495.1, + "valid_targets_min": 927 + }, + { + "epoch": 2.003154574132492, + "grad_norm": 0.7522957715698859, + "learning_rate": 3.593148603773111e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25149911642074585, + "step": 1905, + "valid_targets_mean": 1611.4, + "valid_targets_min": 788 + }, + { + "epoch": 2.0084121976866456, + "grad_norm": 0.68414800058717, + "learning_rate": 3.5899730088021455e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19962722063064575, + "step": 1910, + "valid_targets_mean": 1591.2, + "valid_targets_min": 957 + }, + { + "epoch": 2.013669821240799, + "grad_norm": 0.6996957125797055, + "learning_rate": 
3.586786483568028e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19607800245285034, + "step": 1915, + "valid_targets_mean": 1441.8, + "valid_targets_min": 796 + }, + { + "epoch": 2.0189274447949526, + "grad_norm": 0.7897949386423881, + "learning_rate": 3.583589049976514e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1937992125749588, + "step": 1920, + "valid_targets_mean": 1389.0, + "valid_targets_min": 704 + }, + { + "epoch": 2.024185068349106, + "grad_norm": 0.722154507796051, + "learning_rate": 3.580380730008348e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18644759058952332, + "step": 1925, + "valid_targets_mean": 1616.4, + "valid_targets_min": 638 + }, + { + "epoch": 2.0294426919032595, + "grad_norm": 0.6730997823927874, + "learning_rate": 3.577161545719113e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19347545504570007, + "step": 1930, + "valid_targets_mean": 1518.4, + "valid_targets_min": 727 + }, + { + "epoch": 2.034700315457413, + "grad_norm": 0.7160419813944304, + "learning_rate": 3.573931519239079e-05, + "loss": 0.2045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1721259206533432, + "step": 1935, + "valid_targets_mean": 1189.1, + "valid_targets_min": 655 + }, + { + "epoch": 2.039957939011567, + "grad_norm": 0.8271975666607679, + "learning_rate": 3.5706906727730496e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1924540102481842, + "step": 1940, + "valid_targets_mean": 1299.6, + "valid_targets_min": 714 + }, + { + "epoch": 2.0452155625657205, + "grad_norm": 0.7423576453869396, + "learning_rate": 3.567439028600211e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19493412971496582, + "step": 1945, + "valid_targets_mean": 1416.0, + "valid_targets_min": 741 + }, + { + "epoch": 2.050473186119874, + "grad_norm": 0.6911407874363379, + "learning_rate": 3.564176609073979e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.21674056351184845, + "step": 1950, + "valid_targets_mean": 1803.2, + "valid_targets_min": 663 + }, + { + "epoch": 2.0557308096740274, + "grad_norm": 0.6752145193438658, + "learning_rate": 3.5609034366218426e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19275152683258057, + "step": 1955, + "valid_targets_mean": 1608.1, + "valid_targets_min": 632 + }, + { + "epoch": 2.060988433228181, + "grad_norm": 2.530049303548015, + "learning_rate": 3.5576195337452146e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2066556066274643, + "step": 1960, + "valid_targets_mean": 1727.7, + "valid_targets_min": 933 + }, + { + "epoch": 2.0662460567823344, + "grad_norm": 0.6471764826910803, + "learning_rate": 3.55432492301927e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18466389179229736, + "step": 1965, + "valid_targets_mean": 1570.2, + "valid_targets_min": 717 + }, + { + "epoch": 2.071503680336488, + "grad_norm": 0.7097102879629508, + "learning_rate": 3.551019627092799e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19337697327136993, + "step": 1970, + "valid_targets_mean": 1424.5, + "valid_targets_min": 659 + }, + { + "epoch": 2.0767613038906414, + "grad_norm": 0.6526169157837053, + "learning_rate": 3.547703668688044e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17414447665214539, + "step": 1975, + "valid_targets_mean": 1382.9, + "valid_targets_min": 642 + }, + { + "epoch": 2.082018927444795, + "grad_norm": 0.6878509073739828, + "learning_rate": 3.544377070600549e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17662809789180756, + "step": 1980, + "valid_targets_mean": 1406.4, + "valid_targets_min": 765 + }, + { + "epoch": 2.0872765509989484, + "grad_norm": 0.7004045433136924, + "learning_rate": 3.541039855699e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21233880519866943, + "step": 1985, + "valid_targets_mean": 1565.7, + "valid_targets_min": 631 + 
}, + { + "epoch": 2.092534174553102, + "grad_norm": 0.7746417992399471, + "learning_rate": 3.537692046925065e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21321943402290344, + "step": 1990, + "valid_targets_mean": 1478.0, + "valid_targets_min": 623 + }, + { + "epoch": 2.0977917981072554, + "grad_norm": 0.6885595749055048, + "learning_rate": 3.534333667293244e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20462816953659058, + "step": 1995, + "valid_targets_mean": 1647.7, + "valid_targets_min": 818 + }, + { + "epoch": 2.103049421661409, + "grad_norm": 0.6525174503414687, + "learning_rate": 3.5309647398907056e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18002676963806152, + "step": 2000, + "valid_targets_mean": 1647.3, + "valid_targets_min": 776 + }, + { + "epoch": 2.108307045215563, + "grad_norm": 0.7162817563018055, + "learning_rate": 3.527585287877125e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1998218595981598, + "step": 2005, + "valid_targets_mean": 1411.8, + "valid_targets_min": 623 + }, + { + "epoch": 2.1135646687697163, + "grad_norm": 0.7478725702392524, + "learning_rate": 3.5241953344845345e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2090776115655899, + "step": 2010, + "valid_targets_mean": 1483.4, + "valid_targets_min": 742 + }, + { + "epoch": 2.1188222923238698, + "grad_norm": 1.0183268180552594, + "learning_rate": 3.520794903017153e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17826226353645325, + "step": 2015, + "valid_targets_mean": 1242.0, + "valid_targets_min": 611 + }, + { + "epoch": 2.1240799158780233, + "grad_norm": 0.7146762033496259, + "learning_rate": 3.517384016851235e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19717749953269958, + "step": 2020, + "valid_targets_mean": 1514.0, + "valid_targets_min": 793 + }, + { + "epoch": 2.1293375394321767, + "grad_norm": 0.6660659625091833, + "learning_rate": 
3.513962699434903e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1777816116809845, + "step": 2025, + "valid_targets_mean": 1553.8, + "valid_targets_min": 886 + }, + { + "epoch": 2.1345951629863302, + "grad_norm": 0.6750947443955108, + "learning_rate": 3.5105309742879894e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17612925171852112, + "step": 2030, + "valid_targets_mean": 1525.9, + "valid_targets_min": 991 + }, + { + "epoch": 2.1398527865404837, + "grad_norm": 0.8047912256569687, + "learning_rate": 3.507088865001876e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19884192943572998, + "step": 2035, + "valid_targets_mean": 1614.5, + "valid_targets_min": 908 + }, + { + "epoch": 2.145110410094637, + "grad_norm": 0.726333342895076, + "learning_rate": 3.5036363952393296e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19761952757835388, + "step": 2040, + "valid_targets_mean": 1527.8, + "valid_targets_min": 680 + }, + { + "epoch": 2.1503680336487907, + "grad_norm": 0.6923696815562899, + "learning_rate": 3.500173588734339e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872178465127945, + "step": 2045, + "valid_targets_mean": 1494.4, + "valid_targets_min": 803 + }, + { + "epoch": 2.155625657202944, + "grad_norm": 0.7240265939827225, + "learning_rate": 3.4967004692919555e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18411380052566528, + "step": 2050, + "valid_targets_mean": 1455.6, + "valid_targets_min": 765 + }, + { + "epoch": 2.1608832807570977, + "grad_norm": 0.7671571892791755, + "learning_rate": 3.4932170607881226e-05, + "loss": 0.1923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20216378569602966, + "step": 2055, + "valid_targets_mean": 1665.1, + "valid_targets_min": 804 + }, + { + "epoch": 2.166140904311251, + "grad_norm": 0.7262689528795186, + "learning_rate": 3.4897233871695205e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.209253191947937, + "step": 2060, + "valid_targets_mean": 1509.7, + "valid_targets_min": 675 + }, + { + "epoch": 2.1713985278654047, + "grad_norm": 0.7438939712336335, + "learning_rate": 3.4862194724533934e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19082891941070557, + "step": 2065, + "valid_targets_mean": 1408.4, + "valid_targets_min": 882 + }, + { + "epoch": 2.176656151419558, + "grad_norm": 0.6782173450460893, + "learning_rate": 3.4827053407273894e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17597463726997375, + "step": 2070, + "valid_targets_mean": 1479.1, + "valid_targets_min": 660 + }, + { + "epoch": 2.181913774973712, + "grad_norm": 0.877909272775887, + "learning_rate": 3.4791810161493935e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17485372722148895, + "step": 2075, + "valid_targets_mean": 1459.5, + "valid_targets_min": 678 + }, + { + "epoch": 2.1871713985278656, + "grad_norm": 0.7142398254060238, + "learning_rate": 3.47564652294736e-05, + "loss": 0.1916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19566529989242554, + "step": 2080, + "valid_targets_mean": 1485.9, + "valid_targets_min": 676 + }, + { + "epoch": 2.192429022082019, + "grad_norm": 0.6865584578057562, + "learning_rate": 3.472101885419149e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16726654767990112, + "step": 2085, + "valid_targets_mean": 1500.9, + "valid_targets_min": 849 + }, + { + "epoch": 2.1976866456361726, + "grad_norm": 0.7185195607043672, + "learning_rate": 3.468547127932358e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17954030632972717, + "step": 2090, + "valid_targets_mean": 1381.5, + "valid_targets_min": 607 + }, + { + "epoch": 2.202944269190326, + "grad_norm": 0.769727438652912, + "learning_rate": 3.4649822749241525e-05, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18864957988262177, + "step": 2095, + "valid_targets_mean": 1446.8, + "valid_targets_min": 
722 + }, + { + "epoch": 2.2082018927444795, + "grad_norm": 0.7577107328406806, + "learning_rate": 3.4614073509011e-05, + "loss": 0.2002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2130323350429535, + "step": 2100, + "valid_targets_mean": 1633.3, + "valid_targets_min": 858 + }, + { + "epoch": 2.213459516298633, + "grad_norm": 0.8010918603868054, + "learning_rate": 3.4578223804390026e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817643791437149, + "step": 2105, + "valid_targets_mean": 1395.1, + "valid_targets_min": 689 + }, + { + "epoch": 2.2187171398527865, + "grad_norm": 0.7435288929665057, + "learning_rate": 3.454227388182725e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19536858797073364, + "step": 2110, + "valid_targets_mean": 1393.9, + "valid_targets_min": 692 + }, + { + "epoch": 2.22397476340694, + "grad_norm": 0.7034138762582146, + "learning_rate": 3.450622398846026e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17521831393241882, + "step": 2115, + "valid_targets_mean": 1465.8, + "valid_targets_min": 828 + }, + { + "epoch": 2.2292323869610935, + "grad_norm": 0.7624198822734523, + "learning_rate": 3.447007437211392e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19831611216068268, + "step": 2120, + "valid_targets_mean": 1430.3, + "valid_targets_min": 618 + }, + { + "epoch": 2.234490010515247, + "grad_norm": 0.7792809407586418, + "learning_rate": 3.443382528129862e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1771901547908783, + "step": 2125, + "valid_targets_mean": 1465.0, + "valid_targets_min": 629 + }, + { + "epoch": 2.2397476340694005, + "grad_norm": 0.7145972839785792, + "learning_rate": 3.4397476965208604e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2003161609172821, + "step": 2130, + "valid_targets_mean": 1750.8, + "valid_targets_min": 982 + }, + { + "epoch": 2.245005257623554, + "grad_norm": 0.679727513368836, + "learning_rate": 
3.43610296737202e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17647641897201538, + "step": 2135, + "valid_targets_mean": 1350.2, + "valid_targets_min": 699 + }, + { + "epoch": 2.250262881177708, + "grad_norm": 0.7168292262756297, + "learning_rate": 3.432448365739019e-05, + "loss": 0.1816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17237195372581482, + "step": 2140, + "valid_targets_mean": 1412.2, + "valid_targets_min": 679 + }, + { + "epoch": 2.2555205047318614, + "grad_norm": 0.8046272311937696, + "learning_rate": 3.4287839167454016e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17528027296066284, + "step": 2145, + "valid_targets_mean": 1399.9, + "valid_targets_min": 504 + }, + { + "epoch": 2.260778128286015, + "grad_norm": 0.6603447025307222, + "learning_rate": 3.4251096455824076e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.184175044298172, + "step": 2150, + "valid_targets_mean": 1690.8, + "valid_targets_min": 1023 + }, + { + "epoch": 2.2660357518401684, + "grad_norm": 0.6434158822206664, + "learning_rate": 3.421425577508799e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735583245754242, + "step": 2155, + "valid_targets_mean": 1568.6, + "valid_targets_min": 661 + }, + { + "epoch": 2.271293375394322, + "grad_norm": 0.8009726259772466, + "learning_rate": 3.417731737850687e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1895967721939087, + "step": 2160, + "valid_targets_mean": 1525.5, + "valid_targets_min": 889 + }, + { + "epoch": 2.2765509989484753, + "grad_norm": 0.643494307871804, + "learning_rate": 3.4140281520013595e-05, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1769677698612213, + "step": 2165, + "valid_targets_mean": 1615.4, + "valid_targets_min": 723 + }, + { + "epoch": 2.281808622502629, + "grad_norm": 0.726869648079002, + "learning_rate": 3.4103148454211017e-05, + "loss": 0.1896, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.17336110770702362, + "step": 2170, + "valid_targets_mean": 1416.5, + "valid_targets_min": 740 + }, + { + "epoch": 2.2870662460567823, + "grad_norm": 0.83309030190731, + "learning_rate": 3.4065918436370244e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2031644731760025, + "step": 2175, + "valid_targets_mean": 1385.6, + "valid_targets_min": 797 + }, + { + "epoch": 2.292323869610936, + "grad_norm": 0.6900344242032486, + "learning_rate": 3.402859172242889e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17275655269622803, + "step": 2180, + "valid_targets_mean": 1507.6, + "valid_targets_min": 752 + }, + { + "epoch": 2.2975814931650893, + "grad_norm": 0.7526050853659193, + "learning_rate": 3.399116856898931e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19981279969215393, + "step": 2185, + "valid_targets_mean": 1629.7, + "valid_targets_min": 789 + }, + { + "epoch": 2.302839116719243, + "grad_norm": 0.7564797889911168, + "learning_rate": 3.395364923331681e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17029231786727905, + "step": 2190, + "valid_targets_mean": 1373.2, + "valid_targets_min": 567 + }, + { + "epoch": 2.3080967402733963, + "grad_norm": 0.7394175133400129, + "learning_rate": 3.391603397333793e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19023913145065308, + "step": 2195, + "valid_targets_mean": 1431.1, + "valid_targets_min": 639 + }, + { + "epoch": 2.3133543638275498, + "grad_norm": 0.6295547136967085, + "learning_rate": 3.387832304763861e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18990573287010193, + "step": 2200, + "valid_targets_mean": 1871.2, + "valid_targets_min": 746 + }, + { + "epoch": 2.3186119873817033, + "grad_norm": 0.7038322770500872, + "learning_rate": 3.384051671546247e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.176752507686615, + "step": 2205, + "valid_targets_mean": 1237.9, + "valid_targets_min": 773 + 
}, + { + "epoch": 2.3238696109358568, + "grad_norm": 0.7286234859219742, + "learning_rate": 3.380261523670899e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19026847183704376, + "step": 2210, + "valid_targets_mean": 1464.4, + "valid_targets_min": 679 + }, + { + "epoch": 2.3291272344900107, + "grad_norm": 0.6864763485278645, + "learning_rate": 3.376461887193173e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17656734585762024, + "step": 2215, + "valid_targets_mean": 1596.5, + "valid_targets_min": 818 + }, + { + "epoch": 2.334384858044164, + "grad_norm": 0.6830211289877853, + "learning_rate": 3.372652788233656e-05, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18425202369689941, + "step": 2220, + "valid_targets_mean": 1417.5, + "valid_targets_min": 986 + }, + { + "epoch": 2.3396424815983177, + "grad_norm": 0.7745185199611311, + "learning_rate": 3.368834252977982e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19619306921958923, + "step": 2225, + "valid_targets_mean": 1461.8, + "valid_targets_min": 773 + }, + { + "epoch": 2.344900105152471, + "grad_norm": 0.8146170127964989, + "learning_rate": 3.3650063076766586e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17531231045722961, + "step": 2230, + "valid_targets_mean": 1477.8, + "valid_targets_min": 789 + }, + { + "epoch": 2.3501577287066246, + "grad_norm": 0.6958921902069705, + "learning_rate": 3.3611689786448786e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1900712549686432, + "step": 2235, + "valid_targets_mean": 1517.9, + "valid_targets_min": 655 + }, + { + "epoch": 2.355415352260778, + "grad_norm": 0.7717355000293836, + "learning_rate": 3.357322292262346e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2049972414970398, + "step": 2240, + "valid_targets_mean": 1540.2, + "valid_targets_min": 667 + }, + { + "epoch": 2.3606729758149316, + "grad_norm": 0.720791279524025, + "learning_rate": 
3.353466274973092e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19426554441452026, + "step": 2245, + "valid_targets_mean": 1807.9, + "valid_targets_min": 1012 + }, + { + "epoch": 2.365930599369085, + "grad_norm": 0.7436225989082415, + "learning_rate": 3.3496009532852907e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2064618468284607, + "step": 2250, + "valid_targets_mean": 1649.6, + "valid_targets_min": 684 + }, + { + "epoch": 2.3711882229232386, + "grad_norm": 0.7916354128987332, + "learning_rate": 3.345726353771082e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18808495998382568, + "step": 2255, + "valid_targets_mean": 1682.4, + "valid_targets_min": 689 + }, + { + "epoch": 2.376445846477392, + "grad_norm": 0.7266901064565375, + "learning_rate": 3.341842503066384e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16265717148780823, + "step": 2260, + "valid_targets_mean": 1117.4, + "valid_targets_min": 620 + }, + { + "epoch": 2.3817034700315456, + "grad_norm": 0.721759727205511, + "learning_rate": 3.3379494278707136e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20615462958812714, + "step": 2265, + "valid_targets_mean": 1740.9, + "valid_targets_min": 656 + }, + { + "epoch": 2.386961093585699, + "grad_norm": 0.7544065247730406, + "learning_rate": 3.334047154947e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21215087175369263, + "step": 2270, + "valid_targets_mean": 1843.3, + "valid_targets_min": 697 + }, + { + "epoch": 2.392218717139853, + "grad_norm": 0.7217184866490929, + "learning_rate": 3.330135711121404e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18952813744544983, + "step": 2275, + "valid_targets_mean": 1573.6, + "valid_targets_min": 816 + }, + { + "epoch": 2.3974763406940065, + "grad_norm": 0.7785411563666498, + "learning_rate": 3.32621512328313e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.18069085478782654, + "step": 2280, + "valid_targets_mean": 1531.9, + "valid_targets_min": 959 + }, + { + "epoch": 2.40273396424816, + "grad_norm": 0.7234598318665647, + "learning_rate": 3.3222854183842434e-05, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15989463031291962, + "step": 2285, + "valid_targets_mean": 1287.7, + "valid_targets_min": 747 + }, + { + "epoch": 2.4079915878023135, + "grad_norm": 0.7905465914706342, + "learning_rate": 3.318346623439486e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20036505162715912, + "step": 2290, + "valid_targets_mean": 1601.7, + "valid_targets_min": 933 + }, + { + "epoch": 2.413249211356467, + "grad_norm": 0.9397015694455316, + "learning_rate": 3.314398765526087e-05, + "loss": 0.1805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17739002406597137, + "step": 2295, + "valid_targets_mean": 1477.9, + "valid_targets_min": 692 + }, + { + "epoch": 2.4185068349106205, + "grad_norm": 0.706169227209052, + "learning_rate": 3.310441871783581e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16355757415294647, + "step": 2300, + "valid_targets_mean": 1497.5, + "valid_targets_min": 741 + }, + { + "epoch": 2.423764458464774, + "grad_norm": 0.6726507867346725, + "learning_rate": 3.3064759694136165e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16233614087104797, + "step": 2305, + "valid_targets_mean": 1422.9, + "valid_targets_min": 639 + }, + { + "epoch": 2.4290220820189274, + "grad_norm": 0.9421393157827742, + "learning_rate": 3.302501085679776e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.169845849275589, + "step": 2310, + "valid_targets_mean": 1248.6, + "valid_targets_min": 743 + }, + { + "epoch": 2.434279705573081, + "grad_norm": 0.6899969418752109, + "learning_rate": 3.29851724790738e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17680177092552185, + "step": 2315, + "valid_targets_mean": 1485.9, + "valid_targets_min": 610 
+ }, + { + "epoch": 2.4395373291272344, + "grad_norm": 0.7056916333594729, + "learning_rate": 3.294524483483306e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16916847229003906, + "step": 2320, + "valid_targets_mean": 1496.9, + "valid_targets_min": 695 + }, + { + "epoch": 2.444794952681388, + "grad_norm": 0.7207319728703836, + "learning_rate": 3.290522819855799e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1925514191389084, + "step": 2325, + "valid_targets_mean": 1693.9, + "valid_targets_min": 807 + }, + { + "epoch": 2.4500525762355414, + "grad_norm": 0.7096987739562477, + "learning_rate": 3.2865122845342776e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16411489248275757, + "step": 2330, + "valid_targets_mean": 1390.0, + "valid_targets_min": 765 + }, + { + "epoch": 2.455310199789695, + "grad_norm": 0.7811065159791979, + "learning_rate": 3.282492905089151e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19206087291240692, + "step": 2335, + "valid_targets_mean": 1381.8, + "valid_targets_min": 759 + }, + { + "epoch": 2.4605678233438484, + "grad_norm": 0.6634836472231062, + "learning_rate": 3.2784647091516285e-05, + "loss": 0.1827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16965967416763306, + "step": 2340, + "valid_targets_mean": 1558.3, + "valid_targets_min": 692 + }, + { + "epoch": 2.465825446898002, + "grad_norm": 0.5874972460874052, + "learning_rate": 3.274427724413527e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21588899195194244, + "step": 2345, + "valid_targets_mean": 2055.1, + "valid_targets_min": 333 + }, + { + "epoch": 2.471083070452156, + "grad_norm": 0.36021750066691155, + "learning_rate": 3.270381978627081e-05, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1089751273393631, + "step": 2350, + "valid_targets_mean": 3535.2, + "valid_targets_min": 712 + }, + { + "epoch": 2.4763406940063093, + "grad_norm": 0.4433635986475339, + 
"learning_rate": 3.266327499604755e-05, + "loss": 0.1067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1179640144109726, + "step": 2355, + "valid_targets_mean": 2602.1, + "valid_targets_min": 700 + }, + { + "epoch": 2.481598317560463, + "grad_norm": 0.4262428373949676, + "learning_rate": 3.262264315219049e-05, + "loss": 0.1084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15705761313438416, + "step": 2360, + "valid_targets_mean": 3028.2, + "valid_targets_min": 803 + }, + { + "epoch": 2.4868559411146163, + "grad_norm": 0.39713810856872656, + "learning_rate": 3.258192453402306e-05, + "loss": 0.1013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09887437522411346, + "step": 2365, + "valid_targets_mean": 2374.9, + "valid_targets_min": 641 + }, + { + "epoch": 2.4921135646687698, + "grad_norm": 0.3798965505081102, + "learning_rate": 3.254111942146526e-05, + "loss": 0.1067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10583527386188507, + "step": 2370, + "valid_targets_mean": 2866.5, + "valid_targets_min": 806 + }, + { + "epoch": 2.4973711882229233, + "grad_norm": 0.6909618223654735, + "learning_rate": 3.2500228095031677e-05, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1656060814857483, + "step": 2375, + "valid_targets_mean": 1443.8, + "valid_targets_min": 539 + }, + { + "epoch": 2.5026288117770767, + "grad_norm": 0.3645376950455108, + "learning_rate": 3.2459250835829553e-05, + "loss": 0.118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11906848102807999, + "step": 2380, + "valid_targets_mean": 3098.6, + "valid_targets_min": 920 + }, + { + "epoch": 2.5078864353312302, + "grad_norm": 0.3985346121901146, + "learning_rate": 3.241818792555692e-05, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11283739656209946, + "step": 2385, + "valid_targets_mean": 2836.0, + "valid_targets_min": 859 + }, + { + "epoch": 2.5131440588853837, + "grad_norm": 0.2671267899566765, + "learning_rate": 3.2377039646500565e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.06598129123449326, + "step": 2390, + "valid_targets_mean": 4075.8, + "valid_targets_min": 738 + }, + { + "epoch": 2.518401682439537, + "grad_norm": 0.34863111771096733, + "learning_rate": 3.2335806281534195e-05, + "loss": 0.0944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12008039653301239, + "step": 2395, + "valid_targets_mean": 4244.4, + "valid_targets_min": 2091 + }, + { + "epoch": 2.5236593059936907, + "grad_norm": 0.34054345877293296, + "learning_rate": 3.229448811411639e-05, + "loss": 0.0988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10303230583667755, + "step": 2400, + "valid_targets_mean": 4220.3, + "valid_targets_min": 600 + }, + { + "epoch": 2.5289169295478446, + "grad_norm": 0.33204504204343716, + "learning_rate": 3.225308542828874e-05, + "loss": 0.1014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0962679535150528, + "step": 2405, + "valid_targets_mean": 3845.9, + "valid_targets_min": 518 + }, + { + "epoch": 2.534174553101998, + "grad_norm": 0.3896243175443543, + "learning_rate": 3.221159850867385e-05, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1067102700471878, + "step": 2410, + "valid_targets_mean": 2542.9, + "valid_targets_min": 549 + }, + { + "epoch": 2.5394321766561516, + "grad_norm": 0.3002808227690645, + "learning_rate": 3.217002764047338e-05, + "loss": 0.0921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08377303183078766, + "step": 2415, + "valid_targets_mean": 3707.9, + "valid_targets_min": 2724 + }, + { + "epoch": 2.544689800210305, + "grad_norm": 0.5028060391316009, + "learning_rate": 3.212837310946609e-05, + "loss": 0.1129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12688994407653809, + "step": 2420, + "valid_targets_mean": 1648.7, + "valid_targets_min": 518 + }, + { + "epoch": 2.5499474237644586, + "grad_norm": 0.28550508665248653, + "learning_rate": 3.20866352020059e-05, + "loss": 0.0851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07078361511230469, + "step": 2425, + "valid_targets_mean": 3318.9, + 
"valid_targets_min": 844 + }, + { + "epoch": 2.555205047318612, + "grad_norm": 0.45015381265804133, + "learning_rate": 3.204481420501989e-05, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10451000183820724, + "step": 2430, + "valid_targets_mean": 2370.6, + "valid_targets_min": 516 + }, + { + "epoch": 2.5604626708727656, + "grad_norm": 0.5125497887078124, + "learning_rate": 3.200291040600632e-05, + "loss": 0.1183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1507492959499359, + "step": 2435, + "valid_targets_mean": 2091.5, + "valid_targets_min": 557 + }, + { + "epoch": 2.565720294426919, + "grad_norm": 0.42792156593961106, + "learning_rate": 3.196092409303272e-05, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11543115228414536, + "step": 2440, + "valid_targets_mean": 2961.9, + "valid_targets_min": 721 + }, + { + "epoch": 2.5709779179810726, + "grad_norm": 0.38404609759881503, + "learning_rate": 3.1918855554733804e-05, + "loss": 0.1165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1083003580570221, + "step": 2445, + "valid_targets_mean": 2553.1, + "valid_targets_min": 615 + }, + { + "epoch": 2.576235541535226, + "grad_norm": 0.4435954547963332, + "learning_rate": 3.187670508030959e-05, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1930696666240692, + "step": 2450, + "valid_targets_mean": 3499.9, + "valid_targets_min": 2609 + }, + { + "epoch": 2.5814931650893795, + "grad_norm": 0.37553455052194584, + "learning_rate": 3.183447295952334e-05, + "loss": 0.0959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07744848728179932, + "step": 2455, + "valid_targets_mean": 4125.6, + "valid_targets_min": 3732 + }, + { + "epoch": 2.586750788643533, + "grad_norm": 0.6469228961036678, + "learning_rate": 3.1792159482699606e-05, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15886491537094116, + "step": 2460, + "valid_targets_mean": 1383.1, + "valid_targets_min": 714 + }, + { + "epoch": 2.5920084121976865, + "grad_norm": 
0.4191589635266597, + "learning_rate": 3.174976494072222e-05, + "loss": 0.1332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10635621100664139, + "step": 2465, + "valid_targets_mean": 2926.9, + "valid_targets_min": 754 + }, + { + "epoch": 2.59726603575184, + "grad_norm": 0.3402858715999574, + "learning_rate": 3.170728962503227e-05, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10382866114377975, + "step": 2470, + "valid_targets_mean": 3999.0, + "valid_targets_min": 3178 + }, + { + "epoch": 2.6025236593059935, + "grad_norm": 0.6105994900667755, + "learning_rate": 3.1664733827626174e-05, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26841890811920166, + "step": 2475, + "valid_targets_mean": 1954.8, + "valid_targets_min": 568 + }, + { + "epoch": 2.607781282860147, + "grad_norm": 0.4198025547006107, + "learning_rate": 3.1622097841053574e-05, + "loss": 0.2823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1229616105556488, + "step": 2480, + "valid_targets_mean": 3351.6, + "valid_targets_min": 1248 + }, + { + "epoch": 2.6130389064143005, + "grad_norm": 0.4213232846564987, + "learning_rate": 3.15793819584154e-05, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11470212042331696, + "step": 2485, + "valid_targets_mean": 2846.6, + "valid_targets_min": 833 + }, + { + "epoch": 2.6182965299684544, + "grad_norm": 0.4077725239227606, + "learning_rate": 3.1536586473361815e-05, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11247079819440842, + "step": 2490, + "valid_targets_mean": 2976.6, + "valid_targets_min": 798 + }, + { + "epoch": 2.623554153522608, + "grad_norm": 0.42956799077435265, + "learning_rate": 3.149371168009022e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12841041386127472, + "step": 2495, + "valid_targets_mean": 3806.4, + "valid_targets_min": 2258 + }, + { + "epoch": 2.6288117770767614, + "grad_norm": 0.2734498060324378, + "learning_rate": 3.145075787334319e-05, + "loss": 0.0852, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.08369526267051697, + "step": 2500, + "valid_targets_mean": 4550.2, + "valid_targets_min": 1466 + }, + { + "epoch": 2.634069400630915, + "grad_norm": 0.33376159399304134, + "learning_rate": 3.140772534840652e-05, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08720729500055313, + "step": 2505, + "valid_targets_mean": 3354.7, + "valid_targets_min": 560 + }, + { + "epoch": 2.6393270241850684, + "grad_norm": 0.37737954881427266, + "learning_rate": 3.1364614401107126e-05, + "loss": 0.1145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11012201756238937, + "step": 2510, + "valid_targets_mean": 3625.6, + "valid_targets_min": 846 + }, + { + "epoch": 2.644584647739222, + "grad_norm": 0.37165700619929226, + "learning_rate": 3.1321425327811044e-05, + "loss": 0.0944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09358272701501846, + "step": 2515, + "valid_targets_mean": 3525.6, + "valid_targets_min": 1352 + }, + { + "epoch": 2.6498422712933754, + "grad_norm": 0.3563107369337592, + "learning_rate": 3.127815842542138e-05, + "loss": 0.0951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08345861732959747, + "step": 2520, + "valid_targets_mean": 3017.1, + "valid_targets_min": 536 + }, + { + "epoch": 2.655099894847529, + "grad_norm": 0.37286748074995496, + "learning_rate": 3.1234813991376296e-05, + "loss": 0.0996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08975954353809357, + "step": 2525, + "valid_targets_mean": 3565.4, + "valid_targets_min": 2264 + }, + { + "epoch": 2.6603575184016823, + "grad_norm": 0.3581750525136319, + "learning_rate": 3.119139232364693e-05, + "loss": 0.0923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08885161578655243, + "step": 2530, + "valid_targets_mean": 3534.2, + "valid_targets_min": 820 + }, + { + "epoch": 2.665615141955836, + "grad_norm": 0.6377410460278035, + "learning_rate": 3.1147893720735356e-05, + "loss": 0.1472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3027060031890869, + "step": 2535, + 
"valid_targets_mean": 2330.0, + "valid_targets_min": 732 + }, + { + "epoch": 2.6708727655099898, + "grad_norm": 0.3094795945561733, + "learning_rate": 3.110431848167255e-05, + "loss": 0.0895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08099585771560669, + "step": 2540, + "valid_targets_mean": 3734.1, + "valid_targets_min": 2719 + }, + { + "epoch": 2.6761303890641432, + "grad_norm": 0.5046490055828962, + "learning_rate": 3.106066690601633e-05, + "loss": 0.0907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11604218930006027, + "step": 2545, + "valid_targets_mean": 1698.0, + "valid_targets_min": 667 + }, + { + "epoch": 2.6813880126182967, + "grad_norm": 0.2908057278242517, + "learning_rate": 3.101693929384927e-05, + "loss": 0.0861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07138262689113617, + "step": 2550, + "valid_targets_mean": 3763.1, + "valid_targets_min": 2353 + }, + { + "epoch": 2.6866456361724502, + "grad_norm": 0.3616701521439811, + "learning_rate": 3.097313594577667e-05, + "loss": 0.0914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09038901329040527, + "step": 2555, + "valid_targets_mean": 2663.5, + "valid_targets_min": 549 + }, + { + "epoch": 2.6919032597266037, + "grad_norm": 0.3352070782547861, + "learning_rate": 3.092925716292447e-05, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09159399569034576, + "step": 2560, + "valid_targets_mean": 3527.6, + "valid_targets_min": 908 + }, + { + "epoch": 2.697160883280757, + "grad_norm": 0.42113658320185515, + "learning_rate": 3.088530324693719e-05, + "loss": 0.1203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15114620327949524, + "step": 2565, + "valid_targets_mean": 3102.0, + "valid_targets_min": 647 + }, + { + "epoch": 2.7024185068349107, + "grad_norm": 0.40640939297887746, + "learning_rate": 3.0841274499975855e-05, + "loss": 0.1055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08857488632202148, + "step": 2570, + "valid_targets_mean": 2138.5, + "valid_targets_min": 598 + }, + { + "epoch": 
2.707676130389064, + "grad_norm": 0.7582232852961461, + "learning_rate": 3.079717122471591e-05, + "loss": 0.1288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18078215420246124, + "step": 2575, + "valid_targets_mean": 1224.7, + "valid_targets_min": 578 + }, + { + "epoch": 2.7129337539432177, + "grad_norm": 0.501960236405102, + "learning_rate": 3.075299372434515e-05, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13699260354042053, + "step": 2580, + "valid_targets_mean": 1942.3, + "valid_targets_min": 695 + }, + { + "epoch": 2.718191377497371, + "grad_norm": 0.47803679623958584, + "learning_rate": 3.0708742302561606e-05, + "loss": 0.1035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12304352223873138, + "step": 2585, + "valid_targets_mean": 2461.9, + "valid_targets_min": 691 + }, + { + "epoch": 2.7234490010515247, + "grad_norm": 0.33458570722565917, + "learning_rate": 3.066441726357153e-05, + "loss": 0.1293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08373156189918518, + "step": 2590, + "valid_targets_mean": 3349.7, + "valid_targets_min": 728 + }, + { + "epoch": 2.728706624605678, + "grad_norm": 0.5810954421117924, + "learning_rate": 3.062001891208721e-05, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14271876215934753, + "step": 2595, + "valid_targets_mean": 1778.2, + "valid_targets_min": 523 + }, + { + "epoch": 2.7339642481598316, + "grad_norm": 0.33866352401190314, + "learning_rate": 3.0575547553324944e-05, + "loss": 0.0947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09259197115898132, + "step": 2600, + "valid_targets_mean": 3331.4, + "valid_targets_min": 468 + }, + { + "epoch": 2.739221871713985, + "grad_norm": 0.2455960232652355, + "learning_rate": 3.053100349300291e-05, + "loss": 0.1022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0542142353951931, + "step": 2605, + "valid_targets_mean": 4553.9, + "valid_targets_min": 580 + }, + { + "epoch": 2.7444794952681386, + "grad_norm": 0.36336803839048115, + "learning_rate": 
3.0486387037339074e-05, + "loss": 0.0927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08497624844312668, + "step": 2610, + "valid_targets_mean": 2921.8, + "valid_targets_min": 459 + }, + { + "epoch": 2.749737118822292, + "grad_norm": 0.35976051819140875, + "learning_rate": 3.0441698493049078e-05, + "loss": 0.093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11405770480632782, + "step": 2615, + "valid_targets_mean": 4009.8, + "valid_targets_min": 981 + }, + { + "epoch": 2.7549947423764456, + "grad_norm": 0.44373125167841454, + "learning_rate": 3.0396938167344153e-05, + "loss": 0.1042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13850055634975433, + "step": 2620, + "valid_targets_mean": 2390.6, + "valid_targets_min": 697 + }, + { + "epoch": 2.7602523659305995, + "grad_norm": 0.6014635705304406, + "learning_rate": 3.0352106367928974e-05, + "loss": 0.1114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10286036133766174, + "step": 2625, + "valid_targets_mean": 3466.8, + "valid_targets_min": 1165 + }, + { + "epoch": 2.765509989484753, + "grad_norm": 0.38367063359718373, + "learning_rate": 3.030720340299957e-05, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1048555076122284, + "step": 2630, + "valid_targets_mean": 3781.1, + "valid_targets_min": 2901 + }, + { + "epoch": 2.7707676130389065, + "grad_norm": 0.33202146237403757, + "learning_rate": 3.0262229581241197e-05, + "loss": 0.0954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07668071240186691, + "step": 2635, + "valid_targets_mean": 3460.7, + "valid_targets_min": 868 + }, + { + "epoch": 2.77602523659306, + "grad_norm": 0.36664381710018973, + "learning_rate": 3.0217185211826218e-05, + "loss": 0.0983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08506757020950317, + "step": 2640, + "valid_targets_mean": 2821.6, + "valid_targets_min": 502 + }, + { + "epoch": 2.7812828601472135, + "grad_norm": 0.3365892420413774, + "learning_rate": 3.0172070604411957e-05, + "loss": 0.1108, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.07851487398147583, + "step": 2645, + "valid_targets_mean": 3193.8, + "valid_targets_min": 724 + }, + { + "epoch": 2.786540483701367, + "grad_norm": 0.3107521954990926, + "learning_rate": 3.0126886069138623e-05, + "loss": 0.0822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07940559089183807, + "step": 2650, + "valid_targets_mean": 3795.8, + "valid_targets_min": 2769 + }, + { + "epoch": 2.7917981072555205, + "grad_norm": 0.3865245672571263, + "learning_rate": 3.0081631916627114e-05, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09602253884077072, + "step": 2655, + "valid_targets_mean": 2893.9, + "valid_targets_min": 591 + }, + { + "epoch": 2.797055730809674, + "grad_norm": 0.33169201070750626, + "learning_rate": 3.003630845797693e-05, + "loss": 0.1015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07864493131637573, + "step": 2660, + "valid_targets_mean": 3111.2, + "valid_targets_min": 387 + }, + { + "epoch": 2.8023133543638274, + "grad_norm": 0.3946357391314771, + "learning_rate": 2.9990916004763996e-05, + "loss": 0.1201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10464547574520111, + "step": 2665, + "valid_targets_mean": 3326.2, + "valid_targets_min": 773 + }, + { + "epoch": 2.807570977917981, + "grad_norm": 0.43507979308801414, + "learning_rate": 2.9945454869038562e-05, + "loss": 0.1032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10327298939228058, + "step": 2670, + "valid_targets_mean": 2347.0, + "valid_targets_min": 457 + }, + { + "epoch": 2.812828601472135, + "grad_norm": 0.5930253754750565, + "learning_rate": 2.9899925363323022e-05, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11379785090684891, + "step": 2675, + "valid_targets_mean": 1337.1, + "valid_targets_min": 482 + }, + { + "epoch": 2.8180862250262884, + "grad_norm": 0.5468279937396003, + "learning_rate": 2.9854327800609775e-05, + "loss": 0.1079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.158033549785614, + "step": 2680, + "valid_targets_mean": 1989.9, 
+ "valid_targets_min": 708 + }, + { + "epoch": 2.823343848580442, + "grad_norm": 0.43620465228141875, + "learning_rate": 2.98086624943591e-05, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09712845087051392, + "step": 2685, + "valid_targets_mean": 2470.5, + "valid_targets_min": 617 + }, + { + "epoch": 2.8286014721345953, + "grad_norm": 0.450086400246681, + "learning_rate": 2.976292975849696e-05, + "loss": 0.1015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10751894861459732, + "step": 2690, + "valid_targets_mean": 2661.2, + "valid_targets_min": 720 + }, + { + "epoch": 2.833859095688749, + "grad_norm": 0.7370201864241585, + "learning_rate": 2.9717129907412857e-05, + "loss": 0.0965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12240259349346161, + "step": 2695, + "valid_targets_mean": 1585.1, + "valid_targets_min": 526 + }, + { + "epoch": 2.8391167192429023, + "grad_norm": 0.6016270598846267, + "learning_rate": 2.9671263255957697e-05, + "loss": 0.1192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1466367393732071, + "step": 2700, + "valid_targets_mean": 1623.9, + "valid_targets_min": 637 + }, + { + "epoch": 2.844374342797056, + "grad_norm": 0.27378648653324206, + "learning_rate": 2.9625330119441584e-05, + "loss": 0.0914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06150565296411514, + "step": 2705, + "valid_targets_mean": 3762.9, + "valid_targets_min": 1440 + }, + { + "epoch": 2.8496319663512093, + "grad_norm": 0.38144492600379587, + "learning_rate": 2.957933081363169e-05, + "loss": 0.0922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09528522193431854, + "step": 2710, + "valid_targets_mean": 4096.2, + "valid_targets_min": 1773 + }, + { + "epoch": 2.854889589905363, + "grad_norm": 0.3884763720171555, + "learning_rate": 2.953326565475006e-05, + "loss": 0.1032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11531278491020203, + "step": 2715, + "valid_targets_mean": 2958.6, + "valid_targets_min": 721 + }, + { + "epoch": 2.8601472134595163, + "grad_norm": 
0.43329164171687823, + "learning_rate": 2.9487134959471445e-05, + "loss": 0.0851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08293789625167847, + "step": 2720, + "valid_targets_mean": 3515.3, + "valid_targets_min": 667 + }, + { + "epoch": 2.8654048370136698, + "grad_norm": 0.3359357271875174, + "learning_rate": 2.944093904492113e-05, + "loss": 0.0845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09996461868286133, + "step": 2725, + "valid_targets_mean": 4868.8, + "valid_targets_min": 4077 + }, + { + "epoch": 2.8706624605678233, + "grad_norm": 0.3620467852222587, + "learning_rate": 2.9394678228672737e-05, + "loss": 0.1054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09600923955440521, + "step": 2730, + "valid_targets_mean": 3206.5, + "valid_targets_min": 739 + }, + { + "epoch": 2.8759200841219767, + "grad_norm": 0.4294598207270518, + "learning_rate": 2.9348352828746076e-05, + "loss": 0.1182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12123970687389374, + "step": 2735, + "valid_targets_mean": 3558.0, + "valid_targets_min": 1090 + }, + { + "epoch": 2.8811777076761302, + "grad_norm": 0.3751032778537608, + "learning_rate": 2.9301963163604916e-05, + "loss": 0.0971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10285535454750061, + "step": 2740, + "valid_targets_mean": 3212.2, + "valid_targets_min": 704 + }, + { + "epoch": 2.8864353312302837, + "grad_norm": 0.39934160243166605, + "learning_rate": 2.925550955215483e-05, + "loss": 0.104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0928100049495697, + "step": 2745, + "valid_targets_mean": 2616.9, + "valid_targets_min": 774 + }, + { + "epoch": 2.891692954784437, + "grad_norm": 0.34278090460592187, + "learning_rate": 2.9208992313740993e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.079774871468544, + "step": 2750, + "valid_targets_mean": 2966.9, + "valid_targets_min": 485 + }, + { + "epoch": 2.8969505783385907, + "grad_norm": 0.39632647949724764, + "learning_rate": 2.916241176814596e-05, + "loss": 0.0953, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.105677530169487, + "step": 2755, + "valid_targets_mean": 3414.2, + "valid_targets_min": 1048 + }, + { + "epoch": 2.9022082018927446, + "grad_norm": 0.363079019721272, + "learning_rate": 2.9115768235587526e-05, + "loss": 0.1037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08583900332450867, + "step": 2760, + "valid_targets_mean": 2722.4, + "valid_targets_min": 722 + }, + { + "epoch": 2.907465825446898, + "grad_norm": 0.4827175083183273, + "learning_rate": 2.9069062036716454e-05, + "loss": 0.1053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15601631999015808, + "step": 2765, + "valid_targets_mean": 2128.9, + "valid_targets_min": 647 + }, + { + "epoch": 2.9127234490010516, + "grad_norm": 0.5147558124862559, + "learning_rate": 2.9022293492614334e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14720407128334045, + "step": 2770, + "valid_targets_mean": 1685.1, + "valid_targets_min": 570 + }, + { + "epoch": 2.917981072555205, + "grad_norm": 0.4024304901026711, + "learning_rate": 2.8975462924791334e-05, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09662102162837982, + "step": 2775, + "valid_targets_mean": 3289.2, + "valid_targets_min": 722 + }, + { + "epoch": 2.9232386961093586, + "grad_norm": 0.4295888137611356, + "learning_rate": 2.892857065518401e-05, + "loss": 0.0979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09456585347652435, + "step": 2780, + "valid_targets_mean": 2418.6, + "valid_targets_min": 635 + }, + { + "epoch": 2.928496319663512, + "grad_norm": 0.4074202459874554, + "learning_rate": 2.8881617006153072e-05, + "loss": 0.0985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09729564189910889, + "step": 2785, + "valid_targets_mean": 3266.9, + "valid_targets_min": 1591 + }, + { + "epoch": 2.9337539432176656, + "grad_norm": 0.35867945229490455, + "learning_rate": 2.8834602300481207e-05, + "loss": 0.096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08896251022815704, + "step": 2790, + 
"valid_targets_mean": 2896.2, + "valid_targets_min": 798 + }, + { + "epoch": 2.939011566771819, + "grad_norm": 0.35111362106844274, + "learning_rate": 2.878752686137082e-05, + "loss": 0.0872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07829457521438599, + "step": 2795, + "valid_targets_mean": 2914.8, + "valid_targets_min": 785 + }, + { + "epoch": 2.9442691903259726, + "grad_norm": 0.3919187245924711, + "learning_rate": 2.874039101244183e-05, + "loss": 0.1209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08010765165090561, + "step": 2800, + "valid_targets_mean": 2452.7, + "valid_targets_min": 766 + }, + { + "epoch": 2.949526813880126, + "grad_norm": 0.3383637041508128, + "learning_rate": 2.869319507772944e-05, + "loss": 0.0869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08066456019878387, + "step": 2805, + "valid_targets_mean": 3346.1, + "valid_targets_min": 797 + }, + { + "epoch": 2.9547844374342795, + "grad_norm": 0.48600589489422974, + "learning_rate": 2.864593938168192e-05, + "loss": 0.0979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11215262115001678, + "step": 2810, + "valid_targets_mean": 2584.3, + "valid_targets_min": 786 + }, + { + "epoch": 2.9600420609884335, + "grad_norm": 0.44437151597046454, + "learning_rate": 2.8598624249158367e-05, + "loss": 0.0978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11946041882038116, + "step": 2815, + "valid_targets_mean": 3273.0, + "valid_targets_min": 1825 + }, + { + "epoch": 2.965299684542587, + "grad_norm": 0.35284810030283154, + "learning_rate": 2.855125000542647e-05, + "loss": 0.0992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10998371988534927, + "step": 2820, + "valid_targets_mean": 4220.7, + "valid_targets_min": 2274 + }, + { + "epoch": 2.9705573080967405, + "grad_norm": 0.35176137941790625, + "learning_rate": 2.8503816976160278e-05, + "loss": 0.0855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10145671665668488, + "step": 2825, + "valid_targets_mean": 3440.8, + "valid_targets_min": 678 + }, + { + "epoch": 
2.975814931650894, + "grad_norm": 0.2819778869137246, + "learning_rate": 2.8456325487437966e-05, + "loss": 0.0936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08021849393844604, + "step": 2830, + "valid_targets_mean": 4000.1, + "valid_targets_min": 1017 + }, + { + "epoch": 2.9810725552050474, + "grad_norm": 0.5532322086602542, + "learning_rate": 2.8408775865739578e-05, + "loss": 0.1435, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21923969686031342, + "step": 2835, + "valid_targets_mean": 2186.7, + "valid_targets_min": 709 + }, + { + "epoch": 2.986330178759201, + "grad_norm": 0.32650521446979597, + "learning_rate": 2.8361168437944817e-05, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07725784182548523, + "step": 2840, + "valid_targets_mean": 3648.8, + "valid_targets_min": 1097 + }, + { + "epoch": 2.9915878023133544, + "grad_norm": 0.34666087266548073, + "learning_rate": 2.8313503531330738e-05, + "loss": 0.0995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08928908407688141, + "step": 2845, + "valid_targets_mean": 3303.7, + "valid_targets_min": 840 + }, + { + "epoch": 2.996845425867508, + "grad_norm": 0.3357698377603881, + "learning_rate": 2.826578147356956e-05, + "loss": 0.0915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08458413183689117, + "step": 2850, + "valid_targets_mean": 3666.4, + "valid_targets_min": 2555 + }, + { + "epoch": 3.0021030494216614, + "grad_norm": 1.0153830951441243, + "learning_rate": 2.8218002592726384e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2009751796722412, + "step": 2855, + "valid_targets_mean": 1553.1, + "valid_targets_min": 777 + }, + { + "epoch": 3.007360672975815, + "grad_norm": 0.9578716659295562, + "learning_rate": 2.8170167217256934e-05, + "loss": 0.1962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.188889741897583, + "step": 2860, + "valid_targets_mean": 1439.1, + "valid_targets_min": 705 + }, + { + "epoch": 3.0126182965299684, + "grad_norm": 0.7480346808263534, + "learning_rate": 
2.8122275676005304e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.178565114736557, + "step": 2865, + "valid_targets_mean": 1826.0, + "valid_targets_min": 906 + }, + { + "epoch": 3.017875920084122, + "grad_norm": 0.7757563210471313, + "learning_rate": 2.807432829820171e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563483327627182, + "step": 2870, + "valid_targets_mean": 1316.2, + "valid_targets_min": 680 + }, + { + "epoch": 3.0231335436382754, + "grad_norm": 0.760475622783321, + "learning_rate": 2.8026325413460215e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1838594377040863, + "step": 2875, + "valid_targets_mean": 1914.6, + "valid_targets_min": 819 + }, + { + "epoch": 3.028391167192429, + "grad_norm": 1.186599430117999, + "learning_rate": 2.7978267351776448e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17113059759140015, + "step": 2880, + "valid_targets_mean": 1383.9, + "valid_targets_min": 734 + }, + { + "epoch": 3.0336487907465823, + "grad_norm": 0.8368328709994571, + "learning_rate": 2.7930154443525377e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17734494805335999, + "step": 2885, + "valid_targets_mean": 1688.6, + "valid_targets_min": 930 + }, + { + "epoch": 3.0389064143007363, + "grad_norm": 0.7638523242688803, + "learning_rate": 2.7881987019458992e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756438910961151, + "step": 2890, + "valid_targets_mean": 1434.3, + "valid_targets_min": 737 + }, + { + "epoch": 3.0441640378548898, + "grad_norm": 0.7603017766385608, + "learning_rate": 2.7833765410704062e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15442828834056854, + "step": 2895, + "valid_targets_mean": 1524.4, + "valid_targets_min": 759 + }, + { + "epoch": 3.0494216614090432, + "grad_norm": 0.737220054601989, + "learning_rate": 2.778548994875984e-05, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.18120095133781433, + "step": 2900, + "valid_targets_mean": 1687.2, + "valid_targets_min": 753 + }, + { + "epoch": 3.0546792849631967, + "grad_norm": 0.7219646311378729, + "learning_rate": 2.7737160965495794e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17094483971595764, + "step": 2905, + "valid_targets_mean": 1439.9, + "valid_targets_min": 572 + }, + { + "epoch": 3.0599369085173502, + "grad_norm": 0.947320515199349, + "learning_rate": 2.768877879314935e-05, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16819295287132263, + "step": 2910, + "valid_targets_mean": 1466.9, + "valid_targets_min": 679 + }, + { + "epoch": 3.0651945320715037, + "grad_norm": 0.8503023131710876, + "learning_rate": 2.7640343764323535e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18158873915672302, + "step": 2915, + "valid_targets_mean": 1352.6, + "valid_targets_min": 771 + }, + { + "epoch": 3.070452155625657, + "grad_norm": 0.8407989228763362, + "learning_rate": 2.7591856211984783e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15310627222061157, + "step": 2920, + "valid_targets_mean": 1488.1, + "valid_targets_min": 842 + }, + { + "epoch": 3.0757097791798107, + "grad_norm": 0.7397249986657985, + "learning_rate": 2.7543316469460565e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15087135136127472, + "step": 2925, + "valid_targets_mean": 1451.6, + "valid_targets_min": 822 + }, + { + "epoch": 3.080967402733964, + "grad_norm": 0.9921587508535049, + "learning_rate": 2.7494724870437147e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2060733586549759, + "step": 2930, + "valid_targets_mean": 1735.0, + "valid_targets_min": 675 + }, + { + "epoch": 3.0862250262881177, + "grad_norm": 1.1741681951047909, + "learning_rate": 2.7446081748957306e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643257737159729, + "step": 2935, + "valid_targets_mean": 1400.9, + 
"valid_targets_min": 892 + }, + { + "epoch": 3.091482649842271, + "grad_norm": 0.7875876053749578, + "learning_rate": 2.7397387439417963e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15686167776584625, + "step": 2940, + "valid_targets_mean": 1561.1, + "valid_targets_min": 656 + }, + { + "epoch": 3.0967402733964247, + "grad_norm": 0.7115401492107658, + "learning_rate": 2.7348642276567973e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1451965719461441, + "step": 2945, + "valid_targets_mean": 1369.1, + "valid_targets_min": 681 + }, + { + "epoch": 3.101997896950578, + "grad_norm": 0.9011516758528701, + "learning_rate": 2.729984659550576e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16926482319831848, + "step": 2950, + "valid_targets_mean": 1467.0, + "valid_targets_min": 548 + }, + { + "epoch": 3.107255520504732, + "grad_norm": 0.7346400824252598, + "learning_rate": 2.7251000731677035e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15575602650642395, + "step": 2955, + "valid_targets_mean": 1453.6, + "valid_targets_min": 658 + }, + { + "epoch": 3.1125131440588856, + "grad_norm": 0.7689981957849497, + "learning_rate": 2.72021050208725e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1617649793624878, + "step": 2960, + "valid_targets_mean": 1422.1, + "valid_targets_min": 650 + }, + { + "epoch": 3.117770767613039, + "grad_norm": 0.7275665796012852, + "learning_rate": 2.715315979922552e-05, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1705387532711029, + "step": 2965, + "valid_targets_mean": 1521.4, + "valid_targets_min": 734 + }, + { + "epoch": 3.1230283911671926, + "grad_norm": 0.7604520991119563, + "learning_rate": 2.7104165403209843e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16756518185138702, + "step": 2970, + "valid_targets_mean": 1364.1, + "valid_targets_min": 704 + }, + { + "epoch": 3.128286014721346, + "grad_norm": 
0.7525764441573639, + "learning_rate": 2.7055122169637224e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16290804743766785, + "step": 2975, + "valid_targets_mean": 1556.6, + "valid_targets_min": 686 + }, + { + "epoch": 3.1335436382754995, + "grad_norm": 0.7153059892862654, + "learning_rate": 2.7006030435655205e-05, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17549902200698853, + "step": 2980, + "valid_targets_mean": 1657.5, + "valid_targets_min": 697 + }, + { + "epoch": 3.138801261829653, + "grad_norm": 0.6877280286226078, + "learning_rate": 2.6956890538744703e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1564432978630066, + "step": 2985, + "valid_targets_mean": 1545.4, + "valid_targets_min": 779 + }, + { + "epoch": 3.1440588853838065, + "grad_norm": 0.7131649105297678, + "learning_rate": 2.6907702816717742e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17285484075546265, + "step": 2990, + "valid_targets_mean": 1601.5, + "valid_targets_min": 729 + }, + { + "epoch": 3.14931650893796, + "grad_norm": 0.7209399409701057, + "learning_rate": 2.685846760771513e-05, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.154525026679039, + "step": 2995, + "valid_targets_mean": 1467.6, + "valid_targets_min": 661 + }, + { + "epoch": 3.1545741324921135, + "grad_norm": 0.7377486784686006, + "learning_rate": 2.6809185250204113e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16196948289871216, + "step": 3000, + "valid_targets_mean": 1607.2, + "valid_targets_min": 847 + }, + { + "epoch": 3.159831756046267, + "grad_norm": 0.7947954324853301, + "learning_rate": 2.6759856082976066e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16255517303943634, + "step": 3005, + "valid_targets_mean": 1486.9, + "valid_targets_min": 758 + }, + { + "epoch": 3.1650893796004205, + "grad_norm": 0.7241271651539062, + "learning_rate": 2.6710480445144145e-05, + "loss": 0.1655, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.13190844655036926, + "step": 3010, + "valid_targets_mean": 1312.7, + "valid_targets_min": 800 + }, + { + "epoch": 3.170347003154574, + "grad_norm": 0.6916520204368889, + "learning_rate": 2.666105867614099e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14786486327648163, + "step": 3015, + "valid_targets_mean": 1413.6, + "valid_targets_min": 803 + }, + { + "epoch": 3.1756046267087275, + "grad_norm": 0.7225488622283363, + "learning_rate": 2.6611591115716345e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.151035338640213, + "step": 3020, + "valid_targets_mean": 1449.5, + "valid_targets_min": 749 + }, + { + "epoch": 3.1808622502628814, + "grad_norm": 0.7266035531391442, + "learning_rate": 2.6562078103934755e-05, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15991780161857605, + "step": 3025, + "valid_targets_mean": 1542.5, + "valid_targets_min": 837 + }, + { + "epoch": 3.186119873817035, + "grad_norm": 0.6771043280351067, + "learning_rate": 2.6512519981173238e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14401499927043915, + "step": 3030, + "valid_targets_mean": 1336.9, + "valid_targets_min": 649 + }, + { + "epoch": 3.1913774973711884, + "grad_norm": 0.692571795606894, + "learning_rate": 2.64629170881189e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17246052622795105, + "step": 3035, + "valid_targets_mean": 1638.3, + "valid_targets_min": 765 + }, + { + "epoch": 3.196635120925342, + "grad_norm": 0.7418225870797496, + "learning_rate": 2.641326976576664e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674758940935135, + "step": 3040, + "valid_targets_mean": 1561.9, + "valid_targets_min": 875 + }, + { + "epoch": 3.2018927444794953, + "grad_norm": 0.7497987069090254, + "learning_rate": 2.6363578355416772e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14803823828697205, + "step": 3045, + 
"valid_targets_mean": 1301.5, + "valid_targets_min": 655 + }, + { + "epoch": 3.207150368033649, + "grad_norm": 0.7653406210240195, + "learning_rate": 2.6313843198672712e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17800885438919067, + "step": 3050, + "valid_targets_mean": 1505.0, + "valid_targets_min": 669 + }, + { + "epoch": 3.2124079915878023, + "grad_norm": 0.7238788011079397, + "learning_rate": 2.6264064637438585e-05, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15153130888938904, + "step": 3055, + "valid_targets_mean": 1330.0, + "valid_targets_min": 733 + }, + { + "epoch": 3.217665615141956, + "grad_norm": 0.7122139126458839, + "learning_rate": 2.6214243013916915e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1740637719631195, + "step": 3060, + "valid_targets_mean": 1778.6, + "valid_targets_min": 703 + }, + { + "epoch": 3.2229232386961093, + "grad_norm": 0.7193052428946505, + "learning_rate": 2.616437867060627e-05, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1674605756998062, + "step": 3065, + "valid_targets_mean": 1651.5, + "valid_targets_min": 846 + }, + { + "epoch": 3.228180862250263, + "grad_norm": 0.7181685142725793, + "learning_rate": 2.6114471950298853e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16833847761154175, + "step": 3070, + "valid_targets_mean": 1563.4, + "valid_targets_min": 764 + }, + { + "epoch": 3.2334384858044163, + "grad_norm": 0.7143088741566662, + "learning_rate": 2.6064523196078248e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14991122484207153, + "step": 3075, + "valid_targets_mean": 1565.3, + "valid_targets_min": 822 + }, + { + "epoch": 3.2386961093585698, + "grad_norm": 0.7341874380160334, + "learning_rate": 2.6014532751316937e-05, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16907265782356262, + "step": 3080, + "valid_targets_mean": 1692.3, + "valid_targets_min": 1261 + }, + { + "epoch": 
3.2439537329127233, + "grad_norm": 0.6934991412753697, + "learning_rate": 2.5964500959674057e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16206017136573792, + "step": 3085, + "valid_targets_mean": 1596.2, + "valid_targets_min": 787 + }, + { + "epoch": 3.249211356466877, + "grad_norm": 0.6877269014343796, + "learning_rate": 2.5914428165092956e-05, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14042839407920837, + "step": 3090, + "valid_targets_mean": 1312.9, + "valid_targets_min": 549 + }, + { + "epoch": 3.2544689800210307, + "grad_norm": 0.7094728806143602, + "learning_rate": 2.5864314711798856e-05, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553335189819336, + "step": 3095, + "valid_targets_mean": 1427.8, + "valid_targets_min": 725 + }, + { + "epoch": 3.259726603575184, + "grad_norm": 0.7617044512032434, + "learning_rate": 2.5814160944296495e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18396365642547607, + "step": 3100, + "valid_targets_mean": 1703.9, + "valid_targets_min": 734 + }, + { + "epoch": 3.2649842271293377, + "grad_norm": 0.7317380322454258, + "learning_rate": 2.5763967207367752e-05, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1718815267086029, + "step": 3105, + "valid_targets_mean": 1470.5, + "valid_targets_min": 744 + }, + { + "epoch": 3.270241850683491, + "grad_norm": 0.7513859068792503, + "learning_rate": 2.5713733846069272e-05, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1729550063610077, + "step": 3110, + "valid_targets_mean": 1629.8, + "valid_targets_min": 545 + }, + { + "epoch": 3.2754994742376446, + "grad_norm": 0.6848120186596997, + "learning_rate": 2.56634612057301e-05, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15244747698307037, + "step": 3115, + "valid_targets_mean": 1504.1, + "valid_targets_min": 622 + }, + { + "epoch": 3.280757097791798, + "grad_norm": 0.673535190257926, + "learning_rate": 
2.561314963194929e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18910765647888184, + "step": 3120, + "valid_targets_mean": 1898.6, + "valid_targets_min": 1078 + }, + { + "epoch": 3.2860147213459516, + "grad_norm": 0.7007516882187462, + "learning_rate": 2.556279947059358e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1472591906785965, + "step": 3125, + "valid_targets_mean": 1387.5, + "valid_targets_min": 694 + }, + { + "epoch": 3.291272344900105, + "grad_norm": 0.7129920724705783, + "learning_rate": 2.551241106779494e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1591213494539261, + "step": 3130, + "valid_targets_mean": 1548.0, + "valid_targets_min": 673 + }, + { + "epoch": 3.2965299684542586, + "grad_norm": 0.6984102460849012, + "learning_rate": 2.5461984769948244e-05, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1496220827102661, + "step": 3135, + "valid_targets_mean": 1337.4, + "valid_targets_min": 674 + }, + { + "epoch": 3.301787592008412, + "grad_norm": 0.7558744584631479, + "learning_rate": 2.5411520923708874e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.144131600856781, + "step": 3140, + "valid_targets_mean": 1283.6, + "valid_targets_min": 661 + }, + { + "epoch": 3.3070452155625656, + "grad_norm": 0.727010367044851, + "learning_rate": 2.536101987599036e-05, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15996873378753662, + "step": 3145, + "valid_targets_mean": 1546.1, + "valid_targets_min": 727 + }, + { + "epoch": 3.312302839116719, + "grad_norm": 0.7935522676962602, + "learning_rate": 2.5310481973961935e-05, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1686829924583435, + "step": 3150, + "valid_targets_mean": 1664.6, + "valid_targets_min": 1181 + }, + { + "epoch": 3.3175604626708726, + "grad_norm": 0.6987166115939653, + "learning_rate": 2.5259907565046217e-05, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.14374563097953796, + "step": 3155, + "valid_targets_mean": 1281.3, + "valid_targets_min": 613 + }, + { + "epoch": 3.322818086225026, + "grad_norm": 0.7704153064356908, + "learning_rate": 2.5209296996916774e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1557459533214569, + "step": 3160, + "valid_targets_mean": 1460.1, + "valid_targets_min": 954 + }, + { + "epoch": 3.32807570977918, + "grad_norm": 0.7304887901019812, + "learning_rate": 2.5158650617495753e-05, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1635679304599762, + "step": 3165, + "valid_targets_mean": 1661.1, + "valid_targets_min": 1048 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7871658306590985, + "learning_rate": 2.5107968774951504e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15143483877182007, + "step": 3170, + "valid_targets_mean": 1388.9, + "valid_targets_min": 635 + }, + { + "epoch": 3.338590956887487, + "grad_norm": 0.685491759966997, + "learning_rate": 2.5057251817696138e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16114577651023865, + "step": 3175, + "valid_targets_mean": 1682.4, + "valid_targets_min": 592 + }, + { + "epoch": 3.3438485804416405, + "grad_norm": 0.7689982433044418, + "learning_rate": 2.5006500094383176e-05, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17659153044223785, + "step": 3180, + "valid_targets_mean": 1581.1, + "valid_targets_min": 662 + }, + { + "epoch": 3.349106203995794, + "grad_norm": 0.8406749636556262, + "learning_rate": 2.4955713953905155e-05, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16790682077407837, + "step": 3185, + "valid_targets_mean": 1454.1, + "valid_targets_min": 622 + }, + { + "epoch": 3.3543638275499474, + "grad_norm": 0.7777439527883708, + "learning_rate": 2.490489374539118e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1627415120601654, + "step": 3190, + "valid_targets_mean": 1489.4, + "valid_targets_min": 
625 + }, + { + "epoch": 3.359621451104101, + "grad_norm": 0.797641636045962, + "learning_rate": 2.4854039818204577e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1709059476852417, + "step": 3195, + "valid_targets_mean": 1609.5, + "valid_targets_min": 888 + }, + { + "epoch": 3.3648790746582544, + "grad_norm": 0.7371462162762332, + "learning_rate": 2.480315252194047e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15439459681510925, + "step": 3200, + "valid_targets_mean": 1547.6, + "valid_targets_min": 635 + }, + { + "epoch": 3.370136698212408, + "grad_norm": 1.093522669128121, + "learning_rate": 2.4752232206423387e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16903162002563477, + "step": 3205, + "valid_targets_mean": 1502.6, + "valid_targets_min": 522 + }, + { + "epoch": 3.3753943217665614, + "grad_norm": 0.707815228654526, + "learning_rate": 2.4701279221704812e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14688000082969666, + "step": 3210, + "valid_targets_mean": 1349.7, + "valid_targets_min": 697 + }, + { + "epoch": 3.380651945320715, + "grad_norm": 0.7063788662374152, + "learning_rate": 2.4650293918060845e-05, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14401158690452576, + "step": 3215, + "valid_targets_mean": 1485.7, + "valid_targets_min": 869 + }, + { + "epoch": 3.3859095688748684, + "grad_norm": 0.7506740453173287, + "learning_rate": 2.4599276645989763e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596394181251526, + "step": 3220, + "valid_targets_mean": 1525.7, + "valid_targets_min": 769 + }, + { + "epoch": 3.3911671924290223, + "grad_norm": 0.7638974947948802, + "learning_rate": 2.4548227756209593e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15278524160385132, + "step": 3225, + "valid_targets_mean": 1422.0, + "valid_targets_min": 745 + }, + { + "epoch": 3.396424815983176, + "grad_norm": 0.7141801225138629, + 
"learning_rate": 2.4497147599655726e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14236235618591309, + "step": 3230, + "valid_targets_mean": 1337.4, + "valid_targets_min": 680 + }, + { + "epoch": 3.4016824395373293, + "grad_norm": 0.6768440619173867, + "learning_rate": 2.44460365274785e-05, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16601522266864777, + "step": 3235, + "valid_targets_mean": 1771.9, + "valid_targets_min": 964 + }, + { + "epoch": 3.406940063091483, + "grad_norm": 0.7518520439776654, + "learning_rate": 2.4394894891040774e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18775448203086853, + "step": 3240, + "valid_targets_mean": 1592.7, + "valid_targets_min": 851 + }, + { + "epoch": 3.4121976866456363, + "grad_norm": 0.7107350846356748, + "learning_rate": 2.434372304191553e-05, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428951621055603, + "step": 3245, + "valid_targets_mean": 1457.7, + "valid_targets_min": 727 + }, + { + "epoch": 3.4174553101997898, + "grad_norm": 0.6855270749314717, + "learning_rate": 2.4292521331883432e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15996533632278442, + "step": 3250, + "valid_targets_mean": 1498.1, + "valid_targets_min": 734 + }, + { + "epoch": 3.4227129337539433, + "grad_norm": 0.5324207323189583, + "learning_rate": 2.4241290112930448e-05, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11845308542251587, + "step": 3255, + "valid_targets_mean": 2187.2, + "valid_targets_min": 917 + }, + { + "epoch": 3.4279705573080967, + "grad_norm": 0.6765960167126789, + "learning_rate": 2.4190029737245368e-05, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13978411257266998, + "step": 3260, + "valid_targets_mean": 1544.4, + "valid_targets_min": 930 + }, + { + "epoch": 3.4332281808622502, + "grad_norm": 0.716060654288124, + "learning_rate": 2.4138740557217462e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.1514171063899994, + "step": 3265, + "valid_targets_mean": 1543.6, + "valid_targets_min": 570 + }, + { + "epoch": 3.4384858044164037, + "grad_norm": 0.7255969168782287, + "learning_rate": 2.4087422925433988e-05, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15819844603538513, + "step": 3270, + "valid_targets_mean": 1659.8, + "valid_targets_min": 870 + }, + { + "epoch": 3.443743427970557, + "grad_norm": 0.7386851568481033, + "learning_rate": 2.4036077194677803e-05, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15675535798072815, + "step": 3275, + "valid_targets_mean": 1667.2, + "valid_targets_min": 1070 + }, + { + "epoch": 3.4490010515247107, + "grad_norm": 0.7215547346775798, + "learning_rate": 2.3984703717924932e-05, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18710145354270935, + "step": 3280, + "valid_targets_mean": 1765.9, + "valid_targets_min": 626 + }, + { + "epoch": 3.454258675078864, + "grad_norm": 0.7313678336589342, + "learning_rate": 2.3933302848342127e-05, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.148451030254364, + "step": 3285, + "valid_targets_mean": 1467.1, + "valid_targets_min": 591 + }, + { + "epoch": 3.4595162986330177, + "grad_norm": 0.7838903247589883, + "learning_rate": 2.388187493928447e-05, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15493765473365784, + "step": 3290, + "valid_targets_mean": 1358.4, + "valid_targets_min": 646 + }, + { + "epoch": 3.464773922187171, + "grad_norm": 0.5443099682007434, + "learning_rate": 2.3830420344292922e-05, + "loss": 0.1424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10775116086006165, + "step": 3295, + "valid_targets_mean": 2683.5, + "valid_targets_min": 876 + }, + { + "epoch": 3.470031545741325, + "grad_norm": 0.5030562872301405, + "learning_rate": 2.377893941709189e-05, + "loss": 0.1226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11221227049827576, + "step": 3300, + "valid_targets_mean": 2004.5, + 
"valid_targets_min": 324 + }, + { + "epoch": 3.4752891692954786, + "grad_norm": 0.3703418740280842, + "learning_rate": 2.3727432511586802e-05, + "loss": 0.0918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09657759964466095, + "step": 3305, + "valid_targets_mean": 3137.6, + "valid_targets_min": 1064 + }, + { + "epoch": 3.480546792849632, + "grad_norm": 0.36882944206470225, + "learning_rate": 2.3675899981861675e-05, + "loss": 0.0882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09505809843540192, + "step": 3310, + "valid_targets_mean": 3310.0, + "valid_targets_min": 710 + }, + { + "epoch": 3.4858044164037856, + "grad_norm": 0.2980443098259448, + "learning_rate": 2.362434218217668e-05, + "loss": 0.1009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06509885936975479, + "step": 3315, + "valid_targets_mean": 3544.6, + "valid_targets_min": 1223 + }, + { + "epoch": 3.491062039957939, + "grad_norm": 0.3775797942330371, + "learning_rate": 2.3572759466965706e-05, + "loss": 0.0924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08893433213233948, + "step": 3320, + "valid_targets_mean": 3144.9, + "valid_targets_min": 889 + }, + { + "epoch": 3.4963196635120926, + "grad_norm": 0.6051734616310629, + "learning_rate": 2.3521152190833934e-05, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1466870903968811, + "step": 3325, + "valid_targets_mean": 1682.2, + "valid_targets_min": 559 + }, + { + "epoch": 3.501577287066246, + "grad_norm": 0.35000643454214686, + "learning_rate": 2.346952070855537e-05, + "loss": 0.1134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08239797502756119, + "step": 3330, + "valid_targets_mean": 3284.0, + "valid_targets_min": 621 + }, + { + "epoch": 3.5068349106203995, + "grad_norm": 0.41031362369237406, + "learning_rate": 2.3417865375070433e-05, + "loss": 0.1034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11980154365301132, + "step": 3335, + "valid_targets_mean": 3136.8, + "valid_targets_min": 846 + }, + { + "epoch": 3.512092534174553, + "grad_norm": 
0.4072089673499259, + "learning_rate": 2.336618654548352e-05, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14663591980934143, + "step": 3340, + "valid_targets_mean": 3446.4, + "valid_targets_min": 1369 + }, + { + "epoch": 3.5173501577287065, + "grad_norm": 0.3727442216958196, + "learning_rate": 2.331448457506053e-05, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10031040012836456, + "step": 3345, + "valid_targets_mean": 3461.6, + "valid_targets_min": 754 + }, + { + "epoch": 3.52260778128286, + "grad_norm": 0.3994968154863205, + "learning_rate": 2.326275981922645e-05, + "loss": 0.091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09783968329429626, + "step": 3350, + "valid_targets_mean": 2281.7, + "valid_targets_min": 447 + }, + { + "epoch": 3.527865404837014, + "grad_norm": 0.3550302352287683, + "learning_rate": 2.3211012633562923e-05, + "loss": 0.0907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09796998649835587, + "step": 3355, + "valid_targets_mean": 3140.8, + "valid_targets_min": 754 + }, + { + "epoch": 3.5331230283911674, + "grad_norm": 0.5326623542188318, + "learning_rate": 2.3159243373805764e-05, + "loss": 0.1328, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23072634637355804, + "step": 3360, + "valid_targets_mean": 1930.1, + "valid_targets_min": 604 + }, + { + "epoch": 3.538380651945321, + "grad_norm": 0.3688458186750891, + "learning_rate": 2.3107452395842542e-05, + "loss": 0.0851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0918334573507309, + "step": 3365, + "valid_targets_mean": 3476.9, + "valid_targets_min": 1743 + }, + { + "epoch": 3.5436382754994744, + "grad_norm": 0.37694886254982934, + "learning_rate": 2.3055640055710132e-05, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09475556015968323, + "step": 3370, + "valid_targets_mean": 2934.9, + "valid_targets_min": 574 + }, + { + "epoch": 3.548895899053628, + "grad_norm": 0.361710277299611, + "learning_rate": 2.3003806709592268e-05, + "loss": 0.0841, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.08646224439144135, + "step": 3375, + "valid_targets_mean": 2977.2, + "valid_targets_min": 523 + }, + { + "epoch": 3.5541535226077814, + "grad_norm": 0.3923783065630929, + "learning_rate": 2.295195271381707e-05, + "loss": 0.0928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10263008624315262, + "step": 3380, + "valid_targets_mean": 3493.9, + "valid_targets_min": 2189 + }, + { + "epoch": 3.559411146161935, + "grad_norm": 0.32157805666456296, + "learning_rate": 2.290007842485463e-05, + "loss": 0.0957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07095207273960114, + "step": 3385, + "valid_targets_mean": 3361.6, + "valid_targets_min": 812 + }, + { + "epoch": 3.5646687697160884, + "grad_norm": 0.45144904186150264, + "learning_rate": 2.2848184199314546e-05, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11917275935411453, + "step": 3390, + "valid_targets_mean": 2514.4, + "valid_targets_min": 597 + }, + { + "epoch": 3.569926393270242, + "grad_norm": 0.443691654398637, + "learning_rate": 2.2796270393943472e-05, + "loss": 0.1029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1120077446103096, + "step": 3395, + "valid_targets_mean": 2462.6, + "valid_targets_min": 701 + }, + { + "epoch": 3.5751840168243953, + "grad_norm": 0.444025520690789, + "learning_rate": 2.274433736562264e-05, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19354555010795593, + "step": 3400, + "valid_targets_mean": 2976.1, + "valid_targets_min": 1297 + }, + { + "epoch": 3.580441640378549, + "grad_norm": 0.3573000146572299, + "learning_rate": 2.2692385471365465e-05, + "loss": 0.105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08170770108699799, + "step": 3405, + "valid_targets_mean": 2692.6, + "valid_targets_min": 786 + }, + { + "epoch": 3.5856992639327023, + "grad_norm": 0.42377566609320155, + "learning_rate": 2.264041506831503e-05, + "loss": 0.0876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09044261276721954, + "step": 3410, + 
"valid_targets_mean": 2406.3, + "valid_targets_min": 550 + }, + { + "epoch": 3.590956887486856, + "grad_norm": 0.403249296947159, + "learning_rate": 2.258842651374166e-05, + "loss": 0.1273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09162634611129761, + "step": 3415, + "valid_targets_mean": 3270.2, + "valid_targets_min": 1304 + }, + { + "epoch": 3.5962145110410093, + "grad_norm": 0.36497118438614784, + "learning_rate": 2.2536420165040478e-05, + "loss": 0.1072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07757177948951721, + "step": 3420, + "valid_targets_mean": 2861.4, + "valid_targets_min": 753 + }, + { + "epoch": 3.601472134595163, + "grad_norm": 0.4814404833783927, + "learning_rate": 2.248439637972892e-05, + "loss": 0.1004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10776933282613754, + "step": 3425, + "valid_targets_mean": 2105.1, + "valid_targets_min": 619 + }, + { + "epoch": 3.6067297581493163, + "grad_norm": 0.4649186798035194, + "learning_rate": 2.2432355515444284e-05, + "loss": 0.2917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23355937004089355, + "step": 3430, + "valid_targets_mean": 2730.5, + "valid_targets_min": 987 + }, + { + "epoch": 3.61198738170347, + "grad_norm": 0.49596750031321396, + "learning_rate": 2.2380297929941296e-05, + "loss": 0.1039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12732480466365814, + "step": 3435, + "valid_targets_mean": 1982.8, + "valid_targets_min": 329 + }, + { + "epoch": 3.6172450052576237, + "grad_norm": 0.6331886046407993, + "learning_rate": 2.2328223981089613e-05, + "loss": 0.1036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11068670451641083, + "step": 3440, + "valid_targets_mean": 1372.6, + "valid_targets_min": 608 + }, + { + "epoch": 3.622502628811777, + "grad_norm": 0.3937143683860141, + "learning_rate": 2.2276134026871393e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09766163676977158, + "step": 3445, + "valid_targets_mean": 3164.8, + "valid_targets_min": 666 + }, + { + "epoch": 
3.6277602523659307, + "grad_norm": 0.24350799716030153, + "learning_rate": 2.222402842537882e-05, + "loss": 0.0842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05802469328045845, + "step": 3450, + "valid_targets_mean": 5015.1, + "valid_targets_min": 3529 + }, + { + "epoch": 3.633017875920084, + "grad_norm": 0.37203436731137035, + "learning_rate": 2.2171907534811652e-05, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10450957715511322, + "step": 3455, + "valid_targets_mean": 4336.9, + "valid_targets_min": 1746 + }, + { + "epoch": 3.6382754994742377, + "grad_norm": 0.41643184533502453, + "learning_rate": 2.2119771713474732e-05, + "loss": 0.0976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11065898835659027, + "step": 3460, + "valid_targets_mean": 3651.8, + "valid_targets_min": 1226 + }, + { + "epoch": 3.643533123028391, + "grad_norm": 0.3934006119910143, + "learning_rate": 2.2067621319775564e-05, + "loss": 0.086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09729157388210297, + "step": 3465, + "valid_targets_mean": 3846.0, + "valid_targets_min": 798 + }, + { + "epoch": 3.6487907465825447, + "grad_norm": 0.4246895638591527, + "learning_rate": 2.201545671222183e-05, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0853077694773674, + "step": 3470, + "valid_targets_mean": 2358.4, + "valid_targets_min": 533 + }, + { + "epoch": 3.654048370136698, + "grad_norm": 0.4459133041758443, + "learning_rate": 2.1963278249418894e-05, + "loss": 0.0865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10066591203212738, + "step": 3475, + "valid_targets_mean": 2915.3, + "valid_targets_min": 833 + }, + { + "epoch": 3.6593059936908516, + "grad_norm": 0.3330196394799087, + "learning_rate": 2.191108629006742e-05, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06883084774017334, + "step": 3480, + "valid_targets_mean": 3654.9, + "valid_targets_min": 1421 + }, + { + "epoch": 3.664563617245005, + "grad_norm": 0.4830073737471554, + "learning_rate": 
2.1858881192960814e-05, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11881092190742493, + "step": 3485, + "valid_targets_mean": 2283.9, + "valid_targets_min": 739 + }, + { + "epoch": 3.669821240799159, + "grad_norm": 0.530118378630708, + "learning_rate": 2.180666331698281e-05, + "loss": 0.1208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09584526717662811, + "step": 3490, + "valid_targets_mean": 1388.7, + "valid_targets_min": 535 + }, + { + "epoch": 3.6750788643533125, + "grad_norm": 0.43984703491201566, + "learning_rate": 2.1754433021104985e-05, + "loss": 0.0729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08105123788118362, + "step": 3495, + "valid_targets_mean": 1633.8, + "valid_targets_min": 585 + }, + { + "epoch": 3.680336487907466, + "grad_norm": 0.4233341877084485, + "learning_rate": 2.170219066438431e-05, + "loss": 0.0832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09127180278301239, + "step": 3500, + "valid_targets_mean": 2779.3, + "valid_targets_min": 582 + }, + { + "epoch": 3.6855941114616195, + "grad_norm": 0.387700922362627, + "learning_rate": 2.164993660596065e-05, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08709383010864258, + "step": 3505, + "valid_targets_mean": 2962.7, + "valid_targets_min": 516 + }, + { + "epoch": 3.690851735015773, + "grad_norm": 0.40290885939492926, + "learning_rate": 2.1597671205054326e-05, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0947260856628418, + "step": 3510, + "valid_targets_mean": 3386.9, + "valid_targets_min": 999 + }, + { + "epoch": 3.6961093585699265, + "grad_norm": 0.48380195922315133, + "learning_rate": 2.1545394820963637e-05, + "loss": 0.097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13147683441638947, + "step": 3515, + "valid_targets_mean": 2628.9, + "valid_targets_min": 696 + }, + { + "epoch": 3.70136698212408, + "grad_norm": 0.36573554154558774, + "learning_rate": 2.149310781306237e-05, + "loss": 0.104, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.07513891160488129, + "step": 3520, + "valid_targets_mean": 2978.2, + "valid_targets_min": 635 + }, + { + "epoch": 3.7066246056782335, + "grad_norm": 0.5112486521023695, + "learning_rate": 2.1440810540797354e-05, + "loss": 0.0952, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10247130692005157, + "step": 3525, + "valid_targets_mean": 1913.0, + "valid_targets_min": 536 + }, + { + "epoch": 3.711882229232387, + "grad_norm": 0.5697643091664327, + "learning_rate": 2.1388503363685985e-05, + "loss": 0.1391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19898897409439087, + "step": 3530, + "valid_targets_mean": 2542.8, + "valid_targets_min": 853 + }, + { + "epoch": 3.7171398527865405, + "grad_norm": 0.435081498437529, + "learning_rate": 2.133618664131374e-05, + "loss": 0.0949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09856528043746948, + "step": 3535, + "valid_targets_mean": 3325.2, + "valid_targets_min": 2201 + }, + { + "epoch": 3.722397476340694, + "grad_norm": 0.5389864275451385, + "learning_rate": 2.1283860733331722e-05, + "loss": 0.1215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11500448733568192, + "step": 3540, + "valid_targets_mean": 1593.0, + "valid_targets_min": 687 + }, + { + "epoch": 3.7276550998948474, + "grad_norm": 0.5706213977134992, + "learning_rate": 2.123152599945417e-05, + "loss": 0.0908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10599765926599503, + "step": 3545, + "valid_targets_mean": 1756.6, + "valid_targets_min": 449 + }, + { + "epoch": 3.732912723449001, + "grad_norm": 0.3304693098437503, + "learning_rate": 2.1179182799456024e-05, + "loss": 0.0924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08064989745616913, + "step": 3550, + "valid_targets_mean": 3063.7, + "valid_targets_min": 658 + }, + { + "epoch": 3.7381703470031544, + "grad_norm": 0.530788508146966, + "learning_rate": 2.112683149317039e-05, + "loss": 0.098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10121424496173859, + "step": 3555, + "valid_targets_mean": 1936.0, + 
"valid_targets_min": 516 + }, + { + "epoch": 3.743427970557308, + "grad_norm": 0.3286043768042034, + "learning_rate": 2.1074472440486118e-05, + "loss": 0.0765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08935829252004623, + "step": 3560, + "valid_targets_mean": 3996.3, + "valid_targets_min": 726 + }, + { + "epoch": 3.7486855941114614, + "grad_norm": 0.36550474794956694, + "learning_rate": 2.102210600134531e-05, + "loss": 0.0768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09652663767337799, + "step": 3565, + "valid_targets_mean": 3944.3, + "valid_targets_min": 2036 + }, + { + "epoch": 3.753943217665615, + "grad_norm": 0.4325741552974306, + "learning_rate": 2.096973253574084e-05, + "loss": 0.0882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09666009247303009, + "step": 3570, + "valid_targets_mean": 2266.8, + "valid_targets_min": 531 + }, + { + "epoch": 3.759200841219769, + "grad_norm": 0.40791655447527125, + "learning_rate": 2.09173524037139e-05, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09209573268890381, + "step": 3575, + "valid_targets_mean": 2960.6, + "valid_targets_min": 744 + }, + { + "epoch": 3.7644584647739223, + "grad_norm": 0.38772339677116957, + "learning_rate": 2.0864965965351495e-05, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0913030356168747, + "step": 3580, + "valid_targets_mean": 3425.1, + "valid_targets_min": 891 + }, + { + "epoch": 3.769716088328076, + "grad_norm": 0.35781538705517263, + "learning_rate": 2.081257358078398e-05, + "loss": 0.09, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08367600291967392, + "step": 3585, + "valid_targets_mean": 3363.6, + "valid_targets_min": 2366 + }, + { + "epoch": 3.7749737118822293, + "grad_norm": 0.43063352480767536, + "learning_rate": 2.0760175610182613e-05, + "loss": 0.0861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10648873448371887, + "step": 3590, + "valid_targets_mean": 2006.6, + "valid_targets_min": 509 + }, + { + "epoch": 3.780231335436383, + "grad_norm": 
0.4255843199869726, + "learning_rate": 2.0707772413757016e-05, + "loss": 0.0999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1046498566865921, + "step": 3595, + "valid_targets_mean": 3465.1, + "valid_targets_min": 1158 + }, + { + "epoch": 3.7854889589905363, + "grad_norm": 0.3238682484345786, + "learning_rate": 2.0655364351752763e-05, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07490940392017365, + "step": 3600, + "valid_targets_mean": 3908.9, + "valid_targets_min": 2848 + }, + { + "epoch": 3.7907465825446898, + "grad_norm": 0.38324966614871386, + "learning_rate": 2.060295178444887e-05, + "loss": 0.0918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07175049185752869, + "step": 3605, + "valid_targets_mean": 2616.4, + "valid_targets_min": 539 + }, + { + "epoch": 3.7960042060988433, + "grad_norm": 0.443143738568688, + "learning_rate": 2.055053507215533e-05, + "loss": 0.092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09204816818237305, + "step": 3610, + "valid_targets_mean": 2680.9, + "valid_targets_min": 1016 + }, + { + "epoch": 3.8012618296529967, + "grad_norm": 0.610462234387402, + "learning_rate": 2.049811457521061e-05, + "loss": 0.1009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14608034491539001, + "step": 3615, + "valid_targets_mean": 1645.9, + "valid_targets_min": 811 + }, + { + "epoch": 3.8065194532071502, + "grad_norm": 0.3921422546334914, + "learning_rate": 2.0445690653979216e-05, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06947421282529831, + "step": 3620, + "valid_targets_mean": 2570.3, + "valid_targets_min": 697 + }, + { + "epoch": 3.8117770767613037, + "grad_norm": 0.6663493421615169, + "learning_rate": 2.039326366884919e-05, + "loss": 0.0947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12514297664165497, + "step": 3625, + "valid_targets_mean": 1396.1, + "valid_targets_min": 605 + }, + { + "epoch": 3.8170347003154577, + "grad_norm": 0.427751684134174, + "learning_rate": 2.034083398022963e-05, + "loss": 0.0866, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.07771643996238708, + "step": 3630, + "valid_targets_mean": 2454.5, + "valid_targets_min": 804 + }, + { + "epoch": 3.822292323869611, + "grad_norm": 0.44758511691685116, + "learning_rate": 2.028840194854822e-05, + "loss": 0.1117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08018139749765396, + "step": 3635, + "valid_targets_mean": 3334.7, + "valid_targets_min": 1018 + }, + { + "epoch": 3.8275499474237646, + "grad_norm": 0.35793157543173476, + "learning_rate": 2.0235967934248756e-05, + "loss": 0.0883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07716187834739685, + "step": 3640, + "valid_targets_mean": 3069.6, + "valid_targets_min": 524 + }, + { + "epoch": 3.832807570977918, + "grad_norm": 0.4702038685141682, + "learning_rate": 2.018353229778867e-05, + "loss": 0.0819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08833232522010803, + "step": 3645, + "valid_targets_mean": 2432.9, + "valid_targets_min": 516 + }, + { + "epoch": 3.8380651945320716, + "grad_norm": 0.47312598716062043, + "learning_rate": 2.0131095399636522e-05, + "loss": 0.1024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12234270572662354, + "step": 3650, + "valid_targets_mean": 2296.9, + "valid_targets_min": 342 + }, + { + "epoch": 3.843322818086225, + "grad_norm": 0.4050671404779756, + "learning_rate": 2.0078657600269573e-05, + "loss": 0.0963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08255276829004288, + "step": 3655, + "valid_targets_mean": 3508.6, + "valid_targets_min": 1935 + }, + { + "epoch": 3.8485804416403786, + "grad_norm": 0.4596813722597804, + "learning_rate": 2.0026219260171262e-05, + "loss": 0.0745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0901421308517456, + "step": 3660, + "valid_targets_mean": 2384.8, + "valid_targets_min": 564 + }, + { + "epoch": 3.853838065194532, + "grad_norm": 0.3165528104327489, + "learning_rate": 1.9973780739828748e-05, + "loss": 0.0883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0759531706571579, + "step": 3665, + 
"valid_targets_mean": 4542.6, + "valid_targets_min": 2555 + }, + { + "epoch": 3.8590956887486856, + "grad_norm": 0.29289406468084694, + "learning_rate": 1.9921342399730433e-05, + "loss": 0.0806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06938598304986954, + "step": 3670, + "valid_targets_mean": 3932.6, + "valid_targets_min": 837 + }, + { + "epoch": 3.864353312302839, + "grad_norm": 0.3940315334405671, + "learning_rate": 1.9868904600363485e-05, + "loss": 0.0717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07881227135658264, + "step": 3675, + "valid_targets_mean": 2556.8, + "valid_targets_min": 618 + }, + { + "epoch": 3.8696109358569926, + "grad_norm": 0.3051306597473392, + "learning_rate": 1.9816467702211342e-05, + "loss": 0.0955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06934002041816711, + "step": 3680, + "valid_targets_mean": 3895.4, + "valid_targets_min": 2944 + }, + { + "epoch": 3.874868559411146, + "grad_norm": 0.4777520869374522, + "learning_rate": 1.9764032065751248e-05, + "loss": 0.1015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19092978537082672, + "step": 3685, + "valid_targets_mean": 3167.4, + "valid_targets_min": 1536 + }, + { + "epoch": 3.8801261829652995, + "grad_norm": 0.40689502117218584, + "learning_rate": 1.971159805145178e-05, + "loss": 0.0896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07239830493927002, + "step": 3690, + "valid_targets_mean": 2931.6, + "valid_targets_min": 509 + }, + { + "epoch": 3.885383806519453, + "grad_norm": 0.30382264737261905, + "learning_rate": 1.965916601977038e-05, + "loss": 0.0961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05693751573562622, + "step": 3695, + "valid_targets_mean": 3162.2, + "valid_targets_min": 633 + }, + { + "epoch": 3.8906414300736065, + "grad_norm": 0.3839793777783549, + "learning_rate": 1.9606736331150812e-05, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08582743257284164, + "step": 3700, + "valid_targets_mean": 3075.7, + "valid_targets_min": 737 + }, + { + "epoch": 
3.89589905362776, + "grad_norm": 0.39810159285735625, + "learning_rate": 1.9554309346020784e-05, + "loss": 0.0793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08765186369419098, + "step": 3705, + "valid_targets_mean": 3087.6, + "valid_targets_min": 492 + }, + { + "epoch": 3.9011566771819135, + "grad_norm": 0.7291109076977612, + "learning_rate": 1.9501885424789394e-05, + "loss": 0.0942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10978750884532928, + "step": 3710, + "valid_targets_mean": 1690.7, + "valid_targets_min": 697 + }, + { + "epoch": 3.9064143007360674, + "grad_norm": 0.41431235708407105, + "learning_rate": 1.9449464927844677e-05, + "loss": 0.0802, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07006799429655075, + "step": 3715, + "valid_targets_mean": 3497.7, + "valid_targets_min": 923 + }, + { + "epoch": 3.911671924290221, + "grad_norm": 0.5883270075280208, + "learning_rate": 1.939704821555113e-05, + "loss": 0.1272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11378846317529678, + "step": 3720, + "valid_targets_mean": 1610.4, + "valid_targets_min": 531 + }, + { + "epoch": 3.9169295478443744, + "grad_norm": 0.575035243155632, + "learning_rate": 1.9344635648247244e-05, + "loss": 0.1179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1258980929851532, + "step": 3725, + "valid_targets_mean": 1691.8, + "valid_targets_min": 575 + }, + { + "epoch": 3.922187171398528, + "grad_norm": 0.4417901219788984, + "learning_rate": 1.9292227586242994e-05, + "loss": 0.0873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09100116789340973, + "step": 3730, + "valid_targets_mean": 3664.4, + "valid_targets_min": 2784 + }, + { + "epoch": 3.9274447949526814, + "grad_norm": 0.3826363350707075, + "learning_rate": 1.9239824389817397e-05, + "loss": 0.0862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08645817637443542, + "step": 3735, + "valid_targets_mean": 3575.8, + "valid_targets_min": 2490 + }, + { + "epoch": 3.932702418506835, + "grad_norm": 0.38902984620855535, + "learning_rate": 
1.9187426419216026e-05, + "loss": 0.0867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07746557891368866, + "step": 3740, + "valid_targets_mean": 2629.2, + "valid_targets_min": 767 + }, + { + "epoch": 3.9379600420609884, + "grad_norm": 0.4440574135528579, + "learning_rate": 1.9135034034648515e-05, + "loss": 0.0777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07846144586801529, + "step": 3745, + "valid_targets_mean": 1929.7, + "valid_targets_min": 729 + }, + { + "epoch": 3.943217665615142, + "grad_norm": 0.7524988102545428, + "learning_rate": 1.90826475962861e-05, + "loss": 0.1056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1300787627696991, + "step": 3750, + "valid_targets_mean": 1150.9, + "valid_targets_min": 637 + }, + { + "epoch": 3.9484752891692954, + "grad_norm": 0.43377208971208925, + "learning_rate": 1.9030267464259164e-05, + "loss": 0.0763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07985401898622513, + "step": 3755, + "valid_targets_mean": 2598.5, + "valid_targets_min": 701 + }, + { + "epoch": 3.953732912723449, + "grad_norm": 0.4014449730261738, + "learning_rate": 1.8977893998654692e-05, + "loss": 0.081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07844674587249756, + "step": 3760, + "valid_targets_mean": 3492.6, + "valid_targets_min": 1606 + }, + { + "epoch": 3.958990536277603, + "grad_norm": 0.39365643476834655, + "learning_rate": 1.8925527559513886e-05, + "loss": 0.0839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08387859165668488, + "step": 3765, + "valid_targets_mean": 3144.2, + "valid_targets_min": 678 + }, + { + "epoch": 3.9642481598317563, + "grad_norm": 0.3250462893913423, + "learning_rate": 1.8873168506829614e-05, + "loss": 0.0897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08184637129306793, + "step": 3770, + "valid_targets_mean": 3732.2, + "valid_targets_min": 350 + }, + { + "epoch": 3.9695057833859098, + "grad_norm": 0.3016014649791314, + "learning_rate": 1.882081720054398e-05, + "loss": 0.0773, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.0785076767206192, + "step": 3775, + "valid_targets_mean": 3899.2, + "valid_targets_min": 761 + }, + { + "epoch": 3.9747634069400632, + "grad_norm": 0.5336857328401563, + "learning_rate": 1.876847400054583e-05, + "loss": 0.0856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10663610696792603, + "step": 3780, + "valid_targets_mean": 1649.2, + "valid_targets_min": 507 + }, + { + "epoch": 3.9800210304942167, + "grad_norm": 0.541958266460311, + "learning_rate": 1.8716139266668288e-05, + "loss": 0.1051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17506924271583557, + "step": 3785, + "valid_targets_mean": 2908.9, + "valid_targets_min": 780 + }, + { + "epoch": 3.9852786540483702, + "grad_norm": 0.3498458322406007, + "learning_rate": 1.8663813358686267e-05, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07919283956289291, + "step": 3790, + "valid_targets_mean": 3729.9, + "valid_targets_min": 2347 + }, + { + "epoch": 3.9905362776025237, + "grad_norm": 0.36650049584069216, + "learning_rate": 1.8611496636314025e-05, + "loss": 0.0855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08009093999862671, + "step": 3795, + "valid_targets_mean": 3266.7, + "valid_targets_min": 716 + }, + { + "epoch": 3.995793901156677, + "grad_norm": 0.367992552240383, + "learning_rate": 1.8559189459202653e-05, + "loss": 0.0809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08165628463029861, + "step": 3800, + "valid_targets_mean": 3567.1, + "valid_targets_min": 2245 + }, + { + "epoch": 4.001051524710831, + "grad_norm": 0.8765624437475582, + "learning_rate": 1.8506892186937636e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16299240291118622, + "step": 3805, + "valid_targets_mean": 1607.4, + "valid_targets_min": 544 + }, + { + "epoch": 4.006309148264984, + "grad_norm": 0.7481661006229169, + "learning_rate": 1.845460517903637e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16049407422542572, + "step": 3810, + "valid_targets_mean": 1344.6, + 
"valid_targets_min": 632 + }, + { + "epoch": 4.011566771819138, + "grad_norm": 0.823041699440833, + "learning_rate": 1.8402328794945678e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161682590842247, + "step": 3815, + "valid_targets_mean": 1412.5, + "valid_targets_min": 514 + }, + { + "epoch": 4.016824395373291, + "grad_norm": 0.8263312908136765, + "learning_rate": 1.8350063394039352e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16678602993488312, + "step": 3820, + "valid_targets_mean": 1409.8, + "valid_targets_min": 701 + }, + { + "epoch": 4.022082018927445, + "grad_norm": 0.7299100853844972, + "learning_rate": 1.82978093356157e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16625288128852844, + "step": 3825, + "valid_targets_mean": 1655.4, + "valid_targets_min": 742 + }, + { + "epoch": 4.027339642481598, + "grad_norm": 0.7573676951715616, + "learning_rate": 1.824556697889502e-05, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.134572371840477, + "step": 3830, + "valid_targets_mean": 1219.5, + "valid_targets_min": 746 + }, + { + "epoch": 4.032597266035752, + "grad_norm": 0.7460064071823531, + "learning_rate": 1.8193336683017197e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14512114226818085, + "step": 3835, + "valid_targets_mean": 1534.8, + "valid_targets_min": 774 + }, + { + "epoch": 4.037854889589905, + "grad_norm": 0.7467667152761563, + "learning_rate": 1.8141118807039193e-05, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13977526128292084, + "step": 3840, + "valid_targets_mean": 1388.5, + "valid_targets_min": 648 + }, + { + "epoch": 4.043112513144059, + "grad_norm": 0.7466259477771547, + "learning_rate": 1.8088913709932582e-05, + "loss": 0.1644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14778225123882294, + "step": 3845, + "valid_targets_mean": 1322.9, + "valid_targets_min": 708 + }, + { + "epoch": 4.048370136698212, + "grad_norm": 
0.6691684846073358, + "learning_rate": 1.8036721750581106e-05, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13036976754665375, + "step": 3850, + "valid_targets_mean": 1573.5, + "valid_targets_min": 788 + }, + { + "epoch": 4.053627760252366, + "grad_norm": 0.7956145711586533, + "learning_rate": 1.7984543287778185e-05, + "loss": 0.1521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13885369896888733, + "step": 3855, + "valid_targets_mean": 1277.2, + "valid_targets_min": 615 + }, + { + "epoch": 4.058885383806519, + "grad_norm": 0.7472855226416464, + "learning_rate": 1.7932378680224443e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1496802419424057, + "step": 3860, + "valid_targets_mean": 1454.9, + "valid_targets_min": 627 + }, + { + "epoch": 4.064143007360673, + "grad_norm": 0.7075850524471489, + "learning_rate": 1.7880228286525275e-05, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14226964116096497, + "step": 3865, + "valid_targets_mean": 1323.2, + "valid_targets_min": 753 + }, + { + "epoch": 4.069400630914826, + "grad_norm": 0.7202367129387591, + "learning_rate": 1.782809246518836e-05, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14475390315055847, + "step": 3870, + "valid_targets_mean": 1491.4, + "valid_targets_min": 740 + }, + { + "epoch": 4.0746582544689804, + "grad_norm": 0.7464426028911113, + "learning_rate": 1.7775971574621186e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14927147328853607, + "step": 3875, + "valid_targets_mean": 1625.9, + "valid_targets_min": 642 + }, + { + "epoch": 4.079915878023134, + "grad_norm": 0.7237691590361065, + "learning_rate": 1.772386597312861e-05, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1303875744342804, + "step": 3880, + "valid_targets_mean": 1360.6, + "valid_targets_min": 518 + }, + { + "epoch": 4.085173501577287, + "grad_norm": 0.8588529853471539, + "learning_rate": 1.7671776018910397e-05, + "loss": 0.1565, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.15439622104167938, + "step": 3885, + "valid_targets_mean": 1484.4, + "valid_targets_min": 721 + }, + { + "epoch": 4.090431125131441, + "grad_norm": 0.7780792565743658, + "learning_rate": 1.761970207005871e-05, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1545676290988922, + "step": 3890, + "valid_targets_mean": 1509.9, + "valid_targets_min": 727 + }, + { + "epoch": 4.095688748685594, + "grad_norm": 0.6781007500933507, + "learning_rate": 1.756764448455572e-05, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13919678330421448, + "step": 3895, + "valid_targets_mean": 1507.2, + "valid_targets_min": 803 + }, + { + "epoch": 4.100946372239748, + "grad_norm": 0.7744685293110305, + "learning_rate": 1.7515603620271087e-05, + "loss": 0.1454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16268318891525269, + "step": 3900, + "valid_targets_mean": 1620.9, + "valid_targets_min": 698 + }, + { + "epoch": 4.106203995793901, + "grad_norm": 0.7738920005389524, + "learning_rate": 1.7463579834959525e-05, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1758730560541153, + "step": 3905, + "valid_targets_mean": 2104.2, + "valid_targets_min": 832 + }, + { + "epoch": 4.111461619348055, + "grad_norm": 0.7449550384729909, + "learning_rate": 1.7411573486258343e-05, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14727957546710968, + "step": 3910, + "valid_targets_mean": 1727.2, + "valid_targets_min": 891 + }, + { + "epoch": 4.116719242902208, + "grad_norm": 0.8013214299941451, + "learning_rate": 1.735958493168498e-05, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15961864590644836, + "step": 3915, + "valid_targets_mean": 1509.1, + "valid_targets_min": 621 + }, + { + "epoch": 4.121976866456362, + "grad_norm": 0.7229377546821801, + "learning_rate": 1.730761452863454e-05, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12209049612283707, + "step": 3920, + 
"valid_targets_mean": 1330.2, + "valid_targets_min": 697 + }, + { + "epoch": 4.127234490010515, + "grad_norm": 0.7432586495114009, + "learning_rate": 1.7255662634377365e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14521485567092896, + "step": 3925, + "valid_targets_mean": 1642.9, + "valid_targets_min": 824 + }, + { + "epoch": 4.132492113564669, + "grad_norm": 0.9356614070212093, + "learning_rate": 1.720372960605654e-05, + "loss": 0.1364, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12674985826015472, + "step": 3930, + "valid_targets_mean": 1331.2, + "valid_targets_min": 584 + }, + { + "epoch": 4.137749737118822, + "grad_norm": 0.7246993147868624, + "learning_rate": 1.715181580068546e-05, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1463506817817688, + "step": 3935, + "valid_targets_mean": 1734.8, + "valid_targets_min": 1033 + }, + { + "epoch": 4.143007360672976, + "grad_norm": 0.7553205934847995, + "learning_rate": 1.7099921575145372e-05, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1663040667772293, + "step": 3940, + "valid_targets_mean": 1724.6, + "valid_targets_min": 1007 + }, + { + "epoch": 4.148264984227129, + "grad_norm": 0.7026869178890462, + "learning_rate": 1.7048047286182945e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1284397542476654, + "step": 3945, + "valid_targets_mean": 1342.1, + "valid_targets_min": 613 + }, + { + "epoch": 4.153522607781283, + "grad_norm": 0.7571640035397575, + "learning_rate": 1.6996193290407742e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14246192574501038, + "step": 3950, + "valid_targets_mean": 1532.2, + "valid_targets_min": 768 + }, + { + "epoch": 4.158780231335436, + "grad_norm": 0.7746359454091577, + "learning_rate": 1.694435994428987e-05, + "loss": 0.1392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11980274319648743, + "step": 3955, + "valid_targets_mean": 1112.7, + "valid_targets_min": 643 + }, + { + "epoch": 
4.16403785488959, + "grad_norm": 0.7789865885616932, + "learning_rate": 1.6892547604157464e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385720670223236, + "step": 3960, + "valid_targets_mean": 1402.3, + "valid_targets_min": 841 + }, + { + "epoch": 4.169295478443743, + "grad_norm": 0.7409379672132878, + "learning_rate": 1.6840756626194242e-05, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14954648911952972, + "step": 3965, + "valid_targets_mean": 1729.4, + "valid_targets_min": 705 + }, + { + "epoch": 4.174553101997897, + "grad_norm": 0.8830404211700101, + "learning_rate": 1.678898736643708e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1382630616426468, + "step": 3970, + "valid_targets_mean": 1233.4, + "valid_targets_min": 716 + }, + { + "epoch": 4.17981072555205, + "grad_norm": 0.7832949923877968, + "learning_rate": 1.6737240180773554e-05, + "loss": 0.1257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12926602363586426, + "step": 3975, + "valid_targets_mean": 1254.3, + "valid_targets_min": 627 + }, + { + "epoch": 4.185068349106204, + "grad_norm": 0.7460084475449279, + "learning_rate": 1.6685515424939478e-05, + "loss": 0.1419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1309753954410553, + "step": 3980, + "valid_targets_mean": 1454.9, + "valid_targets_min": 619 + }, + { + "epoch": 4.190325972660357, + "grad_norm": 0.7826957982549188, + "learning_rate": 1.6633813454516486e-05, + "loss": 0.1334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13416488468647003, + "step": 3985, + "valid_targets_mean": 1471.0, + "valid_targets_min": 734 + }, + { + "epoch": 4.195583596214511, + "grad_norm": 0.7505831254614799, + "learning_rate": 1.658213462492957e-05, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12968222796916962, + "step": 3990, + "valid_targets_mean": 1367.9, + "valid_targets_min": 700 + }, + { + "epoch": 4.200841219768664, + "grad_norm": 0.7201532762160492, + "learning_rate": 1.6530479291444636e-05, 
+ "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13182759284973145, + "step": 3995, + "valid_targets_mean": 1632.7, + "valid_targets_min": 892 + }, + { + "epoch": 4.206098843322818, + "grad_norm": 0.7095273753928816, + "learning_rate": 1.6478847809166066e-05, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12015292048454285, + "step": 4000, + "valid_targets_mean": 1318.6, + "valid_targets_min": 711 + }, + { + "epoch": 4.211356466876971, + "grad_norm": 0.7210862846864323, + "learning_rate": 1.64272405330343e-05, + "loss": 0.1412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13294000923633575, + "step": 4005, + "valid_targets_mean": 1515.7, + "valid_targets_min": 682 + }, + { + "epoch": 4.216614090431126, + "grad_norm": 0.7963901121025583, + "learning_rate": 1.6375657817823323e-05, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14794018864631653, + "step": 4010, + "valid_targets_mean": 1453.2, + "valid_targets_min": 818 + }, + { + "epoch": 4.221871713985279, + "grad_norm": 0.7827226440279664, + "learning_rate": 1.6324100018138328e-05, + "loss": 0.1452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447080820798874, + "step": 4015, + "valid_targets_mean": 1779.6, + "valid_targets_min": 1011 + }, + { + "epoch": 4.2271293375394325, + "grad_norm": 0.7558380275172935, + "learning_rate": 1.6272567488413204e-05, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12958121299743652, + "step": 4020, + "valid_targets_mean": 1324.1, + "valid_targets_min": 748 + }, + { + "epoch": 4.232386961093586, + "grad_norm": 0.7897997966258203, + "learning_rate": 1.6221060582908115e-05, + "loss": 0.1359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14728528261184692, + "step": 4025, + "valid_targets_mean": 1583.6, + "valid_targets_min": 699 + }, + { + "epoch": 4.2376445846477395, + "grad_norm": 0.7658583092317679, + "learning_rate": 1.616957965570708e-05, + "loss": 0.13, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11436955630779266, + "step": 
4030, + "valid_targets_mean": 1097.2, + "valid_targets_min": 534 + }, + { + "epoch": 4.242902208201893, + "grad_norm": 0.8157009302440904, + "learning_rate": 1.6118125060715534e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13531437516212463, + "step": 4035, + "valid_targets_mean": 1354.6, + "valid_targets_min": 687 + }, + { + "epoch": 4.2481598317560465, + "grad_norm": 0.7601170245470381, + "learning_rate": 1.6066697151657876e-05, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13570845127105713, + "step": 4040, + "valid_targets_mean": 1476.6, + "valid_targets_min": 825 + }, + { + "epoch": 4.2534174553102, + "grad_norm": 0.7717376680599746, + "learning_rate": 1.601529628207508e-05, + "loss": 0.1294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313013732433319, + "step": 4045, + "valid_targets_mean": 1383.5, + "valid_targets_min": 829 + }, + { + "epoch": 4.2586750788643535, + "grad_norm": 0.749117955068463, + "learning_rate": 1.5963922805322204e-05, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1446974277496338, + "step": 4050, + "valid_targets_mean": 1727.3, + "valid_targets_min": 889 + }, + { + "epoch": 4.263932702418507, + "grad_norm": 0.7128507845507394, + "learning_rate": 1.5912577074566016e-05, + "loss": 0.1407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1259775459766388, + "step": 4055, + "valid_targets_mean": 1545.9, + "valid_targets_min": 836 + }, + { + "epoch": 4.2691903259726605, + "grad_norm": 0.8222387636285143, + "learning_rate": 1.5861259442782548e-05, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1394100785255432, + "step": 4060, + "valid_targets_mean": 1450.4, + "valid_targets_min": 554 + }, + { + "epoch": 4.274447949526814, + "grad_norm": 0.7388188465031793, + "learning_rate": 1.580997026275464e-05, + "loss": 0.1366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135959655046463, + "step": 4065, + "valid_targets_mean": 1381.1, + "valid_targets_min": 843 + }, + { + "epoch": 
4.279705573080967, + "grad_norm": 0.7268212505021541, + "learning_rate": 1.5758709887069562e-05, + "loss": 0.1342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14015160501003265, + "step": 4070, + "valid_targets_mean": 1789.8, + "valid_targets_min": 802 + }, + { + "epoch": 4.284963196635121, + "grad_norm": 0.7873039628206715, + "learning_rate": 1.570747866811658e-05, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13989371061325073, + "step": 4075, + "valid_targets_mean": 1478.9, + "valid_targets_min": 612 + }, + { + "epoch": 4.290220820189274, + "grad_norm": 0.8078812721593096, + "learning_rate": 1.5656276958084478e-05, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12958908081054688, + "step": 4080, + "valid_targets_mean": 1466.4, + "valid_targets_min": 841 + }, + { + "epoch": 4.295478443743428, + "grad_norm": 0.8344185060991246, + "learning_rate": 1.560510510895923e-05, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13817384839057922, + "step": 4085, + "valid_targets_mean": 1375.2, + "valid_targets_min": 705 + }, + { + "epoch": 4.300736067297581, + "grad_norm": 0.7543615952982942, + "learning_rate": 1.5553963472521506e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1319047212600708, + "step": 4090, + "valid_targets_mean": 1385.6, + "valid_targets_min": 818 + }, + { + "epoch": 4.305993690851735, + "grad_norm": 0.6888589072225046, + "learning_rate": 1.5502852400344277e-05, + "loss": 0.1259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10959073901176453, + "step": 4095, + "valid_targets_mean": 1475.1, + "valid_targets_min": 604 + }, + { + "epoch": 4.311251314405888, + "grad_norm": 0.7921401180973303, + "learning_rate": 1.545177224379041e-05, + "loss": 0.1395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12821754813194275, + "step": 4100, + "valid_targets_mean": 1375.1, + "valid_targets_min": 647 + }, + { + "epoch": 4.316508937960042, + "grad_norm": 0.7787601011408837, + "learning_rate": 
1.5400723354010244e-05, + "loss": 0.1381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11903885006904602, + "step": 4105, + "valid_targets_mean": 1445.8, + "valid_targets_min": 728 + }, + { + "epoch": 4.321766561514195, + "grad_norm": 0.8161407948204759, + "learning_rate": 1.5349706081939158e-05, + "loss": 0.1338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13431984186172485, + "step": 4110, + "valid_targets_mean": 1296.5, + "valid_targets_min": 713 + }, + { + "epoch": 4.327024185068349, + "grad_norm": 0.7940002845904401, + "learning_rate": 1.5298720778295195e-05, + "loss": 0.1357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12968891859054565, + "step": 4115, + "valid_targets_mean": 1412.1, + "valid_targets_min": 840 + }, + { + "epoch": 4.332281808622502, + "grad_norm": 0.7674943122933603, + "learning_rate": 1.5247767793576625e-05, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14080610871315002, + "step": 4120, + "valid_targets_mean": 1550.4, + "valid_targets_min": 772 + }, + { + "epoch": 4.337539432176656, + "grad_norm": 0.7660184152293514, + "learning_rate": 1.519684747805953e-05, + "loss": 0.1335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.143622487783432, + "step": 4125, + "valid_targets_mean": 1600.9, + "valid_targets_min": 636 + }, + { + "epoch": 4.342797055730809, + "grad_norm": 0.7932998666812077, + "learning_rate": 1.5145960181795421e-05, + "loss": 0.1348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12783610820770264, + "step": 4130, + "valid_targets_mean": 1450.6, + "valid_targets_min": 641 + }, + { + "epoch": 4.348054679284963, + "grad_norm": 0.7858927769703171, + "learning_rate": 1.509510625460883e-05, + "loss": 0.1378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13657426834106445, + "step": 4135, + "valid_targets_mean": 1553.0, + "valid_targets_min": 766 + }, + { + "epoch": 4.353312302839116, + "grad_norm": 0.7871058542091913, + "learning_rate": 1.5044286046094851e-05, + "loss": 0.1319, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11590927839279175, + "step": 4140, + "valid_targets_mean": 1248.1, + "valid_targets_min": 729 + }, + { + "epoch": 4.358569926393271, + "grad_norm": 0.8154723418333981, + "learning_rate": 1.4993499905616823e-05, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14865374565124512, + "step": 4145, + "valid_targets_mean": 1415.8, + "valid_targets_min": 682 + }, + { + "epoch": 4.363827549947424, + "grad_norm": 0.7792253533823932, + "learning_rate": 1.494274818230387e-05, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12811344861984253, + "step": 4150, + "valid_targets_mean": 1598.4, + "valid_targets_min": 738 + }, + { + "epoch": 4.369085173501578, + "grad_norm": 0.821669667594602, + "learning_rate": 1.4892031225048503e-05, + "loss": 0.1368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13663919270038605, + "step": 4155, + "valid_targets_mean": 1378.1, + "valid_targets_min": 731 + }, + { + "epoch": 4.374342797055731, + "grad_norm": 0.8412746945998283, + "learning_rate": 1.4841349382504247e-05, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15202023088932037, + "step": 4160, + "valid_targets_mean": 1548.3, + "valid_targets_min": 793 + }, + { + "epoch": 4.379600420609885, + "grad_norm": 0.7345150853195603, + "learning_rate": 1.4790703003083236e-05, + "loss": 0.1267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11928276717662811, + "step": 4165, + "valid_targets_mean": 1389.6, + "valid_targets_min": 564 + }, + { + "epoch": 4.384858044164038, + "grad_norm": 0.77422626293784, + "learning_rate": 1.4740092434953793e-05, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12621235847473145, + "step": 4170, + "valid_targets_mean": 1520.1, + "valid_targets_min": 651 + }, + { + "epoch": 4.390115667718192, + "grad_norm": 0.8112255793265316, + "learning_rate": 1.4689518026038065e-05, + "loss": 0.1394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12599149346351624, + "step": 4175, + "valid_targets_mean": 1298.6, + "valid_targets_min": 
687 + }, + { + "epoch": 4.395373291272345, + "grad_norm": 0.7926395106584089, + "learning_rate": 1.4638980124009649e-05, + "loss": 0.132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1336921900510788, + "step": 4180, + "valid_targets_mean": 1391.2, + "valid_targets_min": 703 + }, + { + "epoch": 4.400630914826499, + "grad_norm": 0.7275052659304198, + "learning_rate": 1.458847907629113e-05, + "loss": 0.1279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12254877388477325, + "step": 4185, + "valid_targets_mean": 1421.6, + "valid_targets_min": 1045 + }, + { + "epoch": 4.405888538380652, + "grad_norm": 0.8102367575631457, + "learning_rate": 1.4538015230051761e-05, + "loss": 0.1369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1422000229358673, + "step": 4190, + "valid_targets_mean": 1567.6, + "valid_targets_min": 764 + }, + { + "epoch": 4.411146161934806, + "grad_norm": 0.8033090146453002, + "learning_rate": 1.4487588932205072e-05, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15437643229961395, + "step": 4195, + "valid_targets_mean": 1550.9, + "valid_targets_min": 826 + }, + { + "epoch": 4.416403785488959, + "grad_norm": 0.8268837868112663, + "learning_rate": 1.4437200529406425e-05, + "loss": 0.1286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14400029182434082, + "step": 4200, + "valid_targets_mean": 1678.6, + "valid_targets_min": 1011 + }, + { + "epoch": 4.421661409043113, + "grad_norm": 0.7360355045558858, + "learning_rate": 1.4386850368050706e-05, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14487197995185852, + "step": 4205, + "valid_targets_mean": 1625.6, + "valid_targets_min": 823 + }, + { + "epoch": 4.426919032597266, + "grad_norm": 0.7108983738475014, + "learning_rate": 1.433653879426991e-05, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11359036713838577, + "step": 4210, + "valid_targets_mean": 1488.6, + "valid_targets_min": 595 + }, + { + "epoch": 4.4321766561514195, + "grad_norm": 0.8159538706965361, + 
"learning_rate": 1.4286266153930733e-05, + "loss": 0.1311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13606056571006775, + "step": 4215, + "valid_targets_mean": 1418.0, + "valid_targets_min": 693 + }, + { + "epoch": 4.437434279705573, + "grad_norm": 0.940569049580046, + "learning_rate": 1.4236032792632251e-05, + "loss": 0.1306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13143770396709442, + "step": 4220, + "valid_targets_mean": 1187.5, + "valid_targets_min": 720 + }, + { + "epoch": 4.4426919032597265, + "grad_norm": 0.8112256287758004, + "learning_rate": 1.4185839055703511e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12770329415798187, + "step": 4225, + "valid_targets_mean": 1670.4, + "valid_targets_min": 685 + }, + { + "epoch": 4.44794952681388, + "grad_norm": 0.7295873729938533, + "learning_rate": 1.4135685288201151e-05, + "loss": 0.1317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12795163691043854, + "step": 4230, + "valid_targets_mean": 1437.3, + "valid_targets_min": 658 + }, + { + "epoch": 4.4532071503680335, + "grad_norm": 0.7438639117925115, + "learning_rate": 1.4085571834907046e-05, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13000422716140747, + "step": 4235, + "valid_targets_mean": 1597.1, + "valid_targets_min": 888 + }, + { + "epoch": 4.458464773922187, + "grad_norm": 0.7854347619692846, + "learning_rate": 1.4035499040325946e-05, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13853585720062256, + "step": 4240, + "valid_targets_mean": 1561.9, + "valid_targets_min": 897 + }, + { + "epoch": 4.4637223974763405, + "grad_norm": 0.6810473176157009, + "learning_rate": 1.3985467248683064e-05, + "loss": 0.1296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12546569108963013, + "step": 4245, + "valid_targets_mean": 2256.9, + "valid_targets_min": 494 + }, + { + "epoch": 4.468980021030494, + "grad_norm": 0.4278573598882043, + "learning_rate": 1.3935476803921755e-05, + "loss": 0.1083, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.0785178393125534, + "step": 4250, + "valid_targets_mean": 3085.9, + "valid_targets_min": 834 + }, + { + "epoch": 4.4742376445846475, + "grad_norm": 0.4168789086494719, + "learning_rate": 1.3885528049701148e-05, + "loss": 0.0839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08329863101243973, + "step": 4255, + "valid_targets_mean": 3431.4, + "valid_targets_min": 2790 + }, + { + "epoch": 4.479495268138801, + "grad_norm": 0.38746000134994446, + "learning_rate": 1.3835621329393738e-05, + "loss": 0.0788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08167532086372375, + "step": 4260, + "valid_targets_mean": 3551.9, + "valid_targets_min": 2133 + }, + { + "epoch": 4.484752891692954, + "grad_norm": 0.411473617129491, + "learning_rate": 1.3785756986083091e-05, + "loss": 0.0967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0843782126903534, + "step": 4265, + "valid_targets_mean": 2739.8, + "valid_targets_min": 526 + }, + { + "epoch": 4.490010515247108, + "grad_norm": 0.431217037259204, + "learning_rate": 1.3735935362561419e-05, + "loss": 0.0779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07926741987466812, + "step": 4270, + "valid_targets_mean": 2461.2, + "valid_targets_min": 662 + }, + { + "epoch": 4.495268138801261, + "grad_norm": 0.9614097819386166, + "learning_rate": 1.3686156801327293e-05, + "loss": 0.1084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14519354701042175, + "step": 4275, + "valid_targets_mean": 1136.5, + "valid_targets_min": 525 + }, + { + "epoch": 4.500525762355416, + "grad_norm": 0.3988218392818991, + "learning_rate": 1.3636421644583231e-05, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07371596992015839, + "step": 4280, + "valid_targets_mean": 3162.1, + "valid_targets_min": 647 + }, + { + "epoch": 4.505783385909568, + "grad_norm": 0.3780646101186313, + "learning_rate": 1.3586730234233367e-05, + "loss": 0.0859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07808525860309601, + "step": 4285, + "valid_targets_mean": 2876.7, + 
"valid_targets_min": 718 + }, + { + "epoch": 4.511041009463723, + "grad_norm": 0.5817849072696845, + "learning_rate": 1.3537082911881106e-05, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21639221906661987, + "step": 4290, + "valid_targets_mean": 2300.2, + "valid_targets_min": 246 + }, + { + "epoch": 4.516298633017876, + "grad_norm": 0.301823494032142, + "learning_rate": 1.3487480018826772e-05, + "loss": 0.0742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06836953014135361, + "step": 4295, + "valid_targets_mean": 5440.1, + "valid_targets_min": 636 + }, + { + "epoch": 4.52155625657203, + "grad_norm": 0.355945615977774, + "learning_rate": 1.343792189606525e-05, + "loss": 0.0812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08197809755802155, + "step": 4300, + "valid_targets_mean": 2827.2, + "valid_targets_min": 826 + }, + { + "epoch": 4.526813880126183, + "grad_norm": 0.3197853950404343, + "learning_rate": 1.338840888428366e-05, + "loss": 0.081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07435546815395355, + "step": 4305, + "valid_targets_mean": 3693.8, + "valid_targets_min": 1006 + }, + { + "epoch": 4.532071503680337, + "grad_norm": 0.40653873135896135, + "learning_rate": 1.3338941323859023e-05, + "loss": 0.095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10022848844528198, + "step": 4310, + "valid_targets_mean": 2589.2, + "valid_targets_min": 688 + }, + { + "epoch": 4.53732912723449, + "grad_norm": 0.37737572640615413, + "learning_rate": 1.3289519554855858e-05, + "loss": 0.1014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07328416407108307, + "step": 4315, + "valid_targets_mean": 2860.2, + "valid_targets_min": 553 + }, + { + "epoch": 4.542586750788644, + "grad_norm": 0.40099120673343713, + "learning_rate": 1.3240143917023938e-05, + "loss": 0.0797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07614661753177643, + "step": 4320, + "valid_targets_mean": 2444.4, + "valid_targets_min": 566 + }, + { + "epoch": 4.547844374342797, + "grad_norm": 
0.3647763568957528, + "learning_rate": 1.3190814749795893e-05, + "loss": 0.0756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07858487218618393, + "step": 4325, + "valid_targets_mean": 3651.4, + "valid_targets_min": 1111 + }, + { + "epoch": 4.553101997896951, + "grad_norm": 0.5543459716744039, + "learning_rate": 1.3141532392284873e-05, + "loss": 0.0798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12775170803070068, + "step": 4330, + "valid_targets_mean": 1973.4, + "valid_targets_min": 622 + }, + { + "epoch": 4.558359621451104, + "grad_norm": 0.5362310705045473, + "learning_rate": 1.3092297183282261e-05, + "loss": 0.0903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10443666577339172, + "step": 4335, + "valid_targets_mean": 1952.9, + "valid_targets_min": 493 + }, + { + "epoch": 4.563617245005258, + "grad_norm": 0.4033115586535664, + "learning_rate": 1.3043109461255305e-05, + "loss": 0.0864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07978132367134094, + "step": 4340, + "valid_targets_mean": 3431.8, + "valid_targets_min": 1302 + }, + { + "epoch": 4.568874868559411, + "grad_norm": 0.4532542937767393, + "learning_rate": 1.29939695643448e-05, + "loss": 0.0917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0826805830001831, + "step": 4345, + "valid_targets_mean": 2481.8, + "valid_targets_min": 802 + }, + { + "epoch": 4.574132492113565, + "grad_norm": 0.5536552657539693, + "learning_rate": 1.2944877830362777e-05, + "loss": 0.099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13342218101024628, + "step": 4350, + "valid_targets_mean": 2078.3, + "valid_targets_min": 957 + }, + { + "epoch": 4.579390115667718, + "grad_norm": 0.3862802794923409, + "learning_rate": 1.289583459679017e-05, + "loss": 0.1143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08603660762310028, + "step": 4355, + "valid_targets_mean": 2960.0, + "valid_targets_min": 641 + }, + { + "epoch": 4.584647739221872, + "grad_norm": 0.4240050269230897, + "learning_rate": 1.2846840200774484e-05, + "loss": 0.0753, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.0871136486530304, + "step": 4360, + "valid_targets_mean": 2375.6, + "valid_targets_min": 711 + }, + { + "epoch": 4.589905362776025, + "grad_norm": 0.4574251143763385, + "learning_rate": 1.2797894979127503e-05, + "loss": 0.1135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12482558935880661, + "step": 4365, + "valid_targets_mean": 2584.3, + "valid_targets_min": 700 + }, + { + "epoch": 4.595162986330179, + "grad_norm": 0.47247433988660864, + "learning_rate": 1.2748999268322977e-05, + "loss": 0.0975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11209756135940552, + "step": 4370, + "valid_targets_mean": 2515.2, + "valid_targets_min": 538 + }, + { + "epoch": 4.600420609884332, + "grad_norm": 0.56721988852021, + "learning_rate": 1.2700153404494247e-05, + "loss": 0.0838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12281329184770584, + "step": 4375, + "valid_targets_mean": 2212.8, + "valid_targets_min": 817 + }, + { + "epoch": 4.605678233438486, + "grad_norm": 0.6444590231201038, + "learning_rate": 1.2651357723432027e-05, + "loss": 0.2514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39276736974716187, + "step": 4380, + "valid_targets_mean": 2390.8, + "valid_targets_min": 807 + }, + { + "epoch": 4.610935856992639, + "grad_norm": 0.425248494736928, + "learning_rate": 1.2602612560582044e-05, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07209916412830353, + "step": 4385, + "valid_targets_mean": 2108.3, + "valid_targets_min": 507 + }, + { + "epoch": 4.616193480546793, + "grad_norm": 0.392468813631525, + "learning_rate": 1.2553918251042701e-05, + "loss": 0.0956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09675078094005585, + "step": 4390, + "valid_targets_mean": 3110.5, + "valid_targets_min": 615 + }, + { + "epoch": 4.621451104100946, + "grad_norm": 0.4435373510269777, + "learning_rate": 1.2505275129562851e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15841440856456757, + "step": 4395, + 
"valid_targets_mean": 2953.4, + "valid_targets_min": 826 + }, + { + "epoch": 4.6267087276550996, + "grad_norm": 0.3940934433584046, + "learning_rate": 1.2456683530539446e-05, + "loss": 0.0821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08367893099784851, + "step": 4400, + "valid_targets_mean": 3337.5, + "valid_targets_min": 830 + }, + { + "epoch": 4.631966351209253, + "grad_norm": 0.43397069656142984, + "learning_rate": 1.2408143788015225e-05, + "loss": 0.0837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09857040643692017, + "step": 4405, + "valid_targets_mean": 4345.2, + "valid_targets_min": 2665 + }, + { + "epoch": 4.6372239747634065, + "grad_norm": 0.34164095688934526, + "learning_rate": 1.2359656235676468e-05, + "loss": 0.0858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07392176985740662, + "step": 4410, + "valid_targets_mean": 3302.3, + "valid_targets_min": 595 + }, + { + "epoch": 4.642481598317561, + "grad_norm": 0.3879656343424053, + "learning_rate": 1.231122120685066e-05, + "loss": 0.0783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07721789181232452, + "step": 4415, + "valid_targets_mean": 2945.6, + "valid_targets_min": 825 + }, + { + "epoch": 4.6477392218717135, + "grad_norm": 0.38811962478696127, + "learning_rate": 1.2262839034504208e-05, + "loss": 0.0796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06672494113445282, + "step": 4420, + "valid_targets_mean": 2946.4, + "valid_targets_min": 672 + }, + { + "epoch": 4.652996845425868, + "grad_norm": 0.388998894081642, + "learning_rate": 1.2214510051240164e-05, + "loss": 0.0732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07016399502754211, + "step": 4425, + "valid_targets_mean": 3049.2, + "valid_targets_min": 1027 + }, + { + "epoch": 4.658254468980021, + "grad_norm": 0.3619988789598876, + "learning_rate": 1.2166234589295951e-05, + "loss": 0.0789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07090885937213898, + "step": 4430, + "valid_targets_mean": 3680.9, + "valid_targets_min": 2540 + }, + { + "epoch": 
4.663512092534175, + "grad_norm": 0.4254054079613278, + "learning_rate": 1.2118012980541013e-05, + "loss": 0.0716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08978554606437683, + "step": 4435, + "valid_targets_mean": 3068.1, + "valid_targets_min": 576 + }, + { + "epoch": 4.668769716088328, + "grad_norm": 0.3147850073376493, + "learning_rate": 1.2069845556474626e-05, + "loss": 0.1116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04920667037367821, + "step": 4440, + "valid_targets_mean": 3092.3, + "valid_targets_min": 623 + }, + { + "epoch": 4.674027339642482, + "grad_norm": 0.33164638804064306, + "learning_rate": 1.2021732648223553e-05, + "loss": 0.0665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05687657743692398, + "step": 4445, + "valid_targets_mean": 3000.5, + "valid_targets_min": 583 + }, + { + "epoch": 4.679284963196635, + "grad_norm": 0.3718431037513027, + "learning_rate": 1.1973674586539791e-05, + "loss": 0.0711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07306220382452011, + "step": 4450, + "valid_targets_mean": 3647.8, + "valid_targets_min": 851 + }, + { + "epoch": 4.684542586750789, + "grad_norm": 0.4791926891231177, + "learning_rate": 1.1925671701798292e-05, + "loss": 0.0688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061685558408498764, + "step": 4455, + "valid_targets_mean": 2608.7, + "valid_targets_min": 526 + }, + { + "epoch": 4.689800210304942, + "grad_norm": 0.6344681573125026, + "learning_rate": 1.1877724323994704e-05, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14251452684402466, + "step": 4460, + "valid_targets_mean": 1622.6, + "valid_targets_min": 195 + }, + { + "epoch": 4.695057833859096, + "grad_norm": 0.4439081129575039, + "learning_rate": 1.1829832782743074e-05, + "loss": 0.0798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1002945601940155, + "step": 4465, + "valid_targets_mean": 3278.1, + "valid_targets_min": 840 + }, + { + "epoch": 4.700315457413249, + "grad_norm": 0.45781425409055415, + "learning_rate": 
1.178199740727362e-05, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08636211603879929, + "step": 4470, + "valid_targets_mean": 2136.1, + "valid_targets_min": 684 + }, + { + "epoch": 4.705573080967403, + "grad_norm": 0.364943194677604, + "learning_rate": 1.1734218526430446e-05, + "loss": 0.0781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07217290997505188, + "step": 4475, + "valid_targets_mean": 3365.9, + "valid_targets_min": 847 + }, + { + "epoch": 4.710830704521556, + "grad_norm": 0.9066742961112946, + "learning_rate": 1.1686496468669269e-05, + "loss": 0.1061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11579406261444092, + "step": 4480, + "valid_targets_mean": 1630.2, + "valid_targets_min": 824 + }, + { + "epoch": 4.71608832807571, + "grad_norm": 0.42100013149054094, + "learning_rate": 1.1638831562055191e-05, + "loss": 0.1041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07590813934803009, + "step": 4485, + "valid_targets_mean": 2701.2, + "valid_targets_min": 1039 + }, + { + "epoch": 4.721345951629863, + "grad_norm": 0.6775479142011301, + "learning_rate": 1.1591224134260425e-05, + "loss": 0.106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16865578293800354, + "step": 4490, + "valid_targets_mean": 1553.4, + "valid_targets_min": 649 + }, + { + "epoch": 4.726603575184017, + "grad_norm": 0.4327586439278677, + "learning_rate": 1.1543674512562037e-05, + "loss": 0.0805, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07839912176132202, + "step": 4495, + "valid_targets_mean": 2974.8, + "valid_targets_min": 707 + }, + { + "epoch": 4.73186119873817, + "grad_norm": 0.4034140352173875, + "learning_rate": 1.1496183023839729e-05, + "loss": 0.0859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07873895764350891, + "step": 4500, + "valid_targets_mean": 3796.6, + "valid_targets_min": 2850 + }, + { + "epoch": 4.737118822292324, + "grad_norm": 0.48243823219814264, + "learning_rate": 1.144874999457354e-05, + "loss": 0.0838, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.0818183571100235, + "step": 4505, + "valid_targets_mean": 2880.9, + "valid_targets_min": 633 + }, + { + "epoch": 4.742376445846477, + "grad_norm": 0.36978017739421254, + "learning_rate": 1.1401375750841637e-05, + "loss": 0.0701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08065995573997498, + "step": 4510, + "valid_targets_mean": 4210.0, + "valid_targets_min": 763 + }, + { + "epoch": 4.747634069400631, + "grad_norm": 0.33331605818915316, + "learning_rate": 1.1354060618318086e-05, + "loss": 0.0664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0686541348695755, + "step": 4515, + "valid_targets_mean": 4181.8, + "valid_targets_min": 2540 + }, + { + "epoch": 4.752891692954784, + "grad_norm": 0.33795353883889473, + "learning_rate": 1.1306804922270568e-05, + "loss": 0.0789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06736970692873001, + "step": 4520, + "valid_targets_mean": 4073.6, + "valid_targets_min": 1859 + }, + { + "epoch": 4.758149316508938, + "grad_norm": 0.3586629736315267, + "learning_rate": 1.1259608987558175e-05, + "loss": 0.0955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060045026242733, + "step": 4525, + "valid_targets_mean": 2830.4, + "valid_targets_min": 893 + }, + { + "epoch": 4.763406940063091, + "grad_norm": 0.4917418712564499, + "learning_rate": 1.1212473138629187e-05, + "loss": 0.0905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09968134015798569, + "step": 4530, + "valid_targets_mean": 2476.3, + "valid_targets_min": 753 + }, + { + "epoch": 4.768664563617245, + "grad_norm": 0.2803048818061113, + "learning_rate": 1.1165397699518797e-05, + "loss": 0.0822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047491952776908875, + "step": 4535, + "valid_targets_mean": 4019.0, + "valid_targets_min": 1180 + }, + { + "epoch": 4.773922187171398, + "grad_norm": 0.3874406407586897, + "learning_rate": 1.1118382993846933e-05, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08119888603687286, + "step": 4540, + "valid_targets_mean": 3585.2, + 
"valid_targets_min": 945 + }, + { + "epoch": 4.779179810725552, + "grad_norm": 0.48850349650856384, + "learning_rate": 1.1071429344816003e-05, + "loss": 0.0891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09316246956586838, + "step": 4545, + "valid_targets_mean": 3281.9, + "valid_targets_min": 907 + }, + { + "epoch": 4.784437434279706, + "grad_norm": 0.29441890076300864, + "learning_rate": 1.102453707520867e-05, + "loss": 0.0684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.050600532442331314, + "step": 4550, + "valid_targets_mean": 3693.3, + "valid_targets_min": 980 + }, + { + "epoch": 4.789695057833859, + "grad_norm": 0.6218549206409588, + "learning_rate": 1.0977706507385673e-05, + "loss": 0.0822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09425666928291321, + "step": 4555, + "valid_targets_mean": 1394.1, + "valid_targets_min": 478 + }, + { + "epoch": 4.794952681388013, + "grad_norm": 0.5405869895858056, + "learning_rate": 1.0930937963283554e-05, + "loss": 0.0774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10098089277744293, + "step": 4560, + "valid_targets_mean": 3008.5, + "valid_targets_min": 1020 + }, + { + "epoch": 4.8002103049421665, + "grad_norm": 0.5414687913481461, + "learning_rate": 1.088423176441248e-05, + "loss": 0.0794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09131792187690735, + "step": 4565, + "valid_targets_mean": 1961.6, + "valid_targets_min": 680 + }, + { + "epoch": 4.80546792849632, + "grad_norm": 0.5143395929290274, + "learning_rate": 1.0837588231854044e-05, + "loss": 0.0944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10974930226802826, + "step": 4570, + "valid_targets_mean": 2237.1, + "valid_targets_min": 748 + }, + { + "epoch": 4.8107255520504735, + "grad_norm": 0.534398402750308, + "learning_rate": 1.0791007686259019e-05, + "loss": 0.0727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07966640591621399, + "step": 4575, + "valid_targets_mean": 1692.9, + "valid_targets_min": 705 + }, + { + "epoch": 4.815983175604627, + "grad_norm": 
0.4046720762065736, + "learning_rate": 1.0744490447845172e-05, + "loss": 0.0839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05888023599982262, + "step": 4580, + "valid_targets_mean": 2879.0, + "valid_targets_min": 622 + }, + { + "epoch": 4.8212407991587805, + "grad_norm": 0.602601321603702, + "learning_rate": 1.0698036836395084e-05, + "loss": 0.0991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07196278870105743, + "step": 4585, + "valid_targets_mean": 3544.4, + "valid_targets_min": 1322 + }, + { + "epoch": 4.826498422712934, + "grad_norm": 0.4184262869470214, + "learning_rate": 1.0651647171253936e-05, + "loss": 0.0787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07269267737865448, + "step": 4590, + "valid_targets_mean": 2639.8, + "valid_targets_min": 623 + }, + { + "epoch": 4.831756046267087, + "grad_norm": 0.5176677296781518, + "learning_rate": 1.0605321771327267e-05, + "loss": 0.0699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09694170951843262, + "step": 4595, + "valid_targets_mean": 2873.4, + "valid_targets_min": 690 + }, + { + "epoch": 4.837013669821241, + "grad_norm": 0.4778189406353667, + "learning_rate": 1.0559060955078873e-05, + "loss": 0.0847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12576048076152802, + "step": 4600, + "valid_targets_mean": 2972.7, + "valid_targets_min": 1420 + }, + { + "epoch": 4.842271293375394, + "grad_norm": 0.4170832496590639, + "learning_rate": 1.0512865040528558e-05, + "loss": 0.0933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07232730090618134, + "step": 4605, + "valid_targets_mean": 3139.2, + "valid_targets_min": 1109 + }, + { + "epoch": 4.847528916929548, + "grad_norm": 0.4524793562623884, + "learning_rate": 1.0466734345249946e-05, + "loss": 0.064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07442990690469742, + "step": 4610, + "valid_targets_mean": 2407.1, + "valid_targets_min": 619 + }, + { + "epoch": 4.852786540483701, + "grad_norm": 0.5232546604492923, + "learning_rate": 1.0420669186368311e-05, + "loss": 0.0816, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.08989334106445312, + "step": 4615, + "valid_targets_mean": 2313.1, + "valid_targets_min": 486 + }, + { + "epoch": 4.858044164037855, + "grad_norm": 0.37937194140200015, + "learning_rate": 1.0374669880558419e-05, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07011362165212631, + "step": 4620, + "valid_targets_mean": 3987.6, + "valid_targets_min": 1057 + }, + { + "epoch": 4.863301787592008, + "grad_norm": 0.35505357590428044, + "learning_rate": 1.0328736744042311e-05, + "loss": 0.0616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06438374519348145, + "step": 4625, + "valid_targets_mean": 4085.2, + "valid_targets_min": 1243 + }, + { + "epoch": 4.868559411146162, + "grad_norm": 0.3765782150134594, + "learning_rate": 1.0282870092587144e-05, + "loss": 0.0878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07843618094921112, + "step": 4630, + "valid_targets_mean": 4141.8, + "valid_targets_min": 3322 + }, + { + "epoch": 4.873817034700315, + "grad_norm": 0.4153059992226463, + "learning_rate": 1.023707024150305e-05, + "loss": 0.0686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07007251679897308, + "step": 4635, + "valid_targets_mean": 2203.5, + "valid_targets_min": 660 + }, + { + "epoch": 4.879074658254469, + "grad_norm": 0.39185270066638633, + "learning_rate": 1.0191337505640905e-05, + "loss": 0.1021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06810898333787918, + "step": 4640, + "valid_targets_mean": 2767.1, + "valid_targets_min": 564 + }, + { + "epoch": 4.884332281808622, + "grad_norm": 0.2948352603788897, + "learning_rate": 1.0145672199390226e-05, + "loss": 0.0899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.040803369134664536, + "step": 4645, + "valid_targets_mean": 3203.1, + "valid_targets_min": 629 + }, + { + "epoch": 4.889589905362776, + "grad_norm": 0.5282288868271211, + "learning_rate": 1.010007463667699e-05, + "loss": 0.0918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1312532275915146, + "step": 4650, + 
"valid_targets_mean": 3090.6, + "valid_targets_min": 1556 + }, + { + "epoch": 4.894847528916929, + "grad_norm": 0.4137506408486238, + "learning_rate": 1.0054545130961441e-05, + "loss": 0.0692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06519424915313721, + "step": 4655, + "valid_targets_mean": 3816.9, + "valid_targets_min": 2619 + }, + { + "epoch": 4.900105152471083, + "grad_norm": 0.53057303622189, + "learning_rate": 1.0009083995236009e-05, + "loss": 0.0789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08234363049268723, + "step": 4660, + "valid_targets_mean": 1804.8, + "valid_targets_min": 559 + }, + { + "epoch": 4.905362776025236, + "grad_norm": 0.4063632976890675, + "learning_rate": 9.963691542023079e-06, + "loss": 0.0772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07967343181371689, + "step": 4665, + "valid_targets_mean": 3685.7, + "valid_targets_min": 953 + }, + { + "epoch": 4.91062039957939, + "grad_norm": 0.7734429760367383, + "learning_rate": 9.918368083372884e-06, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1001024842262268, + "step": 4670, + "valid_targets_mean": 1168.2, + "valid_targets_min": 337 + }, + { + "epoch": 4.915878023133543, + "grad_norm": 0.4888579976068423, + "learning_rate": 9.87311393086138e-06, + "loss": 0.1031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09964701533317566, + "step": 4675, + "valid_targets_mean": 2344.0, + "valid_targets_min": 459 + }, + { + "epoch": 4.921135646687697, + "grad_norm": 0.5202593321678269, + "learning_rate": 9.827929395588048e-06, + "loss": 0.0837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09963840991258621, + "step": 4680, + "valid_targets_mean": 2441.2, + "valid_targets_min": 529 + }, + { + "epoch": 4.926393270241851, + "grad_norm": 0.38623067468242067, + "learning_rate": 9.782814788173787e-06, + "loss": 0.0771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0651584044098854, + "step": 4685, + "valid_targets_mean": 3706.1, + "valid_targets_min": 2692 + }, + { + "epoch": 4.931650893796004, 
+ "grad_norm": 0.43803480968353947, + "learning_rate": 9.737770418758808e-06, + "loss": 0.0789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08176561444997787, + "step": 4690, + "valid_targets_mean": 3458.6, + "valid_targets_min": 2197 + }, + { + "epoch": 4.936908517350158, + "grad_norm": 0.3810855503521825, + "learning_rate": 9.692796597000438e-06, + "loss": 0.0677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05980216711759567, + "step": 4695, + "valid_targets_mean": 2549.4, + "valid_targets_min": 856 + }, + { + "epoch": 4.942166140904312, + "grad_norm": 1.0239897846847723, + "learning_rate": 9.64789363207103e-06, + "loss": 0.0846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12131184339523315, + "step": 4700, + "valid_targets_mean": 848.2, + "valid_targets_min": 563 + }, + { + "epoch": 4.947423764458465, + "grad_norm": 0.3832887411067774, + "learning_rate": 9.603061832655847e-06, + "loss": 0.0765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0646553784608841, + "step": 4705, + "valid_targets_mean": 2999.2, + "valid_targets_min": 719 + }, + { + "epoch": 4.952681388012619, + "grad_norm": 0.47030377743660756, + "learning_rate": 9.55830150695093e-06, + "loss": 0.0721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06938720494508743, + "step": 4710, + "valid_targets_mean": 2648.6, + "valid_targets_min": 779 + }, + { + "epoch": 4.957939011566772, + "grad_norm": 0.4582636643027542, + "learning_rate": 9.513612962660935e-06, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0744820311665535, + "step": 4715, + "valid_targets_mean": 3591.1, + "valid_targets_min": 2538 + }, + { + "epoch": 4.963196635120926, + "grad_norm": 0.32015615688546256, + "learning_rate": 9.468996506997093e-06, + "loss": 0.08, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05729362741112709, + "step": 4720, + "valid_targets_mean": 4131.8, + "valid_targets_min": 485 + }, + { + "epoch": 4.968454258675079, + "grad_norm": 0.30273559116878307, + "learning_rate": 9.424452446675059e-06, + "loss": 0.0697, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.07140018790960312, + "step": 4725, + "valid_targets_mean": 4730.3, + "valid_targets_min": 800 + }, + { + "epoch": 4.9737118822292326, + "grad_norm": 0.37197894705917245, + "learning_rate": 9.379981087912795e-06, + "loss": 0.0704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0704803466796875, + "step": 4730, + "valid_targets_mean": 3955.4, + "valid_targets_min": 740 + }, + { + "epoch": 4.978969505783386, + "grad_norm": 0.4471685236026393, + "learning_rate": 9.33558273642848e-06, + "loss": 0.0828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.106879323720932, + "step": 4735, + "valid_targets_mean": 2921.7, + "valid_targets_min": 872 + }, + { + "epoch": 4.9842271293375395, + "grad_norm": 0.40984484847997404, + "learning_rate": 9.291257697438393e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07882986217737198, + "step": 4740, + "valid_targets_mean": 3169.6, + "valid_targets_min": 1015 + }, + { + "epoch": 4.989484752891693, + "grad_norm": 0.47209456294328006, + "learning_rate": 9.247006275654861e-06, + "loss": 0.0759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11203093826770782, + "step": 4745, + "valid_targets_mean": 2371.4, + "valid_targets_min": 967 + }, + { + "epoch": 4.9947423764458465, + "grad_norm": 0.36764238768667556, + "learning_rate": 9.202828775284101e-06, + "loss": 0.0709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06462462246417999, + "step": 4750, + "valid_targets_mean": 3504.9, + "valid_targets_min": 886 + }, + { + "epoch": 5.0, + "grad_norm": 0.4445734168860689, + "learning_rate": 9.158725500024148e-06, + "loss": 0.1046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0679134726524353, + "step": 4755, + "valid_targets_mean": 2843.0, + "valid_targets_min": 791 + }, + { + "epoch": 5.0052576235541535, + "grad_norm": 0.9005010386115802, + "learning_rate": 9.114696753062816e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1408640295267105, + "step": 4760, + "valid_targets_mean": 
1423.4, + "valid_targets_min": 715 + }, + { + "epoch": 5.010515247108307, + "grad_norm": 0.7837957016055458, + "learning_rate": 9.07074283707554e-06, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13309352099895477, + "step": 4765, + "valid_targets_mean": 1462.5, + "valid_targets_min": 612 + }, + { + "epoch": 5.0157728706624605, + "grad_norm": 0.7528858768740857, + "learning_rate": 9.026864054223337e-06, + "loss": 0.1383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1365063637495041, + "step": 4770, + "valid_targets_mean": 1514.8, + "valid_targets_min": 714 + }, + { + "epoch": 5.021030494216614, + "grad_norm": 0.7788963160976209, + "learning_rate": 8.98306070615073e-06, + "loss": 0.1289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12093943357467651, + "step": 4775, + "valid_targets_mean": 1450.1, + "valid_targets_min": 754 + }, + { + "epoch": 5.0262881177707674, + "grad_norm": 0.8483084654151909, + "learning_rate": 8.93933309398368e-06, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15616527199745178, + "step": 4780, + "valid_targets_mean": 1548.3, + "valid_targets_min": 718 + }, + { + "epoch": 5.031545741324921, + "grad_norm": 0.7540250321317807, + "learning_rate": 8.89568151832745e-06, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13268810510635376, + "step": 4785, + "valid_targets_mean": 1539.9, + "valid_targets_min": 731 + }, + { + "epoch": 5.036803364879074, + "grad_norm": 0.7300297057608534, + "learning_rate": 8.852106279264643e-06, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12216250598430634, + "step": 4790, + "valid_targets_mean": 1543.9, + "valid_targets_min": 725 + }, + { + "epoch": 5.042060988433228, + "grad_norm": 0.8339538250265318, + "learning_rate": 8.808607676353074e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13071037828922272, + "step": 4795, + "valid_targets_mean": 1371.5, + "valid_targets_min": 610 + }, + { + "epoch": 5.047318611987381, + "grad_norm": 
0.7673761030296782, + "learning_rate": 8.765186008623706e-06, + "loss": 0.1275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13140574097633362, + "step": 4800, + "valid_targets_mean": 1560.6, + "valid_targets_min": 720 + }, + { + "epoch": 5.052576235541535, + "grad_norm": 0.7277150857883916, + "learning_rate": 8.721841574578617e-06, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11650878190994263, + "step": 4805, + "valid_targets_mean": 1439.9, + "valid_targets_min": 765 + }, + { + "epoch": 5.057833859095688, + "grad_norm": 0.9002869437597668, + "learning_rate": 8.678574672188963e-06, + "loss": 0.1313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15190349519252777, + "step": 4810, + "valid_targets_mean": 1455.1, + "valid_targets_min": 593 + }, + { + "epoch": 5.063091482649842, + "grad_norm": 0.7262265694953299, + "learning_rate": 8.635385598892881e-06, + "loss": 0.133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11696530133485794, + "step": 4815, + "valid_targets_mean": 1540.4, + "valid_targets_min": 704 + }, + { + "epoch": 5.068349106203995, + "grad_norm": 0.7401947702855186, + "learning_rate": 8.592274651593482e-06, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1198652982711792, + "step": 4820, + "valid_targets_mean": 1387.2, + "valid_targets_min": 722 + }, + { + "epoch": 5.07360672975815, + "grad_norm": 0.7212377234961793, + "learning_rate": 8.549242126656814e-06, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11112427711486816, + "step": 4825, + "valid_targets_mean": 1581.9, + "valid_targets_min": 865 + }, + { + "epoch": 5.078864353312303, + "grad_norm": 0.8193893885403605, + "learning_rate": 8.506288319909793e-06, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13221386075019836, + "step": 4830, + "valid_targets_mean": 1635.6, + "valid_targets_min": 797 + }, + { + "epoch": 5.084121976866457, + "grad_norm": 0.8628254394165564, + "learning_rate": 8.463413526638186e-06, + "loss": 0.1336, + "loss_nan_ranks": 
0, + "loss_rank_avg": 0.14875589311122894, + "step": 4835, + "valid_targets_mean": 1542.4, + "valid_targets_min": 728 + }, + { + "epoch": 5.08937960042061, + "grad_norm": 0.861777417260219, + "learning_rate": 8.420618041584604e-06, + "loss": 0.1362, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15293246507644653, + "step": 4840, + "valid_targets_mean": 1879.2, + "valid_targets_min": 683 + }, + { + "epoch": 5.094637223974764, + "grad_norm": 0.8301782418150703, + "learning_rate": 8.377902158946427e-06, + "loss": 0.1341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13325785100460052, + "step": 4845, + "valid_targets_mean": 1409.0, + "valid_targets_min": 706 + }, + { + "epoch": 5.099894847528917, + "grad_norm": 0.7828651354098343, + "learning_rate": 8.335266172373832e-06, + "loss": 0.1225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11418794840574265, + "step": 4850, + "valid_targets_mean": 1377.8, + "valid_targets_min": 625 + }, + { + "epoch": 5.105152471083071, + "grad_norm": 0.7527964194617114, + "learning_rate": 8.292710374967737e-06, + "loss": 0.1255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13174590468406677, + "step": 4855, + "valid_targets_mean": 1811.9, + "valid_targets_min": 790 + }, + { + "epoch": 5.110410094637224, + "grad_norm": 0.8640937270300099, + "learning_rate": 8.250235059277792e-06, + "loss": 0.1332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15677103400230408, + "step": 4860, + "valid_targets_mean": 1814.3, + "valid_targets_min": 684 + }, + { + "epoch": 5.115667718191378, + "grad_norm": 0.7204529998017919, + "learning_rate": 8.207840517300398e-06, + "loss": 0.1236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10853917896747589, + "step": 4865, + "valid_targets_mean": 1493.4, + "valid_targets_min": 739 + }, + { + "epoch": 5.120925341745531, + "grad_norm": 0.7863794725114303, + "learning_rate": 8.165527040476666e-06, + "loss": 0.1237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11814877390861511, + "step": 4870, + "valid_targets_mean": 1456.2, + 
"valid_targets_min": 657 + }, + { + "epoch": 5.126182965299685, + "grad_norm": 0.8210569699214127, + "learning_rate": 8.123294919690413e-06, + "loss": 0.117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11533160507678986, + "step": 4875, + "valid_targets_mean": 1340.2, + "valid_targets_min": 783 + }, + { + "epoch": 5.131440588853838, + "grad_norm": 0.9282844049860665, + "learning_rate": 8.081144445266201e-06, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.132160946726799, + "step": 4880, + "valid_targets_mean": 1569.0, + "valid_targets_min": 734 + }, + { + "epoch": 5.136698212407992, + "grad_norm": 0.8233172786784679, + "learning_rate": 8.039075906967293e-06, + "loss": 0.1234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13213878870010376, + "step": 4885, + "valid_targets_mean": 1595.7, + "valid_targets_min": 678 + }, + { + "epoch": 5.141955835962145, + "grad_norm": 0.7749844452020539, + "learning_rate": 7.99708959399368e-06, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12865442037582397, + "step": 4890, + "valid_targets_mean": 1801.3, + "valid_targets_min": 843 + }, + { + "epoch": 5.147213459516299, + "grad_norm": 0.7508507714627392, + "learning_rate": 7.955185794980117e-06, + "loss": 0.1293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1306474506855011, + "step": 4895, + "valid_targets_mean": 1652.4, + "valid_targets_min": 782 + }, + { + "epoch": 5.152471083070452, + "grad_norm": 0.7429461379936947, + "learning_rate": 7.913364797994111e-06, + "loss": 0.1208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13677969574928284, + "step": 4900, + "valid_targets_mean": 1684.6, + "valid_targets_min": 957 + }, + { + "epoch": 5.157728706624606, + "grad_norm": 0.7965727542916411, + "learning_rate": 7.871626890533917e-06, + "loss": 0.1253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1270841658115387, + "step": 4905, + "valid_targets_mean": 1482.9, + "valid_targets_min": 713 + }, + { + "epoch": 5.162986330178759, + "grad_norm": 0.8872711618376462, + 
"learning_rate": 7.829972359526626e-06, + "loss": 0.1249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12530039250850677, + "step": 4910, + "valid_targets_mean": 1710.1, + "valid_targets_min": 726 + }, + { + "epoch": 5.168243953732913, + "grad_norm": 0.8390256007090793, + "learning_rate": 7.788401491326155e-06, + "loss": 0.1167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12561118602752686, + "step": 4915, + "valid_targets_mean": 1638.9, + "valid_targets_min": 1147 + }, + { + "epoch": 5.173501577287066, + "grad_norm": 0.7855405662254581, + "learning_rate": 7.746914571711264e-06, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12468838691711426, + "step": 4920, + "valid_targets_mean": 1525.7, + "valid_targets_min": 700 + }, + { + "epoch": 5.1787592008412195, + "grad_norm": 0.7809332920528532, + "learning_rate": 7.705511885883612e-06, + "loss": 0.1099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10117724537849426, + "step": 4925, + "valid_targets_mean": 1255.4, + "valid_targets_min": 640 + }, + { + "epoch": 5.184016824395373, + "grad_norm": 0.8513594581031201, + "learning_rate": 7.664193718465814e-06, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12775394320487976, + "step": 4930, + "valid_targets_mean": 1618.7, + "valid_targets_min": 968 + }, + { + "epoch": 5.1892744479495265, + "grad_norm": 0.7692459940995934, + "learning_rate": 7.622960353499438e-06, + "loss": 0.1144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11013204604387283, + "step": 4935, + "valid_targets_mean": 1466.6, + "valid_targets_min": 724 + }, + { + "epoch": 5.19453207150368, + "grad_norm": 0.8329152272449081, + "learning_rate": 7.581812074443084e-06, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12576130032539368, + "step": 4940, + "valid_targets_mean": 1455.1, + "valid_targets_min": 657 + }, + { + "epoch": 5.1997896950578335, + "grad_norm": 1.0151564464097922, + "learning_rate": 7.5407491641704464e-06, + "loss": 0.1195, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.1216731071472168, + "step": 4945, + "valid_targets_mean": 1309.6, + "valid_targets_min": 746 + }, + { + "epoch": 5.205047318611987, + "grad_norm": 0.9179001865879102, + "learning_rate": 7.499771904968332e-06, + "loss": 0.1213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1344885230064392, + "step": 4950, + "valid_targets_mean": 1437.6, + "valid_targets_min": 692 + }, + { + "epoch": 5.2103049421661405, + "grad_norm": 0.862974881249526, + "learning_rate": 7.45888057853474e-06, + "loss": 0.1195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.111325204372406, + "step": 4955, + "valid_targets_mean": 1344.7, + "valid_targets_min": 776 + }, + { + "epoch": 5.215562565720294, + "grad_norm": 0.7333557496808919, + "learning_rate": 7.418075465976944e-06, + "loss": 0.1193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11829602718353271, + "step": 4960, + "valid_targets_mean": 1535.1, + "valid_targets_min": 703 + }, + { + "epoch": 5.220820189274448, + "grad_norm": 0.7830640312550541, + "learning_rate": 7.3773568478095184e-06, + "loss": 0.1269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11570233106613159, + "step": 4965, + "valid_targets_mean": 1439.4, + "valid_targets_min": 573 + }, + { + "epoch": 5.226077812828602, + "grad_norm": 0.9281018824741001, + "learning_rate": 7.336725003952456e-06, + "loss": 0.1347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171891987323761, + "step": 4970, + "valid_targets_mean": 1967.9, + "valid_targets_min": 734 + }, + { + "epoch": 5.231335436382755, + "grad_norm": 0.7602047228259172, + "learning_rate": 7.296180213729196e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11368528008460999, + "step": 4975, + "valid_targets_mean": 1489.4, + "valid_targets_min": 607 + }, + { + "epoch": 5.236593059936909, + "grad_norm": 0.8163873914502632, + "learning_rate": 7.255722755864734e-06, + "loss": 0.1179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1246756985783577, + "step": 4980, + "valid_targets_mean": 1777.6, + 
"valid_targets_min": 746 + }, + { + "epoch": 5.241850683491062, + "grad_norm": 0.9037339124607958, + "learning_rate": 7.21535290848372e-06, + "loss": 0.1216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13662774860858917, + "step": 4985, + "valid_targets_mean": 1648.7, + "valid_targets_min": 676 + }, + { + "epoch": 5.247108307045216, + "grad_norm": 0.8510932605814922, + "learning_rate": 7.175070949108496e-06, + "loss": 0.1179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12033072113990784, + "step": 4990, + "valid_targets_mean": 1432.2, + "valid_targets_min": 780 + }, + { + "epoch": 5.252365930599369, + "grad_norm": 0.8274094624263375, + "learning_rate": 7.1348771546572315e-06, + "loss": 0.1126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11697205901145935, + "step": 4995, + "valid_targets_mean": 1549.0, + "valid_targets_min": 677 + }, + { + "epoch": 5.257623554153523, + "grad_norm": 0.809619153579325, + "learning_rate": 7.09477180144202e-06, + "loss": 0.1122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09959600865840912, + "step": 5000, + "valid_targets_mean": 1158.9, + "valid_targets_min": 737 + }, + { + "epoch": 5.262881177707676, + "grad_norm": 0.8799151043524882, + "learning_rate": 7.054755165166945e-06, + "loss": 0.126, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12933531403541565, + "step": 5005, + "valid_targets_mean": 1459.0, + "valid_targets_min": 726 + }, + { + "epoch": 5.26813880126183, + "grad_norm": 0.776583499205732, + "learning_rate": 7.014827520926206e-06, + "loss": 0.1205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12277175486087799, + "step": 5010, + "valid_targets_mean": 1715.9, + "valid_targets_min": 786 + }, + { + "epoch": 5.273396424815983, + "grad_norm": 0.8198930875261842, + "learning_rate": 6.9749891432022505e-06, + "loss": 0.1192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11355794966220856, + "step": 5015, + "valid_targets_mean": 1492.1, + "valid_targets_min": 944 + }, + { + "epoch": 5.278654048370137, + "grad_norm": 0.7682054981686061, 
+ "learning_rate": 6.935240305863844e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10861600935459137, + "step": 5020, + "valid_targets_mean": 1427.2, + "valid_targets_min": 963 + }, + { + "epoch": 5.28391167192429, + "grad_norm": 0.7566950494544487, + "learning_rate": 6.895581282164201e-06, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10474662482738495, + "step": 5025, + "valid_targets_mean": 1435.9, + "valid_targets_min": 740 + }, + { + "epoch": 5.289169295478444, + "grad_norm": 0.7390738713121238, + "learning_rate": 6.856012344739138e-06, + "loss": 0.1212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12179842591285706, + "step": 5030, + "valid_targets_mean": 1745.8, + "valid_targets_min": 727 + }, + { + "epoch": 5.294426919032597, + "grad_norm": 0.799283542407322, + "learning_rate": 6.816533765605144e-06, + "loss": 0.1171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12094929069280624, + "step": 5035, + "valid_targets_mean": 1522.8, + "valid_targets_min": 636 + }, + { + "epoch": 5.299684542586751, + "grad_norm": 0.749074534278653, + "learning_rate": 6.7771458161575685e-06, + "loss": 0.1155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11563584953546524, + "step": 5040, + "valid_targets_mean": 1510.6, + "valid_targets_min": 684 + }, + { + "epoch": 5.304942166140904, + "grad_norm": 0.8262227489966326, + "learning_rate": 6.737848767168709e-06, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12176688760519028, + "step": 5045, + "valid_targets_mean": 1509.7, + "valid_targets_min": 798 + }, + { + "epoch": 5.310199789695058, + "grad_norm": 0.8695341534442352, + "learning_rate": 6.698642888785965e-06, + "loss": 0.1176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11329036951065063, + "step": 5050, + "valid_targets_mean": 1489.8, + "valid_targets_min": 661 + }, + { + "epoch": 5.315457413249211, + "grad_norm": 0.8391937804125835, + "learning_rate": 6.659528450530006e-06, + "loss": 0.1232, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.11793771386146545, + "step": 5055, + "valid_targets_mean": 1510.6, + "valid_targets_min": 665 + }, + { + "epoch": 5.320715036803365, + "grad_norm": 0.8816010968775818, + "learning_rate": 6.6205057212928755e-06, + "loss": 0.1137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1263437271118164, + "step": 5060, + "valid_targets_mean": 1534.2, + "valid_targets_min": 692 + }, + { + "epoch": 5.325972660357518, + "grad_norm": 0.8237219106131886, + "learning_rate": 6.5815749693361645e-06, + "loss": 0.1185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.121507428586483, + "step": 5065, + "valid_targets_mean": 1480.6, + "valid_targets_min": 632 + }, + { + "epoch": 5.331230283911672, + "grad_norm": 0.9004329929490859, + "learning_rate": 6.542736462289188e-06, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12235601246356964, + "step": 5070, + "valid_targets_mean": 1504.4, + "valid_targets_min": 922 + }, + { + "epoch": 5.336487907465825, + "grad_norm": 0.7496268161167585, + "learning_rate": 6.503990467147101e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1159660667181015, + "step": 5075, + "valid_targets_mean": 1624.8, + "valid_targets_min": 710 + }, + { + "epoch": 5.341745531019979, + "grad_norm": 0.7777571192038039, + "learning_rate": 6.465337250269086e-06, + "loss": 0.1196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10574095696210861, + "step": 5080, + "valid_targets_mean": 1320.1, + "valid_targets_min": 589 + }, + { + "epoch": 5.347003154574132, + "grad_norm": 0.833015588661154, + "learning_rate": 6.426777077376538e-06, + "loss": 0.1181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12290021777153015, + "step": 5085, + "valid_targets_mean": 1348.9, + "valid_targets_min": 750 + }, + { + "epoch": 5.352260778128286, + "grad_norm": 0.8521212672387877, + "learning_rate": 6.388310213551223e-06, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10389544069766998, + "step": 5090, + "valid_targets_mean": 1333.6, + 
"valid_targets_min": 641 + }, + { + "epoch": 5.357518401682439, + "grad_norm": 0.9133948974777728, + "learning_rate": 6.349936923233422e-06, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13117776811122894, + "step": 5095, + "valid_targets_mean": 1649.7, + "valid_targets_min": 701 + }, + { + "epoch": 5.3627760252365935, + "grad_norm": 0.783763722428151, + "learning_rate": 6.311657470220178e-06, + "loss": 0.1282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11626783758401871, + "step": 5100, + "valid_targets_mean": 1819.2, + "valid_targets_min": 1219 + }, + { + "epoch": 5.368033648790747, + "grad_norm": 0.7799184597124955, + "learning_rate": 6.273472117663446e-06, + "loss": 0.1177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12296243757009506, + "step": 5105, + "valid_targets_mean": 1603.2, + "valid_targets_min": 770 + }, + { + "epoch": 5.3732912723449004, + "grad_norm": 0.7878676819686895, + "learning_rate": 6.2353811280682715e-06, + "loss": 0.1187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11289148777723312, + "step": 5110, + "valid_targets_mean": 1434.4, + "valid_targets_min": 646 + }, + { + "epoch": 5.378548895899054, + "grad_norm": 0.9061613572105647, + "learning_rate": 6.19738476329101e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11136360466480255, + "step": 5115, + "valid_targets_mean": 1250.6, + "valid_targets_min": 723 + }, + { + "epoch": 5.383806519453207, + "grad_norm": 0.790316147629043, + "learning_rate": 6.159483284537533e-06, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10790354013442993, + "step": 5120, + "valid_targets_mean": 1461.3, + "valid_targets_min": 808 + }, + { + "epoch": 5.389064143007361, + "grad_norm": 0.8603836573474936, + "learning_rate": 6.121676952361395e-06, + "loss": 0.1204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11386443674564362, + "step": 5125, + "valid_targets_mean": 1299.6, + "valid_targets_min": 688 + }, + { + "epoch": 5.394321766561514, + "grad_norm": 
0.8504138415456971, + "learning_rate": 6.083966026662076e-06, + "loss": 0.112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11406168341636658, + "step": 5130, + "valid_targets_mean": 1337.1, + "valid_targets_min": 821 + }, + { + "epoch": 5.399579390115668, + "grad_norm": 0.8004192475623578, + "learning_rate": 6.046350766683194e-06, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11288442462682724, + "step": 5135, + "valid_targets_mean": 1472.1, + "valid_targets_min": 849 + }, + { + "epoch": 5.404837013669821, + "grad_norm": 0.802643951488826, + "learning_rate": 6.0088314310107e-06, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11967991292476654, + "step": 5140, + "valid_targets_mean": 1464.6, + "valid_targets_min": 781 + }, + { + "epoch": 5.410094637223975, + "grad_norm": 0.7966846618111609, + "learning_rate": 5.9714082775711115e-06, + "loss": 0.1229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10847964137792587, + "step": 5145, + "valid_targets_mean": 1378.8, + "valid_targets_min": 522 + }, + { + "epoch": 5.415352260778128, + "grad_norm": 0.7232157253256309, + "learning_rate": 5.934081563629764e-06, + "loss": 0.1136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09298036247491837, + "step": 5150, + "valid_targets_mean": 1256.2, + "valid_targets_min": 694 + }, + { + "epoch": 5.420609884332282, + "grad_norm": 0.8020934155278029, + "learning_rate": 5.896851545788987e-06, + "loss": 0.1217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13270168006420135, + "step": 5155, + "valid_targets_mean": 2054.5, + "valid_targets_min": 868 + }, + { + "epoch": 5.425867507886435, + "grad_norm": 0.7236585753424789, + "learning_rate": 5.859718479986407e-06, + "loss": 0.1091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10811392217874527, + "step": 5160, + "valid_targets_mean": 1743.0, + "valid_targets_min": 682 + }, + { + "epoch": 5.431125131440589, + "grad_norm": 0.8626665525950744, + "learning_rate": 5.822682621493132e-06, + "loss": 0.1099, + "loss_nan_ranks": 
0, + "loss_rank_avg": 0.13062599301338196, + "step": 5165, + "valid_targets_mean": 1531.3, + "valid_targets_min": 797 + }, + { + "epoch": 5.436382754994742, + "grad_norm": 0.7359508503916516, + "learning_rate": 5.7857442249120155e-06, + "loss": 0.1141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11487710475921631, + "step": 5170, + "valid_targets_mean": 1780.8, + "valid_targets_min": 639 + }, + { + "epoch": 5.441640378548896, + "grad_norm": 0.7896426613147863, + "learning_rate": 5.748903544175934e-06, + "loss": 0.1187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1125001609325409, + "step": 5175, + "valid_targets_mean": 1485.3, + "valid_targets_min": 920 + }, + { + "epoch": 5.446898002103049, + "grad_norm": 0.8392109408911339, + "learning_rate": 5.712160832545992e-06, + "loss": 0.1151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10780222713947296, + "step": 5180, + "valid_targets_mean": 1408.1, + "valid_targets_min": 874 + }, + { + "epoch": 5.452155625657203, + "grad_norm": 0.7633564455285415, + "learning_rate": 5.675516342609811e-06, + "loss": 0.1178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11002111434936523, + "step": 5185, + "valid_targets_mean": 1382.9, + "valid_targets_min": 557 + }, + { + "epoch": 5.457413249211356, + "grad_norm": 0.8817069529885941, + "learning_rate": 5.638970326279802e-06, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12003359943628311, + "step": 5190, + "valid_targets_mean": 1290.4, + "valid_targets_min": 645 + }, + { + "epoch": 5.46267087276551, + "grad_norm": 0.888054456172169, + "learning_rate": 5.602523034791407e-06, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1100107878446579, + "step": 5195, + "valid_targets_mean": 1449.4, + "valid_targets_min": 393 + }, + { + "epoch": 5.467928496319663, + "grad_norm": 0.6440286107130968, + "learning_rate": 5.566174718701378e-06, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10465237498283386, + "step": 5200, + "valid_targets_mean": 2912.0, + 
"valid_targets_min": 458 + }, + { + "epoch": 5.473186119873817, + "grad_norm": 0.4906964684696538, + "learning_rate": 5.529925627886079e-06, + "loss": 0.0757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07015372812747955, + "step": 5205, + "valid_targets_mean": 3155.2, + "valid_targets_min": 1491 + }, + { + "epoch": 5.47844374342797, + "grad_norm": 0.4411436651011096, + "learning_rate": 5.493776011539749e-06, + "loss": 0.0722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07336966693401337, + "step": 5210, + "valid_targets_mean": 3495.2, + "valid_targets_min": 2068 + }, + { + "epoch": 5.483701366982124, + "grad_norm": 0.38990773237108856, + "learning_rate": 5.457726118172761e-06, + "loss": 0.0886, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07696102559566498, + "step": 5215, + "valid_targets_mean": 3671.3, + "valid_targets_min": 2425 + }, + { + "epoch": 5.488958990536277, + "grad_norm": 0.40235911971158483, + "learning_rate": 5.421776195609982e-06, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07264817506074905, + "step": 5220, + "valid_targets_mean": 3131.7, + "valid_targets_min": 1198 + }, + { + "epoch": 5.494216614090431, + "grad_norm": 0.7366905018196068, + "learning_rate": 5.385926490989e-06, + "loss": 0.0872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13003386557102203, + "step": 5225, + "valid_targets_mean": 1250.6, + "valid_targets_min": 663 + }, + { + "epoch": 5.499474237644584, + "grad_norm": 0.43867962358000606, + "learning_rate": 5.350177250758479e-06, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08369845151901245, + "step": 5230, + "valid_targets_mean": 3237.3, + "valid_targets_min": 954 + }, + { + "epoch": 5.504731861198739, + "grad_norm": 0.4840523314937838, + "learning_rate": 5.314528720676424e-06, + "loss": 0.0779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09003119170665741, + "step": 5235, + "valid_targets_mean": 3020.2, + "valid_targets_min": 781 + }, + { + "epoch": 5.509989484752892, + "grad_norm": 
0.3522203690286094, + "learning_rate": 5.2789811458085085e-06, + "loss": 0.0837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.055763859301805496, + "step": 5240, + "valid_targets_mean": 3357.7, + "valid_targets_min": 799 + }, + { + "epoch": 5.515247108307046, + "grad_norm": 0.34572863197273945, + "learning_rate": 5.243534770526404e-06, + "loss": 0.0968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05645620822906494, + "step": 5245, + "valid_targets_mean": 4872.1, + "valid_targets_min": 2802 + }, + { + "epoch": 5.520504731861199, + "grad_norm": 0.4146113190989375, + "learning_rate": 5.208189838506074e-06, + "loss": 0.0717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06516373157501221, + "step": 5250, + "valid_targets_mean": 2990.6, + "valid_targets_min": 574 + }, + { + "epoch": 5.5257623554153525, + "grad_norm": 0.3409140953289381, + "learning_rate": 5.172946592726109e-06, + "loss": 0.0757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06866136193275452, + "step": 5255, + "valid_targets_mean": 3588.3, + "valid_targets_min": 813 + }, + { + "epoch": 5.531019978969506, + "grad_norm": 0.5133526501421567, + "learning_rate": 5.137805275466072e-06, + "loss": 0.082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11353409290313721, + "step": 5260, + "valid_targets_mean": 2007.3, + "valid_targets_min": 593 + }, + { + "epoch": 5.5362776025236595, + "grad_norm": 0.39553778922882177, + "learning_rate": 5.1027661283048036e-06, + "loss": 0.0988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0642944723367691, + "step": 5265, + "valid_targets_mean": 3185.9, + "valid_targets_min": 1185 + }, + { + "epoch": 5.541535226077813, + "grad_norm": 0.537024794805001, + "learning_rate": 5.067829392118775e-06, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09173554182052612, + "step": 5270, + "valid_targets_mean": 1841.9, + "valid_targets_min": 376 + }, + { + "epoch": 5.5467928496319665, + "grad_norm": 0.3928391902108944, + "learning_rate": 5.03299530708045e-06, + "loss": 0.0682, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.048176899552345276, + "step": 5275, + "valid_targets_mean": 2904.6, + "valid_targets_min": 910 + }, + { + "epoch": 5.55205047318612, + "grad_norm": 0.3710063012964714, + "learning_rate": 4.998264112656617e-06, + "loss": 0.0636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06277212500572205, + "step": 5280, + "valid_targets_mean": 3779.6, + "valid_targets_min": 2798 + }, + { + "epoch": 5.5573080967402735, + "grad_norm": 0.47273065075899023, + "learning_rate": 4.963636047606712e-06, + "loss": 0.0861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0711083710193634, + "step": 5285, + "valid_targets_mean": 2611.1, + "valid_targets_min": 471 + }, + { + "epoch": 5.562565720294427, + "grad_norm": 0.4563423626132676, + "learning_rate": 4.929111349981244e-06, + "loss": 0.0827, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08847686648368835, + "step": 5290, + "valid_targets_mean": 2535.8, + "valid_targets_min": 652 + }, + { + "epoch": 5.5678233438485805, + "grad_norm": 0.4951752395502576, + "learning_rate": 4.894690257120114e-06, + "loss": 0.0831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08223095536231995, + "step": 5295, + "valid_targets_mean": 3190.8, + "valid_targets_min": 474 + }, + { + "epoch": 5.573080967402734, + "grad_norm": 0.3781062874552959, + "learning_rate": 4.860373005650985e-06, + "loss": 0.0806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0642784982919693, + "step": 5300, + "valid_targets_mean": 3511.1, + "valid_targets_min": 640 + }, + { + "epoch": 5.578338590956887, + "grad_norm": 0.3607086035283592, + "learning_rate": 4.826159831487656e-06, + "loss": 0.1142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06361603736877441, + "step": 5305, + "valid_targets_mean": 3521.9, + "valid_targets_min": 902 + }, + { + "epoch": 5.583596214511041, + "grad_norm": 0.4047310585797599, + "learning_rate": 4.792050969828474e-06, + "loss": 0.0676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0712086632847786, + "step": 5310, + 
"valid_targets_mean": 3062.9, + "valid_targets_min": 1568 + }, + { + "epoch": 5.588853838065194, + "grad_norm": 0.7671277660530172, + "learning_rate": 4.758046655154664e-06, + "loss": 0.0968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11731690913438797, + "step": 5315, + "valid_targets_mean": 1179.9, + "valid_targets_min": 534 + }, + { + "epoch": 5.594111461619348, + "grad_norm": 0.5536578463924902, + "learning_rate": 4.72414712122875e-06, + "loss": 0.091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10014639794826508, + "step": 5320, + "valid_targets_mean": 2067.3, + "valid_targets_min": 548 + }, + { + "epoch": 5.599369085173501, + "grad_norm": 0.4516327627947546, + "learning_rate": 4.690352601092954e-06, + "loss": 0.0744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06019534170627594, + "step": 5325, + "valid_targets_mean": 2839.8, + "valid_targets_min": 664 + }, + { + "epoch": 5.604626708727655, + "grad_norm": 0.5889218711339791, + "learning_rate": 4.656663327067563e-06, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2736669182777405, + "step": 5330, + "valid_targets_mean": 2888.0, + "valid_targets_min": 878 + }, + { + "epoch": 5.609884332281808, + "grad_norm": 0.5600330545340294, + "learning_rate": 4.623079530749355e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08541359007358551, + "step": 5335, + "valid_targets_mean": 2609.9, + "valid_targets_min": 756 + }, + { + "epoch": 5.615141955835962, + "grad_norm": 0.3511085199552129, + "learning_rate": 4.589601443010012e-06, + "loss": 0.0816, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06310705095529556, + "step": 5340, + "valid_targets_mean": 3436.7, + "valid_targets_min": 1968 + }, + { + "epoch": 5.620399579390115, + "grad_norm": 0.6721906973672241, + "learning_rate": 4.55622929399451e-06, + "loss": 0.1564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33643388748168945, + "step": 5345, + "valid_targets_mean": 2644.9, + "valid_targets_min": 1153 + }, + { + "epoch": 5.625657202944269, 
+ "grad_norm": 0.3166222980260612, + "learning_rate": 4.522963313119564e-06, + "loss": 0.0894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059898123145103455, + "step": 5350, + "valid_targets_mean": 4109.6, + "valid_targets_min": 1010 + }, + { + "epoch": 5.630914826498422, + "grad_norm": 0.371302765978117, + "learning_rate": 4.48980372907202e-06, + "loss": 0.0741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06634600460529327, + "step": 5355, + "valid_targets_mean": 4150.4, + "valid_targets_min": 789 + }, + { + "epoch": 5.636172450052577, + "grad_norm": 0.4363528525094378, + "learning_rate": 4.456750769807303e-06, + "loss": 0.083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09076633304357529, + "step": 5360, + "valid_targets_mean": 3907.9, + "valid_targets_min": 784 + }, + { + "epoch": 5.641430073606729, + "grad_norm": 0.3881675184846364, + "learning_rate": 4.4238046625478635e-06, + "loss": 0.0703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06289593130350113, + "step": 5365, + "valid_targets_mean": 2426.2, + "valid_targets_min": 488 + }, + { + "epoch": 5.646687697160884, + "grad_norm": 0.44115229165527364, + "learning_rate": 4.390965633781579e-06, + "loss": 0.0747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08500753343105316, + "step": 5370, + "valid_targets_mean": 3060.2, + "valid_targets_min": 774 + }, + { + "epoch": 5.651945320715037, + "grad_norm": 0.4163457086701572, + "learning_rate": 4.358233909260215e-06, + "loss": 0.0653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06481081247329712, + "step": 5375, + "valid_targets_mean": 2013.9, + "valid_targets_min": 712 + }, + { + "epoch": 5.657202944269191, + "grad_norm": 0.3489182883360053, + "learning_rate": 4.3256097139978934e-06, + "loss": 0.0716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.057638153433799744, + "step": 5380, + "valid_targets_mean": 3445.8, + "valid_targets_min": 1770 + }, + { + "epoch": 5.662460567823344, + "grad_norm": 0.36595440417044695, + "learning_rate": 4.293093272269513e-06, + "loss": 
0.0611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060730718076229095, + "step": 5385, + "valid_targets_mean": 3535.2, + "valid_targets_min": 1761 + }, + { + "epoch": 5.667718191377498, + "grad_norm": 0.3444894547671019, + "learning_rate": 4.260684807609217e-06, + "loss": 0.1103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.043215736746788025, + "step": 5390, + "valid_targets_mean": 3596.6, + "valid_targets_min": 772 + }, + { + "epoch": 5.672975814931651, + "grad_norm": 0.4007388592256331, + "learning_rate": 4.22838454280887e-06, + "loss": 0.0585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061987798660993576, + "step": 5395, + "valid_targets_mean": 3249.3, + "valid_targets_min": 722 + }, + { + "epoch": 5.678233438485805, + "grad_norm": 0.3675365529597313, + "learning_rate": 4.196192699916528e-06, + "loss": 0.0612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05437369644641876, + "step": 5400, + "valid_targets_mean": 3712.8, + "valid_targets_min": 2535 + }, + { + "epoch": 5.683491062039958, + "grad_norm": 0.43640798823250465, + "learning_rate": 4.164109500234865e-06, + "loss": 0.0648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06860943138599396, + "step": 5405, + "valid_targets_mean": 3262.0, + "valid_targets_min": 948 + }, + { + "epoch": 5.688748685594112, + "grad_norm": 0.6561863580374057, + "learning_rate": 4.1321351643197235e-06, + "loss": 0.0898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13322700560092926, + "step": 5410, + "valid_targets_mean": 1491.4, + "valid_targets_min": 490 + }, + { + "epoch": 5.694006309148265, + "grad_norm": 0.4358763223896758, + "learning_rate": 4.100269911978549e-06, + "loss": 0.0803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07011303305625916, + "step": 5415, + "valid_targets_mean": 3696.4, + "valid_targets_min": 1196 + }, + { + "epoch": 5.699263932702419, + "grad_norm": 0.5739171072047936, + "learning_rate": 4.068513962268892e-06, + "loss": 0.0977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08923690021038055, + "step": 5420, + 
"valid_targets_mean": 1915.5, + "valid_targets_min": 575 + }, + { + "epoch": 5.704521556256572, + "grad_norm": 0.42608581865170847, + "learning_rate": 4.036867533496895e-06, + "loss": 0.0728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06260919570922852, + "step": 5425, + "valid_targets_mean": 2728.7, + "valid_targets_min": 524 + }, + { + "epoch": 5.709779179810726, + "grad_norm": 0.4701969559376083, + "learning_rate": 4.00533084321582e-06, + "loss": 0.0887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07386186718940735, + "step": 5430, + "valid_targets_mean": 2827.4, + "valid_targets_min": 693 + }, + { + "epoch": 5.715036803364879, + "grad_norm": 0.43626358271938687, + "learning_rate": 3.9739041082245114e-06, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07005483657121658, + "step": 5435, + "valid_targets_mean": 2377.9, + "valid_targets_min": 803 + }, + { + "epoch": 5.720294426919033, + "grad_norm": 0.4464928971357519, + "learning_rate": 3.942587544565932e-06, + "loss": 0.0799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08836561441421509, + "step": 5440, + "valid_targets_mean": 2733.9, + "valid_targets_min": 764 + }, + { + "epoch": 5.725552050473186, + "grad_norm": 0.4369438601678796, + "learning_rate": 3.9113813675256816e-06, + "loss": 0.0895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06848982721567154, + "step": 5445, + "valid_targets_mean": 3892.4, + "valid_targets_min": 2393 + }, + { + "epoch": 5.7308096740273395, + "grad_norm": 0.41273524626035957, + "learning_rate": 3.8802857916305006e-06, + "loss": 0.0778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06610330939292908, + "step": 5450, + "valid_targets_mean": 3477.7, + "valid_targets_min": 804 + }, + { + "epoch": 5.736067297581493, + "grad_norm": 0.3871325032451887, + "learning_rate": 3.849301030646797e-06, + "loss": 0.0771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07895655930042267, + "step": 5455, + "valid_targets_mean": 4753.2, + "valid_targets_min": 1549 + }, + { + "epoch": 
5.7413249211356465, + "grad_norm": 0.4217346153198867, + "learning_rate": 3.818427297579186e-06, + "loss": 0.0643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06543637812137604, + "step": 5460, + "valid_targets_mean": 2988.9, + "valid_targets_min": 596 + }, + { + "epoch": 5.7465825446898, + "grad_norm": 0.3136343984963145, + "learning_rate": 3.787664804669027e-06, + "loss": 0.0625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.051543548703193665, + "step": 5465, + "valid_targets_mean": 4565.8, + "valid_targets_min": 896 + }, + { + "epoch": 5.7518401682439535, + "grad_norm": 0.3621915752518174, + "learning_rate": 3.7570137633929647e-06, + "loss": 0.0723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06575092673301697, + "step": 5470, + "valid_targets_mean": 3763.9, + "valid_targets_min": 2696 + }, + { + "epoch": 5.757097791798107, + "grad_norm": 0.6122320525546388, + "learning_rate": 3.7264743844614424e-06, + "loss": 0.089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10286790877580643, + "step": 5475, + "valid_targets_mean": 1522.7, + "valid_targets_min": 203 + }, + { + "epoch": 5.7623554153522605, + "grad_norm": 0.4098948000241012, + "learning_rate": 3.6960468778173097e-06, + "loss": 0.0761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0847419798374176, + "step": 5480, + "valid_targets_mean": 3974.7, + "valid_targets_min": 2443 + }, + { + "epoch": 5.767613038906414, + "grad_norm": 0.5008404289507667, + "learning_rate": 3.665731452634347e-06, + "loss": 0.0854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07621408998966217, + "step": 5485, + "valid_targets_mean": 1714.2, + "valid_targets_min": 716 + }, + { + "epoch": 5.7728706624605675, + "grad_norm": 0.3784535088474085, + "learning_rate": 3.6355283173158153e-06, + "loss": 0.0598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06724974513053894, + "step": 5490, + "valid_targets_mean": 3683.6, + "valid_targets_min": 905 + }, + { + "epoch": 5.778128286014722, + "grad_norm": 0.4780606297581987, + "learning_rate": 
3.6054376794930467e-06, + "loss": 0.0791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08645504713058472, + "step": 5495, + "valid_targets_mean": 2942.3, + "valid_targets_min": 657 + }, + { + "epoch": 5.783385909568874, + "grad_norm": 0.40912613469195874, + "learning_rate": 3.5754597460240216e-06, + "loss": 0.0695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06450048089027405, + "step": 5500, + "valid_targets_mean": 3584.6, + "valid_targets_min": 1744 + }, + { + "epoch": 5.788643533123029, + "grad_norm": 0.6190582766331852, + "learning_rate": 3.5455947229919185e-06, + "loss": 0.0671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10218099504709244, + "step": 5505, + "valid_targets_mean": 2055.4, + "valid_targets_min": 627 + }, + { + "epoch": 5.793901156677181, + "grad_norm": 0.4863579439971359, + "learning_rate": 3.515842815703716e-06, + "loss": 0.0684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07434908300638199, + "step": 5510, + "valid_targets_mean": 2057.7, + "valid_targets_min": 727 + }, + { + "epoch": 5.799158780231336, + "grad_norm": 0.5401129388023387, + "learning_rate": 3.4862042286887943e-06, + "loss": 0.0745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08237025141716003, + "step": 5515, + "valid_targets_mean": 2091.9, + "valid_targets_min": 613 + }, + { + "epoch": 5.804416403785489, + "grad_norm": 0.39507077111937705, + "learning_rate": 3.456679165697494e-06, + "loss": 0.083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06957225501537323, + "step": 5520, + "valid_targets_mean": 3179.4, + "valid_targets_min": 958 + }, + { + "epoch": 5.809674027339643, + "grad_norm": 0.4904748389811793, + "learning_rate": 3.427267829699741e-06, + "loss": 0.0715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06819421052932739, + "step": 5525, + "valid_targets_mean": 2118.0, + "valid_targets_min": 495 + }, + { + "epoch": 5.814931650893796, + "grad_norm": 0.4705278464386405, + "learning_rate": 3.3979704228836586e-06, + "loss": 0.0804, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.07215887308120728, + "step": 5530, + "valid_targets_mean": 2350.0, + "valid_targets_min": 759 + }, + { + "epoch": 5.82018927444795, + "grad_norm": 0.6215471041321221, + "learning_rate": 3.3687871466541424e-06, + "loss": 0.088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12827762961387634, + "step": 5535, + "valid_targets_mean": 1728.8, + "valid_targets_min": 521 + }, + { + "epoch": 5.825446898002103, + "grad_norm": 0.5242690029033589, + "learning_rate": 3.339718201631521e-06, + "loss": 0.0716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08257465064525604, + "step": 5540, + "valid_targets_mean": 3473.8, + "valid_targets_min": 1032 + }, + { + "epoch": 5.830704521556257, + "grad_norm": 0.3725043819909827, + "learning_rate": 3.3107637876501352e-06, + "loss": 0.0591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04687100648880005, + "step": 5545, + "valid_targets_mean": 3574.1, + "valid_targets_min": 841 + }, + { + "epoch": 5.83596214511041, + "grad_norm": 0.4061944510779138, + "learning_rate": 3.2819241037569838e-06, + "loss": 0.0713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0636071115732193, + "step": 5550, + "valid_targets_mean": 2909.7, + "valid_targets_min": 927 + }, + { + "epoch": 5.841219768664564, + "grad_norm": 0.4711251498892174, + "learning_rate": 3.253199348210372e-06, + "loss": 0.0957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07953070849180222, + "step": 5555, + "valid_targets_mean": 3022.2, + "valid_targets_min": 935 + }, + { + "epoch": 5.846477392218717, + "grad_norm": 0.3797374307174812, + "learning_rate": 3.2245897184785103e-06, + "loss": 0.0576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05033276230096817, + "step": 5560, + "valid_targets_mean": 2978.2, + "valid_targets_min": 647 + }, + { + "epoch": 5.851735015772871, + "grad_norm": 0.3727209934715664, + "learning_rate": 3.1960954112381825e-06, + "loss": 0.0723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06503856182098389, + "step": 5565, + "valid_targets_mean": 4808.4, + "valid_targets_min": 
976 + }, + { + "epoch": 5.856992639327024, + "grad_norm": 0.3672931375398731, + "learning_rate": 3.1677166223733934e-06, + "loss": 0.0696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06206974387168884, + "step": 5570, + "valid_targets_mean": 4306.9, + "valid_targets_min": 995 + }, + { + "epoch": 5.862250262881178, + "grad_norm": 0.3247757190871145, + "learning_rate": 3.1394535469740273e-06, + "loss": 0.0571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053987935185432434, + "step": 5575, + "valid_targets_mean": 4484.6, + "valid_targets_min": 949 + }, + { + "epoch": 5.867507886435331, + "grad_norm": 0.44223634627808067, + "learning_rate": 3.111306379334462e-06, + "loss": 0.0786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09267568588256836, + "step": 5580, + "valid_targets_mean": 4246.4, + "valid_targets_min": 2724 + }, + { + "epoch": 5.872765509989485, + "grad_norm": 0.33839878301938703, + "learning_rate": 3.083275312952301e-06, + "loss": 0.0642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05013374239206314, + "step": 5585, + "valid_targets_mean": 2882.8, + "valid_targets_min": 424 + }, + { + "epoch": 5.878023133543638, + "grad_norm": 0.41545640646602267, + "learning_rate": 3.055360540527006e-06, + "loss": 0.0953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07960711419582367, + "step": 5590, + "valid_targets_mean": 2827.2, + "valid_targets_min": 531 + }, + { + "epoch": 5.883280757097792, + "grad_norm": 0.3289053035698674, + "learning_rate": 3.0275622539585556e-06, + "loss": 0.0888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059524841606616974, + "step": 5595, + "valid_targets_mean": 3046.0, + "valid_targets_min": 855 + }, + { + "epoch": 5.888538380651945, + "grad_norm": 0.48164133495913375, + "learning_rate": 2.999880644346165e-06, + "loss": 0.0674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12587189674377441, + "step": 5600, + "valid_targets_mean": 2666.1, + "valid_targets_min": 722 + }, + { + "epoch": 5.893796004206099, + "grad_norm": 0.437680418717453, + 
"learning_rate": 2.9723159019869597e-06, + "loss": 0.0757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06669393181800842, + "step": 5605, + "valid_targets_mean": 3699.2, + "valid_targets_min": 1251 + }, + { + "epoch": 5.899053627760252, + "grad_norm": 0.4552807263478258, + "learning_rate": 2.9448682163746413e-06, + "loss": 0.0693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06853412091732025, + "step": 5610, + "valid_targets_mean": 2607.3, + "valid_targets_min": 632 + }, + { + "epoch": 5.904311251314406, + "grad_norm": 0.5564581144895816, + "learning_rate": 2.917537776198216e-06, + "loss": 0.0699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06953615695238113, + "step": 5615, + "valid_targets_mean": 1881.4, + "valid_targets_min": 595 + }, + { + "epoch": 5.909568874868559, + "grad_norm": 0.7076667181439968, + "learning_rate": 2.8903247693406932e-06, + "loss": 0.0932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12509091198444366, + "step": 5620, + "valid_targets_mean": 1679.2, + "valid_targets_min": 520 + }, + { + "epoch": 5.914826498422713, + "grad_norm": 0.37113324347173715, + "learning_rate": 2.863229382877777e-06, + "loss": 0.0951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06510509550571442, + "step": 5625, + "valid_targets_mean": 3019.8, + "valid_targets_min": 593 + }, + { + "epoch": 5.920084121976867, + "grad_norm": 0.37553389579031443, + "learning_rate": 2.8362518030765904e-06, + "loss": 0.076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05372827500104904, + "step": 5630, + "valid_targets_mean": 3745.3, + "valid_targets_min": 1041 + }, + { + "epoch": 5.9253417455310196, + "grad_norm": 0.45557279748286655, + "learning_rate": 2.8093922153944065e-06, + "loss": 0.0763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07182173430919647, + "step": 5635, + "valid_targets_mean": 3255.9, + "valid_targets_min": 676 + }, + { + "epoch": 5.930599369085174, + "grad_norm": 0.5217169672169305, + "learning_rate": 2.782650804477347e-06, + "loss": 0.0686, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.07140591740608215, + "step": 5640, + "valid_targets_mean": 2534.0, + "valid_targets_min": 616 + }, + { + "epoch": 5.9358569926393265, + "grad_norm": 0.39912615203410573, + "learning_rate": 2.7560277541591427e-06, + "loss": 0.0652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05698162317276001, + "step": 5645, + "valid_targets_mean": 2999.9, + "valid_targets_min": 541 + }, + { + "epoch": 5.941114616193481, + "grad_norm": 0.8478603884193808, + "learning_rate": 2.7295232474598445e-06, + "loss": 0.0655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09507367759943008, + "step": 5650, + "valid_targets_mean": 1254.4, + "valid_targets_min": 598 + }, + { + "epoch": 5.946372239747634, + "grad_norm": 0.4861537301010663, + "learning_rate": 2.703137466584571e-06, + "loss": 0.0798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06851141899824142, + "step": 5655, + "valid_targets_mean": 3019.1, + "valid_targets_min": 950 + }, + { + "epoch": 5.951629863301788, + "grad_norm": 0.5360077718524827, + "learning_rate": 2.6768705929222827e-06, + "loss": 0.0645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07459817826747894, + "step": 5660, + "valid_targets_mean": 2472.1, + "valid_targets_min": 680 + }, + { + "epoch": 5.956887486855941, + "grad_norm": 0.43417037003952114, + "learning_rate": 2.6507228070444922e-06, + "loss": 0.0643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06640955805778503, + "step": 5665, + "valid_targets_mean": 3184.0, + "valid_targets_min": 774 + }, + { + "epoch": 5.962145110410095, + "grad_norm": 0.4363314543656136, + "learning_rate": 2.6246942887040416e-06, + "loss": 0.0765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07214406877756119, + "step": 5670, + "valid_targets_mean": 4097.8, + "valid_targets_min": 1474 + }, + { + "epoch": 5.967402733964248, + "grad_norm": 0.35176548595035, + "learning_rate": 2.5987852168338922e-06, + "loss": 0.0616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052599914371967316, + "step": 5675, + "valid_targets_mean": 5379.9, 
+ "valid_targets_min": 3677 + }, + { + "epoch": 5.972660357518402, + "grad_norm": 0.3953005192034419, + "learning_rate": 2.5729957695458454e-06, + "loss": 0.0646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05832815542817116, + "step": 5680, + "valid_targets_mean": 3455.8, + "valid_targets_min": 675 + }, + { + "epoch": 5.977917981072555, + "grad_norm": 0.45263421607065923, + "learning_rate": 2.5473261241293547e-06, + "loss": 0.0692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07711224257946014, + "step": 5685, + "valid_targets_mean": 3888.9, + "valid_targets_min": 795 + }, + { + "epoch": 5.983175604626709, + "grad_norm": 0.4832589836132502, + "learning_rate": 2.521776457050302e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1471479833126068, + "step": 5690, + "valid_targets_mean": 2937.6, + "valid_targets_min": 605 + }, + { + "epoch": 5.988433228180862, + "grad_norm": 0.3635672452356186, + "learning_rate": 2.4963469439497703e-06, + "loss": 0.0632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.059753306210041046, + "step": 5695, + "valid_targets_mean": 3050.1, + "valid_targets_min": 941 + }, + { + "epoch": 5.993690851735016, + "grad_norm": 0.43990503610989284, + "learning_rate": 2.4710377596428404e-06, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06187095493078232, + "step": 5700, + "valid_targets_mean": 2817.5, + "valid_targets_min": 878 + }, + { + "epoch": 5.998948475289169, + "grad_norm": 0.5950208952697916, + "learning_rate": 2.4458490781174084e-06, + "loss": 0.0968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1841873973608017, + "step": 5705, + "valid_targets_mean": 2429.1, + "valid_targets_min": 955 + }, + { + "epoch": 6.004206098843323, + "grad_norm": 1.056405448573602, + "learning_rate": 2.4207810725329583e-06, + "loss": 0.1304, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15957674384117126, + "step": 5710, + "valid_targets_mean": 1576.1, + "valid_targets_min": 722 + }, + { + "epoch": 6.009463722397476, + "grad_norm": 
0.8832827367460852, + "learning_rate": 2.395833915219401e-06, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13401654362678528, + "step": 5715, + "valid_targets_mean": 1537.1, + "valid_targets_min": 725 + }, + { + "epoch": 6.01472134595163, + "grad_norm": 0.8438837993875398, + "learning_rate": 2.3710077776758713e-06, + "loss": 0.1261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13347040116786957, + "step": 5720, + "valid_targets_mean": 1576.7, + "valid_targets_min": 874 + }, + { + "epoch": 6.019978969505783, + "grad_norm": 0.8243968700714618, + "learning_rate": 2.3463028305695447e-06, + "loss": 0.1207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12472514808177948, + "step": 5725, + "valid_targets_mean": 1409.7, + "valid_targets_min": 615 + }, + { + "epoch": 6.025236593059937, + "grad_norm": 0.8443977567975072, + "learning_rate": 2.3217192437344925e-06, + "loss": 0.1244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12544682621955872, + "step": 5730, + "valid_targets_mean": 1581.3, + "valid_targets_min": 715 + }, + { + "epoch": 6.03049421661409, + "grad_norm": 0.8262835707529608, + "learning_rate": 2.2972571861704784e-06, + "loss": 0.1315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161672905087471, + "step": 5735, + "valid_targets_mean": 1765.4, + "valid_targets_min": 625 + }, + { + "epoch": 6.035751840168244, + "grad_norm": 0.7493785382076814, + "learning_rate": 2.2729168260418224e-06, + "loss": 0.1153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11074835062026978, + "step": 5740, + "valid_targets_mean": 1440.2, + "valid_targets_min": 761 + }, + { + "epoch": 6.041009463722397, + "grad_norm": 0.7650150016103158, + "learning_rate": 2.2486983306762332e-06, + "loss": 0.1312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2043965756893158, + "step": 5745, + "valid_targets_mean": 1589.9, + "valid_targets_min": 703 + }, + { + "epoch": 6.046267087276551, + "grad_norm": 0.8118441009037011, + "learning_rate": 2.224601866563665e-06, + "loss": 0.1168, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.12593913078308105, + "step": 5750, + "valid_targets_mean": 1544.3, + "valid_targets_min": 733 + }, + { + "epoch": 6.051524710830704, + "grad_norm": 0.8488874207848223, + "learning_rate": 2.2006275993551563e-06, + "loss": 0.1251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1356077790260315, + "step": 5755, + "valid_targets_mean": 1915.3, + "valid_targets_min": 964 + }, + { + "epoch": 6.056782334384858, + "grad_norm": 0.7943128359084249, + "learning_rate": 2.176775693861719e-06, + "loss": 0.1132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11740730702877045, + "step": 5760, + "valid_targets_mean": 1516.1, + "valid_targets_min": 815 + }, + { + "epoch": 6.062039957939011, + "grad_norm": 0.8281286315241209, + "learning_rate": 2.1530463140531886e-06, + "loss": 0.1291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12747472524642944, + "step": 5765, + "valid_targets_mean": 1713.7, + "valid_targets_min": 1027 + }, + { + "epoch": 6.067297581493165, + "grad_norm": 0.8099123331780564, + "learning_rate": 2.129439623057077e-06, + "loss": 0.113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10697382688522339, + "step": 5770, + "valid_targets_mean": 1325.3, + "valid_targets_min": 570 + }, + { + "epoch": 6.072555205047319, + "grad_norm": 0.8065990975663488, + "learning_rate": 2.105955783157498e-06, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11023028194904327, + "step": 5775, + "valid_targets_mean": 1349.9, + "valid_targets_min": 548 + }, + { + "epoch": 6.0778128286014725, + "grad_norm": 0.7851180412673159, + "learning_rate": 2.0825949557940174e-06, + "loss": 0.1054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10124966502189636, + "step": 5780, + "valid_targets_mean": 1355.5, + "valid_targets_min": 682 + }, + { + "epoch": 6.083070452155626, + "grad_norm": 0.79802762010396, + "learning_rate": 2.059357301560547e-06, + "loss": 0.1194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11919783800840378, + "step": 5785, + 
"valid_targets_mean": 1716.7, + "valid_targets_min": 911 + }, + { + "epoch": 6.0883280757097795, + "grad_norm": 0.8707769812997141, + "learning_rate": 2.036242980204244e-06, + "loss": 0.124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11366402357816696, + "step": 5790, + "valid_targets_mean": 1278.3, + "valid_targets_min": 597 + }, + { + "epoch": 6.093585699263933, + "grad_norm": 0.8560519537309498, + "learning_rate": 2.0132521506244294e-06, + "loss": 0.1277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1303253173828125, + "step": 5795, + "valid_targets_mean": 1576.7, + "valid_targets_min": 772 + }, + { + "epoch": 6.0988433228180865, + "grad_norm": 0.8024527979995779, + "learning_rate": 1.9903849708714664e-06, + "loss": 0.1158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1220255121588707, + "step": 5800, + "valid_targets_mean": 1733.6, + "valid_targets_min": 940 + }, + { + "epoch": 6.10410094637224, + "grad_norm": 0.7302956525360211, + "learning_rate": 1.967641598145684e-06, + "loss": 0.1121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09471730142831802, + "step": 5805, + "valid_targets_mean": 1361.2, + "valid_targets_min": 785 + }, + { + "epoch": 6.1093585699263935, + "grad_norm": 0.7809589037777808, + "learning_rate": 1.9450221887963194e-06, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1004934087395668, + "step": 5810, + "valid_targets_mean": 1457.7, + "valid_targets_min": 617 + }, + { + "epoch": 6.114616193480547, + "grad_norm": 0.8687769105285652, + "learning_rate": 1.922526898320407e-06, + "loss": 0.1219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11705152690410614, + "step": 5815, + "valid_targets_mean": 1281.0, + "valid_targets_min": 648 + }, + { + "epoch": 6.1198738170347005, + "grad_norm": 1.0257711661224076, + "learning_rate": 1.900155881361727e-06, + "loss": 0.1109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10995705425739288, + "step": 5820, + "valid_targets_mean": 1466.2, + "valid_targets_min": 634 + }, + { + "epoch": 
6.125131440588854, + "grad_norm": 0.8509635243706488, + "learning_rate": 1.8779092917097564e-06, + "loss": 0.1081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11150787770748138, + "step": 5825, + "valid_targets_mean": 1515.4, + "valid_targets_min": 774 + }, + { + "epoch": 6.130389064143007, + "grad_norm": 0.7561951051975627, + "learning_rate": 1.85578728229858e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11924959719181061, + "step": 5830, + "valid_targets_mean": 1811.3, + "valid_targets_min": 563 + }, + { + "epoch": 6.135646687697161, + "grad_norm": 0.8704344303654569, + "learning_rate": 1.8337900052058732e-06, + "loss": 0.1133, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1195128783583641, + "step": 5835, + "valid_targets_mean": 1495.8, + "valid_targets_min": 837 + }, + { + "epoch": 6.140904311251314, + "grad_norm": 0.8398306482112976, + "learning_rate": 1.811917611651821e-06, + "loss": 0.1186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12384511530399323, + "step": 5840, + "valid_targets_mean": 1653.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.146161934805468, + "grad_norm": 0.7651328935133273, + "learning_rate": 1.7901702519981068e-06, + "loss": 0.1184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10116033256053925, + "step": 5845, + "valid_targets_mean": 1380.4, + "valid_targets_min": 797 + }, + { + "epoch": 6.151419558359621, + "grad_norm": 0.7954008913290201, + "learning_rate": 1.7685480757468765e-06, + "loss": 0.1094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10791818052530289, + "step": 5850, + "valid_targets_mean": 1600.2, + "valid_targets_min": 646 + }, + { + "epoch": 6.156677181913775, + "grad_norm": 0.8377229014967783, + "learning_rate": 1.7470512315396894e-06, + "loss": 0.1169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1209423616528511, + "step": 5855, + "valid_targets_mean": 1447.8, + "valid_targets_min": 591 + }, + { + "epoch": 6.161934805467928, + "grad_norm": 0.8378541971434684, + "learning_rate": 
1.7256798671565111e-06, + "loss": 0.1146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1366468369960785, + "step": 5860, + "valid_targets_mean": 1590.8, + "valid_targets_min": 796 + }, + { + "epoch": 6.167192429022082, + "grad_norm": 0.8048332759300525, + "learning_rate": 1.7044341295147116e-06, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10240399837493896, + "step": 5865, + "valid_targets_mean": 1486.0, + "valid_targets_min": 848 + }, + { + "epoch": 6.172450052576235, + "grad_norm": 0.7908342032482969, + "learning_rate": 1.683314164668024e-06, + "loss": 0.1095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09846100956201553, + "step": 5870, + "valid_targets_mean": 1233.6, + "valid_targets_min": 665 + }, + { + "epoch": 6.177707676130389, + "grad_norm": 0.7476579438876946, + "learning_rate": 1.6623201178055603e-06, + "loss": 0.1042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09678193926811218, + "step": 5875, + "valid_targets_mean": 1408.1, + "valid_targets_min": 652 + }, + { + "epoch": 6.182965299684542, + "grad_norm": 0.918072502833983, + "learning_rate": 1.6414521332508183e-06, + "loss": 0.108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1330566555261612, + "step": 5880, + "valid_targets_mean": 1747.6, + "valid_targets_min": 662 + }, + { + "epoch": 6.188222923238696, + "grad_norm": 0.851594586310522, + "learning_rate": 1.6207103544606795e-06, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1153300553560257, + "step": 5885, + "valid_targets_mean": 1415.1, + "valid_targets_min": 746 + }, + { + "epoch": 6.193480546792849, + "grad_norm": 0.8913181180169621, + "learning_rate": 1.6000949240244047e-06, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1070786714553833, + "step": 5890, + "valid_targets_mean": 1241.3, + "valid_targets_min": 794 + }, + { + "epoch": 6.198738170347003, + "grad_norm": 0.877886888931176, + "learning_rate": 1.5796059836626998e-06, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.11365164816379547, + "step": 5895, + "valid_targets_mean": 1566.1, + "valid_targets_min": 773 + }, + { + "epoch": 6.203995793901156, + "grad_norm": 0.9072964812458807, + "learning_rate": 1.5592436742267048e-06, + "loss": 0.1085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12517592310905457, + "step": 5900, + "valid_targets_mean": 1655.8, + "valid_targets_min": 741 + }, + { + "epoch": 6.20925341745531, + "grad_norm": 0.8664928853792824, + "learning_rate": 1.5390081356970331e-06, + "loss": 0.1138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10317286849021912, + "step": 5905, + "valid_targets_mean": 1325.1, + "valid_targets_min": 801 + }, + { + "epoch": 6.214511041009464, + "grad_norm": 0.8472902836032864, + "learning_rate": 1.5188995071828117e-06, + "loss": 0.1084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13048255443572998, + "step": 5910, + "valid_targets_mean": 1859.9, + "valid_targets_min": 634 + }, + { + "epoch": 6.219768664563618, + "grad_norm": 0.9817704310215676, + "learning_rate": 1.498917926920731e-06, + "loss": 0.1175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13278211653232574, + "step": 5915, + "valid_targets_mean": 1715.2, + "valid_targets_min": 690 + }, + { + "epoch": 6.225026288117771, + "grad_norm": 0.8300762826519462, + "learning_rate": 1.4790635322740855e-06, + "loss": 0.1132, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12267100811004639, + "step": 5920, + "valid_targets_mean": 1776.1, + "valid_targets_min": 718 + }, + { + "epoch": 6.230283911671925, + "grad_norm": 0.7485334780876247, + "learning_rate": 1.4593364597318305e-06, + "loss": 0.1154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0925360918045044, + "step": 5925, + "valid_targets_mean": 1405.6, + "valid_targets_min": 685 + }, + { + "epoch": 6.235541535226078, + "grad_norm": 0.7991821174411781, + "learning_rate": 1.4397368449076443e-06, + "loss": 0.1063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1004900187253952, + "step": 5930, + "valid_targets_mean": 1445.6, + "valid_targets_min": 
744 + }, + { + "epoch": 6.240799158780232, + "grad_norm": 0.8794127672319397, + "learning_rate": 1.4202648225390103e-06, + "loss": 0.109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10697475075721741, + "step": 5935, + "valid_targets_mean": 1320.9, + "valid_targets_min": 650 + }, + { + "epoch": 6.246056782334385, + "grad_norm": 0.8342636597973683, + "learning_rate": 1.4009205264862646e-06, + "loss": 0.1106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11376285552978516, + "step": 5940, + "valid_targets_mean": 1461.5, + "valid_targets_min": 677 + }, + { + "epoch": 6.251314405888539, + "grad_norm": 0.8477474608441585, + "learning_rate": 1.3817040897316903e-06, + "loss": 0.103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11012783646583557, + "step": 5945, + "valid_targets_mean": 1330.2, + "valid_targets_min": 808 + }, + { + "epoch": 6.256572029442692, + "grad_norm": 0.8454187125928052, + "learning_rate": 1.362615644378611e-06, + "loss": 0.1058, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10677572339773178, + "step": 5950, + "valid_targets_mean": 1413.0, + "valid_targets_min": 727 + }, + { + "epoch": 6.261829652996846, + "grad_norm": 0.8280382374490074, + "learning_rate": 1.3436553216504721e-06, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10449901968240738, + "step": 5955, + "valid_targets_mean": 1483.2, + "valid_targets_min": 907 + }, + { + "epoch": 6.267087276550999, + "grad_norm": 0.8761840994592133, + "learning_rate": 1.324823251889924e-06, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11426833271980286, + "step": 5960, + "valid_targets_mean": 1516.4, + "valid_targets_min": 669 + }, + { + "epoch": 6.2723449001051526, + "grad_norm": 0.7883641561966342, + "learning_rate": 1.3061195645579661e-06, + "loss": 0.1112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09905272722244263, + "step": 5965, + "valid_targets_mean": 1541.4, + "valid_targets_min": 750 + }, + { + "epoch": 6.277602523659306, + "grad_norm": 0.8300834931135926, + 
"learning_rate": 1.2875443882330218e-06, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11823458224534988, + "step": 5970, + "valid_targets_mean": 1610.9, + "valid_targets_min": 736 + }, + { + "epoch": 6.2828601472134595, + "grad_norm": 0.9275697681576301, + "learning_rate": 1.269097850610066e-06, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12318852543830872, + "step": 5975, + "valid_targets_mean": 1571.8, + "valid_targets_min": 694 + }, + { + "epoch": 6.288117770767613, + "grad_norm": 0.8032415472343256, + "learning_rate": 1.250780078499747e-06, + "loss": 0.1075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11173338443040848, + "step": 5980, + "valid_targets_mean": 1560.8, + "valid_targets_min": 806 + }, + { + "epoch": 6.2933753943217665, + "grad_norm": 0.8263637067303546, + "learning_rate": 1.2325911978275196e-06, + "loss": 0.1072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10697021335363388, + "step": 5985, + "valid_targets_mean": 1341.3, + "valid_targets_min": 797 + }, + { + "epoch": 6.29863301787592, + "grad_norm": 0.7947732322040042, + "learning_rate": 1.214531333632769e-06, + "loss": 0.1062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09152212738990784, + "step": 5990, + "valid_targets_mean": 1448.0, + "valid_targets_min": 698 + }, + { + "epoch": 6.3038906414300735, + "grad_norm": 0.861027582543773, + "learning_rate": 1.1966006100679596e-06, + "loss": 0.1029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10293729603290558, + "step": 5995, + "valid_targets_mean": 1379.1, + "valid_targets_min": 712 + }, + { + "epoch": 6.309148264984227, + "grad_norm": 0.9826437169525866, + "learning_rate": 1.1787991503977846e-06, + "loss": 0.1098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13235735893249512, + "step": 6000, + "valid_targets_mean": 1699.4, + "valid_targets_min": 605 + }, + { + "epoch": 6.3144058885383805, + "grad_norm": 0.9541692555323759, + "learning_rate": 1.1611270769983051e-06, + "loss": 0.1131, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.11527444422245026, + "step": 6005, + "valid_targets_mean": 1718.9, + "valid_targets_min": 907 + }, + { + "epoch": 6.319663512092534, + "grad_norm": 0.8316730507580145, + "learning_rate": 1.143584511356115e-06, + "loss": 0.1023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10918165743350983, + "step": 6010, + "valid_targets_mean": 1481.1, + "valid_targets_min": 723 + }, + { + "epoch": 6.3249211356466875, + "grad_norm": 0.7859618568757966, + "learning_rate": 1.1261715740675205e-06, + "loss": 0.1092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11200223863124847, + "step": 6015, + "valid_targets_mean": 1562.9, + "valid_targets_min": 855 + }, + { + "epoch": 6.330178759200841, + "grad_norm": 0.8357770703657998, + "learning_rate": 1.108888384837683e-06, + "loss": 0.1101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11746874451637268, + "step": 6020, + "valid_targets_mean": 1583.2, + "valid_targets_min": 700 + }, + { + "epoch": 6.335436382754994, + "grad_norm": 0.7763446678484582, + "learning_rate": 1.0917350624798262e-06, + "loss": 0.1068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10101410001516342, + "step": 6025, + "valid_targets_mean": 1613.6, + "valid_targets_min": 798 + }, + { + "epoch": 6.340694006309148, + "grad_norm": 0.9173311179547599, + "learning_rate": 1.07471172491439e-06, + "loss": 0.1118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10745055973529816, + "step": 6030, + "valid_targets_mean": 1279.4, + "valid_targets_min": 635 + }, + { + "epoch": 6.345951629863301, + "grad_norm": 0.8344114280070869, + "learning_rate": 1.0578184891682408e-06, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09839329868555069, + "step": 6035, + "valid_targets_mean": 1321.2, + "valid_targets_min": 244 + }, + { + "epoch": 6.351209253417455, + "grad_norm": 0.9102659735172188, + "learning_rate": 1.041055471373864e-06, + "loss": 0.1097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10918129980564117, + "step": 6040, + "valid_targets_mean": 1293.1, + 
"valid_targets_min": 714 + }, + { + "epoch": 6.356466876971609, + "grad_norm": 0.797875234386722, + "learning_rate": 1.0244227867685597e-06, + "loss": 0.1032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10231631249189377, + "step": 6045, + "valid_targets_mean": 1459.4, + "valid_targets_min": 585 + }, + { + "epoch": 6.361724500525763, + "grad_norm": 0.8982983030084953, + "learning_rate": 1.0079205496936484e-06, + "loss": 0.121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12712499499320984, + "step": 6050, + "valid_targets_mean": 1683.5, + "valid_targets_min": 1048 + }, + { + "epoch": 6.366982124079916, + "grad_norm": 0.735713484328951, + "learning_rate": 9.915488735936995e-07, + "loss": 0.1065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09809092432260513, + "step": 6055, + "valid_targets_mean": 1473.7, + "valid_targets_min": 657 + }, + { + "epoch": 6.37223974763407, + "grad_norm": 0.8151161730500948, + "learning_rate": 9.753078710157316e-07, + "loss": 0.1102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09711694717407227, + "step": 6060, + "valid_targets_mean": 1436.2, + "valid_targets_min": 672 + }, + { + "epoch": 6.377497371188223, + "grad_norm": 0.8749570747293455, + "learning_rate": 9.59197653608448e-07, + "loss": 0.1068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11560262739658356, + "step": 6065, + "valid_targets_mean": 1605.6, + "valid_targets_min": 691 + }, + { + "epoch": 6.382754994742377, + "grad_norm": 0.8715454554432535, + "learning_rate": 9.432183321214805e-07, + "loss": 0.1024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10432256758213043, + "step": 6070, + "valid_targets_mean": 1340.1, + "valid_targets_min": 732 + }, + { + "epoch": 6.38801261829653, + "grad_norm": 0.8525222943418735, + "learning_rate": 9.273700164046162e-07, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11443788558244705, + "step": 6075, + "valid_targets_mean": 1546.2, + "valid_targets_min": 634 + }, + { + "epoch": 6.393270241850684, + "grad_norm": 0.7308095830875175, + 
"learning_rate": 9.11652815407027e-07, + "loss": 0.1019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1003701239824295, + "step": 6080, + "valid_targets_mean": 1788.0, + "valid_targets_min": 682 + }, + { + "epoch": 6.398527865404837, + "grad_norm": 0.867952223512354, + "learning_rate": 8.960668371765569e-07, + "loss": 0.1033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10790401697158813, + "step": 6085, + "valid_targets_mean": 1405.9, + "valid_targets_min": 596 + }, + { + "epoch": 6.403785488958991, + "grad_norm": 0.8806644704943195, + "learning_rate": 8.806121888589492e-07, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11704282462596893, + "step": 6090, + "valid_targets_mean": 1576.6, + "valid_targets_min": 687 + }, + { + "epoch": 6.409043112513144, + "grad_norm": 0.8128337498310754, + "learning_rate": 8.652889766971229e-07, + "loss": 0.1148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1078612431883812, + "step": 6095, + "valid_targets_mean": 1605.6, + "valid_targets_min": 820 + }, + { + "epoch": 6.414300736067298, + "grad_norm": 0.8091111187848541, + "learning_rate": 8.500973060304374e-07, + "loss": 0.107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10989522933959961, + "step": 6100, + "valid_targets_mean": 1734.6, + "valid_targets_min": 799 + }, + { + "epoch": 6.419558359621451, + "grad_norm": 0.8677978121525696, + "learning_rate": 8.350372812939778e-07, + "loss": 0.1038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11552289873361588, + "step": 6105, + "valid_targets_mean": 1587.2, + "valid_targets_min": 1047 + }, + { + "epoch": 6.424815983175605, + "grad_norm": 0.8175987568356211, + "learning_rate": 8.201090060178174e-07, + "loss": 0.1047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1079326868057251, + "step": 6110, + "valid_targets_mean": 1558.6, + "valid_targets_min": 741 + }, + { + "epoch": 6.430073606729758, + "grad_norm": 0.7713354444357884, + "learning_rate": 8.053125828263297e-07, + "loss": 0.0963, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.10174345970153809, + "step": 6115, + "valid_targets_mean": 1574.2, + "valid_targets_min": 720 + }, + { + "epoch": 6.435331230283912, + "grad_norm": 0.8772480874069559, + "learning_rate": 7.906481134374688e-07, + "loss": 0.1071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10616987943649292, + "step": 6120, + "valid_targets_mean": 1458.4, + "valid_targets_min": 935 + }, + { + "epoch": 6.440588853838065, + "grad_norm": 0.8957916481779873, + "learning_rate": 7.761156986620677e-07, + "loss": 0.1089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12607461214065552, + "step": 6125, + "valid_targets_mean": 1713.8, + "valid_targets_min": 684 + }, + { + "epoch": 6.445846477392219, + "grad_norm": 0.7721506634303918, + "learning_rate": 7.617154384031545e-07, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10052677243947983, + "step": 6130, + "valid_targets_mean": 1463.6, + "valid_targets_min": 847 + }, + { + "epoch": 6.451104100946372, + "grad_norm": 0.7411463137773325, + "learning_rate": 7.474474316552638e-07, + "loss": 0.1074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1051454246044159, + "step": 6135, + "valid_targets_mean": 1666.8, + "valid_targets_min": 1280 + }, + { + "epoch": 6.456361724500526, + "grad_norm": 0.7981849402747365, + "learning_rate": 7.33311776503749e-07, + "loss": 0.1031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1067662388086319, + "step": 6140, + "valid_targets_mean": 1611.5, + "valid_targets_min": 589 + }, + { + "epoch": 6.461619348054679, + "grad_norm": 0.7987623242866554, + "learning_rate": 7.193085701241175e-07, + "loss": 0.1066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11374513059854507, + "step": 6145, + "valid_targets_mean": 1812.6, + "valid_targets_min": 918 + }, + { + "epoch": 6.466876971608833, + "grad_norm": 0.5524972757850786, + "learning_rate": 7.054379087813568e-07, + "loss": 0.1009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07066299021244049, + "step": 6150, + "valid_targets_mean": 3127.1, + "valid_targets_min": 543 + 
}, + { + "epoch": 6.472134595162986, + "grad_norm": 0.48501086183905745, + "learning_rate": 6.916998878292691e-07, + "loss": 0.0786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06848639249801636, + "step": 6155, + "valid_targets_mean": 3549.7, + "valid_targets_min": 1024 + }, + { + "epoch": 6.4773922187171395, + "grad_norm": 0.3794394367263926, + "learning_rate": 6.780946017098289e-07, + "loss": 0.0683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.047510214149951935, + "step": 6160, + "valid_targets_mean": 3607.6, + "valid_targets_min": 2777 + }, + { + "epoch": 6.482649842271293, + "grad_norm": 0.5550466030514543, + "learning_rate": 6.646221439525225e-07, + "loss": 0.0853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08951736986637115, + "step": 6165, + "valid_targets_mean": 3433.5, + "valid_targets_min": 1095 + }, + { + "epoch": 6.4879074658254465, + "grad_norm": 0.5207584835394102, + "learning_rate": 6.512826071737021e-07, + "loss": 0.0697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08131983876228333, + "step": 6170, + "valid_targets_mean": 2446.1, + "valid_targets_min": 560 + }, + { + "epoch": 6.4931650893796, + "grad_norm": 0.544380953297463, + "learning_rate": 6.380760830759669e-07, + "loss": 0.0723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08120328187942505, + "step": 6175, + "valid_targets_mean": 2739.4, + "valid_targets_min": 663 + }, + { + "epoch": 6.498422712933754, + "grad_norm": 0.8360085997505144, + "learning_rate": 6.250026624475092e-07, + "loss": 0.1223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1252402663230896, + "step": 6180, + "valid_targets_mean": 1537.3, + "valid_targets_min": 535 + }, + { + "epoch": 6.503680336487907, + "grad_norm": 0.46518429024428354, + "learning_rate": 6.12062435161509e-07, + "loss": 0.0732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06999659538269043, + "step": 6185, + "valid_targets_mean": 3057.1, + "valid_targets_min": 811 + }, + { + "epoch": 6.508937960042061, + "grad_norm": 0.5337598823318066, + "learning_rate": 
5.992554901755121e-07, + "loss": 0.0869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10769539326429367, + "step": 6190, + "valid_targets_mean": 2232.5, + "valid_targets_min": 883 + }, + { + "epoch": 6.514195583596215, + "grad_norm": 0.3480269503418339, + "learning_rate": 5.865819155308039e-07, + "loss": 0.0938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04843660816550255, + "step": 6195, + "valid_targets_mean": 5158.4, + "valid_targets_min": 3285 + }, + { + "epoch": 6.519453207150368, + "grad_norm": 0.39725832400756, + "learning_rate": 5.740417983518253e-07, + "loss": 0.0675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05901825428009033, + "step": 6200, + "valid_targets_mean": 3384.3, + "valid_targets_min": 502 + }, + { + "epoch": 6.524710830704522, + "grad_norm": 0.46447315886120744, + "learning_rate": 5.61635224845567e-07, + "loss": 0.0725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07760268449783325, + "step": 6205, + "valid_targets_mean": 3542.2, + "valid_targets_min": 773 + }, + { + "epoch": 6.529968454258675, + "grad_norm": 0.5801427464493447, + "learning_rate": 5.493622803009602e-07, + "loss": 0.07, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07645406574010849, + "step": 6210, + "valid_targets_mean": 1996.5, + "valid_targets_min": 492 + }, + { + "epoch": 6.535226077812829, + "grad_norm": 0.38317492793314245, + "learning_rate": 5.372230490883246e-07, + "loss": 0.105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056134678423404694, + "step": 6215, + "valid_targets_mean": 4156.0, + "valid_targets_min": 1961 + }, + { + "epoch": 6.540483701366982, + "grad_norm": 0.41486826474114835, + "learning_rate": 5.252176146587484e-07, + "loss": 0.0636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06410335004329681, + "step": 6220, + "valid_targets_mean": 3535.2, + "valid_targets_min": 1049 + }, + { + "epoch": 6.545741324921136, + "grad_norm": 0.38424545726777193, + "learning_rate": 5.133460595435447e-07, + "loss": 0.0734, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.05492096766829491, + "step": 6225, + "valid_targets_mean": 2985.7, + "valid_targets_min": 841 + }, + { + "epoch": 6.550998948475289, + "grad_norm": 0.49335827474356153, + "learning_rate": 5.016084653536756e-07, + "loss": 0.0581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061297088861465454, + "step": 6230, + "valid_targets_mean": 3023.9, + "valid_targets_min": 720 + }, + { + "epoch": 6.556256572029443, + "grad_norm": 0.6635861323521509, + "learning_rate": 4.900049127791851e-07, + "loss": 0.0814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08327782154083252, + "step": 6235, + "valid_targets_mean": 1272.2, + "valid_targets_min": 515 + }, + { + "epoch": 6.561514195583596, + "grad_norm": 0.43734083333484386, + "learning_rate": 4.785354815886445e-07, + "loss": 0.0758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06597829610109329, + "step": 6240, + "valid_targets_mean": 2375.1, + "valid_targets_min": 589 + }, + { + "epoch": 6.56677181913775, + "grad_norm": 0.49182093385310827, + "learning_rate": 4.6720025062862106e-07, + "loss": 0.0811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08136197924613953, + "step": 6245, + "valid_targets_mean": 3174.8, + "valid_targets_min": 1086 + }, + { + "epoch": 6.572029442691903, + "grad_norm": 0.5150382452934233, + "learning_rate": 4.559992978231087e-07, + "loss": 0.0812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0961027592420578, + "step": 6250, + "valid_targets_mean": 2903.8, + "valid_targets_min": 511 + }, + { + "epoch": 6.577287066246057, + "grad_norm": 0.47101295595598486, + "learning_rate": 4.4493270017301305e-07, + "loss": 0.1107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07581216841936111, + "step": 6255, + "valid_targets_mean": 3491.4, + "valid_targets_min": 714 + }, + { + "epoch": 6.58254468980021, + "grad_norm": 0.4648965066149495, + "learning_rate": 4.340005337556186e-07, + "loss": 0.0637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06689706444740295, + "step": 6260, + "valid_targets_mean": 3222.4, + 
"valid_targets_min": 669 + }, + { + "epoch": 6.587802313354364, + "grad_norm": 0.6564120309065031, + "learning_rate": 4.232028737240623e-07, + "loss": 0.0847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09197407960891724, + "step": 6265, + "valid_targets_mean": 1370.8, + "valid_targets_min": 598 + }, + { + "epoch": 6.593059936908517, + "grad_norm": 0.6647659425431381, + "learning_rate": 4.125397943068099e-07, + "loss": 0.0913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09101127833127975, + "step": 6270, + "valid_targets_mean": 1732.1, + "valid_targets_min": 323 + }, + { + "epoch": 6.598317560462671, + "grad_norm": 0.4655433765952755, + "learning_rate": 4.0201136880716027e-07, + "loss": 0.0797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07048631459474564, + "step": 6275, + "valid_targets_mean": 3875.1, + "valid_targets_min": 2827 + }, + { + "epoch": 6.603575184016824, + "grad_norm": 0.578990966436419, + "learning_rate": 3.9161766960273517e-07, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23547233641147614, + "step": 6280, + "valid_targets_mean": 3032.8, + "valid_targets_min": 652 + }, + { + "epoch": 6.608832807570978, + "grad_norm": 0.44296797004729804, + "learning_rate": 3.8135876814497927e-07, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0736391618847847, + "step": 6285, + "valid_targets_mean": 3728.1, + "valid_targets_min": 1932 + }, + { + "epoch": 6.614090431125131, + "grad_norm": 0.5345478654954723, + "learning_rate": 3.7123473495866314e-07, + "loss": 0.0825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0921144187450409, + "step": 6290, + "valid_targets_mean": 2361.8, + "valid_targets_min": 820 + }, + { + "epoch": 6.619348054679285, + "grad_norm": 0.6712258843084848, + "learning_rate": 3.61245639641421e-07, + "loss": 0.0984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1883959174156189, + "step": 6295, + "valid_targets_mean": 2741.1, + "valid_targets_min": 727 + }, + { + "epoch": 6.624605678233438, + "grad_norm": 
0.36617906460561456, + "learning_rate": 3.513915508632448e-07, + "loss": 0.1422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062396831810474396, + "step": 6300, + "valid_targets_mean": 4440.7, + "valid_targets_min": 636 + }, + { + "epoch": 6.629863301787592, + "grad_norm": 0.5480528569445628, + "learning_rate": 3.4167253636602893e-07, + "loss": 0.0709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1156037449836731, + "step": 6305, + "valid_targets_mean": 4360.9, + "valid_targets_min": 1142 + }, + { + "epoch": 6.635120925341745, + "grad_norm": 0.45482490047304375, + "learning_rate": 3.3208866296310147e-07, + "loss": 0.0756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08658730238676071, + "step": 6310, + "valid_targets_mean": 4034.8, + "valid_targets_min": 513 + }, + { + "epoch": 6.6403785488958995, + "grad_norm": 0.33861109819356333, + "learning_rate": 3.2263999653876057e-07, + "loss": 0.0728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05370061844587326, + "step": 6315, + "valid_targets_mean": 3534.8, + "valid_targets_min": 970 + }, + { + "epoch": 6.645636172450052, + "grad_norm": 0.381047002476017, + "learning_rate": 3.133266020478254e-07, + "loss": 0.0672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06742705404758453, + "step": 6320, + "valid_targets_mean": 3806.2, + "valid_targets_min": 748 + }, + { + "epoch": 6.6508937960042065, + "grad_norm": 0.48814012056034195, + "learning_rate": 3.0414854351519476e-07, + "loss": 0.0665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07534006237983704, + "step": 6325, + "valid_targets_mean": 3536.1, + "valid_targets_min": 1873 + }, + { + "epoch": 6.65615141955836, + "grad_norm": 0.5135246496208872, + "learning_rate": 2.951058840353893e-07, + "loss": 0.0699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08683723211288452, + "step": 6330, + "valid_targets_mean": 2808.6, + "valid_targets_min": 1939 + }, + { + "epoch": 6.6614090431125135, + "grad_norm": 0.4131782280152961, + "learning_rate": 2.861986857721388e-07, + "loss": 0.0579, + 
"loss_nan_ranks": 0, + "loss_rank_avg": 0.060513995587825775, + "step": 6335, + "valid_targets_mean": 2556.4, + "valid_targets_min": 519 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.5623539902771256, + "learning_rate": 2.7742700995794457e-07, + "loss": 0.11, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09023596346378326, + "step": 6340, + "valid_targets_mean": 2305.1, + "valid_targets_min": 645 + }, + { + "epoch": 6.6719242902208205, + "grad_norm": 0.3718507555452981, + "learning_rate": 2.687909168936509e-07, + "loss": 0.0524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.052152276039123535, + "step": 6345, + "valid_targets_mean": 2936.2, + "valid_targets_min": 472 + }, + { + "epoch": 6.677181913774974, + "grad_norm": 0.3654923398679437, + "learning_rate": 2.6029046594805206e-07, + "loss": 0.0605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.056927390396595, + "step": 6350, + "valid_targets_mean": 2775.9, + "valid_targets_min": 616 + }, + { + "epoch": 6.682439537329127, + "grad_norm": 0.4102354641164959, + "learning_rate": 2.519257155574617e-07, + "loss": 0.0596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06437982618808746, + "step": 6355, + "valid_targets_mean": 3847.4, + "valid_targets_min": 792 + }, + { + "epoch": 6.687697160883281, + "grad_norm": 0.48384970907690883, + "learning_rate": 2.436967232253218e-07, + "loss": 0.0738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1256387084722519, + "step": 6360, + "valid_targets_mean": 2382.8, + "valid_targets_min": 792 + }, + { + "epoch": 6.692954784437434, + "grad_norm": 0.3651370754933973, + "learning_rate": 2.3560354552180976e-07, + "loss": 0.0895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05824588984251022, + "step": 6365, + "valid_targets_mean": 3367.9, + "valid_targets_min": 729 + }, + { + "epoch": 6.698212407991588, + "grad_norm": 0.5849891621462965, + "learning_rate": 2.27646238083441e-07, + "loss": 0.0908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0712180882692337, + "step": 6370, + 
"valid_targets_mean": 1583.1, + "valid_targets_min": 612 + }, + { + "epoch": 6.703470031545741, + "grad_norm": 0.6839730797579563, + "learning_rate": 2.1982485561269805e-07, + "loss": 0.075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09953617304563522, + "step": 6375, + "valid_targets_mean": 1594.0, + "valid_targets_min": 664 + }, + { + "epoch": 6.708727655099895, + "grad_norm": 0.5295988879787382, + "learning_rate": 2.1213945187763764e-07, + "loss": 0.0837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08865515142679214, + "step": 6380, + "valid_targets_mean": 1982.2, + "valid_targets_min": 542 + }, + { + "epoch": 6.713985278654048, + "grad_norm": 0.38690209052475394, + "learning_rate": 2.0459007971154632e-07, + "loss": 0.1014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0660017654299736, + "step": 6385, + "valid_targets_mean": 3892.2, + "valid_targets_min": 2440 + }, + { + "epoch": 6.719242902208202, + "grad_norm": 0.45309379568257596, + "learning_rate": 1.9717679101254549e-07, + "loss": 0.0736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06845031678676605, + "step": 6390, + "valid_targets_mean": 2770.6, + "valid_targets_min": 629 + }, + { + "epoch": 6.724500525762355, + "grad_norm": 0.4955813216821868, + "learning_rate": 1.898996367432604e-07, + "loss": 0.09, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0703897699713707, + "step": 6395, + "valid_targets_mean": 3325.8, + "valid_targets_min": 755 + }, + { + "epoch": 6.729758149316509, + "grad_norm": 0.4469529556688231, + "learning_rate": 1.8275866693046263e-07, + "loss": 0.0751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06479282677173615, + "step": 6400, + "valid_targets_mean": 1951.9, + "valid_targets_min": 485 + }, + { + "epoch": 6.735015772870662, + "grad_norm": 0.45838941321833887, + "learning_rate": 1.7575393066471714e-07, + "loss": 0.0722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10326488316059113, + "step": 6405, + "valid_targets_mean": 3283.8, + "valid_targets_min": 605 + }, + { + "epoch": 
6.740273396424816, + "grad_norm": 0.34698247212854855, + "learning_rate": 1.6888547610005802e-07, + "loss": 0.0645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05749543383717537, + "step": 6410, + "valid_targets_mean": 4175.6, + "valid_targets_min": 848 + }, + { + "epoch": 6.745531019978969, + "grad_norm": 0.3779011861557256, + "learning_rate": 1.6215335045364656e-07, + "loss": 0.0631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.054585136473178864, + "step": 6415, + "valid_targets_mean": 3676.2, + "valid_targets_min": 884 + }, + { + "epoch": 6.750788643533123, + "grad_norm": 0.41789036578968497, + "learning_rate": 1.5555760000545595e-07, + "loss": 0.0673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06992784142494202, + "step": 6420, + "valid_targets_mean": 3627.8, + "valid_targets_min": 905 + }, + { + "epoch": 6.756046267087276, + "grad_norm": 0.42404967047065883, + "learning_rate": 1.4909827009794486e-07, + "loss": 0.0792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0973619669675827, + "step": 6425, + "valid_targets_mean": 2844.2, + "valid_targets_min": 808 + }, + { + "epoch": 6.76130389064143, + "grad_norm": 0.4336047857713409, + "learning_rate": 1.4277540513575328e-07, + "loss": 0.077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08207525312900543, + "step": 6430, + "valid_targets_mean": 3541.2, + "valid_targets_min": 1814 + }, + { + "epoch": 6.766561514195583, + "grad_norm": 0.5364441964738733, + "learning_rate": 1.3658904858538936e-07, + "loss": 0.0848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09877628087997437, + "step": 6435, + "valid_targets_mean": 2889.8, + "valid_targets_min": 931 + }, + { + "epoch": 6.771819137749737, + "grad_norm": 0.36625564062895793, + "learning_rate": 1.3053924297493858e-07, + "loss": 0.0595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06484304368495941, + "step": 6440, + "valid_targets_mean": 3737.8, + "valid_targets_min": 2695 + }, + { + "epoch": 6.77707676130389, + "grad_norm": 0.5412776019933817, + "learning_rate": 
1.2462602989376404e-07, + "loss": 0.0726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08294028043746948, + "step": 6445, + "valid_targets_mean": 1918.2, + "valid_targets_min": 664 + }, + { + "epoch": 6.782334384858045, + "grad_norm": 0.3984422160277871, + "learning_rate": 1.1884944999222658e-07, + "loss": 0.0709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05830984562635422, + "step": 6450, + "valid_targets_mean": 3263.6, + "valid_targets_min": 1584 + }, + { + "epoch": 6.787592008412197, + "grad_norm": 0.39046213687264053, + "learning_rate": 1.1320954298140063e-07, + "loss": 0.0574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0653320848941803, + "step": 6455, + "valid_targets_mean": 3247.5, + "valid_targets_min": 1003 + }, + { + "epoch": 6.792849631966352, + "grad_norm": 0.3886721526102773, + "learning_rate": 1.0770634763280552e-07, + "loss": 0.0715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06041385605931282, + "step": 6460, + "valid_targets_mean": 2887.5, + "valid_targets_min": 531 + }, + { + "epoch": 6.798107255520505, + "grad_norm": 0.42114525909840816, + "learning_rate": 1.023399017781368e-07, + "loss": 0.0704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06531903892755508, + "step": 6465, + "valid_targets_mean": 3249.1, + "valid_targets_min": 1003 + }, + { + "epoch": 6.803364879074659, + "grad_norm": 0.4703782400079518, + "learning_rate": 9.711024230900423e-08, + "loss": 0.0828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06593647599220276, + "step": 6470, + "valid_targets_mean": 2108.2, + "valid_targets_min": 597 + }, + { + "epoch": 6.808622502628812, + "grad_norm": 0.4250471242642898, + "learning_rate": 9.201740517668089e-08, + "loss": 0.0693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06148836016654968, + "step": 6475, + "valid_targets_mean": 2457.1, + "valid_targets_min": 665 + }, + { + "epoch": 6.813880126182966, + "grad_norm": 0.5638516909636172, + "learning_rate": 8.706142539185447e-08, + "loss": 0.0767, + "loss_nan_ranks": 0, + "loss_rank_avg": 
0.07306583970785141, + "step": 6480, + "valid_targets_mean": 1751.3, + "valid_targets_min": 829 + }, + { + "epoch": 6.819137749737119, + "grad_norm": 0.4540823384194377, + "learning_rate": 8.224233702438966e-08, + "loss": 0.0744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07954973727464676, + "step": 6485, + "valid_targets_mean": 1958.6, + "valid_targets_min": 650 + }, + { + "epoch": 6.8243953732912725, + "grad_norm": 0.4910919666792136, + "learning_rate": 7.756017320309283e-08, + "loss": 0.0775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07550451904535294, + "step": 6490, + "valid_targets_mean": 2192.6, + "valid_targets_min": 688 + }, + { + "epoch": 6.829652996845426, + "grad_norm": 0.4206177656567329, + "learning_rate": 7.301496611547665e-08, + "loss": 0.0638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.041690800338983536, + "step": 6495, + "valid_targets_mean": 3516.8, + "valid_targets_min": 1836 + }, + { + "epoch": 6.8349106203995795, + "grad_norm": 0.3773738904949249, + "learning_rate": 6.86067470075491e-08, + "loss": 0.0653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.04687555879354477, + "step": 6500, + "valid_targets_mean": 3454.2, + "valid_targets_min": 2543 + }, + { + "epoch": 6.840168243953733, + "grad_norm": 0.4563756234236278, + "learning_rate": 6.433554618359816e-08, + "loss": 0.0896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07210962474346161, + "step": 6505, + "valid_targets_mean": 2119.8, + "valid_targets_min": 446 + }, + { + "epoch": 6.8454258675078865, + "grad_norm": 0.5475546212908606, + "learning_rate": 6.020139300597638e-08, + "loss": 0.061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060839422047138214, + "step": 6510, + "valid_targets_mean": 2944.4, + "valid_targets_min": 681 + }, + { + "epoch": 6.85068349106204, + "grad_norm": 0.4804288102156721, + "learning_rate": 5.620431589490105e-08, + "loss": 0.0671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0812021791934967, + "step": 6515, + "valid_targets_mean": 3749.1, + "valid_targets_min": 
555 + }, + { + "epoch": 6.8559411146161935, + "grad_norm": 0.37197474506671335, + "learning_rate": 5.234434232826324e-08, + "loss": 0.0679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05977034568786621, + "step": 6520, + "valid_targets_mean": 3300.1, + "valid_targets_min": 484 + }, + { + "epoch": 6.861198738170347, + "grad_norm": 0.34878996125982237, + "learning_rate": 4.862149884143907e-08, + "loss": 0.0568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05171991139650345, + "step": 6525, + "valid_targets_mean": 4140.2, + "valid_targets_min": 861 + }, + { + "epoch": 6.8664563617245005, + "grad_norm": 0.5125166627753399, + "learning_rate": 4.503581102709875e-08, + "loss": 0.0686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.10298755764961243, + "step": 6530, + "valid_targets_mean": 3947.6, + "valid_targets_min": 2020 + }, + { + "epoch": 6.871713985278654, + "grad_norm": 0.43331284972761114, + "learning_rate": 4.1587303535040035e-08, + "loss": 0.0704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0699128806591034, + "step": 6535, + "valid_targets_mean": 3724.9, + "valid_targets_min": 559 + }, + { + "epoch": 6.8769716088328074, + "grad_norm": 0.4372840859803025, + "learning_rate": 3.827600007201282e-08, + "loss": 0.0868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06751592457294464, + "step": 6540, + "valid_targets_mean": 3279.9, + "valid_targets_min": 689 + }, + { + "epoch": 6.882229232386961, + "grad_norm": 0.5845756715897907, + "learning_rate": 3.510192340156149e-08, + "loss": 0.0898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18710404634475708, + "step": 6545, + "valid_targets_mean": 2807.4, + "valid_targets_min": 616 + }, + { + "epoch": 6.887486855941114, + "grad_norm": 0.4566154404397145, + "learning_rate": 3.20650953438606e-08, + "loss": 0.0517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0638362467288971, + "step": 6550, + "valid_targets_mean": 2360.1, + "valid_targets_min": 602 + }, + { + "epoch": 6.892744479495268, + "grad_norm": 0.4271135835525021, + 
"learning_rate": 2.9165536775574987e-08, + "loss": 0.0847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.0660814568400383, + "step": 6555, + "valid_targets_mean": 3388.2, + "valid_targets_min": 723 + }, + { + "epoch": 6.898002103049421, + "grad_norm": 0.4199480926290346, + "learning_rate": 2.6403267629706575e-08, + "loss": 0.0665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06848834455013275, + "step": 6560, + "valid_targets_mean": 3025.7, + "valid_targets_min": 596 + }, + { + "epoch": 6.903259726603575, + "grad_norm": 0.38400761009100814, + "learning_rate": 2.3778306895467785e-08, + "loss": 0.067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05867878720164299, + "step": 6565, + "valid_targets_mean": 3426.3, + "valid_targets_min": 743 + }, + { + "epoch": 6.908517350157728, + "grad_norm": 0.4830151526372453, + "learning_rate": 2.1290672618135e-08, + "loss": 0.0793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.09053267538547516, + "step": 6570, + "valid_targets_mean": 2339.8, + "valid_targets_min": 706 + }, + { + "epoch": 6.913774973711882, + "grad_norm": 0.490713299022986, + "learning_rate": 1.8940381898946424e-08, + "loss": 0.104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1143651232123375, + "step": 6575, + "valid_targets_mean": 2480.9, + "valid_targets_min": 710 + }, + { + "epoch": 6.919032597266035, + "grad_norm": 0.41154086999628936, + "learning_rate": 1.6727450894959973e-08, + "loss": 0.0759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06120670586824417, + "step": 6580, + "valid_targets_mean": 2543.4, + "valid_targets_min": 647 + }, + { + "epoch": 6.92429022082019, + "grad_norm": 0.523892654936522, + "learning_rate": 1.4651894818966671e-08, + "loss": 0.07, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07591065764427185, + "step": 6585, + "valid_targets_mean": 1684.0, + "valid_targets_min": 716 + }, + { + "epoch": 6.929547844374342, + "grad_norm": 0.4250053502226287, + "learning_rate": 1.2713727939364096e-08, + "loss": 0.0661, + "loss_nan_ranks": 0, + 
"loss_rank_avg": 0.06668030470609665, + "step": 6590, + "valid_targets_mean": 2971.1, + "valid_targets_min": 542 + }, + { + "epoch": 6.934805467928497, + "grad_norm": 0.47442923744851695, + "learning_rate": 1.091296358007643e-08, + "loss": 0.0654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06776121258735657, + "step": 6595, + "valid_targets_mean": 2640.1, + "valid_targets_min": 832 + }, + { + "epoch": 6.94006309148265, + "grad_norm": 0.4650772531933131, + "learning_rate": 9.249614120450113e-09, + "loss": 0.0555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.061403002589941025, + "step": 6600, + "valid_targets_mean": 2104.2, + "valid_targets_min": 635 + }, + { + "epoch": 6.945320715036804, + "grad_norm": 0.432011520330159, + "learning_rate": 7.723690995171673e-09, + "loss": 0.0812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.058847442269325256, + "step": 6605, + "valid_targets_mean": 2595.4, + "valid_targets_min": 756 + }, + { + "epoch": 6.950578338590957, + "grad_norm": 0.48972432746720485, + "learning_rate": 6.335204694196684e-09, + "loss": 0.0607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.06145904213190079, + "step": 6610, + "valid_targets_mean": 2202.8, + "valid_targets_min": 704 + }, + { + "epoch": 6.955835962145111, + "grad_norm": 0.37478262650741523, + "learning_rate": 5.084164762667598e-09, + "loss": 0.0632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05222868174314499, + "step": 6615, + "valid_targets_mean": 2810.8, + "valid_targets_min": 529 + }, + { + "epoch": 6.961093585699264, + "grad_norm": 0.5353009120426401, + "learning_rate": 3.970579800853802e-09, + "loss": 0.0718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.08522634208202362, + "step": 6620, + "valid_targets_mean": 3550.4, + "valid_targets_min": 2017 + }, + { + "epoch": 6.966351209253418, + "grad_norm": 0.3423937100860187, + "learning_rate": 2.9944574640894398e-09, + "loss": 0.0631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.053340837359428406, + "step": 6625, + "valid_targets_mean": 4882.1, + 
"valid_targets_min": 516 + }, + { + "epoch": 6.971608832807571, + "grad_norm": 0.38481907467412085, + "learning_rate": 2.1558044627267847e-09, + "loss": 0.0609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.060645852237939835, + "step": 6630, + "valid_targets_mean": 3580.6, + "valid_targets_min": 775 + }, + { + "epoch": 6.976866456361725, + "grad_norm": 0.3561829621876444, + "learning_rate": 1.4546265620785094e-09, + "loss": 0.0627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05833351984620094, + "step": 6635, + "valid_targets_mean": 3815.4, + "valid_targets_min": 605 + }, + { + "epoch": 6.982124079915878, + "grad_norm": 0.7199776089504053, + "learning_rate": 8.909285823910374e-10, + "loss": 0.1401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20033405721187592, + "step": 6640, + "valid_targets_mean": 2739.1, + "valid_targets_min": 1473 + }, + { + "epoch": 6.987381703470032, + "grad_norm": 0.3796322592574655, + "learning_rate": 4.647143988067981e-10, + "loss": 0.0778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.062201619148254395, + "step": 6645, + "valid_targets_mean": 3168.9, + "valid_targets_min": 527 + }, + { + "epoch": 6.992639327024185, + "grad_norm": 0.3951104364519597, + "learning_rate": 1.7598694132869853e-10, + "loss": 0.0699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.07217049598693848, + "step": 6650, + "valid_targets_mean": 2708.9, + "valid_targets_min": 717 + }, + { + "epoch": 6.997896950578339, + "grad_norm": 0.48474520987982883, + "learning_rate": 2.474819481568247e-11, + "loss": 0.0692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11167128384113312, + "step": 6655, + "valid_targets_mean": 2495.1, + "valid_targets_min": 927 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.05594291538000107, + "step": 6657, + "total_flos": 1429285307547648.0, + "train_loss": 0.14208543798229403, + "train_runtime": 24540.64, + "train_samples_per_second": 4.338, + "train_steps_per_second": 0.271, + "valid_targets_mean": 2843.0, + "valid_targets_min": 
791 + } + ], + "logging_steps": 5, + "max_steps": 6657, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1429285307547648.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..37c0d3f --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bb5d263776b5abc87e87bf44ca4eb27947648dc412ac9700f60395955ac604 +size 8529 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..7043ad4 Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833