commit f3e1426944954e800fc4219b3fc09d2ae7d6e7ec Author: ModelHub XC Date: Sun May 10 17:38:27 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: open-sci/sft__ot30k_Qwen3-1.7B-Base-SFT-Tulu3-decontaminated Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..d0e278d --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +--- +library_name: transformers +license: other +base_model: ali-elganzory/Qwen3-1.7B-Base-SFT-Tulu3-decontaminated +tags: +- llama-factory +- full +- generated_from_trainer +datasets: +- arrow +model-index: +- name: sft__f679a5c592c8dffb__b4bfd93d8848cb99e95a__qwen3-steps + results: [] +--- + + + +# sft__f679a5c592c8dffb__b4bfd93d8848cb99e95a__qwen3-steps + +This model is a fine-tuned version of [ali-elganzory/Qwen3-1.7B-Base-SFT-Tulu3-decontaminated](https://huggingface.co/ali-elganzory/Qwen3-1.7B-Base-SFT-Tulu3-decontaminated) on the /gpfs/scratch/ehpc524/ot/hf_hub/datasets/open-thoughts_open_thoughts3-1.2_m_30000_samples/default/0.0.0/f679a5c592c8dffb dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 4e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 32 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 128 +- total_eval_batch_size: 256 +- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.1 +- num_epochs: 5.0 + +### Training results + + + +### Framework versions + +- Transformers 5.5.0 +- Pytorch 2.10.0+cu128 +- Datasets 4.8.4 +- Tokenizers 0.22.2 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..6591895 --- /dev/null +++ b/all_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 5.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4691426753997803, + "total_flos": 1658037051588608.0, + "train_loss": 1.0214215231449046, + "train_runtime": 6105.894, + "train_samples_per_second": 24.566, + "train_steps_per_second": 0.192, + "valid_targets_mean": 13499.5, + "valid_targets_min": 3345 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..531b163 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,25 @@ + +{%- for message in messages -%} + {%- if message["role"] == "system" -%} + {{- "<|system|> +" + message["content"] + " +" -}} + {%- elif message["role"] == "user" -%} + {{- "<|user|> +" + message["content"] + " +" -}} + {%- elif message["role"] == "assistant" -%} + {%- if not loop.last -%} + {{- "<|assistant|> +" + message["content"] + eos_token + " +" -}} + {%- else -%} + {{- "<|assistant|> +" + message["content"] + eos_token -}} + {%- endif -%} + {%- endif -%} + {%- if loop.last and add_generation_prompt -%} + {{- "<|assistant|> +" -}} + {%- endif -%} +{%- endfor -%} diff --git a/config.json b/config.json new file mode 100644 index 0000000..fcd964a --- /dev/null +++ b/config.json @@ -0,0 +1,63 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen3", + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.5.0", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151680 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..e3d93ac --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "eos_token_id": [ + 151645, + 151643 + ], + "max_new_tokens": 2048, + "pad_token_id": 151643, + "transformers_version": "5.5.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..b745974 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d87eea7f22155ab353ad6650b9788e7ad509b4919aac245c3746e5abdb62af2f +size 3440137032 diff --git a/run_summary.json b/run_summary.json new file mode 100644 index 0000000..31aa97e --- /dev/null +++ b/run_summary.json @@ -0,0 +1,12 @@ +{ + "agent_name": "f679a5c592c8dffb", + "training_start": null, + "training_end": null, + "created_by": "DCAgent", + "base_model_name": "/gpfs/scratch/ehpc524/ot/hf_hub/models--ali-elganzory--Qwen3-1.7B-Base-SFT-Tulu3-decontaminated/snapshots/b4bfd93d8848cb99e95aca440452d4a50fafbfce/", + "dataset_name": "/gpfs/scratch/ehpc524/ot/hf_hub/datasets/open-thoughts_open_thoughts3-1.2_m_30000_samples/default/0.0.0/f679a5c592c8dffb", + "training_type": "SFT", + "training_parameters": "https://huggingface.co/mlfoundations-dev/sft__f679a5c592c8dffb__b4bfd93d8848cb99e95a__qwen3-steps/blob/main/config.json", + "wandb_link": null, + "traces_location_s3": null +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..a17e7b7 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb71699fc3ef7cacb0ce6b3d288d36e427ccb020f0ef5db77461a572d25481d +size 11423212 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..2c0952e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "is_local": true, + "model_max_length": 16384, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..6591895 --- /dev/null +++ b/train_results.json @@ -0,0 +1,12 @@ +{ + "epoch": 5.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4691426753997803, + "total_flos": 1658037051588608.0, + "train_loss": 1.0214215231449046, + "train_runtime": 6105.894, + "train_samples_per_second": 24.566, + "train_steps_per_second": 0.192, + "valid_targets_mean": 13499.5, + "valid_targets_min": 3345 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..c9edebb --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1176 @@ +{"current_steps": 1, "total_steps": 1175, "loss": 1.3665432929992676, "lr": 0.0, "epoch": 0.0042643923240938165, "percentage": 0.09, "elapsed_time": "0:00:20", "remaining_time": "6:48:54"} +{"current_steps": 2, "total_steps": 1175, "loss": 1.3736059665679932, "lr": 3.3898305084745766e-07, "epoch": 0.008528784648187633, "percentage": 0.17, "elapsed_time": "0:00:27", "remaining_time": "4:28:27"} +{"current_steps": 3, "total_steps": 1175, "loss": 1.3179807662963867, "lr": 6.779661016949153e-07, "epoch": 0.01279317697228145, "percentage": 0.26, "elapsed_time": "0:00:33", "remaining_time": "3:37:25"} +{"current_steps": 4, "total_steps": 1175, "loss": 1.37325918674469, "lr": 1.016949152542373e-06, "epoch": 0.017057569296375266, "percentage": 0.34, "elapsed_time": "0:00:39", "remaining_time": "3:12:07"} +{"current_steps": 5, "total_steps": 1175, "loss": 1.3412843942642212, "lr": 1.3559322033898307e-06, "epoch": 0.021321961620469083, "percentage": 0.43, "elapsed_time": "0:00:44", "remaining_time": "2:54:05"} +{"current_steps": 6, "total_steps": 1175, "loss": 1.32222318649292, "lr": 1.6949152542372882e-06, "epoch": 0.0255863539445629, "percentage": 0.51, "elapsed_time": "0:00:50", "remaining_time": "2:42:27"} +{"current_steps": 7, "total_steps": 1175, "loss": 1.3199026584625244, "lr": 2.033898305084746e-06, "epoch": 0.029850746268656716, "percentage": 0.6, "elapsed_time": "0:00:55", "remaining_time": "2:34:47"} +{"current_steps": 8, "total_steps": 1175, "loss": 1.3366254568099976, "lr": 2.372881355932204e-06, "epoch": 0.03411513859275053, "percentage": 0.68, "elapsed_time": "0:01:00", "remaining_time": "2:27:59"} +{"current_steps": 9, "total_steps": 1175, "loss": 1.3531262874603271, "lr": 2.7118644067796613e-06, "epoch": 0.03837953091684435, "percentage": 0.77, "elapsed_time": "0:01:06", "remaining_time": "2:22:34"} +{"current_steps": 10, "total_steps": 1175, "loss": 1.3007540702819824, "lr": 3.0508474576271192e-06, "epoch": 0.042643923240938165, "percentage": 0.85, "elapsed_time": "0:01:11", "remaining_time": "2:18:12"} +{"current_steps": 11, "total_steps": 1175, "loss": 1.324140191078186, "lr": 3.3898305084745763e-06, "epoch": 0.046908315565031986, "percentage": 0.94, "elapsed_time": "0:01:16", "remaining_time": "2:14:34"} +{"current_steps": 12, "total_steps": 1175, "loss": 1.2988896369934082, "lr": 3.7288135593220342e-06, "epoch": 0.0511727078891258, "percentage": 1.02, "elapsed_time": "0:01:21", "remaining_time": "2:11:43"} +{"current_steps": 13, "total_steps": 1175, "loss": 1.3134877681732178, "lr": 4.067796610169492e-06, "epoch": 0.05543710021321962, "percentage": 1.11, "elapsed_time": "0:01:26", "remaining_time": "2:09:07"} +{"current_steps": 14, "total_steps": 1175, "loss": 1.3092262744903564, "lr": 4.40677966101695e-06, "epoch": 0.05970149253731343, "percentage": 1.19, "elapsed_time": "0:01:31", "remaining_time": "2:06:52"} +{"current_steps": 15, "total_steps": 1175, "loss": 1.3089406490325928, "lr": 4.745762711864408e-06, "epoch": 0.06396588486140725, "percentage": 1.28, "elapsed_time": "0:01:36", "remaining_time": "2:04:52"} +{"current_steps": 16, "total_steps": 1175, "loss": 1.2389326095581055, "lr": 5.084745762711865e-06, "epoch": 0.06823027718550106, "percentage": 1.36, "elapsed_time": "0:01:42", "remaining_time": "2:03:18"} +{"current_steps": 17, "total_steps": 1175, "loss": 1.2891302108764648, "lr": 5.423728813559323e-06, "epoch": 0.07249466950959488, "percentage": 1.45, "elapsed_time": "0:01:47", "remaining_time": "2:01:58"} +{"current_steps": 18, "total_steps": 1175, "loss": 1.3249022960662842, "lr": 5.7627118644067805e-06, "epoch": 0.0767590618336887, "percentage": 1.53, "elapsed_time": "0:01:52", "remaining_time": "2:00:37"} +{"current_steps": 19, "total_steps": 1175, "loss": 1.2798724174499512, "lr": 6.1016949152542385e-06, "epoch": 0.08102345415778252, "percentage": 1.62, "elapsed_time": "0:01:57", "remaining_time": "1:59:26"} +{"current_steps": 20, "total_steps": 1175, "loss": 1.3002123832702637, "lr": 6.440677966101695e-06, "epoch": 0.08528784648187633, "percentage": 1.7, "elapsed_time": "0:02:02", "remaining_time": "1:58:18"} +{"current_steps": 21, "total_steps": 1175, "loss": 1.2929916381835938, "lr": 6.779661016949153e-06, "epoch": 0.08955223880597014, "percentage": 1.79, "elapsed_time": "0:02:08", "remaining_time": "1:57:16"} +{"current_steps": 22, "total_steps": 1175, "loss": 1.2641987800598145, "lr": 7.1186440677966106e-06, "epoch": 0.09381663113006397, "percentage": 1.87, "elapsed_time": "0:02:13", "remaining_time": "1:56:23"} +{"current_steps": 23, "total_steps": 1175, "loss": 1.2987055778503418, "lr": 7.4576271186440685e-06, "epoch": 0.09808102345415778, "percentage": 1.96, "elapsed_time": "0:02:18", "remaining_time": "1:55:31"} +{"current_steps": 24, "total_steps": 1175, "loss": 1.2431552410125732, "lr": 7.796610169491526e-06, "epoch": 0.1023454157782516, "percentage": 2.04, "elapsed_time": "0:02:23", "remaining_time": "1:54:42"} +{"current_steps": 25, "total_steps": 1175, "loss": 1.238523006439209, "lr": 8.135593220338983e-06, "epoch": 0.10660980810234541, "percentage": 2.13, "elapsed_time": "0:02:29", "remaining_time": "1:54:16"} +{"current_steps": 26, "total_steps": 1175, "loss": 1.2010830640792847, "lr": 8.47457627118644e-06, "epoch": 0.11087420042643924, "percentage": 2.21, "elapsed_time": "0:02:34", "remaining_time": "1:53:36"} +{"current_steps": 27, "total_steps": 1175, "loss": 1.1597228050231934, "lr": 8.8135593220339e-06, "epoch": 0.11513859275053305, "percentage": 2.3, "elapsed_time": "0:02:39", "remaining_time": "1:52:54"} +{"current_steps": 28, "total_steps": 1175, "loss": 1.2551610469818115, "lr": 9.152542372881356e-06, "epoch": 0.11940298507462686, "percentage": 2.38, "elapsed_time": "0:02:44", "remaining_time": "1:52:18"} +{"current_steps": 29, "total_steps": 1175, "loss": 1.1766114234924316, "lr": 9.491525423728815e-06, "epoch": 0.12366737739872068, "percentage": 2.47, "elapsed_time": "0:02:49", "remaining_time": "1:51:41"} +{"current_steps": 30, "total_steps": 1175, "loss": 1.2415480613708496, "lr": 9.830508474576272e-06, "epoch": 0.1279317697228145, "percentage": 2.55, "elapsed_time": "0:02:54", "remaining_time": "1:51:11"} +{"current_steps": 31, "total_steps": 1175, "loss": 1.1535303592681885, "lr": 1.016949152542373e-05, "epoch": 0.13219616204690832, "percentage": 2.64, "elapsed_time": "0:02:59", "remaining_time": "1:50:39"} +{"current_steps": 32, "total_steps": 1175, "loss": 1.1601849794387817, "lr": 1.0508474576271188e-05, "epoch": 0.13646055437100213, "percentage": 2.72, "elapsed_time": "0:03:05", "remaining_time": "1:50:09"} +{"current_steps": 33, "total_steps": 1175, "loss": 1.2404439449310303, "lr": 1.0847457627118645e-05, "epoch": 0.14072494669509594, "percentage": 2.81, "elapsed_time": "0:03:10", "remaining_time": "1:49:41"} +{"current_steps": 34, "total_steps": 1175, "loss": 1.1997580528259277, "lr": 1.1186440677966102e-05, "epoch": 0.14498933901918976, "percentage": 2.89, "elapsed_time": "0:03:15", "remaining_time": "1:49:13"} +{"current_steps": 35, "total_steps": 1175, "loss": 1.1844682693481445, "lr": 1.1525423728813561e-05, "epoch": 0.14925373134328357, "percentage": 2.98, "elapsed_time": "0:03:20", "remaining_time": "1:48:48"} +{"current_steps": 36, "total_steps": 1175, "loss": 1.1813849210739136, "lr": 1.1864406779661018e-05, "epoch": 0.1535181236673774, "percentage": 3.06, "elapsed_time": "0:03:25", "remaining_time": "1:48:27"} +{"current_steps": 37, "total_steps": 1175, "loss": 1.1754765510559082, "lr": 1.2203389830508477e-05, "epoch": 0.15778251599147122, "percentage": 3.15, "elapsed_time": "0:03:30", "remaining_time": "1:48:04"} +{"current_steps": 38, "total_steps": 1175, "loss": 1.1840746402740479, "lr": 1.2542372881355932e-05, "epoch": 0.16204690831556504, "percentage": 3.23, "elapsed_time": "0:03:35", "remaining_time": "1:47:40"} +{"current_steps": 39, "total_steps": 1175, "loss": 1.2005257606506348, "lr": 1.288135593220339e-05, "epoch": 0.16631130063965885, "percentage": 3.32, "elapsed_time": "0:03:41", "remaining_time": "1:47:20"} +{"current_steps": 40, "total_steps": 1175, "loss": 1.1401925086975098, "lr": 1.3220338983050848e-05, "epoch": 0.17057569296375266, "percentage": 3.4, "elapsed_time": "0:03:46", "remaining_time": "1:46:58"} +{"current_steps": 41, "total_steps": 1175, "loss": 1.1670279502868652, "lr": 1.3559322033898305e-05, "epoch": 0.17484008528784648, "percentage": 3.49, "elapsed_time": "0:03:51", "remaining_time": "1:46:39"} +{"current_steps": 42, "total_steps": 1175, "loss": 1.130545973777771, "lr": 1.3898305084745764e-05, "epoch": 0.1791044776119403, "percentage": 3.57, "elapsed_time": "0:03:56", "remaining_time": "1:46:19"} +{"current_steps": 43, "total_steps": 1175, "loss": 1.1756665706634521, "lr": 1.4237288135593221e-05, "epoch": 0.18336886993603413, "percentage": 3.66, "elapsed_time": "0:04:01", "remaining_time": "1:45:59"} +{"current_steps": 44, "total_steps": 1175, "loss": 1.1239181756973267, "lr": 1.4576271186440678e-05, "epoch": 0.18763326226012794, "percentage": 3.74, "elapsed_time": "0:04:06", "remaining_time": "1:45:41"} +{"current_steps": 45, "total_steps": 1175, "loss": 1.1461116075515747, "lr": 1.4915254237288137e-05, "epoch": 0.19189765458422176, "percentage": 3.83, "elapsed_time": "0:04:11", "remaining_time": "1:45:24"} +{"current_steps": 46, "total_steps": 1175, "loss": 1.1434454917907715, "lr": 1.5254237288135594e-05, "epoch": 0.19616204690831557, "percentage": 3.91, "elapsed_time": "0:04:17", "remaining_time": "1:45:09"} +{"current_steps": 47, "total_steps": 1175, "loss": 1.1971436738967896, "lr": 1.5593220338983053e-05, "epoch": 0.20042643923240938, "percentage": 4.0, "elapsed_time": "0:04:22", "remaining_time": "1:44:52"} +{"current_steps": 48, "total_steps": 1175, "loss": 1.1085567474365234, "lr": 1.593220338983051e-05, "epoch": 0.2046908315565032, "percentage": 4.09, "elapsed_time": "0:04:27", "remaining_time": "1:44:35"} +{"current_steps": 49, "total_steps": 1175, "loss": 1.1042619943618774, "lr": 1.6271186440677967e-05, "epoch": 0.208955223880597, "percentage": 4.17, "elapsed_time": "0:04:32", "remaining_time": "1:44:20"} +{"current_steps": 50, "total_steps": 1175, "loss": 1.1156997680664062, "lr": 1.6610169491525424e-05, "epoch": 0.21321961620469082, "percentage": 4.26, "elapsed_time": "0:04:37", "remaining_time": "1:44:03"} +{"current_steps": 51, "total_steps": 1175, "loss": 1.137367844581604, "lr": 1.694915254237288e-05, "epoch": 0.21748400852878466, "percentage": 4.34, "elapsed_time": "0:04:42", "remaining_time": "1:43:49"} +{"current_steps": 52, "total_steps": 1175, "loss": 1.105331301689148, "lr": 1.728813559322034e-05, "epoch": 0.22174840085287847, "percentage": 4.43, "elapsed_time": "0:04:47", "remaining_time": "1:43:36"} +{"current_steps": 53, "total_steps": 1175, "loss": 1.0930171012878418, "lr": 1.76271186440678e-05, "epoch": 0.2260127931769723, "percentage": 4.51, "elapsed_time": "0:04:52", "remaining_time": "1:43:22"} +{"current_steps": 54, "total_steps": 1175, "loss": 1.1302450895309448, "lr": 1.7966101694915256e-05, "epoch": 0.2302771855010661, "percentage": 4.6, "elapsed_time": "0:04:58", "remaining_time": "1:43:08"} +{"current_steps": 55, "total_steps": 1175, "loss": 1.152329921722412, "lr": 1.8305084745762713e-05, "epoch": 0.2345415778251599, "percentage": 4.68, "elapsed_time": "0:05:03", "remaining_time": "1:42:55"} +{"current_steps": 56, "total_steps": 1175, "loss": 1.1638445854187012, "lr": 1.864406779661017e-05, "epoch": 0.23880597014925373, "percentage": 4.77, "elapsed_time": "0:05:08", "remaining_time": "1:42:48"} +{"current_steps": 57, "total_steps": 1175, "loss": 1.1405789852142334, "lr": 1.898305084745763e-05, "epoch": 0.24307036247334754, "percentage": 4.85, "elapsed_time": "0:05:13", "remaining_time": "1:42:35"} +{"current_steps": 58, "total_steps": 1175, "loss": 1.1632418632507324, "lr": 1.9322033898305087e-05, "epoch": 0.24733475479744135, "percentage": 4.94, "elapsed_time": "0:05:18", "remaining_time": "1:42:23"} +{"current_steps": 59, "total_steps": 1175, "loss": 1.1034752130508423, "lr": 1.9661016949152545e-05, "epoch": 0.2515991471215352, "percentage": 5.02, "elapsed_time": "0:05:24", "remaining_time": "1:42:09"} +{"current_steps": 60, "total_steps": 1175, "loss": 1.1177839040756226, "lr": 2e-05, "epoch": 0.255863539445629, "percentage": 5.11, "elapsed_time": "0:05:29", "remaining_time": "1:41:58"} +{"current_steps": 61, "total_steps": 1175, "loss": 1.1403781175613403, "lr": 2.033898305084746e-05, "epoch": 0.2601279317697228, "percentage": 5.19, "elapsed_time": "0:05:34", "remaining_time": "1:41:46"} +{"current_steps": 62, "total_steps": 1175, "loss": 1.1196714639663696, "lr": 2.0677966101694916e-05, "epoch": 0.26439232409381663, "percentage": 5.28, "elapsed_time": "0:05:39", "remaining_time": "1:41:35"} +{"current_steps": 63, "total_steps": 1175, "loss": 1.0709521770477295, "lr": 2.1016949152542376e-05, "epoch": 0.26865671641791045, "percentage": 5.36, "elapsed_time": "0:05:44", "remaining_time": "1:41:25"} +{"current_steps": 64, "total_steps": 1175, "loss": 1.1754413843154907, "lr": 2.1355932203389833e-05, "epoch": 0.27292110874200426, "percentage": 5.45, "elapsed_time": "0:05:49", "remaining_time": "1:41:14"} +{"current_steps": 65, "total_steps": 1175, "loss": 1.1435012817382812, "lr": 2.169491525423729e-05, "epoch": 0.2771855010660981, "percentage": 5.53, "elapsed_time": "0:05:55", "remaining_time": "1:41:03"} +{"current_steps": 66, "total_steps": 1175, "loss": 1.1329200267791748, "lr": 2.2033898305084748e-05, "epoch": 0.2814498933901919, "percentage": 5.62, "elapsed_time": "0:06:00", "remaining_time": "1:40:51"} +{"current_steps": 67, "total_steps": 1175, "loss": 1.1038789749145508, "lr": 2.2372881355932205e-05, "epoch": 0.2857142857142857, "percentage": 5.7, "elapsed_time": "0:06:05", "remaining_time": "1:40:40"} +{"current_steps": 68, "total_steps": 1175, "loss": 1.1426966190338135, "lr": 2.2711864406779665e-05, "epoch": 0.2899786780383795, "percentage": 5.79, "elapsed_time": "0:06:10", "remaining_time": "1:40:31"} +{"current_steps": 69, "total_steps": 1175, "loss": 1.1619558334350586, "lr": 2.3050847457627122e-05, "epoch": 0.2942430703624733, "percentage": 5.87, "elapsed_time": "0:06:15", "remaining_time": "1:40:20"} +{"current_steps": 70, "total_steps": 1175, "loss": 1.1545188426971436, "lr": 2.338983050847458e-05, "epoch": 0.29850746268656714, "percentage": 5.96, "elapsed_time": "0:06:20", "remaining_time": "1:40:09"} +{"current_steps": 71, "total_steps": 1175, "loss": 1.0535039901733398, "lr": 2.3728813559322036e-05, "epoch": 0.302771855010661, "percentage": 6.04, "elapsed_time": "0:06:25", "remaining_time": "1:40:00"} +{"current_steps": 72, "total_steps": 1175, "loss": 1.1028974056243896, "lr": 2.406779661016949e-05, "epoch": 0.3070362473347548, "percentage": 6.13, "elapsed_time": "0:06:31", "remaining_time": "1:39:50"} +{"current_steps": 73, "total_steps": 1175, "loss": 1.112041711807251, "lr": 2.4406779661016954e-05, "epoch": 0.31130063965884863, "percentage": 6.21, "elapsed_time": "0:06:36", "remaining_time": "1:39:38"} +{"current_steps": 74, "total_steps": 1175, "loss": 1.1347367763519287, "lr": 2.474576271186441e-05, "epoch": 0.31556503198294245, "percentage": 6.3, "elapsed_time": "0:06:41", "remaining_time": "1:39:28"} +{"current_steps": 75, "total_steps": 1175, "loss": 1.1315557956695557, "lr": 2.5084745762711865e-05, "epoch": 0.31982942430703626, "percentage": 6.38, "elapsed_time": "0:06:46", "remaining_time": "1:39:18"} +{"current_steps": 76, "total_steps": 1175, "loss": 1.1246960163116455, "lr": 2.5423728813559322e-05, "epoch": 0.32409381663113007, "percentage": 6.47, "elapsed_time": "0:06:51", "remaining_time": "1:39:09"} +{"current_steps": 77, "total_steps": 1175, "loss": 1.0751593112945557, "lr": 2.576271186440678e-05, "epoch": 0.3283582089552239, "percentage": 6.55, "elapsed_time": "0:06:56", "remaining_time": "1:38:59"} +{"current_steps": 78, "total_steps": 1175, "loss": 1.1375093460083008, "lr": 2.610169491525424e-05, "epoch": 0.3326226012793177, "percentage": 6.64, "elapsed_time": "0:07:01", "remaining_time": "1:38:50"} +{"current_steps": 79, "total_steps": 1175, "loss": 1.0897612571716309, "lr": 2.6440677966101696e-05, "epoch": 0.3368869936034115, "percentage": 6.72, "elapsed_time": "0:07:06", "remaining_time": "1:38:41"} +{"current_steps": 80, "total_steps": 1175, "loss": 1.0993152856826782, "lr": 2.6779661016949153e-05, "epoch": 0.3411513859275053, "percentage": 6.81, "elapsed_time": "0:07:12", "remaining_time": "1:38:33"} +{"current_steps": 81, "total_steps": 1175, "loss": 1.1197320222854614, "lr": 2.711864406779661e-05, "epoch": 0.34541577825159914, "percentage": 6.89, "elapsed_time": "0:07:17", "remaining_time": "1:38:24"} +{"current_steps": 82, "total_steps": 1175, "loss": 1.0771827697753906, "lr": 2.7457627118644068e-05, "epoch": 0.34968017057569295, "percentage": 6.98, "elapsed_time": "0:07:22", "remaining_time": "1:38:15"} +{"current_steps": 83, "total_steps": 1175, "loss": 1.0495096445083618, "lr": 2.7796610169491528e-05, "epoch": 0.35394456289978676, "percentage": 7.06, "elapsed_time": "0:07:27", "remaining_time": "1:38:07"} +{"current_steps": 84, "total_steps": 1175, "loss": 1.125932216644287, "lr": 2.8135593220338985e-05, "epoch": 0.3582089552238806, "percentage": 7.15, "elapsed_time": "0:07:32", "remaining_time": "1:37:59"} +{"current_steps": 85, "total_steps": 1175, "loss": 1.099273681640625, "lr": 2.8474576271186442e-05, "epoch": 0.3624733475479744, "percentage": 7.23, "elapsed_time": "0:07:38", "remaining_time": "1:37:53"} +{"current_steps": 86, "total_steps": 1175, "loss": 1.0962635278701782, "lr": 2.88135593220339e-05, "epoch": 0.36673773987206826, "percentage": 7.32, "elapsed_time": "0:07:43", "remaining_time": "1:37:45"} +{"current_steps": 87, "total_steps": 1175, "loss": 1.089374303817749, "lr": 2.9152542372881356e-05, "epoch": 0.37100213219616207, "percentage": 7.4, "elapsed_time": "0:07:48", "remaining_time": "1:37:37"} +{"current_steps": 88, "total_steps": 1175, "loss": 1.1430811882019043, "lr": 2.9491525423728817e-05, "epoch": 0.3752665245202559, "percentage": 7.49, "elapsed_time": "0:07:53", "remaining_time": "1:37:28"} +{"current_steps": 89, "total_steps": 1175, "loss": 1.0902841091156006, "lr": 2.9830508474576274e-05, "epoch": 0.3795309168443497, "percentage": 7.57, "elapsed_time": "0:07:58", "remaining_time": "1:37:22"} +{"current_steps": 90, "total_steps": 1175, "loss": 1.1340059041976929, "lr": 3.016949152542373e-05, "epoch": 0.3837953091684435, "percentage": 7.66, "elapsed_time": "0:08:03", "remaining_time": "1:37:13"} +{"current_steps": 91, "total_steps": 1175, "loss": 1.1458442211151123, "lr": 3.0508474576271188e-05, "epoch": 0.3880597014925373, "percentage": 7.74, "elapsed_time": "0:08:09", "remaining_time": "1:37:05"} +{"current_steps": 92, "total_steps": 1175, "loss": 1.048313856124878, "lr": 3.084745762711865e-05, "epoch": 0.39232409381663114, "percentage": 7.83, "elapsed_time": "0:08:14", "remaining_time": "1:36:57"} +{"current_steps": 93, "total_steps": 1175, "loss": 1.1587541103363037, "lr": 3.1186440677966106e-05, "epoch": 0.39658848614072495, "percentage": 7.91, "elapsed_time": "0:08:19", "remaining_time": "1:36:49"} +{"current_steps": 94, "total_steps": 1175, "loss": 1.0834836959838867, "lr": 3.152542372881356e-05, "epoch": 0.40085287846481876, "percentage": 8.0, "elapsed_time": "0:08:24", "remaining_time": "1:36:40"} +{"current_steps": 95, "total_steps": 1175, "loss": 1.0956907272338867, "lr": 3.186440677966102e-05, "epoch": 0.4051172707889126, "percentage": 8.09, "elapsed_time": "0:08:29", "remaining_time": "1:36:33"} +{"current_steps": 96, "total_steps": 1175, "loss": 1.0783826112747192, "lr": 3.2203389830508473e-05, "epoch": 0.4093816631130064, "percentage": 8.17, "elapsed_time": "0:08:34", "remaining_time": "1:36:24"} +{"current_steps": 97, "total_steps": 1175, "loss": 1.0799309015274048, "lr": 3.2542372881355934e-05, "epoch": 0.4136460554371002, "percentage": 8.26, "elapsed_time": "0:08:39", "remaining_time": "1:36:17"} +{"current_steps": 98, "total_steps": 1175, "loss": 1.0566236972808838, "lr": 3.2881355932203394e-05, "epoch": 0.417910447761194, "percentage": 8.34, "elapsed_time": "0:08:45", "remaining_time": "1:36:09"} +{"current_steps": 99, "total_steps": 1175, "loss": 1.0985007286071777, "lr": 3.322033898305085e-05, "epoch": 0.42217484008528783, "percentage": 8.43, "elapsed_time": "0:08:50", "remaining_time": "1:36:03"} +{"current_steps": 100, "total_steps": 1175, "loss": 1.083620548248291, "lr": 3.355932203389831e-05, "epoch": 0.42643923240938164, "percentage": 8.51, "elapsed_time": "0:08:55", "remaining_time": "1:35:55"} +{"current_steps": 101, "total_steps": 1175, "loss": 1.104163646697998, "lr": 3.389830508474576e-05, "epoch": 0.43070362473347545, "percentage": 8.6, "elapsed_time": "0:09:00", "remaining_time": "1:35:48"} +{"current_steps": 102, "total_steps": 1175, "loss": 1.1375088691711426, "lr": 3.423728813559322e-05, "epoch": 0.4349680170575693, "percentage": 8.68, "elapsed_time": "0:09:05", "remaining_time": "1:35:40"} +{"current_steps": 103, "total_steps": 1175, "loss": 1.106834888458252, "lr": 3.457627118644068e-05, "epoch": 0.43923240938166314, "percentage": 8.77, "elapsed_time": "0:09:10", "remaining_time": "1:35:33"} +{"current_steps": 104, "total_steps": 1175, "loss": 1.0580928325653076, "lr": 3.491525423728814e-05, "epoch": 0.44349680170575695, "percentage": 8.85, "elapsed_time": "0:09:16", "remaining_time": "1:35:26"} +{"current_steps": 105, "total_steps": 1175, "loss": 1.0532739162445068, "lr": 3.52542372881356e-05, "epoch": 0.44776119402985076, "percentage": 8.94, "elapsed_time": "0:09:21", "remaining_time": "1:35:19"} +{"current_steps": 106, "total_steps": 1175, "loss": 1.0518145561218262, "lr": 3.559322033898305e-05, "epoch": 0.4520255863539446, "percentage": 9.02, "elapsed_time": "0:09:26", "remaining_time": "1:35:11"} +{"current_steps": 107, "total_steps": 1175, "loss": 1.0239077806472778, "lr": 3.593220338983051e-05, "epoch": 0.4562899786780384, "percentage": 9.11, "elapsed_time": "0:09:31", "remaining_time": "1:35:04"} +{"current_steps": 108, "total_steps": 1175, "loss": 1.121671199798584, "lr": 3.627118644067797e-05, "epoch": 0.4605543710021322, "percentage": 9.19, "elapsed_time": "0:09:36", "remaining_time": "1:34:57"} +{"current_steps": 109, "total_steps": 1175, "loss": 1.1195881366729736, "lr": 3.6610169491525426e-05, "epoch": 0.464818763326226, "percentage": 9.28, "elapsed_time": "0:09:41", "remaining_time": "1:34:50"} +{"current_steps": 110, "total_steps": 1175, "loss": 1.0606300830841064, "lr": 3.6949152542372886e-05, "epoch": 0.4690831556503198, "percentage": 9.36, "elapsed_time": "0:09:47", "remaining_time": "1:34:43"} +{"current_steps": 111, "total_steps": 1175, "loss": 1.077075481414795, "lr": 3.728813559322034e-05, "epoch": 0.47334754797441364, "percentage": 9.45, "elapsed_time": "0:09:52", "remaining_time": "1:34:35"} +{"current_steps": 112, "total_steps": 1175, "loss": 1.1480183601379395, "lr": 3.76271186440678e-05, "epoch": 0.47761194029850745, "percentage": 9.53, "elapsed_time": "0:09:57", "remaining_time": "1:34:28"} +{"current_steps": 113, "total_steps": 1175, "loss": 1.1095085144042969, "lr": 3.796610169491526e-05, "epoch": 0.48187633262260127, "percentage": 9.62, "elapsed_time": "0:10:02", "remaining_time": "1:34:20"} +{"current_steps": 114, "total_steps": 1175, "loss": 1.121692419052124, "lr": 3.8305084745762714e-05, "epoch": 0.4861407249466951, "percentage": 9.7, "elapsed_time": "0:10:07", "remaining_time": "1:34:13"} +{"current_steps": 115, "total_steps": 1175, "loss": 1.062641978263855, "lr": 3.8644067796610175e-05, "epoch": 0.4904051172707889, "percentage": 9.79, "elapsed_time": "0:10:12", "remaining_time": "1:34:06"} +{"current_steps": 116, "total_steps": 1175, "loss": 1.091822624206543, "lr": 3.898305084745763e-05, "epoch": 0.4946695095948827, "percentage": 9.87, "elapsed_time": "0:10:17", "remaining_time": "1:33:59"} +{"current_steps": 117, "total_steps": 1175, "loss": 1.042148470878601, "lr": 3.932203389830509e-05, "epoch": 0.4989339019189765, "percentage": 9.96, "elapsed_time": "0:10:22", "remaining_time": "1:33:52"} +{"current_steps": 118, "total_steps": 1175, "loss": 1.0997896194458008, "lr": 3.966101694915255e-05, "epoch": 0.5031982942430704, "percentage": 10.04, "elapsed_time": "0:10:28", "remaining_time": "1:33:45"} +{"current_steps": 119, "total_steps": 1175, "loss": 1.087050199508667, "lr": 4e-05, "epoch": 0.5074626865671642, "percentage": 10.13, "elapsed_time": "0:10:33", "remaining_time": "1:33:38"} +{"current_steps": 120, "total_steps": 1175, "loss": 1.1498842239379883, "lr": 3.999991166161585e-05, "epoch": 0.511727078891258, "percentage": 10.21, "elapsed_time": "0:10:38", "remaining_time": "1:33:31"} +{"current_steps": 121, "total_steps": 1175, "loss": 1.090078592300415, "lr": 3.999964664724376e-05, "epoch": 0.5159914712153518, "percentage": 10.3, "elapsed_time": "0:10:43", "remaining_time": "1:33:25"} +{"current_steps": 122, "total_steps": 1175, "loss": 1.0715370178222656, "lr": 3.999920495922483e-05, "epoch": 0.5202558635394456, "percentage": 10.38, "elapsed_time": "0:10:48", "remaining_time": "1:33:17"} +{"current_steps": 123, "total_steps": 1175, "loss": 1.084350824356079, "lr": 3.999858660146085e-05, "epoch": 0.5245202558635395, "percentage": 10.47, "elapsed_time": "0:10:53", "remaining_time": "1:33:10"} +{"current_steps": 124, "total_steps": 1175, "loss": 1.070378303527832, "lr": 3.999779157941431e-05, "epoch": 0.5287846481876333, "percentage": 10.55, "elapsed_time": "0:10:58", "remaining_time": "1:33:04"} +{"current_steps": 125, "total_steps": 1175, "loss": 1.0922883749008179, "lr": 3.99968199001083e-05, "epoch": 0.5330490405117271, "percentage": 10.64, "elapsed_time": "0:11:03", "remaining_time": "1:32:57"} +{"current_steps": 126, "total_steps": 1175, "loss": 1.043330192565918, "lr": 3.999567157212646e-05, "epoch": 0.5373134328358209, "percentage": 10.72, "elapsed_time": "0:11:09", "remaining_time": "1:32:50"} +{"current_steps": 127, "total_steps": 1175, "loss": 1.0537865161895752, "lr": 3.9994346605612955e-05, "epoch": 0.5415778251599147, "percentage": 10.81, "elapsed_time": "0:11:14", "remaining_time": "1:32:43"} +{"current_steps": 128, "total_steps": 1175, "loss": 1.0584338903427124, "lr": 3.999284501227232e-05, "epoch": 0.5458422174840085, "percentage": 10.89, "elapsed_time": "0:11:19", "remaining_time": "1:32:36"} +{"current_steps": 129, "total_steps": 1175, "loss": 1.1101102828979492, "lr": 3.9991166805369393e-05, "epoch": 0.5501066098081023, "percentage": 10.98, "elapsed_time": "0:11:24", "remaining_time": "1:32:29"} +{"current_steps": 130, "total_steps": 1175, "loss": 1.1048550605773926, "lr": 3.9989311999729166e-05, "epoch": 0.5543710021321961, "percentage": 11.06, "elapsed_time": "0:11:29", "remaining_time": "1:32:23"} +{"current_steps": 131, "total_steps": 1175, "loss": 1.0684092044830322, "lr": 3.99872806117367e-05, "epoch": 0.55863539445629, "percentage": 11.15, "elapsed_time": "0:11:34", "remaining_time": "1:32:16"} +{"current_steps": 132, "total_steps": 1175, "loss": 1.06695556640625, "lr": 3.998507265933696e-05, "epoch": 0.5628997867803838, "percentage": 11.23, "elapsed_time": "0:11:39", "remaining_time": "1:32:10"} +{"current_steps": 133, "total_steps": 1175, "loss": 1.1031931638717651, "lr": 3.9982688162034624e-05, "epoch": 0.5671641791044776, "percentage": 11.32, "elapsed_time": "0:11:44", "remaining_time": "1:32:03"} +{"current_steps": 134, "total_steps": 1175, "loss": 1.1016449928283691, "lr": 3.998012714089397e-05, "epoch": 0.5714285714285714, "percentage": 11.4, "elapsed_time": "0:11:50", "remaining_time": "1:31:56"} +{"current_steps": 135, "total_steps": 1175, "loss": 1.0966145992279053, "lr": 3.997738961853863e-05, "epoch": 0.5756929637526652, "percentage": 11.49, "elapsed_time": "0:11:55", "remaining_time": "1:31:49"} +{"current_steps": 136, "total_steps": 1175, "loss": 1.055633783340454, "lr": 3.9974475619151445e-05, "epoch": 0.579957356076759, "percentage": 11.57, "elapsed_time": "0:12:00", "remaining_time": "1:31:43"} +{"current_steps": 137, "total_steps": 1175, "loss": 1.036048412322998, "lr": 3.997138516847422e-05, "epoch": 0.5842217484008528, "percentage": 11.66, "elapsed_time": "0:12:05", "remaining_time": "1:31:37"} +{"current_steps": 138, "total_steps": 1175, "loss": 1.1014585494995117, "lr": 3.9968118293807476e-05, "epoch": 0.5884861407249466, "percentage": 11.74, "elapsed_time": "0:12:10", "remaining_time": "1:31:30"} +{"current_steps": 139, "total_steps": 1175, "loss": 1.0730267763137817, "lr": 3.996467502401028e-05, "epoch": 0.5927505330490405, "percentage": 11.83, "elapsed_time": "0:12:15", "remaining_time": "1:31:24"} +{"current_steps": 140, "total_steps": 1175, "loss": 1.0382061004638672, "lr": 3.9961055389499904e-05, "epoch": 0.5970149253731343, "percentage": 11.91, "elapsed_time": "0:12:21", "remaining_time": "1:31:18"} +{"current_steps": 141, "total_steps": 1175, "loss": 1.0722460746765137, "lr": 3.995725942225162e-05, "epoch": 0.6012793176972282, "percentage": 12.0, "elapsed_time": "0:12:26", "remaining_time": "1:31:12"} +{"current_steps": 142, "total_steps": 1175, "loss": 1.0785164833068848, "lr": 3.995328715579839e-05, "epoch": 0.605543710021322, "percentage": 12.09, "elapsed_time": "0:12:31", "remaining_time": "1:31:05"} +{"current_steps": 143, "total_steps": 1175, "loss": 1.0484199523925781, "lr": 3.994913862523058e-05, "epoch": 0.6098081023454158, "percentage": 12.17, "elapsed_time": "0:12:36", "remaining_time": "1:30:59"} +{"current_steps": 144, "total_steps": 1175, "loss": 1.0812712907791138, "lr": 3.9944813867195624e-05, "epoch": 0.6140724946695096, "percentage": 12.26, "elapsed_time": "0:12:41", "remaining_time": "1:30:53"} +{"current_steps": 145, "total_steps": 1175, "loss": 1.1256424188613892, "lr": 3.9940312919897744e-05, "epoch": 0.6183368869936035, "percentage": 12.34, "elapsed_time": "0:12:46", "remaining_time": "1:30:46"} +{"current_steps": 146, "total_steps": 1175, "loss": 1.050999402999878, "lr": 3.993563582309759e-05, "epoch": 0.6226012793176973, "percentage": 12.43, "elapsed_time": "0:12:51", "remaining_time": "1:30:40"} +{"current_steps": 147, "total_steps": 1175, "loss": 1.0804365873336792, "lr": 3.993078261811186e-05, "epoch": 0.6268656716417911, "percentage": 12.51, "elapsed_time": "0:12:57", "remaining_time": "1:30:34"} +{"current_steps": 148, "total_steps": 1175, "loss": 1.130464792251587, "lr": 3.9925753347813e-05, "epoch": 0.6311300639658849, "percentage": 12.6, "elapsed_time": "0:13:02", "remaining_time": "1:30:27"} +{"current_steps": 149, "total_steps": 1175, "loss": 1.1157536506652832, "lr": 3.992054805662876e-05, "epoch": 0.6353944562899787, "percentage": 12.68, "elapsed_time": "0:13:07", "remaining_time": "1:30:20"} +{"current_steps": 150, "total_steps": 1175, "loss": 1.0605140924453735, "lr": 3.991516679054185e-05, "epoch": 0.6396588486140725, "percentage": 12.77, "elapsed_time": "0:13:12", "remaining_time": "1:30:14"} +{"current_steps": 151, "total_steps": 1175, "loss": 1.0675933361053467, "lr": 3.9909609597089496e-05, "epoch": 0.6439232409381663, "percentage": 12.85, "elapsed_time": "0:13:17", "remaining_time": "1:30:08"} +{"current_steps": 152, "total_steps": 1175, "loss": 1.0996378660202026, "lr": 3.9903876525363055e-05, "epoch": 0.6481876332622601, "percentage": 12.94, "elapsed_time": "0:13:22", "remaining_time": "1:30:02"} +{"current_steps": 153, "total_steps": 1175, "loss": 1.072128415107727, "lr": 3.989796762600755e-05, "epoch": 0.652452025586354, "percentage": 13.02, "elapsed_time": "0:13:27", "remaining_time": "1:29:55"} +{"current_steps": 154, "total_steps": 1175, "loss": 1.1608052253723145, "lr": 3.9891882951221246e-05, "epoch": 0.6567164179104478, "percentage": 13.11, "elapsed_time": "0:13:32", "remaining_time": "1:29:48"} +{"current_steps": 155, "total_steps": 1175, "loss": 1.043982982635498, "lr": 3.988562255475518e-05, "epoch": 0.6609808102345416, "percentage": 13.19, "elapsed_time": "0:13:37", "remaining_time": "1:29:42"} +{"current_steps": 156, "total_steps": 1175, "loss": 1.0851833820343018, "lr": 3.987918649191268e-05, "epoch": 0.6652452025586354, "percentage": 13.28, "elapsed_time": "0:13:43", "remaining_time": "1:29:36"} +{"current_steps": 157, "total_steps": 1175, "loss": 1.0836174488067627, "lr": 3.987257481954888e-05, "epoch": 0.6695095948827292, "percentage": 13.36, "elapsed_time": "0:13:48", "remaining_time": "1:29:30"} +{"current_steps": 158, "total_steps": 1175, "loss": 1.0783438682556152, "lr": 3.9865787596070236e-05, "epoch": 0.673773987206823, "percentage": 13.45, "elapsed_time": "0:13:53", "remaining_time": "1:29:23"} +{"current_steps": 159, "total_steps": 1175, "loss": 1.0921587944030762, "lr": 3.9858824881433975e-05, "epoch": 0.6780383795309168, "percentage": 13.53, "elapsed_time": "0:13:58", "remaining_time": "1:29:17"} +{"current_steps": 160, "total_steps": 1175, "loss": 1.1289031505584717, "lr": 3.9851686737147585e-05, "epoch": 0.6823027718550106, "percentage": 13.62, "elapsed_time": "0:14:03", "remaining_time": "1:29:11"} +{"current_steps": 161, "total_steps": 1175, "loss": 1.0423595905303955, "lr": 3.9844373226268305e-05, "epoch": 0.6865671641791045, "percentage": 13.7, "elapsed_time": "0:14:08", "remaining_time": "1:29:05"} +{"current_steps": 162, "total_steps": 1175, "loss": 1.109586477279663, "lr": 3.983688441340249e-05, "epoch": 0.6908315565031983, "percentage": 13.79, "elapsed_time": "0:14:13", "remaining_time": "1:28:59"} +{"current_steps": 163, "total_steps": 1175, "loss": 1.0937280654907227, "lr": 3.98292203647051e-05, "epoch": 0.6950959488272921, "percentage": 13.87, "elapsed_time": "0:14:18", "remaining_time": "1:28:53"} +{"current_steps": 164, "total_steps": 1175, "loss": 1.0845508575439453, "lr": 3.982138114787912e-05, "epoch": 0.6993603411513859, "percentage": 13.96, "elapsed_time": "0:14:24", "remaining_time": "1:28:47"} +{"current_steps": 165, "total_steps": 1175, "loss": 1.057763695716858, "lr": 3.98133668321749e-05, "epoch": 0.7036247334754797, "percentage": 14.04, "elapsed_time": "0:14:29", "remaining_time": "1:28:41"} +{"current_steps": 166, "total_steps": 1175, "loss": 1.1555659770965576, "lr": 3.980517748838963e-05, "epoch": 0.7078891257995735, "percentage": 14.13, "elapsed_time": "0:14:34", "remaining_time": "1:28:34"} +{"current_steps": 167, "total_steps": 1175, "loss": 1.0604078769683838, "lr": 3.979681318886664e-05, "epoch": 0.7121535181236673, "percentage": 14.21, "elapsed_time": "0:14:39", "remaining_time": "1:28:28"} +{"current_steps": 168, "total_steps": 1175, "loss": 1.0976730585098267, "lr": 3.978827400749481e-05, "epoch": 0.7164179104477612, "percentage": 14.3, "elapsed_time": "0:14:44", "remaining_time": "1:28:22"} +{"current_steps": 169, "total_steps": 1175, "loss": 1.1516985893249512, "lr": 3.977956001970788e-05, "epoch": 0.720682302771855, "percentage": 14.38, "elapsed_time": "0:14:49", "remaining_time": "1:28:16"} +{"current_steps": 170, "total_steps": 1175, "loss": 1.090247392654419, "lr": 3.977067130248381e-05, "epoch": 0.7249466950959488, "percentage": 14.47, "elapsed_time": "0:14:54", "remaining_time": "1:28:10"} +{"current_steps": 171, "total_steps": 1175, "loss": 1.0230085849761963, "lr": 3.9761607934344095e-05, "epoch": 0.7292110874200426, "percentage": 14.55, "elapsed_time": "0:15:00", "remaining_time": "1:28:04"} +{"current_steps": 172, "total_steps": 1175, "loss": 1.0465095043182373, "lr": 3.975236999535306e-05, "epoch": 0.7334754797441365, "percentage": 14.64, "elapsed_time": "0:15:05", "remaining_time": "1:27:59"} +{"current_steps": 173, "total_steps": 1175, "loss": 1.0935044288635254, "lr": 3.974295756711717e-05, "epoch": 0.7377398720682303, "percentage": 14.72, "elapsed_time": "0:15:10", "remaining_time": "1:27:53"} +{"current_steps": 174, "total_steps": 1175, "loss": 1.115492820739746, "lr": 3.9733370732784296e-05, "epoch": 0.7420042643923241, "percentage": 14.81, "elapsed_time": "0:15:15", "remaining_time": "1:27:47"} +{"current_steps": 175, "total_steps": 1175, "loss": 1.1452744007110596, "lr": 3.972360957704298e-05, "epoch": 0.746268656716418, "percentage": 14.89, "elapsed_time": "0:15:20", "remaining_time": "1:27:41"} +{"current_steps": 176, "total_steps": 1175, "loss": 1.0543792247772217, "lr": 3.97136741861217e-05, "epoch": 0.7505330490405118, "percentage": 14.98, "elapsed_time": "0:15:25", "remaining_time": "1:27:35"} +{"current_steps": 177, "total_steps": 1175, "loss": 1.089555025100708, "lr": 3.970356464778808e-05, "epoch": 0.7547974413646056, "percentage": 15.06, "elapsed_time": "0:15:30", "remaining_time": "1:27:29"} +{"current_steps": 178, "total_steps": 1175, "loss": 1.077789068222046, "lr": 3.969328105134817e-05, "epoch": 0.7590618336886994, "percentage": 15.15, "elapsed_time": "0:15:36", "remaining_time": "1:27:23"} +{"current_steps": 179, "total_steps": 1175, "loss": 1.0952332019805908, "lr": 3.9682823487645584e-05, "epoch": 0.7633262260127932, "percentage": 15.23, "elapsed_time": "0:15:41", "remaining_time": "1:27:17"} +{"current_steps": 180, "total_steps": 1175, "loss": 1.05403733253479, "lr": 3.9672192049060745e-05, "epoch": 0.767590618336887, "percentage": 15.32, "elapsed_time": "0:15:46", "remaining_time": "1:27:11"} +{"current_steps": 181, "total_steps": 1175, "loss": 1.091308355331421, "lr": 3.966138682951008e-05, "epoch": 0.7718550106609808, "percentage": 15.4, "elapsed_time": "0:15:51", "remaining_time": "1:27:05"} +{"current_steps": 182, "total_steps": 1175, "loss": 1.1261098384857178, "lr": 3.9650407924445147e-05, "epoch": 0.7761194029850746, "percentage": 15.49, "elapsed_time": "0:15:56", "remaining_time": "1:26:59"} +{"current_steps": 183, "total_steps": 1175, "loss": 1.0834410190582275, "lr": 3.963925543085181e-05, "epoch": 0.7803837953091685, "percentage": 15.57, "elapsed_time": "0:16:01", "remaining_time": "1:26:54"} +{"current_steps": 184, "total_steps": 1175, "loss": 1.0013039112091064, "lr": 3.96279294472494e-05, "epoch": 0.7846481876332623, "percentage": 15.66, "elapsed_time": "0:16:07", "remaining_time": "1:26:48"} +{"current_steps": 185, "total_steps": 1175, "loss": 1.0587292909622192, "lr": 3.961643007368984e-05, "epoch": 0.7889125799573561, "percentage": 15.74, "elapsed_time": "0:16:12", "remaining_time": "1:26:42"} +{"current_steps": 186, "total_steps": 1175, "loss": 1.1106066703796387, "lr": 3.960475741175671e-05, "epoch": 0.7931769722814499, "percentage": 15.83, "elapsed_time": "0:16:17", "remaining_time": "1:26:37"} +{"current_steps": 187, "total_steps": 1175, "loss": 1.0540430545806885, "lr": 3.959291156456444e-05, "epoch": 0.7974413646055437, "percentage": 15.91, "elapsed_time": "0:16:22", "remaining_time": "1:26:31"} +{"current_steps": 188, "total_steps": 1175, "loss": 1.039066195487976, "lr": 3.9580892636757334e-05, "epoch": 0.8017057569296375, "percentage": 16.0, "elapsed_time": "0:16:27", "remaining_time": "1:26:25"} +{"current_steps": 189, "total_steps": 1175, "loss": 1.0647523403167725, "lr": 3.9568700734508645e-05, "epoch": 0.8059701492537313, "percentage": 16.09, "elapsed_time": "0:16:32", "remaining_time": "1:26:19"} +{"current_steps": 190, "total_steps": 1175, "loss": 1.0983606576919556, "lr": 3.955633596551967e-05, "epoch": 0.8102345415778252, "percentage": 16.17, "elapsed_time": "0:16:37", "remaining_time": "1:26:13"} +{"current_steps": 191, "total_steps": 1175, "loss": 1.0771918296813965, "lr": 3.9543798439018776e-05, "epoch": 0.814498933901919, "percentage": 16.26, "elapsed_time": "0:16:43", "remaining_time": "1:26:08"} +{"current_steps": 192, "total_steps": 1175, "loss": 1.0556185245513916, "lr": 3.953108826576046e-05, "epoch": 0.8187633262260128, "percentage": 16.34, "elapsed_time": "0:16:48", "remaining_time": "1:26:02"} +{"current_steps": 193, "total_steps": 1175, "loss": 1.0616166591644287, "lr": 3.9518205558024334e-05, "epoch": 0.8230277185501066, "percentage": 16.43, "elapsed_time": "0:16:53", "remaining_time": "1:25:56"} +{"current_steps": 194, "total_steps": 1175, "loss": 1.057494878768921, "lr": 3.9505150429614154e-05, "epoch": 0.8272921108742004, "percentage": 16.51, "elapsed_time": "0:16:58", "remaining_time": "1:25:50"} +{"current_steps": 195, "total_steps": 1175, "loss": 1.0607072114944458, "lr": 3.949192299585681e-05, "epoch": 0.8315565031982942, "percentage": 16.6, "elapsed_time": "0:17:03", "remaining_time": "1:25:45"} +{"current_steps": 196, "total_steps": 1175, "loss": 1.062612533569336, "lr": 3.9478523373601325e-05, "epoch": 0.835820895522388, "percentage": 16.68, "elapsed_time": "0:17:08", "remaining_time": "1:25:39"} +{"current_steps": 197, "total_steps": 1175, "loss": 1.0612168312072754, "lr": 3.946495168121778e-05, "epoch": 0.8400852878464818, "percentage": 16.77, "elapsed_time": "0:17:14", "remaining_time": "1:25:33"} +{"current_steps": 198, "total_steps": 1175, "loss": 1.047271728515625, "lr": 3.9451208038596325e-05, "epoch": 0.8443496801705757, "percentage": 16.85, "elapsed_time": "0:17:19", "remaining_time": "1:25:27"} +{"current_steps": 199, "total_steps": 1175, "loss": 1.062045931816101, "lr": 3.943729256714608e-05, "epoch": 0.8486140724946695, "percentage": 16.94, "elapsed_time": "0:17:24", "remaining_time": "1:25:21"} +{"current_steps": 200, "total_steps": 1175, "loss": 1.0524030923843384, "lr": 3.942320538979408e-05, "epoch": 0.8528784648187633, "percentage": 17.02, "elapsed_time": "0:17:29", "remaining_time": "1:25:15"} +{"current_steps": 201, "total_steps": 1175, "loss": 1.0022788047790527, "lr": 3.9408946630984144e-05, "epoch": 0.8571428571428571, "percentage": 17.11, "elapsed_time": "0:17:34", "remaining_time": "1:25:09"} +{"current_steps": 202, "total_steps": 1175, "loss": 1.0410001277923584, "lr": 3.939451641667587e-05, "epoch": 0.8614072494669509, "percentage": 17.19, "elapsed_time": "0:17:39", "remaining_time": "1:25:04"} +{"current_steps": 203, "total_steps": 1175, "loss": 1.0641515254974365, "lr": 3.937991487434342e-05, "epoch": 0.8656716417910447, "percentage": 17.28, "elapsed_time": "0:17:44", "remaining_time": "1:24:58"} +{"current_steps": 204, "total_steps": 1175, "loss": 1.13057541847229, "lr": 3.9365142132974484e-05, "epoch": 0.8699360341151386, "percentage": 17.36, "elapsed_time": "0:17:49", "remaining_time": "1:24:52"} +{"current_steps": 205, "total_steps": 1175, "loss": 1.0646021366119385, "lr": 3.935019832306905e-05, "epoch": 0.8742004264392325, "percentage": 17.45, "elapsed_time": "0:17:55", "remaining_time": "1:24:46"} +{"current_steps": 206, "total_steps": 1175, "loss": 1.0732862949371338, "lr": 3.933508357663832e-05, "epoch": 0.8784648187633263, "percentage": 17.53, "elapsed_time": "0:18:00", "remaining_time": "1:24:41"} +{"current_steps": 207, "total_steps": 1175, "loss": 1.0405564308166504, "lr": 3.9319798027203544e-05, "epoch": 0.8827292110874201, "percentage": 17.62, "elapsed_time": "0:18:05", "remaining_time": "1:24:35"} +{"current_steps": 208, "total_steps": 1175, "loss": 1.0693408250808716, "lr": 3.930434180979478e-05, "epoch": 0.8869936034115139, "percentage": 17.7, "elapsed_time": "0:18:10", "remaining_time": "1:24:29"} +{"current_steps": 209, "total_steps": 1175, "loss": 1.0629595518112183, "lr": 3.928871506094975e-05, "epoch": 0.8912579957356077, "percentage": 17.79, "elapsed_time": "0:18:15", "remaining_time": "1:24:23"} +{"current_steps": 210, "total_steps": 1175, "loss": 1.0810761451721191, "lr": 3.927291791871264e-05, "epoch": 0.8955223880597015, "percentage": 17.87, "elapsed_time": "0:18:20", "remaining_time": "1:24:18"} +{"current_steps": 211, "total_steps": 1175, "loss": 1.069692611694336, "lr": 3.925695052263284e-05, "epoch": 0.8997867803837953, "percentage": 17.96, "elapsed_time": "0:18:25", "remaining_time": "1:24:12"} +{"current_steps": 212, "total_steps": 1175, "loss": 1.043962836265564, "lr": 3.924081301376375e-05, "epoch": 0.9040511727078892, "percentage": 18.04, "elapsed_time": "0:18:31", "remaining_time": "1:24:06"} +{"current_steps": 213, "total_steps": 1175, "loss": 1.0576932430267334, "lr": 3.9224505534661525e-05, "epoch": 0.908315565031983, "percentage": 18.13, "elapsed_time": "0:18:36", "remaining_time": "1:24:00"} +{"current_steps": 214, "total_steps": 1175, "loss": 1.07790207862854, "lr": 3.92080282293838e-05, "epoch": 0.9125799573560768, "percentage": 18.21, "elapsed_time": "0:18:41", "remaining_time": "1:23:55"} +{"current_steps": 215, "total_steps": 1175, "loss": 1.0570908784866333, "lr": 3.9191381243488417e-05, "epoch": 0.9168443496801706, "percentage": 18.3, "elapsed_time": "0:18:46", "remaining_time": "1:23:50"} +{"current_steps": 216, "total_steps": 1175, "loss": 1.0729179382324219, "lr": 3.9174564724032167e-05, "epoch": 0.9211087420042644, "percentage": 18.38, "elapsed_time": "0:18:51", "remaining_time": "1:23:44"} +{"current_steps": 217, "total_steps": 1175, "loss": 1.0518217086791992, "lr": 3.9157578819569455e-05, "epoch": 0.9253731343283582, "percentage": 18.47, "elapsed_time": "0:18:56", "remaining_time": "1:23:38"} +{"current_steps": 218, "total_steps": 1175, "loss": 1.046657919883728, "lr": 3.9140423680151036e-05, "epoch": 0.929637526652452, "percentage": 18.55, "elapsed_time": "0:19:01", "remaining_time": "1:23:32"} +{"current_steps": 219, "total_steps": 1175, "loss": 1.1028754711151123, "lr": 3.9123099457322625e-05, "epoch": 0.9339019189765458, "percentage": 18.64, "elapsed_time": "0:19:07", "remaining_time": "1:23:27"} +{"current_steps": 220, "total_steps": 1175, "loss": 1.0750335454940796, "lr": 3.9105606304123605e-05, "epoch": 0.9381663113006397, "percentage": 18.72, "elapsed_time": "0:19:12", "remaining_time": "1:23:21"} +{"current_steps": 221, "total_steps": 1175, "loss": 1.0630940198898315, "lr": 3.908794437508567e-05, "epoch": 0.9424307036247335, "percentage": 18.81, "elapsed_time": "0:19:17", "remaining_time": "1:23:16"} +{"current_steps": 222, "total_steps": 1175, "loss": 1.0762577056884766, "lr": 3.907011382623145e-05, "epoch": 0.9466950959488273, "percentage": 18.89, "elapsed_time": "0:19:22", "remaining_time": "1:23:10"} +{"current_steps": 223, "total_steps": 1175, "loss": 1.065406322479248, "lr": 3.905211481507313e-05, "epoch": 0.9509594882729211, "percentage": 18.98, "elapsed_time": "0:19:27", "remaining_time": "1:23:04"} +{"current_steps": 224, "total_steps": 1175, "loss": 1.0659347772598267, "lr": 3.903394750061106e-05, "epoch": 0.9552238805970149, "percentage": 19.06, "elapsed_time": "0:19:32", "remaining_time": "1:22:59"} +{"current_steps": 225, "total_steps": 1175, "loss": 1.0389450788497925, "lr": 3.9015612043332375e-05, "epoch": 0.9594882729211087, "percentage": 19.15, "elapsed_time": "0:19:37", "remaining_time": "1:22:53"} +{"current_steps": 226, "total_steps": 1175, "loss": 1.03799307346344, "lr": 3.8997108605209535e-05, "epoch": 0.9637526652452025, "percentage": 19.23, "elapsed_time": "0:19:43", "remaining_time": "1:22:48"} +{"current_steps": 227, "total_steps": 1175, "loss": 1.0135846138000488, "lr": 3.897843734969891e-05, "epoch": 0.9680170575692963, "percentage": 19.32, "elapsed_time": "0:19:48", "remaining_time": "1:22:42"} +{"current_steps": 228, "total_steps": 1175, "loss": 1.0680896043777466, "lr": 3.895959844173937e-05, "epoch": 0.9722814498933902, "percentage": 19.4, "elapsed_time": "0:19:53", "remaining_time": "1:22:36"} +{"current_steps": 229, "total_steps": 1175, "loss": 1.0651593208312988, "lr": 3.8940592047750774e-05, "epoch": 0.976545842217484, "percentage": 19.49, "elapsed_time": "0:19:58", "remaining_time": "1:22:30"} +{"current_steps": 230, "total_steps": 1175, "loss": 1.0773837566375732, "lr": 3.892141833563255e-05, "epoch": 0.9808102345415778, "percentage": 19.57, "elapsed_time": "0:20:03", "remaining_time": "1:22:25"} +{"current_steps": 231, "total_steps": 1175, "loss": 1.0360264778137207, "lr": 3.8902077474762155e-05, "epoch": 0.9850746268656716, "percentage": 19.66, "elapsed_time": "0:20:08", "remaining_time": "1:22:19"} +{"current_steps": 232, "total_steps": 1175, "loss": 1.0562363862991333, "lr": 3.888256963599364e-05, "epoch": 0.9893390191897654, "percentage": 19.74, "elapsed_time": "0:20:13", "remaining_time": "1:22:14"} +{"current_steps": 233, "total_steps": 1175, "loss": 1.0481302738189697, "lr": 3.886289499165609e-05, "epoch": 0.9936034115138592, "percentage": 19.83, "elapsed_time": "0:20:19", "remaining_time": "1:22:08"} +{"current_steps": 234, "total_steps": 1175, "loss": 1.079208493232727, "lr": 3.884305371555215e-05, "epoch": 0.997867803837953, "percentage": 19.91, "elapsed_time": "0:20:24", "remaining_time": "1:22:03"} +{"current_steps": 235, "total_steps": 1175, "loss": 1.1089693307876587, "lr": 3.882304598295643e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:20:26", "remaining_time": "1:21:47"} +{"current_steps": 236, "total_steps": 1175, "loss": 1.0655412673950195, "lr": 3.880287197061402e-05, "epoch": 1.004264392324094, "percentage": 20.09, "elapsed_time": "0:20:34", "remaining_time": "1:21:51"} +{"current_steps": 237, "total_steps": 1175, "loss": 1.045609951019287, "lr": 3.878253185673888e-05, "epoch": 1.0085287846481876, "percentage": 20.17, "elapsed_time": "0:20:39", "remaining_time": "1:21:45"} +{"current_steps": 238, "total_steps": 1175, "loss": 1.04897141456604, "lr": 3.876202582101229e-05, "epoch": 1.0127931769722816, "percentage": 20.26, "elapsed_time": "0:20:44", "remaining_time": "1:21:40"} +{"current_steps": 239, "total_steps": 1175, "loss": 1.0530734062194824, "lr": 3.874135404458125e-05, "epoch": 1.0170575692963753, "percentage": 20.34, "elapsed_time": "0:20:49", "remaining_time": "1:21:35"} +{"current_steps": 240, "total_steps": 1175, "loss": 1.0717518329620361, "lr": 3.8720516710056905e-05, "epoch": 1.0213219616204692, "percentage": 20.43, "elapsed_time": "0:20:55", "remaining_time": "1:21:29"} +{"current_steps": 241, "total_steps": 1175, "loss": 1.0576286315917969, "lr": 3.8699514001512885e-05, "epoch": 1.0255863539445629, "percentage": 20.51, "elapsed_time": "0:21:00", "remaining_time": "1:21:23"} +{"current_steps": 242, "total_steps": 1175, "loss": 1.026517629623413, "lr": 3.867834610448374e-05, "epoch": 1.0298507462686568, "percentage": 20.6, "elapsed_time": "0:21:05", "remaining_time": "1:21:18"} +{"current_steps": 243, "total_steps": 1175, "loss": 1.0431249141693115, "lr": 3.865701320596324e-05, "epoch": 1.0341151385927505, "percentage": 20.68, "elapsed_time": "0:21:10", "remaining_time": "1:21:13"} +{"current_steps": 244, "total_steps": 1175, "loss": 1.0454719066619873, "lr": 3.863551549440277e-05, "epoch": 1.0383795309168444, "percentage": 20.77, "elapsed_time": "0:21:15", "remaining_time": "1:21:07"} +{"current_steps": 245, "total_steps": 1175, "loss": 1.014958143234253, "lr": 3.861385315970964e-05, "epoch": 1.0426439232409381, "percentage": 20.85, "elapsed_time": "0:21:20", "remaining_time": "1:21:01"} +{"current_steps": 246, "total_steps": 1175, "loss": 1.0368402004241943, "lr": 3.859202639324542e-05, "epoch": 1.046908315565032, "percentage": 20.94, "elapsed_time": "0:21:25", "remaining_time": "1:20:55"} +{"current_steps": 247, "total_steps": 1175, "loss": 1.029822826385498, "lr": 3.8570035387824214e-05, "epoch": 1.0511727078891258, "percentage": 21.02, "elapsed_time": "0:21:31", "remaining_time": "1:20:50"} +{"current_steps": 248, "total_steps": 1175, "loss": 1.0420994758605957, "lr": 3.8547880337711036e-05, "epoch": 1.0554371002132197, "percentage": 21.11, "elapsed_time": "0:21:36", "remaining_time": "1:20:45"} +{"current_steps": 249, "total_steps": 1175, "loss": 1.092591404914856, "lr": 3.8525561438620016e-05, "epoch": 1.0597014925373134, "percentage": 21.19, "elapsed_time": "0:21:41", "remaining_time": "1:20:39"} +{"current_steps": 250, "total_steps": 1175, "loss": 1.0229907035827637, "lr": 3.850307888771269e-05, "epoch": 1.0639658848614073, "percentage": 21.28, "elapsed_time": "0:21:46", "remaining_time": "1:20:34"} +{"current_steps": 251, "total_steps": 1175, "loss": 1.036152958869934, "lr": 3.848043288359629e-05, "epoch": 1.068230277185501, "percentage": 21.36, "elapsed_time": "0:21:51", "remaining_time": "1:20:28"} +{"current_steps": 252, "total_steps": 1175, "loss": 1.0302397012710571, "lr": 3.8457623626321944e-05, "epoch": 1.072494669509595, "percentage": 21.45, "elapsed_time": "0:21:56", "remaining_time": "1:20:23"} +{"current_steps": 253, "total_steps": 1175, "loss": 1.0454399585723877, "lr": 3.843465131738296e-05, "epoch": 1.0767590618336886, "percentage": 21.53, "elapsed_time": "0:22:01", "remaining_time": "1:20:17"} +{"current_steps": 254, "total_steps": 1175, "loss": 1.025251865386963, "lr": 3.8411516159713e-05, "epoch": 1.0810234541577826, "percentage": 21.62, "elapsed_time": "0:22:07", "remaining_time": "1:20:12"} +{"current_steps": 255, "total_steps": 1175, "loss": 0.9942444562911987, "lr": 3.838821835768431e-05, "epoch": 1.0852878464818763, "percentage": 21.7, "elapsed_time": "0:22:12", "remaining_time": "1:20:06"} +{"current_steps": 256, "total_steps": 1175, "loss": 1.0361202955245972, "lr": 3.83647581171059e-05, "epoch": 1.0895522388059702, "percentage": 21.79, "elapsed_time": "0:22:17", "remaining_time": "1:20:01"} +{"current_steps": 257, "total_steps": 1175, "loss": 1.018730878829956, "lr": 3.8341135645221744e-05, "epoch": 1.0938166311300639, "percentage": 21.87, "elapsed_time": "0:22:22", "remaining_time": "1:19:55"} +{"current_steps": 258, "total_steps": 1175, "loss": 1.0082337856292725, "lr": 3.831735115070895e-05, "epoch": 1.0980810234541578, "percentage": 21.96, "elapsed_time": "0:22:27", "remaining_time": "1:19:50"} +{"current_steps": 259, "total_steps": 1175, "loss": 1.0284898281097412, "lr": 3.8293404843675904e-05, "epoch": 1.1023454157782515, "percentage": 22.04, "elapsed_time": "0:22:32", "remaining_time": "1:19:44"} +{"current_steps": 260, "total_steps": 1175, "loss": 1.0335543155670166, "lr": 3.8269296935660395e-05, "epoch": 1.1066098081023454, "percentage": 22.13, "elapsed_time": "0:22:37", "remaining_time": "1:19:39"} +{"current_steps": 261, "total_steps": 1175, "loss": 1.0389349460601807, "lr": 3.82450276396278e-05, "epoch": 1.1108742004264391, "percentage": 22.21, "elapsed_time": "0:22:43", "remaining_time": "1:19:33"} +{"current_steps": 262, "total_steps": 1175, "loss": 1.0075374841690063, "lr": 3.822059716996916e-05, "epoch": 1.115138592750533, "percentage": 22.3, "elapsed_time": "0:22:48", "remaining_time": "1:19:27"} +{"current_steps": 263, "total_steps": 1175, "loss": 0.9890443086624146, "lr": 3.819600574249929e-05, "epoch": 1.1194029850746268, "percentage": 22.38, "elapsed_time": "0:22:53", "remaining_time": "1:19:22"} +{"current_steps": 264, "total_steps": 1175, "loss": 1.054425597190857, "lr": 3.817125357445489e-05, "epoch": 1.1236673773987207, "percentage": 22.47, "elapsed_time": "0:22:58", "remaining_time": "1:19:16"} +{"current_steps": 265, "total_steps": 1175, "loss": 1.0141037702560425, "lr": 3.814634088449261e-05, "epoch": 1.1279317697228146, "percentage": 22.55, "elapsed_time": "0:23:03", "remaining_time": "1:19:11"} +{"current_steps": 266, "total_steps": 1175, "loss": 1.0493249893188477, "lr": 3.812126789268712e-05, "epoch": 1.1321961620469083, "percentage": 22.64, "elapsed_time": "0:23:08", "remaining_time": "1:19:05"} +{"current_steps": 267, "total_steps": 1175, "loss": 1.0849034786224365, "lr": 3.80960348205292e-05, "epoch": 1.136460554371002, "percentage": 22.72, "elapsed_time": "0:23:13", "remaining_time": "1:19:00"} +{"current_steps": 268, "total_steps": 1175, "loss": 1.0932810306549072, "lr": 3.807064189092372e-05, "epoch": 1.140724946695096, "percentage": 22.81, "elapsed_time": "0:23:18", "remaining_time": "1:18:54"} +{"current_steps": 269, "total_steps": 1175, "loss": 0.9904080629348755, "lr": 3.804508932818771e-05, "epoch": 1.1449893390191899, "percentage": 22.89, "elapsed_time": "0:23:24", "remaining_time": "1:18:48"} +{"current_steps": 270, "total_steps": 1175, "loss": 1.034711241722107, "lr": 3.801937735804838e-05, "epoch": 1.1492537313432836, "percentage": 22.98, "elapsed_time": "0:23:29", "remaining_time": "1:18:43"} +{"current_steps": 271, "total_steps": 1175, "loss": 1.0160858631134033, "lr": 3.799350620764114e-05, "epoch": 1.1535181236673775, "percentage": 23.06, "elapsed_time": "0:23:34", "remaining_time": "1:18:37"} +{"current_steps": 272, "total_steps": 1175, "loss": 1.003743052482605, "lr": 3.7967476105507535e-05, "epoch": 1.1577825159914712, "percentage": 23.15, "elapsed_time": "0:23:39", "remaining_time": "1:18:32"} +{"current_steps": 273, "total_steps": 1175, "loss": 1.0129845142364502, "lr": 3.7941287281593284e-05, "epoch": 1.1620469083155651, "percentage": 23.23, "elapsed_time": "0:23:44", "remaining_time": "1:18:27"} +{"current_steps": 274, "total_steps": 1175, "loss": 1.0502171516418457, "lr": 3.7914939967246227e-05, "epoch": 1.1663113006396588, "percentage": 23.32, "elapsed_time": "0:23:49", "remaining_time": "1:18:21"} +{"current_steps": 275, "total_steps": 1175, "loss": 1.0400927066802979, "lr": 3.7888434395214285e-05, "epoch": 1.1705756929637527, "percentage": 23.4, "elapsed_time": "0:23:54", "remaining_time": "1:18:16"} +{"current_steps": 276, "total_steps": 1175, "loss": 1.0391854047775269, "lr": 3.786177079964339e-05, "epoch": 1.1748400852878464, "percentage": 23.49, "elapsed_time": "0:24:00", "remaining_time": "1:18:10"} +{"current_steps": 277, "total_steps": 1175, "loss": 1.0601963996887207, "lr": 3.783494941607544e-05, "epoch": 1.1791044776119404, "percentage": 23.57, "elapsed_time": "0:24:05", "remaining_time": "1:18:05"} +{"current_steps": 278, "total_steps": 1175, "loss": 1.0429885387420654, "lr": 3.780797048144621e-05, "epoch": 1.183368869936034, "percentage": 23.66, "elapsed_time": "0:24:10", "remaining_time": "1:17:59"} +{"current_steps": 279, "total_steps": 1175, "loss": 1.050649881362915, "lr": 3.7780834234083236e-05, "epoch": 1.187633262260128, "percentage": 23.74, "elapsed_time": "0:24:15", "remaining_time": "1:17:54"} +{"current_steps": 280, "total_steps": 1175, "loss": 1.016859769821167, "lr": 3.775354091370376e-05, "epoch": 1.1918976545842217, "percentage": 23.83, "elapsed_time": "0:24:20", "remaining_time": "1:17:48"} +{"current_steps": 281, "total_steps": 1175, "loss": 0.9994684457778931, "lr": 3.772609076141255e-05, "epoch": 1.1961620469083156, "percentage": 23.91, "elapsed_time": "0:24:25", "remaining_time": "1:17:43"} +{"current_steps": 282, "total_steps": 1175, "loss": 1.0300400257110596, "lr": 3.769848401969982e-05, "epoch": 1.2004264392324093, "percentage": 24.0, "elapsed_time": "0:24:30", "remaining_time": "1:17:37"} +{"current_steps": 283, "total_steps": 1175, "loss": 1.0938390493392944, "lr": 3.767072093243907e-05, "epoch": 1.2046908315565032, "percentage": 24.09, "elapsed_time": "0:24:36", "remaining_time": "1:17:32"} +{"current_steps": 284, "total_steps": 1175, "loss": 1.1198451519012451, "lr": 3.7642801744884915e-05, "epoch": 1.208955223880597, "percentage": 24.17, "elapsed_time": "0:24:41", "remaining_time": "1:17:26"} +{"current_steps": 285, "total_steps": 1175, "loss": 0.9990887641906738, "lr": 3.761472670367096e-05, "epoch": 1.2132196162046909, "percentage": 24.26, "elapsed_time": "0:24:46", "remaining_time": "1:17:21"} +{"current_steps": 286, "total_steps": 1175, "loss": 1.0035858154296875, "lr": 3.758649605680758e-05, "epoch": 1.2174840085287846, "percentage": 24.34, "elapsed_time": "0:24:51", "remaining_time": "1:17:15"} +{"current_steps": 287, "total_steps": 1175, "loss": 1.0343601703643799, "lr": 3.755811005367974e-05, "epoch": 1.2217484008528785, "percentage": 24.43, "elapsed_time": "0:24:56", "remaining_time": "1:17:10"} +{"current_steps": 288, "total_steps": 1175, "loss": 1.036698818206787, "lr": 3.752956894504481e-05, "epoch": 1.2260127931769722, "percentage": 24.51, "elapsed_time": "0:25:01", "remaining_time": "1:17:04"} +{"current_steps": 289, "total_steps": 1175, "loss": 1.0224305391311646, "lr": 3.750087298303033e-05, "epoch": 1.2302771855010661, "percentage": 24.6, "elapsed_time": "0:25:06", "remaining_time": "1:16:59"} +{"current_steps": 290, "total_steps": 1175, "loss": 1.040165662765503, "lr": 3.7472022421131795e-05, "epoch": 1.2345415778251598, "percentage": 24.68, "elapsed_time": "0:25:11", "remaining_time": "1:16:53"} +{"current_steps": 291, "total_steps": 1175, "loss": 1.0578022003173828, "lr": 3.7443017514210406e-05, "epoch": 1.2388059701492538, "percentage": 24.77, "elapsed_time": "0:25:17", "remaining_time": "1:16:48"} +{"current_steps": 292, "total_steps": 1175, "loss": 1.0782644748687744, "lr": 3.7413858518490825e-05, "epoch": 1.2430703624733475, "percentage": 24.85, "elapsed_time": "0:25:22", "remaining_time": "1:16:42"} +{"current_steps": 293, "total_steps": 1175, "loss": 1.0355021953582764, "lr": 3.7384545691558895e-05, "epoch": 1.2473347547974414, "percentage": 24.94, "elapsed_time": "0:25:27", "remaining_time": "1:16:37"} +{"current_steps": 294, "total_steps": 1175, "loss": 1.0233511924743652, "lr": 3.735507929235941e-05, "epoch": 1.251599147121535, "percentage": 25.02, "elapsed_time": "0:25:32", "remaining_time": "1:16:31"} +{"current_steps": 295, "total_steps": 1175, "loss": 1.0197874307632446, "lr": 3.732545958119378e-05, "epoch": 1.255863539445629, "percentage": 25.11, "elapsed_time": "0:25:37", "remaining_time": "1:16:26"} +{"current_steps": 296, "total_steps": 1175, "loss": 1.052213191986084, "lr": 3.729568681971774e-05, "epoch": 1.260127931769723, "percentage": 25.19, "elapsed_time": "0:25:42", "remaining_time": "1:16:20"} +{"current_steps": 297, "total_steps": 1175, "loss": 1.075683355331421, "lr": 3.726576127093905e-05, "epoch": 1.2643923240938166, "percentage": 25.28, "elapsed_time": "0:25:47", "remaining_time": "1:16:15"} +{"current_steps": 298, "total_steps": 1175, "loss": 1.021393060684204, "lr": 3.7235683199215177e-05, "epoch": 1.2686567164179103, "percentage": 25.36, "elapsed_time": "0:25:52", "remaining_time": "1:16:09"} +{"current_steps": 299, "total_steps": 1175, "loss": 1.0408051013946533, "lr": 3.7205452870250944e-05, "epoch": 1.2729211087420043, "percentage": 25.45, "elapsed_time": "0:25:57", "remaining_time": "1:16:04"} +{"current_steps": 300, "total_steps": 1175, "loss": 0.9903295040130615, "lr": 3.7175070551096204e-05, "epoch": 1.2771855010660982, "percentage": 25.53, "elapsed_time": "0:26:03", "remaining_time": "1:15:58"} +{"current_steps": 301, "total_steps": 1175, "loss": 1.0472469329833984, "lr": 3.7144536510143436e-05, "epoch": 1.2814498933901919, "percentage": 25.62, "elapsed_time": "0:26:08", "remaining_time": "1:15:53"} +{"current_steps": 302, "total_steps": 1175, "loss": 1.0415022373199463, "lr": 3.711385101712544e-05, "epoch": 1.2857142857142856, "percentage": 25.7, "elapsed_time": "0:26:13", "remaining_time": "1:15:48"} +{"current_steps": 303, "total_steps": 1175, "loss": 1.0422717332839966, "lr": 3.708301434311289e-05, "epoch": 1.2899786780383795, "percentage": 25.79, "elapsed_time": "0:26:18", "remaining_time": "1:15:42"} +{"current_steps": 304, "total_steps": 1175, "loss": 1.00404691696167, "lr": 3.7052026760511996e-05, "epoch": 1.2942430703624734, "percentage": 25.87, "elapsed_time": "0:26:23", "remaining_time": "1:15:37"} +{"current_steps": 305, "total_steps": 1175, "loss": 1.0046180486679077, "lr": 3.7020888543062046e-05, "epoch": 1.2985074626865671, "percentage": 25.96, "elapsed_time": "0:26:28", "remaining_time": "1:15:31"} +{"current_steps": 306, "total_steps": 1175, "loss": 1.0792807340621948, "lr": 3.6989599965833024e-05, "epoch": 1.302771855010661, "percentage": 26.04, "elapsed_time": "0:26:33", "remaining_time": "1:15:26"} +{"current_steps": 307, "total_steps": 1175, "loss": 1.0605202913284302, "lr": 3.695816130522317e-05, "epoch": 1.3070362473347548, "percentage": 26.13, "elapsed_time": "0:26:38", "remaining_time": "1:15:20"} +{"current_steps": 308, "total_steps": 1175, "loss": 1.0294058322906494, "lr": 3.692657283895651e-05, "epoch": 1.3113006396588487, "percentage": 26.21, "elapsed_time": "0:26:44", "remaining_time": "1:15:15"} +{"current_steps": 309, "total_steps": 1175, "loss": 1.0614323616027832, "lr": 3.689483484608048e-05, "epoch": 1.3155650319829424, "percentage": 26.3, "elapsed_time": "0:26:49", "remaining_time": "1:15:10"} +{"current_steps": 310, "total_steps": 1175, "loss": 1.096575140953064, "lr": 3.6862947606963364e-05, "epoch": 1.3198294243070363, "percentage": 26.38, "elapsed_time": "0:26:54", "remaining_time": "1:15:04"} +{"current_steps": 311, "total_steps": 1175, "loss": 1.038635015487671, "lr": 3.6830911403291885e-05, "epoch": 1.32409381663113, "percentage": 26.47, "elapsed_time": "0:26:59", "remaining_time": "1:14:59"} +{"current_steps": 312, "total_steps": 1175, "loss": 1.0621452331542969, "lr": 3.679872651806869e-05, "epoch": 1.328358208955224, "percentage": 26.55, "elapsed_time": "0:27:04", "remaining_time": "1:14:54"} +{"current_steps": 313, "total_steps": 1175, "loss": 1.00935697555542, "lr": 3.676639323560986e-05, "epoch": 1.3326226012793176, "percentage": 26.64, "elapsed_time": "0:27:09", "remaining_time": "1:14:48"} +{"current_steps": 314, "total_steps": 1175, "loss": 1.0217959880828857, "lr": 3.6733911841542365e-05, "epoch": 1.3368869936034116, "percentage": 26.72, "elapsed_time": "0:27:15", "remaining_time": "1:14:43"} +{"current_steps": 315, "total_steps": 1175, "loss": 1.0224769115447998, "lr": 3.6701282622801626e-05, "epoch": 1.3411513859275053, "percentage": 26.81, "elapsed_time": "0:27:20", "remaining_time": "1:14:37"} +{"current_steps": 316, "total_steps": 1175, "loss": 1.0295928716659546, "lr": 3.666850586762886e-05, "epoch": 1.3454157782515992, "percentage": 26.89, "elapsed_time": "0:27:25", "remaining_time": "1:14:32"} +{"current_steps": 317, "total_steps": 1175, "loss": 1.0240471363067627, "lr": 3.663558186556863e-05, "epoch": 1.349680170575693, "percentage": 26.98, "elapsed_time": "0:27:30", "remaining_time": "1:14:27"} +{"current_steps": 318, "total_steps": 1175, "loss": 0.9949407577514648, "lr": 3.660251090746627e-05, "epoch": 1.3539445628997868, "percentage": 27.06, "elapsed_time": "0:27:35", "remaining_time": "1:14:21"} +{"current_steps": 319, "total_steps": 1175, "loss": 1.0744171142578125, "lr": 3.656929328546526e-05, "epoch": 1.3582089552238805, "percentage": 27.15, "elapsed_time": "0:27:40", "remaining_time": "1:14:16"} +{"current_steps": 320, "total_steps": 1175, "loss": 1.0498393774032593, "lr": 3.653592929300471e-05, "epoch": 1.3624733475479744, "percentage": 27.23, "elapsed_time": "0:27:45", "remaining_time": "1:14:10"} +{"current_steps": 321, "total_steps": 1175, "loss": 1.0534286499023438, "lr": 3.650241922481675e-05, "epoch": 1.3667377398720681, "percentage": 27.32, "elapsed_time": "0:27:50", "remaining_time": "1:14:05"} +{"current_steps": 322, "total_steps": 1175, "loss": 1.0516881942749023, "lr": 3.6468763376923886e-05, "epoch": 1.371002132196162, "percentage": 27.4, "elapsed_time": "0:27:56", "remaining_time": "1:14:00"} +{"current_steps": 323, "total_steps": 1175, "loss": 1.030785083770752, "lr": 3.6434962046636464e-05, "epoch": 1.375266524520256, "percentage": 27.49, "elapsed_time": "0:28:01", "remaining_time": "1:13:54"} +{"current_steps": 324, "total_steps": 1175, "loss": 0.9938373565673828, "lr": 3.6401015532549957e-05, "epoch": 1.3795309168443497, "percentage": 27.57, "elapsed_time": "0:28:06", "remaining_time": "1:13:49"} +{"current_steps": 325, "total_steps": 1175, "loss": 1.1035189628601074, "lr": 3.6366924134542386e-05, "epoch": 1.3837953091684434, "percentage": 27.66, "elapsed_time": "0:28:11", "remaining_time": "1:13:44"} +{"current_steps": 326, "total_steps": 1175, "loss": 1.023439645767212, "lr": 3.633268815377166e-05, "epoch": 1.3880597014925373, "percentage": 27.74, "elapsed_time": "0:28:16", "remaining_time": "1:13:38"} +{"current_steps": 327, "total_steps": 1175, "loss": 1.060289978981018, "lr": 3.6298307892672895e-05, "epoch": 1.3923240938166312, "percentage": 27.83, "elapsed_time": "0:28:21", "remaining_time": "1:13:33"} +{"current_steps": 328, "total_steps": 1175, "loss": 1.0092850923538208, "lr": 3.626378365495577e-05, "epoch": 1.396588486140725, "percentage": 27.91, "elapsed_time": "0:28:26", "remaining_time": "1:13:28"} +{"current_steps": 329, "total_steps": 1175, "loss": 1.021120309829712, "lr": 3.622911574560181e-05, "epoch": 1.4008528784648187, "percentage": 28.0, "elapsed_time": "0:28:32", "remaining_time": "1:13:23"} +{"current_steps": 330, "total_steps": 1175, "loss": 1.0362828969955444, "lr": 3.6194304470861744e-05, "epoch": 1.4051172707889126, "percentage": 28.09, "elapsed_time": "0:28:37", "remaining_time": "1:13:18"} +{"current_steps": 331, "total_steps": 1175, "loss": 1.0330214500427246, "lr": 3.615935013825272e-05, "epoch": 1.4093816631130065, "percentage": 28.17, "elapsed_time": "0:28:43", "remaining_time": "1:13:14"} +{"current_steps": 332, "total_steps": 1175, "loss": 1.032184362411499, "lr": 3.612425305655569e-05, "epoch": 1.4136460554371002, "percentage": 28.26, "elapsed_time": "0:28:49", "remaining_time": "1:13:10"} +{"current_steps": 333, "total_steps": 1175, "loss": 1.016597867012024, "lr": 3.6089013535812593e-05, "epoch": 1.417910447761194, "percentage": 28.34, "elapsed_time": "0:28:55", "remaining_time": "1:13:08"} +{"current_steps": 334, "total_steps": 1175, "loss": 1.0433218479156494, "lr": 3.6053631887323656e-05, "epoch": 1.4221748400852878, "percentage": 28.43, "elapsed_time": "0:29:01", "remaining_time": "1:13:04"} +{"current_steps": 335, "total_steps": 1175, "loss": 1.0579197406768799, "lr": 3.601810842364465e-05, "epoch": 1.4264392324093818, "percentage": 28.51, "elapsed_time": "0:29:06", "remaining_time": "1:12:58"} +{"current_steps": 336, "total_steps": 1175, "loss": 1.044649600982666, "lr": 3.598244345858412e-05, "epoch": 1.4307036247334755, "percentage": 28.6, "elapsed_time": "0:29:11", "remaining_time": "1:12:54"} +{"current_steps": 337, "total_steps": 1175, "loss": 1.0548815727233887, "lr": 3.594663730720059e-05, "epoch": 1.4349680170575694, "percentage": 28.68, "elapsed_time": "0:29:18", "remaining_time": "1:12:53"} +{"current_steps": 338, "total_steps": 1175, "loss": 1.0309990644454956, "lr": 3.591069028579982e-05, "epoch": 1.439232409381663, "percentage": 28.77, "elapsed_time": "0:29:24", "remaining_time": "1:12:49"} +{"current_steps": 339, "total_steps": 1175, "loss": 1.019059658050537, "lr": 3.5874602711931994e-05, "epoch": 1.443496801705757, "percentage": 28.85, "elapsed_time": "0:29:29", "remaining_time": "1:12:44"} +{"current_steps": 340, "total_steps": 1175, "loss": 1.0602333545684814, "lr": 3.5838374904388904e-05, "epoch": 1.4477611940298507, "percentage": 28.94, "elapsed_time": "0:29:36", "remaining_time": "1:12:41"} +{"current_steps": 341, "total_steps": 1175, "loss": 1.034498691558838, "lr": 3.580200718320115e-05, "epoch": 1.4520255863539446, "percentage": 29.02, "elapsed_time": "0:29:42", "remaining_time": "1:12:39"} +{"current_steps": 342, "total_steps": 1175, "loss": 1.071781873703003, "lr": 3.576549986963531e-05, "epoch": 1.4562899786780383, "percentage": 29.11, "elapsed_time": "0:29:48", "remaining_time": "1:12:35"} +{"current_steps": 343, "total_steps": 1175, "loss": 1.0310769081115723, "lr": 3.5728853286191075e-05, "epoch": 1.4605543710021323, "percentage": 29.19, "elapsed_time": "0:29:53", "remaining_time": "1:12:30"} +{"current_steps": 344, "total_steps": 1175, "loss": 1.085401177406311, "lr": 3.5692067756598465e-05, "epoch": 1.464818763326226, "percentage": 29.28, "elapsed_time": "0:30:00", "remaining_time": "1:12:28"} +{"current_steps": 345, "total_steps": 1175, "loss": 1.0883269309997559, "lr": 3.5655143605814885e-05, "epoch": 1.4690831556503199, "percentage": 29.36, "elapsed_time": "0:30:05", "remaining_time": "1:12:24"} +{"current_steps": 346, "total_steps": 1175, "loss": 1.0500903129577637, "lr": 3.561808116002232e-05, "epoch": 1.4733475479744136, "percentage": 29.45, "elapsed_time": "0:30:11", "remaining_time": "1:12:21"} +{"current_steps": 347, "total_steps": 1175, "loss": 1.1078016757965088, "lr": 3.5580880746624444e-05, "epoch": 1.4776119402985075, "percentage": 29.53, "elapsed_time": "0:30:17", "remaining_time": "1:12:17"} +{"current_steps": 348, "total_steps": 1175, "loss": 1.0176830291748047, "lr": 3.5543542694243685e-05, "epoch": 1.4818763326226012, "percentage": 29.62, "elapsed_time": "0:30:22", "remaining_time": "1:12:11"} +{"current_steps": 349, "total_steps": 1175, "loss": 1.0444616079330444, "lr": 3.5506067332718355e-05, "epoch": 1.4861407249466951, "percentage": 29.7, "elapsed_time": "0:30:28", "remaining_time": "1:12:08"} +{"current_steps": 350, "total_steps": 1175, "loss": 1.0343124866485596, "lr": 3.546845499309976e-05, "epoch": 1.4904051172707888, "percentage": 29.79, "elapsed_time": "0:30:34", "remaining_time": "1:12:03"} +{"current_steps": 351, "total_steps": 1175, "loss": 1.0102611780166626, "lr": 3.5430706007649225e-05, "epoch": 1.4946695095948828, "percentage": 29.87, "elapsed_time": "0:30:39", "remaining_time": "1:11:57"} +{"current_steps": 352, "total_steps": 1175, "loss": 0.9864039421081543, "lr": 3.539282070983518e-05, "epoch": 1.4989339019189765, "percentage": 29.96, "elapsed_time": "0:30:44", "remaining_time": "1:11:52"} +{"current_steps": 353, "total_steps": 1175, "loss": 1.0977790355682373, "lr": 3.535479943433023e-05, "epoch": 1.5031982942430704, "percentage": 30.04, "elapsed_time": "0:30:49", "remaining_time": "1:11:46"} +{"current_steps": 354, "total_steps": 1175, "loss": 1.0146563053131104, "lr": 3.5316642517008184e-05, "epoch": 1.5074626865671643, "percentage": 30.13, "elapsed_time": "0:30:54", "remaining_time": "1:11:41"} +{"current_steps": 355, "total_steps": 1175, "loss": 1.027766466140747, "lr": 3.5278350294941074e-05, "epoch": 1.511727078891258, "percentage": 30.21, "elapsed_time": "0:30:59", "remaining_time": "1:11:35"} +{"current_steps": 356, "total_steps": 1175, "loss": 1.0604379177093506, "lr": 3.523992310639622e-05, "epoch": 1.5159914712153517, "percentage": 30.3, "elapsed_time": "0:31:04", "remaining_time": "1:11:30"} +{"current_steps": 357, "total_steps": 1175, "loss": 1.0405174493789673, "lr": 3.5201361290833165e-05, "epoch": 1.5202558635394456, "percentage": 30.38, "elapsed_time": "0:31:10", "remaining_time": "1:11:25"} +{"current_steps": 358, "total_steps": 1175, "loss": 1.055159091949463, "lr": 3.516266518890079e-05, "epoch": 1.5245202558635396, "percentage": 30.47, "elapsed_time": "0:31:15", "remaining_time": "1:11:19"} +{"current_steps": 359, "total_steps": 1175, "loss": 0.9890848994255066, "lr": 3.512383514243419e-05, "epoch": 1.5287846481876333, "percentage": 30.55, "elapsed_time": "0:31:20", "remaining_time": "1:11:15"} +{"current_steps": 360, "total_steps": 1175, "loss": 1.0411036014556885, "lr": 3.5084871494451716e-05, "epoch": 1.533049040511727, "percentage": 30.64, "elapsed_time": "0:31:26", "remaining_time": "1:11:09"} +{"current_steps": 361, "total_steps": 1175, "loss": 1.080575942993164, "lr": 3.5045774589151955e-05, "epoch": 1.537313432835821, "percentage": 30.72, "elapsed_time": "0:31:31", "remaining_time": "1:11:04"} +{"current_steps": 362, "total_steps": 1175, "loss": 1.0592353343963623, "lr": 3.500654477191064e-05, "epoch": 1.5415778251599148, "percentage": 30.81, "elapsed_time": "0:31:36", "remaining_time": "1:10:58"} +{"current_steps": 363, "total_steps": 1175, "loss": 1.0313072204589844, "lr": 3.496718238927764e-05, "epoch": 1.5458422174840085, "percentage": 30.89, "elapsed_time": "0:31:41", "remaining_time": "1:10:53"} +{"current_steps": 364, "total_steps": 1175, "loss": 1.032320499420166, "lr": 3.492768778897388e-05, "epoch": 1.5501066098081022, "percentage": 30.98, "elapsed_time": "0:31:46", "remaining_time": "1:10:47"} +{"current_steps": 365, "total_steps": 1175, "loss": 1.0460598468780518, "lr": 3.4888061319888276e-05, "epoch": 1.5543710021321961, "percentage": 31.06, "elapsed_time": "0:31:51", "remaining_time": "1:10:42"} +{"current_steps": 366, "total_steps": 1175, "loss": 1.003669023513794, "lr": 3.484830333207466e-05, "epoch": 1.55863539445629, "percentage": 31.15, "elapsed_time": "0:31:56", "remaining_time": "1:10:36"} +{"current_steps": 367, "total_steps": 1175, "loss": 1.0113545656204224, "lr": 3.4808414176748666e-05, "epoch": 1.5628997867803838, "percentage": 31.23, "elapsed_time": "0:32:02", "remaining_time": "1:10:31"} +{"current_steps": 368, "total_steps": 1175, "loss": 1.0731767416000366, "lr": 3.476839420628466e-05, "epoch": 1.5671641791044775, "percentage": 31.32, "elapsed_time": "0:32:07", "remaining_time": "1:10:26"} +{"current_steps": 369, "total_steps": 1175, "loss": 1.0478543043136597, "lr": 3.472824377421257e-05, "epoch": 1.5714285714285714, "percentage": 31.4, "elapsed_time": "0:32:12", "remaining_time": "1:10:20"} +{"current_steps": 370, "total_steps": 1175, "loss": 1.048224687576294, "lr": 3.4687963235214845e-05, "epoch": 1.5756929637526653, "percentage": 31.49, "elapsed_time": "0:32:17", "remaining_time": "1:10:15"} +{"current_steps": 371, "total_steps": 1175, "loss": 1.0166910886764526, "lr": 3.464755294512325e-05, "epoch": 1.579957356076759, "percentage": 31.57, "elapsed_time": "0:32:22", "remaining_time": "1:10:09"} +{"current_steps": 372, "total_steps": 1175, "loss": 1.0563862323760986, "lr": 3.4607013260915765e-05, "epoch": 1.5842217484008527, "percentage": 31.66, "elapsed_time": "0:32:27", "remaining_time": "1:10:04"} +{"current_steps": 373, "total_steps": 1175, "loss": 0.9865554571151733, "lr": 3.4566344540713404e-05, "epoch": 1.5884861407249466, "percentage": 31.74, "elapsed_time": "0:32:32", "remaining_time": "1:09:59"} +{"current_steps": 374, "total_steps": 1175, "loss": 1.0230598449707031, "lr": 3.452554714377706e-05, "epoch": 1.5927505330490406, "percentage": 31.83, "elapsed_time": "0:32:38", "remaining_time": "1:09:53"} +{"current_steps": 375, "total_steps": 1175, "loss": 1.0132288932800293, "lr": 3.448462143050436e-05, "epoch": 1.5970149253731343, "percentage": 31.91, "elapsed_time": "0:32:43", "remaining_time": "1:09:48"} +{"current_steps": 376, "total_steps": 1175, "loss": 1.0427764654159546, "lr": 3.4443567762426444e-05, "epoch": 1.6012793176972282, "percentage": 32.0, "elapsed_time": "0:32:48", "remaining_time": "1:09:42"} +{"current_steps": 377, "total_steps": 1175, "loss": 1.0427534580230713, "lr": 3.440238650220477e-05, "epoch": 1.6055437100213221, "percentage": 32.09, "elapsed_time": "0:32:53", "remaining_time": "1:09:37"} +{"current_steps": 378, "total_steps": 1175, "loss": 1.0215208530426025, "lr": 3.4361078013627945e-05, "epoch": 1.6098081023454158, "percentage": 32.17, "elapsed_time": "0:32:58", "remaining_time": "1:09:31"} +{"current_steps": 379, "total_steps": 1175, "loss": 1.041208028793335, "lr": 3.4319642661608474e-05, "epoch": 1.6140724946695095, "percentage": 32.26, "elapsed_time": "0:33:03", "remaining_time": "1:09:26"} +{"current_steps": 380, "total_steps": 1175, "loss": 1.0833510160446167, "lr": 3.427808081217957e-05, "epoch": 1.6183368869936035, "percentage": 32.34, "elapsed_time": "0:33:09", "remaining_time": "1:09:21"} +{"current_steps": 381, "total_steps": 1175, "loss": 0.9997053742408752, "lr": 3.423639283249189e-05, "epoch": 1.6226012793176974, "percentage": 32.43, "elapsed_time": "0:33:14", "remaining_time": "1:09:15"} +{"current_steps": 382, "total_steps": 1175, "loss": 1.0127842426300049, "lr": 3.419457909081032e-05, "epoch": 1.626865671641791, "percentage": 32.51, "elapsed_time": "0:33:19", "remaining_time": "1:09:10"} +{"current_steps": 383, "total_steps": 1175, "loss": 1.0155236721038818, "lr": 3.415263995651069e-05, "epoch": 1.6311300639658848, "percentage": 32.6, "elapsed_time": "0:33:24", "remaining_time": "1:09:04"} +{"current_steps": 384, "total_steps": 1175, "loss": 1.051874041557312, "lr": 3.411057580007653e-05, "epoch": 1.6353944562899787, "percentage": 32.68, "elapsed_time": "0:33:29", "remaining_time": "1:08:59"} +{"current_steps": 385, "total_steps": 1175, "loss": 1.0342919826507568, "lr": 3.4068386993095806e-05, "epoch": 1.6396588486140726, "percentage": 32.77, "elapsed_time": "0:33:34", "remaining_time": "1:08:54"} +{"current_steps": 386, "total_steps": 1175, "loss": 1.0449540615081787, "lr": 3.402607390825762e-05, "epoch": 1.6439232409381663, "percentage": 32.85, "elapsed_time": "0:33:39", "remaining_time": "1:08:48"} +{"current_steps": 387, "total_steps": 1175, "loss": 1.0454938411712646, "lr": 3.398363691934894e-05, "epoch": 1.64818763326226, "percentage": 32.94, "elapsed_time": "0:33:44", "remaining_time": "1:08:43"} +{"current_steps": 388, "total_steps": 1175, "loss": 1.0003044605255127, "lr": 3.3941076401251244e-05, "epoch": 1.652452025586354, "percentage": 33.02, "elapsed_time": "0:33:50", "remaining_time": "1:08:37"} +{"current_steps": 389, "total_steps": 1175, "loss": 1.0234074592590332, "lr": 3.3898392729937295e-05, "epoch": 1.6567164179104479, "percentage": 33.11, "elapsed_time": "0:33:55", "remaining_time": "1:08:32"} +{"current_steps": 390, "total_steps": 1175, "loss": 1.036074161529541, "lr": 3.385558628246774e-05, "epoch": 1.6609808102345416, "percentage": 33.19, "elapsed_time": "0:34:00", "remaining_time": "1:08:26"} +{"current_steps": 391, "total_steps": 1175, "loss": 1.0574541091918945, "lr": 3.381265743698781e-05, "epoch": 1.6652452025586353, "percentage": 33.28, "elapsed_time": "0:34:05", "remaining_time": "1:08:21"} +{"current_steps": 392, "total_steps": 1175, "loss": 1.0520439147949219, "lr": 3.3769606572724e-05, "epoch": 1.6695095948827292, "percentage": 33.36, "elapsed_time": "0:34:10", "remaining_time": "1:08:15"} +{"current_steps": 393, "total_steps": 1175, "loss": 1.0322532653808594, "lr": 3.3726434069980686e-05, "epoch": 1.6737739872068231, "percentage": 33.45, "elapsed_time": "0:34:15", "remaining_time": "1:08:10"} +{"current_steps": 394, "total_steps": 1175, "loss": 1.0209152698516846, "lr": 3.368314031013678e-05, "epoch": 1.6780383795309168, "percentage": 33.53, "elapsed_time": "0:34:20", "remaining_time": "1:08:05"} +{"current_steps": 395, "total_steps": 1175, "loss": 0.9918817281723022, "lr": 3.363972567564236e-05, "epoch": 1.6823027718550105, "percentage": 33.62, "elapsed_time": "0:34:25", "remaining_time": "1:07:59"} +{"current_steps": 396, "total_steps": 1175, "loss": 1.0087916851043701, "lr": 3.35961905500153e-05, "epoch": 1.6865671641791045, "percentage": 33.7, "elapsed_time": "0:34:31", "remaining_time": "1:07:54"} +{"current_steps": 397, "total_steps": 1175, "loss": 0.9876875877380371, "lr": 3.3552535317837855e-05, "epoch": 1.6908315565031984, "percentage": 33.79, "elapsed_time": "0:34:36", "remaining_time": "1:07:48"} +{"current_steps": 398, "total_steps": 1175, "loss": 1.0088675022125244, "lr": 3.35087603647533e-05, "epoch": 1.695095948827292, "percentage": 33.87, "elapsed_time": "0:34:41", "remaining_time": "1:07:43"} +{"current_steps": 399, "total_steps": 1175, "loss": 1.0424166917800903, "lr": 3.346486607746249e-05, "epoch": 1.6993603411513858, "percentage": 33.96, "elapsed_time": "0:34:46", "remaining_time": "1:07:38"} +{"current_steps": 400, "total_steps": 1175, "loss": 1.0191667079925537, "lr": 3.342085284372047e-05, "epoch": 1.7036247334754797, "percentage": 34.04, "elapsed_time": "0:34:51", "remaining_time": "1:07:32"} +{"current_steps": 401, "total_steps": 1175, "loss": 1.0168663263320923, "lr": 3.337672105233303e-05, "epoch": 1.7078891257995736, "percentage": 34.13, "elapsed_time": "0:34:56", "remaining_time": "1:07:27"} +{"current_steps": 402, "total_steps": 1175, "loss": 1.0180773735046387, "lr": 3.3332471093153296e-05, "epoch": 1.7121535181236673, "percentage": 34.21, "elapsed_time": "0:35:02", "remaining_time": "1:07:22"} +{"current_steps": 403, "total_steps": 1175, "loss": 1.033068299293518, "lr": 3.3288103357078244e-05, "epoch": 1.716417910447761, "percentage": 34.3, "elapsed_time": "0:35:07", "remaining_time": "1:07:16"} +{"current_steps": 404, "total_steps": 1175, "loss": 0.9663518667221069, "lr": 3.324361823604529e-05, "epoch": 1.720682302771855, "percentage": 34.38, "elapsed_time": "0:35:12", "remaining_time": "1:07:11"} +{"current_steps": 405, "total_steps": 1175, "loss": 1.0394078493118286, "lr": 3.319901612302881e-05, "epoch": 1.724946695095949, "percentage": 34.47, "elapsed_time": "0:35:17", "remaining_time": "1:07:05"} +{"current_steps": 406, "total_steps": 1175, "loss": 1.053601861000061, "lr": 3.315429741203666e-05, "epoch": 1.7292110874200426, "percentage": 34.55, "elapsed_time": "0:35:22", "remaining_time": "1:07:00"} +{"current_steps": 407, "total_steps": 1175, "loss": 1.020158290863037, "lr": 3.3109462498106705e-05, "epoch": 1.7334754797441365, "percentage": 34.64, "elapsed_time": "0:35:27", "remaining_time": "1:06:55"} +{"current_steps": 408, "total_steps": 1175, "loss": 1.0457191467285156, "lr": 3.306451177730333e-05, "epoch": 1.7377398720682304, "percentage": 34.72, "elapsed_time": "0:35:32", "remaining_time": "1:06:49"} +{"current_steps": 409, "total_steps": 1175, "loss": 1.0097477436065674, "lr": 3.301944564671394e-05, "epoch": 1.7420042643923241, "percentage": 34.81, "elapsed_time": "0:35:38", "remaining_time": "1:06:44"} +{"current_steps": 410, "total_steps": 1175, "loss": 1.0764334201812744, "lr": 3.297426450444546e-05, "epoch": 1.7462686567164178, "percentage": 34.89, "elapsed_time": "0:35:43", "remaining_time": "1:06:39"} +{"current_steps": 411, "total_steps": 1175, "loss": 1.0006964206695557, "lr": 3.292896874962078e-05, "epoch": 1.7505330490405118, "percentage": 34.98, "elapsed_time": "0:35:48", "remaining_time": "1:06:33"} +{"current_steps": 412, "total_steps": 1175, "loss": 1.0603384971618652, "lr": 3.2883558782375294e-05, "epoch": 1.7547974413646057, "percentage": 35.06, "elapsed_time": "0:35:53", "remaining_time": "1:06:28"} +{"current_steps": 413, "total_steps": 1175, "loss": 1.043904423713684, "lr": 3.283803500385332e-05, "epoch": 1.7590618336886994, "percentage": 35.15, "elapsed_time": "0:35:58", "remaining_time": "1:06:23"} +{"current_steps": 414, "total_steps": 1175, "loss": 1.0399513244628906, "lr": 3.2792397816204546e-05, "epoch": 1.763326226012793, "percentage": 35.23, "elapsed_time": "0:36:04", "remaining_time": "1:06:17"} +{"current_steps": 415, "total_steps": 1175, "loss": 0.9982384443283081, "lr": 3.2746647622580524e-05, "epoch": 1.767590618336887, "percentage": 35.32, "elapsed_time": "0:36:09", "remaining_time": "1:06:12"} +{"current_steps": 416, "total_steps": 1175, "loss": 1.008116364479065, "lr": 3.270078482713106e-05, "epoch": 1.771855010660981, "percentage": 35.4, "elapsed_time": "0:36:14", "remaining_time": "1:06:07"} +{"current_steps": 417, "total_steps": 1175, "loss": 1.034130573272705, "lr": 3.265480983500069e-05, "epoch": 1.7761194029850746, "percentage": 35.49, "elapsed_time": "0:36:19", "remaining_time": "1:06:01"} +{"current_steps": 418, "total_steps": 1175, "loss": 1.0008351802825928, "lr": 3.260872305232507e-05, "epoch": 1.7803837953091683, "percentage": 35.57, "elapsed_time": "0:36:24", "remaining_time": "1:05:56"} +{"current_steps": 419, "total_steps": 1175, "loss": 1.0125362873077393, "lr": 3.256252488622738e-05, "epoch": 1.7846481876332623, "percentage": 35.66, "elapsed_time": "0:36:29", "remaining_time": "1:05:50"} +{"current_steps": 420, "total_steps": 1175, "loss": 1.066127061843872, "lr": 3.251621574481475e-05, "epoch": 1.7889125799573562, "percentage": 35.74, "elapsed_time": "0:36:34", "remaining_time": "1:05:45"} +{"current_steps": 421, "total_steps": 1175, "loss": 0.9925398230552673, "lr": 3.246979603717467e-05, "epoch": 1.79317697228145, "percentage": 35.83, "elapsed_time": "0:36:39", "remaining_time": "1:05:40"} +{"current_steps": 422, "total_steps": 1175, "loss": 1.0189104080200195, "lr": 3.242326617337133e-05, "epoch": 1.7974413646055436, "percentage": 35.91, "elapsed_time": "0:36:45", "remaining_time": "1:05:34"} +{"current_steps": 423, "total_steps": 1175, "loss": 1.0439180135726929, "lr": 3.2376626564442016e-05, "epoch": 1.8017057569296375, "percentage": 36.0, "elapsed_time": "0:36:50", "remaining_time": "1:05:29"} +{"current_steps": 424, "total_steps": 1175, "loss": 1.054990291595459, "lr": 3.2329877622393515e-05, "epoch": 1.8059701492537314, "percentage": 36.09, "elapsed_time": "0:36:55", "remaining_time": "1:05:23"} +{"current_steps": 425, "total_steps": 1175, "loss": 0.9770750999450684, "lr": 3.228301976019841e-05, "epoch": 1.8102345415778252, "percentage": 36.17, "elapsed_time": "0:37:00", "remaining_time": "1:05:18"} +{"current_steps": 426, "total_steps": 1175, "loss": 0.9906047582626343, "lr": 3.22360533917915e-05, "epoch": 1.8144989339019189, "percentage": 36.26, "elapsed_time": "0:37:05", "remaining_time": "1:05:13"} +{"current_steps": 427, "total_steps": 1175, "loss": 1.0660655498504639, "lr": 3.218897893206608e-05, "epoch": 1.8187633262260128, "percentage": 36.34, "elapsed_time": "0:37:10", "remaining_time": "1:05:07"} +{"current_steps": 428, "total_steps": 1175, "loss": 0.9783341288566589, "lr": 3.2141796796870335e-05, "epoch": 1.8230277185501067, "percentage": 36.43, "elapsed_time": "0:37:16", "remaining_time": "1:05:02"} +{"current_steps": 429, "total_steps": 1175, "loss": 1.0029715299606323, "lr": 3.2094507403003614e-05, "epoch": 1.8272921108742004, "percentage": 36.51, "elapsed_time": "0:37:21", "remaining_time": "1:04:57"} +{"current_steps": 430, "total_steps": 1175, "loss": 0.987981915473938, "lr": 3.2047111168212785e-05, "epoch": 1.831556503198294, "percentage": 36.6, "elapsed_time": "0:37:26", "remaining_time": "1:04:51"} +{"current_steps": 431, "total_steps": 1175, "loss": 1.0297985076904297, "lr": 3.1999608511188524e-05, "epoch": 1.835820895522388, "percentage": 36.68, "elapsed_time": "0:37:31", "remaining_time": "1:04:46"} +{"current_steps": 432, "total_steps": 1175, "loss": 1.0437334775924683, "lr": 3.1951999851561625e-05, "epoch": 1.840085287846482, "percentage": 36.77, "elapsed_time": "0:37:36", "remaining_time": "1:04:41"} +{"current_steps": 433, "total_steps": 1175, "loss": 1.0184440612792969, "lr": 3.190428560989931e-05, "epoch": 1.8443496801705757, "percentage": 36.85, "elapsed_time": "0:37:41", "remaining_time": "1:04:35"} +{"current_steps": 434, "total_steps": 1175, "loss": 1.0451010465621948, "lr": 3.185646620770146e-05, "epoch": 1.8486140724946694, "percentage": 36.94, "elapsed_time": "0:37:46", "remaining_time": "1:04:30"} +{"current_steps": 435, "total_steps": 1175, "loss": 1.0416852235794067, "lr": 3.180854206739696e-05, "epoch": 1.8528784648187633, "percentage": 37.02, "elapsed_time": "0:37:52", "remaining_time": "1:04:25"} +{"current_steps": 436, "total_steps": 1175, "loss": 1.007869005203247, "lr": 3.176051361233991e-05, "epoch": 1.8571428571428572, "percentage": 37.11, "elapsed_time": "0:37:57", "remaining_time": "1:04:19"} +{"current_steps": 437, "total_steps": 1175, "loss": 1.0384873151779175, "lr": 3.171238126680594e-05, "epoch": 1.861407249466951, "percentage": 37.19, "elapsed_time": "0:38:02", "remaining_time": "1:04:14"} +{"current_steps": 438, "total_steps": 1175, "loss": 1.05495285987854, "lr": 3.166414545598839e-05, "epoch": 1.8656716417910446, "percentage": 37.28, "elapsed_time": "0:38:07", "remaining_time": "1:04:09"} +{"current_steps": 439, "total_steps": 1175, "loss": 1.0517874956130981, "lr": 3.161580660599464e-05, "epoch": 1.8699360341151388, "percentage": 37.36, "elapsed_time": "0:38:12", "remaining_time": "1:04:03"} +{"current_steps": 440, "total_steps": 1175, "loss": 1.005902886390686, "lr": 3.1567365143842264e-05, "epoch": 1.8742004264392325, "percentage": 37.45, "elapsed_time": "0:38:17", "remaining_time": "1:03:58"} +{"current_steps": 441, "total_steps": 1175, "loss": 1.0198183059692383, "lr": 3.1518821497455326e-05, "epoch": 1.8784648187633262, "percentage": 37.53, "elapsed_time": "0:38:22", "remaining_time": "1:03:52"} +{"current_steps": 442, "total_steps": 1175, "loss": 1.0181028842926025, "lr": 3.147017609566054e-05, "epoch": 1.88272921108742, "percentage": 37.62, "elapsed_time": "0:38:27", "remaining_time": "1:03:47"} +{"current_steps": 443, "total_steps": 1175, "loss": 1.0328750610351562, "lr": 3.142142936818353e-05, "epoch": 1.886993603411514, "percentage": 37.7, "elapsed_time": "0:38:33", "remaining_time": "1:03:42"} +{"current_steps": 444, "total_steps": 1175, "loss": 0.999271035194397, "lr": 3.137258174564501e-05, "epoch": 1.8912579957356077, "percentage": 37.79, "elapsed_time": "0:38:38", "remaining_time": "1:03:36"} +{"current_steps": 445, "total_steps": 1175, "loss": 1.0545252561569214, "lr": 3.1323633659556986e-05, "epoch": 1.8955223880597014, "percentage": 37.87, "elapsed_time": "0:38:43", "remaining_time": "1:03:31"} +{"current_steps": 446, "total_steps": 1175, "loss": 1.0253419876098633, "lr": 3.127458554231894e-05, "epoch": 1.8997867803837953, "percentage": 37.96, "elapsed_time": "0:38:48", "remaining_time": "1:03:25"} +{"current_steps": 447, "total_steps": 1175, "loss": 1.0158578157424927, "lr": 3.122543782721402e-05, "epoch": 1.9040511727078893, "percentage": 38.04, "elapsed_time": "0:38:53", "remaining_time": "1:03:20"} +{"current_steps": 448, "total_steps": 1175, "loss": 1.069814920425415, "lr": 3.1176190948405194e-05, "epoch": 1.908315565031983, "percentage": 38.13, "elapsed_time": "0:38:58", "remaining_time": "1:03:14"} +{"current_steps": 449, "total_steps": 1175, "loss": 1.0053629875183105, "lr": 3.112684534093142e-05, "epoch": 1.9125799573560767, "percentage": 38.21, "elapsed_time": "0:39:03", "remaining_time": "1:03:09"} +{"current_steps": 450, "total_steps": 1175, "loss": 1.0423624515533447, "lr": 3.107740144070385e-05, "epoch": 1.9168443496801706, "percentage": 38.3, "elapsed_time": "0:39:08", "remaining_time": "1:03:04"} +{"current_steps": 451, "total_steps": 1175, "loss": 1.0227258205413818, "lr": 3.102785968450188e-05, "epoch": 1.9211087420042645, "percentage": 38.38, "elapsed_time": "0:39:14", "remaining_time": "1:02:58"} +{"current_steps": 452, "total_steps": 1175, "loss": 1.0171830654144287, "lr": 3.09782205099694e-05, "epoch": 1.9253731343283582, "percentage": 38.47, "elapsed_time": "0:39:19", "remaining_time": "1:02:53"} +{"current_steps": 453, "total_steps": 1175, "loss": 1.0100500583648682, "lr": 3.092848435561084e-05, "epoch": 1.929637526652452, "percentage": 38.55, "elapsed_time": "0:39:24", "remaining_time": "1:02:48"} +{"current_steps": 454, "total_steps": 1175, "loss": 1.0602819919586182, "lr": 3.0878651660787376e-05, "epoch": 1.9339019189765458, "percentage": 38.64, "elapsed_time": "0:39:29", "remaining_time": "1:02:42"} +{"current_steps": 455, "total_steps": 1175, "loss": 1.0430219173431396, "lr": 3.082872286571295e-05, "epoch": 1.9381663113006398, "percentage": 38.72, "elapsed_time": "0:39:34", "remaining_time": "1:02:37"} +{"current_steps": 456, "total_steps": 1175, "loss": 1.0674023628234863, "lr": 3.077869841145049e-05, "epoch": 1.9424307036247335, "percentage": 38.81, "elapsed_time": "0:39:39", "remaining_time": "1:02:32"} +{"current_steps": 457, "total_steps": 1175, "loss": 1.0135544538497925, "lr": 3.0728578739907934e-05, "epoch": 1.9466950959488272, "percentage": 38.89, "elapsed_time": "0:39:44", "remaining_time": "1:02:26"} +{"current_steps": 458, "total_steps": 1175, "loss": 1.0493121147155762, "lr": 3.067836429383437e-05, "epoch": 1.950959488272921, "percentage": 38.98, "elapsed_time": "0:39:49", "remaining_time": "1:02:21"} +{"current_steps": 459, "total_steps": 1175, "loss": 1.0064876079559326, "lr": 3.062805551681609e-05, "epoch": 1.955223880597015, "percentage": 39.06, "elapsed_time": "0:39:55", "remaining_time": "1:02:16"} +{"current_steps": 460, "total_steps": 1175, "loss": 1.0187498331069946, "lr": 3.057765285327271e-05, "epoch": 1.9594882729211087, "percentage": 39.15, "elapsed_time": "0:40:00", "remaining_time": "1:02:10"} +{"current_steps": 461, "total_steps": 1175, "loss": 1.0560030937194824, "lr": 3.0527156748453214e-05, "epoch": 1.9637526652452024, "percentage": 39.23, "elapsed_time": "0:40:05", "remaining_time": "1:02:05"} +{"current_steps": 462, "total_steps": 1175, "loss": 0.9743139743804932, "lr": 3.047656764843203e-05, "epoch": 1.9680170575692963, "percentage": 39.32, "elapsed_time": "0:40:10", "remaining_time": "1:02:00"} +{"current_steps": 463, "total_steps": 1175, "loss": 1.0284925699234009, "lr": 3.0425886000105094e-05, "epoch": 1.9722814498933903, "percentage": 39.4, "elapsed_time": "0:40:15", "remaining_time": "1:01:54"} +{"current_steps": 464, "total_steps": 1175, "loss": 1.011577844619751, "lr": 3.0375112251185892e-05, "epoch": 1.976545842217484, "percentage": 39.49, "elapsed_time": "0:40:20", "remaining_time": "1:01:49"} +{"current_steps": 465, "total_steps": 1175, "loss": 1.0314466953277588, "lr": 3.0324246850201527e-05, "epoch": 1.9808102345415777, "percentage": 39.57, "elapsed_time": "0:40:26", "remaining_time": "1:01:44"} +{"current_steps": 466, "total_steps": 1175, "loss": 1.0470771789550781, "lr": 3.0273290246488732e-05, "epoch": 1.9850746268656716, "percentage": 39.66, "elapsed_time": "0:40:31", "remaining_time": "1:01:39"} +{"current_steps": 467, "total_steps": 1175, "loss": 1.0223674774169922, "lr": 3.0222242890189904e-05, "epoch": 1.9893390191897655, "percentage": 39.74, "elapsed_time": "0:40:36", "remaining_time": "1:01:33"} +{"current_steps": 468, "total_steps": 1175, "loss": 0.9852697849273682, "lr": 3.017110523224914e-05, "epoch": 1.9936034115138592, "percentage": 39.83, "elapsed_time": "0:40:41", "remaining_time": "1:01:28"} +{"current_steps": 469, "total_steps": 1175, "loss": 1.0171148777008057, "lr": 3.011987772440825e-05, "epoch": 1.997867803837953, "percentage": 39.91, "elapsed_time": "0:40:46", "remaining_time": "1:01:23"} +{"current_steps": 470, "total_steps": 1175, "loss": 1.0270267724990845, "lr": 3.006856081920277e-05, "epoch": 2.0, "percentage": 40.0, "elapsed_time": "0:40:49", "remaining_time": "1:01:13"} +{"current_steps": 471, "total_steps": 1175, "loss": 0.9987781643867493, "lr": 3.001715496995793e-05, "epoch": 2.0042643923240937, "percentage": 40.09, "elapsed_time": "0:40:56", "remaining_time": "1:01:11"} +{"current_steps": 472, "total_steps": 1175, "loss": 1.0175721645355225, "lr": 2.9965660630784715e-05, "epoch": 2.008528784648188, "percentage": 40.17, "elapsed_time": "0:41:01", "remaining_time": "1:01:06"} +{"current_steps": 473, "total_steps": 1175, "loss": 1.0302276611328125, "lr": 2.9914078256575782e-05, "epoch": 2.0127931769722816, "percentage": 40.26, "elapsed_time": "0:41:06", "remaining_time": "1:01:01"} +{"current_steps": 474, "total_steps": 1175, "loss": 1.076301097869873, "lr": 2.9862408303001492e-05, "epoch": 2.0170575692963753, "percentage": 40.34, "elapsed_time": "0:41:11", "remaining_time": "1:00:55"} +{"current_steps": 475, "total_steps": 1175, "loss": 1.036280632019043, "lr": 2.9810651226505875e-05, "epoch": 2.021321961620469, "percentage": 40.43, "elapsed_time": "0:41:17", "remaining_time": "1:00:50"} +{"current_steps": 476, "total_steps": 1175, "loss": 1.0325391292572021, "lr": 2.9758807484302566e-05, "epoch": 2.025586353944563, "percentage": 40.51, "elapsed_time": "0:41:22", "remaining_time": "1:00:45"} +{"current_steps": 477, "total_steps": 1175, "loss": 1.0386598110198975, "lr": 2.9706877534370822e-05, "epoch": 2.029850746268657, "percentage": 40.6, "elapsed_time": "0:41:27", "remaining_time": "1:00:39"} +{"current_steps": 478, "total_steps": 1175, "loss": 1.0114233493804932, "lr": 2.965486183545142e-05, "epoch": 2.0341151385927505, "percentage": 40.68, "elapsed_time": "0:41:32", "remaining_time": "1:00:34"} +{"current_steps": 479, "total_steps": 1175, "loss": 0.9995619058609009, "lr": 2.9602760847042645e-05, "epoch": 2.038379530916844, "percentage": 40.77, "elapsed_time": "0:41:37", "remaining_time": "1:00:29"} +{"current_steps": 480, "total_steps": 1175, "loss": 0.9988946318626404, "lr": 2.955057502939621e-05, "epoch": 2.0426439232409384, "percentage": 40.85, "elapsed_time": "0:41:42", "remaining_time": "1:00:23"} +{"current_steps": 481, "total_steps": 1175, "loss": 1.019971251487732, "lr": 2.9498304843513193e-05, "epoch": 2.046908315565032, "percentage": 40.94, "elapsed_time": "0:41:47", "remaining_time": "1:00:18"} +{"current_steps": 482, "total_steps": 1175, "loss": 0.9783295392990112, "lr": 2.9445950751139957e-05, "epoch": 2.0511727078891258, "percentage": 41.02, "elapsed_time": "0:41:53", "remaining_time": "1:00:13"} +{"current_steps": 483, "total_steps": 1175, "loss": 0.9889360666275024, "lr": 2.939351321476412e-05, "epoch": 2.0554371002132195, "percentage": 41.11, "elapsed_time": "0:41:58", "remaining_time": "1:00:07"} +{"current_steps": 484, "total_steps": 1175, "loss": 0.9587419033050537, "lr": 2.9340992697610393e-05, "epoch": 2.0597014925373136, "percentage": 41.19, "elapsed_time": "0:42:03", "remaining_time": "1:00:02"} +{"current_steps": 485, "total_steps": 1175, "loss": 0.9435993432998657, "lr": 2.9288389663636537e-05, "epoch": 2.0639658848614073, "percentage": 41.28, "elapsed_time": "0:42:08", "remaining_time": "0:59:57"} +{"current_steps": 486, "total_steps": 1175, "loss": 0.9886394143104553, "lr": 2.923570457752925e-05, "epoch": 2.068230277185501, "percentage": 41.36, "elapsed_time": "0:42:13", "remaining_time": "0:59:51"} +{"current_steps": 487, "total_steps": 1175, "loss": 0.9575808644294739, "lr": 2.9182937904700078e-05, "epoch": 2.0724946695095947, "percentage": 41.45, "elapsed_time": "0:42:18", "remaining_time": "0:59:46"} +{"current_steps": 488, "total_steps": 1175, "loss": 1.0362825393676758, "lr": 2.9130090111281278e-05, "epoch": 2.076759061833689, "percentage": 41.53, "elapsed_time": "0:42:23", "remaining_time": "0:59:41"} +{"current_steps": 489, "total_steps": 1175, "loss": 0.998286247253418, "lr": 2.9077161664121722e-05, "epoch": 2.0810234541577826, "percentage": 41.62, "elapsed_time": "0:42:28", "remaining_time": "0:59:35"} +{"current_steps": 490, "total_steps": 1175, "loss": 1.0012212991714478, "lr": 2.902415303078275e-05, "epoch": 2.0852878464818763, "percentage": 41.7, "elapsed_time": "0:42:34", "remaining_time": "0:59:30"} +{"current_steps": 491, "total_steps": 1175, "loss": 0.9862103462219238, "lr": 2.8971064679534072e-05, "epoch": 2.08955223880597, "percentage": 41.79, "elapsed_time": "0:42:39", "remaining_time": "0:59:25"} +{"current_steps": 492, "total_steps": 1175, "loss": 1.006826400756836, "lr": 2.8917897079349604e-05, "epoch": 2.093816631130064, "percentage": 41.87, "elapsed_time": "0:42:44", "remaining_time": "0:59:19"} +{"current_steps": 493, "total_steps": 1175, "loss": 0.9882227778434753, "lr": 2.8864650699903336e-05, "epoch": 2.098081023454158, "percentage": 41.96, "elapsed_time": "0:42:49", "remaining_time": "0:59:14"} +{"current_steps": 494, "total_steps": 1175, "loss": 0.9697372913360596, "lr": 2.881132601156518e-05, "epoch": 2.1023454157782515, "percentage": 42.04, "elapsed_time": "0:42:54", "remaining_time": "0:59:09"} +{"current_steps": 495, "total_steps": 1175, "loss": 0.9951438903808594, "lr": 2.8757923485396805e-05, "epoch": 2.106609808102345, "percentage": 42.13, "elapsed_time": "0:42:59", "remaining_time": "0:59:03"} +{"current_steps": 496, "total_steps": 1175, "loss": 0.9793698787689209, "lr": 2.8704443593147517e-05, "epoch": 2.1108742004264394, "percentage": 42.21, "elapsed_time": "0:43:04", "remaining_time": "0:58:58"} +{"current_steps": 497, "total_steps": 1175, "loss": 1.0147504806518555, "lr": 2.8650886807250024e-05, "epoch": 2.115138592750533, "percentage": 42.3, "elapsed_time": "0:43:09", "remaining_time": "0:58:53"} +{"current_steps": 498, "total_steps": 1175, "loss": 0.952460527420044, "lr": 2.8597253600816332e-05, "epoch": 2.1194029850746268, "percentage": 42.38, "elapsed_time": "0:43:15", "remaining_time": "0:58:47"} +{"current_steps": 499, "total_steps": 1175, "loss": 1.0123192071914673, "lr": 2.8543544447633517e-05, "epoch": 2.1236673773987205, "percentage": 42.47, "elapsed_time": "0:43:20", "remaining_time": "0:58:42"} +{"current_steps": 500, "total_steps": 1175, "loss": 1.0260965824127197, "lr": 2.8489759822159558e-05, "epoch": 2.1279317697228146, "percentage": 42.55, "elapsed_time": "0:43:25", "remaining_time": "0:58:37"} +{"current_steps": 501, "total_steps": 1175, "loss": 0.9698889255523682, "lr": 2.843590019951914e-05, "epoch": 2.1321961620469083, "percentage": 42.64, "elapsed_time": "0:43:37", "remaining_time": "0:58:40"} +{"current_steps": 502, "total_steps": 1175, "loss": 1.0217312574386597, "lr": 2.838196605549948e-05, "epoch": 2.136460554371002, "percentage": 42.72, "elapsed_time": "0:43:42", "remaining_time": "0:58:35"} +{"current_steps": 503, "total_steps": 1175, "loss": 1.0324029922485352, "lr": 2.8327957866546082e-05, "epoch": 2.140724946695096, "percentage": 42.81, "elapsed_time": "0:43:47", "remaining_time": "0:58:30"} +{"current_steps": 504, "total_steps": 1175, "loss": 0.9821799397468567, "lr": 2.8273876109758568e-05, "epoch": 2.14498933901919, "percentage": 42.89, "elapsed_time": "0:43:52", "remaining_time": "0:58:24"} +{"current_steps": 505, "total_steps": 1175, "loss": 0.9939541816711426, "lr": 2.8219721262886427e-05, "epoch": 2.1492537313432836, "percentage": 42.98, "elapsed_time": "0:43:57", "remaining_time": "0:58:19"} +{"current_steps": 506, "total_steps": 1175, "loss": 0.9698700308799744, "lr": 2.816549380432483e-05, "epoch": 2.1535181236673773, "percentage": 43.06, "elapsed_time": "0:44:02", "remaining_time": "0:58:14"} +{"current_steps": 507, "total_steps": 1175, "loss": 0.9915518164634705, "lr": 2.8111194213110386e-05, "epoch": 2.1577825159914714, "percentage": 43.15, "elapsed_time": "0:44:07", "remaining_time": "0:58:08"} +{"current_steps": 508, "total_steps": 1175, "loss": 1.0347942113876343, "lr": 2.805682296891691e-05, "epoch": 2.162046908315565, "percentage": 43.23, "elapsed_time": "0:44:13", "remaining_time": "0:58:03"} +{"current_steps": 509, "total_steps": 1175, "loss": 1.0014612674713135, "lr": 2.8002380552051186e-05, "epoch": 2.166311300639659, "percentage": 43.32, "elapsed_time": "0:44:18", "remaining_time": "0:57:57"} +{"current_steps": 510, "total_steps": 1175, "loss": 1.0508100986480713, "lr": 2.7947867443448728e-05, "epoch": 2.1705756929637525, "percentage": 43.4, "elapsed_time": "0:44:23", "remaining_time": "0:57:52"} +{"current_steps": 511, "total_steps": 1175, "loss": 0.9997203350067139, "lr": 2.789328412466953e-05, "epoch": 2.1748400852878467, "percentage": 43.49, "elapsed_time": "0:44:28", "remaining_time": "0:57:47"} +{"current_steps": 512, "total_steps": 1175, "loss": 1.0263261795043945, "lr": 2.7838631077893813e-05, "epoch": 2.1791044776119404, "percentage": 43.57, "elapsed_time": "0:44:33", "remaining_time": "0:57:41"} +{"current_steps": 513, "total_steps": 1175, "loss": 1.010587215423584, "lr": 2.7783908785917753e-05, "epoch": 2.183368869936034, "percentage": 43.66, "elapsed_time": "0:44:38", "remaining_time": "0:57:36"} +{"current_steps": 514, "total_steps": 1175, "loss": 1.0085017681121826, "lr": 2.7729117732149244e-05, "epoch": 2.1876332622601278, "percentage": 43.74, "elapsed_time": "0:44:43", "remaining_time": "0:57:31"} +{"current_steps": 515, "total_steps": 1175, "loss": 1.0182987451553345, "lr": 2.7674258400603587e-05, "epoch": 2.191897654584222, "percentage": 43.83, "elapsed_time": "0:44:48", "remaining_time": "0:57:25"} +{"current_steps": 516, "total_steps": 1175, "loss": 0.9880110621452332, "lr": 2.761933127589927e-05, "epoch": 2.1961620469083156, "percentage": 43.91, "elapsed_time": "0:44:54", "remaining_time": "0:57:20"} +{"current_steps": 517, "total_steps": 1175, "loss": 1.0093759298324585, "lr": 2.7564336843253633e-05, "epoch": 2.2004264392324093, "percentage": 44.0, "elapsed_time": "0:44:59", "remaining_time": "0:57:15"} +{"current_steps": 518, "total_steps": 1175, "loss": 0.9518511295318604, "lr": 2.7509275588478606e-05, "epoch": 2.204690831556503, "percentage": 44.09, "elapsed_time": "0:45:04", "remaining_time": "0:57:10"} +{"current_steps": 519, "total_steps": 1175, "loss": 1.013105869293213, "lr": 2.7454147997976404e-05, "epoch": 2.208955223880597, "percentage": 44.17, "elapsed_time": "0:45:09", "remaining_time": "0:57:04"} +{"current_steps": 520, "total_steps": 1175, "loss": 1.0273163318634033, "lr": 2.7398954558735272e-05, "epoch": 2.213219616204691, "percentage": 44.26, "elapsed_time": "0:45:14", "remaining_time": "0:56:59"} +{"current_steps": 521, "total_steps": 1175, "loss": 1.0346674919128418, "lr": 2.7343695758325125e-05, "epoch": 2.2174840085287846, "percentage": 44.34, "elapsed_time": "0:45:19", "remaining_time": "0:56:53"} +{"current_steps": 522, "total_steps": 1175, "loss": 0.9666027426719666, "lr": 2.7288372084893282e-05, "epoch": 2.2217484008528783, "percentage": 44.43, "elapsed_time": "0:45:24", "remaining_time": "0:56:48"} +{"current_steps": 523, "total_steps": 1175, "loss": 0.9904748201370239, "lr": 2.7232984027160126e-05, "epoch": 2.2260127931769724, "percentage": 44.51, "elapsed_time": "0:45:29", "remaining_time": "0:56:43"} +{"current_steps": 524, "total_steps": 1175, "loss": 1.0020073652267456, "lr": 2.7177532074414822e-05, "epoch": 2.230277185501066, "percentage": 44.6, "elapsed_time": "0:45:35", "remaining_time": "0:56:37"} +{"current_steps": 525, "total_steps": 1175, "loss": 1.0094950199127197, "lr": 2.712201671651094e-05, "epoch": 2.23454157782516, "percentage": 44.68, "elapsed_time": "0:45:40", "remaining_time": "0:56:32"} +{"current_steps": 526, "total_steps": 1175, "loss": 0.9910581111907959, "lr": 2.7066438443862205e-05, "epoch": 2.2388059701492535, "percentage": 44.77, "elapsed_time": "0:45:45", "remaining_time": "0:56:27"} +{"current_steps": 527, "total_steps": 1175, "loss": 0.9488228559494019, "lr": 2.701079774743808e-05, "epoch": 2.2430703624733477, "percentage": 44.85, "elapsed_time": "0:45:50", "remaining_time": "0:56:21"} +{"current_steps": 528, "total_steps": 1175, "loss": 1.0226023197174072, "lr": 2.6955095118759496e-05, "epoch": 2.2473347547974414, "percentage": 44.94, "elapsed_time": "0:45:55", "remaining_time": "0:56:16"} +{"current_steps": 529, "total_steps": 1175, "loss": 1.003893256187439, "lr": 2.689933104989447e-05, "epoch": 2.251599147121535, "percentage": 45.02, "elapsed_time": "0:46:00", "remaining_time": "0:56:11"} +{"current_steps": 530, "total_steps": 1175, "loss": 0.9687828421592712, "lr": 2.6843506033453777e-05, "epoch": 2.2558635394456292, "percentage": 45.11, "elapsed_time": "0:46:05", "remaining_time": "0:56:05"} +{"current_steps": 531, "total_steps": 1175, "loss": 0.9960900545120239, "lr": 2.6787620562586587e-05, "epoch": 2.260127931769723, "percentage": 45.19, "elapsed_time": "0:46:10", "remaining_time": "0:56:00"} +{"current_steps": 532, "total_steps": 1175, "loss": 0.9789157509803772, "lr": 2.673167513097613e-05, "epoch": 2.2643923240938166, "percentage": 45.28, "elapsed_time": "0:46:16", "remaining_time": "0:55:55"} +{"current_steps": 533, "total_steps": 1175, "loss": 0.9460334777832031, "lr": 2.6675670232835297e-05, "epoch": 2.2686567164179103, "percentage": 45.36, "elapsed_time": "0:46:21", "remaining_time": "0:55:50"} +{"current_steps": 534, "total_steps": 1175, "loss": 0.9994051456451416, "lr": 2.661960636290231e-05, "epoch": 2.272921108742004, "percentage": 45.45, "elapsed_time": "0:46:26", "remaining_time": "0:55:44"} +{"current_steps": 535, "total_steps": 1175, "loss": 1.02659273147583, "lr": 2.6563484016436346e-05, "epoch": 2.277185501066098, "percentage": 45.53, "elapsed_time": "0:46:31", "remaining_time": "0:55:39"} +{"current_steps": 536, "total_steps": 1175, "loss": 1.0258793830871582, "lr": 2.6507303689213143e-05, "epoch": 2.281449893390192, "percentage": 45.62, "elapsed_time": "0:46:36", "remaining_time": "0:55:34"} +{"current_steps": 537, "total_steps": 1175, "loss": 0.9970362186431885, "lr": 2.6451065877520634e-05, "epoch": 2.2857142857142856, "percentage": 45.7, "elapsed_time": "0:46:41", "remaining_time": "0:55:28"} +{"current_steps": 538, "total_steps": 1175, "loss": 0.9489619731903076, "lr": 2.639477107815455e-05, "epoch": 2.2899786780383797, "percentage": 45.79, "elapsed_time": "0:46:46", "remaining_time": "0:55:23"} +{"current_steps": 539, "total_steps": 1175, "loss": 0.9728654623031616, "lr": 2.633841978841406e-05, "epoch": 2.2942430703624734, "percentage": 45.87, "elapsed_time": "0:46:51", "remaining_time": "0:55:18"} +{"current_steps": 540, "total_steps": 1175, "loss": 1.0068259239196777, "lr": 2.6282012506097347e-05, "epoch": 2.298507462686567, "percentage": 45.96, "elapsed_time": "0:46:57", "remaining_time": "0:55:12"} +{"current_steps": 541, "total_steps": 1175, "loss": 1.0444014072418213, "lr": 2.622554972949724e-05, "epoch": 2.302771855010661, "percentage": 46.04, "elapsed_time": "0:47:02", "remaining_time": "0:55:07"} +{"current_steps": 542, "total_steps": 1175, "loss": 1.0184507369995117, "lr": 2.6169031957396778e-05, "epoch": 2.307036247334755, "percentage": 46.13, "elapsed_time": "0:47:07", "remaining_time": "0:55:02"} +{"current_steps": 543, "total_steps": 1175, "loss": 0.9821099638938904, "lr": 2.611245968906482e-05, "epoch": 2.3113006396588487, "percentage": 46.21, "elapsed_time": "0:47:12", "remaining_time": "0:54:56"} +{"current_steps": 544, "total_steps": 1175, "loss": 0.9992808103561401, "lr": 2.605583342425165e-05, "epoch": 2.3155650319829424, "percentage": 46.3, "elapsed_time": "0:47:17", "remaining_time": "0:54:51"} +{"current_steps": 545, "total_steps": 1175, "loss": 1.0641918182373047, "lr": 2.5999153663184546e-05, "epoch": 2.319829424307036, "percentage": 46.38, "elapsed_time": "0:47:22", "remaining_time": "0:54:46"} +{"current_steps": 546, "total_steps": 1175, "loss": 0.9966145753860474, "lr": 2.594242090656335e-05, "epoch": 2.3240938166311302, "percentage": 46.47, "elapsed_time": "0:47:28", "remaining_time": "0:54:41"} +{"current_steps": 547, "total_steps": 1175, "loss": 0.9890132546424866, "lr": 2.5885635655556075e-05, "epoch": 2.328358208955224, "percentage": 46.55, "elapsed_time": "0:47:33", "remaining_time": "0:54:35"} +{"current_steps": 548, "total_steps": 1175, "loss": 1.017390489578247, "lr": 2.5828798411794443e-05, "epoch": 2.3326226012793176, "percentage": 46.64, "elapsed_time": "0:47:38", "remaining_time": "0:54:30"} +{"current_steps": 549, "total_steps": 1175, "loss": 0.9837027788162231, "lr": 2.5771909677369484e-05, "epoch": 2.3368869936034113, "percentage": 46.72, "elapsed_time": "0:47:43", "remaining_time": "0:54:25"} +{"current_steps": 550, "total_steps": 1175, "loss": 1.036919116973877, "lr": 2.571496995482709e-05, "epoch": 2.3411513859275055, "percentage": 46.81, "elapsed_time": "0:47:48", "remaining_time": "0:54:19"} +{"current_steps": 551, "total_steps": 1175, "loss": 1.0211420059204102, "lr": 2.565797974716357e-05, "epoch": 2.345415778251599, "percentage": 46.89, "elapsed_time": "0:47:53", "remaining_time": "0:54:14"} +{"current_steps": 552, "total_steps": 1175, "loss": 0.9743055105209351, "lr": 2.5600939557821205e-05, "epoch": 2.349680170575693, "percentage": 46.98, "elapsed_time": "0:47:59", "remaining_time": "0:54:09"} +{"current_steps": 553, "total_steps": 1175, "loss": 0.9749882221221924, "lr": 2.5543849890683813e-05, "epoch": 2.3539445628997866, "percentage": 47.06, "elapsed_time": "0:48:04", "remaining_time": "0:54:04"} +{"current_steps": 554, "total_steps": 1175, "loss": 0.9856359958648682, "lr": 2.548671125007229e-05, "epoch": 2.3582089552238807, "percentage": 47.15, "elapsed_time": "0:48:09", "remaining_time": "0:53:58"} +{"current_steps": 555, "total_steps": 1175, "loss": 1.0194714069366455, "lr": 2.5429524140740155e-05, "epoch": 2.3624733475479744, "percentage": 47.23, "elapsed_time": "0:48:14", "remaining_time": "0:53:53"} +{"current_steps": 556, "total_steps": 1175, "loss": 1.0033659934997559, "lr": 2.537228906786908e-05, "epoch": 2.366737739872068, "percentage": 47.32, "elapsed_time": "0:48:19", "remaining_time": "0:53:48"} +{"current_steps": 557, "total_steps": 1175, "loss": 0.996933102607727, "lr": 2.5315006537064473e-05, "epoch": 2.3710021321961623, "percentage": 47.4, "elapsed_time": "0:48:24", "remaining_time": "0:53:42"} +{"current_steps": 558, "total_steps": 1175, "loss": 0.978560209274292, "lr": 2.5257677054350927e-05, "epoch": 2.375266524520256, "percentage": 47.49, "elapsed_time": "0:48:29", "remaining_time": "0:53:37"} +{"current_steps": 559, "total_steps": 1175, "loss": 0.9780471920967102, "lr": 2.5200301126167857e-05, "epoch": 2.3795309168443497, "percentage": 47.57, "elapsed_time": "0:48:34", "remaining_time": "0:53:32"} +{"current_steps": 560, "total_steps": 1175, "loss": 1.0212950706481934, "lr": 2.514287925936492e-05, "epoch": 2.3837953091684434, "percentage": 47.66, "elapsed_time": "0:48:40", "remaining_time": "0:53:26"} +{"current_steps": 561, "total_steps": 1175, "loss": 1.0373973846435547, "lr": 2.5085411961197626e-05, "epoch": 2.388059701492537, "percentage": 47.74, "elapsed_time": "0:48:45", "remaining_time": "0:53:21"} +{"current_steps": 562, "total_steps": 1175, "loss": 1.0003979206085205, "lr": 2.502789973932278e-05, "epoch": 2.3923240938166312, "percentage": 47.83, "elapsed_time": "0:48:50", "remaining_time": "0:53:16"} +{"current_steps": 563, "total_steps": 1175, "loss": 1.0033340454101562, "lr": 2.4970343101794073e-05, "epoch": 2.396588486140725, "percentage": 47.91, "elapsed_time": "0:48:55", "remaining_time": "0:53:10"} +{"current_steps": 564, "total_steps": 1175, "loss": 0.9911829233169556, "lr": 2.4912742557057538e-05, "epoch": 2.4008528784648187, "percentage": 48.0, "elapsed_time": "0:49:00", "remaining_time": "0:53:05"} +{"current_steps": 565, "total_steps": 1175, "loss": 1.0349599123001099, "lr": 2.485509861394708e-05, "epoch": 2.405117270788913, "percentage": 48.09, "elapsed_time": "0:49:05", "remaining_time": "0:53:00"} +{"current_steps": 566, "total_steps": 1175, "loss": 0.9519776701927185, "lr": 2.4797411781679975e-05, "epoch": 2.4093816631130065, "percentage": 48.17, "elapsed_time": "0:49:10", "remaining_time": "0:52:55"} +{"current_steps": 567, "total_steps": 1175, "loss": 1.0031790733337402, "lr": 2.473968256985238e-05, "epoch": 2.4136460554371, "percentage": 48.26, "elapsed_time": "0:49:16", "remaining_time": "0:52:49"} +{"current_steps": 568, "total_steps": 1175, "loss": 1.0346243381500244, "lr": 2.4681911488434825e-05, "epoch": 2.417910447761194, "percentage": 48.34, "elapsed_time": "0:49:21", "remaining_time": "0:52:44"} +{"current_steps": 569, "total_steps": 1175, "loss": 1.0115197896957397, "lr": 2.4624099047767702e-05, "epoch": 2.4221748400852876, "percentage": 48.43, "elapsed_time": "0:49:26", "remaining_time": "0:52:39"} +{"current_steps": 570, "total_steps": 1175, "loss": 0.968398928642273, "lr": 2.4566245758556787e-05, "epoch": 2.4264392324093818, "percentage": 48.51, "elapsed_time": "0:49:31", "remaining_time": "0:52:33"} +{"current_steps": 571, "total_steps": 1175, "loss": 1.0073961019515991, "lr": 2.4508352131868664e-05, "epoch": 2.4307036247334755, "percentage": 48.6, "elapsed_time": "0:49:36", "remaining_time": "0:52:28"} +{"current_steps": 572, "total_steps": 1175, "loss": 0.9651147723197937, "lr": 2.445041867912629e-05, "epoch": 2.434968017057569, "percentage": 48.68, "elapsed_time": "0:49:41", "remaining_time": "0:52:23"} +{"current_steps": 573, "total_steps": 1175, "loss": 0.9606080651283264, "lr": 2.439244591210443e-05, "epoch": 2.4392324093816633, "percentage": 48.77, "elapsed_time": "0:49:46", "remaining_time": "0:52:17"} +{"current_steps": 574, "total_steps": 1175, "loss": 0.9872428178787231, "lr": 2.4334434342925133e-05, "epoch": 2.443496801705757, "percentage": 48.85, "elapsed_time": "0:49:51", "remaining_time": "0:52:12"} +{"current_steps": 575, "total_steps": 1175, "loss": 0.9739286303520203, "lr": 2.4276384484053227e-05, "epoch": 2.4477611940298507, "percentage": 48.94, "elapsed_time": "0:49:57", "remaining_time": "0:52:07"} +{"current_steps": 576, "total_steps": 1175, "loss": 1.035171389579773, "lr": 2.4218296848291795e-05, "epoch": 2.4520255863539444, "percentage": 49.02, "elapsed_time": "0:50:02", "remaining_time": "0:52:02"} +{"current_steps": 577, "total_steps": 1175, "loss": 0.9901844263076782, "lr": 2.4160171948777603e-05, "epoch": 2.4562899786780386, "percentage": 49.11, "elapsed_time": "0:50:07", "remaining_time": "0:51:56"} +{"current_steps": 578, "total_steps": 1175, "loss": 1.0634323358535767, "lr": 2.410201029897665e-05, "epoch": 2.4605543710021323, "percentage": 49.19, "elapsed_time": "0:50:12", "remaining_time": "0:51:51"} +{"current_steps": 579, "total_steps": 1175, "loss": 0.9816789031028748, "lr": 2.4043812412679532e-05, "epoch": 2.464818763326226, "percentage": 49.28, "elapsed_time": "0:50:17", "remaining_time": "0:51:46"} +{"current_steps": 580, "total_steps": 1175, "loss": 1.0299735069274902, "lr": 2.3985578803996985e-05, "epoch": 2.4690831556503197, "percentage": 49.36, "elapsed_time": "0:50:22", "remaining_time": "0:51:40"} +{"current_steps": 581, "total_steps": 1175, "loss": 1.0363097190856934, "lr": 2.392730998735529e-05, "epoch": 2.473347547974414, "percentage": 49.45, "elapsed_time": "0:50:27", "remaining_time": "0:51:35"} +{"current_steps": 582, "total_steps": 1175, "loss": 1.0010910034179688, "lr": 2.3869006477491755e-05, "epoch": 2.4776119402985075, "percentage": 49.53, "elapsed_time": "0:50:32", "remaining_time": "0:51:30"} +{"current_steps": 583, "total_steps": 1175, "loss": 0.9860137701034546, "lr": 2.381066878945017e-05, "epoch": 2.481876332622601, "percentage": 49.62, "elapsed_time": "0:50:38", "remaining_time": "0:51:25"} +{"current_steps": 584, "total_steps": 1175, "loss": 0.9822453260421753, "lr": 2.3752297438576257e-05, "epoch": 2.486140724946695, "percentage": 49.7, "elapsed_time": "0:50:43", "remaining_time": "0:51:19"} +{"current_steps": 585, "total_steps": 1175, "loss": 1.0518721342086792, "lr": 2.3693892940513074e-05, "epoch": 2.490405117270789, "percentage": 49.79, "elapsed_time": "0:50:48", "remaining_time": "0:51:14"} +{"current_steps": 586, "total_steps": 1175, "loss": 1.0154331922531128, "lr": 2.3635455811196536e-05, "epoch": 2.4946695095948828, "percentage": 49.87, "elapsed_time": "0:50:53", "remaining_time": "0:51:09"} +{"current_steps": 587, "total_steps": 1175, "loss": 1.0354433059692383, "lr": 2.3576986566850796e-05, "epoch": 2.4989339019189765, "percentage": 49.96, "elapsed_time": "0:50:58", "remaining_time": "0:51:03"} +{"current_steps": 588, "total_steps": 1175, "loss": 0.9651215672492981, "lr": 2.351848572398371e-05, "epoch": 2.50319829424307, "percentage": 50.04, "elapsed_time": "0:51:03", "remaining_time": "0:50:58"} +{"current_steps": 589, "total_steps": 1175, "loss": 0.9885987639427185, "lr": 2.3459953799382276e-05, "epoch": 2.5074626865671643, "percentage": 50.13, "elapsed_time": "0:51:09", "remaining_time": "0:50:53"} +{"current_steps": 590, "total_steps": 1175, "loss": 1.0021411180496216, "lr": 2.3401391310108054e-05, "epoch": 2.511727078891258, "percentage": 50.21, "elapsed_time": "0:51:14", "remaining_time": "0:50:48"} +{"current_steps": 591, "total_steps": 1175, "loss": 0.9634994268417358, "lr": 2.3342798773492602e-05, "epoch": 2.5159914712153517, "percentage": 50.3, "elapsed_time": "0:51:19", "remaining_time": "0:50:42"} +{"current_steps": 592, "total_steps": 1175, "loss": 1.0279403924942017, "lr": 2.328417670713294e-05, "epoch": 2.520255863539446, "percentage": 50.38, "elapsed_time": "0:51:24", "remaining_time": "0:50:37"} +{"current_steps": 593, "total_steps": 1175, "loss": 1.0075408220291138, "lr": 2.3225525628886918e-05, "epoch": 2.5245202558635396, "percentage": 50.47, "elapsed_time": "0:51:29", "remaining_time": "0:50:32"} +{"current_steps": 594, "total_steps": 1175, "loss": 1.0584495067596436, "lr": 2.3166846056868687e-05, "epoch": 2.5287846481876333, "percentage": 50.55, "elapsed_time": "0:51:34", "remaining_time": "0:50:27"} +{"current_steps": 595, "total_steps": 1175, "loss": 1.050918698310852, "lr": 2.31081385094441e-05, "epoch": 2.533049040511727, "percentage": 50.64, "elapsed_time": "0:51:40", "remaining_time": "0:50:21"} +{"current_steps": 596, "total_steps": 1175, "loss": 0.979073703289032, "lr": 2.304940350522615e-05, "epoch": 2.5373134328358207, "percentage": 50.72, "elapsed_time": "0:51:45", "remaining_time": "0:50:16"} +{"current_steps": 597, "total_steps": 1175, "loss": 0.9640562534332275, "lr": 2.299064156307037e-05, "epoch": 2.541577825159915, "percentage": 50.81, "elapsed_time": "0:51:50", "remaining_time": "0:50:11"} +{"current_steps": 598, "total_steps": 1175, "loss": 1.0127673149108887, "lr": 2.2931853202070275e-05, "epoch": 2.5458422174840085, "percentage": 50.89, "elapsed_time": "0:51:55", "remaining_time": "0:50:05"} +{"current_steps": 599, "total_steps": 1175, "loss": 0.9941070079803467, "lr": 2.2873038941552724e-05, "epoch": 2.550106609808102, "percentage": 50.98, "elapsed_time": "0:52:00", "remaining_time": "0:50:00"} +{"current_steps": 600, "total_steps": 1175, "loss": 0.9789334535598755, "lr": 2.2814199301073412e-05, "epoch": 2.5543710021321964, "percentage": 51.06, "elapsed_time": "0:52:05", "remaining_time": "0:49:55"} +{"current_steps": 601, "total_steps": 1175, "loss": 1.0233545303344727, "lr": 2.27553348004122e-05, "epoch": 2.55863539445629, "percentage": 51.15, "elapsed_time": "0:52:10", "remaining_time": "0:49:50"} +{"current_steps": 602, "total_steps": 1175, "loss": 0.987399697303772, "lr": 2.2696445959568577e-05, "epoch": 2.5628997867803838, "percentage": 51.23, "elapsed_time": "0:52:15", "remaining_time": "0:49:44"} +{"current_steps": 603, "total_steps": 1175, "loss": 1.0295848846435547, "lr": 2.2637533298757064e-05, "epoch": 2.5671641791044775, "percentage": 51.32, "elapsed_time": "0:52:21", "remaining_time": "0:49:39"} +{"current_steps": 604, "total_steps": 1175, "loss": 1.0304653644561768, "lr": 2.2578597338402567e-05, "epoch": 2.571428571428571, "percentage": 51.4, "elapsed_time": "0:52:26", "remaining_time": "0:49:34"} +{"current_steps": 605, "total_steps": 1175, "loss": 0.9955521821975708, "lr": 2.2519638599135844e-05, "epoch": 2.5756929637526653, "percentage": 51.49, "elapsed_time": "0:52:31", "remaining_time": "0:49:29"} +{"current_steps": 606, "total_steps": 1175, "loss": 1.0013747215270996, "lr": 2.2460657601788875e-05, "epoch": 2.579957356076759, "percentage": 51.57, "elapsed_time": "0:52:36", "remaining_time": "0:49:23"} +{"current_steps": 607, "total_steps": 1175, "loss": 1.004853367805481, "lr": 2.2401654867390256e-05, "epoch": 2.5842217484008527, "percentage": 51.66, "elapsed_time": "0:52:41", "remaining_time": "0:49:18"} +{"current_steps": 608, "total_steps": 1175, "loss": 0.9981028437614441, "lr": 2.2342630917160605e-05, "epoch": 2.588486140724947, "percentage": 51.74, "elapsed_time": "0:52:46", "remaining_time": "0:49:13"} +{"current_steps": 609, "total_steps": 1175, "loss": 1.0357897281646729, "lr": 2.2283586272507975e-05, "epoch": 2.5927505330490406, "percentage": 51.83, "elapsed_time": "0:52:51", "remaining_time": "0:49:07"} +{"current_steps": 610, "total_steps": 1175, "loss": 1.023173213005066, "lr": 2.2224521455023193e-05, "epoch": 2.5970149253731343, "percentage": 51.91, "elapsed_time": "0:52:57", "remaining_time": "0:49:02"} +{"current_steps": 611, "total_steps": 1175, "loss": 1.0014777183532715, "lr": 2.216543698647534e-05, "epoch": 2.6012793176972284, "percentage": 52.0, "elapsed_time": "0:53:02", "remaining_time": "0:48:57"} +{"current_steps": 612, "total_steps": 1175, "loss": 0.9888811707496643, "lr": 2.210633338880704e-05, "epoch": 2.605543710021322, "percentage": 52.09, "elapsed_time": "0:53:07", "remaining_time": "0:48:52"} +{"current_steps": 613, "total_steps": 1175, "loss": 0.9636735916137695, "lr": 2.204721118412994e-05, "epoch": 2.609808102345416, "percentage": 52.17, "elapsed_time": "0:53:12", "remaining_time": "0:48:46"} +{"current_steps": 614, "total_steps": 1175, "loss": 0.9971131086349487, "lr": 2.1988070894720037e-05, "epoch": 2.6140724946695095, "percentage": 52.26, "elapsed_time": "0:53:17", "remaining_time": "0:48:41"} +{"current_steps": 615, "total_steps": 1175, "loss": 1.0155519247055054, "lr": 2.192891304301309e-05, "epoch": 2.6183368869936032, "percentage": 52.34, "elapsed_time": "0:53:22", "remaining_time": "0:48:36"} +{"current_steps": 616, "total_steps": 1175, "loss": 0.9872410893440247, "lr": 2.18697381516e-05, "epoch": 2.6226012793176974, "percentage": 52.43, "elapsed_time": "0:53:27", "remaining_time": "0:48:30"} +{"current_steps": 617, "total_steps": 1175, "loss": 1.0182151794433594, "lr": 2.181054674322221e-05, "epoch": 2.626865671641791, "percentage": 52.51, "elapsed_time": "0:53:32", "remaining_time": "0:48:25"} +{"current_steps": 618, "total_steps": 1175, "loss": 0.9708200097084045, "lr": 2.1751339340767043e-05, "epoch": 2.631130063965885, "percentage": 52.6, "elapsed_time": "0:53:38", "remaining_time": "0:48:20"} +{"current_steps": 619, "total_steps": 1175, "loss": 1.0166131258010864, "lr": 2.169211646726313e-05, "epoch": 2.635394456289979, "percentage": 52.68, "elapsed_time": "0:53:43", "remaining_time": "0:48:15"} +{"current_steps": 620, "total_steps": 1175, "loss": 1.012539029121399, "lr": 2.163287864587576e-05, "epoch": 2.6396588486140726, "percentage": 52.77, "elapsed_time": "0:53:48", "remaining_time": "0:48:09"} +{"current_steps": 621, "total_steps": 1175, "loss": 0.9720137119293213, "lr": 2.157362639990229e-05, "epoch": 2.6439232409381663, "percentage": 52.85, "elapsed_time": "0:53:53", "remaining_time": "0:48:04"} +{"current_steps": 622, "total_steps": 1175, "loss": 1.0196996927261353, "lr": 2.151436025276747e-05, "epoch": 2.64818763326226, "percentage": 52.94, "elapsed_time": "0:53:58", "remaining_time": "0:47:59"} +{"current_steps": 623, "total_steps": 1175, "loss": 1.0273431539535522, "lr": 2.145508072801888e-05, "epoch": 2.6524520255863537, "percentage": 53.02, "elapsed_time": "0:54:03", "remaining_time": "0:47:54"} +{"current_steps": 624, "total_steps": 1175, "loss": 1.0347175598144531, "lr": 2.1395788349322256e-05, "epoch": 2.656716417910448, "percentage": 53.11, "elapsed_time": "0:54:08", "remaining_time": "0:47:48"} +{"current_steps": 625, "total_steps": 1175, "loss": 0.9719746112823486, "lr": 2.133648364045689e-05, "epoch": 2.6609808102345416, "percentage": 53.19, "elapsed_time": "0:54:13", "remaining_time": "0:47:43"} +{"current_steps": 626, "total_steps": 1175, "loss": 0.9729279279708862, "lr": 2.1277167125310996e-05, "epoch": 2.6652452025586353, "percentage": 53.28, "elapsed_time": "0:54:19", "remaining_time": "0:47:38"} +{"current_steps": 627, "total_steps": 1175, "loss": 0.9938591718673706, "lr": 2.1217839327877098e-05, "epoch": 2.6695095948827294, "percentage": 53.36, "elapsed_time": "0:54:24", "remaining_time": "0:47:32"} +{"current_steps": 628, "total_steps": 1175, "loss": 1.0283288955688477, "lr": 2.1158500772247352e-05, "epoch": 2.673773987206823, "percentage": 53.45, "elapsed_time": "0:54:29", "remaining_time": "0:47:27"} +{"current_steps": 629, "total_steps": 1175, "loss": 0.996048092842102, "lr": 2.1099151982608985e-05, "epoch": 2.678038379530917, "percentage": 53.53, "elapsed_time": "0:54:34", "remaining_time": "0:47:22"} +{"current_steps": 630, "total_steps": 1175, "loss": 1.012598991394043, "lr": 2.1039793483239607e-05, "epoch": 2.6823027718550105, "percentage": 53.62, "elapsed_time": "0:54:39", "remaining_time": "0:47:17"} +{"current_steps": 631, "total_steps": 1175, "loss": 0.9994162321090698, "lr": 2.0980425798502616e-05, "epoch": 2.6865671641791042, "percentage": 53.7, "elapsed_time": "0:54:44", "remaining_time": "0:47:11"} +{"current_steps": 632, "total_steps": 1175, "loss": 0.9778115153312683, "lr": 2.092104945284255e-05, "epoch": 2.6908315565031984, "percentage": 53.79, "elapsed_time": "0:54:49", "remaining_time": "0:47:06"} +{"current_steps": 633, "total_steps": 1175, "loss": 1.0162835121154785, "lr": 2.0861664970780434e-05, "epoch": 2.695095948827292, "percentage": 53.87, "elapsed_time": "0:54:55", "remaining_time": "0:47:01"} +{"current_steps": 634, "total_steps": 1175, "loss": 1.0076497793197632, "lr": 2.08022728769092e-05, "epoch": 2.699360341151386, "percentage": 53.96, "elapsed_time": "0:55:00", "remaining_time": "0:46:56"} +{"current_steps": 635, "total_steps": 1175, "loss": 0.9848247766494751, "lr": 2.0742873695889005e-05, "epoch": 2.70362473347548, "percentage": 54.04, "elapsed_time": "0:55:05", "remaining_time": "0:46:50"} +{"current_steps": 636, "total_steps": 1175, "loss": 1.0044206380844116, "lr": 2.0683467952442626e-05, "epoch": 2.7078891257995736, "percentage": 54.13, "elapsed_time": "0:55:10", "remaining_time": "0:46:45"} +{"current_steps": 637, "total_steps": 1175, "loss": 1.0012754201889038, "lr": 2.0624056171350785e-05, "epoch": 2.7121535181236673, "percentage": 54.21, "elapsed_time": "0:55:15", "remaining_time": "0:46:40"} +{"current_steps": 638, "total_steps": 1175, "loss": 1.0156222581863403, "lr": 2.0564638877447566e-05, "epoch": 2.716417910447761, "percentage": 54.3, "elapsed_time": "0:55:20", "remaining_time": "0:46:35"} +{"current_steps": 639, "total_steps": 1175, "loss": 1.0245939493179321, "lr": 2.0505216595615742e-05, "epoch": 2.7206823027718547, "percentage": 54.38, "elapsed_time": "0:55:25", "remaining_time": "0:46:29"} +{"current_steps": 640, "total_steps": 1175, "loss": 1.0242359638214111, "lr": 2.044578985078215e-05, "epoch": 2.724946695095949, "percentage": 54.47, "elapsed_time": "0:55:31", "remaining_time": "0:46:24"} +{"current_steps": 641, "total_steps": 1175, "loss": 0.9643347263336182, "lr": 2.0386359167913046e-05, "epoch": 2.7292110874200426, "percentage": 54.55, "elapsed_time": "0:55:36", "remaining_time": "0:46:19"} +{"current_steps": 642, "total_steps": 1175, "loss": 0.9665999412536621, "lr": 2.0326925072009485e-05, "epoch": 2.7334754797441363, "percentage": 54.64, "elapsed_time": "0:55:41", "remaining_time": "0:46:14"} +{"current_steps": 643, "total_steps": 1175, "loss": 1.0037893056869507, "lr": 2.0267488088102657e-05, "epoch": 2.7377398720682304, "percentage": 54.72, "elapsed_time": "0:55:46", "remaining_time": "0:46:08"} +{"current_steps": 644, "total_steps": 1175, "loss": 0.9691751003265381, "lr": 2.0208048741249288e-05, "epoch": 2.742004264392324, "percentage": 54.81, "elapsed_time": "0:55:51", "remaining_time": "0:46:03"} +{"current_steps": 645, "total_steps": 1175, "loss": 1.047579288482666, "lr": 2.014860755652695e-05, "epoch": 2.746268656716418, "percentage": 54.89, "elapsed_time": "0:55:56", "remaining_time": "0:45:58"} +{"current_steps": 646, "total_steps": 1175, "loss": 1.0120222568511963, "lr": 2.0089165059029477e-05, "epoch": 2.750533049040512, "percentage": 54.98, "elapsed_time": "0:56:01", "remaining_time": "0:45:52"} +{"current_steps": 647, "total_steps": 1175, "loss": 0.9922143220901489, "lr": 2.0029721773862277e-05, "epoch": 2.7547974413646057, "percentage": 55.06, "elapsed_time": "0:56:06", "remaining_time": "0:45:47"} +{"current_steps": 648, "total_steps": 1175, "loss": 0.9913243055343628, "lr": 1.997027822613773e-05, "epoch": 2.7590618336886994, "percentage": 55.15, "elapsed_time": "0:56:12", "remaining_time": "0:45:42"} +{"current_steps": 649, "total_steps": 1175, "loss": 1.0141838788986206, "lr": 1.9910834940970533e-05, "epoch": 2.763326226012793, "percentage": 55.23, "elapsed_time": "0:56:17", "remaining_time": "0:45:37"} +{"current_steps": 650, "total_steps": 1175, "loss": 1.024746298789978, "lr": 1.985139244347305e-05, "epoch": 2.767590618336887, "percentage": 55.32, "elapsed_time": "0:56:22", "remaining_time": "0:45:31"} +{"current_steps": 651, "total_steps": 1175, "loss": 1.0240997076034546, "lr": 1.979195125875072e-05, "epoch": 2.771855010660981, "percentage": 55.4, "elapsed_time": "0:56:27", "remaining_time": "0:45:26"} +{"current_steps": 652, "total_steps": 1175, "loss": 0.9887863397598267, "lr": 1.9732511911897353e-05, "epoch": 2.7761194029850746, "percentage": 55.49, "elapsed_time": "0:56:32", "remaining_time": "0:45:21"} +{"current_steps": 653, "total_steps": 1175, "loss": 0.9766459465026855, "lr": 1.9673074927990525e-05, "epoch": 2.7803837953091683, "percentage": 55.57, "elapsed_time": "0:56:37", "remaining_time": "0:45:16"} +{"current_steps": 654, "total_steps": 1175, "loss": 1.017755150794983, "lr": 1.9613640832086957e-05, "epoch": 2.7846481876332625, "percentage": 55.66, "elapsed_time": "0:56:42", "remaining_time": "0:45:10"} +{"current_steps": 655, "total_steps": 1175, "loss": 0.9604583978652954, "lr": 1.9554210149217855e-05, "epoch": 2.788912579957356, "percentage": 55.74, "elapsed_time": "0:56:47", "remaining_time": "0:45:05"} +{"current_steps": 656, "total_steps": 1175, "loss": 0.9861606955528259, "lr": 1.9494783404384265e-05, "epoch": 2.79317697228145, "percentage": 55.83, "elapsed_time": "0:56:53", "remaining_time": "0:45:00"} +{"current_steps": 657, "total_steps": 1175, "loss": 1.0003072023391724, "lr": 1.9435361122552437e-05, "epoch": 2.7974413646055436, "percentage": 55.91, "elapsed_time": "0:56:58", "remaining_time": "0:44:54"} +{"current_steps": 658, "total_steps": 1175, "loss": 1.0127203464508057, "lr": 1.9375943828649215e-05, "epoch": 2.8017057569296373, "percentage": 56.0, "elapsed_time": "0:57:03", "remaining_time": "0:44:49"} +{"current_steps": 659, "total_steps": 1175, "loss": 0.9938777089118958, "lr": 1.9316532047557378e-05, "epoch": 2.8059701492537314, "percentage": 56.09, "elapsed_time": "0:57:08", "remaining_time": "0:44:44"} +{"current_steps": 660, "total_steps": 1175, "loss": 0.98386549949646, "lr": 1.9257126304110998e-05, "epoch": 2.810234541577825, "percentage": 56.17, "elapsed_time": "0:57:13", "remaining_time": "0:44:39"} +{"current_steps": 661, "total_steps": 1175, "loss": 1.0038477182388306, "lr": 1.919772712309081e-05, "epoch": 2.814498933901919, "percentage": 56.26, "elapsed_time": "0:57:18", "remaining_time": "0:44:33"} +{"current_steps": 662, "total_steps": 1175, "loss": 0.9993883371353149, "lr": 1.9138335029219572e-05, "epoch": 2.818763326226013, "percentage": 56.34, "elapsed_time": "0:57:23", "remaining_time": "0:44:28"} +{"current_steps": 663, "total_steps": 1175, "loss": 1.0459842681884766, "lr": 1.9078950547157458e-05, "epoch": 2.8230277185501067, "percentage": 56.43, "elapsed_time": "0:57:28", "remaining_time": "0:44:23"} +{"current_steps": 664, "total_steps": 1175, "loss": 0.9924187064170837, "lr": 1.9019574201497387e-05, "epoch": 2.8272921108742004, "percentage": 56.51, "elapsed_time": "0:57:34", "remaining_time": "0:44:18"} +{"current_steps": 665, "total_steps": 1175, "loss": 1.0398309230804443, "lr": 1.8960206516760396e-05, "epoch": 2.831556503198294, "percentage": 56.6, "elapsed_time": "0:57:39", "remaining_time": "0:44:12"} +{"current_steps": 666, "total_steps": 1175, "loss": 0.9553192853927612, "lr": 1.890084801739102e-05, "epoch": 2.835820895522388, "percentage": 56.68, "elapsed_time": "0:57:44", "remaining_time": "0:44:07"} +{"current_steps": 667, "total_steps": 1175, "loss": 0.9848713874816895, "lr": 1.884149922775265e-05, "epoch": 2.840085287846482, "percentage": 56.77, "elapsed_time": "0:57:49", "remaining_time": "0:44:02"} +{"current_steps": 668, "total_steps": 1175, "loss": 1.0005512237548828, "lr": 1.878216067212291e-05, "epoch": 2.8443496801705757, "percentage": 56.85, "elapsed_time": "0:57:54", "remaining_time": "0:43:57"} +{"current_steps": 669, "total_steps": 1175, "loss": 1.0311071872711182, "lr": 1.8722832874689007e-05, "epoch": 2.8486140724946694, "percentage": 56.94, "elapsed_time": "0:57:59", "remaining_time": "0:43:51"} +{"current_steps": 670, "total_steps": 1175, "loss": 0.9968549013137817, "lr": 1.8663516359543123e-05, "epoch": 2.8528784648187635, "percentage": 57.02, "elapsed_time": "0:58:04", "remaining_time": "0:43:46"} +{"current_steps": 671, "total_steps": 1175, "loss": 1.0099412202835083, "lr": 1.860421165067775e-05, "epoch": 2.857142857142857, "percentage": 57.11, "elapsed_time": "0:58:10", "remaining_time": "0:43:41"} +{"current_steps": 672, "total_steps": 1175, "loss": 0.9846042394638062, "lr": 1.8544919271981125e-05, "epoch": 2.861407249466951, "percentage": 57.19, "elapsed_time": "0:58:15", "remaining_time": "0:43:36"} +{"current_steps": 673, "total_steps": 1175, "loss": 1.005096435546875, "lr": 1.8485639747232535e-05, "epoch": 2.8656716417910446, "percentage": 57.28, "elapsed_time": "0:58:20", "remaining_time": "0:43:31"} +{"current_steps": 674, "total_steps": 1175, "loss": 1.0177894830703735, "lr": 1.8426373600097723e-05, "epoch": 2.8699360341151388, "percentage": 57.36, "elapsed_time": "0:58:25", "remaining_time": "0:43:26"} +{"current_steps": 675, "total_steps": 1175, "loss": 1.002445101737976, "lr": 1.836712135412424e-05, "epoch": 2.8742004264392325, "percentage": 57.45, "elapsed_time": "0:58:31", "remaining_time": "0:43:20"} +{"current_steps": 676, "total_steps": 1175, "loss": 1.0102388858795166, "lr": 1.8307883532736878e-05, "epoch": 2.878464818763326, "percentage": 57.53, "elapsed_time": "0:58:36", "remaining_time": "0:43:15"} +{"current_steps": 677, "total_steps": 1175, "loss": 0.9696129560470581, "lr": 1.8248660659232964e-05, "epoch": 2.88272921108742, "percentage": 57.62, "elapsed_time": "0:58:41", "remaining_time": "0:43:10"} +{"current_steps": 678, "total_steps": 1175, "loss": 1.013519525527954, "lr": 1.8189453256777798e-05, "epoch": 2.886993603411514, "percentage": 57.7, "elapsed_time": "0:58:46", "remaining_time": "0:43:05"} +{"current_steps": 679, "total_steps": 1175, "loss": 0.9844130277633667, "lr": 1.8130261848399996e-05, "epoch": 2.8912579957356077, "percentage": 57.79, "elapsed_time": "0:58:51", "remaining_time": "0:42:59"} +{"current_steps": 680, "total_steps": 1175, "loss": 0.956390380859375, "lr": 1.8071086956986916e-05, "epoch": 2.8955223880597014, "percentage": 57.87, "elapsed_time": "0:58:56", "remaining_time": "0:42:54"} +{"current_steps": 681, "total_steps": 1175, "loss": 0.9912029504776001, "lr": 1.8011929105279967e-05, "epoch": 2.8997867803837956, "percentage": 57.96, "elapsed_time": "0:59:01", "remaining_time": "0:42:49"} +{"current_steps": 682, "total_steps": 1175, "loss": 0.9531004428863525, "lr": 1.795278881587007e-05, "epoch": 2.9040511727078893, "percentage": 58.04, "elapsed_time": "0:59:07", "remaining_time": "0:42:44"} +{"current_steps": 683, "total_steps": 1175, "loss": 1.0013391971588135, "lr": 1.7893666611192962e-05, "epoch": 2.908315565031983, "percentage": 58.13, "elapsed_time": "0:59:12", "remaining_time": "0:42:38"} +{"current_steps": 684, "total_steps": 1175, "loss": 1.032405972480774, "lr": 1.783456301352467e-05, "epoch": 2.9125799573560767, "percentage": 58.21, "elapsed_time": "0:59:17", "remaining_time": "0:42:33"} +{"current_steps": 685, "total_steps": 1175, "loss": 0.9582983255386353, "lr": 1.7775478544976813e-05, "epoch": 2.9168443496801704, "percentage": 58.3, "elapsed_time": "0:59:22", "remaining_time": "0:42:28"} +{"current_steps": 686, "total_steps": 1175, "loss": 1.0138694047927856, "lr": 1.7716413727492035e-05, "epoch": 2.9211087420042645, "percentage": 58.38, "elapsed_time": "0:59:27", "remaining_time": "0:42:23"} +{"current_steps": 687, "total_steps": 1175, "loss": 1.0201051235198975, "lr": 1.7657369082839392e-05, "epoch": 2.925373134328358, "percentage": 58.47, "elapsed_time": "0:59:32", "remaining_time": "0:42:17"} +{"current_steps": 688, "total_steps": 1175, "loss": 1.0258854627609253, "lr": 1.7598345132609747e-05, "epoch": 2.929637526652452, "percentage": 58.55, "elapsed_time": "0:59:38", "remaining_time": "0:42:12"} +{"current_steps": 689, "total_steps": 1175, "loss": 1.002000331878662, "lr": 1.7539342398211132e-05, "epoch": 2.933901918976546, "percentage": 58.64, "elapsed_time": "0:59:43", "remaining_time": "0:42:07"} +{"current_steps": 690, "total_steps": 1175, "loss": 1.014590859413147, "lr": 1.748036140086416e-05, "epoch": 2.9381663113006398, "percentage": 58.72, "elapsed_time": "0:59:48", "remaining_time": "0:42:02"} +{"current_steps": 691, "total_steps": 1175, "loss": 0.9970508813858032, "lr": 1.742140266159744e-05, "epoch": 2.9424307036247335, "percentage": 58.81, "elapsed_time": "0:59:53", "remaining_time": "0:41:57"} +{"current_steps": 692, "total_steps": 1175, "loss": 1.0036523342132568, "lr": 1.7362466701242943e-05, "epoch": 2.946695095948827, "percentage": 58.89, "elapsed_time": "0:59:58", "remaining_time": "0:41:51"} +{"current_steps": 693, "total_steps": 1175, "loss": 1.000980019569397, "lr": 1.7303554040431426e-05, "epoch": 2.950959488272921, "percentage": 58.98, "elapsed_time": "1:00:03", "remaining_time": "0:41:46"} +{"current_steps": 694, "total_steps": 1175, "loss": 0.9945222735404968, "lr": 1.7244665199587812e-05, "epoch": 2.955223880597015, "percentage": 59.06, "elapsed_time": "1:00:09", "remaining_time": "0:41:41"} +{"current_steps": 695, "total_steps": 1175, "loss": 1.0258584022521973, "lr": 1.7185800698926594e-05, "epoch": 2.9594882729211087, "percentage": 59.15, "elapsed_time": "1:00:14", "remaining_time": "0:41:36"} +{"current_steps": 696, "total_steps": 1175, "loss": 0.9935309886932373, "lr": 1.7126961058447276e-05, "epoch": 2.9637526652452024, "percentage": 59.23, "elapsed_time": "1:00:19", "remaining_time": "0:41:31"} +{"current_steps": 697, "total_steps": 1175, "loss": 1.0331902503967285, "lr": 1.706814679792973e-05, "epoch": 2.9680170575692966, "percentage": 59.32, "elapsed_time": "1:00:24", "remaining_time": "0:41:25"} +{"current_steps": 698, "total_steps": 1175, "loss": 0.984321653842926, "lr": 1.7009358436929632e-05, "epoch": 2.9722814498933903, "percentage": 59.4, "elapsed_time": "1:00:30", "remaining_time": "0:41:20"} +{"current_steps": 699, "total_steps": 1175, "loss": 1.0147403478622437, "lr": 1.6950596494773855e-05, "epoch": 2.976545842217484, "percentage": 59.49, "elapsed_time": "1:00:35", "remaining_time": "0:41:15"} +{"current_steps": 700, "total_steps": 1175, "loss": 1.0429158210754395, "lr": 1.6891861490555906e-05, "epoch": 2.9808102345415777, "percentage": 59.57, "elapsed_time": "1:00:41", "remaining_time": "0:41:10"} +{"current_steps": 701, "total_steps": 1175, "loss": 0.9809648990631104, "lr": 1.683315394313132e-05, "epoch": 2.9850746268656714, "percentage": 59.66, "elapsed_time": "1:00:46", "remaining_time": "0:41:05"} +{"current_steps": 702, "total_steps": 1175, "loss": 0.9754581451416016, "lr": 1.677447437111309e-05, "epoch": 2.9893390191897655, "percentage": 59.74, "elapsed_time": "1:00:52", "remaining_time": "0:41:00"} +{"current_steps": 703, "total_steps": 1175, "loss": 1.025207281112671, "lr": 1.671582329286707e-05, "epoch": 2.9936034115138592, "percentage": 59.83, "elapsed_time": "1:00:57", "remaining_time": "0:40:55"} +{"current_steps": 704, "total_steps": 1175, "loss": 1.0176316499710083, "lr": 1.66572012265074e-05, "epoch": 2.997867803837953, "percentage": 59.91, "elapsed_time": "1:01:02", "remaining_time": "0:40:50"} +{"current_steps": 705, "total_steps": 1175, "loss": 0.9181491136550903, "lr": 1.6598608689891953e-05, "epoch": 3.0, "percentage": 60.0, "elapsed_time": "1:01:05", "remaining_time": "0:40:43"} +{"current_steps": 706, "total_steps": 1175, "loss": 0.9819753170013428, "lr": 1.654004620061773e-05, "epoch": 3.0042643923240937, "percentage": 60.09, "elapsed_time": "1:01:10", "remaining_time": "0:40:38"} +{"current_steps": 707, "total_steps": 1175, "loss": 0.956446647644043, "lr": 1.6481514276016297e-05, "epoch": 3.008528784648188, "percentage": 60.17, "elapsed_time": "1:01:16", "remaining_time": "0:40:33"} +{"current_steps": 708, "total_steps": 1175, "loss": 0.9884039163589478, "lr": 1.6423013433149207e-05, "epoch": 3.0127931769722816, "percentage": 60.26, "elapsed_time": "1:01:21", "remaining_time": "0:40:28"} +{"current_steps": 709, "total_steps": 1175, "loss": 0.9924356937408447, "lr": 1.636454418880347e-05, "epoch": 3.0170575692963753, "percentage": 60.34, "elapsed_time": "1:01:27", "remaining_time": "0:40:23"} +{"current_steps": 710, "total_steps": 1175, "loss": 0.9875960350036621, "lr": 1.630610705948693e-05, "epoch": 3.021321961620469, "percentage": 60.43, "elapsed_time": "1:01:33", "remaining_time": "0:40:19"} +{"current_steps": 711, "total_steps": 1175, "loss": 1.0232791900634766, "lr": 1.6247702561423753e-05, "epoch": 3.025586353944563, "percentage": 60.51, "elapsed_time": "1:01:39", "remaining_time": "0:40:14"} +{"current_steps": 712, "total_steps": 1175, "loss": 0.9841296076774597, "lr": 1.6189331210549828e-05, "epoch": 3.029850746268657, "percentage": 60.6, "elapsed_time": "1:01:44", "remaining_time": "0:40:08"} +{"current_steps": 713, "total_steps": 1175, "loss": 0.9826107025146484, "lr": 1.613099352250825e-05, "epoch": 3.0341151385927505, "percentage": 60.68, "elapsed_time": "1:01:49", "remaining_time": "0:40:03"} +{"current_steps": 714, "total_steps": 1175, "loss": 0.9862861633300781, "lr": 1.6072690012644717e-05, "epoch": 3.038379530916844, "percentage": 60.77, "elapsed_time": "1:01:55", "remaining_time": "0:39:58"} +{"current_steps": 715, "total_steps": 1175, "loss": 0.9499567747116089, "lr": 1.6014421196003022e-05, "epoch": 3.0426439232409384, "percentage": 60.85, "elapsed_time": "1:02:01", "remaining_time": "0:39:53"} +{"current_steps": 716, "total_steps": 1175, "loss": 0.9644232988357544, "lr": 1.5956187587320468e-05, "epoch": 3.046908315565032, "percentage": 60.94, "elapsed_time": "1:02:06", "remaining_time": "0:39:49"} +{"current_steps": 717, "total_steps": 1175, "loss": 0.9658553600311279, "lr": 1.5897989701023355e-05, "epoch": 3.0511727078891258, "percentage": 61.02, "elapsed_time": "1:02:12", "remaining_time": "0:39:44"} +{"current_steps": 718, "total_steps": 1175, "loss": 0.979788064956665, "lr": 1.58398280512224e-05, "epoch": 3.0554371002132195, "percentage": 61.11, "elapsed_time": "1:02:18", "remaining_time": "0:39:39"} +{"current_steps": 719, "total_steps": 1175, "loss": 0.9843800067901611, "lr": 1.5781703151708215e-05, "epoch": 3.0597014925373136, "percentage": 61.19, "elapsed_time": "1:02:23", "remaining_time": "0:39:34"} +{"current_steps": 720, "total_steps": 1175, "loss": 0.9665867686271667, "lr": 1.5723615515946773e-05, "epoch": 3.0639658848614073, "percentage": 61.28, "elapsed_time": "1:02:28", "remaining_time": "0:39:29"} +{"current_steps": 721, "total_steps": 1175, "loss": 0.9476820230484009, "lr": 1.5665565657074874e-05, "epoch": 3.068230277185501, "percentage": 61.36, "elapsed_time": "1:02:34", "remaining_time": "0:39:24"} +{"current_steps": 722, "total_steps": 1175, "loss": 0.9628287553787231, "lr": 1.560755408789558e-05, "epoch": 3.0724946695095947, "percentage": 61.45, "elapsed_time": "1:02:39", "remaining_time": "0:39:18"} +{"current_steps": 723, "total_steps": 1175, "loss": 1.0098230838775635, "lr": 1.5549581320873715e-05, "epoch": 3.076759061833689, "percentage": 61.53, "elapsed_time": "1:02:44", "remaining_time": "0:39:13"} +{"current_steps": 724, "total_steps": 1175, "loss": 1.0204254388809204, "lr": 1.5491647868131343e-05, "epoch": 3.0810234541577826, "percentage": 61.62, "elapsed_time": "1:02:50", "remaining_time": "0:39:08"} +{"current_steps": 725, "total_steps": 1175, "loss": 0.9512300491333008, "lr": 1.5433754241443223e-05, "epoch": 3.0852878464818763, "percentage": 61.7, "elapsed_time": "1:02:55", "remaining_time": "0:39:03"} +{"current_steps": 726, "total_steps": 1175, "loss": 1.0124759674072266, "lr": 1.53759009522323e-05, "epoch": 3.08955223880597, "percentage": 61.79, "elapsed_time": "1:03:00", "remaining_time": "0:38:58"} +{"current_steps": 727, "total_steps": 1175, "loss": 1.0002617835998535, "lr": 1.5318088511565185e-05, "epoch": 3.093816631130064, "percentage": 61.87, "elapsed_time": "1:03:05", "remaining_time": "0:38:52"} +{"current_steps": 728, "total_steps": 1175, "loss": 0.9853769540786743, "lr": 1.5260317430147627e-05, "epoch": 3.098081023454158, "percentage": 61.96, "elapsed_time": "1:03:10", "remaining_time": "0:38:47"} +{"current_steps": 729, "total_steps": 1175, "loss": 1.0069472789764404, "lr": 1.5202588218320024e-05, "epoch": 3.1023454157782515, "percentage": 62.04, "elapsed_time": "1:03:15", "remaining_time": "0:38:42"} +{"current_steps": 730, "total_steps": 1175, "loss": 0.988615870475769, "lr": 1.5144901386052924e-05, "epoch": 3.106609808102345, "percentage": 62.13, "elapsed_time": "1:03:21", "remaining_time": "0:38:37"} +{"current_steps": 731, "total_steps": 1175, "loss": 0.9952294230461121, "lr": 1.5087257442942467e-05, "epoch": 3.1108742004264394, "percentage": 62.21, "elapsed_time": "1:03:26", "remaining_time": "0:38:31"} +{"current_steps": 732, "total_steps": 1175, "loss": 0.9941107034683228, "lr": 1.502965689820593e-05, "epoch": 3.115138592750533, "percentage": 62.3, "elapsed_time": "1:03:31", "remaining_time": "0:38:26"} +{"current_steps": 733, "total_steps": 1175, "loss": 0.9742693901062012, "lr": 1.4972100260677222e-05, "epoch": 3.1194029850746268, "percentage": 62.38, "elapsed_time": "1:03:37", "remaining_time": "0:38:21"} +{"current_steps": 734, "total_steps": 1175, "loss": 0.953710675239563, "lr": 1.4914588038802383e-05, "epoch": 3.1236673773987205, "percentage": 62.47, "elapsed_time": "1:03:42", "remaining_time": "0:38:16"} +{"current_steps": 735, "total_steps": 1175, "loss": 0.9636905193328857, "lr": 1.4857120740635084e-05, "epoch": 3.1279317697228146, "percentage": 62.55, "elapsed_time": "1:03:47", "remaining_time": "0:38:11"} +{"current_steps": 736, "total_steps": 1175, "loss": 0.970219612121582, "lr": 1.4799698873832153e-05, "epoch": 3.1321961620469083, "percentage": 62.64, "elapsed_time": "1:03:52", "remaining_time": "0:38:05"} +{"current_steps": 737, "total_steps": 1175, "loss": 0.9687181711196899, "lr": 1.4742322945649073e-05, "epoch": 3.136460554371002, "percentage": 62.72, "elapsed_time": "1:03:58", "remaining_time": "0:38:00"} +{"current_steps": 738, "total_steps": 1175, "loss": 1.0161409378051758, "lr": 1.4684993462935532e-05, "epoch": 3.140724946695096, "percentage": 62.81, "elapsed_time": "1:04:03", "remaining_time": "0:37:55"} +{"current_steps": 739, "total_steps": 1175, "loss": 0.9935801029205322, "lr": 1.462771093213092e-05, "epoch": 3.14498933901919, "percentage": 62.89, "elapsed_time": "1:04:08", "remaining_time": "0:37:50"} +{"current_steps": 740, "total_steps": 1175, "loss": 1.0185129642486572, "lr": 1.4570475859259856e-05, "epoch": 3.1492537313432836, "percentage": 62.98, "elapsed_time": "1:04:13", "remaining_time": "0:37:45"} +{"current_steps": 741, "total_steps": 1175, "loss": 0.9626775979995728, "lr": 1.4513288749927714e-05, "epoch": 3.1535181236673773, "percentage": 63.06, "elapsed_time": "1:04:18", "remaining_time": "0:37:40"} +{"current_steps": 742, "total_steps": 1175, "loss": 1.010709285736084, "lr": 1.4456150109316192e-05, "epoch": 3.1577825159914714, "percentage": 63.15, "elapsed_time": "1:04:23", "remaining_time": "0:37:34"} +{"current_steps": 743, "total_steps": 1175, "loss": 1.0011711120605469, "lr": 1.4399060442178798e-05, "epoch": 3.162046908315565, "percentage": 63.23, "elapsed_time": "1:04:28", "remaining_time": "0:37:29"} +{"current_steps": 744, "total_steps": 1175, "loss": 0.9482408165931702, "lr": 1.4342020252836437e-05, "epoch": 3.166311300639659, "percentage": 63.32, "elapsed_time": "1:04:34", "remaining_time": "0:37:24"} +{"current_steps": 745, "total_steps": 1175, "loss": 0.9633879661560059, "lr": 1.4285030045172913e-05, "epoch": 3.1705756929637525, "percentage": 63.4, "elapsed_time": "1:04:39", "remaining_time": "0:37:18"} +{"current_steps": 746, "total_steps": 1175, "loss": 0.9744983315467834, "lr": 1.422809032263052e-05, "epoch": 3.1748400852878467, "percentage": 63.49, "elapsed_time": "1:04:44", "remaining_time": "0:37:13"} +{"current_steps": 747, "total_steps": 1175, "loss": 0.951709508895874, "lr": 1.4171201588205566e-05, "epoch": 3.1791044776119404, "percentage": 63.57, "elapsed_time": "1:04:49", "remaining_time": "0:37:08"} +{"current_steps": 748, "total_steps": 1175, "loss": 0.9665570259094238, "lr": 1.4114364344443935e-05, "epoch": 3.183368869936034, "percentage": 63.66, "elapsed_time": "1:04:54", "remaining_time": "0:37:03"} +{"current_steps": 749, "total_steps": 1175, "loss": 0.9781259894371033, "lr": 1.4057579093436653e-05, "epoch": 3.1876332622601278, "percentage": 63.74, "elapsed_time": "1:04:59", "remaining_time": "0:36:58"} +{"current_steps": 750, "total_steps": 1175, "loss": 0.9583557844161987, "lr": 1.400084633681546e-05, "epoch": 3.191897654584222, "percentage": 63.83, "elapsed_time": "1:05:04", "remaining_time": "0:36:52"} +{"current_steps": 751, "total_steps": 1175, "loss": 1.0065157413482666, "lr": 1.3944166575748355e-05, "epoch": 3.1961620469083156, "percentage": 63.91, "elapsed_time": "1:05:10", "remaining_time": "0:36:47"} +{"current_steps": 752, "total_steps": 1175, "loss": 0.9470630288124084, "lr": 1.3887540310935187e-05, "epoch": 3.2004264392324093, "percentage": 64.0, "elapsed_time": "1:05:15", "remaining_time": "0:36:42"} +{"current_steps": 753, "total_steps": 1175, "loss": 0.9945131540298462, "lr": 1.3830968042603226e-05, "epoch": 3.204690831556503, "percentage": 64.09, "elapsed_time": "1:05:20", "remaining_time": "0:36:37"} +{"current_steps": 754, "total_steps": 1175, "loss": 0.9359656572341919, "lr": 1.3774450270502762e-05, "epoch": 3.208955223880597, "percentage": 64.17, "elapsed_time": "1:05:25", "remaining_time": "0:36:31"} +{"current_steps": 755, "total_steps": 1175, "loss": 0.9581259489059448, "lr": 1.3717987493902656e-05, "epoch": 3.213219616204691, "percentage": 64.26, "elapsed_time": "1:05:30", "remaining_time": "0:36:26"} +{"current_steps": 756, "total_steps": 1175, "loss": 1.0013266801834106, "lr": 1.3661580211585947e-05, "epoch": 3.2174840085287846, "percentage": 64.34, "elapsed_time": "1:05:35", "remaining_time": "0:36:21"} +{"current_steps": 757, "total_steps": 1175, "loss": 0.9701790809631348, "lr": 1.3605228921845457e-05, "epoch": 3.2217484008528783, "percentage": 64.43, "elapsed_time": "1:05:40", "remaining_time": "0:36:16"} +{"current_steps": 758, "total_steps": 1175, "loss": 0.992609977722168, "lr": 1.3548934122479373e-05, "epoch": 3.2260127931769724, "percentage": 64.51, "elapsed_time": "1:05:45", "remaining_time": "0:36:10"} +{"current_steps": 759, "total_steps": 1175, "loss": 0.9707850813865662, "lr": 1.349269631078686e-05, "epoch": 3.230277185501066, "percentage": 64.6, "elapsed_time": "1:05:51", "remaining_time": "0:36:05"} +{"current_steps": 760, "total_steps": 1175, "loss": 1.0266224145889282, "lr": 1.3436515983563659e-05, "epoch": 3.23454157782516, "percentage": 64.68, "elapsed_time": "1:05:56", "remaining_time": "0:36:00"} +{"current_steps": 761, "total_steps": 1175, "loss": 0.9265196323394775, "lr": 1.3380393637097692e-05, "epoch": 3.2388059701492535, "percentage": 64.77, "elapsed_time": "1:06:01", "remaining_time": "0:35:55"} +{"current_steps": 762, "total_steps": 1175, "loss": 0.9605081081390381, "lr": 1.3324329767164708e-05, "epoch": 3.2430703624733477, "percentage": 64.85, "elapsed_time": "1:06:06", "remaining_time": "0:35:49"} +{"current_steps": 763, "total_steps": 1175, "loss": 0.9768404364585876, "lr": 1.3268324869023878e-05, "epoch": 3.2473347547974414, "percentage": 64.94, "elapsed_time": "1:06:11", "remaining_time": "0:35:44"} +{"current_steps": 764, "total_steps": 1175, "loss": 1.0088846683502197, "lr": 1.3212379437413421e-05, "epoch": 3.251599147121535, "percentage": 65.02, "elapsed_time": "1:06:16", "remaining_time": "0:35:39"} +{"current_steps": 765, "total_steps": 1175, "loss": 0.960427463054657, "lr": 1.3156493966546236e-05, "epoch": 3.2558635394456292, "percentage": 65.11, "elapsed_time": "1:06:22", "remaining_time": "0:35:34"} +{"current_steps": 766, "total_steps": 1175, "loss": 0.9802002310752869, "lr": 1.3100668950105534e-05, "epoch": 3.260127931769723, "percentage": 65.19, "elapsed_time": "1:06:27", "remaining_time": "0:35:28"} +{"current_steps": 767, "total_steps": 1175, "loss": 0.9168298244476318, "lr": 1.3044904881240507e-05, "epoch": 3.2643923240938166, "percentage": 65.28, "elapsed_time": "1:06:32", "remaining_time": "0:35:23"} +{"current_steps": 768, "total_steps": 1175, "loss": 0.9944812059402466, "lr": 1.2989202252561926e-05, "epoch": 3.2686567164179103, "percentage": 65.36, "elapsed_time": "1:06:37", "remaining_time": "0:35:18"} +{"current_steps": 769, "total_steps": 1175, "loss": 0.9888614416122437, "lr": 1.2933561556137806e-05, "epoch": 3.272921108742004, "percentage": 65.45, "elapsed_time": "1:06:42", "remaining_time": "0:35:13"} +{"current_steps": 770, "total_steps": 1175, "loss": 0.9520066976547241, "lr": 1.2877983283489062e-05, "epoch": 3.277185501066098, "percentage": 65.53, "elapsed_time": "1:06:47", "remaining_time": "0:35:08"} +{"current_steps": 771, "total_steps": 1175, "loss": 0.9979465007781982, "lr": 1.2822467925585186e-05, "epoch": 3.281449893390192, "percentage": 65.62, "elapsed_time": "1:06:53", "remaining_time": "0:35:02"} +{"current_steps": 772, "total_steps": 1175, "loss": 0.9394721388816833, "lr": 1.2767015972839879e-05, "epoch": 3.2857142857142856, "percentage": 65.7, "elapsed_time": "1:06:58", "remaining_time": "0:34:57"} +{"current_steps": 773, "total_steps": 1175, "loss": 0.975515604019165, "lr": 1.2711627915106728e-05, "epoch": 3.2899786780383797, "percentage": 65.79, "elapsed_time": "1:07:03", "remaining_time": "0:34:52"} +{"current_steps": 774, "total_steps": 1175, "loss": 0.9717892408370972, "lr": 1.2656304241674877e-05, "epoch": 3.2942430703624734, "percentage": 65.87, "elapsed_time": "1:07:08", "remaining_time": "0:34:47"} +{"current_steps": 775, "total_steps": 1175, "loss": 0.9691690802574158, "lr": 1.2601045441264734e-05, "epoch": 3.298507462686567, "percentage": 65.96, "elapsed_time": "1:07:13", "remaining_time": "0:34:41"} +{"current_steps": 776, "total_steps": 1175, "loss": 0.9814242124557495, "lr": 1.2545852002023599e-05, "epoch": 3.302771855010661, "percentage": 66.04, "elapsed_time": "1:07:18", "remaining_time": "0:34:36"} +{"current_steps": 777, "total_steps": 1175, "loss": 1.0499078035354614, "lr": 1.2490724411521406e-05, "epoch": 3.307036247334755, "percentage": 66.13, "elapsed_time": "1:07:24", "remaining_time": "0:34:31"} +{"current_steps": 778, "total_steps": 1175, "loss": 0.9678243398666382, "lr": 1.243566315674637e-05, "epoch": 3.3113006396588487, "percentage": 66.21, "elapsed_time": "1:07:29", "remaining_time": "0:34:26"} +{"current_steps": 779, "total_steps": 1175, "loss": 1.048391342163086, "lr": 1.238066872410073e-05, "epoch": 3.3155650319829424, "percentage": 66.3, "elapsed_time": "1:07:34", "remaining_time": "0:34:21"} +{"current_steps": 780, "total_steps": 1175, "loss": 0.9866071939468384, "lr": 1.2325741599396418e-05, "epoch": 3.319829424307036, "percentage": 66.38, "elapsed_time": "1:07:39", "remaining_time": "0:34:15"} +{"current_steps": 781, "total_steps": 1175, "loss": 0.928637683391571, "lr": 1.2270882267850765e-05, "epoch": 3.3240938166311302, "percentage": 66.47, "elapsed_time": "1:07:44", "remaining_time": "0:34:10"} +{"current_steps": 782, "total_steps": 1175, "loss": 0.9275143146514893, "lr": 1.2216091214082248e-05, "epoch": 3.328358208955224, "percentage": 66.55, "elapsed_time": "1:07:49", "remaining_time": "0:34:05"} +{"current_steps": 783, "total_steps": 1175, "loss": 0.9925769567489624, "lr": 1.2161368922106192e-05, "epoch": 3.3326226012793176, "percentage": 66.64, "elapsed_time": "1:07:54", "remaining_time": "0:34:00"} +{"current_steps": 784, "total_steps": 1175, "loss": 0.9968470931053162, "lr": 1.2106715875330475e-05, "epoch": 3.3368869936034113, "percentage": 66.72, "elapsed_time": "1:08:00", "remaining_time": "0:33:54"} +{"current_steps": 785, "total_steps": 1175, "loss": 1.0421117544174194, "lr": 1.2052132556551275e-05, "epoch": 3.3411513859275055, "percentage": 66.81, "elapsed_time": "1:08:05", "remaining_time": "0:33:49"} +{"current_steps": 786, "total_steps": 1175, "loss": 0.9955507516860962, "lr": 1.1997619447948814e-05, "epoch": 3.345415778251599, "percentage": 66.89, "elapsed_time": "1:08:10", "remaining_time": "0:33:44"} +{"current_steps": 787, "total_steps": 1175, "loss": 1.018493890762329, "lr": 1.1943177031083094e-05, "epoch": 3.349680170575693, "percentage": 66.98, "elapsed_time": "1:08:15", "remaining_time": "0:33:39"} +{"current_steps": 788, "total_steps": 1175, "loss": 1.013110876083374, "lr": 1.1888805786889621e-05, "epoch": 3.3539445628997866, "percentage": 67.06, "elapsed_time": "1:08:20", "remaining_time": "0:33:33"} +{"current_steps": 789, "total_steps": 1175, "loss": 0.9786880016326904, "lr": 1.183450619567518e-05, "epoch": 3.3582089552238807, "percentage": 67.15, "elapsed_time": "1:08:25", "remaining_time": "0:33:28"} +{"current_steps": 790, "total_steps": 1175, "loss": 1.0120458602905273, "lr": 1.1780278737113581e-05, "epoch": 3.3624733475479744, "percentage": 67.23, "elapsed_time": "1:08:30", "remaining_time": "0:33:23"} +{"current_steps": 791, "total_steps": 1175, "loss": 0.9977100491523743, "lr": 1.1726123890241439e-05, "epoch": 3.366737739872068, "percentage": 67.32, "elapsed_time": "1:08:35", "remaining_time": "0:33:18"} +{"current_steps": 792, "total_steps": 1175, "loss": 0.9835935235023499, "lr": 1.1672042133453925e-05, "epoch": 3.3710021321961623, "percentage": 67.4, "elapsed_time": "1:08:41", "remaining_time": "0:33:12"} +{"current_steps": 793, "total_steps": 1175, "loss": 0.9231183528900146, "lr": 1.1618033944500527e-05, "epoch": 3.375266524520256, "percentage": 67.49, "elapsed_time": "1:08:46", "remaining_time": "0:33:07"} +{"current_steps": 794, "total_steps": 1175, "loss": 0.9658839702606201, "lr": 1.1564099800480864e-05, "epoch": 3.3795309168443497, "percentage": 67.57, "elapsed_time": "1:08:51", "remaining_time": "0:33:02"} +{"current_steps": 795, "total_steps": 1175, "loss": 0.9784061312675476, "lr": 1.151024017784045e-05, "epoch": 3.3837953091684434, "percentage": 67.66, "elapsed_time": "1:08:56", "remaining_time": "0:32:57"} +{"current_steps": 796, "total_steps": 1175, "loss": 0.9620468616485596, "lr": 1.1456455552366488e-05, "epoch": 3.388059701492537, "percentage": 67.74, "elapsed_time": "1:09:01", "remaining_time": "0:32:51"} +{"current_steps": 797, "total_steps": 1175, "loss": 0.9715833067893982, "lr": 1.1402746399183671e-05, "epoch": 3.3923240938166312, "percentage": 67.83, "elapsed_time": "1:09:06", "remaining_time": "0:32:46"} +{"current_steps": 798, "total_steps": 1175, "loss": 0.979851484298706, "lr": 1.1349113192749986e-05, "epoch": 3.396588486140725, "percentage": 67.91, "elapsed_time": "1:09:11", "remaining_time": "0:32:41"} +{"current_steps": 799, "total_steps": 1175, "loss": 0.9970759749412537, "lr": 1.1295556406852488e-05, "epoch": 3.4008528784648187, "percentage": 68.0, "elapsed_time": "1:09:17", "remaining_time": "0:32:36"} +{"current_steps": 800, "total_steps": 1175, "loss": 0.9559547901153564, "lr": 1.1242076514603201e-05, "epoch": 3.405117270788913, "percentage": 68.09, "elapsed_time": "1:09:22", "remaining_time": "0:32:30"} +{"current_steps": 801, "total_steps": 1175, "loss": 0.9920517206192017, "lr": 1.1188673988434831e-05, "epoch": 3.4093816631130065, "percentage": 68.17, "elapsed_time": "1:09:27", "remaining_time": "0:32:25"} +{"current_steps": 802, "total_steps": 1175, "loss": 0.9662362933158875, "lr": 1.1135349300096667e-05, "epoch": 3.4136460554371, "percentage": 68.26, "elapsed_time": "1:09:32", "remaining_time": "0:32:20"} +{"current_steps": 803, "total_steps": 1175, "loss": 0.9824597835540771, "lr": 1.1082102920650397e-05, "epoch": 3.417910447761194, "percentage": 68.34, "elapsed_time": "1:09:37", "remaining_time": "0:32:15"} +{"current_steps": 804, "total_steps": 1175, "loss": 1.0019625425338745, "lr": 1.102893532046593e-05, "epoch": 3.4221748400852876, "percentage": 68.43, "elapsed_time": "1:09:42", "remaining_time": "0:32:10"} +{"current_steps": 805, "total_steps": 1175, "loss": 0.968468189239502, "lr": 1.0975846969217258e-05, "epoch": 3.4264392324093818, "percentage": 68.51, "elapsed_time": "1:09:47", "remaining_time": "0:32:04"} +{"current_steps": 806, "total_steps": 1175, "loss": 0.983420729637146, "lr": 1.092283833587829e-05, "epoch": 3.4307036247334755, "percentage": 68.6, "elapsed_time": "1:09:52", "remaining_time": "0:31:59"} +{"current_steps": 807, "total_steps": 1175, "loss": 0.9687063097953796, "lr": 1.086990988871873e-05, "epoch": 3.434968017057569, "percentage": 68.68, "elapsed_time": "1:09:58", "remaining_time": "0:31:54"} +{"current_steps": 808, "total_steps": 1175, "loss": 0.9997848272323608, "lr": 1.0817062095299929e-05, "epoch": 3.4392324093816633, "percentage": 68.77, "elapsed_time": "1:10:03", "remaining_time": "0:31:49"} +{"current_steps": 809, "total_steps": 1175, "loss": 0.9746481776237488, "lr": 1.0764295422470755e-05, "epoch": 3.443496801705757, "percentage": 68.85, "elapsed_time": "1:10:08", "remaining_time": "0:31:43"} +{"current_steps": 810, "total_steps": 1175, "loss": 0.994911253452301, "lr": 1.0711610336363477e-05, "epoch": 3.4477611940298507, "percentage": 68.94, "elapsed_time": "1:10:13", "remaining_time": "0:31:38"} +{"current_steps": 811, "total_steps": 1175, "loss": 0.9198431968688965, "lr": 1.065900730238961e-05, "epoch": 3.4520255863539444, "percentage": 69.02, "elapsed_time": "1:10:18", "remaining_time": "0:31:33"} +{"current_steps": 812, "total_steps": 1175, "loss": 0.985055148601532, "lr": 1.0606486785235879e-05, "epoch": 3.4562899786780386, "percentage": 69.11, "elapsed_time": "1:10:23", "remaining_time": "0:31:28"} +{"current_steps": 813, "total_steps": 1175, "loss": 1.0189073085784912, "lr": 1.0554049248860045e-05, "epoch": 3.4605543710021323, "percentage": 69.19, "elapsed_time": "1:10:28", "remaining_time": "0:31:23"} +{"current_steps": 814, "total_steps": 1175, "loss": 0.9842698574066162, "lr": 1.0501695156486819e-05, "epoch": 3.464818763326226, "percentage": 69.28, "elapsed_time": "1:10:34", "remaining_time": "0:31:17"} +{"current_steps": 815, "total_steps": 1175, "loss": 0.946979820728302, "lr": 1.0449424970603796e-05, "epoch": 3.4690831556503197, "percentage": 69.36, "elapsed_time": "1:10:39", "remaining_time": "0:31:12"} +{"current_steps": 816, "total_steps": 1175, "loss": 0.9637709259986877, "lr": 1.0397239152957356e-05, "epoch": 3.473347547974414, "percentage": 69.45, "elapsed_time": "1:10:44", "remaining_time": "0:31:07"} +{"current_steps": 817, "total_steps": 1175, "loss": 1.0322394371032715, "lr": 1.034513816454858e-05, "epoch": 3.4776119402985075, "percentage": 69.53, "elapsed_time": "1:10:49", "remaining_time": "0:31:02"} +{"current_steps": 818, "total_steps": 1175, "loss": 0.9560979604721069, "lr": 1.0293122465629186e-05, "epoch": 3.481876332622601, "percentage": 69.62, "elapsed_time": "1:10:54", "remaining_time": "0:30:56"} +{"current_steps": 819, "total_steps": 1175, "loss": 0.9867568016052246, "lr": 1.0241192515697432e-05, "epoch": 3.486140724946695, "percentage": 69.7, "elapsed_time": "1:10:59", "remaining_time": "0:30:51"} +{"current_steps": 820, "total_steps": 1175, "loss": 1.0202665328979492, "lr": 1.0189348773494135e-05, "epoch": 3.490405117270789, "percentage": 69.79, "elapsed_time": "1:11:05", "remaining_time": "0:30:46"} +{"current_steps": 821, "total_steps": 1175, "loss": 0.9820563197135925, "lr": 1.0137591696998514e-05, "epoch": 3.4946695095948828, "percentage": 69.87, "elapsed_time": "1:11:10", "remaining_time": "0:30:41"} +{"current_steps": 822, "total_steps": 1175, "loss": 0.9641842842102051, "lr": 1.0085921743424225e-05, "epoch": 3.4989339019189765, "percentage": 69.96, "elapsed_time": "1:11:15", "remaining_time": "0:30:36"} +{"current_steps": 823, "total_steps": 1175, "loss": 1.0143928527832031, "lr": 1.0034339369215288e-05, "epoch": 3.50319829424307, "percentage": 70.04, "elapsed_time": "1:11:20", "remaining_time": "0:30:30"} +{"current_steps": 824, "total_steps": 1175, "loss": 0.9518420696258545, "lr": 9.982845030042068e-06, "epoch": 3.5074626865671643, "percentage": 70.13, "elapsed_time": "1:11:25", "remaining_time": "0:30:25"} +{"current_steps": 825, "total_steps": 1175, "loss": 1.0005815029144287, "lr": 9.931439180797237e-06, "epoch": 3.511727078891258, "percentage": 70.21, "elapsed_time": "1:11:30", "remaining_time": "0:30:20"} +{"current_steps": 826, "total_steps": 1175, "loss": 0.9343856573104858, "lr": 9.880122275591752e-06, "epoch": 3.5159914712153517, "percentage": 70.3, "elapsed_time": "1:11:35", "remaining_time": "0:30:15"} +{"current_steps": 827, "total_steps": 1175, "loss": 1.0360630750656128, "lr": 9.828894767750865e-06, "epoch": 3.520255863539446, "percentage": 70.38, "elapsed_time": "1:11:41", "remaining_time": "0:30:09"} +{"current_steps": 828, "total_steps": 1175, "loss": 0.9682170152664185, "lr": 9.777757109810102e-06, "epoch": 3.5245202558635396, "percentage": 70.47, "elapsed_time": "1:11:46", "remaining_time": "0:30:04"} +{"current_steps": 829, "total_steps": 1175, "loss": 0.9408371448516846, "lr": 9.726709753511275e-06, "epoch": 3.5287846481876333, "percentage": 70.55, "elapsed_time": "1:11:51", "remaining_time": "0:29:59"} +{"current_steps": 830, "total_steps": 1175, "loss": 0.9771254658699036, "lr": 9.675753149798474e-06, "epoch": 3.533049040511727, "percentage": 70.64, "elapsed_time": "1:11:56", "remaining_time": "0:29:54"} +{"current_steps": 831, "total_steps": 1175, "loss": 1.0215736627578735, "lr": 9.624887748814118e-06, "epoch": 3.5373134328358207, "percentage": 70.72, "elapsed_time": "1:12:01", "remaining_time": "0:29:49"} +{"current_steps": 832, "total_steps": 1175, "loss": 0.9992471933364868, "lr": 9.574113999894909e-06, "epoch": 3.541577825159915, "percentage": 70.81, "elapsed_time": "1:12:06", "remaining_time": "0:29:43"} +{"current_steps": 833, "total_steps": 1175, "loss": 0.9578772783279419, "lr": 9.523432351567979e-06, "epoch": 3.5458422174840085, "percentage": 70.89, "elapsed_time": "1:12:11", "remaining_time": "0:29:38"} +{"current_steps": 834, "total_steps": 1175, "loss": 1.0055651664733887, "lr": 9.472843251546792e-06, "epoch": 3.550106609808102, "percentage": 70.98, "elapsed_time": "1:12:17", "remaining_time": "0:29:33"} +{"current_steps": 835, "total_steps": 1175, "loss": 0.9918792843818665, "lr": 9.422347146727294e-06, "epoch": 3.5543710021321964, "percentage": 71.06, "elapsed_time": "1:12:22", "remaining_time": "0:29:28"} +{"current_steps": 836, "total_steps": 1175, "loss": 0.961499035358429, "lr": 9.371944483183912e-06, "epoch": 3.55863539445629, "percentage": 71.15, "elapsed_time": "1:12:27", "remaining_time": "0:29:22"} +{"current_steps": 837, "total_steps": 1175, "loss": 0.9757760763168335, "lr": 9.321635706165635e-06, "epoch": 3.5628997867803838, "percentage": 71.23, "elapsed_time": "1:12:32", "remaining_time": "0:29:17"} +{"current_steps": 838, "total_steps": 1175, "loss": 0.9891831874847412, "lr": 9.271421260092075e-06, "epoch": 3.5671641791044775, "percentage": 71.32, "elapsed_time": "1:12:37", "remaining_time": "0:29:12"} +{"current_steps": 839, "total_steps": 1175, "loss": 0.942074179649353, "lr": 9.221301588549519e-06, "epoch": 3.571428571428571, "percentage": 71.4, "elapsed_time": "1:12:42", "remaining_time": "0:29:07"} +{"current_steps": 840, "total_steps": 1175, "loss": 0.9449573755264282, "lr": 9.171277134287057e-06, "epoch": 3.5756929637526653, "percentage": 71.49, "elapsed_time": "1:12:47", "remaining_time": "0:29:01"} +{"current_steps": 841, "total_steps": 1175, "loss": 1.0226428508758545, "lr": 9.121348339212634e-06, "epoch": 3.579957356076759, "percentage": 71.57, "elapsed_time": "1:12:52", "remaining_time": "0:28:56"} +{"current_steps": 842, "total_steps": 1175, "loss": 1.0022697448730469, "lr": 9.07151564438916e-06, "epoch": 3.5842217484008527, "percentage": 71.66, "elapsed_time": "1:12:58", "remaining_time": "0:28:51"} +{"current_steps": 843, "total_steps": 1175, "loss": 0.9454774260520935, "lr": 9.021779490030611e-06, "epoch": 3.588486140724947, "percentage": 71.74, "elapsed_time": "1:13:03", "remaining_time": "0:28:46"} +{"current_steps": 844, "total_steps": 1175, "loss": 0.9721479415893555, "lr": 8.972140315498119e-06, "epoch": 3.5927505330490406, "percentage": 71.83, "elapsed_time": "1:13:08", "remaining_time": "0:28:41"} +{"current_steps": 845, "total_steps": 1175, "loss": 0.9860814809799194, "lr": 8.922598559296154e-06, "epoch": 3.5970149253731343, "percentage": 71.91, "elapsed_time": "1:13:13", "remaining_time": "0:28:35"} +{"current_steps": 846, "total_steps": 1175, "loss": 0.9304975271224976, "lr": 8.873154659068582e-06, "epoch": 3.6012793176972284, "percentage": 72.0, "elapsed_time": "1:13:18", "remaining_time": "0:28:30"} +{"current_steps": 847, "total_steps": 1175, "loss": 0.9832009673118591, "lr": 8.823809051594816e-06, "epoch": 3.605543710021322, "percentage": 72.09, "elapsed_time": "1:13:23", "remaining_time": "0:28:25"} +{"current_steps": 848, "total_steps": 1175, "loss": 0.9642736315727234, "lr": 8.774562172785988e-06, "epoch": 3.609808102345416, "percentage": 72.17, "elapsed_time": "1:13:28", "remaining_time": "0:28:20"} +{"current_steps": 849, "total_steps": 1175, "loss": 1.0208244323730469, "lr": 8.725414457681063e-06, "epoch": 3.6140724946695095, "percentage": 72.26, "elapsed_time": "1:13:34", "remaining_time": "0:28:14"} +{"current_steps": 850, "total_steps": 1175, "loss": 0.9406700134277344, "lr": 8.676366340443017e-06, "epoch": 3.6183368869936032, "percentage": 72.34, "elapsed_time": "1:13:39", "remaining_time": "0:28:09"} +{"current_steps": 851, "total_steps": 1175, "loss": 0.9919254183769226, "lr": 8.627418254355e-06, "epoch": 3.6226012793176974, "percentage": 72.43, "elapsed_time": "1:13:44", "remaining_time": "0:28:04"} +{"current_steps": 852, "total_steps": 1175, "loss": 0.9726200103759766, "lr": 8.578570631816474e-06, "epoch": 3.626865671641791, "percentage": 72.51, "elapsed_time": "1:13:49", "remaining_time": "0:27:59"} +{"current_steps": 853, "total_steps": 1175, "loss": 0.9699271321296692, "lr": 8.529823904339472e-06, "epoch": 3.631130063965885, "percentage": 72.6, "elapsed_time": "1:13:54", "remaining_time": "0:27:53"} +{"current_steps": 854, "total_steps": 1175, "loss": 0.9935591220855713, "lr": 8.481178502544684e-06, "epoch": 3.635394456289979, "percentage": 72.68, "elapsed_time": "1:13:59", "remaining_time": "0:27:48"} +{"current_steps": 855, "total_steps": 1175, "loss": 0.9702616930007935, "lr": 8.43263485615774e-06, "epoch": 3.6396588486140726, "percentage": 72.77, "elapsed_time": "1:14:04", "remaining_time": "0:27:43"} +{"current_steps": 856, "total_steps": 1175, "loss": 0.9816626310348511, "lr": 8.384193394005372e-06, "epoch": 3.6439232409381663, "percentage": 72.85, "elapsed_time": "1:14:09", "remaining_time": "0:27:38"} +{"current_steps": 857, "total_steps": 1175, "loss": 0.9883707761764526, "lr": 8.33585454401161e-06, "epoch": 3.64818763326226, "percentage": 72.94, "elapsed_time": "1:14:15", "remaining_time": "0:27:33"} +{"current_steps": 858, "total_steps": 1175, "loss": 0.9662632346153259, "lr": 8.287618733194073e-06, "epoch": 3.6524520255863537, "percentage": 73.02, "elapsed_time": "1:14:20", "remaining_time": "0:27:27"} +{"current_steps": 859, "total_steps": 1175, "loss": 0.9632445573806763, "lr": 8.239486387660096e-06, "epoch": 3.656716417910448, "percentage": 73.11, "elapsed_time": "1:14:25", "remaining_time": "0:27:22"} +{"current_steps": 860, "total_steps": 1175, "loss": 0.9559063911437988, "lr": 8.191457932603052e-06, "epoch": 3.6609808102345416, "percentage": 73.19, "elapsed_time": "1:14:30", "remaining_time": "0:27:17"} +{"current_steps": 861, "total_steps": 1175, "loss": 0.9960157871246338, "lr": 8.143533792298545e-06, "epoch": 3.6652452025586353, "percentage": 73.28, "elapsed_time": "1:14:35", "remaining_time": "0:27:12"} +{"current_steps": 862, "total_steps": 1175, "loss": 0.9713449478149414, "lr": 8.095714390100698e-06, "epoch": 3.6695095948827294, "percentage": 73.36, "elapsed_time": "1:14:40", "remaining_time": "0:27:06"} +{"current_steps": 863, "total_steps": 1175, "loss": 0.9706517457962036, "lr": 8.048000148438375e-06, "epoch": 3.673773987206823, "percentage": 73.45, "elapsed_time": "1:14:45", "remaining_time": "0:27:01"} +{"current_steps": 864, "total_steps": 1175, "loss": 0.9807164669036865, "lr": 8.000391488811485e-06, "epoch": 3.678038379530917, "percentage": 73.53, "elapsed_time": "1:14:50", "remaining_time": "0:26:56"} +{"current_steps": 865, "total_steps": 1175, "loss": 1.0361860990524292, "lr": 7.952888831787215e-06, "epoch": 3.6823027718550105, "percentage": 73.62, "elapsed_time": "1:14:55", "remaining_time": "0:26:51"} +{"current_steps": 866, "total_steps": 1175, "loss": 0.9568573236465454, "lr": 7.905492596996391e-06, "epoch": 3.6865671641791042, "percentage": 73.7, "elapsed_time": "1:15:01", "remaining_time": "0:26:46"} +{"current_steps": 867, "total_steps": 1175, "loss": 0.9520964026451111, "lr": 7.858203203129668e-06, "epoch": 3.6908315565031984, "percentage": 73.79, "elapsed_time": "1:15:06", "remaining_time": "0:26:40"} +{"current_steps": 868, "total_steps": 1175, "loss": 0.9987329840660095, "lr": 7.811021067933919e-06, "epoch": 3.695095948827292, "percentage": 73.87, "elapsed_time": "1:15:11", "remaining_time": "0:26:35"} +{"current_steps": 869, "total_steps": 1175, "loss": 0.9650008678436279, "lr": 7.763946608208504e-06, "epoch": 3.699360341151386, "percentage": 73.96, "elapsed_time": "1:15:16", "remaining_time": "0:26:30"} +{"current_steps": 870, "total_steps": 1175, "loss": 0.9769718050956726, "lr": 7.716980239801588e-06, "epoch": 3.70362473347548, "percentage": 74.04, "elapsed_time": "1:15:21", "remaining_time": "0:26:25"} +{"current_steps": 871, "total_steps": 1175, "loss": 1.0230576992034912, "lr": 7.670122377606495e-06, "epoch": 3.7078891257995736, "percentage": 74.13, "elapsed_time": "1:15:26", "remaining_time": "0:26:19"} +{"current_steps": 872, "total_steps": 1175, "loss": 0.9463640451431274, "lr": 7.623373435557988e-06, "epoch": 3.7121535181236673, "percentage": 74.21, "elapsed_time": "1:15:31", "remaining_time": "0:26:14"} +{"current_steps": 873, "total_steps": 1175, "loss": 0.9786025285720825, "lr": 7.5767338266286775e-06, "epoch": 3.716417910447761, "percentage": 74.3, "elapsed_time": "1:15:36", "remaining_time": "0:26:09"} +{"current_steps": 874, "total_steps": 1175, "loss": 0.9911025762557983, "lr": 7.530203962825331e-06, "epoch": 3.7206823027718547, "percentage": 74.38, "elapsed_time": "1:15:42", "remaining_time": "0:26:04"} +{"current_steps": 875, "total_steps": 1175, "loss": 0.9990006685256958, "lr": 7.483784255185249e-06, "epoch": 3.724946695095949, "percentage": 74.47, "elapsed_time": "1:15:47", "remaining_time": "0:25:59"} +{"current_steps": 876, "total_steps": 1175, "loss": 1.0078997611999512, "lr": 7.437475113772632e-06, "epoch": 3.7292110874200426, "percentage": 74.55, "elapsed_time": "1:15:52", "remaining_time": "0:25:53"} +{"current_steps": 877, "total_steps": 1175, "loss": 0.9665708541870117, "lr": 7.391276947674932e-06, "epoch": 3.7334754797441363, "percentage": 74.64, "elapsed_time": "1:15:57", "remaining_time": "0:25:48"} +{"current_steps": 878, "total_steps": 1175, "loss": 0.967820405960083, "lr": 7.345190164999307e-06, "epoch": 3.7377398720682304, "percentage": 74.72, "elapsed_time": "1:16:02", "remaining_time": "0:25:43"} +{"current_steps": 879, "total_steps": 1175, "loss": 0.9955414533615112, "lr": 7.299215172868947e-06, "epoch": 3.742004264392324, "percentage": 74.81, "elapsed_time": "1:16:07", "remaining_time": "0:25:38"} +{"current_steps": 880, "total_steps": 1175, "loss": 0.9483203887939453, "lr": 7.2533523774194865e-06, "epoch": 3.746268656716418, "percentage": 74.89, "elapsed_time": "1:16:12", "remaining_time": "0:25:32"} +{"current_steps": 881, "total_steps": 1175, "loss": 0.9806277751922607, "lr": 7.2076021837954616e-06, "epoch": 3.750533049040512, "percentage": 74.98, "elapsed_time": "1:16:17", "remaining_time": "0:25:27"} +{"current_steps": 882, "total_steps": 1175, "loss": 1.046656608581543, "lr": 7.161964996146689e-06, "epoch": 3.7547974413646057, "percentage": 75.06, "elapsed_time": "1:16:23", "remaining_time": "0:25:22"} +{"current_steps": 883, "total_steps": 1175, "loss": 0.9934045076370239, "lr": 7.116441217624708e-06, "epoch": 3.7590618336886994, "percentage": 75.15, "elapsed_time": "1:16:28", "remaining_time": "0:25:17"} +{"current_steps": 884, "total_steps": 1175, "loss": 0.996993899345398, "lr": 7.071031250379228e-06, "epoch": 3.763326226012793, "percentage": 75.23, "elapsed_time": "1:16:33", "remaining_time": "0:25:12"} +{"current_steps": 885, "total_steps": 1175, "loss": 1.007996916770935, "lr": 7.0257354955545466e-06, "epoch": 3.767590618336887, "percentage": 75.32, "elapsed_time": "1:16:38", "remaining_time": "0:25:06"} +{"current_steps": 886, "total_steps": 1175, "loss": 0.9907573461532593, "lr": 6.980554353286066e-06, "epoch": 3.771855010660981, "percentage": 75.4, "elapsed_time": "1:16:43", "remaining_time": "0:25:01"} +{"current_steps": 887, "total_steps": 1175, "loss": 0.9466689229011536, "lr": 6.935488222696676e-06, "epoch": 3.7761194029850746, "percentage": 75.49, "elapsed_time": "1:16:48", "remaining_time": "0:24:56"} +{"current_steps": 888, "total_steps": 1175, "loss": 1.0114989280700684, "lr": 6.890537501893302e-06, "epoch": 3.7803837953091683, "percentage": 75.57, "elapsed_time": "1:16:53", "remaining_time": "0:24:51"} +{"current_steps": 889, "total_steps": 1175, "loss": 0.9820560812950134, "lr": 6.845702587963352e-06, "epoch": 3.7846481876332625, "percentage": 75.66, "elapsed_time": "1:16:59", "remaining_time": "0:24:46"} +{"current_steps": 890, "total_steps": 1175, "loss": 1.0018254518508911, "lr": 6.800983876971192e-06, "epoch": 3.788912579957356, "percentage": 75.74, "elapsed_time": "1:17:04", "remaining_time": "0:24:40"} +{"current_steps": 891, "total_steps": 1175, "loss": 0.9879237413406372, "lr": 6.756381763954718e-06, "epoch": 3.79317697228145, "percentage": 75.83, "elapsed_time": "1:17:09", "remaining_time": "0:24:35"} +{"current_steps": 892, "total_steps": 1175, "loss": 0.9949040412902832, "lr": 6.7118966429217645e-06, "epoch": 3.7974413646055436, "percentage": 75.91, "elapsed_time": "1:17:14", "remaining_time": "0:24:30"} +{"current_steps": 893, "total_steps": 1175, "loss": 0.9881210923194885, "lr": 6.667528906846714e-06, "epoch": 3.8017057569296373, "percentage": 76.0, "elapsed_time": "1:17:19", "remaining_time": "0:24:25"} +{"current_steps": 894, "total_steps": 1175, "loss": 1.0030843019485474, "lr": 6.623278947666974e-06, "epoch": 3.8059701492537314, "percentage": 76.09, "elapsed_time": "1:17:24", "remaining_time": "0:24:19"} +{"current_steps": 895, "total_steps": 1175, "loss": 0.9971247315406799, "lr": 6.579147156279538e-06, "epoch": 3.810234541577825, "percentage": 76.17, "elapsed_time": "1:17:29", "remaining_time": "0:24:14"} +{"current_steps": 896, "total_steps": 1175, "loss": 1.0048599243164062, "lr": 6.535133922537513e-06, "epoch": 3.814498933901919, "percentage": 76.26, "elapsed_time": "1:17:35", "remaining_time": "0:24:09"} +{"current_steps": 897, "total_steps": 1175, "loss": 0.9471845626831055, "lr": 6.491239635246709e-06, "epoch": 3.818763326226013, "percentage": 76.34, "elapsed_time": "1:17:40", "remaining_time": "0:24:04"} +{"current_steps": 898, "total_steps": 1175, "loss": 0.9708333015441895, "lr": 6.447464682162143e-06, "epoch": 3.8230277185501067, "percentage": 76.43, "elapsed_time": "1:17:45", "remaining_time": "0:23:59"} +{"current_steps": 899, "total_steps": 1175, "loss": 0.9517656564712524, "lr": 6.403809449984704e-06, "epoch": 3.8272921108742004, "percentage": 76.51, "elapsed_time": "1:17:50", "remaining_time": "0:23:53"} +{"current_steps": 900, "total_steps": 1175, "loss": 1.0056332349777222, "lr": 6.3602743243576405e-06, "epoch": 3.831556503198294, "percentage": 76.6, "elapsed_time": "1:17:55", "remaining_time": "0:23:48"} +{"current_steps": 901, "total_steps": 1175, "loss": 0.979081928730011, "lr": 6.316859689863222e-06, "epoch": 3.835820895522388, "percentage": 76.68, "elapsed_time": "1:18:00", "remaining_time": "0:23:43"} +{"current_steps": 902, "total_steps": 1175, "loss": 0.9827362298965454, "lr": 6.273565930019316e-06, "epoch": 3.840085287846482, "percentage": 76.77, "elapsed_time": "1:18:05", "remaining_time": "0:23:38"} +{"current_steps": 903, "total_steps": 1175, "loss": 0.9563350677490234, "lr": 6.230393427276e-06, "epoch": 3.8443496801705757, "percentage": 76.85, "elapsed_time": "1:18:11", "remaining_time": "0:23:33"} +{"current_steps": 904, "total_steps": 1175, "loss": 0.9648277759552002, "lr": 6.187342563012198e-06, "epoch": 3.8486140724946694, "percentage": 76.94, "elapsed_time": "1:18:16", "remaining_time": "0:23:27"} +{"current_steps": 905, "total_steps": 1175, "loss": 0.9997645020484924, "lr": 6.144413717532269e-06, "epoch": 3.8528784648187635, "percentage": 77.02, "elapsed_time": "1:18:21", "remaining_time": "0:23:22"} +{"current_steps": 906, "total_steps": 1175, "loss": 1.013451099395752, "lr": 6.1016072700627106e-06, "epoch": 3.857142857142857, "percentage": 77.11, "elapsed_time": "1:18:26", "remaining_time": "0:23:17"} +{"current_steps": 907, "total_steps": 1175, "loss": 0.9879148006439209, "lr": 6.058923598748756e-06, "epoch": 3.861407249466951, "percentage": 77.19, "elapsed_time": "1:18:31", "remaining_time": "0:23:12"} +{"current_steps": 908, "total_steps": 1175, "loss": 0.972460925579071, "lr": 6.016363080651066e-06, "epoch": 3.8656716417910446, "percentage": 77.28, "elapsed_time": "1:18:36", "remaining_time": "0:23:06"} +{"current_steps": 909, "total_steps": 1175, "loss": 0.9702866077423096, "lr": 5.973926091742386e-06, "epoch": 3.8699360341151388, "percentage": 77.36, "elapsed_time": "1:18:41", "remaining_time": "0:23:01"} +{"current_steps": 910, "total_steps": 1175, "loss": 1.041187047958374, "lr": 5.931613006904196e-06, "epoch": 3.8742004264392325, "percentage": 77.45, "elapsed_time": "1:18:47", "remaining_time": "0:22:56"} +{"current_steps": 911, "total_steps": 1175, "loss": 1.0261526107788086, "lr": 5.889424199923473e-06, "epoch": 3.878464818763326, "percentage": 77.53, "elapsed_time": "1:18:52", "remaining_time": "0:22:51"} +{"current_steps": 912, "total_steps": 1175, "loss": 0.9904541969299316, "lr": 5.847360043489318e-06, "epoch": 3.88272921108742, "percentage": 77.62, "elapsed_time": "1:18:57", "remaining_time": "0:22:46"} +{"current_steps": 913, "total_steps": 1175, "loss": 0.966805636882782, "lr": 5.805420909189683e-06, "epoch": 3.886993603411514, "percentage": 77.7, "elapsed_time": "1:19:02", "remaining_time": "0:22:40"} +{"current_steps": 914, "total_steps": 1175, "loss": 0.9406954646110535, "lr": 5.7636071675081076e-06, "epoch": 3.8912579957356077, "percentage": 77.79, "elapsed_time": "1:19:07", "remaining_time": "0:22:35"} +{"current_steps": 915, "total_steps": 1175, "loss": 0.966067910194397, "lr": 5.721919187820431e-06, "epoch": 3.8955223880597014, "percentage": 77.87, "elapsed_time": "1:19:12", "remaining_time": "0:22:30"} +{"current_steps": 916, "total_steps": 1175, "loss": 0.9739153981208801, "lr": 5.6803573383915265e-06, "epoch": 3.8997867803837956, "percentage": 77.96, "elapsed_time": "1:19:18", "remaining_time": "0:22:25"} +{"current_steps": 917, "total_steps": 1175, "loss": 0.9694392085075378, "lr": 5.638921986372064e-06, "epoch": 3.9040511727078893, "percentage": 78.04, "elapsed_time": "1:19:23", "remaining_time": "0:22:20"} +{"current_steps": 918, "total_steps": 1175, "loss": 0.9809643030166626, "lr": 5.5976134977952315e-06, "epoch": 3.908315565031983, "percentage": 78.13, "elapsed_time": "1:19:28", "remaining_time": "0:22:14"} +{"current_steps": 919, "total_steps": 1175, "loss": 0.9921892285346985, "lr": 5.556432237573564e-06, "epoch": 3.9125799573560767, "percentage": 78.21, "elapsed_time": "1:19:33", "remaining_time": "0:22:09"} +{"current_steps": 920, "total_steps": 1175, "loss": 0.956708550453186, "lr": 5.5153785694956416e-06, "epoch": 3.9168443496801704, "percentage": 78.3, "elapsed_time": "1:19:38", "remaining_time": "0:22:04"} +{"current_steps": 921, "total_steps": 1175, "loss": 1.0439990758895874, "lr": 5.474452856222942e-06, "epoch": 3.9211087420042645, "percentage": 78.38, "elapsed_time": "1:19:43", "remaining_time": "0:21:59"} +{"current_steps": 922, "total_steps": 1175, "loss": 0.9538367986679077, "lr": 5.433655459286611e-06, "epoch": 3.925373134328358, "percentage": 78.47, "elapsed_time": "1:19:48", "remaining_time": "0:21:54"} +{"current_steps": 923, "total_steps": 1175, "loss": 1.000390887260437, "lr": 5.392986739084238e-06, "epoch": 3.929637526652452, "percentage": 78.55, "elapsed_time": "1:19:53", "remaining_time": "0:21:48"} +{"current_steps": 924, "total_steps": 1175, "loss": 0.9766531586647034, "lr": 5.352447054876755e-06, "epoch": 3.933901918976546, "percentage": 78.64, "elapsed_time": "1:19:59", "remaining_time": "0:21:43"} +{"current_steps": 925, "total_steps": 1175, "loss": 1.0060484409332275, "lr": 5.31203676478516e-06, "epoch": 3.9381663113006398, "percentage": 78.72, "elapsed_time": "1:20:04", "remaining_time": "0:21:38"} +{"current_steps": 926, "total_steps": 1175, "loss": 0.9796045422554016, "lr": 5.271756225787434e-06, "epoch": 3.9424307036247335, "percentage": 78.81, "elapsed_time": "1:20:09", "remaining_time": "0:21:33"} +{"current_steps": 927, "total_steps": 1175, "loss": 0.9589823484420776, "lr": 5.231605793715348e-06, "epoch": 3.946695095948827, "percentage": 78.89, "elapsed_time": "1:20:14", "remaining_time": "0:21:28"} +{"current_steps": 928, "total_steps": 1175, "loss": 0.9927637577056885, "lr": 5.191585823251335e-06, "epoch": 3.950959488272921, "percentage": 78.98, "elapsed_time": "1:20:19", "remaining_time": "0:21:22"} +{"current_steps": 929, "total_steps": 1175, "loss": 0.9865278005599976, "lr": 5.151696667925348e-06, "epoch": 3.955223880597015, "percentage": 79.06, "elapsed_time": "1:20:24", "remaining_time": "0:21:17"} +{"current_steps": 930, "total_steps": 1175, "loss": 0.9674332737922668, "lr": 5.111938680111732e-06, "epoch": 3.9594882729211087, "percentage": 79.15, "elapsed_time": "1:20:29", "remaining_time": "0:21:12"} +{"current_steps": 931, "total_steps": 1175, "loss": 1.012916922569275, "lr": 5.072312211026125e-06, "epoch": 3.9637526652452024, "percentage": 79.23, "elapsed_time": "1:20:35", "remaining_time": "0:21:07"} +{"current_steps": 932, "total_steps": 1175, "loss": 0.9658184051513672, "lr": 5.032817610722369e-06, "epoch": 3.9680170575692966, "percentage": 79.32, "elapsed_time": "1:20:40", "remaining_time": "0:21:01"} +{"current_steps": 933, "total_steps": 1175, "loss": 1.0015931129455566, "lr": 4.993455228089366e-06, "epoch": 3.9722814498933903, "percentage": 79.4, "elapsed_time": "1:20:45", "remaining_time": "0:20:56"} +{"current_steps": 934, "total_steps": 1175, "loss": 1.0132436752319336, "lr": 4.954225410848048e-06, "epoch": 3.976545842217484, "percentage": 79.49, "elapsed_time": "1:20:50", "remaining_time": "0:20:51"} +{"current_steps": 935, "total_steps": 1175, "loss": 1.002284288406372, "lr": 4.915128505548284e-06, "epoch": 3.9808102345415777, "percentage": 79.57, "elapsed_time": "1:20:55", "remaining_time": "0:20:46"} +{"current_steps": 936, "total_steps": 1175, "loss": 0.9923639297485352, "lr": 4.8761648575658145e-06, "epoch": 3.9850746268656714, "percentage": 79.66, "elapsed_time": "1:21:00", "remaining_time": "0:20:41"} +{"current_steps": 937, "total_steps": 1175, "loss": 0.9767214059829712, "lr": 4.837334811099217e-06, "epoch": 3.9893390191897655, "percentage": 79.74, "elapsed_time": "1:21:05", "remaining_time": "0:20:35"} +{"current_steps": 938, "total_steps": 1175, "loss": 0.9671895503997803, "lr": 4.7986387091668365e-06, "epoch": 3.9936034115138592, "percentage": 79.83, "elapsed_time": "1:21:10", "remaining_time": "0:20:30"} +{"current_steps": 939, "total_steps": 1175, "loss": 0.9730648994445801, "lr": 4.760076893603791e-06, "epoch": 3.997867803837953, "percentage": 79.91, "elapsed_time": "1:21:16", "remaining_time": "0:20:25"} +{"current_steps": 940, "total_steps": 1175, "loss": 0.9118285179138184, "lr": 4.721649705058926e-06, "epoch": 4.0, "percentage": 80.0, "elapsed_time": "1:21:18", "remaining_time": "0:20:19"} +{"current_steps": 941, "total_steps": 1175, "loss": 1.0042850971221924, "lr": 4.683357482991819e-06, "epoch": 4.004264392324094, "percentage": 80.09, "elapsed_time": "1:21:24", "remaining_time": "0:20:14"} +{"current_steps": 942, "total_steps": 1175, "loss": 1.0006030797958374, "lr": 4.645200565669776e-06, "epoch": 4.008528784648187, "percentage": 80.17, "elapsed_time": "1:21:29", "remaining_time": "0:20:09"} +{"current_steps": 943, "total_steps": 1175, "loss": 1.0080355405807495, "lr": 4.607179290164823e-06, "epoch": 4.0127931769722816, "percentage": 80.26, "elapsed_time": "1:21:34", "remaining_time": "0:20:04"} +{"current_steps": 944, "total_steps": 1175, "loss": 0.9192696213722229, "lr": 4.569293992350783e-06, "epoch": 4.017057569296376, "percentage": 80.34, "elapsed_time": "1:21:39", "remaining_time": "0:19:58"} +{"current_steps": 945, "total_steps": 1175, "loss": 0.9563088417053223, "lr": 4.531545006900244e-06, "epoch": 4.021321961620469, "percentage": 80.43, "elapsed_time": "1:21:44", "remaining_time": "0:19:53"} +{"current_steps": 946, "total_steps": 1175, "loss": 0.9235143661499023, "lr": 4.493932667281646e-06, "epoch": 4.025586353944563, "percentage": 80.51, "elapsed_time": "1:21:50", "remaining_time": "0:19:48"} +{"current_steps": 947, "total_steps": 1175, "loss": 0.9056645631790161, "lr": 4.456457305756321e-06, "epoch": 4.029850746268656, "percentage": 80.6, "elapsed_time": "1:21:55", "remaining_time": "0:19:43"} +{"current_steps": 948, "total_steps": 1175, "loss": 1.0127660036087036, "lr": 4.419119253375557e-06, "epoch": 4.0341151385927505, "percentage": 80.68, "elapsed_time": "1:22:00", "remaining_time": "0:19:38"} +{"current_steps": 949, "total_steps": 1175, "loss": 1.013496994972229, "lr": 4.381918839977675e-06, "epoch": 4.038379530916845, "percentage": 80.77, "elapsed_time": "1:22:05", "remaining_time": "0:19:33"} +{"current_steps": 950, "total_steps": 1175, "loss": 0.9960319995880127, "lr": 4.344856394185122e-06, "epoch": 4.042643923240938, "percentage": 80.85, "elapsed_time": "1:22:10", "remaining_time": "0:19:27"} +{"current_steps": 951, "total_steps": 1175, "loss": 0.9637834429740906, "lr": 4.307932243401538e-06, "epoch": 4.046908315565032, "percentage": 80.94, "elapsed_time": "1:22:15", "remaining_time": "0:19:22"} +{"current_steps": 952, "total_steps": 1175, "loss": 0.9625729322433472, "lr": 4.271146713808927e-06, "epoch": 4.051172707889126, "percentage": 81.02, "elapsed_time": "1:22:21", "remaining_time": "0:19:17"} +{"current_steps": 953, "total_steps": 1175, "loss": 0.9672271013259888, "lr": 4.234500130364698e-06, "epoch": 4.0554371002132195, "percentage": 81.11, "elapsed_time": "1:22:26", "remaining_time": "0:19:12"} +{"current_steps": 954, "total_steps": 1175, "loss": 0.9610116481781006, "lr": 4.197992816798851e-06, "epoch": 4.059701492537314, "percentage": 81.19, "elapsed_time": "1:22:31", "remaining_time": "0:19:07"} +{"current_steps": 955, "total_steps": 1175, "loss": 0.9844383001327515, "lr": 4.161625095611101e-06, "epoch": 4.063965884861407, "percentage": 81.28, "elapsed_time": "1:22:36", "remaining_time": "0:19:01"} +{"current_steps": 956, "total_steps": 1175, "loss": 0.9343520998954773, "lr": 4.125397288068007e-06, "epoch": 4.068230277185501, "percentage": 81.36, "elapsed_time": "1:22:41", "remaining_time": "0:18:56"} +{"current_steps": 957, "total_steps": 1175, "loss": 0.9805846214294434, "lr": 4.089309714200187e-06, "epoch": 4.072494669509595, "percentage": 81.45, "elapsed_time": "1:22:46", "remaining_time": "0:18:51"} +{"current_steps": 958, "total_steps": 1175, "loss": 0.9530068635940552, "lr": 4.0533626927994185e-06, "epoch": 4.076759061833688, "percentage": 81.53, "elapsed_time": "1:22:51", "remaining_time": "0:18:46"} +{"current_steps": 959, "total_steps": 1175, "loss": 0.9678086042404175, "lr": 4.017556541415888e-06, "epoch": 4.081023454157783, "percentage": 81.62, "elapsed_time": "1:22:57", "remaining_time": "0:18:40"} +{"current_steps": 960, "total_steps": 1175, "loss": 0.9462642073631287, "lr": 3.981891576355352e-06, "epoch": 4.085287846481877, "percentage": 81.7, "elapsed_time": "1:23:02", "remaining_time": "0:18:35"} +{"current_steps": 961, "total_steps": 1175, "loss": 0.9328886270523071, "lr": 3.946368112676346e-06, "epoch": 4.08955223880597, "percentage": 81.79, "elapsed_time": "1:23:07", "remaining_time": "0:18:30"} +{"current_steps": 962, "total_steps": 1175, "loss": 0.9877804517745972, "lr": 3.9109864641874166e-06, "epoch": 4.093816631130064, "percentage": 81.87, "elapsed_time": "1:23:12", "remaining_time": "0:18:25"} +{"current_steps": 963, "total_steps": 1175, "loss": 0.9596878290176392, "lr": 3.875746943444316e-06, "epoch": 4.098081023454157, "percentage": 81.96, "elapsed_time": "1:23:17", "remaining_time": "0:18:20"} +{"current_steps": 964, "total_steps": 1175, "loss": 0.9820946455001831, "lr": 3.840649861747278e-06, "epoch": 4.1023454157782515, "percentage": 82.04, "elapsed_time": "1:23:22", "remaining_time": "0:18:14"} +{"current_steps": 965, "total_steps": 1175, "loss": 0.9720626473426819, "lr": 3.8056955291382667e-06, "epoch": 4.106609808102346, "percentage": 82.13, "elapsed_time": "1:23:27", "remaining_time": "0:18:09"} +{"current_steps": 966, "total_steps": 1175, "loss": 1.0157244205474854, "lr": 3.7708842543981928e-06, "epoch": 4.110874200426439, "percentage": 82.21, "elapsed_time": "1:23:32", "remaining_time": "0:18:04"} +{"current_steps": 967, "total_steps": 1175, "loss": 0.9629996418952942, "lr": 3.736216345044237e-06, "epoch": 4.115138592750533, "percentage": 82.3, "elapsed_time": "1:23:38", "remaining_time": "0:17:59"} +{"current_steps": 968, "total_steps": 1175, "loss": 0.9766483902931213, "lr": 3.7016921073271084e-06, "epoch": 4.119402985074627, "percentage": 82.38, "elapsed_time": "1:23:43", "remaining_time": "0:17:54"} +{"current_steps": 969, "total_steps": 1175, "loss": 0.9764162302017212, "lr": 3.6673118462283453e-06, "epoch": 4.1236673773987205, "percentage": 82.47, "elapsed_time": "1:23:48", "remaining_time": "0:17:48"} +{"current_steps": 970, "total_steps": 1175, "loss": 0.987112283706665, "lr": 3.6330758654576227e-06, "epoch": 4.127931769722815, "percentage": 82.55, "elapsed_time": "1:23:53", "remaining_time": "0:17:43"} +{"current_steps": 971, "total_steps": 1175, "loss": 0.931121826171875, "lr": 3.598984467450055e-06, "epoch": 4.132196162046908, "percentage": 82.64, "elapsed_time": "1:23:58", "remaining_time": "0:17:38"} +{"current_steps": 972, "total_steps": 1175, "loss": 0.9461972713470459, "lr": 3.565037953363546e-06, "epoch": 4.136460554371002, "percentage": 82.72, "elapsed_time": "1:24:03", "remaining_time": "0:17:33"} +{"current_steps": 973, "total_steps": 1175, "loss": 0.9400416016578674, "lr": 3.5312366230761154e-06, "epoch": 4.140724946695096, "percentage": 82.81, "elapsed_time": "1:24:08", "remaining_time": "0:17:28"} +{"current_steps": 974, "total_steps": 1175, "loss": 0.9511521458625793, "lr": 3.497580775183258e-06, "epoch": 4.144989339019189, "percentage": 82.89, "elapsed_time": "1:24:14", "remaining_time": "0:17:22"} +{"current_steps": 975, "total_steps": 1175, "loss": 1.0143787860870361, "lr": 3.464070706995295e-06, "epoch": 4.149253731343284, "percentage": 82.98, "elapsed_time": "1:24:19", "remaining_time": "0:17:17"} +{"current_steps": 976, "total_steps": 1175, "loss": 0.9798712730407715, "lr": 3.4307067145347417e-06, "epoch": 4.153518123667378, "percentage": 83.06, "elapsed_time": "1:24:24", "remaining_time": "0:17:12"} +{"current_steps": 977, "total_steps": 1175, "loss": 0.9285037517547607, "lr": 3.397489092533739e-06, "epoch": 4.157782515991471, "percentage": 83.15, "elapsed_time": "1:24:29", "remaining_time": "0:17:07"} +{"current_steps": 978, "total_steps": 1175, "loss": 0.9553232192993164, "lr": 3.364418134431371e-06, "epoch": 4.162046908315565, "percentage": 83.23, "elapsed_time": "1:24:34", "remaining_time": "0:17:02"} +{"current_steps": 979, "total_steps": 1175, "loss": 1.0109907388687134, "lr": 3.331494132371149e-06, "epoch": 4.166311300639659, "percentage": 83.32, "elapsed_time": "1:24:39", "remaining_time": "0:16:56"} +{"current_steps": 980, "total_steps": 1175, "loss": 0.9764183759689331, "lr": 3.2987173771983816e-06, "epoch": 4.1705756929637525, "percentage": 83.4, "elapsed_time": "1:24:44", "remaining_time": "0:16:51"} +{"current_steps": 981, "total_steps": 1175, "loss": 0.994144856929779, "lr": 3.266088158457634e-06, "epoch": 4.174840085287847, "percentage": 83.49, "elapsed_time": "1:24:50", "remaining_time": "0:16:46"} +{"current_steps": 982, "total_steps": 1175, "loss": 0.9840140342712402, "lr": 3.233606764390147e-06, "epoch": 4.17910447761194, "percentage": 83.57, "elapsed_time": "1:24:55", "remaining_time": "0:16:41"} +{"current_steps": 983, "total_steps": 1175, "loss": 0.9591784477233887, "lr": 3.2012734819313127e-06, "epoch": 4.183368869936034, "percentage": 83.66, "elapsed_time": "1:25:00", "remaining_time": "0:16:36"} +{"current_steps": 984, "total_steps": 1175, "loss": 0.9503059387207031, "lr": 3.1690885967081187e-06, "epoch": 4.187633262260128, "percentage": 83.74, "elapsed_time": "1:25:05", "remaining_time": "0:16:30"} +{"current_steps": 985, "total_steps": 1175, "loss": 1.0117886066436768, "lr": 3.1370523930366393e-06, "epoch": 4.1918976545842215, "percentage": 83.83, "elapsed_time": "1:25:10", "remaining_time": "0:16:25"} +{"current_steps": 986, "total_steps": 1175, "loss": 0.9581783413887024, "lr": 3.105165153919525e-06, "epoch": 4.196162046908316, "percentage": 83.91, "elapsed_time": "1:25:15", "remaining_time": "0:16:20"} +{"current_steps": 987, "total_steps": 1175, "loss": 0.9946070313453674, "lr": 3.073427161043492e-06, "epoch": 4.20042643923241, "percentage": 84.0, "elapsed_time": "1:25:20", "remaining_time": "0:16:15"} +{"current_steps": 988, "total_steps": 1175, "loss": 0.9738461971282959, "lr": 3.0418386947768463e-06, "epoch": 4.204690831556503, "percentage": 84.09, "elapsed_time": "1:25:25", "remaining_time": "0:16:10"} +{"current_steps": 989, "total_steps": 1175, "loss": 0.9926748275756836, "lr": 3.01040003416698e-06, "epoch": 4.208955223880597, "percentage": 84.17, "elapsed_time": "1:25:31", "remaining_time": "0:16:04"} +{"current_steps": 990, "total_steps": 1175, "loss": 0.9960339069366455, "lr": 2.97911145693796e-06, "epoch": 4.21321961620469, "percentage": 84.26, "elapsed_time": "1:25:36", "remaining_time": "0:15:59"} +{"current_steps": 991, "total_steps": 1175, "loss": 0.9805059432983398, "lr": 2.947973239488009e-06, "epoch": 4.217484008528785, "percentage": 84.34, "elapsed_time": "1:25:41", "remaining_time": "0:15:54"} +{"current_steps": 992, "total_steps": 1175, "loss": 0.9603044986724854, "lr": 2.91698565688711e-06, "epoch": 4.221748400852879, "percentage": 84.43, "elapsed_time": "1:25:46", "remaining_time": "0:15:49"} +{"current_steps": 993, "total_steps": 1175, "loss": 0.9346092939376831, "lr": 2.886148982874566e-06, "epoch": 4.226012793176972, "percentage": 84.51, "elapsed_time": "1:25:51", "remaining_time": "0:15:44"} +{"current_steps": 994, "total_steps": 1175, "loss": 0.9752610325813293, "lr": 2.8554634898565668e-06, "epoch": 4.230277185501066, "percentage": 84.6, "elapsed_time": "1:25:56", "remaining_time": "0:15:38"} +{"current_steps": 995, "total_steps": 1175, "loss": 0.9946762323379517, "lr": 2.824929448903806e-06, "epoch": 4.23454157782516, "percentage": 84.68, "elapsed_time": "1:26:01", "remaining_time": "0:15:33"} +{"current_steps": 996, "total_steps": 1175, "loss": 0.9420288801193237, "lr": 2.794547129749059e-06, "epoch": 4.2388059701492535, "percentage": 84.77, "elapsed_time": "1:26:06", "remaining_time": "0:15:28"} +{"current_steps": 997, "total_steps": 1175, "loss": 0.9856697916984558, "lr": 2.7643168007848255e-06, "epoch": 4.243070362473348, "percentage": 84.85, "elapsed_time": "1:26:12", "remaining_time": "0:15:23"} +{"current_steps": 998, "total_steps": 1175, "loss": 0.9388452172279358, "lr": 2.734238729060956e-06, "epoch": 4.247334754797441, "percentage": 84.94, "elapsed_time": "1:26:17", "remaining_time": "0:15:18"} +{"current_steps": 999, "total_steps": 1175, "loss": 0.9250015020370483, "lr": 2.7043131802822653e-06, "epoch": 4.251599147121535, "percentage": 85.02, "elapsed_time": "1:26:22", "remaining_time": "0:15:13"} +{"current_steps": 1000, "total_steps": 1175, "loss": 0.9823698401451111, "lr": 2.674540418806222e-06, "epoch": 4.255863539445629, "percentage": 85.11, "elapsed_time": "1:26:27", "remaining_time": "0:15:07"} +{"current_steps": 1001, "total_steps": 1175, "loss": 0.9904990792274475, "lr": 2.6449207076405857e-06, "epoch": 4.2601279317697225, "percentage": 85.19, "elapsed_time": "1:26:38", "remaining_time": "0:15:03"} +{"current_steps": 1002, "total_steps": 1175, "loss": 0.9995609521865845, "lr": 2.6154543084411035e-06, "epoch": 4.264392324093817, "percentage": 85.28, "elapsed_time": "1:26:43", "remaining_time": "0:14:58"} +{"current_steps": 1003, "total_steps": 1175, "loss": 0.9617021083831787, "lr": 2.5861414815091834e-06, "epoch": 4.268656716417911, "percentage": 85.36, "elapsed_time": "1:26:48", "remaining_time": "0:14:53"} +{"current_steps": 1004, "total_steps": 1175, "loss": 0.9622359275817871, "lr": 2.5569824857895987e-06, "epoch": 4.272921108742004, "percentage": 85.45, "elapsed_time": "1:26:53", "remaining_time": "0:14:48"} +{"current_steps": 1005, "total_steps": 1175, "loss": 0.9702969789505005, "lr": 2.5279775788682083e-06, "epoch": 4.277185501066098, "percentage": 85.53, "elapsed_time": "1:26:59", "remaining_time": "0:14:42"} +{"current_steps": 1006, "total_steps": 1175, "loss": 0.997840404510498, "lr": 2.499127016969671e-06, "epoch": 4.281449893390192, "percentage": 85.62, "elapsed_time": "1:27:04", "remaining_time": "0:14:37"} +{"current_steps": 1007, "total_steps": 1175, "loss": 0.9802528023719788, "lr": 2.4704310549551934e-06, "epoch": 4.285714285714286, "percentage": 85.7, "elapsed_time": "1:27:09", "remaining_time": "0:14:32"} +{"current_steps": 1008, "total_steps": 1175, "loss": 0.9607895612716675, "lr": 2.441889946320266e-06, "epoch": 4.28997867803838, "percentage": 85.79, "elapsed_time": "1:27:14", "remaining_time": "0:14:27"} +{"current_steps": 1009, "total_steps": 1175, "loss": 1.0122029781341553, "lr": 2.4135039431924233e-06, "epoch": 4.294243070362473, "percentage": 85.87, "elapsed_time": "1:27:19", "remaining_time": "0:14:22"} +{"current_steps": 1010, "total_steps": 1175, "loss": 0.9613388180732727, "lr": 2.3852732963290426e-06, "epoch": 4.298507462686567, "percentage": 85.96, "elapsed_time": "1:27:24", "remaining_time": "0:14:16"} +{"current_steps": 1011, "total_steps": 1175, "loss": 0.9711207151412964, "lr": 2.3571982551150853e-06, "epoch": 4.302771855010661, "percentage": 86.04, "elapsed_time": "1:27:29", "remaining_time": "0:14:11"} +{"current_steps": 1012, "total_steps": 1175, "loss": 0.95209801197052, "lr": 2.329279067560937e-06, "epoch": 4.3070362473347545, "percentage": 86.13, "elapsed_time": "1:27:34", "remaining_time": "0:14:06"} +{"current_steps": 1013, "total_steps": 1175, "loss": 1.010817289352417, "lr": 2.301515980300182e-06, "epoch": 4.311300639658849, "percentage": 86.21, "elapsed_time": "1:27:40", "remaining_time": "0:14:01"} +{"current_steps": 1014, "total_steps": 1175, "loss": 0.9674122333526611, "lr": 2.2739092385874527e-06, "epoch": 4.315565031982943, "percentage": 86.3, "elapsed_time": "1:27:45", "remaining_time": "0:13:55"} +{"current_steps": 1015, "total_steps": 1175, "loss": 1.007246494293213, "lr": 2.2464590862962443e-06, "epoch": 4.319829424307036, "percentage": 86.38, "elapsed_time": "1:27:50", "remaining_time": "0:13:50"} +{"current_steps": 1016, "total_steps": 1175, "loss": 0.9655307531356812, "lr": 2.219165765916769e-06, "epoch": 4.32409381663113, "percentage": 86.47, "elapsed_time": "1:27:55", "remaining_time": "0:13:45"} +{"current_steps": 1017, "total_steps": 1175, "loss": 0.9374470710754395, "lr": 2.192029518553798e-06, "epoch": 4.3283582089552235, "percentage": 86.55, "elapsed_time": "1:28:00", "remaining_time": "0:13:40"} +{"current_steps": 1018, "total_steps": 1175, "loss": 0.9549652338027954, "lr": 2.165050583924566e-06, "epoch": 4.332622601279318, "percentage": 86.64, "elapsed_time": "1:28:05", "remaining_time": "0:13:35"} +{"current_steps": 1019, "total_steps": 1175, "loss": 0.9814637899398804, "lr": 2.1382292003566163e-06, "epoch": 4.336886993603412, "percentage": 86.72, "elapsed_time": "1:28:10", "remaining_time": "0:13:30"} +{"current_steps": 1020, "total_steps": 1175, "loss": 0.9029624462127686, "lr": 2.1115656047857213e-06, "epoch": 4.341151385927505, "percentage": 86.81, "elapsed_time": "1:28:16", "remaining_time": "0:13:24"} +{"current_steps": 1021, "total_steps": 1175, "loss": 0.9489182233810425, "lr": 2.0850600327537806e-06, "epoch": 4.345415778251599, "percentage": 86.89, "elapsed_time": "1:28:21", "remaining_time": "0:13:19"} +{"current_steps": 1022, "total_steps": 1175, "loss": 1.0014092922210693, "lr": 2.058712718406719e-06, "epoch": 4.349680170575693, "percentage": 86.98, "elapsed_time": "1:28:26", "remaining_time": "0:13:14"} +{"current_steps": 1023, "total_steps": 1175, "loss": 1.0021915435791016, "lr": 2.032523894492471e-06, "epoch": 4.353944562899787, "percentage": 87.06, "elapsed_time": "1:28:31", "remaining_time": "0:13:09"} +{"current_steps": 1024, "total_steps": 1175, "loss": 1.0034961700439453, "lr": 2.0064937923588634e-06, "epoch": 4.358208955223881, "percentage": 87.15, "elapsed_time": "1:28:36", "remaining_time": "0:13:03"} +{"current_steps": 1025, "total_steps": 1175, "loss": 0.9881649017333984, "lr": 1.9806226419516195e-06, "epoch": 4.362473347547974, "percentage": 87.23, "elapsed_time": "1:28:41", "remaining_time": "0:12:58"} +{"current_steps": 1026, "total_steps": 1175, "loss": 0.9592493772506714, "lr": 1.954910671812298e-06, "epoch": 4.366737739872068, "percentage": 87.32, "elapsed_time": "1:28:46", "remaining_time": "0:12:53"} +{"current_steps": 1027, "total_steps": 1175, "loss": 0.9723584651947021, "lr": 1.9293581090762894e-06, "epoch": 4.371002132196162, "percentage": 87.4, "elapsed_time": "1:28:52", "remaining_time": "0:12:48"} +{"current_steps": 1028, "total_steps": 1175, "loss": 0.9399305582046509, "lr": 1.9039651794708058e-06, "epoch": 4.3752665245202556, "percentage": 87.49, "elapsed_time": "1:28:57", "remaining_time": "0:12:43"} +{"current_steps": 1029, "total_steps": 1175, "loss": 0.9942531585693359, "lr": 1.8787321073128817e-06, "epoch": 4.37953091684435, "percentage": 87.57, "elapsed_time": "1:29:02", "remaining_time": "0:12:37"} +{"current_steps": 1030, "total_steps": 1175, "loss": 1.0341134071350098, "lr": 1.8536591155073958e-06, "epoch": 4.383795309168444, "percentage": 87.66, "elapsed_time": "1:29:07", "remaining_time": "0:12:32"} +{"current_steps": 1031, "total_steps": 1175, "loss": 0.9730774164199829, "lr": 1.8287464255451181e-06, "epoch": 4.388059701492537, "percentage": 87.74, "elapsed_time": "1:29:12", "remaining_time": "0:12:27"} +{"current_steps": 1032, "total_steps": 1175, "loss": 0.9657065272331238, "lr": 1.803994257500714e-06, "epoch": 4.392324093816631, "percentage": 87.83, "elapsed_time": "1:29:17", "remaining_time": "0:12:22"} +{"current_steps": 1033, "total_steps": 1175, "loss": 0.9861183762550354, "lr": 1.7794028300308474e-06, "epoch": 4.396588486140725, "percentage": 87.91, "elapsed_time": "1:29:22", "remaining_time": "0:12:17"} +{"current_steps": 1034, "total_steps": 1175, "loss": 0.9705492258071899, "lr": 1.7549723603722003e-06, "epoch": 4.400852878464819, "percentage": 88.0, "elapsed_time": "1:29:28", "remaining_time": "0:12:12"} +{"current_steps": 1035, "total_steps": 1175, "loss": 0.9851311445236206, "lr": 1.730703064339605e-06, "epoch": 4.405117270788913, "percentage": 88.09, "elapsed_time": "1:29:33", "remaining_time": "0:12:06"} +{"current_steps": 1036, "total_steps": 1175, "loss": 0.9457612037658691, "lr": 1.7065951563241022e-06, "epoch": 4.409381663113006, "percentage": 88.17, "elapsed_time": "1:29:38", "remaining_time": "0:12:01"} +{"current_steps": 1037, "total_steps": 1175, "loss": 1.0145244598388672, "lr": 1.682648849291051e-06, "epoch": 4.4136460554371, "percentage": 88.26, "elapsed_time": "1:29:43", "remaining_time": "0:11:56"} +{"current_steps": 1038, "total_steps": 1175, "loss": 1.0064364671707153, "lr": 1.6588643547782579e-06, "epoch": 4.417910447761194, "percentage": 88.34, "elapsed_time": "1:29:48", "remaining_time": "0:11:51"} +{"current_steps": 1039, "total_steps": 1175, "loss": 0.9757519960403442, "lr": 1.6352418828941052e-06, "epoch": 4.422174840085288, "percentage": 88.43, "elapsed_time": "1:29:53", "remaining_time": "0:11:45"} +{"current_steps": 1040, "total_steps": 1175, "loss": 0.9778440594673157, "lr": 1.6117816423156952e-06, "epoch": 4.426439232409382, "percentage": 88.51, "elapsed_time": "1:29:58", "remaining_time": "0:11:40"} +{"current_steps": 1041, "total_steps": 1175, "loss": 0.9737083911895752, "lr": 1.5884838402870029e-06, "epoch": 4.430703624733475, "percentage": 88.6, "elapsed_time": "1:30:03", "remaining_time": "0:11:35"} +{"current_steps": 1042, "total_steps": 1175, "loss": 0.98288494348526, "lr": 1.5653486826170384e-06, "epoch": 4.434968017057569, "percentage": 88.68, "elapsed_time": "1:30:09", "remaining_time": "0:11:30"} +{"current_steps": 1043, "total_steps": 1175, "loss": 0.9392582774162292, "lr": 1.5423763736780583e-06, "epoch": 4.439232409381663, "percentage": 88.77, "elapsed_time": "1:30:14", "remaining_time": "0:11:25"} +{"current_steps": 1044, "total_steps": 1175, "loss": 0.9626212120056152, "lr": 1.5195671164037173e-06, "epoch": 4.443496801705757, "percentage": 88.85, "elapsed_time": "1:30:19", "remaining_time": "0:11:20"} +{"current_steps": 1045, "total_steps": 1175, "loss": 0.9724099636077881, "lr": 1.496921112287315e-06, "epoch": 4.447761194029851, "percentage": 88.94, "elapsed_time": "1:30:24", "remaining_time": "0:11:14"} +{"current_steps": 1046, "total_steps": 1175, "loss": 0.9834390878677368, "lr": 1.4744385613799894e-06, "epoch": 4.452025586353945, "percentage": 89.02, "elapsed_time": "1:30:30", "remaining_time": "0:11:09"} +{"current_steps": 1047, "total_steps": 1175, "loss": 0.9382596611976624, "lr": 1.4521196622889644e-06, "epoch": 4.456289978678038, "percentage": 89.11, "elapsed_time": "1:30:35", "remaining_time": "0:11:04"} +{"current_steps": 1048, "total_steps": 1175, "loss": 0.9946603178977966, "lr": 1.4299646121757892e-06, "epoch": 4.460554371002132, "percentage": 89.19, "elapsed_time": "1:30:40", "remaining_time": "0:10:59"} +{"current_steps": 1049, "total_steps": 1175, "loss": 0.9563462734222412, "lr": 1.4079736067545912e-06, "epoch": 4.464818763326226, "percentage": 89.28, "elapsed_time": "1:30:45", "remaining_time": "0:10:54"} +{"current_steps": 1050, "total_steps": 1175, "loss": 0.9560338258743286, "lr": 1.3861468402903634e-06, "epoch": 4.46908315565032, "percentage": 89.36, "elapsed_time": "1:30:50", "remaining_time": "0:10:48"} +{"current_steps": 1051, "total_steps": 1175, "loss": 0.9236841201782227, "lr": 1.3644845055972322e-06, "epoch": 4.473347547974414, "percentage": 89.45, "elapsed_time": "1:30:55", "remaining_time": "0:10:43"} +{"current_steps": 1052, "total_steps": 1175, "loss": 0.9673594832420349, "lr": 1.3429867940367626e-06, "epoch": 4.477611940298507, "percentage": 89.53, "elapsed_time": "1:31:00", "remaining_time": "0:10:38"} +{"current_steps": 1053, "total_steps": 1175, "loss": 0.9610645771026611, "lr": 1.321653895516264e-06, "epoch": 4.481876332622601, "percentage": 89.62, "elapsed_time": "1:31:06", "remaining_time": "0:10:33"} +{"current_steps": 1054, "total_steps": 1175, "loss": 0.9949779510498047, "lr": 1.3004859984871199e-06, "epoch": 4.486140724946695, "percentage": 89.7, "elapsed_time": "1:31:11", "remaining_time": "0:10:28"} +{"current_steps": 1055, "total_steps": 1175, "loss": 0.948443591594696, "lr": 1.279483289943102e-06, "epoch": 4.490405117270789, "percentage": 89.79, "elapsed_time": "1:31:16", "remaining_time": "0:10:22"} +{"current_steps": 1056, "total_steps": 1175, "loss": 0.9572373032569885, "lr": 1.2586459554187558e-06, "epoch": 4.494669509594883, "percentage": 89.87, "elapsed_time": "1:31:21", "remaining_time": "0:10:17"} +{"current_steps": 1057, "total_steps": 1175, "loss": 0.9423749446868896, "lr": 1.2379741789877175e-06, "epoch": 4.498933901918977, "percentage": 89.96, "elapsed_time": "1:31:27", "remaining_time": "0:10:12"} +{"current_steps": 1058, "total_steps": 1175, "loss": 0.959303617477417, "lr": 1.2174681432611245e-06, "epoch": 4.50319829424307, "percentage": 90.04, "elapsed_time": "1:31:32", "remaining_time": "0:10:07"} +{"current_steps": 1059, "total_steps": 1175, "loss": 0.9340790510177612, "lr": 1.1971280293859811e-06, "epoch": 4.507462686567164, "percentage": 90.13, "elapsed_time": "1:31:37", "remaining_time": "0:10:02"} +{"current_steps": 1060, "total_steps": 1175, "loss": 0.9508934020996094, "lr": 1.17695401704357e-06, "epoch": 4.5117270788912585, "percentage": 90.21, "elapsed_time": "1:31:42", "remaining_time": "0:09:56"} +{"current_steps": 1061, "total_steps": 1175, "loss": 0.9892035722732544, "lr": 1.1569462844478552e-06, "epoch": 4.515991471215352, "percentage": 90.3, "elapsed_time": "1:31:48", "remaining_time": "0:09:51"} +{"current_steps": 1062, "total_steps": 1175, "loss": 0.9638294577598572, "lr": 1.1371050083439107e-06, "epoch": 4.520255863539446, "percentage": 90.38, "elapsed_time": "1:31:53", "remaining_time": "0:09:46"} +{"current_steps": 1063, "total_steps": 1175, "loss": 0.9722185134887695, "lr": 1.1174303640063622e-06, "epoch": 4.524520255863539, "percentage": 90.47, "elapsed_time": "1:31:58", "remaining_time": "0:09:41"} +{"current_steps": 1064, "total_steps": 1175, "loss": 0.991715133190155, "lr": 1.097922525237849e-06, "epoch": 4.528784648187633, "percentage": 90.55, "elapsed_time": "1:32:03", "remaining_time": "0:09:36"} +{"current_steps": 1065, "total_steps": 1175, "loss": 0.9921541213989258, "lr": 1.078581664367455e-06, "epoch": 4.533049040511727, "percentage": 90.64, "elapsed_time": "1:32:09", "remaining_time": "0:09:31"} +{"current_steps": 1066, "total_steps": 1175, "loss": 0.9642506837844849, "lr": 1.0594079522492274e-06, "epoch": 4.537313432835821, "percentage": 90.72, "elapsed_time": "1:32:15", "remaining_time": "0:09:25"} +{"current_steps": 1067, "total_steps": 1175, "loss": 0.9328286647796631, "lr": 1.040401558260633e-06, "epoch": 4.541577825159915, "percentage": 90.81, "elapsed_time": "1:32:20", "remaining_time": "0:09:20"} +{"current_steps": 1068, "total_steps": 1175, "loss": 0.9636072516441345, "lr": 1.0215626503010911e-06, "epoch": 4.545842217484008, "percentage": 90.89, "elapsed_time": "1:32:25", "remaining_time": "0:09:15"} +{"current_steps": 1069, "total_steps": 1175, "loss": 0.9635332822799683, "lr": 1.002891394790475e-06, "epoch": 4.550106609808102, "percentage": 90.98, "elapsed_time": "1:32:31", "remaining_time": "0:09:10"} +{"current_steps": 1070, "total_steps": 1175, "loss": 0.9370394945144653, "lr": 9.843879566676273e-07, "epoch": 4.554371002132196, "percentage": 91.06, "elapsed_time": "1:32:36", "remaining_time": "0:09:05"} +{"current_steps": 1071, "total_steps": 1175, "loss": 0.9778931736946106, "lr": 9.660524993889386e-07, "epoch": 4.55863539445629, "percentage": 91.15, "elapsed_time": "1:32:41", "remaining_time": "0:09:00"} +{"current_steps": 1072, "total_steps": 1175, "loss": 1.0181862115859985, "lr": 9.478851849268733e-07, "epoch": 4.562899786780384, "percentage": 91.23, "elapsed_time": "1:32:46", "remaining_time": "0:08:54"} +{"current_steps": 1073, "total_steps": 1175, "loss": 0.9861880540847778, "lr": 9.298861737685527e-07, "epoch": 4.567164179104478, "percentage": 91.32, "elapsed_time": "1:32:52", "remaining_time": "0:08:49"} +{"current_steps": 1074, "total_steps": 1175, "loss": 0.9695085287094116, "lr": 9.120556249143341e-07, "epoch": 4.571428571428571, "percentage": 91.4, "elapsed_time": "1:32:57", "remaining_time": "0:08:44"} +{"current_steps": 1075, "total_steps": 1175, "loss": 0.9866265058517456, "lr": 8.943936958763988e-07, "epoch": 4.575692963752665, "percentage": 91.49, "elapsed_time": "1:33:02", "remaining_time": "0:08:39"} +{"current_steps": 1076, "total_steps": 1175, "loss": 0.963053822517395, "lr": 8.769005426773836e-07, "epoch": 4.5799573560767595, "percentage": 91.57, "elapsed_time": "1:33:08", "remaining_time": "0:08:34"} +{"current_steps": 1077, "total_steps": 1175, "loss": 0.9740028977394104, "lr": 8.595763198489714e-07, "epoch": 4.584221748400853, "percentage": 91.66, "elapsed_time": "1:33:13", "remaining_time": "0:08:28"} +{"current_steps": 1078, "total_steps": 1175, "loss": 0.9532477855682373, "lr": 8.42421180430546e-07, "epoch": 4.588486140724947, "percentage": 91.74, "elapsed_time": "1:33:18", "remaining_time": "0:08:23"} +{"current_steps": 1079, "total_steps": 1175, "loss": 0.9715753793716431, "lr": 8.254352759678386e-07, "epoch": 4.59275053304904, "percentage": 91.83, "elapsed_time": "1:33:24", "remaining_time": "0:08:18"} +{"current_steps": 1080, "total_steps": 1175, "loss": 0.9847027063369751, "lr": 8.086187565115877e-07, "epoch": 4.597014925373134, "percentage": 91.91, "elapsed_time": "1:33:29", "remaining_time": "0:08:13"} +{"current_steps": 1081, "total_steps": 1175, "loss": 0.9372127056121826, "lr": 7.919717706162067e-07, "epoch": 4.601279317697228, "percentage": 92.0, "elapsed_time": "1:33:35", "remaining_time": "0:08:08"} +{"current_steps": 1082, "total_steps": 1175, "loss": 0.9522218704223633, "lr": 7.754944653384777e-07, "epoch": 4.605543710021322, "percentage": 92.09, "elapsed_time": "1:33:41", "remaining_time": "0:08:03"} +{"current_steps": 1083, "total_steps": 1175, "loss": 0.9851837158203125, "lr": 7.591869862362534e-07, "epoch": 4.609808102345416, "percentage": 92.17, "elapsed_time": "1:33:46", "remaining_time": "0:07:57"} +{"current_steps": 1084, "total_steps": 1175, "loss": 0.9888862371444702, "lr": 7.430494773671682e-07, "epoch": 4.61407249466951, "percentage": 92.26, "elapsed_time": "1:33:52", "remaining_time": "0:07:52"} +{"current_steps": 1085, "total_steps": 1175, "loss": 0.9169750809669495, "lr": 7.270820812873714e-07, "epoch": 4.618336886993603, "percentage": 92.34, "elapsed_time": "1:33:57", "remaining_time": "0:07:47"} +{"current_steps": 1086, "total_steps": 1175, "loss": 0.9655887484550476, "lr": 7.112849390502563e-07, "epoch": 4.622601279317697, "percentage": 92.43, "elapsed_time": "1:34:02", "remaining_time": "0:07:42"} +{"current_steps": 1087, "total_steps": 1175, "loss": 0.9082891941070557, "lr": 6.956581902052306e-07, "epoch": 4.6268656716417915, "percentage": 92.51, "elapsed_time": "1:34:08", "remaining_time": "0:07:37"} +{"current_steps": 1088, "total_steps": 1175, "loss": 0.9885333180427551, "lr": 6.802019727964593e-07, "epoch": 4.631130063965885, "percentage": 92.6, "elapsed_time": "1:34:14", "remaining_time": "0:07:32"} +{"current_steps": 1089, "total_steps": 1175, "loss": 0.9694564342498779, "lr": 6.64916423361679e-07, "epoch": 4.635394456289979, "percentage": 92.68, "elapsed_time": "1:34:19", "remaining_time": "0:07:26"} +{"current_steps": 1090, "total_steps": 1175, "loss": 0.9988362193107605, "lr": 6.498016769309567e-07, "epoch": 4.639658848614072, "percentage": 92.77, "elapsed_time": "1:34:24", "remaining_time": "0:07:21"} +{"current_steps": 1091, "total_steps": 1175, "loss": 0.9596368074417114, "lr": 6.348578670255224e-07, "epoch": 4.643923240938166, "percentage": 92.85, "elapsed_time": "1:34:30", "remaining_time": "0:07:16"} +{"current_steps": 1092, "total_steps": 1175, "loss": 0.9877229928970337, "lr": 6.200851256565799e-07, "epoch": 4.6481876332622605, "percentage": 92.94, "elapsed_time": "1:34:35", "remaining_time": "0:07:11"} +{"current_steps": 1093, "total_steps": 1175, "loss": 1.0273993015289307, "lr": 6.054835833241357e-07, "epoch": 4.652452025586354, "percentage": 93.02, "elapsed_time": "1:34:40", "remaining_time": "0:07:06"} +{"current_steps": 1094, "total_steps": 1175, "loss": 0.940179705619812, "lr": 5.910533690158593e-07, "epoch": 4.656716417910448, "percentage": 93.11, "elapsed_time": "1:34:46", "remaining_time": "0:07:01"} +{"current_steps": 1095, "total_steps": 1175, "loss": 0.9359939694404602, "lr": 5.767946102059307e-07, "epoch": 4.660980810234541, "percentage": 93.19, "elapsed_time": "1:34:51", "remaining_time": "0:06:55"} +{"current_steps": 1096, "total_steps": 1175, "loss": 0.9584230780601501, "lr": 5.627074328539173e-07, "epoch": 4.665245202558635, "percentage": 93.28, "elapsed_time": "1:34:56", "remaining_time": "0:06:50"} +{"current_steps": 1097, "total_steps": 1175, "loss": 0.9888830184936523, "lr": 5.487919614036741e-07, "epoch": 4.669509594882729, "percentage": 93.36, "elapsed_time": "1:35:02", "remaining_time": "0:06:45"} +{"current_steps": 1098, "total_steps": 1175, "loss": 0.9847787618637085, "lr": 5.350483187822231e-07, "epoch": 4.673773987206823, "percentage": 93.45, "elapsed_time": "1:35:07", "remaining_time": "0:06:40"} +{"current_steps": 1099, "total_steps": 1175, "loss": 0.9672181606292725, "lr": 5.214766263986848e-07, "epoch": 4.678038379530917, "percentage": 93.53, "elapsed_time": "1:35:12", "remaining_time": "0:06:35"} +{"current_steps": 1100, "total_steps": 1175, "loss": 0.9740506410598755, "lr": 5.080770041431926e-07, "epoch": 4.682302771855011, "percentage": 93.62, "elapsed_time": "1:35:17", "remaining_time": "0:06:29"} +{"current_steps": 1101, "total_steps": 1175, "loss": 1.0037099123001099, "lr": 4.948495703858492e-07, "epoch": 4.686567164179104, "percentage": 93.7, "elapsed_time": "1:35:22", "remaining_time": "0:06:24"} +{"current_steps": 1102, "total_steps": 1175, "loss": 0.9758346080780029, "lr": 4.81794441975667e-07, "epoch": 4.690831556503198, "percentage": 93.79, "elapsed_time": "1:35:28", "remaining_time": "0:06:19"} +{"current_steps": 1103, "total_steps": 1175, "loss": 0.9496323466300964, "lr": 4.689117342395388e-07, "epoch": 4.6950959488272925, "percentage": 93.87, "elapsed_time": "1:35:33", "remaining_time": "0:06:14"} +{"current_steps": 1104, "total_steps": 1175, "loss": 0.9582512974739075, "lr": 4.5620156098122204e-07, "epoch": 4.699360341151386, "percentage": 93.96, "elapsed_time": "1:35:38", "remaining_time": "0:06:09"} +{"current_steps": 1105, "total_steps": 1175, "loss": 0.9745293855667114, "lr": 4.4366403448033334e-07, "epoch": 4.70362473347548, "percentage": 94.04, "elapsed_time": "1:35:43", "remaining_time": "0:06:03"} +{"current_steps": 1106, "total_steps": 1175, "loss": 0.9649718999862671, "lr": 4.3129926549136057e-07, "epoch": 4.707889125799573, "percentage": 94.13, "elapsed_time": "1:35:49", "remaining_time": "0:05:58"} +{"current_steps": 1107, "total_steps": 1175, "loss": 0.9577633738517761, "lr": 4.191073632426701e-07, "epoch": 4.712153518123667, "percentage": 94.21, "elapsed_time": "1:35:54", "remaining_time": "0:05:53"} +{"current_steps": 1108, "total_steps": 1175, "loss": 0.9597268104553223, "lr": 4.0708843543555643e-07, "epoch": 4.7164179104477615, "percentage": 94.3, "elapsed_time": "1:35:59", "remaining_time": "0:05:48"} +{"current_steps": 1109, "total_steps": 1175, "loss": 0.9578450322151184, "lr": 3.95242588243292e-07, "epoch": 4.720682302771855, "percentage": 94.38, "elapsed_time": "1:36:04", "remaining_time": "0:05:43"} +{"current_steps": 1110, "total_steps": 1175, "loss": 0.9749801158905029, "lr": 3.8356992631017e-07, "epoch": 4.724946695095949, "percentage": 94.47, "elapsed_time": "1:36:09", "remaining_time": "0:05:37"} +{"current_steps": 1111, "total_steps": 1175, "loss": 0.9631055593490601, "lr": 3.720705527506008e-07, "epoch": 4.729211087420042, "percentage": 94.55, "elapsed_time": "1:36:14", "remaining_time": "0:05:32"} +{"current_steps": 1112, "total_steps": 1175, "loss": 0.9523324966430664, "lr": 3.60744569148197e-07, "epoch": 4.733475479744136, "percentage": 94.64, "elapsed_time": "1:36:19", "remaining_time": "0:05:27"} +{"current_steps": 1113, "total_steps": 1175, "loss": 0.9814821481704712, "lr": 3.4959207555485873e-07, "epoch": 4.73773987206823, "percentage": 94.72, "elapsed_time": "1:36:25", "remaining_time": "0:05:22"} +{"current_steps": 1114, "total_steps": 1175, "loss": 0.9813393950462341, "lr": 3.3861317048992317e-07, "epoch": 4.742004264392325, "percentage": 94.81, "elapsed_time": "1:36:30", "remaining_time": "0:05:17"} +{"current_steps": 1115, "total_steps": 1175, "loss": 0.9734345078468323, "lr": 3.278079509392562e-07, "epoch": 4.746268656716418, "percentage": 94.89, "elapsed_time": "1:36:35", "remaining_time": "0:05:11"} +{"current_steps": 1116, "total_steps": 1175, "loss": 0.9803054332733154, "lr": 3.171765123544224e-07, "epoch": 4.750533049040512, "percentage": 94.98, "elapsed_time": "1:36:40", "remaining_time": "0:05:06"} +{"current_steps": 1117, "total_steps": 1175, "loss": 0.9418925046920776, "lr": 3.06718948651834e-07, "epoch": 4.754797441364605, "percentage": 95.06, "elapsed_time": "1:36:45", "remaining_time": "0:05:01"} +{"current_steps": 1118, "total_steps": 1175, "loss": 0.9536681771278381, "lr": 2.964353522119168e-07, "epoch": 4.759061833688699, "percentage": 95.15, "elapsed_time": "1:36:51", "remaining_time": "0:04:56"} +{"current_steps": 1119, "total_steps": 1175, "loss": 0.9456994533538818, "lr": 2.863258138783032e-07, "epoch": 4.7633262260127935, "percentage": 95.23, "elapsed_time": "1:36:56", "remaining_time": "0:04:51"} +{"current_steps": 1120, "total_steps": 1175, "loss": 0.990449070930481, "lr": 2.7639042295702245e-07, "epoch": 4.767590618336887, "percentage": 95.32, "elapsed_time": "1:37:01", "remaining_time": "0:04:45"} +{"current_steps": 1121, "total_steps": 1175, "loss": 1.004270076751709, "lr": 2.666292672157056e-07, "epoch": 4.771855010660981, "percentage": 95.4, "elapsed_time": "1:37:06", "remaining_time": "0:04:40"} +{"current_steps": 1122, "total_steps": 1175, "loss": 1.0067514181137085, "lr": 2.570424328828325e-07, "epoch": 4.776119402985074, "percentage": 95.49, "elapsed_time": "1:37:11", "remaining_time": "0:04:35"} +{"current_steps": 1123, "total_steps": 1175, "loss": 0.9487168192863464, "lr": 2.4763000464694377e-07, "epoch": 4.780383795309168, "percentage": 95.57, "elapsed_time": "1:37:16", "remaining_time": "0:04:30"} +{"current_steps": 1124, "total_steps": 1175, "loss": 0.946317195892334, "lr": 2.383920656559102e-07, "epoch": 4.7846481876332625, "percentage": 95.66, "elapsed_time": "1:37:21", "remaining_time": "0:04:25"} +{"current_steps": 1125, "total_steps": 1175, "loss": 0.923102855682373, "lr": 2.2932869751619568e-07, "epoch": 4.788912579957356, "percentage": 95.74, "elapsed_time": "1:37:27", "remaining_time": "0:04:19"} +{"current_steps": 1126, "total_steps": 1175, "loss": 0.9572536945343018, "lr": 2.2043998029212643e-07, "epoch": 4.79317697228145, "percentage": 95.83, "elapsed_time": "1:37:32", "remaining_time": "0:04:14"} +{"current_steps": 1127, "total_steps": 1175, "loss": 0.9736548662185669, "lr": 2.1172599250519398e-07, "epoch": 4.797441364605544, "percentage": 95.91, "elapsed_time": "1:37:37", "remaining_time": "0:04:09"} +{"current_steps": 1128, "total_steps": 1175, "loss": 1.0034470558166504, "lr": 2.0318681113336013e-07, "epoch": 4.801705756929637, "percentage": 96.0, "elapsed_time": "1:37:42", "remaining_time": "0:04:04"} +{"current_steps": 1129, "total_steps": 1175, "loss": 0.9663975834846497, "lr": 1.9482251161037302e-07, "epoch": 4.8059701492537314, "percentage": 96.09, "elapsed_time": "1:37:47", "remaining_time": "0:03:59"} +{"current_steps": 1130, "total_steps": 1175, "loss": 0.9956861734390259, "lr": 1.866331678251032e-07, "epoch": 4.810234541577826, "percentage": 96.17, "elapsed_time": "1:37:52", "remaining_time": "0:03:53"} +{"current_steps": 1131, "total_steps": 1175, "loss": 0.9476275444030762, "lr": 1.7861885212088869e-07, "epoch": 4.814498933901919, "percentage": 96.26, "elapsed_time": "1:37:57", "remaining_time": "0:03:48"} +{"current_steps": 1132, "total_steps": 1175, "loss": 0.975817084312439, "lr": 1.7077963529490204e-07, "epoch": 4.818763326226013, "percentage": 96.34, "elapsed_time": "1:38:02", "remaining_time": "0:03:43"} +{"current_steps": 1133, "total_steps": 1175, "loss": 0.9581156373023987, "lr": 1.6311558659751535e-07, "epoch": 4.823027718550106, "percentage": 96.43, "elapsed_time": "1:38:08", "remaining_time": "0:03:38"} +{"current_steps": 1134, "total_steps": 1175, "loss": 0.9886241555213928, "lr": 1.5562677373169855e-07, "epoch": 4.8272921108742, "percentage": 96.51, "elapsed_time": "1:38:13", "remaining_time": "0:03:33"} +{"current_steps": 1135, "total_steps": 1175, "loss": 1.0126842260360718, "lr": 1.483132628524131e-07, "epoch": 4.8315565031982945, "percentage": 96.6, "elapsed_time": "1:38:18", "remaining_time": "0:03:27"} +{"current_steps": 1136, "total_steps": 1175, "loss": 0.964205801486969, "lr": 1.4117511856603262e-07, "epoch": 4.835820895522388, "percentage": 96.68, "elapsed_time": "1:38:23", "remaining_time": "0:03:22"} +{"current_steps": 1137, "total_steps": 1175, "loss": 0.9916234016418457, "lr": 1.342124039297721e-07, "epoch": 4.840085287846482, "percentage": 96.77, "elapsed_time": "1:38:28", "remaining_time": "0:03:17"} +{"current_steps": 1138, "total_steps": 1175, "loss": 0.9750006198883057, "lr": 1.2742518045112396e-07, "epoch": 4.844349680170575, "percentage": 96.85, "elapsed_time": "1:38:33", "remaining_time": "0:03:12"} +{"current_steps": 1139, "total_steps": 1175, "loss": 0.919538676738739, "lr": 1.2081350808732518e-07, "epoch": 4.848614072494669, "percentage": 96.94, "elapsed_time": "1:38:38", "remaining_time": "0:03:07"} +{"current_steps": 1140, "total_steps": 1175, "loss": 1.0680432319641113, "lr": 1.143774452448243e-07, "epoch": 4.8528784648187635, "percentage": 97.02, "elapsed_time": "1:38:43", "remaining_time": "0:03:01"} +{"current_steps": 1141, "total_steps": 1175, "loss": 0.9682325124740601, "lr": 1.0811704877875528e-07, "epoch": 4.857142857142857, "percentage": 97.11, "elapsed_time": "1:38:49", "remaining_time": "0:02:56"} +{"current_steps": 1142, "total_steps": 1175, "loss": 0.9909142851829529, "lr": 1.0203237399245336e-07, "epoch": 4.861407249466951, "percentage": 97.19, "elapsed_time": "1:38:54", "remaining_time": "0:02:51"} +{"current_steps": 1143, "total_steps": 1175, "loss": 0.9562100172042847, "lr": 9.612347463694882e-08, "epoch": 4.865671641791045, "percentage": 97.28, "elapsed_time": "1:38:59", "remaining_time": "0:02:46"} +{"current_steps": 1144, "total_steps": 1175, "loss": 0.9645106792449951, "lr": 9.039040291050738e-08, "epoch": 4.869936034115138, "percentage": 97.36, "elapsed_time": "1:39:04", "remaining_time": "0:02:41"} +{"current_steps": 1145, "total_steps": 1175, "loss": 0.9630197286605835, "lr": 8.483320945815499e-08, "epoch": 4.8742004264392325, "percentage": 97.45, "elapsed_time": "1:39:09", "remaining_time": "0:02:35"} +{"current_steps": 1146, "total_steps": 1175, "loss": 0.9523903131484985, "lr": 7.945194337124262e-08, "epoch": 4.878464818763327, "percentage": 97.53, "elapsed_time": "1:39:14", "remaining_time": "0:02:30"} +{"current_steps": 1147, "total_steps": 1175, "loss": 1.0448331832885742, "lr": 7.424665218700444e-08, "epoch": 4.88272921108742, "percentage": 97.62, "elapsed_time": "1:39:19", "remaining_time": "0:02:25"} +{"current_steps": 1148, "total_steps": 1175, "loss": 0.9594995379447937, "lr": 6.921738188814254e-08, "epoch": 4.886993603411514, "percentage": 97.7, "elapsed_time": "1:39:24", "remaining_time": "0:02:20"} +{"current_steps": 1149, "total_steps": 1175, "loss": 0.9661248922348022, "lr": 6.436417690241614e-08, "epoch": 4.891257995735607, "percentage": 97.79, "elapsed_time": "1:39:30", "remaining_time": "0:02:15"} +{"current_steps": 1150, "total_steps": 1175, "loss": 0.9736925363540649, "lr": 5.968708010225532e-08, "epoch": 4.895522388059701, "percentage": 97.87, "elapsed_time": "1:39:35", "remaining_time": "0:02:09"} +{"current_steps": 1151, "total_steps": 1175, "loss": 0.9437923431396484, "lr": 5.518613280437901e-08, "epoch": 4.899786780383796, "percentage": 97.96, "elapsed_time": "1:39:40", "remaining_time": "0:02:04"} +{"current_steps": 1152, "total_steps": 1175, "loss": 0.9633027911186218, "lr": 5.0861374769426433e-08, "epoch": 4.904051172707889, "percentage": 98.04, "elapsed_time": "1:39:45", "remaining_time": "0:01:59"} +{"current_steps": 1153, "total_steps": 1175, "loss": 0.9642020463943481, "lr": 4.671284420161071e-08, "epoch": 4.908315565031983, "percentage": 98.13, "elapsed_time": "1:39:50", "remaining_time": "0:01:54"} +{"current_steps": 1154, "total_steps": 1175, "loss": 0.9545692205429077, "lr": 4.274057774838136e-08, "epoch": 4.912579957356077, "percentage": 98.21, "elapsed_time": "1:39:55", "remaining_time": "0:01:49"} +{"current_steps": 1155, "total_steps": 1175, "loss": 0.9212028384208679, "lr": 3.894461050010012e-08, "epoch": 4.91684434968017, "percentage": 98.3, "elapsed_time": "1:40:00", "remaining_time": "0:01:43"} +{"current_steps": 1156, "total_steps": 1175, "loss": 0.9664217233657837, "lr": 3.5324975989725615e-08, "epoch": 4.9211087420042645, "percentage": 98.38, "elapsed_time": "1:40:06", "remaining_time": "0:01:38"} +{"current_steps": 1157, "total_steps": 1175, "loss": 0.9604615569114685, "lr": 3.188170619252473e-08, "epoch": 4.925373134328359, "percentage": 98.47, "elapsed_time": "1:40:11", "remaining_time": "0:01:33"} +{"current_steps": 1158, "total_steps": 1175, "loss": 0.9696751832962036, "lr": 2.8614831525786147e-08, "epoch": 4.929637526652452, "percentage": 98.55, "elapsed_time": "1:40:16", "remaining_time": "0:01:28"} +{"current_steps": 1159, "total_steps": 1175, "loss": 0.9708175659179688, "lr": 2.552438084855613e-08, "epoch": 4.933901918976546, "percentage": 98.64, "elapsed_time": "1:40:21", "remaining_time": "0:01:23"} +{"current_steps": 1160, "total_steps": 1175, "loss": 1.0133110284805298, "lr": 2.2610381461372068e-08, "epoch": 4.938166311300639, "percentage": 98.72, "elapsed_time": "1:40:26", "remaining_time": "0:01:17"} +{"current_steps": 1161, "total_steps": 1175, "loss": 0.9827720522880554, "lr": 1.987285910603598e-08, "epoch": 4.9424307036247335, "percentage": 98.81, "elapsed_time": "1:40:31", "remaining_time": "0:01:12"} +{"current_steps": 1162, "total_steps": 1175, "loss": 0.981905460357666, "lr": 1.7311837965379164e-08, "epoch": 4.946695095948828, "percentage": 98.89, "elapsed_time": "1:40:36", "remaining_time": "0:01:07"} +{"current_steps": 1163, "total_steps": 1175, "loss": 0.9775525331497192, "lr": 1.4927340663046798e-08, "epoch": 4.950959488272921, "percentage": 98.98, "elapsed_time": "1:40:41", "remaining_time": "0:01:02"} +{"current_steps": 1164, "total_steps": 1175, "loss": 0.9846411943435669, "lr": 1.2719388263300325e-08, "epoch": 4.955223880597015, "percentage": 99.06, "elapsed_time": "1:40:47", "remaining_time": "0:00:57"} +{"current_steps": 1165, "total_steps": 1175, "loss": 1.003123164176941, "lr": 1.0688000270839827e-08, "epoch": 4.959488272921108, "percentage": 99.15, "elapsed_time": "1:40:52", "remaining_time": "0:00:51"} +{"current_steps": 1166, "total_steps": 1175, "loss": 0.9979058504104614, "lr": 8.833194630615271e-09, "epoch": 4.963752665245202, "percentage": 99.23, "elapsed_time": "1:40:57", "remaining_time": "0:00:46"} +{"current_steps": 1167, "total_steps": 1175, "loss": 0.9676626920700073, "lr": 7.154987727682194e-09, "epoch": 4.968017057569297, "percentage": 99.32, "elapsed_time": "1:41:02", "remaining_time": "0:00:41"} +{"current_steps": 1168, "total_steps": 1175, "loss": 0.9879981279373169, "lr": 5.6533943870462625e-09, "epoch": 4.97228144989339, "percentage": 99.4, "elapsed_time": "1:41:07", "remaining_time": "0:00:36"} +{"current_steps": 1169, "total_steps": 1175, "loss": 1.0250025987625122, "lr": 4.328427873541152e-09, "epoch": 4.976545842217484, "percentage": 99.49, "elapsed_time": "1:41:12", "remaining_time": "0:00:31"} +{"current_steps": 1170, "total_steps": 1175, "loss": 0.9731056094169617, "lr": 3.1800998917086432e-09, "epoch": 4.980810234541578, "percentage": 99.57, "elapsed_time": "1:41:17", "remaining_time": "0:00:25"} +{"current_steps": 1171, "total_steps": 1175, "loss": 0.9649834632873535, "lr": 2.2084205856920393e-09, "epoch": 4.985074626865671, "percentage": 99.66, "elapsed_time": "1:41:23", "remaining_time": "0:00:20"} +{"current_steps": 1172, "total_steps": 1175, "loss": 1.0229482650756836, "lr": 1.4133985391473482e-09, "epoch": 4.9893390191897655, "percentage": 99.74, "elapsed_time": "1:41:28", "remaining_time": "0:00:15"} +{"current_steps": 1173, "total_steps": 1175, "loss": 0.9098262786865234, "lr": 7.950407751722288e-10, "epoch": 4.99360341151386, "percentage": 99.83, "elapsed_time": "1:41:33", "remaining_time": "0:00:10"} +{"current_steps": 1174, "total_steps": 1175, "loss": 0.9602517485618591, "lr": 3.5335275624159835e-10, "epoch": 4.997867803837953, "percentage": 99.91, "elapsed_time": "1:41:38", "remaining_time": "0:00:05"} +{"current_steps": 1175, "total_steps": 1175, "loss": 1.0513684749603271, "lr": 8.833838415212014e-11, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:41:41", "remaining_time": "0:00:00"} +{"current_steps": 1175, "total_steps": 1175, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "1:41:45", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..99bc070 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,12972 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1175, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0042643923240938165, + "grad_norm": 3.6095034261999874, + "learning_rate": 0.0, + "loss": 1.3665432929992676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35155701637268066, + "step": 1, + "valid_targets_mean": 14037.0, + "valid_targets_min": 2354 + }, + { + "epoch": 0.008528784648187633, + "grad_norm": 3.559692570865644, + "learning_rate": 3.3898305084745766e-07, + "loss": 1.3736059665679932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3430137634277344, + "step": 2, + "valid_targets_mean": 14289.4, + "valid_targets_min": 3217 + }, + { + "epoch": 0.01279317697228145, + "grad_norm": 3.61021997519416, + "learning_rate": 6.779661016949153e-07, + "loss": 1.3179807662963867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32869216799736023, + "step": 3, + "valid_targets_mean": 14767.9, + "valid_targets_min": 2376 + }, + { + "epoch": 0.017057569296375266, + "grad_norm": 3.624420248034226, + "learning_rate": 1.016949152542373e-06, + "loss": 1.37325918674469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3541400134563446, + "step": 4, + "valid_targets_mean": 13822.7, + "valid_targets_min": 1341 + }, + { + "epoch": 0.021321961620469083, + "grad_norm": 3.530294809260869, + "learning_rate": 1.3559322033898307e-06, + "loss": 1.3412843942642212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33716025948524475, + "step": 5, + "valid_targets_mean": 14168.3, + "valid_targets_min": 4549 + }, + { + "epoch": 0.0255863539445629, + "grad_norm": 3.4703468458999343, + "learning_rate": 1.6949152542372882e-06, + "loss": 1.32222318649292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33370861411094666, + "step": 6, + "valid_targets_mean": 14643.4, + "valid_targets_min": 2062 + }, + { + "epoch": 0.029850746268656716, + "grad_norm": 3.2896475452822505, + "learning_rate": 2.033898305084746e-06, + "loss": 1.3199026584625244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33475369215011597, + "step": 7, + "valid_targets_mean": 14722.1, + "valid_targets_min": 5706 + }, + { + "epoch": 0.03411513859275053, + "grad_norm": 3.2575036864876643, + "learning_rate": 2.372881355932204e-06, + "loss": 1.3366254568099976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.35241830348968506, + "step": 8, + "valid_targets_mean": 15226.8, + "valid_targets_min": 4690 + }, + { + "epoch": 0.03837953091684435, + "grad_norm": 2.7465656112914267, + "learning_rate": 2.7118644067796613e-06, + "loss": 1.3531262874603271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3520984649658203, + "step": 9, + "valid_targets_mean": 15026.3, + "valid_targets_min": 11234 + }, + { + "epoch": 0.042643923240938165, + "grad_norm": 2.599760232807994, + "learning_rate": 3.0508474576271192e-06, + "loss": 1.3007540702819824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3108808696269989, + "step": 10, + "valid_targets_mean": 14829.6, + "valid_targets_min": 2873 + }, + { + "epoch": 0.046908315565031986, + "grad_norm": 2.3165871951448813, + "learning_rate": 3.3898305084745763e-06, + "loss": 1.324140191078186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33954882621765137, + "step": 11, + "valid_targets_mean": 14368.6, + "valid_targets_min": 1932 + }, + { + "epoch": 0.0511727078891258, + "grad_norm": 2.0999943066707485, + "learning_rate": 3.7288135593220342e-06, + "loss": 1.2988896369934082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34070098400115967, + "step": 12, + "valid_targets_mean": 14815.2, + "valid_targets_min": 1409 + }, + { + "epoch": 0.05543710021321962, + "grad_norm": 2.0452655458107496, + "learning_rate": 4.067796610169492e-06, + "loss": 1.3134877681732178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3563053607940674, + "step": 13, + "valid_targets_mean": 15171.1, + "valid_targets_min": 9739 + }, + { + "epoch": 0.05970149253731343, + "grad_norm": 1.8633852133385178, + "learning_rate": 4.40677966101695e-06, + "loss": 1.3092262744903564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3214074671268463, + "step": 14, + "valid_targets_mean": 14483.7, + "valid_targets_min": 9364 + }, + { + "epoch": 0.06396588486140725, + "grad_norm": 1.643565378324802, + "learning_rate": 4.745762711864408e-06, + "loss": 1.3089406490325928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3482273519039154, + "step": 15, + "valid_targets_mean": 15076.1, + "valid_targets_min": 3559 + }, + { + "epoch": 0.06823027718550106, + "grad_norm": 1.9389715014736066, + "learning_rate": 5.084745762711865e-06, + "loss": 1.2389326095581055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29310089349746704, + "step": 16, + "valid_targets_mean": 15001.3, + "valid_targets_min": 6070 + }, + { + "epoch": 0.07249466950959488, + "grad_norm": 1.935317154593614, + "learning_rate": 5.423728813559323e-06, + "loss": 1.2891302108764648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31626296043395996, + "step": 17, + "valid_targets_mean": 15154.1, + "valid_targets_min": 2936 + }, + { + "epoch": 0.0767590618336887, + "grad_norm": 1.6797105874067981, + "learning_rate": 5.7627118644067805e-06, + "loss": 1.3249022960662842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3653038442134857, + "step": 18, + "valid_targets_mean": 15572.8, + "valid_targets_min": 11400 + }, + { + "epoch": 0.08102345415778252, + "grad_norm": 1.536225534974527, + "learning_rate": 6.1016949152542385e-06, + "loss": 1.2798724174499512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3003271818161011, + "step": 19, + "valid_targets_mean": 13826.0, + "valid_targets_min": 1943 + }, + { + "epoch": 0.08528784648187633, + "grad_norm": 1.3559392185650754, + "learning_rate": 6.440677966101695e-06, + "loss": 1.3002123832702637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3291531801223755, + "step": 20, + "valid_targets_mean": 14183.2, + "valid_targets_min": 1730 + }, + { + "epoch": 0.08955223880597014, + "grad_norm": 1.5279645356369358, + "learning_rate": 6.779661016949153e-06, + "loss": 1.2929916381835938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3127654790878296, + "step": 21, + "valid_targets_mean": 14698.1, + "valid_targets_min": 2343 + }, + { + "epoch": 0.09381663113006397, + "grad_norm": 1.7988299073867975, + "learning_rate": 7.1186440677966106e-06, + "loss": 1.2641987800598145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3098265528678894, + "step": 22, + "valid_targets_mean": 14052.0, + "valid_targets_min": 5016 + }, + { + "epoch": 0.09808102345415778, + "grad_norm": 1.6343366766286063, + "learning_rate": 7.4576271186440685e-06, + "loss": 1.2987055778503418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3260132968425751, + "step": 23, + "valid_targets_mean": 15747.2, + "valid_targets_min": 7977 + }, + { + "epoch": 0.1023454157782516, + "grad_norm": 1.2620053993929847, + "learning_rate": 7.796610169491526e-06, + "loss": 1.2431552410125732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3168746531009674, + "step": 24, + "valid_targets_mean": 15501.2, + "valid_targets_min": 8014 + }, + { + "epoch": 0.10660980810234541, + "grad_norm": 1.0528680343805368, + "learning_rate": 8.135593220338983e-06, + "loss": 1.238523006439209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2770693302154541, + "step": 25, + "valid_targets_mean": 13349.6, + "valid_targets_min": 2049 + }, + { + "epoch": 0.11087420042643924, + "grad_norm": 1.0516962195407495, + "learning_rate": 8.47457627118644e-06, + "loss": 1.2010830640792847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2899758219718933, + "step": 26, + "valid_targets_mean": 13670.7, + "valid_targets_min": 2377 + }, + { + "epoch": 0.11513859275053305, + "grad_norm": 0.7940073699194692, + "learning_rate": 8.8135593220339e-06, + "loss": 1.1597228050231934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2890457510948181, + "step": 27, + "valid_targets_mean": 15432.1, + "valid_targets_min": 9230 + }, + { + "epoch": 0.11940298507462686, + "grad_norm": 0.8645868310726478, + "learning_rate": 9.152542372881356e-06, + "loss": 1.2551610469818115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2977900505065918, + "step": 28, + "valid_targets_mean": 14637.2, + "valid_targets_min": 5532 + }, + { + "epoch": 0.12366737739872068, + "grad_norm": 0.8359560136170403, + "learning_rate": 9.491525423728815e-06, + "loss": 1.1766114234924316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.303621768951416, + "step": 29, + "valid_targets_mean": 14625.5, + "valid_targets_min": 3306 + }, + { + "epoch": 0.1279317697228145, + "grad_norm": 0.7943946669901569, + "learning_rate": 9.830508474576272e-06, + "loss": 1.2415480613708496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3172162175178528, + "step": 30, + "valid_targets_mean": 15120.5, + "valid_targets_min": 7362 + }, + { + "epoch": 0.13219616204690832, + "grad_norm": 0.7207217500352064, + "learning_rate": 1.016949152542373e-05, + "loss": 1.1535303592681885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28830111026763916, + "step": 31, + "valid_targets_mean": 14608.3, + "valid_targets_min": 1802 + }, + { + "epoch": 0.13646055437100213, + "grad_norm": 0.759995540842057, + "learning_rate": 1.0508474576271188e-05, + "loss": 1.1601849794387817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28582918643951416, + "step": 32, + "valid_targets_mean": 14868.9, + "valid_targets_min": 2381 + }, + { + "epoch": 0.14072494669509594, + "grad_norm": 0.6830737276669886, + "learning_rate": 1.0847457627118645e-05, + "loss": 1.2404439449310303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31198614835739136, + "step": 33, + "valid_targets_mean": 15542.3, + "valid_targets_min": 9239 + }, + { + "epoch": 0.14498933901918976, + "grad_norm": 0.5929314585486694, + "learning_rate": 1.1186440677966102e-05, + "loss": 1.1997580528259277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.314759224653244, + "step": 34, + "valid_targets_mean": 15298.9, + "valid_targets_min": 10829 + }, + { + "epoch": 0.14925373134328357, + "grad_norm": 0.6925944424841863, + "learning_rate": 1.1525423728813561e-05, + "loss": 1.1844682693481445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30469638109207153, + "step": 35, + "valid_targets_mean": 15101.9, + "valid_targets_min": 4685 + }, + { + "epoch": 0.1535181236673774, + "grad_norm": 0.5763489262005752, + "learning_rate": 1.1864406779661018e-05, + "loss": 1.1813849210739136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30967646837234497, + "step": 36, + "valid_targets_mean": 15434.2, + "valid_targets_min": 8304 + }, + { + "epoch": 0.15778251599147122, + "grad_norm": 0.5005241806924791, + "learning_rate": 1.2203389830508477e-05, + "loss": 1.1754765510559082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31492385268211365, + "step": 37, + "valid_targets_mean": 15374.5, + "valid_targets_min": 4818 + }, + { + "epoch": 0.16204690831556504, + "grad_norm": 0.5718581392632069, + "learning_rate": 1.2542372881355932e-05, + "loss": 1.1840746402740479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30204474925994873, + "step": 38, + "valid_targets_mean": 15148.5, + "valid_targets_min": 7056 + }, + { + "epoch": 0.16631130063965885, + "grad_norm": 0.5334830486510865, + "learning_rate": 1.288135593220339e-05, + "loss": 1.2005257606506348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29177170991897583, + "step": 39, + "valid_targets_mean": 14903.9, + "valid_targets_min": 4758 + }, + { + "epoch": 0.17057569296375266, + "grad_norm": 0.5072131399300707, + "learning_rate": 1.3220338983050848e-05, + "loss": 1.1401925086975098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30095767974853516, + "step": 40, + "valid_targets_mean": 14631.2, + "valid_targets_min": 4776 + }, + { + "epoch": 0.17484008528784648, + "grad_norm": 0.4722470199708251, + "learning_rate": 1.3559322033898305e-05, + "loss": 1.1670279502868652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29240870475769043, + "step": 41, + "valid_targets_mean": 14968.9, + "valid_targets_min": 6853 + }, + { + "epoch": 0.1791044776119403, + "grad_norm": 0.5434354066399931, + "learning_rate": 1.3898305084745764e-05, + "loss": 1.130545973777771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2652777433395386, + "step": 42, + "valid_targets_mean": 14805.2, + "valid_targets_min": 7412 + }, + { + "epoch": 0.18336886993603413, + "grad_norm": 0.5058616408220706, + "learning_rate": 1.4237288135593221e-05, + "loss": 1.1756665706634521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28690627217292786, + "step": 43, + "valid_targets_mean": 14401.4, + "valid_targets_min": 2709 + }, + { + "epoch": 0.18763326226012794, + "grad_norm": 0.43897750797538987, + "learning_rate": 1.4576271186440678e-05, + "loss": 1.1239181756973267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2766701579093933, + "step": 44, + "valid_targets_mean": 14983.8, + "valid_targets_min": 1264 + }, + { + "epoch": 0.19189765458422176, + "grad_norm": 0.43365275567782985, + "learning_rate": 1.4915254237288137e-05, + "loss": 1.1461116075515747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2780134081840515, + "step": 45, + "valid_targets_mean": 14252.4, + "valid_targets_min": 1572 + }, + { + "epoch": 0.19616204690831557, + "grad_norm": 0.4512989265238024, + "learning_rate": 1.5254237288135594e-05, + "loss": 1.1434454917907715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2802661955356598, + "step": 46, + "valid_targets_mean": 14348.7, + "valid_targets_min": 3473 + }, + { + "epoch": 0.20042643923240938, + "grad_norm": 0.4159187399960977, + "learning_rate": 1.5593220338983053e-05, + "loss": 1.1971436738967896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31074339151382446, + "step": 47, + "valid_targets_mean": 14754.3, + "valid_targets_min": 2628 + }, + { + "epoch": 0.2046908315565032, + "grad_norm": 0.4287002324444236, + "learning_rate": 1.593220338983051e-05, + "loss": 1.1085567474365234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.292905330657959, + "step": 48, + "valid_targets_mean": 15185.8, + "valid_targets_min": 5859 + }, + { + "epoch": 0.208955223880597, + "grad_norm": 0.40116357705924865, + "learning_rate": 1.6271186440677967e-05, + "loss": 1.1042619943618774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2574828863143921, + "step": 49, + "valid_targets_mean": 14226.5, + "valid_targets_min": 3280 + }, + { + "epoch": 0.21321961620469082, + "grad_norm": 0.43044245903683137, + "learning_rate": 1.6610169491525424e-05, + "loss": 1.1156997680664062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28450608253479004, + "step": 50, + "valid_targets_mean": 14587.2, + "valid_targets_min": 2470 + }, + { + "epoch": 0.21748400852878466, + "grad_norm": 0.40451054894986177, + "learning_rate": 1.694915254237288e-05, + "loss": 1.137367844581604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972930073738098, + "step": 51, + "valid_targets_mean": 15135.2, + "valid_targets_min": 2341 + }, + { + "epoch": 0.22174840085287847, + "grad_norm": 0.40087462915441974, + "learning_rate": 1.728813559322034e-05, + "loss": 1.105331301689148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29150766134262085, + "step": 52, + "valid_targets_mean": 14628.2, + "valid_targets_min": 3150 + }, + { + "epoch": 0.2260127931769723, + "grad_norm": 0.3661850552460705, + "learning_rate": 1.76271186440678e-05, + "loss": 1.0930171012878418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3009355068206787, + "step": 53, + "valid_targets_mean": 14552.2, + "valid_targets_min": 6412 + }, + { + "epoch": 0.2302771855010661, + "grad_norm": 0.3979272702937718, + "learning_rate": 1.7966101694915256e-05, + "loss": 1.1302450895309448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2833348512649536, + "step": 54, + "valid_targets_mean": 15306.2, + "valid_targets_min": 9939 + }, + { + "epoch": 0.2345415778251599, + "grad_norm": 0.3884881406972628, + "learning_rate": 1.8305084745762713e-05, + "loss": 1.152329921722412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29095304012298584, + "step": 55, + "valid_targets_mean": 14843.6, + "valid_targets_min": 6321 + }, + { + "epoch": 0.23880597014925373, + "grad_norm": 0.37487295806975274, + "learning_rate": 1.864406779661017e-05, + "loss": 1.1638445854187012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2842888832092285, + "step": 56, + "valid_targets_mean": 13096.1, + "valid_targets_min": 1559 + }, + { + "epoch": 0.24307036247334754, + "grad_norm": 0.3423858598949549, + "learning_rate": 1.898305084745763e-05, + "loss": 1.1405789852142334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28892263770103455, + "step": 57, + "valid_targets_mean": 13918.6, + "valid_targets_min": 4167 + }, + { + "epoch": 0.24733475479744135, + "grad_norm": 0.4222702129525128, + "learning_rate": 1.9322033898305087e-05, + "loss": 1.1632418632507324, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3402993977069855, + "step": 58, + "valid_targets_mean": 14827.4, + "valid_targets_min": 5551 + }, + { + "epoch": 0.2515991471215352, + "grad_norm": 0.3657817117369871, + "learning_rate": 1.9661016949152545e-05, + "loss": 1.1034752130508423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28796225786209106, + "step": 59, + "valid_targets_mean": 15212.9, + "valid_targets_min": 6383 + }, + { + "epoch": 0.255863539445629, + "grad_norm": 0.4055278504822321, + "learning_rate": 2e-05, + "loss": 1.1177839040756226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26372405886650085, + "step": 60, + "valid_targets_mean": 13981.9, + "valid_targets_min": 2976 + }, + { + "epoch": 0.2601279317697228, + "grad_norm": 0.40228392304363, + "learning_rate": 2.033898305084746e-05, + "loss": 1.1403781175613403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29810065031051636, + "step": 61, + "valid_targets_mean": 14926.7, + "valid_targets_min": 1673 + }, + { + "epoch": 0.26439232409381663, + "grad_norm": 0.3679776772252244, + "learning_rate": 2.0677966101694916e-05, + "loss": 1.1196714639663696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2828063666820526, + "step": 62, + "valid_targets_mean": 14801.3, + "valid_targets_min": 2559 + }, + { + "epoch": 0.26865671641791045, + "grad_norm": 0.40777945519207404, + "learning_rate": 2.1016949152542376e-05, + "loss": 1.0709521770477295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25932246446609497, + "step": 63, + "valid_targets_mean": 14049.6, + "valid_targets_min": 3022 + }, + { + "epoch": 0.27292110874200426, + "grad_norm": 0.3727146822040382, + "learning_rate": 2.1355932203389833e-05, + "loss": 1.1754413843154907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28779110312461853, + "step": 64, + "valid_targets_mean": 14548.8, + "valid_targets_min": 3780 + }, + { + "epoch": 0.2771855010660981, + "grad_norm": 0.4366871373370923, + "learning_rate": 2.169491525423729e-05, + "loss": 1.1435012817382812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30721503496170044, + "step": 65, + "valid_targets_mean": 14906.1, + "valid_targets_min": 3792 + }, + { + "epoch": 0.2814498933901919, + "grad_norm": 0.4279774688028308, + "learning_rate": 2.2033898305084748e-05, + "loss": 1.1329200267791748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29479724168777466, + "step": 66, + "valid_targets_mean": 14316.4, + "valid_targets_min": 4259 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.400678735476192, + "learning_rate": 2.2372881355932205e-05, + "loss": 1.1038789749145508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2855043411254883, + "step": 67, + "valid_targets_mean": 14869.5, + "valid_targets_min": 1498 + }, + { + "epoch": 0.2899786780383795, + "grad_norm": 0.39497795610151304, + "learning_rate": 2.2711864406779665e-05, + "loss": 1.1426966190338135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30167877674102783, + "step": 68, + "valid_targets_mean": 14551.2, + "valid_targets_min": 4821 + }, + { + "epoch": 0.2942430703624733, + "grad_norm": 0.43715196243542226, + "learning_rate": 2.3050847457627122e-05, + "loss": 1.1619558334350586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2610081434249878, + "step": 69, + "valid_targets_mean": 15006.9, + "valid_targets_min": 4126 + }, + { + "epoch": 0.29850746268656714, + "grad_norm": 0.42497754165786217, + "learning_rate": 2.338983050847458e-05, + "loss": 1.1545188426971436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3084549307823181, + "step": 70, + "valid_targets_mean": 14557.5, + "valid_targets_min": 2311 + }, + { + "epoch": 0.302771855010661, + "grad_norm": 0.39300490131269866, + "learning_rate": 2.3728813559322036e-05, + "loss": 1.0535039901733398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.256248414516449, + "step": 71, + "valid_targets_mean": 14938.7, + "valid_targets_min": 5689 + }, + { + "epoch": 0.3070362473347548, + "grad_norm": 0.4460473113733746, + "learning_rate": 2.406779661016949e-05, + "loss": 1.1028974056243896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27757105231285095, + "step": 72, + "valid_targets_mean": 14522.3, + "valid_targets_min": 2727 + }, + { + "epoch": 0.31130063965884863, + "grad_norm": 0.37242828127501076, + "learning_rate": 2.4406779661016954e-05, + "loss": 1.112041711807251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2917976677417755, + "step": 73, + "valid_targets_mean": 15216.2, + "valid_targets_min": 6987 + }, + { + "epoch": 0.31556503198294245, + "grad_norm": 0.4481446274782485, + "learning_rate": 2.474576271186441e-05, + "loss": 1.1347367763519287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2750678062438965, + "step": 74, + "valid_targets_mean": 15336.4, + "valid_targets_min": 7965 + }, + { + "epoch": 0.31982942430703626, + "grad_norm": 0.43733581410184924, + "learning_rate": 2.5084745762711865e-05, + "loss": 1.1315557956695557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28009670972824097, + "step": 75, + "valid_targets_mean": 14431.6, + "valid_targets_min": 3089 + }, + { + "epoch": 0.32409381663113007, + "grad_norm": 0.3954285378329976, + "learning_rate": 2.5423728813559322e-05, + "loss": 1.1246960163116455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2804466784000397, + "step": 76, + "valid_targets_mean": 14309.1, + "valid_targets_min": 3138 + }, + { + "epoch": 0.3283582089552239, + "grad_norm": 0.4779571096323128, + "learning_rate": 2.576271186440678e-05, + "loss": 1.0751593112945557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2525976300239563, + "step": 77, + "valid_targets_mean": 13998.6, + "valid_targets_min": 2602 + }, + { + "epoch": 0.3326226012793177, + "grad_norm": 0.44585135554412114, + "learning_rate": 2.610169491525424e-05, + "loss": 1.1375093460083008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28445789217948914, + "step": 78, + "valid_targets_mean": 14369.1, + "valid_targets_min": 4167 + }, + { + "epoch": 0.3368869936034115, + "grad_norm": 0.470417374465623, + "learning_rate": 2.6440677966101696e-05, + "loss": 1.0897612571716309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26500076055526733, + "step": 79, + "valid_targets_mean": 14101.2, + "valid_targets_min": 4432 + }, + { + "epoch": 0.3411513859275053, + "grad_norm": 0.6068780185494126, + "learning_rate": 2.6779661016949153e-05, + "loss": 1.0993152856826782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24612003564834595, + "step": 80, + "valid_targets_mean": 13424.2, + "valid_targets_min": 1895 + }, + { + "epoch": 0.34541577825159914, + "grad_norm": 0.45799024316399983, + "learning_rate": 2.711864406779661e-05, + "loss": 1.1197320222854614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2934653162956238, + "step": 81, + "valid_targets_mean": 14789.6, + "valid_targets_min": 4851 + }, + { + "epoch": 0.34968017057569295, + "grad_norm": 0.5782558776257897, + "learning_rate": 2.7457627118644068e-05, + "loss": 1.0771827697753906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27633094787597656, + "step": 82, + "valid_targets_mean": 14809.2, + "valid_targets_min": 7600 + }, + { + "epoch": 0.35394456289978676, + "grad_norm": 0.4390423118562453, + "learning_rate": 2.7796610169491528e-05, + "loss": 1.0495096445083618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25998854637145996, + "step": 83, + "valid_targets_mean": 14823.2, + "valid_targets_min": 3855 + }, + { + "epoch": 0.3582089552238806, + "grad_norm": 0.4477868865873094, + "learning_rate": 2.8135593220338985e-05, + "loss": 1.125932216644287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2996492385864258, + "step": 84, + "valid_targets_mean": 14658.2, + "valid_targets_min": 2691 + }, + { + "epoch": 0.3624733475479744, + "grad_norm": 0.5435108771538101, + "learning_rate": 2.8474576271186442e-05, + "loss": 1.099273681640625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2577955722808838, + "step": 85, + "valid_targets_mean": 13998.5, + "valid_targets_min": 714 + }, + { + "epoch": 0.36673773987206826, + "grad_norm": 0.501630159169242, + "learning_rate": 2.88135593220339e-05, + "loss": 1.0962635278701782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26727327704429626, + "step": 86, + "valid_targets_mean": 14302.5, + "valid_targets_min": 2859 + }, + { + "epoch": 0.37100213219616207, + "grad_norm": 0.711049212898963, + "learning_rate": 2.9152542372881356e-05, + "loss": 1.089374303817749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26695096492767334, + "step": 87, + "valid_targets_mean": 14668.7, + "valid_targets_min": 4710 + }, + { + "epoch": 0.3752665245202559, + "grad_norm": 0.7892843671973578, + "learning_rate": 2.9491525423728817e-05, + "loss": 1.1430811882019043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3085484504699707, + "step": 88, + "valid_targets_mean": 14349.1, + "valid_targets_min": 4164 + }, + { + "epoch": 0.3795309168443497, + "grad_norm": 0.5183689462744986, + "learning_rate": 2.9830508474576274e-05, + "loss": 1.0902841091156006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2737078070640564, + "step": 89, + "valid_targets_mean": 13790.9, + "valid_targets_min": 1776 + }, + { + "epoch": 0.3837953091684435, + "grad_norm": 0.7646312445763587, + "learning_rate": 3.016949152542373e-05, + "loss": 1.1340059041976929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2919940650463104, + "step": 90, + "valid_targets_mean": 14075.8, + "valid_targets_min": 4288 + }, + { + "epoch": 0.3880597014925373, + "grad_norm": 0.64752048623271, + "learning_rate": 3.0508474576271188e-05, + "loss": 1.1458442211151123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2805406153202057, + "step": 91, + "valid_targets_mean": 14182.6, + "valid_targets_min": 1376 + }, + { + "epoch": 0.39232409381663114, + "grad_norm": 0.5454429003352456, + "learning_rate": 3.084745762711865e-05, + "loss": 1.048313856124878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26023590564727783, + "step": 92, + "valid_targets_mean": 14727.0, + "valid_targets_min": 8468 + }, + { + "epoch": 0.39658848614072495, + "grad_norm": 0.5637206883804008, + "learning_rate": 3.1186440677966106e-05, + "loss": 1.1587541103363037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.287053644657135, + "step": 93, + "valid_targets_mean": 14595.0, + "valid_targets_min": 5134 + }, + { + "epoch": 0.40085287846481876, + "grad_norm": 0.5811785866053178, + "learning_rate": 3.152542372881356e-05, + "loss": 1.0834836959838867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2870972752571106, + "step": 94, + "valid_targets_mean": 14092.9, + "valid_targets_min": 1414 + }, + { + "epoch": 0.4051172707889126, + "grad_norm": 0.553967312244566, + "learning_rate": 3.186440677966102e-05, + "loss": 1.0956907272338867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2961192727088928, + "step": 95, + "valid_targets_mean": 14336.4, + "valid_targets_min": 7342 + }, + { + "epoch": 0.4093816631130064, + "grad_norm": 0.5781669241689168, + "learning_rate": 3.2203389830508473e-05, + "loss": 1.0783826112747192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2741866707801819, + "step": 96, + "valid_targets_mean": 14126.5, + "valid_targets_min": 7041 + }, + { + "epoch": 0.4136460554371002, + "grad_norm": 0.6498875686597626, + "learning_rate": 3.2542372881355934e-05, + "loss": 1.0799309015274048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28361836075782776, + "step": 97, + "valid_targets_mean": 15288.1, + "valid_targets_min": 3032 + }, + { + "epoch": 0.417910447761194, + "grad_norm": 0.652805962867391, + "learning_rate": 3.2881355932203394e-05, + "loss": 1.0566236972808838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684245705604553, + "step": 98, + "valid_targets_mean": 15102.7, + "valid_targets_min": 7890 + }, + { + "epoch": 0.42217484008528783, + "grad_norm": 0.7484133821403403, + "learning_rate": 3.322033898305085e-05, + "loss": 1.0985007286071777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.292073130607605, + "step": 99, + "valid_targets_mean": 15020.4, + "valid_targets_min": 2603 + }, + { + "epoch": 0.42643923240938164, + "grad_norm": 0.8870600868691184, + "learning_rate": 3.355932203389831e-05, + "loss": 1.083620548248291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26947855949401855, + "step": 100, + "valid_targets_mean": 14664.2, + "valid_targets_min": 6535 + }, + { + "epoch": 0.43070362473347545, + "grad_norm": 0.9948262878402319, + "learning_rate": 3.389830508474576e-05, + "loss": 1.104163646697998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27336013317108154, + "step": 101, + "valid_targets_mean": 15139.3, + "valid_targets_min": 7516 + }, + { + "epoch": 0.4349680170575693, + "grad_norm": 1.2094358996328345, + "learning_rate": 3.423728813559322e-05, + "loss": 1.1375088691711426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28912967443466187, + "step": 102, + "valid_targets_mean": 15199.2, + "valid_targets_min": 5997 + }, + { + "epoch": 0.43923240938166314, + "grad_norm": 0.5917111610802872, + "learning_rate": 3.457627118644068e-05, + "loss": 1.106834888458252, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28450143337249756, + "step": 103, + "valid_targets_mean": 15446.5, + "valid_targets_min": 7746 + }, + { + "epoch": 0.44349680170575695, + "grad_norm": 0.8378893547218664, + "learning_rate": 3.491525423728814e-05, + "loss": 1.0580928325653076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2566010653972626, + "step": 104, + "valid_targets_mean": 13467.9, + "valid_targets_min": 1542 + }, + { + "epoch": 0.44776119402985076, + "grad_norm": 0.9584890988278837, + "learning_rate": 3.52542372881356e-05, + "loss": 1.0532739162445068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2541162073612213, + "step": 105, + "valid_targets_mean": 15373.5, + "valid_targets_min": 7168 + }, + { + "epoch": 0.4520255863539446, + "grad_norm": 0.76420718622364, + "learning_rate": 3.559322033898305e-05, + "loss": 1.0518145561218262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2692555785179138, + "step": 106, + "valid_targets_mean": 15254.9, + "valid_targets_min": 7796 + }, + { + "epoch": 0.4562899786780384, + "grad_norm": 0.7166791138340219, + "learning_rate": 3.593220338983051e-05, + "loss": 1.0239077806472778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24565693736076355, + "step": 107, + "valid_targets_mean": 15089.0, + "valid_targets_min": 2795 + }, + { + "epoch": 0.4605543710021322, + "grad_norm": 0.848653700045334, + "learning_rate": 3.627118644067797e-05, + "loss": 1.121671199798584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2981266379356384, + "step": 108, + "valid_targets_mean": 14925.8, + "valid_targets_min": 2210 + }, + { + "epoch": 0.464818763326226, + "grad_norm": 0.8008103618988017, + "learning_rate": 3.6610169491525426e-05, + "loss": 1.1195881366729736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27601322531700134, + "step": 109, + "valid_targets_mean": 14718.9, + "valid_targets_min": 3165 + }, + { + "epoch": 0.4690831556503198, + "grad_norm": 0.5038896313627277, + "learning_rate": 3.6949152542372886e-05, + "loss": 1.0606300830841064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2645370066165924, + "step": 110, + "valid_targets_mean": 13860.9, + "valid_targets_min": 1750 + }, + { + "epoch": 0.47334754797441364, + "grad_norm": 0.6981474544248948, + "learning_rate": 3.728813559322034e-05, + "loss": 1.077075481414795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2845897078514099, + "step": 111, + "valid_targets_mean": 15522.3, + "valid_targets_min": 8644 + }, + { + "epoch": 0.47761194029850745, + "grad_norm": 0.7881255811353257, + "learning_rate": 3.76271186440678e-05, + "loss": 1.1480183601379395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28835219144821167, + "step": 112, + "valid_targets_mean": 15039.8, + "valid_targets_min": 3059 + }, + { + "epoch": 0.48187633262260127, + "grad_norm": 0.8593954262553742, + "learning_rate": 3.796610169491526e-05, + "loss": 1.1095085144042969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3078654408454895, + "step": 113, + "valid_targets_mean": 14650.3, + "valid_targets_min": 4726 + }, + { + "epoch": 0.4861407249466951, + "grad_norm": 0.6675081661635298, + "learning_rate": 3.8305084745762714e-05, + "loss": 1.121692419052124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26774919033050537, + "step": 114, + "valid_targets_mean": 14586.5, + "valid_targets_min": 3354 + }, + { + "epoch": 0.4904051172707889, + "grad_norm": 0.4926421652997165, + "learning_rate": 3.8644067796610175e-05, + "loss": 1.062641978263855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2730582356452942, + "step": 115, + "valid_targets_mean": 14931.8, + "valid_targets_min": 2234 + }, + { + "epoch": 0.4946695095948827, + "grad_norm": 0.739902860395688, + "learning_rate": 3.898305084745763e-05, + "loss": 1.091822624206543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2674185037612915, + "step": 116, + "valid_targets_mean": 14651.6, + "valid_targets_min": 4922 + }, + { + "epoch": 0.4989339019189765, + "grad_norm": 0.8457422864663297, + "learning_rate": 3.932203389830509e-05, + "loss": 1.042148470878601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2632845938205719, + "step": 117, + "valid_targets_mean": 15044.4, + "valid_targets_min": 1486 + }, + { + "epoch": 0.5031982942430704, + "grad_norm": 0.8923134861757134, + "learning_rate": 3.966101694915255e-05, + "loss": 1.0997896194458008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25651633739471436, + "step": 118, + "valid_targets_mean": 13800.0, + "valid_targets_min": 1422 + }, + { + "epoch": 0.5074626865671642, + "grad_norm": 0.7633195427196144, + "learning_rate": 4e-05, + "loss": 1.087050199508667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26247042417526245, + "step": 119, + "valid_targets_mean": 14306.9, + "valid_targets_min": 3745 + }, + { + "epoch": 0.511727078891258, + "grad_norm": 0.5565450965383134, + "learning_rate": 3.999991166161585e-05, + "loss": 1.1498842239379883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2769172191619873, + "step": 120, + "valid_targets_mean": 14492.4, + "valid_targets_min": 1617 + }, + { + "epoch": 0.5159914712153518, + "grad_norm": 0.7677755911311013, + "learning_rate": 3.999964664724376e-05, + "loss": 1.090078592300415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25574517250061035, + "step": 121, + "valid_targets_mean": 14547.6, + "valid_targets_min": 2173 + }, + { + "epoch": 0.5202558635394456, + "grad_norm": 0.8690168799669488, + "learning_rate": 3.999920495922483e-05, + "loss": 1.0715370178222656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2652612030506134, + "step": 122, + "valid_targets_mean": 14360.5, + "valid_targets_min": 5181 + }, + { + "epoch": 0.5245202558635395, + "grad_norm": 0.8105117911085009, + "learning_rate": 3.999858660146085e-05, + "loss": 1.084350824356079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26827168464660645, + "step": 123, + "valid_targets_mean": 15934.4, + "valid_targets_min": 11306 + }, + { + "epoch": 0.5287846481876333, + "grad_norm": 0.5485361451611932, + "learning_rate": 3.999779157941431e-05, + "loss": 1.070378303527832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2750994861125946, + "step": 124, + "valid_targets_mean": 15529.0, + "valid_targets_min": 7890 + }, + { + "epoch": 0.5330490405117271, + "grad_norm": 0.7358670167267896, + "learning_rate": 3.99968199001083e-05, + "loss": 1.0922883749008179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2585696578025818, + "step": 125, + "valid_targets_mean": 14589.0, + "valid_targets_min": 3231 + }, + { + "epoch": 0.5373134328358209, + "grad_norm": 0.797597736750579, + "learning_rate": 3.999567157212646e-05, + "loss": 1.043330192565918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27747562527656555, + "step": 126, + "valid_targets_mean": 15039.9, + "valid_targets_min": 2574 + }, + { + "epoch": 0.5415778251599147, + "grad_norm": 0.8052083330157771, + "learning_rate": 3.9994346605612955e-05, + "loss": 1.0537865161895752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2740834355354309, + "step": 127, + "valid_targets_mean": 15032.1, + "valid_targets_min": 3330 + }, + { + "epoch": 0.5458422174840085, + "grad_norm": 0.7059667190927159, + "learning_rate": 3.999284501227232e-05, + "loss": 1.0584338903427124, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25231048464775085, + "step": 128, + "valid_targets_mean": 14215.7, + "valid_targets_min": 2195 + }, + { + "epoch": 0.5501066098081023, + "grad_norm": 0.5789841548740489, + "learning_rate": 3.9991166805369393e-05, + "loss": 1.1101102828979492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28956037759780884, + "step": 129, + "valid_targets_mean": 14395.0, + "valid_targets_min": 2490 + }, + { + "epoch": 0.5543710021321961, + "grad_norm": 0.6010665822792227, + "learning_rate": 3.9989311999729166e-05, + "loss": 1.1048550605773926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2960030138492584, + "step": 130, + "valid_targets_mean": 14569.9, + "valid_targets_min": 5811 + }, + { + "epoch": 0.55863539445629, + "grad_norm": 0.6569701409702448, + "learning_rate": 3.99872806117367e-05, + "loss": 1.0684092044830322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2546845078468323, + "step": 131, + "valid_targets_mean": 13998.8, + "valid_targets_min": 2805 + }, + { + "epoch": 0.5628997867803838, + "grad_norm": 0.5314946187145101, + "learning_rate": 3.998507265933696e-05, + "loss": 1.06695556640625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2615054249763489, + "step": 132, + "valid_targets_mean": 14621.0, + "valid_targets_min": 3960 + }, + { + "epoch": 0.5671641791044776, + "grad_norm": 0.4863285223090466, + "learning_rate": 3.9982688162034624e-05, + "loss": 1.1031931638717651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27331802248954773, + "step": 133, + "valid_targets_mean": 14888.8, + "valid_targets_min": 2856 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.6707730371726137, + "learning_rate": 3.998012714089397e-05, + "loss": 1.1016449928283691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27774661779403687, + "step": 134, + "valid_targets_mean": 14446.8, + "valid_targets_min": 4971 + }, + { + "epoch": 0.5756929637526652, + "grad_norm": 0.5788621605217005, + "learning_rate": 3.997738961853863e-05, + "loss": 1.0966145992279053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28476226329803467, + "step": 135, + "valid_targets_mean": 14908.9, + "valid_targets_min": 2543 + }, + { + "epoch": 0.579957356076759, + "grad_norm": 0.5263939820561895, + "learning_rate": 3.9974475619151445e-05, + "loss": 1.055633783340454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25982269644737244, + "step": 136, + "valid_targets_mean": 15042.8, + "valid_targets_min": 5253 + }, + { + "epoch": 0.5842217484008528, + "grad_norm": 0.6148184853109188, + "learning_rate": 3.997138516847422e-05, + "loss": 1.036048412322998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27854201197624207, + "step": 137, + "valid_targets_mean": 15314.4, + "valid_targets_min": 6815 + }, + { + "epoch": 0.5884861407249466, + "grad_norm": 0.5491786862115935, + "learning_rate": 3.9968118293807476e-05, + "loss": 1.1014585494995117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2563304305076599, + "step": 138, + "valid_targets_mean": 14211.9, + "valid_targets_min": 1760 + }, + { + "epoch": 0.5927505330490405, + "grad_norm": 0.5855027604643234, + "learning_rate": 3.996467502401028e-05, + "loss": 1.0730267763137817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26775574684143066, + "step": 139, + "valid_targets_mean": 15098.0, + "valid_targets_min": 2956 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 0.5642037245742461, + "learning_rate": 3.9961055389499904e-05, + "loss": 1.0382061004638672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2667868137359619, + "step": 140, + "valid_targets_mean": 14864.3, + "valid_targets_min": 6004 + }, + { + "epoch": 0.6012793176972282, + "grad_norm": 0.5472681549512222, + "learning_rate": 3.995725942225162e-05, + "loss": 1.0722460746765137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508922219276428, + "step": 141, + "valid_targets_mean": 13533.8, + "valid_targets_min": 2100 + }, + { + "epoch": 0.605543710021322, + "grad_norm": 0.4851680900294786, + "learning_rate": 3.995328715579839e-05, + "loss": 1.0785164833068848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27565503120422363, + "step": 142, + "valid_targets_mean": 15253.0, + "valid_targets_min": 7270 + }, + { + "epoch": 0.6098081023454158, + "grad_norm": 0.5680145964228549, + "learning_rate": 3.994913862523058e-05, + "loss": 1.0484199523925781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2863396108150482, + "step": 143, + "valid_targets_mean": 15135.0, + "valid_targets_min": 2961 + }, + { + "epoch": 0.6140724946695096, + "grad_norm": 0.4764345561789664, + "learning_rate": 3.9944813867195624e-05, + "loss": 1.0812712907791138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22366584837436676, + "step": 144, + "valid_targets_mean": 12900.4, + "valid_targets_min": 976 + }, + { + "epoch": 0.6183368869936035, + "grad_norm": 0.5015035827929697, + "learning_rate": 3.9940312919897744e-05, + "loss": 1.1256424188613892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30476629734039307, + "step": 145, + "valid_targets_mean": 15631.7, + "valid_targets_min": 9492 + }, + { + "epoch": 0.6226012793176973, + "grad_norm": 0.47020999167538946, + "learning_rate": 3.993563582309759e-05, + "loss": 1.050999402999878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2459922432899475, + "step": 146, + "valid_targets_mean": 15147.3, + "valid_targets_min": 4251 + }, + { + "epoch": 0.6268656716417911, + "grad_norm": 0.5699061540685109, + "learning_rate": 3.993078261811186e-05, + "loss": 1.0804365873336792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27163904905319214, + "step": 147, + "valid_targets_mean": 15246.7, + "valid_targets_min": 1331 + }, + { + "epoch": 0.6311300639658849, + "grad_norm": 0.5584228247861476, + "learning_rate": 3.9925753347813e-05, + "loss": 1.130464792251587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2737225890159607, + "step": 148, + "valid_targets_mean": 14379.8, + "valid_targets_min": 2708 + }, + { + "epoch": 0.6353944562899787, + "grad_norm": 0.47830315667039336, + "learning_rate": 3.992054805662876e-05, + "loss": 1.1157536506652832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27968811988830566, + "step": 149, + "valid_targets_mean": 14061.5, + "valid_targets_min": 2210 + }, + { + "epoch": 0.6396588486140725, + "grad_norm": 0.5499285487136879, + "learning_rate": 3.991516679054185e-05, + "loss": 1.0605140924453735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2635878324508667, + "step": 150, + "valid_targets_mean": 14323.0, + "valid_targets_min": 5017 + }, + { + "epoch": 0.6439232409381663, + "grad_norm": 0.6044012708282325, + "learning_rate": 3.9909609597089496e-05, + "loss": 1.0675933361053467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2633005976676941, + "step": 151, + "valid_targets_mean": 14257.8, + "valid_targets_min": 3979 + }, + { + "epoch": 0.6481876332622601, + "grad_norm": 0.6358081635077131, + "learning_rate": 3.9903876525363055e-05, + "loss": 1.0996378660202026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26810404658317566, + "step": 152, + "valid_targets_mean": 14785.4, + "valid_targets_min": 7633 + }, + { + "epoch": 0.652452025586354, + "grad_norm": 0.6458024943907579, + "learning_rate": 3.989796762600755e-05, + "loss": 1.072128415107727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2497999668121338, + "step": 153, + "valid_targets_mean": 14481.2, + "valid_targets_min": 4915 + }, + { + "epoch": 0.6567164179104478, + "grad_norm": 0.5745683876644413, + "learning_rate": 3.9891882951221246e-05, + "loss": 1.1608052253723145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25260549783706665, + "step": 154, + "valid_targets_mean": 13851.4, + "valid_targets_min": 2001 + }, + { + "epoch": 0.6609808102345416, + "grad_norm": 0.4803649247442655, + "learning_rate": 3.988562255475518e-05, + "loss": 1.043982982635498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26936691999435425, + "step": 155, + "valid_targets_mean": 15546.7, + "valid_targets_min": 6669 + }, + { + "epoch": 0.6652452025586354, + "grad_norm": 0.6486434078035109, + "learning_rate": 3.987918649191268e-05, + "loss": 1.0851833820343018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2768741846084595, + "step": 156, + "valid_targets_mean": 14214.1, + "valid_targets_min": 5957 + }, + { + "epoch": 0.6695095948827292, + "grad_norm": 0.6615015585222541, + "learning_rate": 3.987257481954888e-05, + "loss": 1.0836174488067627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29727786779403687, + "step": 157, + "valid_targets_mean": 14841.3, + "valid_targets_min": 5718 + }, + { + "epoch": 0.673773987206823, + "grad_norm": 0.6947073895856398, + "learning_rate": 3.9865787596070236e-05, + "loss": 1.0783438682556152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2678397297859192, + "step": 158, + "valid_targets_mean": 14575.0, + "valid_targets_min": 5142 + }, + { + "epoch": 0.6780383795309168, + "grad_norm": 0.5751686277845317, + "learning_rate": 3.9858824881433975e-05, + "loss": 1.0921587944030762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25064414739608765, + "step": 159, + "valid_targets_mean": 14356.6, + "valid_targets_min": 4878 + }, + { + "epoch": 0.6823027718550106, + "grad_norm": 0.5503535429377369, + "learning_rate": 3.9851686737147585e-05, + "loss": 1.1289031505584717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2460019886493683, + "step": 160, + "valid_targets_mean": 13718.2, + "valid_targets_min": 805 + }, + { + "epoch": 0.6865671641791045, + "grad_norm": 0.5708076518560402, + "learning_rate": 3.9844373226268305e-05, + "loss": 1.0423595905303955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2565052807331085, + "step": 161, + "valid_targets_mean": 14228.8, + "valid_targets_min": 2382 + }, + { + "epoch": 0.6908315565031983, + "grad_norm": 0.5174361675534761, + "learning_rate": 3.983688441340249e-05, + "loss": 1.109586477279663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29132208228111267, + "step": 162, + "valid_targets_mean": 14159.3, + "valid_targets_min": 2792 + }, + { + "epoch": 0.6950959488272921, + "grad_norm": 0.5350993430608216, + "learning_rate": 3.98292203647051e-05, + "loss": 1.0937280654907227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2752472162246704, + "step": 163, + "valid_targets_mean": 14302.1, + "valid_targets_min": 3711 + }, + { + "epoch": 0.6993603411513859, + "grad_norm": 0.4756314468049185, + "learning_rate": 3.982138114787912e-05, + "loss": 1.0845508575439453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26880186796188354, + "step": 164, + "valid_targets_mean": 14744.2, + "valid_targets_min": 6098 + }, + { + "epoch": 0.7036247334754797, + "grad_norm": 0.5062212740230443, + "learning_rate": 3.98133668321749e-05, + "loss": 1.057763695716858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2630884349346161, + "step": 165, + "valid_targets_mean": 14565.0, + "valid_targets_min": 3680 + }, + { + "epoch": 0.7078891257995735, + "grad_norm": 0.6144712387209604, + "learning_rate": 3.980517748838963e-05, + "loss": 1.1555659770965576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27543964982032776, + "step": 166, + "valid_targets_mean": 13850.9, + "valid_targets_min": 1110 + }, + { + "epoch": 0.7121535181236673, + "grad_norm": 0.5547824607569634, + "learning_rate": 3.979681318886664e-05, + "loss": 1.0604078769683838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2662920653820038, + "step": 167, + "valid_targets_mean": 14264.0, + "valid_targets_min": 2853 + }, + { + "epoch": 0.7164179104477612, + "grad_norm": 0.4947741759818403, + "learning_rate": 3.978827400749481e-05, + "loss": 1.0976730585098267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25428158044815063, + "step": 168, + "valid_targets_mean": 14187.2, + "valid_targets_min": 1974 + }, + { + "epoch": 0.720682302771855, + "grad_norm": 0.4156843059384637, + "learning_rate": 3.977956001970788e-05, + "loss": 1.1516985893249512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2969016134738922, + "step": 169, + "valid_targets_mean": 15474.6, + "valid_targets_min": 8501 + }, + { + "epoch": 0.7249466950959488, + "grad_norm": 0.5895819516860525, + "learning_rate": 3.977067130248381e-05, + "loss": 1.090247392654419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2736845016479492, + "step": 170, + "valid_targets_mean": 15222.5, + "valid_targets_min": 5620 + }, + { + "epoch": 0.7292110874200426, + "grad_norm": 0.5849578776142048, + "learning_rate": 3.9761607934344095e-05, + "loss": 1.0230085849761963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24778780341148376, + "step": 171, + "valid_targets_mean": 13855.9, + "valid_targets_min": 3112 + }, + { + "epoch": 0.7334754797441365, + "grad_norm": 0.4548579226147538, + "learning_rate": 3.975236999535306e-05, + "loss": 1.0465095043182373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252557635307312, + "step": 172, + "valid_targets_mean": 14650.7, + "valid_targets_min": 3113 + }, + { + "epoch": 0.7377398720682303, + "grad_norm": 0.5269196435658914, + "learning_rate": 3.974295756711717e-05, + "loss": 1.0935044288635254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26230406761169434, + "step": 173, + "valid_targets_mean": 14595.8, + "valid_targets_min": 2880 + }, + { + "epoch": 0.7420042643923241, + "grad_norm": 0.5082604397593701, + "learning_rate": 3.9733370732784296e-05, + "loss": 1.115492820739746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2864971160888672, + "step": 174, + "valid_targets_mean": 14760.0, + "valid_targets_min": 2181 + }, + { + "epoch": 0.746268656716418, + "grad_norm": 0.42559255705028526, + "learning_rate": 3.972360957704298e-05, + "loss": 1.1452744007110596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24954742193222046, + "step": 175, + "valid_targets_mean": 14682.2, + "valid_targets_min": 1778 + }, + { + "epoch": 0.7505330490405118, + "grad_norm": 0.5072042814146205, + "learning_rate": 3.97136741861217e-05, + "loss": 1.0543792247772217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2778635621070862, + "step": 176, + "valid_targets_mean": 15725.2, + "valid_targets_min": 10505 + }, + { + "epoch": 0.7547974413646056, + "grad_norm": 0.5132699398104911, + "learning_rate": 3.970356464778808e-05, + "loss": 1.089555025100708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2771134376525879, + "step": 177, + "valid_targets_mean": 14786.6, + "valid_targets_min": 2882 + }, + { + "epoch": 0.7590618336886994, + "grad_norm": 0.4165975680037749, + "learning_rate": 3.969328105134817e-05, + "loss": 1.077789068222046, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2723352909088135, + "step": 178, + "valid_targets_mean": 15287.1, + "valid_targets_min": 1532 + }, + { + "epoch": 0.7633262260127932, + "grad_norm": 0.5047990050415179, + "learning_rate": 3.9682823487645584e-05, + "loss": 1.0952332019805908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27827781438827515, + "step": 179, + "valid_targets_mean": 14615.7, + "valid_targets_min": 5225 + }, + { + "epoch": 0.767590618336887, + "grad_norm": 0.539807553746683, + "learning_rate": 3.9672192049060745e-05, + "loss": 1.05403733253479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24824458360671997, + "step": 180, + "valid_targets_mean": 14464.0, + "valid_targets_min": 3973 + }, + { + "epoch": 0.7718550106609808, + "grad_norm": 0.4146757724736937, + "learning_rate": 3.966138682951008e-05, + "loss": 1.091308355331421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2893642783164978, + "step": 181, + "valid_targets_mean": 15169.6, + "valid_targets_min": 3780 + }, + { + "epoch": 0.7761194029850746, + "grad_norm": 0.40834987432982356, + "learning_rate": 3.9650407924445147e-05, + "loss": 1.1261098384857178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2868396043777466, + "step": 182, + "valid_targets_mean": 14721.4, + "valid_targets_min": 1440 + }, + { + "epoch": 0.7803837953091685, + "grad_norm": 0.5175874864584123, + "learning_rate": 3.963925543085181e-05, + "loss": 1.0834410190582275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2594006061553955, + "step": 183, + "valid_targets_mean": 14252.9, + "valid_targets_min": 4037 + }, + { + "epoch": 0.7846481876332623, + "grad_norm": 0.4684557852543165, + "learning_rate": 3.96279294472494e-05, + "loss": 1.0013039112091064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2477436661720276, + "step": 184, + "valid_targets_mean": 14142.4, + "valid_targets_min": 1337 + }, + { + "epoch": 0.7889125799573561, + "grad_norm": 0.44505492822146303, + "learning_rate": 3.961643007368984e-05, + "loss": 1.0587292909622192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2760450839996338, + "step": 185, + "valid_targets_mean": 15403.3, + "valid_targets_min": 10688 + }, + { + "epoch": 0.7931769722814499, + "grad_norm": 0.4344064548320857, + "learning_rate": 3.960475741175671e-05, + "loss": 1.1106066703796387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28772372007369995, + "step": 186, + "valid_targets_mean": 14154.4, + "valid_targets_min": 1176 + }, + { + "epoch": 0.7974413646055437, + "grad_norm": 0.43159567556898226, + "learning_rate": 3.959291156456444e-05, + "loss": 1.0540430545806885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685752511024475, + "step": 187, + "valid_targets_mean": 14308.6, + "valid_targets_min": 1306 + }, + { + "epoch": 0.8017057569296375, + "grad_norm": 0.5410700638072535, + "learning_rate": 3.9580892636757334e-05, + "loss": 1.039066195487976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26641255617141724, + "step": 188, + "valid_targets_mean": 14659.4, + "valid_targets_min": 1687 + }, + { + "epoch": 0.8059701492537313, + "grad_norm": 0.6059334352321268, + "learning_rate": 3.9568700734508645e-05, + "loss": 1.0647523403167725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26160457730293274, + "step": 189, + "valid_targets_mean": 14306.6, + "valid_targets_min": 1896 + }, + { + "epoch": 0.8102345415778252, + "grad_norm": 0.4489821078015655, + "learning_rate": 3.955633596551967e-05, + "loss": 1.0983606576919556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2925269305706024, + "step": 190, + "valid_targets_mean": 14534.1, + "valid_targets_min": 2343 + }, + { + "epoch": 0.814498933901919, + "grad_norm": 0.45895194414412344, + "learning_rate": 3.9543798439018776e-05, + "loss": 1.0771918296813965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2698771357536316, + "step": 191, + "valid_targets_mean": 14733.8, + "valid_targets_min": 2367 + }, + { + "epoch": 0.8187633262260128, + "grad_norm": 0.5218764358198349, + "learning_rate": 3.953108826576046e-05, + "loss": 1.0556185245513916, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29757291078567505, + "step": 192, + "valid_targets_mean": 15014.3, + "valid_targets_min": 3991 + }, + { + "epoch": 0.8230277185501066, + "grad_norm": 0.4414775718360485, + "learning_rate": 3.9518205558024334e-05, + "loss": 1.0616166591644287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24648046493530273, + "step": 193, + "valid_targets_mean": 14510.7, + "valid_targets_min": 2499 + }, + { + "epoch": 0.8272921108742004, + "grad_norm": 0.48302874637309473, + "learning_rate": 3.9505150429614154e-05, + "loss": 1.057494878768921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2708985209465027, + "step": 194, + "valid_targets_mean": 14285.1, + "valid_targets_min": 4255 + }, + { + "epoch": 0.8315565031982942, + "grad_norm": 0.47971106129306745, + "learning_rate": 3.949192299585681e-05, + "loss": 1.0607072114944458, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23464509844779968, + "step": 195, + "valid_targets_mean": 13950.8, + "valid_targets_min": 3588 + }, + { + "epoch": 0.835820895522388, + "grad_norm": 0.5123313865384012, + "learning_rate": 3.9478523373601325e-05, + "loss": 1.062612533569336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2731626629829407, + "step": 196, + "valid_targets_mean": 14909.9, + "valid_targets_min": 6385 + }, + { + "epoch": 0.8400852878464818, + "grad_norm": 0.47835288821616884, + "learning_rate": 3.946495168121778e-05, + "loss": 1.0612168312072754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28909754753112793, + "step": 197, + "valid_targets_mean": 14388.2, + "valid_targets_min": 4776 + }, + { + "epoch": 0.8443496801705757, + "grad_norm": 0.4652131056026324, + "learning_rate": 3.9451208038596325e-05, + "loss": 1.047271728515625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2545679211616516, + "step": 198, + "valid_targets_mean": 14635.1, + "valid_targets_min": 4017 + }, + { + "epoch": 0.8486140724946695, + "grad_norm": 0.45502944460531947, + "learning_rate": 3.943729256714608e-05, + "loss": 1.062045931816101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27325451374053955, + "step": 199, + "valid_targets_mean": 15327.0, + "valid_targets_min": 9625 + }, + { + "epoch": 0.8528784648187633, + "grad_norm": 0.5633842836653237, + "learning_rate": 3.942320538979408e-05, + "loss": 1.0524030923843384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26734107732772827, + "step": 200, + "valid_targets_mean": 15212.5, + "valid_targets_min": 6862 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 0.4506850114722311, + "learning_rate": 3.9408946630984144e-05, + "loss": 1.0022788047790527, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2606180012226105, + "step": 201, + "valid_targets_mean": 15162.9, + "valid_targets_min": 6042 + }, + { + "epoch": 0.8614072494669509, + "grad_norm": 0.4766784153086289, + "learning_rate": 3.939451641667587e-05, + "loss": 1.0410001277923584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26700738072395325, + "step": 202, + "valid_targets_mean": 15394.2, + "valid_targets_min": 5438 + }, + { + "epoch": 0.8656716417910447, + "grad_norm": 0.4928195041878012, + "learning_rate": 3.937991487434342e-05, + "loss": 1.0641515254974365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26238715648651123, + "step": 203, + "valid_targets_mean": 14678.1, + "valid_targets_min": 6220 + }, + { + "epoch": 0.8699360341151386, + "grad_norm": 0.4383260035803565, + "learning_rate": 3.9365142132974484e-05, + "loss": 1.13057541847229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31222638487815857, + "step": 204, + "valid_targets_mean": 14939.2, + "valid_targets_min": 3887 + }, + { + "epoch": 0.8742004264392325, + "grad_norm": 0.5422248497475302, + "learning_rate": 3.935019832306905e-05, + "loss": 1.0646021366119385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27387237548828125, + "step": 205, + "valid_targets_mean": 14681.6, + "valid_targets_min": 5499 + }, + { + "epoch": 0.8784648187633263, + "grad_norm": 0.6087441640111857, + "learning_rate": 3.933508357663832e-05, + "loss": 1.0732862949371338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2741767168045044, + "step": 206, + "valid_targets_mean": 14105.4, + "valid_targets_min": 4262 + }, + { + "epoch": 0.8827292110874201, + "grad_norm": 0.47249365924433767, + "learning_rate": 3.9319798027203544e-05, + "loss": 1.0405564308166504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.270063579082489, + "step": 207, + "valid_targets_mean": 14148.8, + "valid_targets_min": 575 + }, + { + "epoch": 0.8869936034115139, + "grad_norm": 0.5119659019318072, + "learning_rate": 3.930434180979478e-05, + "loss": 1.0693408250808716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2622353434562683, + "step": 208, + "valid_targets_mean": 13855.8, + "valid_targets_min": 1919 + }, + { + "epoch": 0.8912579957356077, + "grad_norm": 0.5689354263807636, + "learning_rate": 3.928871506094975e-05, + "loss": 1.0629595518112183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27715960144996643, + "step": 209, + "valid_targets_mean": 15001.1, + "valid_targets_min": 11504 + }, + { + "epoch": 0.8955223880597015, + "grad_norm": 0.5404150402667937, + "learning_rate": 3.927291791871264e-05, + "loss": 1.0810761451721191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2562071681022644, + "step": 210, + "valid_targets_mean": 14553.4, + "valid_targets_min": 3171 + }, + { + "epoch": 0.8997867803837953, + "grad_norm": 0.6095076613029825, + "learning_rate": 3.925695052263284e-05, + "loss": 1.069692611694336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25801941752433777, + "step": 211, + "valid_targets_mean": 14692.5, + "valid_targets_min": 2295 + }, + { + "epoch": 0.9040511727078892, + "grad_norm": 0.43981803829640587, + "learning_rate": 3.924081301376375e-05, + "loss": 1.043962836265564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2519513964653015, + "step": 212, + "valid_targets_mean": 14521.2, + "valid_targets_min": 1843 + }, + { + "epoch": 0.908315565031983, + "grad_norm": 0.5055732416079766, + "learning_rate": 3.9224505534661525e-05, + "loss": 1.0576932430267334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23972484469413757, + "step": 213, + "valid_targets_mean": 14508.8, + "valid_targets_min": 4589 + }, + { + "epoch": 0.9125799573560768, + "grad_norm": 0.5624113760542802, + "learning_rate": 3.92080282293838e-05, + "loss": 1.07790207862854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28238582611083984, + "step": 214, + "valid_targets_mean": 14945.4, + "valid_targets_min": 1789 + }, + { + "epoch": 0.9168443496801706, + "grad_norm": 0.4579607500075873, + "learning_rate": 3.9191381243488417e-05, + "loss": 1.0570908784866333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2734872102737427, + "step": 215, + "valid_targets_mean": 14692.0, + "valid_targets_min": 3666 + }, + { + "epoch": 0.9211087420042644, + "grad_norm": 0.5891814686766607, + "learning_rate": 3.9174564724032167e-05, + "loss": 1.0729179382324219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23688025772571564, + "step": 216, + "valid_targets_mean": 13916.4, + "valid_targets_min": 934 + }, + { + "epoch": 0.9253731343283582, + "grad_norm": 0.4276149009317814, + "learning_rate": 3.9157578819569455e-05, + "loss": 1.0518217086791992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26122087240219116, + "step": 217, + "valid_targets_mean": 14793.8, + "valid_targets_min": 5449 + }, + { + "epoch": 0.929637526652452, + "grad_norm": 0.49226769754512945, + "learning_rate": 3.9140423680151036e-05, + "loss": 1.046657919883728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2635388970375061, + "step": 218, + "valid_targets_mean": 14376.8, + "valid_targets_min": 2255 + }, + { + "epoch": 0.9339019189765458, + "grad_norm": 0.49153221979541967, + "learning_rate": 3.9123099457322625e-05, + "loss": 1.1028754711151123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2881706655025482, + "step": 219, + "valid_targets_mean": 14753.6, + "valid_targets_min": 2483 + }, + { + "epoch": 0.9381663113006397, + "grad_norm": 0.4206683982581794, + "learning_rate": 3.9105606304123605e-05, + "loss": 1.0750335454940796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2556455433368683, + "step": 220, + "valid_targets_mean": 15313.4, + "valid_targets_min": 1380 + }, + { + "epoch": 0.9424307036247335, + "grad_norm": 0.518948199595769, + "learning_rate": 3.908794437508567e-05, + "loss": 1.0630940198898315, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26047244668006897, + "step": 221, + "valid_targets_mean": 14588.6, + "valid_targets_min": 7319 + }, + { + "epoch": 0.9466950959488273, + "grad_norm": 0.4833013373073034, + "learning_rate": 3.907011382623145e-05, + "loss": 1.0762577056884766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26928189396858215, + "step": 222, + "valid_targets_mean": 14791.3, + "valid_targets_min": 2399 + }, + { + "epoch": 0.9509594882729211, + "grad_norm": 0.3960814041459837, + "learning_rate": 3.905211481507313e-05, + "loss": 1.065406322479248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28226155042648315, + "step": 223, + "valid_targets_mean": 14754.5, + "valid_targets_min": 5518 + }, + { + "epoch": 0.9552238805970149, + "grad_norm": 0.48261177892907264, + "learning_rate": 3.903394750061106e-05, + "loss": 1.0659347772598267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2690155506134033, + "step": 224, + "valid_targets_mean": 14576.5, + "valid_targets_min": 3004 + }, + { + "epoch": 0.9594882729211087, + "grad_norm": 0.44808324622894463, + "learning_rate": 3.9015612043332375e-05, + "loss": 1.0389450788497925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2707400321960449, + "step": 225, + "valid_targets_mean": 14913.6, + "valid_targets_min": 2041 + }, + { + "epoch": 0.9637526652452025, + "grad_norm": 0.47198420641783995, + "learning_rate": 3.8997108605209535e-05, + "loss": 1.03799307346344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540205717086792, + "step": 226, + "valid_targets_mean": 14266.9, + "valid_targets_min": 5364 + }, + { + "epoch": 0.9680170575692963, + "grad_norm": 0.4381815788424525, + "learning_rate": 3.897843734969891e-05, + "loss": 1.0135846138000488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24278923869132996, + "step": 227, + "valid_targets_mean": 15244.7, + "valid_targets_min": 8682 + }, + { + "epoch": 0.9722814498933902, + "grad_norm": 0.4447673018725514, + "learning_rate": 3.895959844173937e-05, + "loss": 1.0680896043777466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2827821969985962, + "step": 228, + "valid_targets_mean": 14613.2, + "valid_targets_min": 5184 + }, + { + "epoch": 0.976545842217484, + "grad_norm": 0.4951715137155527, + "learning_rate": 3.8940592047750774e-05, + "loss": 1.0651593208312988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23733918368816376, + "step": 229, + "valid_targets_mean": 13796.5, + "valid_targets_min": 4055 + }, + { + "epoch": 0.9808102345415778, + "grad_norm": 0.5067672333856692, + "learning_rate": 3.892141833563255e-05, + "loss": 1.0773837566375732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27972668409347534, + "step": 230, + "valid_targets_mean": 15119.4, + "valid_targets_min": 6852 + }, + { + "epoch": 0.9850746268656716, + "grad_norm": 0.46407214737248004, + "learning_rate": 3.8902077474762155e-05, + "loss": 1.0360264778137207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26833784580230713, + "step": 231, + "valid_targets_mean": 14657.4, + "valid_targets_min": 2488 + }, + { + "epoch": 0.9893390191897654, + "grad_norm": 0.4153102558348407, + "learning_rate": 3.888256963599364e-05, + "loss": 1.0562363862991333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2683772146701813, + "step": 232, + "valid_targets_mean": 13907.6, + "valid_targets_min": 2050 + }, + { + "epoch": 0.9936034115138592, + "grad_norm": 0.4123808103332072, + "learning_rate": 3.886289499165609e-05, + "loss": 1.0481302738189697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29726642370224, + "step": 233, + "valid_targets_mean": 15261.6, + "valid_targets_min": 6414 + }, + { + "epoch": 0.997867803837953, + "grad_norm": 0.43109991180516594, + "learning_rate": 3.884305371555215e-05, + "loss": 1.079208493232727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27206656336784363, + "step": 234, + "valid_targets_mean": 14794.5, + "valid_targets_min": 7270 + }, + { + "epoch": 1.0, + "grad_norm": 0.5291174543372417, + "learning_rate": 3.882304598295643e-05, + "loss": 1.1089693307876587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.540338397026062, + "step": 235, + "valid_targets_mean": 14646.9, + "valid_targets_min": 4497 + }, + { + "epoch": 1.004264392324094, + "grad_norm": 0.46209142075677184, + "learning_rate": 3.880287197061402e-05, + "loss": 1.0655412673950195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2537649869918823, + "step": 236, + "valid_targets_mean": 14099.9, + "valid_targets_min": 1532 + }, + { + "epoch": 1.0085287846481876, + "grad_norm": 0.5435573851854639, + "learning_rate": 3.878253185673888e-05, + "loss": 1.045609951019287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26505547761917114, + "step": 237, + "valid_targets_mean": 14961.6, + "valid_targets_min": 4982 + }, + { + "epoch": 1.0127931769722816, + "grad_norm": 0.5413226401381751, + "learning_rate": 3.876202582101229e-05, + "loss": 1.04897141456604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2555493116378784, + "step": 238, + "valid_targets_mean": 14361.9, + "valid_targets_min": 3011 + }, + { + "epoch": 1.0170575692963753, + "grad_norm": 0.4153700682870838, + "learning_rate": 3.874135404458125e-05, + "loss": 1.0530734062194824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2591307759284973, + "step": 239, + "valid_targets_mean": 13943.9, + "valid_targets_min": 2264 + }, + { + "epoch": 1.0213219616204692, + "grad_norm": 0.3891461925005388, + "learning_rate": 3.8720516710056905e-05, + "loss": 1.0717518329620361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26199546456336975, + "step": 240, + "valid_targets_mean": 14627.2, + "valid_targets_min": 2267 + }, + { + "epoch": 1.0255863539445629, + "grad_norm": 0.7135766939621001, + "learning_rate": 3.8699514001512885e-05, + "loss": 1.0576286315917969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684980630874634, + "step": 241, + "valid_targets_mean": 15091.4, + "valid_targets_min": 2326 + }, + { + "epoch": 1.0298507462686568, + "grad_norm": 0.4620797583276411, + "learning_rate": 3.867834610448374e-05, + "loss": 1.026517629623413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597351372241974, + "step": 242, + "valid_targets_mean": 14510.1, + "valid_targets_min": 1886 + }, + { + "epoch": 1.0341151385927505, + "grad_norm": 0.49028085372128044, + "learning_rate": 3.865701320596324e-05, + "loss": 1.0431249141693115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22768917679786682, + "step": 243, + "valid_targets_mean": 13234.3, + "valid_targets_min": 2697 + }, + { + "epoch": 1.0383795309168444, + "grad_norm": 0.40014372336732035, + "learning_rate": 3.863551549440277e-05, + "loss": 1.0454719066619873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2336433231830597, + "step": 244, + "valid_targets_mean": 13984.1, + "valid_targets_min": 2184 + }, + { + "epoch": 1.0426439232409381, + "grad_norm": 0.4286915819783026, + "learning_rate": 3.861385315970964e-05, + "loss": 1.014958143234253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24774692952632904, + "step": 245, + "valid_targets_mean": 13774.0, + "valid_targets_min": 2392 + }, + { + "epoch": 1.046908315565032, + "grad_norm": 0.4891269148797478, + "learning_rate": 3.859202639324542e-05, + "loss": 1.0368402004241943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24918904900550842, + "step": 246, + "valid_targets_mean": 13648.0, + "valid_targets_min": 2810 + }, + { + "epoch": 1.0511727078891258, + "grad_norm": 0.5785437585611787, + "learning_rate": 3.8570035387824214e-05, + "loss": 1.029822826385498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24193452298641205, + "step": 247, + "valid_targets_mean": 14391.8, + "valid_targets_min": 1380 + }, + { + "epoch": 1.0554371002132197, + "grad_norm": 0.49027906008514305, + "learning_rate": 3.8547880337711036e-05, + "loss": 1.0420994758605957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.240075021982193, + "step": 248, + "valid_targets_mean": 13562.6, + "valid_targets_min": 1765 + }, + { + "epoch": 1.0597014925373134, + "grad_norm": 0.4531599901069784, + "learning_rate": 3.8525561438620016e-05, + "loss": 1.092591404914856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31094545125961304, + "step": 249, + "valid_targets_mean": 15091.4, + "valid_targets_min": 1968 + }, + { + "epoch": 1.0639658848614073, + "grad_norm": 0.4789861520712335, + "learning_rate": 3.850307888771269e-05, + "loss": 1.0229907035827637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25899046659469604, + "step": 250, + "valid_targets_mean": 14780.2, + "valid_targets_min": 8635 + }, + { + "epoch": 1.068230277185501, + "grad_norm": 0.5003673261621284, + "learning_rate": 3.848043288359629e-05, + "loss": 1.036152958869934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2403426468372345, + "step": 251, + "valid_targets_mean": 15008.4, + "valid_targets_min": 927 + }, + { + "epoch": 1.072494669509595, + "grad_norm": 0.35225675218456176, + "learning_rate": 3.8457623626321944e-05, + "loss": 1.0302397012710571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2747737765312195, + "step": 252, + "valid_targets_mean": 15544.8, + "valid_targets_min": 9842 + }, + { + "epoch": 1.0767590618336886, + "grad_norm": 0.4592550905412805, + "learning_rate": 3.843465131738296e-05, + "loss": 1.0454399585723877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27861565351486206, + "step": 253, + "valid_targets_mean": 15859.3, + "valid_targets_min": 11105 + }, + { + "epoch": 1.0810234541577826, + "grad_norm": 0.5429737259358417, + "learning_rate": 3.8411516159713e-05, + "loss": 1.025251865386963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2655147314071655, + "step": 254, + "valid_targets_mean": 14621.3, + "valid_targets_min": 1414 + }, + { + "epoch": 1.0852878464818763, + "grad_norm": 0.5125998906864528, + "learning_rate": 3.838821835768431e-05, + "loss": 0.9942444562911987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24379202723503113, + "step": 255, + "valid_targets_mean": 14590.2, + "valid_targets_min": 2797 + }, + { + "epoch": 1.0895522388059702, + "grad_norm": 0.42817212203890226, + "learning_rate": 3.83647581171059e-05, + "loss": 1.0361202955245972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2260512411594391, + "step": 256, + "valid_targets_mean": 14038.0, + "valid_targets_min": 1677 + }, + { + "epoch": 1.0938166311300639, + "grad_norm": 0.44374680049258125, + "learning_rate": 3.8341135645221744e-05, + "loss": 1.018730878829956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2549141049385071, + "step": 257, + "valid_targets_mean": 14797.3, + "valid_targets_min": 4346 + }, + { + "epoch": 1.0980810234541578, + "grad_norm": 0.4205594672534289, + "learning_rate": 3.831735115070895e-05, + "loss": 1.0082337856292725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2516046464443207, + "step": 258, + "valid_targets_mean": 14662.9, + "valid_targets_min": 1517 + }, + { + "epoch": 1.1023454157782515, + "grad_norm": 0.4454694983574608, + "learning_rate": 3.8293404843675904e-05, + "loss": 1.0284898281097412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2286127209663391, + "step": 259, + "valid_targets_mean": 14888.3, + "valid_targets_min": 6449 + }, + { + "epoch": 1.1066098081023454, + "grad_norm": 0.5045885678028411, + "learning_rate": 3.8269296935660395e-05, + "loss": 1.0335543155670166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23934660851955414, + "step": 260, + "valid_targets_mean": 14052.8, + "valid_targets_min": 4177 + }, + { + "epoch": 1.1108742004264391, + "grad_norm": 0.48150807409703467, + "learning_rate": 3.82450276396278e-05, + "loss": 1.0389349460601807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2629929482936859, + "step": 261, + "valid_targets_mean": 14497.7, + "valid_targets_min": 5676 + }, + { + "epoch": 1.115138592750533, + "grad_norm": 0.37723589464290347, + "learning_rate": 3.822059716996916e-05, + "loss": 1.0075374841690063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25467902421951294, + "step": 262, + "valid_targets_mean": 14696.9, + "valid_targets_min": 3243 + }, + { + "epoch": 1.1194029850746268, + "grad_norm": 0.4127260559764244, + "learning_rate": 3.819600574249929e-05, + "loss": 0.9890443086624146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25443321466445923, + "step": 263, + "valid_targets_mean": 15884.4, + "valid_targets_min": 11415 + }, + { + "epoch": 1.1236673773987207, + "grad_norm": 0.5260964350191876, + "learning_rate": 3.817125357445489e-05, + "loss": 1.054425597190857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661350965499878, + "step": 264, + "valid_targets_mean": 14409.9, + "valid_targets_min": 2985 + }, + { + "epoch": 1.1279317697228146, + "grad_norm": 0.46323425077370667, + "learning_rate": 3.814634088449261e-05, + "loss": 1.0141037702560425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2333107590675354, + "step": 265, + "valid_targets_mean": 14820.0, + "valid_targets_min": 5142 + }, + { + "epoch": 1.1321961620469083, + "grad_norm": 0.422437846529809, + "learning_rate": 3.812126789268712e-05, + "loss": 1.0493249893188477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26373741030693054, + "step": 266, + "valid_targets_mean": 15534.1, + "valid_targets_min": 10907 + }, + { + "epoch": 1.136460554371002, + "grad_norm": 0.46961962629963355, + "learning_rate": 3.80960348205292e-05, + "loss": 1.0849034786224365, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24709264934062958, + "step": 267, + "valid_targets_mean": 13681.9, + "valid_targets_min": 2050 + }, + { + "epoch": 1.140724946695096, + "grad_norm": 0.47819964623494876, + "learning_rate": 3.807064189092372e-05, + "loss": 1.0932810306549072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27637046575546265, + "step": 268, + "valid_targets_mean": 14011.6, + "valid_targets_min": 3608 + }, + { + "epoch": 1.1449893390191899, + "grad_norm": 0.4868072859995262, + "learning_rate": 3.804508932818771e-05, + "loss": 0.9904080629348755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24220585823059082, + "step": 269, + "valid_targets_mean": 15122.8, + "valid_targets_min": 8015 + }, + { + "epoch": 1.1492537313432836, + "grad_norm": 0.4307347346802511, + "learning_rate": 3.801937735804838e-05, + "loss": 1.034711241722107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2537466883659363, + "step": 270, + "valid_targets_mean": 14167.3, + "valid_targets_min": 3780 + }, + { + "epoch": 1.1535181236673775, + "grad_norm": 0.41557607247834316, + "learning_rate": 3.799350620764114e-05, + "loss": 1.0160858631134033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.249742329120636, + "step": 271, + "valid_targets_mean": 14951.7, + "valid_targets_min": 4915 + }, + { + "epoch": 1.1577825159914712, + "grad_norm": 0.5268253932391055, + "learning_rate": 3.7967476105507535e-05, + "loss": 1.003743052482605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2506830394268036, + "step": 272, + "valid_targets_mean": 14177.5, + "valid_targets_min": 3884 + }, + { + "epoch": 1.1620469083155651, + "grad_norm": 0.442145519622731, + "learning_rate": 3.7941287281593284e-05, + "loss": 1.0129845142364502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2885182201862335, + "step": 273, + "valid_targets_mean": 15371.1, + "valid_targets_min": 2393 + }, + { + "epoch": 1.1663113006396588, + "grad_norm": 0.3908456257968783, + "learning_rate": 3.7914939967246227e-05, + "loss": 1.0502171516418457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2536545395851135, + "step": 274, + "valid_targets_mean": 14664.4, + "valid_targets_min": 5128 + }, + { + "epoch": 1.1705756929637527, + "grad_norm": 0.3862780244923522, + "learning_rate": 3.7888434395214285e-05, + "loss": 1.0400927066802979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.237798273563385, + "step": 275, + "valid_targets_mean": 13460.7, + "valid_targets_min": 3188 + }, + { + "epoch": 1.1748400852878464, + "grad_norm": 0.4404520569169047, + "learning_rate": 3.786177079964339e-05, + "loss": 1.0391854047775269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2632058262825012, + "step": 276, + "valid_targets_mean": 14753.7, + "valid_targets_min": 6232 + }, + { + "epoch": 1.1791044776119404, + "grad_norm": 0.3756219096684917, + "learning_rate": 3.783494941607544e-05, + "loss": 1.0601963996887207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25917044281959534, + "step": 277, + "valid_targets_mean": 13945.5, + "valid_targets_min": 1337 + }, + { + "epoch": 1.183368869936034, + "grad_norm": 0.451547633414741, + "learning_rate": 3.780797048144621e-05, + "loss": 1.0429885387420654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2507634162902832, + "step": 278, + "valid_targets_mean": 14387.9, + "valid_targets_min": 7158 + }, + { + "epoch": 1.187633262260128, + "grad_norm": 0.4712111097628715, + "learning_rate": 3.7780834234083236e-05, + "loss": 1.050649881362915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2704152464866638, + "step": 279, + "valid_targets_mean": 14935.3, + "valid_targets_min": 3303 + }, + { + "epoch": 1.1918976545842217, + "grad_norm": 0.3669185185782326, + "learning_rate": 3.775354091370376e-05, + "loss": 1.016859769821167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26059484481811523, + "step": 280, + "valid_targets_mean": 15610.1, + "valid_targets_min": 9687 + }, + { + "epoch": 1.1961620469083156, + "grad_norm": 0.4144137396489066, + "learning_rate": 3.772609076141255e-05, + "loss": 0.9994684457778931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2601706385612488, + "step": 281, + "valid_targets_mean": 14643.5, + "valid_targets_min": 6660 + }, + { + "epoch": 1.2004264392324093, + "grad_norm": 0.45567675891650883, + "learning_rate": 3.769848401969982e-05, + "loss": 1.0300400257110596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2580612301826477, + "step": 282, + "valid_targets_mean": 15233.8, + "valid_targets_min": 9758 + }, + { + "epoch": 1.2046908315565032, + "grad_norm": 0.3555022390818503, + "learning_rate": 3.767072093243907e-05, + "loss": 1.0938390493392944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.266682505607605, + "step": 283, + "valid_targets_mean": 14910.6, + "valid_targets_min": 4441 + }, + { + "epoch": 1.208955223880597, + "grad_norm": 0.42569732647686503, + "learning_rate": 3.7642801744884915e-05, + "loss": 1.1198451519012451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2663855254650116, + "step": 284, + "valid_targets_mean": 14378.9, + "valid_targets_min": 1902 + }, + { + "epoch": 1.2132196162046909, + "grad_norm": 0.4127343265468672, + "learning_rate": 3.761472670367096e-05, + "loss": 0.9990887641906738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23396140336990356, + "step": 285, + "valid_targets_mean": 14860.0, + "valid_targets_min": 3398 + }, + { + "epoch": 1.2174840085287846, + "grad_norm": 0.4232592265214671, + "learning_rate": 3.758649605680758e-05, + "loss": 1.0035858154296875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24486322700977325, + "step": 286, + "valid_targets_mean": 14400.8, + "valid_targets_min": 7174 + }, + { + "epoch": 1.2217484008528785, + "grad_norm": 0.47083507604672054, + "learning_rate": 3.755811005367974e-05, + "loss": 1.0343601703643799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26513391733169556, + "step": 287, + "valid_targets_mean": 15103.7, + "valid_targets_min": 8717 + }, + { + "epoch": 1.2260127931769722, + "grad_norm": 0.4170006342101319, + "learning_rate": 3.752956894504481e-05, + "loss": 1.036698818206787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2382010668516159, + "step": 288, + "valid_targets_mean": 13118.1, + "valid_targets_min": 4521 + }, + { + "epoch": 1.2302771855010661, + "grad_norm": 0.45922410144773373, + "learning_rate": 3.750087298303033e-05, + "loss": 1.0224305391311646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2617640495300293, + "step": 289, + "valid_targets_mean": 14181.2, + "valid_targets_min": 1237 + }, + { + "epoch": 1.2345415778251598, + "grad_norm": 0.4639650600037873, + "learning_rate": 3.7472022421131795e-05, + "loss": 1.040165662765503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.279234915971756, + "step": 290, + "valid_targets_mean": 15071.4, + "valid_targets_min": 7071 + }, + { + "epoch": 1.2388059701492538, + "grad_norm": 0.5052614475399082, + "learning_rate": 3.7443017514210406e-05, + "loss": 1.0578022003173828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24848178029060364, + "step": 291, + "valid_targets_mean": 14662.9, + "valid_targets_min": 3892 + }, + { + "epoch": 1.2430703624733475, + "grad_norm": 0.5246082126633732, + "learning_rate": 3.7413858518490825e-05, + "loss": 1.0782644748687744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.277226984500885, + "step": 292, + "valid_targets_mean": 13855.1, + "valid_targets_min": 3518 + }, + { + "epoch": 1.2473347547974414, + "grad_norm": 0.4064959781816505, + "learning_rate": 3.7384545691558895e-05, + "loss": 1.0355021953582764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25371286273002625, + "step": 293, + "valid_targets_mean": 14714.8, + "valid_targets_min": 6099 + }, + { + "epoch": 1.251599147121535, + "grad_norm": 0.5042943693981129, + "learning_rate": 3.735507929235941e-05, + "loss": 1.0233511924743652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2694402039051056, + "step": 294, + "valid_targets_mean": 14589.5, + "valid_targets_min": 2832 + }, + { + "epoch": 1.255863539445629, + "grad_norm": 0.49380678655207805, + "learning_rate": 3.732545958119378e-05, + "loss": 1.0197874307632446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2528603672981262, + "step": 295, + "valid_targets_mean": 14854.3, + "valid_targets_min": 6387 + }, + { + "epoch": 1.260127931769723, + "grad_norm": 0.45062570460712725, + "learning_rate": 3.729568681971774e-05, + "loss": 1.052213191986084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24603819847106934, + "step": 296, + "valid_targets_mean": 13976.9, + "valid_targets_min": 4314 + }, + { + "epoch": 1.2643923240938166, + "grad_norm": 0.3746896617567477, + "learning_rate": 3.726576127093905e-05, + "loss": 1.075683355331421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261888712644577, + "step": 297, + "valid_targets_mean": 15158.6, + "valid_targets_min": 4461 + }, + { + "epoch": 1.2686567164179103, + "grad_norm": 0.41056261687110635, + "learning_rate": 3.7235683199215177e-05, + "loss": 1.021393060684204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2325923889875412, + "step": 298, + "valid_targets_mean": 13554.0, + "valid_targets_min": 3668 + }, + { + "epoch": 1.2729211087420043, + "grad_norm": 0.4935296888861505, + "learning_rate": 3.7205452870250944e-05, + "loss": 1.0408051013946533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24672801792621613, + "step": 299, + "valid_targets_mean": 15081.8, + "valid_targets_min": 5372 + }, + { + "epoch": 1.2771855010660982, + "grad_norm": 0.43061196618725706, + "learning_rate": 3.7175070551096204e-05, + "loss": 0.9903295040130615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24351432919502258, + "step": 300, + "valid_targets_mean": 14829.8, + "valid_targets_min": 5064 + }, + { + "epoch": 1.2814498933901919, + "grad_norm": 0.4823871541432052, + "learning_rate": 3.7144536510143436e-05, + "loss": 1.0472469329833984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2732764482498169, + "step": 301, + "valid_targets_mean": 14287.4, + "valid_targets_min": 1719 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 0.451775820650649, + "learning_rate": 3.711385101712544e-05, + "loss": 1.0415022373199463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2545892298221588, + "step": 302, + "valid_targets_mean": 14111.8, + "valid_targets_min": 714 + }, + { + "epoch": 1.2899786780383795, + "grad_norm": 0.5371461398762536, + "learning_rate": 3.708301434311289e-05, + "loss": 1.0422717332839966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21339184045791626, + "step": 303, + "valid_targets_mean": 13497.2, + "valid_targets_min": 1108 + }, + { + "epoch": 1.2942430703624734, + "grad_norm": 0.5271872774214361, + "learning_rate": 3.7052026760511996e-05, + "loss": 1.00404691696167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2703721523284912, + "step": 304, + "valid_targets_mean": 14671.9, + "valid_targets_min": 4066 + }, + { + "epoch": 1.2985074626865671, + "grad_norm": 0.46773495382617897, + "learning_rate": 3.7020888543062046e-05, + "loss": 1.0046180486679077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2816005051136017, + "step": 305, + "valid_targets_mean": 15683.1, + "valid_targets_min": 6707 + }, + { + "epoch": 1.302771855010661, + "grad_norm": 0.47056842659745385, + "learning_rate": 3.6989599965833024e-05, + "loss": 1.0792807340621948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3000812828540802, + "step": 306, + "valid_targets_mean": 15442.5, + "valid_targets_min": 7315 + }, + { + "epoch": 1.3070362473347548, + "grad_norm": 0.4364095538608938, + "learning_rate": 3.695816130522317e-05, + "loss": 1.0605202913284302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570754289627075, + "step": 307, + "valid_targets_mean": 13909.1, + "valid_targets_min": 3271 + }, + { + "epoch": 1.3113006396588487, + "grad_norm": 0.48886491062143717, + "learning_rate": 3.692657283895651e-05, + "loss": 1.0294058322906494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2760119140148163, + "step": 308, + "valid_targets_mean": 14565.0, + "valid_targets_min": 2334 + }, + { + "epoch": 1.3155650319829424, + "grad_norm": 0.5029760678480856, + "learning_rate": 3.689483484608048e-05, + "loss": 1.0614323616027832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2552529573440552, + "step": 309, + "valid_targets_mean": 14474.2, + "valid_targets_min": 1756 + }, + { + "epoch": 1.3198294243070363, + "grad_norm": 0.423583818084647, + "learning_rate": 3.6862947606963364e-05, + "loss": 1.096575140953064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2549227476119995, + "step": 310, + "valid_targets_mean": 14506.5, + "valid_targets_min": 2082 + }, + { + "epoch": 1.32409381663113, + "grad_norm": 0.42315859890827595, + "learning_rate": 3.6830911403291885e-05, + "loss": 1.038635015487671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25086838006973267, + "step": 311, + "valid_targets_mean": 13291.1, + "valid_targets_min": 3734 + }, + { + "epoch": 1.328358208955224, + "grad_norm": 0.4449274886943997, + "learning_rate": 3.679872651806869e-05, + "loss": 1.0621452331542969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2692762613296509, + "step": 312, + "valid_targets_mean": 14425.4, + "valid_targets_min": 3203 + }, + { + "epoch": 1.3326226012793176, + "grad_norm": 0.40353806956953464, + "learning_rate": 3.676639323560986e-05, + "loss": 1.00935697555542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25382930040359497, + "step": 313, + "valid_targets_mean": 13922.9, + "valid_targets_min": 2388 + }, + { + "epoch": 1.3368869936034116, + "grad_norm": 0.38240160643844234, + "learning_rate": 3.6733911841542365e-05, + "loss": 1.0217959880828857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2370169460773468, + "step": 314, + "valid_targets_mean": 14275.7, + "valid_targets_min": 4836 + }, + { + "epoch": 1.3411513859275053, + "grad_norm": 0.44912430742395426, + "learning_rate": 3.6701282622801626e-05, + "loss": 1.0224769115447998, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2658455967903137, + "step": 315, + "valid_targets_mean": 14691.5, + "valid_targets_min": 6289 + }, + { + "epoch": 1.3454157782515992, + "grad_norm": 0.4380862194803825, + "learning_rate": 3.666850586762886e-05, + "loss": 1.0295928716659546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23124775290489197, + "step": 316, + "valid_targets_mean": 14788.2, + "valid_targets_min": 1572 + }, + { + "epoch": 1.349680170575693, + "grad_norm": 0.3780351915048266, + "learning_rate": 3.663558186556863e-05, + "loss": 1.0240471363067627, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26825106143951416, + "step": 317, + "valid_targets_mean": 14430.4, + "valid_targets_min": 4225 + }, + { + "epoch": 1.3539445628997868, + "grad_norm": 0.38969522738546963, + "learning_rate": 3.660251090746627e-05, + "loss": 0.9949407577514648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2697961926460266, + "step": 318, + "valid_targets_mean": 14902.6, + "valid_targets_min": 3428 + }, + { + "epoch": 1.3582089552238805, + "grad_norm": 0.4384210627180082, + "learning_rate": 3.656929328546526e-05, + "loss": 1.0744171142578125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2820214629173279, + "step": 319, + "valid_targets_mean": 15383.8, + "valid_targets_min": 7527 + }, + { + "epoch": 1.3624733475479744, + "grad_norm": 0.3984289616259255, + "learning_rate": 3.653592929300471e-05, + "loss": 1.0498393774032593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27668696641921997, + "step": 320, + "valid_targets_mean": 15038.8, + "valid_targets_min": 3785 + }, + { + "epoch": 1.3667377398720681, + "grad_norm": 0.432696068635002, + "learning_rate": 3.650241922481675e-05, + "loss": 1.0534286499023438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2388778030872345, + "step": 321, + "valid_targets_mean": 14543.2, + "valid_targets_min": 752 + }, + { + "epoch": 1.371002132196162, + "grad_norm": 0.48698829590428744, + "learning_rate": 3.6468763376923886e-05, + "loss": 1.0516881942749023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2882721722126007, + "step": 322, + "valid_targets_mean": 14838.8, + "valid_targets_min": 9079 + }, + { + "epoch": 1.375266524520256, + "grad_norm": 0.442918488237151, + "learning_rate": 3.6434962046636464e-05, + "loss": 1.030785083770752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.253054678440094, + "step": 323, + "valid_targets_mean": 14099.2, + "valid_targets_min": 5779 + }, + { + "epoch": 1.3795309168443497, + "grad_norm": 0.45324470211257595, + "learning_rate": 3.6401015532549957e-05, + "loss": 0.9938373565673828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24963873624801636, + "step": 324, + "valid_targets_mean": 14335.0, + "valid_targets_min": 3001 + }, + { + "epoch": 1.3837953091684434, + "grad_norm": 0.49752146852740525, + "learning_rate": 3.6366924134542386e-05, + "loss": 1.1035189628601074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26730480790138245, + "step": 325, + "valid_targets_mean": 14326.0, + "valid_targets_min": 3428 + }, + { + "epoch": 1.3880597014925373, + "grad_norm": 0.4043390562619949, + "learning_rate": 3.633268815377166e-05, + "loss": 1.023439645767212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2766946256160736, + "step": 326, + "valid_targets_mean": 15623.8, + "valid_targets_min": 12359 + }, + { + "epoch": 1.3923240938166312, + "grad_norm": 0.4473813682718078, + "learning_rate": 3.6298307892672895e-05, + "loss": 1.060289978981018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24249878525733948, + "step": 327, + "valid_targets_mean": 13869.2, + "valid_targets_min": 1192 + }, + { + "epoch": 1.396588486140725, + "grad_norm": 0.39880333868369167, + "learning_rate": 3.626378365495577e-05, + "loss": 1.0092850923538208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509322762489319, + "step": 328, + "valid_targets_mean": 14535.7, + "valid_targets_min": 3330 + }, + { + "epoch": 1.4008528784648187, + "grad_norm": 0.4223878979770423, + "learning_rate": 3.622911574560181e-05, + "loss": 1.021120309829712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28987523913383484, + "step": 329, + "valid_targets_mean": 15023.1, + "valid_targets_min": 2399 + }, + { + "epoch": 1.4051172707889126, + "grad_norm": 0.40875151541174765, + "learning_rate": 3.6194304470861744e-05, + "loss": 1.0362828969955444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24342569708824158, + "step": 330, + "valid_targets_mean": 14159.5, + "valid_targets_min": 1544 + }, + { + "epoch": 1.4093816631130065, + "grad_norm": 0.41445312343144597, + "learning_rate": 3.615935013825272e-05, + "loss": 1.0330214500427246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23608511686325073, + "step": 331, + "valid_targets_mean": 13884.3, + "valid_targets_min": 2210 + }, + { + "epoch": 1.4136460554371002, + "grad_norm": 0.40369141173086437, + "learning_rate": 3.612425305655569e-05, + "loss": 1.032184362411499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22079363465309143, + "step": 332, + "valid_targets_mean": 13598.4, + "valid_targets_min": 4280 + }, + { + "epoch": 1.417910447761194, + "grad_norm": 0.41599210161287786, + "learning_rate": 3.6089013535812593e-05, + "loss": 1.016597867012024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26309871673583984, + "step": 333, + "valid_targets_mean": 14173.4, + "valid_targets_min": 2833 + }, + { + "epoch": 1.4221748400852878, + "grad_norm": 0.4153459164865125, + "learning_rate": 3.6053631887323656e-05, + "loss": 1.0433218479156494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24018901586532593, + "step": 334, + "valid_targets_mean": 13169.0, + "valid_targets_min": 1366 + }, + { + "epoch": 1.4264392324093818, + "grad_norm": 0.4228156331265008, + "learning_rate": 3.601810842364465e-05, + "loss": 1.0579197406768799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26747769117355347, + "step": 335, + "valid_targets_mean": 14832.1, + "valid_targets_min": 2603 + }, + { + "epoch": 1.4307036247334755, + "grad_norm": 0.37510951898211387, + "learning_rate": 3.598244345858412e-05, + "loss": 1.044649600982666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.266460657119751, + "step": 336, + "valid_targets_mean": 14194.0, + "valid_targets_min": 3392 + }, + { + "epoch": 1.4349680170575694, + "grad_norm": 0.3730794365026243, + "learning_rate": 3.594663730720059e-05, + "loss": 1.0548815727233887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2614656388759613, + "step": 337, + "valid_targets_mean": 14611.4, + "valid_targets_min": 4873 + }, + { + "epoch": 1.439232409381663, + "grad_norm": 0.46642538995969157, + "learning_rate": 3.591069028579982e-05, + "loss": 1.0309990644454956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25115907192230225, + "step": 338, + "valid_targets_mean": 14504.5, + "valid_targets_min": 2470 + }, + { + "epoch": 1.443496801705757, + "grad_norm": 0.40116665083162756, + "learning_rate": 3.5874602711931994e-05, + "loss": 1.019059658050537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23682564496994019, + "step": 339, + "valid_targets_mean": 14049.9, + "valid_targets_min": 1264 + }, + { + "epoch": 1.4477611940298507, + "grad_norm": 0.3623754460535039, + "learning_rate": 3.5838374904388904e-05, + "loss": 1.0602333545684814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26520347595214844, + "step": 340, + "valid_targets_mean": 14519.9, + "valid_targets_min": 2559 + }, + { + "epoch": 1.4520255863539446, + "grad_norm": 0.432940910513449, + "learning_rate": 3.580200718320115e-05, + "loss": 1.034498691558838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2453865110874176, + "step": 341, + "valid_targets_mean": 14551.1, + "valid_targets_min": 4057 + }, + { + "epoch": 1.4562899786780383, + "grad_norm": 0.41479612142784555, + "learning_rate": 3.576549986963531e-05, + "loss": 1.071781873703003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25374463200569153, + "step": 342, + "valid_targets_mean": 14550.2, + "valid_targets_min": 2094 + }, + { + "epoch": 1.4605543710021323, + "grad_norm": 0.4388319041148267, + "learning_rate": 3.5728853286191075e-05, + "loss": 1.0310769081115723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23260226845741272, + "step": 343, + "valid_targets_mean": 14847.7, + "valid_targets_min": 5643 + }, + { + "epoch": 1.464818763326226, + "grad_norm": 0.5097439382078064, + "learning_rate": 3.5692067756598465e-05, + "loss": 1.085401177406311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26018932461738586, + "step": 344, + "valid_targets_mean": 14242.3, + "valid_targets_min": 1644 + }, + { + "epoch": 1.4690831556503199, + "grad_norm": 0.4906254933146025, + "learning_rate": 3.5655143605814885e-05, + "loss": 1.0883269309997559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29738789796829224, + "step": 345, + "valid_targets_mean": 15187.0, + "valid_targets_min": 5367 + }, + { + "epoch": 1.4733475479744136, + "grad_norm": 0.43429736392701807, + "learning_rate": 3.561808116002232e-05, + "loss": 1.0500903129577637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26342469453811646, + "step": 346, + "valid_targets_mean": 13944.7, + "valid_targets_min": 4459 + }, + { + "epoch": 1.4776119402985075, + "grad_norm": 0.5529170127999757, + "learning_rate": 3.5580880746624444e-05, + "loss": 1.1078016757965088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28638672828674316, + "step": 347, + "valid_targets_mean": 14733.8, + "valid_targets_min": 3275 + }, + { + "epoch": 1.4818763326226012, + "grad_norm": 0.5253935878945599, + "learning_rate": 3.5543542694243685e-05, + "loss": 1.0176830291748047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500063180923462, + "step": 348, + "valid_targets_mean": 14831.7, + "valid_targets_min": 3693 + }, + { + "epoch": 1.4861407249466951, + "grad_norm": 0.43894141234318407, + "learning_rate": 3.5506067332718355e-05, + "loss": 1.0444616079330444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2473248541355133, + "step": 349, + "valid_targets_mean": 14922.2, + "valid_targets_min": 6830 + }, + { + "epoch": 1.4904051172707888, + "grad_norm": 0.558730332863415, + "learning_rate": 3.546845499309976e-05, + "loss": 1.0343124866485596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27866482734680176, + "step": 350, + "valid_targets_mean": 14642.0, + "valid_targets_min": 1155 + }, + { + "epoch": 1.4946695095948828, + "grad_norm": 0.4185560934360798, + "learning_rate": 3.5430706007649225e-05, + "loss": 1.0102611780166626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702333331108093, + "step": 351, + "valid_targets_mean": 15455.7, + "valid_targets_min": 12148 + }, + { + "epoch": 1.4989339019189765, + "grad_norm": 0.46798829218110205, + "learning_rate": 3.539282070983518e-05, + "loss": 0.9864039421081543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2526516914367676, + "step": 352, + "valid_targets_mean": 14920.8, + "valid_targets_min": 7864 + }, + { + "epoch": 1.5031982942430704, + "grad_norm": 0.4076542842351871, + "learning_rate": 3.535479943433023e-05, + "loss": 1.0977790355682373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2841411828994751, + "step": 353, + "valid_targets_mean": 15169.3, + "valid_targets_min": 7521 + }, + { + "epoch": 1.5074626865671643, + "grad_norm": 0.3932038387603887, + "learning_rate": 3.5316642517008184e-05, + "loss": 1.0146563053131104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2530601918697357, + "step": 354, + "valid_targets_mean": 14533.0, + "valid_targets_min": 5862 + }, + { + "epoch": 1.511727078891258, + "grad_norm": 0.4004458039033539, + "learning_rate": 3.5278350294941074e-05, + "loss": 1.027766466140747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24737542867660522, + "step": 355, + "valid_targets_mean": 15259.2, + "valid_targets_min": 9655 + }, + { + "epoch": 1.5159914712153517, + "grad_norm": 0.4041518470303862, + "learning_rate": 3.523992310639622e-05, + "loss": 1.0604379177093506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23790603876113892, + "step": 356, + "valid_targets_mean": 14440.0, + "valid_targets_min": 3563 + }, + { + "epoch": 1.5202558635394456, + "grad_norm": 0.3853938737065251, + "learning_rate": 3.5201361290833165e-05, + "loss": 1.0405174493789673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2695203125476837, + "step": 357, + "valid_targets_mean": 14853.6, + "valid_targets_min": 5910 + }, + { + "epoch": 1.5245202558635396, + "grad_norm": 0.3700881121843563, + "learning_rate": 3.516266518890079e-05, + "loss": 1.055159091949463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2846561670303345, + "step": 358, + "valid_targets_mean": 15617.7, + "valid_targets_min": 7355 + }, + { + "epoch": 1.5287846481876333, + "grad_norm": 0.41639254141432347, + "learning_rate": 3.512383514243419e-05, + "loss": 0.9890848994255066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25430652499198914, + "step": 359, + "valid_targets_mean": 14814.4, + "valid_targets_min": 5326 + }, + { + "epoch": 1.533049040511727, + "grad_norm": 0.4257510522388653, + "learning_rate": 3.5084871494451716e-05, + "loss": 1.0411036014556885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26373782753944397, + "step": 360, + "valid_targets_mean": 14399.5, + "valid_targets_min": 4828 + }, + { + "epoch": 1.537313432835821, + "grad_norm": 0.4332882415420207, + "learning_rate": 3.5045774589151955e-05, + "loss": 1.080575942993164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2707892656326294, + "step": 361, + "valid_targets_mean": 14051.8, + "valid_targets_min": 5008 + }, + { + "epoch": 1.5415778251599148, + "grad_norm": 0.4312474465754125, + "learning_rate": 3.500654477191064e-05, + "loss": 1.0592353343963623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2550530433654785, + "step": 362, + "valid_targets_mean": 15092.4, + "valid_targets_min": 8853 + }, + { + "epoch": 1.5458422174840085, + "grad_norm": 0.388247184125693, + "learning_rate": 3.496718238927764e-05, + "loss": 1.0313072204589844, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2804388403892517, + "step": 363, + "valid_targets_mean": 14726.6, + "valid_targets_min": 4844 + }, + { + "epoch": 1.5501066098081022, + "grad_norm": 0.3715356387359413, + "learning_rate": 3.492768778897388e-05, + "loss": 1.032320499420166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508130967617035, + "step": 364, + "valid_targets_mean": 14377.2, + "valid_targets_min": 4826 + }, + { + "epoch": 1.5543710021321961, + "grad_norm": 0.3904825246518211, + "learning_rate": 3.4888061319888276e-05, + "loss": 1.0460598468780518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2676374912261963, + "step": 365, + "valid_targets_mean": 14739.1, + "valid_targets_min": 5957 + }, + { + "epoch": 1.55863539445629, + "grad_norm": 0.44926171747857924, + "learning_rate": 3.484830333207466e-05, + "loss": 1.003669023513794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23656132817268372, + "step": 366, + "valid_targets_mean": 14270.9, + "valid_targets_min": 1848 + }, + { + "epoch": 1.5628997867803838, + "grad_norm": 0.3559883543601324, + "learning_rate": 3.4808414176748666e-05, + "loss": 1.0113545656204224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25722262263298035, + "step": 367, + "valid_targets_mean": 14459.8, + "valid_targets_min": 3202 + }, + { + "epoch": 1.5671641791044775, + "grad_norm": 0.46220961836637714, + "learning_rate": 3.476839420628466e-05, + "loss": 1.0731767416000366, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2682396173477173, + "step": 368, + "valid_targets_mean": 15118.6, + "valid_targets_min": 4725 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 0.37036023858488, + "learning_rate": 3.472824377421257e-05, + "loss": 1.0478543043136597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25996726751327515, + "step": 369, + "valid_targets_mean": 14105.8, + "valid_targets_min": 2477 + }, + { + "epoch": 1.5756929637526653, + "grad_norm": 0.3932506540896539, + "learning_rate": 3.4687963235214845e-05, + "loss": 1.048224687576294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2726746201515198, + "step": 370, + "valid_targets_mean": 14638.0, + "valid_targets_min": 5709 + }, + { + "epoch": 1.579957356076759, + "grad_norm": 0.3881717185807725, + "learning_rate": 3.464755294512325e-05, + "loss": 1.0166910886764526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702951729297638, + "step": 371, + "valid_targets_mean": 14126.0, + "valid_targets_min": 2212 + }, + { + "epoch": 1.5842217484008527, + "grad_norm": 0.3773858725739773, + "learning_rate": 3.4607013260915765e-05, + "loss": 1.0563862323760986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2720091938972473, + "step": 372, + "valid_targets_mean": 15196.1, + "valid_targets_min": 3138 + }, + { + "epoch": 1.5884861407249466, + "grad_norm": 0.3516127575139888, + "learning_rate": 3.4566344540713404e-05, + "loss": 0.9865554571151733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26179879903793335, + "step": 373, + "valid_targets_mean": 15523.9, + "valid_targets_min": 3813 + }, + { + "epoch": 1.5927505330490406, + "grad_norm": 0.34214633347445444, + "learning_rate": 3.452554714377706e-05, + "loss": 1.0230598449707031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2778155505657196, + "step": 374, + "valid_targets_mean": 15356.0, + "valid_targets_min": 7797 + }, + { + "epoch": 1.5970149253731343, + "grad_norm": 0.35945798893790865, + "learning_rate": 3.448462143050436e-05, + "loss": 1.0132288932800293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2472197711467743, + "step": 375, + "valid_targets_mean": 14254.1, + "valid_targets_min": 4122 + }, + { + "epoch": 1.6012793176972282, + "grad_norm": 0.3691893165416932, + "learning_rate": 3.4443567762426444e-05, + "loss": 1.0427764654159546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2452818751335144, + "step": 376, + "valid_targets_mean": 14382.5, + "valid_targets_min": 1979 + }, + { + "epoch": 1.6055437100213221, + "grad_norm": 0.32656514899943623, + "learning_rate": 3.440238650220477e-05, + "loss": 1.0427534580230713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24343901872634888, + "step": 377, + "valid_targets_mean": 14360.2, + "valid_targets_min": 2659 + }, + { + "epoch": 1.6098081023454158, + "grad_norm": 0.35145024748398823, + "learning_rate": 3.4361078013627945e-05, + "loss": 1.0215208530426025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24857380986213684, + "step": 378, + "valid_targets_mean": 14909.9, + "valid_targets_min": 5877 + }, + { + "epoch": 1.6140724946695095, + "grad_norm": 0.37012176740954145, + "learning_rate": 3.4319642661608474e-05, + "loss": 1.041208028793335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2563413977622986, + "step": 379, + "valid_targets_mean": 14180.1, + "valid_targets_min": 2024 + }, + { + "epoch": 1.6183368869936035, + "grad_norm": 0.373177192589185, + "learning_rate": 3.427808081217957e-05, + "loss": 1.0833510160446167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2780710458755493, + "step": 380, + "valid_targets_mean": 14983.0, + "valid_targets_min": 2490 + }, + { + "epoch": 1.6226012793176974, + "grad_norm": 0.33354305565043396, + "learning_rate": 3.423639283249189e-05, + "loss": 0.9997053742408752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2615281939506531, + "step": 381, + "valid_targets_mean": 14317.3, + "valid_targets_min": 2195 + }, + { + "epoch": 1.626865671641791, + "grad_norm": 0.3471963364637768, + "learning_rate": 3.419457909081032e-05, + "loss": 1.0127842426300049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2638373374938965, + "step": 382, + "valid_targets_mean": 14404.6, + "valid_targets_min": 2852 + }, + { + "epoch": 1.6311300639658848, + "grad_norm": 0.33501867942244973, + "learning_rate": 3.415263995651069e-05, + "loss": 1.0155236721038818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24730534851551056, + "step": 383, + "valid_targets_mean": 14532.0, + "valid_targets_min": 3217 + }, + { + "epoch": 1.6353944562899787, + "grad_norm": 0.334467361139862, + "learning_rate": 3.411057580007653e-05, + "loss": 1.051874041557312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26716887950897217, + "step": 384, + "valid_targets_mean": 14626.3, + "valid_targets_min": 3330 + }, + { + "epoch": 1.6396588486140726, + "grad_norm": 0.3900211482958431, + "learning_rate": 3.4068386993095806e-05, + "loss": 1.0342919826507568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27061301469802856, + "step": 385, + "valid_targets_mean": 14689.8, + "valid_targets_min": 4055 + }, + { + "epoch": 1.6439232409381663, + "grad_norm": 0.3371460016615242, + "learning_rate": 3.402607390825762e-05, + "loss": 1.0449540615081787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2670671343803406, + "step": 386, + "valid_targets_mean": 14720.9, + "valid_targets_min": 5646 + }, + { + "epoch": 1.64818763326226, + "grad_norm": 0.32759083808119127, + "learning_rate": 3.398363691934894e-05, + "loss": 1.0454938411712646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23546123504638672, + "step": 387, + "valid_targets_mean": 13642.0, + "valid_targets_min": 2066 + }, + { + "epoch": 1.652452025586354, + "grad_norm": 0.3767978047948565, + "learning_rate": 3.3941076401251244e-05, + "loss": 1.0003044605255127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2335018813610077, + "step": 388, + "valid_targets_mean": 14856.9, + "valid_targets_min": 2551 + }, + { + "epoch": 1.6567164179104479, + "grad_norm": 0.33539311118940945, + "learning_rate": 3.3898392729937295e-05, + "loss": 1.0234074592590332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2576403021812439, + "step": 389, + "valid_targets_mean": 14729.4, + "valid_targets_min": 5174 + }, + { + "epoch": 1.6609808102345416, + "grad_norm": 0.38301412923847056, + "learning_rate": 3.385558628246774e-05, + "loss": 1.036074161529541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2639666199684143, + "step": 390, + "valid_targets_mean": 15467.5, + "valid_targets_min": 2531 + }, + { + "epoch": 1.6652452025586353, + "grad_norm": 0.4182031657844117, + "learning_rate": 3.381265743698781e-05, + "loss": 1.0574541091918945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2728482782840729, + "step": 391, + "valid_targets_mean": 14620.8, + "valid_targets_min": 5987 + }, + { + "epoch": 1.6695095948827292, + "grad_norm": 0.3408222839521064, + "learning_rate": 3.3769606572724e-05, + "loss": 1.0520439147949219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29153692722320557, + "step": 392, + "valid_targets_mean": 14701.7, + "valid_targets_min": 5351 + }, + { + "epoch": 1.6737739872068231, + "grad_norm": 0.33485626829486975, + "learning_rate": 3.3726434069980686e-05, + "loss": 1.0322532653808594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2689778804779053, + "step": 393, + "valid_targets_mean": 14448.6, + "valid_targets_min": 7360 + }, + { + "epoch": 1.6780383795309168, + "grad_norm": 0.30661591637001406, + "learning_rate": 3.368314031013678e-05, + "loss": 1.0209152698516846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23672515153884888, + "step": 394, + "valid_targets_mean": 14592.0, + "valid_targets_min": 2343 + }, + { + "epoch": 1.6823027718550105, + "grad_norm": 0.3594261214971763, + "learning_rate": 3.363972567564236e-05, + "loss": 0.9918817281723022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25489190220832825, + "step": 395, + "valid_targets_mean": 15144.4, + "valid_targets_min": 3992 + }, + { + "epoch": 1.6865671641791045, + "grad_norm": 0.33742571789338416, + "learning_rate": 3.35961905500153e-05, + "loss": 1.0087916851043701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24313384294509888, + "step": 396, + "valid_targets_mean": 13942.8, + "valid_targets_min": 575 + }, + { + "epoch": 1.6908315565031984, + "grad_norm": 0.34745731043603895, + "learning_rate": 3.3552535317837855e-05, + "loss": 0.9876875877380371, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25758689641952515, + "step": 397, + "valid_targets_mean": 14985.8, + "valid_targets_min": 7044 + }, + { + "epoch": 1.695095948827292, + "grad_norm": 0.3870431140851168, + "learning_rate": 3.35087603647533e-05, + "loss": 1.0088675022125244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26093339920043945, + "step": 398, + "valid_targets_mean": 14448.2, + "valid_targets_min": 4964 + }, + { + "epoch": 1.6993603411513858, + "grad_norm": 0.3167918925838153, + "learning_rate": 3.346486607746249e-05, + "loss": 1.0424166917800903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24083197116851807, + "step": 399, + "valid_targets_mean": 14033.4, + "valid_targets_min": 2426 + }, + { + "epoch": 1.7036247334754797, + "grad_norm": 0.3828292733495057, + "learning_rate": 3.342085284372047e-05, + "loss": 1.0191667079925537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616402506828308, + "step": 400, + "valid_targets_mean": 14583.3, + "valid_targets_min": 7331 + }, + { + "epoch": 1.7078891257995736, + "grad_norm": 0.3649425592457212, + "learning_rate": 3.337672105233303e-05, + "loss": 1.0168663263320923, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2752700448036194, + "step": 401, + "valid_targets_mean": 14993.9, + "valid_targets_min": 7835 + }, + { + "epoch": 1.7121535181236673, + "grad_norm": 0.3643524380815211, + "learning_rate": 3.3332471093153296e-05, + "loss": 1.0180773735046387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2487078756093979, + "step": 402, + "valid_targets_mean": 13492.8, + "valid_targets_min": 1627 + }, + { + "epoch": 1.716417910447761, + "grad_norm": 0.3780037653881082, + "learning_rate": 3.3288103357078244e-05, + "loss": 1.033068299293518, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26770901679992676, + "step": 403, + "valid_targets_mean": 14617.1, + "valid_targets_min": 2467 + }, + { + "epoch": 1.720682302771855, + "grad_norm": 0.34536872530694906, + "learning_rate": 3.324361823604529e-05, + "loss": 0.9663518667221069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23401600122451782, + "step": 404, + "valid_targets_mean": 14467.1, + "valid_targets_min": 5017 + }, + { + "epoch": 1.724946695095949, + "grad_norm": 0.3809124784742419, + "learning_rate": 3.319901612302881e-05, + "loss": 1.0394078493118286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2640204429626465, + "step": 405, + "valid_targets_mean": 14536.2, + "valid_targets_min": 1635 + }, + { + "epoch": 1.7292110874200426, + "grad_norm": 0.3378638020700729, + "learning_rate": 3.315429741203666e-05, + "loss": 1.053601861000061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2702086865901947, + "step": 406, + "valid_targets_mean": 14123.1, + "valid_targets_min": 4839 + }, + { + "epoch": 1.7334754797441365, + "grad_norm": 0.4175023493480647, + "learning_rate": 3.3109462498106705e-05, + "loss": 1.020158290863037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24757951498031616, + "step": 407, + "valid_targets_mean": 14131.3, + "valid_targets_min": 2620 + }, + { + "epoch": 1.7377398720682304, + "grad_norm": 0.420943686556577, + "learning_rate": 3.306451177730333e-05, + "loss": 1.0457191467285156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24467140436172485, + "step": 408, + "valid_targets_mean": 13883.8, + "valid_targets_min": 1684 + }, + { + "epoch": 1.7420042643923241, + "grad_norm": 0.4250661617912362, + "learning_rate": 3.301944564671394e-05, + "loss": 1.0097477436065674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2460739016532898, + "step": 409, + "valid_targets_mean": 14259.0, + "valid_targets_min": 6646 + }, + { + "epoch": 1.7462686567164178, + "grad_norm": 0.4058959096130296, + "learning_rate": 3.297426450444546e-05, + "loss": 1.0764334201812744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25958263874053955, + "step": 410, + "valid_targets_mean": 14020.4, + "valid_targets_min": 1460 + }, + { + "epoch": 1.7505330490405118, + "grad_norm": 0.41814093158530896, + "learning_rate": 3.292896874962078e-05, + "loss": 1.0006964206695557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24656634032726288, + "step": 411, + "valid_targets_mean": 15418.4, + "valid_targets_min": 2382 + }, + { + "epoch": 1.7547974413646057, + "grad_norm": 0.403135172590173, + "learning_rate": 3.2883558782375294e-05, + "loss": 1.0603384971618652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2844768166542053, + "step": 412, + "valid_targets_mean": 15048.8, + "valid_targets_min": 4661 + }, + { + "epoch": 1.7590618336886994, + "grad_norm": 0.40013299639055083, + "learning_rate": 3.283803500385332e-05, + "loss": 1.043904423713684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2568932771682739, + "step": 413, + "valid_targets_mean": 14366.2, + "valid_targets_min": 2376 + }, + { + "epoch": 1.763326226012793, + "grad_norm": 0.4405862304897585, + "learning_rate": 3.2792397816204546e-05, + "loss": 1.0399513244628906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2780348062515259, + "step": 414, + "valid_targets_mean": 14281.8, + "valid_targets_min": 1040 + }, + { + "epoch": 1.767590618336887, + "grad_norm": 0.3703678306152304, + "learning_rate": 3.2746647622580524e-05, + "loss": 0.9982384443283081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24133169651031494, + "step": 415, + "valid_targets_mean": 14948.2, + "valid_targets_min": 2248 + }, + { + "epoch": 1.771855010660981, + "grad_norm": 0.38058065355445136, + "learning_rate": 3.270078482713106e-05, + "loss": 1.008116364479065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2592852711677551, + "step": 416, + "valid_targets_mean": 14538.5, + "valid_targets_min": 6985 + }, + { + "epoch": 1.7761194029850746, + "grad_norm": 0.3807576126149718, + "learning_rate": 3.265480983500069e-05, + "loss": 1.034130573272705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25203263759613037, + "step": 417, + "valid_targets_mean": 13763.7, + "valid_targets_min": 1735 + }, + { + "epoch": 1.7803837953091683, + "grad_norm": 0.35469677958006324, + "learning_rate": 3.260872305232507e-05, + "loss": 1.0008351802825928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24766606092453003, + "step": 418, + "valid_targets_mean": 14314.2, + "valid_targets_min": 1831 + }, + { + "epoch": 1.7846481876332623, + "grad_norm": 0.34953838065509935, + "learning_rate": 3.256252488622738e-05, + "loss": 1.0125362873077393, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24885194003582, + "step": 419, + "valid_targets_mean": 15054.8, + "valid_targets_min": 10156 + }, + { + "epoch": 1.7889125799573562, + "grad_norm": 0.3464933663297695, + "learning_rate": 3.251621574481475e-05, + "loss": 1.066127061843872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2821931540966034, + "step": 420, + "valid_targets_mean": 14622.4, + "valid_targets_min": 1382 + }, + { + "epoch": 1.79317697228145, + "grad_norm": 0.37117640473424757, + "learning_rate": 3.246979603717467e-05, + "loss": 0.9925398230552673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23529556393623352, + "step": 421, + "valid_targets_mean": 15146.0, + "valid_targets_min": 5987 + }, + { + "epoch": 1.7974413646055436, + "grad_norm": 0.3770563485294222, + "learning_rate": 3.242326617337133e-05, + "loss": 1.0189104080200195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2386004775762558, + "step": 422, + "valid_targets_mean": 13824.1, + "valid_targets_min": 2070 + }, + { + "epoch": 1.8017057569296375, + "grad_norm": 0.35822765744928253, + "learning_rate": 3.2376626564442016e-05, + "loss": 1.0439180135726929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584801912307739, + "step": 423, + "valid_targets_mean": 15041.2, + "valid_targets_min": 7090 + }, + { + "epoch": 1.8059701492537314, + "grad_norm": 0.3324831953290299, + "learning_rate": 3.2329877622393515e-05, + "loss": 1.054990291595459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2689490020275116, + "step": 424, + "valid_targets_mean": 14683.2, + "valid_targets_min": 7228 + }, + { + "epoch": 1.8102345415778252, + "grad_norm": 0.34915086832559983, + "learning_rate": 3.228301976019841e-05, + "loss": 0.9770750999450684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23467862606048584, + "step": 425, + "valid_targets_mean": 14624.9, + "valid_targets_min": 1875 + }, + { + "epoch": 1.8144989339019189, + "grad_norm": 0.32965841703308824, + "learning_rate": 3.22360533917915e-05, + "loss": 0.9906047582626343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24453550577163696, + "step": 426, + "valid_targets_mean": 14685.0, + "valid_targets_min": 1573 + }, + { + "epoch": 1.8187633262260128, + "grad_norm": 0.332438503069151, + "learning_rate": 3.218897893206608e-05, + "loss": 1.0660655498504639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2804543375968933, + "step": 427, + "valid_targets_mean": 15406.8, + "valid_targets_min": 10790 + }, + { + "epoch": 1.8230277185501067, + "grad_norm": 0.3689774867731172, + "learning_rate": 3.2141796796870335e-05, + "loss": 0.9783341288566589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26350972056388855, + "step": 428, + "valid_targets_mean": 15478.9, + "valid_targets_min": 5018 + }, + { + "epoch": 1.8272921108742004, + "grad_norm": 0.32744116016781977, + "learning_rate": 3.2094507403003614e-05, + "loss": 1.0029715299606323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24373041093349457, + "step": 429, + "valid_targets_mean": 14231.6, + "valid_targets_min": 5631 + }, + { + "epoch": 1.831556503198294, + "grad_norm": 0.33963983474338727, + "learning_rate": 3.2047111168212785e-05, + "loss": 0.987981915473938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23339219391345978, + "step": 430, + "valid_targets_mean": 14735.6, + "valid_targets_min": 4105 + }, + { + "epoch": 1.835820895522388, + "grad_norm": 0.3831323506936333, + "learning_rate": 3.1999608511188524e-05, + "loss": 1.0297985076904297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23571500182151794, + "step": 431, + "valid_targets_mean": 14458.9, + "valid_targets_min": 4385 + }, + { + "epoch": 1.840085287846482, + "grad_norm": 0.28973582326105785, + "learning_rate": 3.1951999851561625e-05, + "loss": 1.0437334775924683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.250507652759552, + "step": 432, + "valid_targets_mean": 13971.0, + "valid_targets_min": 3231 + }, + { + "epoch": 1.8443496801705757, + "grad_norm": 0.3608534729433716, + "learning_rate": 3.190428560989931e-05, + "loss": 1.0184440612792969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25230586528778076, + "step": 433, + "valid_targets_mean": 14063.9, + "valid_targets_min": 4039 + }, + { + "epoch": 1.8486140724946694, + "grad_norm": 0.3468183299999246, + "learning_rate": 3.185646620770146e-05, + "loss": 1.0451010465621948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2619550824165344, + "step": 434, + "valid_targets_mean": 14315.4, + "valid_targets_min": 5088 + }, + { + "epoch": 1.8528784648187633, + "grad_norm": 0.36334244080969874, + "learning_rate": 3.180854206739696e-05, + "loss": 1.0416852235794067, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2673511207103729, + "step": 435, + "valid_targets_mean": 14091.8, + "valid_targets_min": 2033 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 0.39437556533817586, + "learning_rate": 3.176051361233991e-05, + "loss": 1.007869005203247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2646036148071289, + "step": 436, + "valid_targets_mean": 15415.9, + "valid_targets_min": 9952 + }, + { + "epoch": 1.861407249466951, + "grad_norm": 0.3855523767310849, + "learning_rate": 3.171238126680594e-05, + "loss": 1.0384873151779175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24490685760974884, + "step": 437, + "valid_targets_mean": 13899.1, + "valid_targets_min": 3160 + }, + { + "epoch": 1.8656716417910446, + "grad_norm": 0.3511120043115705, + "learning_rate": 3.166414545598839e-05, + "loss": 1.05495285987854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2525789141654968, + "step": 438, + "valid_targets_mean": 14838.1, + "valid_targets_min": 6803 + }, + { + "epoch": 1.8699360341151388, + "grad_norm": 0.3492038967686587, + "learning_rate": 3.161580660599464e-05, + "loss": 1.0517874956130981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26977279782295227, + "step": 439, + "valid_targets_mean": 15549.5, + "valid_targets_min": 9668 + }, + { + "epoch": 1.8742004264392325, + "grad_norm": 0.3211662787472829, + "learning_rate": 3.1567365143842264e-05, + "loss": 1.005902886390686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564969062805176, + "step": 440, + "valid_targets_mean": 14757.1, + "valid_targets_min": 5741 + }, + { + "epoch": 1.8784648187633262, + "grad_norm": 0.343359757201269, + "learning_rate": 3.1518821497455326e-05, + "loss": 1.0198183059692383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2403055876493454, + "step": 441, + "valid_targets_mean": 13502.8, + "valid_targets_min": 1366 + }, + { + "epoch": 1.88272921108742, + "grad_norm": 0.33782769808390783, + "learning_rate": 3.147017609566054e-05, + "loss": 1.0181028842926025, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2621736526489258, + "step": 442, + "valid_targets_mean": 15241.9, + "valid_targets_min": 8209 + }, + { + "epoch": 1.886993603411514, + "grad_norm": 0.3578357014545001, + "learning_rate": 3.142142936818353e-05, + "loss": 1.0328750610351562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2619187831878662, + "step": 443, + "valid_targets_mean": 14370.1, + "valid_targets_min": 1593 + }, + { + "epoch": 1.8912579957356077, + "grad_norm": 0.31801043564050785, + "learning_rate": 3.137258174564501e-05, + "loss": 0.999271035194397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23855340480804443, + "step": 444, + "valid_targets_mean": 14367.9, + "valid_targets_min": 2805 + }, + { + "epoch": 1.8955223880597014, + "grad_norm": 0.39506035505892084, + "learning_rate": 3.1323633659556986e-05, + "loss": 1.0545252561569214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.265545517206192, + "step": 445, + "valid_targets_mean": 15161.8, + "valid_targets_min": 2201 + }, + { + "epoch": 1.8997867803837953, + "grad_norm": 0.309023363110396, + "learning_rate": 3.127458554231894e-05, + "loss": 1.0253419876098633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22738057374954224, + "step": 446, + "valid_targets_mean": 14012.2, + "valid_targets_min": 5255 + }, + { + "epoch": 1.9040511727078893, + "grad_norm": 0.35490324818398, + "learning_rate": 3.122543782721402e-05, + "loss": 1.0158578157424927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2512471675872803, + "step": 447, + "valid_targets_mean": 13560.5, + "valid_targets_min": 2354 + }, + { + "epoch": 1.908315565031983, + "grad_norm": 0.3036891305581489, + "learning_rate": 3.1176190948405194e-05, + "loss": 1.069814920425415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24784015119075775, + "step": 448, + "valid_targets_mean": 14122.2, + "valid_targets_min": 1497 + }, + { + "epoch": 1.9125799573560767, + "grad_norm": 0.3419726999138405, + "learning_rate": 3.112684534093142e-05, + "loss": 1.0053629875183105, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23691532015800476, + "step": 449, + "valid_targets_mean": 14771.7, + "valid_targets_min": 7360 + }, + { + "epoch": 1.9168443496801706, + "grad_norm": 0.40838753290198465, + "learning_rate": 3.107740144070385e-05, + "loss": 1.0423624515533447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2578592300415039, + "step": 450, + "valid_targets_mean": 14059.9, + "valid_targets_min": 2691 + }, + { + "epoch": 1.9211087420042645, + "grad_norm": 0.36196495758103997, + "learning_rate": 3.102785968450188e-05, + "loss": 1.0227258205413818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570173144340515, + "step": 451, + "valid_targets_mean": 15053.9, + "valid_targets_min": 4019 + }, + { + "epoch": 1.9253731343283582, + "grad_norm": 0.3548321726590971, + "learning_rate": 3.09782205099694e-05, + "loss": 1.0171830654144287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2440769076347351, + "step": 452, + "valid_targets_mean": 14496.3, + "valid_targets_min": 1756 + }, + { + "epoch": 1.929637526652452, + "grad_norm": 0.4058071231863198, + "learning_rate": 3.092848435561084e-05, + "loss": 1.0100500583648682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27710479497909546, + "step": 453, + "valid_targets_mean": 14780.3, + "valid_targets_min": 4633 + }, + { + "epoch": 1.9339019189765458, + "grad_norm": 0.4070188851510975, + "learning_rate": 3.0878651660787376e-05, + "loss": 1.0602819919586182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2704969644546509, + "step": 454, + "valid_targets_mean": 15116.1, + "valid_targets_min": 7740 + }, + { + "epoch": 1.9381663113006398, + "grad_norm": 0.4187493934603921, + "learning_rate": 3.082872286571295e-05, + "loss": 1.0430219173431396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2716018855571747, + "step": 455, + "valid_targets_mean": 15225.5, + "valid_targets_min": 2666 + }, + { + "epoch": 1.9424307036247335, + "grad_norm": 0.4808979187386021, + "learning_rate": 3.077869841145049e-05, + "loss": 1.0674023628234863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27071070671081543, + "step": 456, + "valid_targets_mean": 14779.5, + "valid_targets_min": 6332 + }, + { + "epoch": 1.9466950959488272, + "grad_norm": 0.34694791859336266, + "learning_rate": 3.0728578739907934e-05, + "loss": 1.0135544538497925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2522042393684387, + "step": 457, + "valid_targets_mean": 14540.5, + "valid_targets_min": 4167 + }, + { + "epoch": 1.950959488272921, + "grad_norm": 0.4736629987050759, + "learning_rate": 3.067836429383437e-05, + "loss": 1.0493121147155762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25577256083488464, + "step": 458, + "valid_targets_mean": 14053.8, + "valid_targets_min": 2295 + }, + { + "epoch": 1.955223880597015, + "grad_norm": 0.389170647954697, + "learning_rate": 3.062805551681609e-05, + "loss": 1.0064876079559326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2679370641708374, + "step": 459, + "valid_targets_mean": 15165.4, + "valid_targets_min": 8024 + }, + { + "epoch": 1.9594882729211087, + "grad_norm": 0.4128770419419475, + "learning_rate": 3.057765285327271e-05, + "loss": 1.0187498331069946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24309395253658295, + "step": 460, + "valid_targets_mean": 14963.2, + "valid_targets_min": 6194 + }, + { + "epoch": 1.9637526652452024, + "grad_norm": 0.39902135993292137, + "learning_rate": 3.0527156748453214e-05, + "loss": 1.0560030937194824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2578599750995636, + "step": 461, + "valid_targets_mean": 14774.2, + "valid_targets_min": 3990 + }, + { + "epoch": 1.9680170575692963, + "grad_norm": 0.3304981556798958, + "learning_rate": 3.047656764843203e-05, + "loss": 0.9743139743804932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24647156894207, + "step": 462, + "valid_targets_mean": 15133.5, + "valid_targets_min": 4797 + }, + { + "epoch": 1.9722814498933903, + "grad_norm": 0.3545188406065809, + "learning_rate": 3.0425886000105094e-05, + "loss": 1.0284925699234009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24679070711135864, + "step": 463, + "valid_targets_mean": 14326.2, + "valid_targets_min": 2013 + }, + { + "epoch": 1.976545842217484, + "grad_norm": 0.321978376788113, + "learning_rate": 3.0375112251185892e-05, + "loss": 1.011577844619751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2750028073787689, + "step": 464, + "valid_targets_mean": 14633.4, + "valid_targets_min": 4870 + }, + { + "epoch": 1.9808102345415777, + "grad_norm": 0.3826133894972065, + "learning_rate": 3.0324246850201527e-05, + "loss": 1.0314466953277588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2604595422744751, + "step": 465, + "valid_targets_mean": 14508.2, + "valid_targets_min": 980 + }, + { + "epoch": 1.9850746268656716, + "grad_norm": 0.3106361799547543, + "learning_rate": 3.0273290246488732e-05, + "loss": 1.0470771789550781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25823935866355896, + "step": 466, + "valid_targets_mean": 14294.9, + "valid_targets_min": 2956 + }, + { + "epoch": 1.9893390191897655, + "grad_norm": 0.41078985221388087, + "learning_rate": 3.0222242890189904e-05, + "loss": 1.0223674774169922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25699979066848755, + "step": 467, + "valid_targets_mean": 14991.4, + "valid_targets_min": 7709 + }, + { + "epoch": 1.9936034115138592, + "grad_norm": 0.3252010849073714, + "learning_rate": 3.017110523224914e-05, + "loss": 0.9852697849273682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2593464255332947, + "step": 468, + "valid_targets_mean": 15114.2, + "valid_targets_min": 4733 + }, + { + "epoch": 1.997867803837953, + "grad_norm": 0.37058215950930407, + "learning_rate": 3.011987772440825e-05, + "loss": 1.0171148777008057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25525736808776855, + "step": 469, + "valid_targets_mean": 13891.4, + "valid_targets_min": 1938 + }, + { + "epoch": 2.0, + "grad_norm": 0.4261470032561998, + "learning_rate": 3.006856081920277e-05, + "loss": 1.0270267724990845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5216562747955322, + "step": 470, + "valid_targets_mean": 15169.9, + "valid_targets_min": 9901 + }, + { + "epoch": 2.0042643923240937, + "grad_norm": 0.45304103112110533, + "learning_rate": 3.001715496995793e-05, + "loss": 0.9987781643867493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.251964271068573, + "step": 471, + "valid_targets_mean": 14206.0, + "valid_targets_min": 6976 + }, + { + "epoch": 2.008528784648188, + "grad_norm": 0.47018294489753626, + "learning_rate": 2.9965660630784715e-05, + "loss": 1.0175721645355225, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2600884437561035, + "step": 472, + "valid_targets_mean": 15563.6, + "valid_targets_min": 10378 + }, + { + "epoch": 2.0127931769722816, + "grad_norm": 0.3649858551533112, + "learning_rate": 2.9914078256575782e-05, + "loss": 1.0302276611328125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25027453899383545, + "step": 473, + "valid_targets_mean": 14867.3, + "valid_targets_min": 6280 + }, + { + "epoch": 2.0170575692963753, + "grad_norm": 0.45896794431925675, + "learning_rate": 2.9862408303001492e-05, + "loss": 1.076301097869873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2921193242073059, + "step": 474, + "valid_targets_mean": 14555.3, + "valid_targets_min": 2249 + }, + { + "epoch": 2.021321961620469, + "grad_norm": 0.3481667692800604, + "learning_rate": 2.9810651226505875e-05, + "loss": 1.036280632019043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27446985244750977, + "step": 475, + "valid_targets_mean": 14317.6, + "valid_targets_min": 2299 + }, + { + "epoch": 2.025586353944563, + "grad_norm": 0.4601511024144995, + "learning_rate": 2.9758807484302566e-05, + "loss": 1.0325391292572021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24933630228042603, + "step": 476, + "valid_targets_mean": 13864.8, + "valid_targets_min": 2151 + }, + { + "epoch": 2.029850746268657, + "grad_norm": 0.448089815579243, + "learning_rate": 2.9706877534370822e-05, + "loss": 1.0386598110198975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25227582454681396, + "step": 477, + "valid_targets_mean": 14256.4, + "valid_targets_min": 2066 + }, + { + "epoch": 2.0341151385927505, + "grad_norm": 0.45781618197082224, + "learning_rate": 2.965486183545142e-05, + "loss": 1.0114233493804932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24822190403938293, + "step": 478, + "valid_targets_mean": 14081.0, + "valid_targets_min": 2909 + }, + { + "epoch": 2.038379530916844, + "grad_norm": 0.4349737541090553, + "learning_rate": 2.9602760847042645e-05, + "loss": 0.9995619058609009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25437045097351074, + "step": 479, + "valid_targets_mean": 15119.9, + "valid_targets_min": 4874 + }, + { + "epoch": 2.0426439232409384, + "grad_norm": 0.46144207194679, + "learning_rate": 2.955057502939621e-05, + "loss": 0.9988946318626404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23824626207351685, + "step": 480, + "valid_targets_mean": 13312.1, + "valid_targets_min": 1519 + }, + { + "epoch": 2.046908315565032, + "grad_norm": 0.4196200363878079, + "learning_rate": 2.9498304843513193e-05, + "loss": 1.019971251487732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508181631565094, + "step": 481, + "valid_targets_mean": 14688.1, + "valid_targets_min": 2511 + }, + { + "epoch": 2.0511727078891258, + "grad_norm": 0.5292738143769523, + "learning_rate": 2.9445950751139957e-05, + "loss": 0.9783295392990112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24422825872898102, + "step": 482, + "valid_targets_mean": 14867.3, + "valid_targets_min": 7621 + }, + { + "epoch": 2.0554371002132195, + "grad_norm": 0.4368934079863611, + "learning_rate": 2.939351321476412e-05, + "loss": 0.9889360666275024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24530810117721558, + "step": 483, + "valid_targets_mean": 15264.8, + "valid_targets_min": 9523 + }, + { + "epoch": 2.0597014925373136, + "grad_norm": 0.4067956384715909, + "learning_rate": 2.9340992697610393e-05, + "loss": 0.9587419033050537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23476752638816833, + "step": 484, + "valid_targets_mean": 14411.7, + "valid_targets_min": 2050 + }, + { + "epoch": 2.0639658848614073, + "grad_norm": 0.3663229598923624, + "learning_rate": 2.9288389663636537e-05, + "loss": 0.9435993432998657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2204243242740631, + "step": 485, + "valid_targets_mean": 14179.5, + "valid_targets_min": 4288 + }, + { + "epoch": 2.068230277185501, + "grad_norm": 0.3948874114859334, + "learning_rate": 2.923570457752925e-05, + "loss": 0.9886394143104553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22084525227546692, + "step": 486, + "valid_targets_mean": 13013.9, + "valid_targets_min": 1831 + }, + { + "epoch": 2.0724946695095947, + "grad_norm": 0.4053367288722847, + "learning_rate": 2.9182937904700078e-05, + "loss": 0.9575808644294739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23733314871788025, + "step": 487, + "valid_targets_mean": 14603.0, + "valid_targets_min": 5807 + }, + { + "epoch": 2.076759061833689, + "grad_norm": 0.357959078881158, + "learning_rate": 2.9130090111281278e-05, + "loss": 1.0362825393676758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26652559638023376, + "step": 488, + "valid_targets_mean": 14712.9, + "valid_targets_min": 5686 + }, + { + "epoch": 2.0810234541577826, + "grad_norm": 0.3625030051910193, + "learning_rate": 2.9077161664121722e-05, + "loss": 0.998286247253418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2629318833351135, + "step": 489, + "valid_targets_mean": 14764.6, + "valid_targets_min": 8317 + }, + { + "epoch": 2.0852878464818763, + "grad_norm": 0.3581426489116462, + "learning_rate": 2.902415303078275e-05, + "loss": 1.0012212991714478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2437218427658081, + "step": 490, + "valid_targets_mean": 14034.1, + "valid_targets_min": 2242 + }, + { + "epoch": 2.08955223880597, + "grad_norm": 0.33251620156796063, + "learning_rate": 2.8971064679534072e-05, + "loss": 0.9862103462219238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26053065061569214, + "step": 491, + "valid_targets_mean": 14437.6, + "valid_targets_min": 3668 + }, + { + "epoch": 2.093816631130064, + "grad_norm": 0.3758321639405964, + "learning_rate": 2.8917897079349604e-05, + "loss": 1.006826400756836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373615801334381, + "step": 492, + "valid_targets_mean": 14412.9, + "valid_targets_min": 1726 + }, + { + "epoch": 2.098081023454158, + "grad_norm": 0.3514965810267716, + "learning_rate": 2.8864650699903336e-05, + "loss": 0.9882227778434753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2510005235671997, + "step": 493, + "valid_targets_mean": 14247.2, + "valid_targets_min": 6013 + }, + { + "epoch": 2.1023454157782515, + "grad_norm": 0.3471368865177626, + "learning_rate": 2.881132601156518e-05, + "loss": 0.9697372913360596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2327420562505722, + "step": 494, + "valid_targets_mean": 14427.5, + "valid_targets_min": 4288 + }, + { + "epoch": 2.106609808102345, + "grad_norm": 0.2885460017410152, + "learning_rate": 2.8757923485396805e-05, + "loss": 0.9951438903808594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23933443427085876, + "step": 495, + "valid_targets_mean": 14137.9, + "valid_targets_min": 2994 + }, + { + "epoch": 2.1108742004264394, + "grad_norm": 0.3467994325621212, + "learning_rate": 2.8704443593147517e-05, + "loss": 0.9793698787689209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518441677093506, + "step": 496, + "valid_targets_mean": 14064.2, + "valid_targets_min": 1843 + }, + { + "epoch": 2.115138592750533, + "grad_norm": 0.3033452462810623, + "learning_rate": 2.8650886807250024e-05, + "loss": 1.0147504806518555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24597987532615662, + "step": 497, + "valid_targets_mean": 14743.6, + "valid_targets_min": 752 + }, + { + "epoch": 2.1194029850746268, + "grad_norm": 0.3408070825535228, + "learning_rate": 2.8597253600816332e-05, + "loss": 0.952460527420044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23443883657455444, + "step": 498, + "valid_targets_mean": 14247.1, + "valid_targets_min": 575 + }, + { + "epoch": 2.1236673773987205, + "grad_norm": 0.3054078298080589, + "learning_rate": 2.8543544447633517e-05, + "loss": 1.0123192071914673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2612317204475403, + "step": 499, + "valid_targets_mean": 15464.2, + "valid_targets_min": 8312 + }, + { + "epoch": 2.1279317697228146, + "grad_norm": 0.3700609183978435, + "learning_rate": 2.8489759822159558e-05, + "loss": 1.0260965824127197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564004361629486, + "step": 500, + "valid_targets_mean": 14635.7, + "valid_targets_min": 3694 + }, + { + "epoch": 2.1321961620469083, + "grad_norm": 0.33328575187573406, + "learning_rate": 2.843590019951914e-05, + "loss": 0.9698889255523682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24240043759346008, + "step": 501, + "valid_targets_mean": 15021.9, + "valid_targets_min": 5862 + }, + { + "epoch": 2.136460554371002, + "grad_norm": 0.384299590636117, + "learning_rate": 2.838196605549948e-05, + "loss": 1.0217312574386597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2600533366203308, + "step": 502, + "valid_targets_mean": 14611.5, + "valid_targets_min": 3847 + }, + { + "epoch": 2.140724946695096, + "grad_norm": 0.3764425664813159, + "learning_rate": 2.8327957866546082e-05, + "loss": 1.0324029922485352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24160277843475342, + "step": 503, + "valid_targets_mean": 14911.7, + "valid_targets_min": 4490 + }, + { + "epoch": 2.14498933901919, + "grad_norm": 0.3441607332393246, + "learning_rate": 2.8273876109758568e-05, + "loss": 0.9821799397468567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24784724414348602, + "step": 504, + "valid_targets_mean": 14725.2, + "valid_targets_min": 3428 + }, + { + "epoch": 2.1492537313432836, + "grad_norm": 0.37941453583874735, + "learning_rate": 2.8219721262886427e-05, + "loss": 0.9939541816711426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2545952796936035, + "step": 505, + "valid_targets_mean": 14990.6, + "valid_targets_min": 6710 + }, + { + "epoch": 2.1535181236673773, + "grad_norm": 0.3281635762444424, + "learning_rate": 2.816549380432483e-05, + "loss": 0.9698700308799744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23794811964035034, + "step": 506, + "valid_targets_mean": 15332.5, + "valid_targets_min": 5308 + }, + { + "epoch": 2.1577825159914714, + "grad_norm": 0.38885336366973805, + "learning_rate": 2.8111194213110386e-05, + "loss": 0.9915518164634705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2491394281387329, + "step": 507, + "valid_targets_mean": 14937.6, + "valid_targets_min": 6979 + }, + { + "epoch": 2.162046908315565, + "grad_norm": 0.331297503961614, + "learning_rate": 2.805682296891691e-05, + "loss": 1.0347942113876343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24668872356414795, + "step": 508, + "valid_targets_mean": 14618.8, + "valid_targets_min": 2698 + }, + { + "epoch": 2.166311300639659, + "grad_norm": 0.3529187036876421, + "learning_rate": 2.8002380552051186e-05, + "loss": 1.0014612674713135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24774719774723053, + "step": 509, + "valid_targets_mean": 14498.8, + "valid_targets_min": 4776 + }, + { + "epoch": 2.1705756929637525, + "grad_norm": 0.366965077773044, + "learning_rate": 2.7947867443448728e-05, + "loss": 1.0508100986480713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.268143892288208, + "step": 510, + "valid_targets_mean": 15571.0, + "valid_targets_min": 7360 + }, + { + "epoch": 2.1748400852878467, + "grad_norm": 0.32906318378662697, + "learning_rate": 2.789328412466953e-05, + "loss": 0.9997203350067139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23755742609500885, + "step": 511, + "valid_targets_mean": 14751.3, + "valid_targets_min": 3243 + }, + { + "epoch": 2.1791044776119404, + "grad_norm": 0.35177788903347434, + "learning_rate": 2.7838631077893813e-05, + "loss": 1.0263261795043945, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25490206480026245, + "step": 512, + "valid_targets_mean": 14273.8, + "valid_targets_min": 2266 + }, + { + "epoch": 2.183368869936034, + "grad_norm": 0.34028455426739546, + "learning_rate": 2.7783908785917753e-05, + "loss": 1.010587215423584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2671951353549957, + "step": 513, + "valid_targets_mean": 14390.0, + "valid_targets_min": 4092 + }, + { + "epoch": 2.1876332622601278, + "grad_norm": 0.36693282979107555, + "learning_rate": 2.7729117732149244e-05, + "loss": 1.0085017681121826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25137555599212646, + "step": 514, + "valid_targets_mean": 14906.8, + "valid_targets_min": 2502 + }, + { + "epoch": 2.191897654584222, + "grad_norm": 0.3795448830054028, + "learning_rate": 2.7674258400603587e-05, + "loss": 1.0182987451553345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28089064359664917, + "step": 515, + "valid_targets_mean": 15310.5, + "valid_targets_min": 3271 + }, + { + "epoch": 2.1961620469083156, + "grad_norm": 0.38053785378880434, + "learning_rate": 2.761933127589927e-05, + "loss": 0.9880110621452332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.254066526889801, + "step": 516, + "valid_targets_mean": 15095.4, + "valid_targets_min": 7989 + }, + { + "epoch": 2.2004264392324093, + "grad_norm": 0.34255644955233777, + "learning_rate": 2.7564336843253633e-05, + "loss": 1.0093759298324585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23695147037506104, + "step": 517, + "valid_targets_mean": 14574.7, + "valid_targets_min": 4055 + }, + { + "epoch": 2.204690831556503, + "grad_norm": 0.3928719556564055, + "learning_rate": 2.7509275588478606e-05, + "loss": 0.9518511295318604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22793179750442505, + "step": 518, + "valid_targets_mean": 14171.1, + "valid_targets_min": 2066 + }, + { + "epoch": 2.208955223880597, + "grad_norm": 0.33004662568032267, + "learning_rate": 2.7454147997976404e-05, + "loss": 1.013105869293213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25704652070999146, + "step": 519, + "valid_targets_mean": 14889.4, + "valid_targets_min": 7756 + }, + { + "epoch": 2.213219616204691, + "grad_norm": 0.38022286100808406, + "learning_rate": 2.7398954558735272e-05, + "loss": 1.0273163318634033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2711549997329712, + "step": 520, + "valid_targets_mean": 14467.1, + "valid_targets_min": 4204 + }, + { + "epoch": 2.2174840085287846, + "grad_norm": 0.33660418341875553, + "learning_rate": 2.7343695758325125e-05, + "loss": 1.0346674919128418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25120994448661804, + "step": 521, + "valid_targets_mean": 13929.1, + "valid_targets_min": 2697 + }, + { + "epoch": 2.2217484008528783, + "grad_norm": 0.37649788693011715, + "learning_rate": 2.7288372084893282e-05, + "loss": 0.9666027426719666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24410052597522736, + "step": 522, + "valid_targets_mean": 15248.3, + "valid_targets_min": 1517 + }, + { + "epoch": 2.2260127931769724, + "grad_norm": 0.34413713775157817, + "learning_rate": 2.7232984027160126e-05, + "loss": 0.9904748201370239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21707294881343842, + "step": 523, + "valid_targets_mean": 14821.8, + "valid_targets_min": 2062 + }, + { + "epoch": 2.230277185501066, + "grad_norm": 0.34164135590434347, + "learning_rate": 2.7177532074414822e-05, + "loss": 1.0020073652267456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25181591510772705, + "step": 524, + "valid_targets_mean": 13669.2, + "valid_targets_min": 1189 + }, + { + "epoch": 2.23454157782516, + "grad_norm": 0.36586032609517777, + "learning_rate": 2.712201671651094e-05, + "loss": 1.0094950199127197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2598450779914856, + "step": 525, + "valid_targets_mean": 14686.9, + "valid_targets_min": 3001 + }, + { + "epoch": 2.2388059701492535, + "grad_norm": 0.37728559760742353, + "learning_rate": 2.7066438443862205e-05, + "loss": 0.9910581111907959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23355071246623993, + "step": 526, + "valid_targets_mean": 14217.2, + "valid_targets_min": 3674 + }, + { + "epoch": 2.2430703624733477, + "grad_norm": 0.30553105358648747, + "learning_rate": 2.701079774743808e-05, + "loss": 0.9488228559494019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2328556776046753, + "step": 527, + "valid_targets_mean": 14927.9, + "valid_targets_min": 6321 + }, + { + "epoch": 2.2473347547974414, + "grad_norm": 0.3599644735835354, + "learning_rate": 2.6955095118759496e-05, + "loss": 1.0226023197174072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2568567991256714, + "step": 528, + "valid_targets_mean": 14094.9, + "valid_targets_min": 3640 + }, + { + "epoch": 2.251599147121535, + "grad_norm": 0.29077069956911633, + "learning_rate": 2.689933104989447e-05, + "loss": 1.003893256187439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2466341108083725, + "step": 529, + "valid_targets_mean": 14749.3, + "valid_targets_min": 2099 + }, + { + "epoch": 2.2558635394456292, + "grad_norm": 0.3365561847842144, + "learning_rate": 2.6843506033453777e-05, + "loss": 0.9687828421592712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22781942784786224, + "step": 530, + "valid_targets_mean": 15208.4, + "valid_targets_min": 6099 + }, + { + "epoch": 2.260127931769723, + "grad_norm": 0.29568995670779963, + "learning_rate": 2.6787620562586587e-05, + "loss": 0.9960900545120239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2638796269893646, + "step": 531, + "valid_targets_mean": 15470.2, + "valid_targets_min": 8826 + }, + { + "epoch": 2.2643923240938166, + "grad_norm": 0.3472237072820283, + "learning_rate": 2.673167513097613e-05, + "loss": 0.9789157509803772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2449982911348343, + "step": 532, + "valid_targets_mean": 14315.0, + "valid_targets_min": 2856 + }, + { + "epoch": 2.2686567164179103, + "grad_norm": 0.3108163488961833, + "learning_rate": 2.6675670232835297e-05, + "loss": 0.9460334777832031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2233898937702179, + "step": 533, + "valid_targets_mean": 13737.3, + "valid_targets_min": 1778 + }, + { + "epoch": 2.272921108742004, + "grad_norm": 0.3297898255290531, + "learning_rate": 2.661960636290231e-05, + "loss": 0.9994051456451416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2590315639972687, + "step": 534, + "valid_targets_mean": 14841.2, + "valid_targets_min": 3402 + }, + { + "epoch": 2.277185501066098, + "grad_norm": 0.3264775214516908, + "learning_rate": 2.6563484016436346e-05, + "loss": 1.02659273147583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2553572654724121, + "step": 535, + "valid_targets_mean": 14679.2, + "valid_targets_min": 5570 + }, + { + "epoch": 2.281449893390192, + "grad_norm": 0.33784002917961925, + "learning_rate": 2.6507303689213143e-05, + "loss": 1.0258793830871582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3054291605949402, + "step": 536, + "valid_targets_mean": 15405.3, + "valid_targets_min": 11021 + }, + { + "epoch": 2.2857142857142856, + "grad_norm": 0.32626498868482645, + "learning_rate": 2.6451065877520634e-05, + "loss": 0.9970362186431885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.254366397857666, + "step": 537, + "valid_targets_mean": 15113.9, + "valid_targets_min": 7736 + }, + { + "epoch": 2.2899786780383797, + "grad_norm": 0.3414737788321613, + "learning_rate": 2.639477107815455e-05, + "loss": 0.9489619731903076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25024449825286865, + "step": 538, + "valid_targets_mean": 14803.9, + "valid_targets_min": 6681 + }, + { + "epoch": 2.2942430703624734, + "grad_norm": 0.3636952386802788, + "learning_rate": 2.633841978841406e-05, + "loss": 0.9728654623031616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2321648895740509, + "step": 539, + "valid_targets_mean": 14362.3, + "valid_targets_min": 7984 + }, + { + "epoch": 2.298507462686567, + "grad_norm": 0.3057411522579838, + "learning_rate": 2.6282012506097347e-05, + "loss": 1.0068259239196777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509293556213379, + "step": 540, + "valid_targets_mean": 14533.2, + "valid_targets_min": 1498 + }, + { + "epoch": 2.302771855010661, + "grad_norm": 0.351676162724982, + "learning_rate": 2.622554972949724e-05, + "loss": 1.0444014072418213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24729007482528687, + "step": 541, + "valid_targets_mean": 13808.7, + "valid_targets_min": 1923 + }, + { + "epoch": 2.307036247334755, + "grad_norm": 0.32443165073172486, + "learning_rate": 2.6169031957396778e-05, + "loss": 1.0184507369995117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2640179991722107, + "step": 542, + "valid_targets_mean": 14197.4, + "valid_targets_min": 1999 + }, + { + "epoch": 2.3113006396588487, + "grad_norm": 0.3414797944901497, + "learning_rate": 2.611245968906482e-05, + "loss": 0.9821099638938904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.259810209274292, + "step": 543, + "valid_targets_mean": 14675.0, + "valid_targets_min": 3536 + }, + { + "epoch": 2.3155650319829424, + "grad_norm": 0.4023877924977479, + "learning_rate": 2.605583342425165e-05, + "loss": 0.9992808103561401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23244965076446533, + "step": 544, + "valid_targets_mean": 13720.2, + "valid_targets_min": 1863 + }, + { + "epoch": 2.319829424307036, + "grad_norm": 0.36349616641953175, + "learning_rate": 2.5999153663184546e-05, + "loss": 1.0641918182373047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29147133231163025, + "step": 545, + "valid_targets_mean": 14533.0, + "valid_targets_min": 3852 + }, + { + "epoch": 2.3240938166311302, + "grad_norm": 0.3775915564466432, + "learning_rate": 2.594242090656335e-05, + "loss": 0.9966145753860474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27358198165893555, + "step": 546, + "valid_targets_mean": 15236.0, + "valid_targets_min": 1593 + }, + { + "epoch": 2.328358208955224, + "grad_norm": 0.4314874779898479, + "learning_rate": 2.5885635655556075e-05, + "loss": 0.9890132546424866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2735801339149475, + "step": 547, + "valid_targets_mean": 15963.0, + "valid_targets_min": 11813 + }, + { + "epoch": 2.3326226012793176, + "grad_norm": 0.3854914731013695, + "learning_rate": 2.5828798411794443e-05, + "loss": 1.017390489578247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2258615344762802, + "step": 548, + "valid_targets_mean": 13778.4, + "valid_targets_min": 2715 + }, + { + "epoch": 2.3368869936034113, + "grad_norm": 0.3659662850075527, + "learning_rate": 2.5771909677369484e-05, + "loss": 0.9837027788162231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2616751790046692, + "step": 549, + "valid_targets_mean": 15342.6, + "valid_targets_min": 9001 + }, + { + "epoch": 2.3411513859275055, + "grad_norm": 0.4445302071685335, + "learning_rate": 2.571496995482709e-05, + "loss": 1.036919116973877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25041985511779785, + "step": 550, + "valid_targets_mean": 14808.0, + "valid_targets_min": 2730 + }, + { + "epoch": 2.345415778251599, + "grad_norm": 0.3727552816752512, + "learning_rate": 2.565797974716357e-05, + "loss": 1.0211420059204102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23311321437358856, + "step": 551, + "valid_targets_mean": 14403.5, + "valid_targets_min": 5487 + }, + { + "epoch": 2.349680170575693, + "grad_norm": 0.42161360368650125, + "learning_rate": 2.5600939557821205e-05, + "loss": 0.9743055105209351, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24897313117980957, + "step": 552, + "valid_targets_mean": 14613.9, + "valid_targets_min": 2797 + }, + { + "epoch": 2.3539445628997866, + "grad_norm": 0.3041903132801738, + "learning_rate": 2.5543849890683813e-05, + "loss": 0.9749882221221924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25994178652763367, + "step": 553, + "valid_targets_mean": 15261.2, + "valid_targets_min": 7622 + }, + { + "epoch": 2.3582089552238807, + "grad_norm": 0.3643368797701379, + "learning_rate": 2.548671125007229e-05, + "loss": 0.9856359958648682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2501842975616455, + "step": 554, + "valid_targets_mean": 13247.3, + "valid_targets_min": 2399 + }, + { + "epoch": 2.3624733475479744, + "grad_norm": 0.3502290439799157, + "learning_rate": 2.5429524140740155e-05, + "loss": 1.0194714069366455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653921842575073, + "step": 555, + "valid_targets_mean": 14490.5, + "valid_targets_min": 3899 + }, + { + "epoch": 2.366737739872068, + "grad_norm": 0.3018990261751899, + "learning_rate": 2.537228906786908e-05, + "loss": 1.0033659934997559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24577811360359192, + "step": 556, + "valid_targets_mean": 14929.9, + "valid_targets_min": 7722 + }, + { + "epoch": 2.3710021321961623, + "grad_norm": 0.3580593178576187, + "learning_rate": 2.5315006537064473e-05, + "loss": 0.996933102607727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2340065836906433, + "step": 557, + "valid_targets_mean": 14046.7, + "valid_targets_min": 4710 + }, + { + "epoch": 2.375266524520256, + "grad_norm": 0.3256183790600885, + "learning_rate": 2.5257677054350927e-05, + "loss": 0.978560209274292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2356126606464386, + "step": 558, + "valid_targets_mean": 14559.5, + "valid_targets_min": 2243 + }, + { + "epoch": 2.3795309168443497, + "grad_norm": 0.3407321064506055, + "learning_rate": 2.5200301126167857e-05, + "loss": 0.9780471920967102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23114609718322754, + "step": 559, + "valid_targets_mean": 14566.1, + "valid_targets_min": 7120 + }, + { + "epoch": 2.3837953091684434, + "grad_norm": 0.3583571276732938, + "learning_rate": 2.514287925936492e-05, + "loss": 1.0212950706481934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661419212818146, + "step": 560, + "valid_targets_mean": 15004.2, + "valid_targets_min": 4432 + }, + { + "epoch": 2.388059701492537, + "grad_norm": 0.33664147331934247, + "learning_rate": 2.5085411961197626e-05, + "loss": 1.0373973846435547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24310649931430817, + "step": 561, + "valid_targets_mean": 14641.2, + "valid_targets_min": 5183 + }, + { + "epoch": 2.3923240938166312, + "grad_norm": 0.35707958581650207, + "learning_rate": 2.502789973932278e-05, + "loss": 1.0003979206085205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25113213062286377, + "step": 562, + "valid_targets_mean": 14444.3, + "valid_targets_min": 1649 + }, + { + "epoch": 2.396588486140725, + "grad_norm": 0.3209570544747951, + "learning_rate": 2.4970343101794073e-05, + "loss": 1.0033340454101562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23442038893699646, + "step": 563, + "valid_targets_mean": 13773.7, + "valid_targets_min": 2585 + }, + { + "epoch": 2.4008528784648187, + "grad_norm": 0.34273894903375246, + "learning_rate": 2.4912742557057538e-05, + "loss": 0.9911829233169556, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2385222613811493, + "step": 564, + "valid_targets_mean": 14328.3, + "valid_targets_min": 1192 + }, + { + "epoch": 2.405117270788913, + "grad_norm": 0.30874004275234807, + "learning_rate": 2.485509861394708e-05, + "loss": 1.0349599123001099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2621227502822876, + "step": 565, + "valid_targets_mean": 14846.8, + "valid_targets_min": 2791 + }, + { + "epoch": 2.4093816631130065, + "grad_norm": 0.28513054852536457, + "learning_rate": 2.4797411781679975e-05, + "loss": 0.9519776701927185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2727797031402588, + "step": 566, + "valid_targets_mean": 14597.2, + "valid_targets_min": 3095 + }, + { + "epoch": 2.4136460554371, + "grad_norm": 0.3681769122591231, + "learning_rate": 2.473968256985238e-05, + "loss": 1.0031790733337402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2726595401763916, + "step": 567, + "valid_targets_mean": 14863.2, + "valid_targets_min": 5634 + }, + { + "epoch": 2.417910447761194, + "grad_norm": 0.2820332686420988, + "learning_rate": 2.4681911488434825e-05, + "loss": 1.0346243381500244, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23928891122341156, + "step": 568, + "valid_targets_mean": 14235.1, + "valid_targets_min": 3758 + }, + { + "epoch": 2.4221748400852876, + "grad_norm": 0.345959881180222, + "learning_rate": 2.4624099047767702e-05, + "loss": 1.0115197896957397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2657356262207031, + "step": 569, + "valid_targets_mean": 15144.8, + "valid_targets_min": 6786 + }, + { + "epoch": 2.4264392324093818, + "grad_norm": 0.3438789637735161, + "learning_rate": 2.4566245758556787e-05, + "loss": 0.968398928642273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23302385210990906, + "step": 570, + "valid_targets_mean": 14613.6, + "valid_targets_min": 2792 + }, + { + "epoch": 2.4307036247334755, + "grad_norm": 0.33811336765989447, + "learning_rate": 2.4508352131868664e-05, + "loss": 1.0073961019515991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26625654101371765, + "step": 571, + "valid_targets_mean": 14827.8, + "valid_targets_min": 1388 + }, + { + "epoch": 2.434968017057569, + "grad_norm": 0.35683634326397096, + "learning_rate": 2.445041867912629e-05, + "loss": 0.9651147723197937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25018009543418884, + "step": 572, + "valid_targets_mean": 14830.1, + "valid_targets_min": 4156 + }, + { + "epoch": 2.4392324093816633, + "grad_norm": 0.3524895752220168, + "learning_rate": 2.439244591210443e-05, + "loss": 0.9606080651283264, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25262510776519775, + "step": 573, + "valid_targets_mean": 15141.2, + "valid_targets_min": 7503 + }, + { + "epoch": 2.443496801705757, + "grad_norm": 0.35416817181758903, + "learning_rate": 2.4334434342925133e-05, + "loss": 0.9872428178787231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24746586382389069, + "step": 574, + "valid_targets_mean": 14546.2, + "valid_targets_min": 3910 + }, + { + "epoch": 2.4477611940298507, + "grad_norm": 0.3714939792072648, + "learning_rate": 2.4276384484053227e-05, + "loss": 0.9739286303520203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2626081705093384, + "step": 575, + "valid_targets_mean": 15743.1, + "valid_targets_min": 5391 + }, + { + "epoch": 2.4520255863539444, + "grad_norm": 0.3267786224236743, + "learning_rate": 2.4218296848291795e-05, + "loss": 1.035171389579773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23967371881008148, + "step": 576, + "valid_targets_mean": 14141.6, + "valid_targets_min": 6849 + }, + { + "epoch": 2.4562899786780386, + "grad_norm": 0.37728983603227556, + "learning_rate": 2.4160171948777603e-05, + "loss": 0.9901844263076782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.260466992855072, + "step": 577, + "valid_targets_mean": 15361.8, + "valid_targets_min": 5801 + }, + { + "epoch": 2.4605543710021323, + "grad_norm": 0.34470084170040144, + "learning_rate": 2.410201029897665e-05, + "loss": 1.0634323358535767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2721429765224457, + "step": 578, + "valid_targets_mean": 15740.7, + "valid_targets_min": 11262 + }, + { + "epoch": 2.464818763326226, + "grad_norm": 0.29350111028258313, + "learning_rate": 2.4043812412679532e-05, + "loss": 0.9816789031028748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24625752866268158, + "step": 579, + "valid_targets_mean": 14815.9, + "valid_targets_min": 6065 + }, + { + "epoch": 2.4690831556503197, + "grad_norm": 0.38332974196139513, + "learning_rate": 2.3985578803996985e-05, + "loss": 1.0299735069274902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2443036288022995, + "step": 580, + "valid_targets_mean": 13771.2, + "valid_targets_min": 1532 + }, + { + "epoch": 2.473347547974414, + "grad_norm": 0.27749887123963046, + "learning_rate": 2.392730998735529e-05, + "loss": 1.0363097190856934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24988089501857758, + "step": 581, + "valid_targets_mean": 15384.0, + "valid_targets_min": 3886 + }, + { + "epoch": 2.4776119402985075, + "grad_norm": 0.3714639432663393, + "learning_rate": 2.3869006477491755e-05, + "loss": 1.0010910034179688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24456912279129028, + "step": 582, + "valid_targets_mean": 15580.9, + "valid_targets_min": 4483 + }, + { + "epoch": 2.481876332622601, + "grad_norm": 0.2826495723285361, + "learning_rate": 2.381066878945017e-05, + "loss": 0.9860137701034546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2536880075931549, + "step": 583, + "valid_targets_mean": 14908.8, + "valid_targets_min": 2910 + }, + { + "epoch": 2.486140724946695, + "grad_norm": 0.40701249580578086, + "learning_rate": 2.3752297438576257e-05, + "loss": 0.9822453260421753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24656826257705688, + "step": 584, + "valid_targets_mean": 15053.9, + "valid_targets_min": 5225 + }, + { + "epoch": 2.490405117270789, + "grad_norm": 0.3510159174487897, + "learning_rate": 2.3693892940513074e-05, + "loss": 1.0518721342086792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570648193359375, + "step": 585, + "valid_targets_mean": 14092.7, + "valid_targets_min": 4352 + }, + { + "epoch": 2.4946695095948828, + "grad_norm": 0.3880025590367412, + "learning_rate": 2.3635455811196536e-05, + "loss": 1.0154331922531128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2572515606880188, + "step": 586, + "valid_targets_mean": 14364.0, + "valid_targets_min": 2248 + }, + { + "epoch": 2.4989339019189765, + "grad_norm": 0.35629403874874577, + "learning_rate": 2.3576986566850796e-05, + "loss": 1.0354433059692383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22509129345417023, + "step": 587, + "valid_targets_mean": 13014.9, + "valid_targets_min": 3563 + }, + { + "epoch": 2.50319829424307, + "grad_norm": 0.40934673917041575, + "learning_rate": 2.351848572398371e-05, + "loss": 0.9651215672492981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23150211572647095, + "step": 588, + "valid_targets_mean": 13630.2, + "valid_targets_min": 1625 + }, + { + "epoch": 2.5074626865671643, + "grad_norm": 0.29781159312023847, + "learning_rate": 2.3459953799382276e-05, + "loss": 0.9885987639427185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25822120904922485, + "step": 589, + "valid_targets_mean": 14645.2, + "valid_targets_min": 1150 + }, + { + "epoch": 2.511727078891258, + "grad_norm": 0.3673013969280717, + "learning_rate": 2.3401391310108054e-05, + "loss": 1.0021411180496216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27557167410850525, + "step": 590, + "valid_targets_mean": 15001.2, + "valid_targets_min": 2024 + }, + { + "epoch": 2.5159914712153517, + "grad_norm": 0.29338489273964863, + "learning_rate": 2.3342798773492602e-05, + "loss": 0.9634994268417358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2440796196460724, + "step": 591, + "valid_targets_mean": 13855.1, + "valid_targets_min": 5690 + }, + { + "epoch": 2.520255863539446, + "grad_norm": 0.3488525245756821, + "learning_rate": 2.328417670713294e-05, + "loss": 1.0279403924942017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2383851408958435, + "step": 592, + "valid_targets_mean": 13533.6, + "valid_targets_min": 1542 + }, + { + "epoch": 2.5245202558635396, + "grad_norm": 0.3109864425433428, + "learning_rate": 2.3225525628886918e-05, + "loss": 1.0075408220291138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.236200213432312, + "step": 593, + "valid_targets_mean": 14168.2, + "valid_targets_min": 1389 + }, + { + "epoch": 2.5287846481876333, + "grad_norm": 0.32460960511845355, + "learning_rate": 2.3166846056868687e-05, + "loss": 1.0584495067596436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.251211017370224, + "step": 594, + "valid_targets_mean": 14296.9, + "valid_targets_min": 2014 + }, + { + "epoch": 2.533049040511727, + "grad_norm": 0.3605753288926691, + "learning_rate": 2.31081385094441e-05, + "loss": 1.050918698310852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27081745862960815, + "step": 595, + "valid_targets_mean": 14749.0, + "valid_targets_min": 1741 + }, + { + "epoch": 2.5373134328358207, + "grad_norm": 0.30930846078014956, + "learning_rate": 2.304940350522615e-05, + "loss": 0.979073703289032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24194717407226562, + "step": 596, + "valid_targets_mean": 15197.2, + "valid_targets_min": 5355 + }, + { + "epoch": 2.541577825159915, + "grad_norm": 0.3451913732751782, + "learning_rate": 2.299064156307037e-05, + "loss": 0.9640562534332275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509298622608185, + "step": 597, + "valid_targets_mean": 15269.8, + "valid_targets_min": 6535 + }, + { + "epoch": 2.5458422174840085, + "grad_norm": 0.32132576352199543, + "learning_rate": 2.2931853202070275e-05, + "loss": 1.0127673149108887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24850039184093475, + "step": 598, + "valid_targets_mean": 14430.6, + "valid_targets_min": 5331 + }, + { + "epoch": 2.550106609808102, + "grad_norm": 0.3447521945135351, + "learning_rate": 2.2873038941552724e-05, + "loss": 0.9941070079803467, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24251219630241394, + "step": 599, + "valid_targets_mean": 14965.9, + "valid_targets_min": 5019 + }, + { + "epoch": 2.5543710021321964, + "grad_norm": 0.3603682710041226, + "learning_rate": 2.2814199301073412e-05, + "loss": 0.9789334535598755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23000304400920868, + "step": 600, + "valid_targets_mean": 14331.5, + "valid_targets_min": 2859 + }, + { + "epoch": 2.55863539445629, + "grad_norm": 0.4524316632207512, + "learning_rate": 2.27553348004122e-05, + "loss": 1.0233545303344727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2751467823982239, + "step": 601, + "valid_targets_mean": 14395.6, + "valid_targets_min": 2323 + }, + { + "epoch": 2.5628997867803838, + "grad_norm": 0.3405044476615485, + "learning_rate": 2.2696445959568577e-05, + "loss": 0.987399697303772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2388504594564438, + "step": 602, + "valid_targets_mean": 14543.6, + "valid_targets_min": 4364 + }, + { + "epoch": 2.5671641791044775, + "grad_norm": 0.3664481016997422, + "learning_rate": 2.2637533298757064e-05, + "loss": 1.0295848846435547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2575531601905823, + "step": 603, + "valid_targets_mean": 14556.6, + "valid_targets_min": 5755 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.2958680019677503, + "learning_rate": 2.2578597338402567e-05, + "loss": 1.0304653644561768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2653025686740875, + "step": 604, + "valid_targets_mean": 14233.3, + "valid_targets_min": 1448 + }, + { + "epoch": 2.5756929637526653, + "grad_norm": 0.3380173235948929, + "learning_rate": 2.2519638599135844e-05, + "loss": 0.9955521821975708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2450968623161316, + "step": 605, + "valid_targets_mean": 14312.9, + "valid_targets_min": 1957 + }, + { + "epoch": 2.579957356076759, + "grad_norm": 0.2779481999776168, + "learning_rate": 2.2460657601788875e-05, + "loss": 1.0013747215270996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25416964292526245, + "step": 606, + "valid_targets_mean": 14795.7, + "valid_targets_min": 4826 + }, + { + "epoch": 2.5842217484008527, + "grad_norm": 0.3435486497386558, + "learning_rate": 2.2401654867390256e-05, + "loss": 1.004853367805481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2516235411167145, + "step": 607, + "valid_targets_mean": 14743.6, + "valid_targets_min": 3783 + }, + { + "epoch": 2.588486140724947, + "grad_norm": 0.2929995479537222, + "learning_rate": 2.2342630917160605e-05, + "loss": 0.9981028437614441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2684582471847534, + "step": 608, + "valid_targets_mean": 15170.2, + "valid_targets_min": 10692 + }, + { + "epoch": 2.5927505330490406, + "grad_norm": 0.3834421277554267, + "learning_rate": 2.2283586272507975e-05, + "loss": 1.0357897281646729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2626771926879883, + "step": 609, + "valid_targets_mean": 14811.3, + "valid_targets_min": 3785 + }, + { + "epoch": 2.5970149253731343, + "grad_norm": 0.29333535838418107, + "learning_rate": 2.2224521455023193e-05, + "loss": 1.023173213005066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27299946546554565, + "step": 610, + "valid_targets_mean": 14908.2, + "valid_targets_min": 5562 + }, + { + "epoch": 2.6012793176972284, + "grad_norm": 0.36625779396404645, + "learning_rate": 2.216543698647534e-05, + "loss": 1.0014777183532715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23676812648773193, + "step": 611, + "valid_targets_mean": 14785.9, + "valid_targets_min": 7223 + }, + { + "epoch": 2.605543710021322, + "grad_norm": 0.28588149465647306, + "learning_rate": 2.210633338880704e-05, + "loss": 0.9888811707496643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25012534856796265, + "step": 612, + "valid_targets_mean": 14344.6, + "valid_targets_min": 2032 + }, + { + "epoch": 2.609808102345416, + "grad_norm": 0.32582274237071657, + "learning_rate": 2.204721118412994e-05, + "loss": 0.9636735916137695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23523156344890594, + "step": 613, + "valid_targets_mean": 15641.1, + "valid_targets_min": 6636 + }, + { + "epoch": 2.6140724946695095, + "grad_norm": 0.2704351653560179, + "learning_rate": 2.1988070894720037e-05, + "loss": 0.9971131086349487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26915496587753296, + "step": 614, + "valid_targets_mean": 14540.5, + "valid_targets_min": 3303 + }, + { + "epoch": 2.6183368869936032, + "grad_norm": 0.32530372087862747, + "learning_rate": 2.192891304301309e-05, + "loss": 1.0155519247055054, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25380975008010864, + "step": 615, + "valid_targets_mean": 15039.4, + "valid_targets_min": 6067 + }, + { + "epoch": 2.6226012793176974, + "grad_norm": 0.2812902887326392, + "learning_rate": 2.18697381516e-05, + "loss": 0.9872410893440247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2469387799501419, + "step": 616, + "valid_targets_mean": 15058.1, + "valid_targets_min": 2443 + }, + { + "epoch": 2.626865671641791, + "grad_norm": 0.30977938730271676, + "learning_rate": 2.181054674322221e-05, + "loss": 1.0182151794433594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25168952345848083, + "step": 617, + "valid_targets_mean": 14824.7, + "valid_targets_min": 3184 + }, + { + "epoch": 2.631130063965885, + "grad_norm": 0.32384586042183855, + "learning_rate": 2.1751339340767043e-05, + "loss": 0.9708200097084045, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25685781240463257, + "step": 618, + "valid_targets_mean": 14443.2, + "valid_targets_min": 4836 + }, + { + "epoch": 2.635394456289979, + "grad_norm": 0.28582353916291997, + "learning_rate": 2.169211646726313e-05, + "loss": 1.0166131258010864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24662283062934875, + "step": 619, + "valid_targets_mean": 14274.7, + "valid_targets_min": 1403 + }, + { + "epoch": 2.6396588486140726, + "grad_norm": 0.34566164860426407, + "learning_rate": 2.163287864587576e-05, + "loss": 1.012539029121399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2719329297542572, + "step": 620, + "valid_targets_mean": 15074.4, + "valid_targets_min": 2880 + }, + { + "epoch": 2.6439232409381663, + "grad_norm": 0.28191692959061687, + "learning_rate": 2.157362639990229e-05, + "loss": 0.9720137119293213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2529488205909729, + "step": 621, + "valid_targets_mean": 14954.9, + "valid_targets_min": 4040 + }, + { + "epoch": 2.64818763326226, + "grad_norm": 0.32181494490315404, + "learning_rate": 2.151436025276747e-05, + "loss": 1.0196996927261353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558142840862274, + "step": 622, + "valid_targets_mean": 14778.2, + "valid_targets_min": 6227 + }, + { + "epoch": 2.6524520255863537, + "grad_norm": 0.3193343120806747, + "learning_rate": 2.145508072801888e-05, + "loss": 1.0273431539535522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2799755334854126, + "step": 623, + "valid_targets_mean": 15236.8, + "valid_targets_min": 8987 + }, + { + "epoch": 2.656716417910448, + "grad_norm": 0.2959598694056831, + "learning_rate": 2.1395788349322256e-05, + "loss": 1.0347175598144531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2716575860977173, + "step": 624, + "valid_targets_mean": 14910.9, + "valid_targets_min": 1083 + }, + { + "epoch": 2.6609808102345416, + "grad_norm": 0.32615860148425246, + "learning_rate": 2.133648364045689e-05, + "loss": 0.9719746112823486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21385571360588074, + "step": 625, + "valid_targets_mean": 13264.8, + "valid_targets_min": 1699 + }, + { + "epoch": 2.6652452025586353, + "grad_norm": 0.3092815072790684, + "learning_rate": 2.1277167125310996e-05, + "loss": 0.9729279279708862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24361443519592285, + "step": 626, + "valid_targets_mean": 14669.8, + "valid_targets_min": 2210 + }, + { + "epoch": 2.6695095948827294, + "grad_norm": 0.3171929824255565, + "learning_rate": 2.1217839327877098e-05, + "loss": 0.9938591718673706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2757403254508972, + "step": 627, + "valid_targets_mean": 14886.6, + "valid_targets_min": 3979 + }, + { + "epoch": 2.673773987206823, + "grad_norm": 0.32162050849555684, + "learning_rate": 2.1158500772247352e-05, + "loss": 1.0283288955688477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24078741669654846, + "step": 628, + "valid_targets_mean": 15458.4, + "valid_targets_min": 9633 + }, + { + "epoch": 2.678038379530917, + "grad_norm": 0.2955236736778956, + "learning_rate": 2.1099151982608985e-05, + "loss": 0.996048092842102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23683753609657288, + "step": 629, + "valid_targets_mean": 13504.6, + "valid_targets_min": 2559 + }, + { + "epoch": 2.6823027718550105, + "grad_norm": 0.2607480919393273, + "learning_rate": 2.1039793483239607e-05, + "loss": 1.012598991394043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2531105577945709, + "step": 630, + "valid_targets_mean": 15036.3, + "valid_targets_min": 4520 + }, + { + "epoch": 2.6865671641791042, + "grad_norm": 0.3023500155803285, + "learning_rate": 2.0980425798502616e-05, + "loss": 0.9994162321090698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25880444049835205, + "step": 631, + "valid_targets_mean": 14533.3, + "valid_targets_min": 4140 + }, + { + "epoch": 2.6908315565031984, + "grad_norm": 0.25438203117222763, + "learning_rate": 2.092104945284255e-05, + "loss": 0.9778115153312683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25283128023147583, + "step": 632, + "valid_targets_mean": 14683.9, + "valid_targets_min": 3664 + }, + { + "epoch": 2.695095948827292, + "grad_norm": 0.2830217534754341, + "learning_rate": 2.0861664970780434e-05, + "loss": 1.0162835121154785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2786669135093689, + "step": 633, + "valid_targets_mean": 14680.2, + "valid_targets_min": 2295 + }, + { + "epoch": 2.699360341151386, + "grad_norm": 0.2764232598639447, + "learning_rate": 2.08022728769092e-05, + "loss": 1.0076497793197632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2617707848548889, + "step": 634, + "valid_targets_mean": 14868.3, + "valid_targets_min": 8834 + }, + { + "epoch": 2.70362473347548, + "grad_norm": 0.27947688140337634, + "learning_rate": 2.0742873695889005e-05, + "loss": 0.9848247766494751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21173720061779022, + "step": 635, + "valid_targets_mean": 13743.9, + "valid_targets_min": 2490 + }, + { + "epoch": 2.7078891257995736, + "grad_norm": 0.2950208095330011, + "learning_rate": 2.0683467952442626e-05, + "loss": 1.0044206380844116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25531816482543945, + "step": 636, + "valid_targets_mean": 14205.1, + "valid_targets_min": 1530 + }, + { + "epoch": 2.7121535181236673, + "grad_norm": 0.31921787069887186, + "learning_rate": 2.0624056171350785e-05, + "loss": 1.0012754201889038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2538597881793976, + "step": 637, + "valid_targets_mean": 14988.6, + "valid_targets_min": 1570 + }, + { + "epoch": 2.716417910447761, + "grad_norm": 0.29814243793990697, + "learning_rate": 2.0564638877447566e-05, + "loss": 1.0156222581863403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2510865330696106, + "step": 638, + "valid_targets_mean": 14630.3, + "valid_targets_min": 1235 + }, + { + "epoch": 2.7206823027718547, + "grad_norm": 0.36746929301245546, + "learning_rate": 2.0505216595615742e-05, + "loss": 1.0245939493179321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569143772125244, + "step": 639, + "valid_targets_mean": 15198.6, + "valid_targets_min": 7587 + }, + { + "epoch": 2.724946695095949, + "grad_norm": 0.29201653474293837, + "learning_rate": 2.044578985078215e-05, + "loss": 1.0242359638214111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24316444993019104, + "step": 640, + "valid_targets_mean": 13845.4, + "valid_targets_min": 2933 + }, + { + "epoch": 2.7292110874200426, + "grad_norm": 0.3447320951372601, + "learning_rate": 2.0386359167913046e-05, + "loss": 0.9643347263336182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24850095808506012, + "step": 641, + "valid_targets_mean": 15607.5, + "valid_targets_min": 12085 + }, + { + "epoch": 2.7334754797441363, + "grad_norm": 0.29140710920784874, + "learning_rate": 2.0326925072009485e-05, + "loss": 0.9665999412536621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23093298077583313, + "step": 642, + "valid_targets_mean": 14645.0, + "valid_targets_min": 3174 + }, + { + "epoch": 2.7377398720682304, + "grad_norm": 0.30881055312250094, + "learning_rate": 2.0267488088102657e-05, + "loss": 1.0037893056869507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23332089185714722, + "step": 643, + "valid_targets_mean": 15101.0, + "valid_targets_min": 6402 + }, + { + "epoch": 2.742004264392324, + "grad_norm": 0.28449112886998956, + "learning_rate": 2.0208048741249288e-05, + "loss": 0.9691751003265381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25266605615615845, + "step": 644, + "valid_targets_mean": 15077.2, + "valid_targets_min": 9108 + }, + { + "epoch": 2.746268656716418, + "grad_norm": 0.3351438852568685, + "learning_rate": 2.014860755652695e-05, + "loss": 1.047579288482666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2878621518611908, + "step": 645, + "valid_targets_mean": 15099.8, + "valid_targets_min": 6838 + }, + { + "epoch": 2.750533049040512, + "grad_norm": 0.2925694919153366, + "learning_rate": 2.0089165059029477e-05, + "loss": 1.0120222568511963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2626033425331116, + "step": 646, + "valid_targets_mean": 15174.9, + "valid_targets_min": 4873 + }, + { + "epoch": 2.7547974413646057, + "grad_norm": 0.30476874327327724, + "learning_rate": 2.0029721773862277e-05, + "loss": 0.9922143220901489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24901431798934937, + "step": 647, + "valid_targets_mean": 13627.9, + "valid_targets_min": 1811 + }, + { + "epoch": 2.7590618336886994, + "grad_norm": 0.2861044184251968, + "learning_rate": 1.997027822613773e-05, + "loss": 0.9913243055343628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22186391055583954, + "step": 648, + "valid_targets_mean": 13473.2, + "valid_targets_min": 5255 + }, + { + "epoch": 2.763326226012793, + "grad_norm": 0.3215467503039592, + "learning_rate": 1.9910834940970533e-05, + "loss": 1.0141838788986206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2568804621696472, + "step": 649, + "valid_targets_mean": 13940.1, + "valid_targets_min": 1938 + }, + { + "epoch": 2.767590618336887, + "grad_norm": 0.28526218260612457, + "learning_rate": 1.985139244347305e-05, + "loss": 1.024746298789978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2506389915943146, + "step": 650, + "valid_targets_mean": 14478.3, + "valid_targets_min": 1684 + }, + { + "epoch": 2.771855010660981, + "grad_norm": 0.3255391066375804, + "learning_rate": 1.979195125875072e-05, + "loss": 1.0240997076034546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.275799959897995, + "step": 651, + "valid_targets_mean": 14935.5, + "valid_targets_min": 6628 + }, + { + "epoch": 2.7761194029850746, + "grad_norm": 0.2913341418155478, + "learning_rate": 1.9732511911897353e-05, + "loss": 0.9887863397598267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2484378069639206, + "step": 652, + "valid_targets_mean": 14193.0, + "valid_targets_min": 2212 + }, + { + "epoch": 2.7803837953091683, + "grad_norm": 0.26002973612839325, + "learning_rate": 1.9673074927990525e-05, + "loss": 0.9766459465026855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21592479944229126, + "step": 653, + "valid_targets_mean": 13946.5, + "valid_targets_min": 2227 + }, + { + "epoch": 2.7846481876332625, + "grad_norm": 0.3011939249051517, + "learning_rate": 1.9613640832086957e-05, + "loss": 1.017755150794983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2458583414554596, + "step": 654, + "valid_targets_mean": 14395.0, + "valid_targets_min": 2255 + }, + { + "epoch": 2.788912579957356, + "grad_norm": 0.27094161767688085, + "learning_rate": 1.9554210149217855e-05, + "loss": 0.9604583978652954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23105964064598083, + "step": 655, + "valid_targets_mean": 14670.8, + "valid_targets_min": 1239 + }, + { + "epoch": 2.79317697228145, + "grad_norm": 0.28745214224753945, + "learning_rate": 1.9494783404384265e-05, + "loss": 0.9861606955528259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2513197660446167, + "step": 656, + "valid_targets_mean": 13690.4, + "valid_targets_min": 2574 + }, + { + "epoch": 2.7974413646055436, + "grad_norm": 0.2694350691725643, + "learning_rate": 1.9435361122552437e-05, + "loss": 1.0003072023391724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26699090003967285, + "step": 657, + "valid_targets_mean": 15698.2, + "valid_targets_min": 11218 + }, + { + "epoch": 2.8017057569296373, + "grad_norm": 0.30467579312383347, + "learning_rate": 1.9375943828649215e-05, + "loss": 1.0127203464508057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24697598814964294, + "step": 658, + "valid_targets_mean": 13997.6, + "valid_targets_min": 4737 + }, + { + "epoch": 2.8059701492537314, + "grad_norm": 0.2986641340060596, + "learning_rate": 1.9316532047557378e-05, + "loss": 0.9938777089118958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2532762885093689, + "step": 659, + "valid_targets_mean": 13795.0, + "valid_targets_min": 3402 + }, + { + "epoch": 2.810234541577825, + "grad_norm": 0.27330007093437675, + "learning_rate": 1.9257126304110998e-05, + "loss": 0.98386549949646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24930505454540253, + "step": 660, + "valid_targets_mean": 14952.9, + "valid_targets_min": 6734 + }, + { + "epoch": 2.814498933901919, + "grad_norm": 0.3238728707341304, + "learning_rate": 1.919772712309081e-05, + "loss": 1.0038477182388306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22611001133918762, + "step": 661, + "valid_targets_mean": 14572.7, + "valid_targets_min": 1356 + }, + { + "epoch": 2.818763326226013, + "grad_norm": 0.274903170095313, + "learning_rate": 1.9138335029219572e-05, + "loss": 0.9993883371353149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.253467321395874, + "step": 662, + "valid_targets_mean": 15381.4, + "valid_targets_min": 4878 + }, + { + "epoch": 2.8230277185501067, + "grad_norm": 0.30407654470800843, + "learning_rate": 1.9078950547157458e-05, + "loss": 1.0459842681884766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2509838938713074, + "step": 663, + "valid_targets_mean": 14729.2, + "valid_targets_min": 5173 + }, + { + "epoch": 2.8272921108742004, + "grad_norm": 0.3061123867070012, + "learning_rate": 1.9019574201497387e-05, + "loss": 0.9924187064170837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2563045620918274, + "step": 664, + "valid_targets_mean": 14750.4, + "valid_targets_min": 5714 + }, + { + "epoch": 2.831556503198294, + "grad_norm": 0.30193406567165587, + "learning_rate": 1.8960206516760396e-05, + "loss": 1.0398309230804443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2820361256599426, + "step": 665, + "valid_targets_mean": 15179.4, + "valid_targets_min": 9622 + }, + { + "epoch": 2.835820895522388, + "grad_norm": 0.2929563961568466, + "learning_rate": 1.890084801739102e-05, + "loss": 0.9553192853927612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2163141965866089, + "step": 666, + "valid_targets_mean": 14456.8, + "valid_targets_min": 1677 + }, + { + "epoch": 2.840085287846482, + "grad_norm": 0.3091559645635843, + "learning_rate": 1.884149922775265e-05, + "loss": 0.9848713874816895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2401236593723297, + "step": 667, + "valid_targets_mean": 14748.1, + "valid_targets_min": 7099 + }, + { + "epoch": 2.8443496801705757, + "grad_norm": 0.30089460795798023, + "learning_rate": 1.878216067212291e-05, + "loss": 1.0005512237548828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23389124870300293, + "step": 668, + "valid_targets_mean": 14425.2, + "valid_targets_min": 5268 + }, + { + "epoch": 2.8486140724946694, + "grad_norm": 0.3261962138625656, + "learning_rate": 1.8722832874689007e-05, + "loss": 1.0311071872711182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2664957046508789, + "step": 669, + "valid_targets_mean": 15614.3, + "valid_targets_min": 11742 + }, + { + "epoch": 2.8528784648187635, + "grad_norm": 0.2878368826066572, + "learning_rate": 1.8663516359543123e-05, + "loss": 0.9968549013137817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24377457797527313, + "step": 670, + "valid_targets_mean": 13797.7, + "valid_targets_min": 2331 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.31838290235824035, + "learning_rate": 1.860421165067775e-05, + "loss": 1.0099412202835083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24596422910690308, + "step": 671, + "valid_targets_mean": 15301.4, + "valid_targets_min": 3307 + }, + { + "epoch": 2.861407249466951, + "grad_norm": 0.2754651147467406, + "learning_rate": 1.8544919271981125e-05, + "loss": 0.9846042394638062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23170757293701172, + "step": 672, + "valid_targets_mean": 14041.2, + "valid_targets_min": 2267 + }, + { + "epoch": 2.8656716417910446, + "grad_norm": 0.32847468922418677, + "learning_rate": 1.8485639747232535e-05, + "loss": 1.005096435546875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25937849283218384, + "step": 673, + "valid_targets_mean": 15096.3, + "valid_targets_min": 9399 + }, + { + "epoch": 2.8699360341151388, + "grad_norm": 0.30458243305632854, + "learning_rate": 1.8426373600097723e-05, + "loss": 1.0177894830703735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25794124603271484, + "step": 674, + "valid_targets_mean": 15325.4, + "valid_targets_min": 8396 + }, + { + "epoch": 2.8742004264392325, + "grad_norm": 0.3261755820068459, + "learning_rate": 1.836712135412424e-05, + "loss": 1.002445101737976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25215405225753784, + "step": 675, + "valid_targets_mean": 15016.1, + "valid_targets_min": 7838 + }, + { + "epoch": 2.878464818763326, + "grad_norm": 0.2997312968171682, + "learning_rate": 1.8307883532736878e-05, + "loss": 1.0102388858795166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24711883068084717, + "step": 676, + "valid_targets_mean": 15054.6, + "valid_targets_min": 3859 + }, + { + "epoch": 2.88272921108742, + "grad_norm": 0.2929198430346057, + "learning_rate": 1.8248660659232964e-05, + "loss": 0.9696129560470581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23157832026481628, + "step": 677, + "valid_targets_mean": 14770.5, + "valid_targets_min": 5402 + }, + { + "epoch": 2.886993603411514, + "grad_norm": 0.28073241284593736, + "learning_rate": 1.8189453256777798e-05, + "loss": 1.013519525527954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25993987917900085, + "step": 678, + "valid_targets_mean": 14648.9, + "valid_targets_min": 4446 + }, + { + "epoch": 2.8912579957356077, + "grad_norm": 0.2946695587948662, + "learning_rate": 1.8130261848399996e-05, + "loss": 0.9844130277633667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2361820936203003, + "step": 679, + "valid_targets_mean": 14444.9, + "valid_targets_min": 3093 + }, + { + "epoch": 2.8955223880597014, + "grad_norm": 0.28079565484091723, + "learning_rate": 1.8071086956986916e-05, + "loss": 0.956390380859375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22418171167373657, + "step": 680, + "valid_targets_mean": 13660.1, + "valid_targets_min": 1932 + }, + { + "epoch": 2.8997867803837956, + "grad_norm": 0.2801289330593702, + "learning_rate": 1.8011929105279967e-05, + "loss": 0.9912029504776001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2350948601961136, + "step": 681, + "valid_targets_mean": 14456.2, + "valid_targets_min": 5121 + }, + { + "epoch": 2.9040511727078893, + "grad_norm": 0.27325381262770354, + "learning_rate": 1.795278881587007e-05, + "loss": 0.9531004428863525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24051615595817566, + "step": 682, + "valid_targets_mean": 14905.6, + "valid_targets_min": 1896 + }, + { + "epoch": 2.908315565031983, + "grad_norm": 0.2986474799033835, + "learning_rate": 1.7893666611192962e-05, + "loss": 1.0013391971588135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2637110948562622, + "step": 683, + "valid_targets_mean": 14420.5, + "valid_targets_min": 6043 + }, + { + "epoch": 2.9125799573560767, + "grad_norm": 0.2625521170168768, + "learning_rate": 1.783456301352467e-05, + "loss": 1.032405972480774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2396593987941742, + "step": 684, + "valid_targets_mean": 14147.8, + "valid_targets_min": 2852 + }, + { + "epoch": 2.9168443496801704, + "grad_norm": 0.27774105361299584, + "learning_rate": 1.7775478544976813e-05, + "loss": 0.9582983255386353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24214479327201843, + "step": 685, + "valid_targets_mean": 14339.9, + "valid_targets_min": 1861 + }, + { + "epoch": 2.9211087420042645, + "grad_norm": 0.309414666717297, + "learning_rate": 1.7716413727492035e-05, + "loss": 1.0138694047927856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2687375843524933, + "step": 686, + "valid_targets_mean": 14603.4, + "valid_targets_min": 4177 + }, + { + "epoch": 2.925373134328358, + "grad_norm": 0.2813401428555624, + "learning_rate": 1.7657369082839392e-05, + "loss": 1.0201051235198975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24503937363624573, + "step": 687, + "valid_targets_mean": 14408.7, + "valid_targets_min": 6466 + }, + { + "epoch": 2.929637526652452, + "grad_norm": 0.2780914157152459, + "learning_rate": 1.7598345132609747e-05, + "loss": 1.0258854627609253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24974925816059113, + "step": 688, + "valid_targets_mean": 14399.9, + "valid_targets_min": 2770 + }, + { + "epoch": 2.933901918976546, + "grad_norm": 0.27826477070991834, + "learning_rate": 1.7539342398211132e-05, + "loss": 1.002000331878662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.269614040851593, + "step": 689, + "valid_targets_mean": 14672.0, + "valid_targets_min": 2071 + }, + { + "epoch": 2.9381663113006398, + "grad_norm": 0.26151797799696985, + "learning_rate": 1.748036140086416e-05, + "loss": 1.014590859413147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2685566246509552, + "step": 690, + "valid_targets_mean": 15134.9, + "valid_targets_min": 2612 + }, + { + "epoch": 2.9424307036247335, + "grad_norm": 0.27864426558205724, + "learning_rate": 1.742140266159744e-05, + "loss": 0.9970508813858032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24841861426830292, + "step": 691, + "valid_targets_mean": 14269.2, + "valid_targets_min": 1758 + }, + { + "epoch": 2.946695095948827, + "grad_norm": 0.25963872761453904, + "learning_rate": 1.7362466701242943e-05, + "loss": 1.0036523342132568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23161643743515015, + "step": 692, + "valid_targets_mean": 13665.4, + "valid_targets_min": 1414 + }, + { + "epoch": 2.950959488272921, + "grad_norm": 0.2715714971551824, + "learning_rate": 1.7303554040431426e-05, + "loss": 1.000980019569397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23404458165168762, + "step": 693, + "valid_targets_mean": 14589.4, + "valid_targets_min": 7691 + }, + { + "epoch": 2.955223880597015, + "grad_norm": 0.2895240322777501, + "learning_rate": 1.7244665199587812e-05, + "loss": 0.9945222735404968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2587818503379822, + "step": 694, + "valid_targets_mean": 15091.8, + "valid_targets_min": 7829 + }, + { + "epoch": 2.9594882729211087, + "grad_norm": 0.2491244470969691, + "learning_rate": 1.7185800698926594e-05, + "loss": 1.0258584022521973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2739405632019043, + "step": 695, + "valid_targets_mean": 14661.2, + "valid_targets_min": 10498 + }, + { + "epoch": 2.9637526652452024, + "grad_norm": 0.2577629779341349, + "learning_rate": 1.7126961058447276e-05, + "loss": 0.9935309886932373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23259735107421875, + "step": 696, + "valid_targets_mean": 15374.6, + "valid_targets_min": 7090 + }, + { + "epoch": 2.9680170575692966, + "grad_norm": 0.2610574679004603, + "learning_rate": 1.706814679792973e-05, + "loss": 1.0331902503967285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25708138942718506, + "step": 697, + "valid_targets_mean": 14630.6, + "valid_targets_min": 1022 + }, + { + "epoch": 2.9722814498933903, + "grad_norm": 0.2635939674737759, + "learning_rate": 1.7009358436929632e-05, + "loss": 0.984321653842926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23862984776496887, + "step": 698, + "valid_targets_mean": 14091.2, + "valid_targets_min": 2014 + }, + { + "epoch": 2.976545842217484, + "grad_norm": 0.2680053987936683, + "learning_rate": 1.6950596494773855e-05, + "loss": 1.0147403478622437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2726633548736572, + "step": 699, + "valid_targets_mean": 15212.6, + "valid_targets_min": 3210 + }, + { + "epoch": 2.9808102345415777, + "grad_norm": 0.2753441804695818, + "learning_rate": 1.6891861490555906e-05, + "loss": 1.0429158210754395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542388141155243, + "step": 700, + "valid_targets_mean": 13402.4, + "valid_targets_min": 4405 + }, + { + "epoch": 2.9850746268656714, + "grad_norm": 0.29212240811701157, + "learning_rate": 1.683315394313132e-05, + "loss": 0.9809648990631104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24427923560142517, + "step": 701, + "valid_targets_mean": 14898.9, + "valid_targets_min": 1376 + }, + { + "epoch": 2.9893390191897655, + "grad_norm": 0.2847232073089366, + "learning_rate": 1.677447437111309e-05, + "loss": 0.9754581451416016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2438046932220459, + "step": 702, + "valid_targets_mean": 13953.7, + "valid_targets_min": 3392 + }, + { + "epoch": 2.9936034115138592, + "grad_norm": 0.26259731802338676, + "learning_rate": 1.671582329286707e-05, + "loss": 1.025207281112671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27571746706962585, + "step": 703, + "valid_targets_mean": 14422.6, + "valid_targets_min": 1137 + }, + { + "epoch": 2.997867803837953, + "grad_norm": 0.28446340160710176, + "learning_rate": 1.66572012265074e-05, + "loss": 1.0176316499710083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2568015456199646, + "step": 704, + "valid_targets_mean": 14873.5, + "valid_targets_min": 8982 + }, + { + "epoch": 3.0, + "grad_norm": 0.329072326861756, + "learning_rate": 1.6598608689891953e-05, + "loss": 0.9181491136550903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40637677907943726, + "step": 705, + "valid_targets_mean": 14252.5, + "valid_targets_min": 714 + }, + { + "epoch": 3.0042643923240937, + "grad_norm": 0.3417539224541602, + "learning_rate": 1.654004620061773e-05, + "loss": 0.9819753170013428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25378790497779846, + "step": 706, + "valid_targets_mean": 14935.4, + "valid_targets_min": 2227 + }, + { + "epoch": 3.008528784648188, + "grad_norm": 0.29505060833606567, + "learning_rate": 1.6481514276016297e-05, + "loss": 0.956446647644043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22329074144363403, + "step": 707, + "valid_targets_mean": 13370.4, + "valid_targets_min": 1780 + }, + { + "epoch": 3.0127931769722816, + "grad_norm": 0.33835427633326093, + "learning_rate": 1.6423013433149207e-05, + "loss": 0.9884039163589478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23932109773159027, + "step": 708, + "valid_targets_mean": 14046.8, + "valid_targets_min": 1756 + }, + { + "epoch": 3.0170575692963753, + "grad_norm": 0.261647860420719, + "learning_rate": 1.636454418880347e-05, + "loss": 0.9924356937408447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24471953511238098, + "step": 709, + "valid_targets_mean": 14270.6, + "valid_targets_min": 1366 + }, + { + "epoch": 3.021321961620469, + "grad_norm": 0.32856814358729175, + "learning_rate": 1.630610705948693e-05, + "loss": 0.9875960350036621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24989363551139832, + "step": 710, + "valid_targets_mean": 15469.6, + "valid_targets_min": 1397 + }, + { + "epoch": 3.025586353944563, + "grad_norm": 0.2696120998027373, + "learning_rate": 1.6247702561423753e-05, + "loss": 1.0232791900634766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24895143508911133, + "step": 711, + "valid_targets_mean": 14785.2, + "valid_targets_min": 2659 + }, + { + "epoch": 3.029850746268657, + "grad_norm": 0.36040449956817955, + "learning_rate": 1.6189331210549828e-05, + "loss": 0.9841296076774597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2596629858016968, + "step": 712, + "valid_targets_mean": 14634.5, + "valid_targets_min": 6716 + }, + { + "epoch": 3.0341151385927505, + "grad_norm": 0.2838400723401922, + "learning_rate": 1.613099352250825e-05, + "loss": 0.9826107025146484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2429734766483307, + "step": 713, + "valid_targets_mean": 14157.4, + "valid_targets_min": 1760 + }, + { + "epoch": 3.038379530916844, + "grad_norm": 0.32517783745240114, + "learning_rate": 1.6072690012644717e-05, + "loss": 0.9862861633300781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24178437888622284, + "step": 714, + "valid_targets_mean": 15151.0, + "valid_targets_min": 8214 + }, + { + "epoch": 3.0426439232409384, + "grad_norm": 0.3017129052830993, + "learning_rate": 1.6014421196003022e-05, + "loss": 0.9499567747116089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2725258469581604, + "step": 715, + "valid_targets_mean": 15267.2, + "valid_targets_min": 6651 + }, + { + "epoch": 3.046908315565032, + "grad_norm": 0.3536692325479161, + "learning_rate": 1.5956187587320468e-05, + "loss": 0.9644232988357544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2405238002538681, + "step": 716, + "valid_targets_mean": 15007.3, + "valid_targets_min": 6623 + }, + { + "epoch": 3.0511727078891258, + "grad_norm": 0.31421883625625663, + "learning_rate": 1.5897989701023355e-05, + "loss": 0.9658553600311279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24571488797664642, + "step": 717, + "valid_targets_mean": 14385.4, + "valid_targets_min": 2839 + }, + { + "epoch": 3.0554371002132195, + "grad_norm": 0.31629713516785807, + "learning_rate": 1.58398280512224e-05, + "loss": 0.979788064956665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.253682017326355, + "step": 718, + "valid_targets_mean": 14743.2, + "valid_targets_min": 3428 + }, + { + "epoch": 3.0597014925373136, + "grad_norm": 0.34674368334551675, + "learning_rate": 1.5781703151708215e-05, + "loss": 0.9843800067901611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2317553162574768, + "step": 719, + "valid_targets_mean": 13908.6, + "valid_targets_min": 2140 + }, + { + "epoch": 3.0639658848614073, + "grad_norm": 0.2913863501730682, + "learning_rate": 1.5723615515946773e-05, + "loss": 0.9665867686271667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23495791852474213, + "step": 720, + "valid_targets_mean": 13845.8, + "valid_targets_min": 2723 + }, + { + "epoch": 3.068230277185501, + "grad_norm": 0.37921521385833407, + "learning_rate": 1.5665565657074874e-05, + "loss": 0.9476820230484009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2518354654312134, + "step": 721, + "valid_targets_mean": 14853.0, + "valid_targets_min": 4328 + }, + { + "epoch": 3.0724946695095947, + "grad_norm": 0.3061956068087769, + "learning_rate": 1.560755408789558e-05, + "loss": 0.9628287553787231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21817487478256226, + "step": 722, + "valid_targets_mean": 12729.1, + "valid_targets_min": 2303 + }, + { + "epoch": 3.076759061833689, + "grad_norm": 0.378303072054375, + "learning_rate": 1.5549581320873715e-05, + "loss": 1.0098230838775635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2575799226760864, + "step": 723, + "valid_targets_mean": 15057.9, + "valid_targets_min": 7488 + }, + { + "epoch": 3.0810234541577826, + "grad_norm": 0.2890372455815842, + "learning_rate": 1.5491647868131343e-05, + "loss": 1.0204254388809204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26172223687171936, + "step": 724, + "valid_targets_mean": 15596.0, + "valid_targets_min": 7283 + }, + { + "epoch": 3.0852878464818763, + "grad_norm": 0.3420399710935773, + "learning_rate": 1.5433754241443223e-05, + "loss": 0.9512300491333008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2369207739830017, + "step": 725, + "valid_targets_mean": 14835.4, + "valid_targets_min": 6151 + }, + { + "epoch": 3.08955223880597, + "grad_norm": 0.34038150397640266, + "learning_rate": 1.53759009522323e-05, + "loss": 1.0124759674072266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2753083109855652, + "step": 726, + "valid_targets_mean": 15197.3, + "valid_targets_min": 8987 + }, + { + "epoch": 3.093816631130064, + "grad_norm": 0.3100192375950978, + "learning_rate": 1.5318088511565185e-05, + "loss": 1.0002617835998535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21985961496829987, + "step": 727, + "valid_targets_mean": 14557.7, + "valid_targets_min": 4288 + }, + { + "epoch": 3.098081023454158, + "grad_norm": 0.33281032554112766, + "learning_rate": 1.5260317430147627e-05, + "loss": 0.9853769540786743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2552061975002289, + "step": 728, + "valid_targets_mean": 15146.8, + "valid_targets_min": 6993 + }, + { + "epoch": 3.1023454157782515, + "grad_norm": 0.32340051408977954, + "learning_rate": 1.5202588218320024e-05, + "loss": 1.0069472789764404, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2594277858734131, + "step": 729, + "valid_targets_mean": 14658.7, + "valid_targets_min": 9374 + }, + { + "epoch": 3.106609808102345, + "grad_norm": 0.3459546909784368, + "learning_rate": 1.5144901386052924e-05, + "loss": 0.988615870475769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2513861358165741, + "step": 730, + "valid_targets_mean": 14178.2, + "valid_targets_min": 1157 + }, + { + "epoch": 3.1108742004264394, + "grad_norm": 0.30440414834422264, + "learning_rate": 1.5087257442942467e-05, + "loss": 0.9952294230461121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24073612689971924, + "step": 731, + "valid_targets_mean": 14035.7, + "valid_targets_min": 2033 + }, + { + "epoch": 3.115138592750533, + "grad_norm": 0.3283844116398468, + "learning_rate": 1.502965689820593e-05, + "loss": 0.9941107034683228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571861147880554, + "step": 732, + "valid_targets_mean": 14857.7, + "valid_targets_min": 5823 + }, + { + "epoch": 3.1194029850746268, + "grad_norm": 0.31863785306298104, + "learning_rate": 1.4972100260677222e-05, + "loss": 0.9742693901062012, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2496413141489029, + "step": 733, + "valid_targets_mean": 15568.8, + "valid_targets_min": 9668 + }, + { + "epoch": 3.1236673773987205, + "grad_norm": 0.2980907358091392, + "learning_rate": 1.4914588038802383e-05, + "loss": 0.953710675239563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25917893648147583, + "step": 734, + "valid_targets_mean": 15243.9, + "valid_targets_min": 7651 + }, + { + "epoch": 3.1279317697228146, + "grad_norm": 0.3154771559803136, + "learning_rate": 1.4857120740635084e-05, + "loss": 0.9636905193328857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2144869565963745, + "step": 735, + "valid_targets_mean": 14190.0, + "valid_targets_min": 3093 + }, + { + "epoch": 3.1321961620469083, + "grad_norm": 0.283184831707923, + "learning_rate": 1.4799698873832153e-05, + "loss": 0.970219612121582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24540294706821442, + "step": 736, + "valid_targets_mean": 14513.7, + "valid_targets_min": 4489 + }, + { + "epoch": 3.136460554371002, + "grad_norm": 0.2974204072610502, + "learning_rate": 1.4742322945649073e-05, + "loss": 0.9687181711196899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23797650635242462, + "step": 737, + "valid_targets_mean": 15115.2, + "valid_targets_min": 3630 + }, + { + "epoch": 3.140724946695096, + "grad_norm": 0.25739824883202206, + "learning_rate": 1.4684993462935532e-05, + "loss": 1.0161409378051758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24516227841377258, + "step": 738, + "valid_targets_mean": 13897.5, + "valid_targets_min": 2132 + }, + { + "epoch": 3.14498933901919, + "grad_norm": 0.2999031415958146, + "learning_rate": 1.462771093213092e-05, + "loss": 0.9935801029205322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2576490640640259, + "step": 739, + "valid_targets_mean": 14842.8, + "valid_targets_min": 4455 + }, + { + "epoch": 3.1492537313432836, + "grad_norm": 0.29360540339054153, + "learning_rate": 1.4570475859259856e-05, + "loss": 1.0185129642486572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25551605224609375, + "step": 740, + "valid_targets_mean": 14303.8, + "valid_targets_min": 3999 + }, + { + "epoch": 3.1535181236673773, + "grad_norm": 0.2819035828390607, + "learning_rate": 1.4513288749927714e-05, + "loss": 0.9626775979995728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24636633694171906, + "step": 741, + "valid_targets_mean": 14189.3, + "valid_targets_min": 7031 + }, + { + "epoch": 3.1577825159914714, + "grad_norm": 0.28144076026143533, + "learning_rate": 1.4456150109316192e-05, + "loss": 1.010709285736084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2493475377559662, + "step": 742, + "valid_targets_mean": 15032.7, + "valid_targets_min": 6873 + }, + { + "epoch": 3.162046908315565, + "grad_norm": 0.29620172180454607, + "learning_rate": 1.4399060442178798e-05, + "loss": 1.0011711120605469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24875086545944214, + "step": 743, + "valid_targets_mean": 14197.8, + "valid_targets_min": 2689 + }, + { + "epoch": 3.166311300639659, + "grad_norm": 0.27460647158921325, + "learning_rate": 1.4342020252836437e-05, + "loss": 0.9482408165931702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23761501908302307, + "step": 744, + "valid_targets_mean": 14842.0, + "valid_targets_min": 4959 + }, + { + "epoch": 3.1705756929637525, + "grad_norm": 0.2811109311749601, + "learning_rate": 1.4285030045172913e-05, + "loss": 0.9633879661560059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25517749786376953, + "step": 745, + "valid_targets_mean": 15101.4, + "valid_targets_min": 7179 + }, + { + "epoch": 3.1748400852878467, + "grad_norm": 0.31868948949925263, + "learning_rate": 1.422809032263052e-05, + "loss": 0.9744983315467834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2595503330230713, + "step": 746, + "valid_targets_mean": 15064.9, + "valid_targets_min": 3556 + }, + { + "epoch": 3.1791044776119404, + "grad_norm": 0.27981253592367966, + "learning_rate": 1.4171201588205566e-05, + "loss": 0.951709508895874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24453382194042206, + "step": 747, + "valid_targets_mean": 14460.3, + "valid_targets_min": 3546 + }, + { + "epoch": 3.183368869936034, + "grad_norm": 0.2880960969576025, + "learning_rate": 1.4114364344443935e-05, + "loss": 0.9665570259094238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23244787752628326, + "step": 748, + "valid_targets_mean": 15058.3, + "valid_targets_min": 5619 + }, + { + "epoch": 3.1876332622601278, + "grad_norm": 0.29060526455432706, + "learning_rate": 1.4057579093436653e-05, + "loss": 0.9781259894371033, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23163039982318878, + "step": 749, + "valid_targets_mean": 14682.0, + "valid_targets_min": 4993 + }, + { + "epoch": 3.191897654584222, + "grad_norm": 0.2658497276672498, + "learning_rate": 1.400084633681546e-05, + "loss": 0.9583557844161987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2123161256313324, + "step": 750, + "valid_targets_mean": 14937.3, + "valid_targets_min": 6130 + }, + { + "epoch": 3.1961620469083156, + "grad_norm": 0.2748386771475929, + "learning_rate": 1.3944166575748355e-05, + "loss": 1.0065157413482666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24055948853492737, + "step": 751, + "valid_targets_mean": 15015.8, + "valid_targets_min": 6065 + }, + { + "epoch": 3.2004264392324093, + "grad_norm": 0.2551588931694033, + "learning_rate": 1.3887540310935187e-05, + "loss": 0.9470630288124084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.247154101729393, + "step": 752, + "valid_targets_mean": 14598.2, + "valid_targets_min": 2048 + }, + { + "epoch": 3.204690831556503, + "grad_norm": 0.2940780684005928, + "learning_rate": 1.3830968042603226e-05, + "loss": 0.9945131540298462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23863181471824646, + "step": 753, + "valid_targets_mean": 13820.2, + "valid_targets_min": 1730 + }, + { + "epoch": 3.208955223880597, + "grad_norm": 0.2648880071040607, + "learning_rate": 1.3774450270502762e-05, + "loss": 0.9359656572341919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22216373682022095, + "step": 754, + "valid_targets_mean": 14340.8, + "valid_targets_min": 1380 + }, + { + "epoch": 3.213219616204691, + "grad_norm": 0.291391264645764, + "learning_rate": 1.3717987493902656e-05, + "loss": 0.9581259489059448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23209252953529358, + "step": 755, + "valid_targets_mean": 14867.7, + "valid_targets_min": 2135 + }, + { + "epoch": 3.2174840085287846, + "grad_norm": 0.26780135742435446, + "learning_rate": 1.3661580211585947e-05, + "loss": 1.0013266801834106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24638405442237854, + "step": 756, + "valid_targets_mean": 14753.9, + "valid_targets_min": 1968 + }, + { + "epoch": 3.2217484008528783, + "grad_norm": 0.2636036578450671, + "learning_rate": 1.3605228921845457e-05, + "loss": 0.9701790809631348, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25354689359664917, + "step": 757, + "valid_targets_mean": 15058.9, + "valid_targets_min": 1677 + }, + { + "epoch": 3.2260127931769724, + "grad_norm": 0.27198763337914833, + "learning_rate": 1.3548934122479373e-05, + "loss": 0.992609977722168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25720351934432983, + "step": 758, + "valid_targets_mean": 14217.5, + "valid_targets_min": 4859 + }, + { + "epoch": 3.230277185501066, + "grad_norm": 0.2762059201189316, + "learning_rate": 1.349269631078686e-05, + "loss": 0.9707850813865662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24857297539710999, + "step": 759, + "valid_targets_mean": 15206.1, + "valid_targets_min": 5725 + }, + { + "epoch": 3.23454157782516, + "grad_norm": 0.2759388909625414, + "learning_rate": 1.3436515983563659e-05, + "loss": 1.0266224145889282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2438332587480545, + "step": 760, + "valid_targets_mean": 14201.3, + "valid_targets_min": 1971 + }, + { + "epoch": 3.2388059701492535, + "grad_norm": 0.2465084879297926, + "learning_rate": 1.3380393637097692e-05, + "loss": 0.9265196323394775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.226420059800148, + "step": 761, + "valid_targets_mean": 14595.2, + "valid_targets_min": 4471 + }, + { + "epoch": 3.2430703624733477, + "grad_norm": 0.24311935209083857, + "learning_rate": 1.3324329767164708e-05, + "loss": 0.9605081081390381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23155008256435394, + "step": 762, + "valid_targets_mean": 14623.3, + "valid_targets_min": 6527 + }, + { + "epoch": 3.2473347547974414, + "grad_norm": 0.28258689167490697, + "learning_rate": 1.3268324869023878e-05, + "loss": 0.9768404364585876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2243225872516632, + "step": 763, + "valid_targets_mean": 13988.0, + "valid_targets_min": 1741 + }, + { + "epoch": 3.251599147121535, + "grad_norm": 0.26309024409635645, + "learning_rate": 1.3212379437413421e-05, + "loss": 1.0088846683502197, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2539817690849304, + "step": 764, + "valid_targets_mean": 13731.8, + "valid_targets_min": 2767 + }, + { + "epoch": 3.2558635394456292, + "grad_norm": 0.27539579259693137, + "learning_rate": 1.3156493966546236e-05, + "loss": 0.960427463054657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2437533736228943, + "step": 765, + "valid_targets_mean": 15253.4, + "valid_targets_min": 10283 + }, + { + "epoch": 3.260127931769723, + "grad_norm": 0.2653907047853398, + "learning_rate": 1.3100668950105534e-05, + "loss": 0.9802002310752869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24038583040237427, + "step": 766, + "valid_targets_mean": 13939.0, + "valid_targets_min": 2151 + }, + { + "epoch": 3.2643923240938166, + "grad_norm": 0.2623493315499297, + "learning_rate": 1.3044904881240507e-05, + "loss": 0.9168298244476318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22533482313156128, + "step": 767, + "valid_targets_mean": 14715.1, + "valid_targets_min": 1182 + }, + { + "epoch": 3.2686567164179103, + "grad_norm": 0.32310144720030354, + "learning_rate": 1.2989202252561926e-05, + "loss": 0.9944812059402466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2704041004180908, + "step": 768, + "valid_targets_mean": 15047.2, + "valid_targets_min": 9284 + }, + { + "epoch": 3.272921108742004, + "grad_norm": 0.26244209038363603, + "learning_rate": 1.2933561556137806e-05, + "loss": 0.9888614416122437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2533096969127655, + "step": 769, + "valid_targets_mean": 15175.8, + "valid_targets_min": 8491 + }, + { + "epoch": 3.277185501066098, + "grad_norm": 0.2532907694607067, + "learning_rate": 1.2877983283489062e-05, + "loss": 0.9520066976547241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2536150813102722, + "step": 770, + "valid_targets_mean": 15184.2, + "valid_targets_min": 7985 + }, + { + "epoch": 3.281449893390192, + "grad_norm": 0.26781215436772277, + "learning_rate": 1.2822467925585186e-05, + "loss": 0.9979465007781982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263954222202301, + "step": 771, + "valid_targets_mean": 14969.1, + "valid_targets_min": 4675 + }, + { + "epoch": 3.2857142857142856, + "grad_norm": 0.2608385336238836, + "learning_rate": 1.2767015972839879e-05, + "loss": 0.9394721388816833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24243156611919403, + "step": 772, + "valid_targets_mean": 14638.4, + "valid_targets_min": 2025 + }, + { + "epoch": 3.2899786780383797, + "grad_norm": 0.2487866675739251, + "learning_rate": 1.2711627915106728e-05, + "loss": 0.975515604019165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20965570211410522, + "step": 773, + "valid_targets_mean": 14076.6, + "valid_targets_min": 2062 + }, + { + "epoch": 3.2942430703624734, + "grad_norm": 0.2510762402174945, + "learning_rate": 1.2656304241674877e-05, + "loss": 0.9717892408370972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2080877125263214, + "step": 774, + "valid_targets_mean": 13653.0, + "valid_targets_min": 1617 + }, + { + "epoch": 3.298507462686567, + "grad_norm": 0.30211205613009207, + "learning_rate": 1.2601045441264734e-05, + "loss": 0.9691690802574158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24019382894039154, + "step": 775, + "valid_targets_mean": 14468.7, + "valid_targets_min": 1497 + }, + { + "epoch": 3.302771855010661, + "grad_norm": 0.23849976778362944, + "learning_rate": 1.2545852002023599e-05, + "loss": 0.9814242124557495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22221355140209198, + "step": 776, + "valid_targets_mean": 13769.2, + "valid_targets_min": 1414 + }, + { + "epoch": 3.307036247334755, + "grad_norm": 0.3050255894382961, + "learning_rate": 1.2490724411521406e-05, + "loss": 1.0499078035354614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2555561661720276, + "step": 777, + "valid_targets_mean": 14771.3, + "valid_targets_min": 2909 + }, + { + "epoch": 3.3113006396588487, + "grad_norm": 0.24722631371566617, + "learning_rate": 1.243566315674637e-05, + "loss": 0.9678243398666382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25532081723213196, + "step": 778, + "valid_targets_mean": 14883.9, + "valid_targets_min": 3976 + }, + { + "epoch": 3.3155650319829424, + "grad_norm": 0.28305369699191174, + "learning_rate": 1.238066872410073e-05, + "loss": 1.048391342163086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2570115327835083, + "step": 779, + "valid_targets_mean": 13571.6, + "valid_targets_min": 2936 + }, + { + "epoch": 3.319829424307036, + "grad_norm": 0.2681526312865143, + "learning_rate": 1.2325741599396418e-05, + "loss": 0.9866071939468384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23067453503608704, + "step": 780, + "valid_targets_mean": 13806.5, + "valid_targets_min": 2177 + }, + { + "epoch": 3.3240938166311302, + "grad_norm": 0.26656876730810636, + "learning_rate": 1.2270882267850765e-05, + "loss": 0.928637683391571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23767060041427612, + "step": 781, + "valid_targets_mean": 15658.2, + "valid_targets_min": 6294 + }, + { + "epoch": 3.328358208955224, + "grad_norm": 0.29797608884424853, + "learning_rate": 1.2216091214082248e-05, + "loss": 0.9275143146514893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23108075559139252, + "step": 782, + "valid_targets_mean": 14416.9, + "valid_targets_min": 3910 + }, + { + "epoch": 3.3326226012793176, + "grad_norm": 0.24359151295047798, + "learning_rate": 1.2161368922106192e-05, + "loss": 0.9925769567489624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26450932025909424, + "step": 783, + "valid_targets_mean": 15729.0, + "valid_targets_min": 10634 + }, + { + "epoch": 3.3368869936034113, + "grad_norm": 0.2818400310256115, + "learning_rate": 1.2106715875330475e-05, + "loss": 0.9968470931053162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2680169939994812, + "step": 784, + "valid_targets_mean": 14808.2, + "valid_targets_min": 3531 + }, + { + "epoch": 3.3411513859275055, + "grad_norm": 0.2677582666064544, + "learning_rate": 1.2052132556551275e-05, + "loss": 1.0421117544174194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23851540684700012, + "step": 785, + "valid_targets_mean": 13779.9, + "valid_targets_min": 1919 + }, + { + "epoch": 3.345415778251599, + "grad_norm": 0.25621439378166544, + "learning_rate": 1.1997619447948814e-05, + "loss": 0.9955507516860962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24843750894069672, + "step": 786, + "valid_targets_mean": 14286.3, + "valid_targets_min": 2432 + }, + { + "epoch": 3.349680170575693, + "grad_norm": 0.2869822205087454, + "learning_rate": 1.1943177031083094e-05, + "loss": 1.018493890762329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2717875838279724, + "step": 787, + "valid_targets_mean": 15721.7, + "valid_targets_min": 9700 + }, + { + "epoch": 3.3539445628997866, + "grad_norm": 0.24344917057840126, + "learning_rate": 1.1888805786889621e-05, + "loss": 1.013110876083374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25155869126319885, + "step": 788, + "valid_targets_mean": 15286.8, + "valid_targets_min": 3694 + }, + { + "epoch": 3.3582089552238807, + "grad_norm": 0.28861897558310295, + "learning_rate": 1.183450619567518e-05, + "loss": 0.9786880016326904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24753284454345703, + "step": 789, + "valid_targets_mean": 14251.6, + "valid_targets_min": 4490 + }, + { + "epoch": 3.3624733475479744, + "grad_norm": 0.2633144404311006, + "learning_rate": 1.1780278737113581e-05, + "loss": 1.0120458602905273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24441072344779968, + "step": 790, + "valid_targets_mean": 14975.3, + "valid_targets_min": 3680 + }, + { + "epoch": 3.366737739872068, + "grad_norm": 0.2803292068269423, + "learning_rate": 1.1726123890241439e-05, + "loss": 0.9977100491523743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25507649779319763, + "step": 791, + "valid_targets_mean": 14009.7, + "valid_targets_min": 3023 + }, + { + "epoch": 3.3710021321961623, + "grad_norm": 0.26765143753677234, + "learning_rate": 1.1672042133453925e-05, + "loss": 0.9835935235023499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2534845471382141, + "step": 792, + "valid_targets_mean": 14919.7, + "valid_targets_min": 6013 + }, + { + "epoch": 3.375266524520256, + "grad_norm": 0.25324643234075256, + "learning_rate": 1.1618033944500527e-05, + "loss": 0.9231183528900146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22209623456001282, + "step": 793, + "valid_targets_mean": 14836.0, + "valid_targets_min": 2406 + }, + { + "epoch": 3.3795309168443497, + "grad_norm": 0.27382533446583085, + "learning_rate": 1.1564099800480864e-05, + "loss": 0.9658839702606201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22201785445213318, + "step": 794, + "valid_targets_mean": 13774.2, + "valid_targets_min": 1670 + }, + { + "epoch": 3.3837953091684434, + "grad_norm": 0.23574579575274768, + "learning_rate": 1.151024017784045e-05, + "loss": 0.9784061312675476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2386031597852707, + "step": 795, + "valid_targets_mean": 14262.1, + "valid_targets_min": 4572 + }, + { + "epoch": 3.388059701492537, + "grad_norm": 0.26446525011364513, + "learning_rate": 1.1456455552366488e-05, + "loss": 0.9620468616485596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2624356150627136, + "step": 796, + "valid_targets_mean": 15688.0, + "valid_targets_min": 10994 + }, + { + "epoch": 3.3923240938166312, + "grad_norm": 0.2946516604731879, + "learning_rate": 1.1402746399183671e-05, + "loss": 0.9715833067893982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2458597719669342, + "step": 797, + "valid_targets_mean": 14943.1, + "valid_targets_min": 6999 + }, + { + "epoch": 3.396588486140725, + "grad_norm": 0.25100464784214727, + "learning_rate": 1.1349113192749986e-05, + "loss": 0.979851484298706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558574378490448, + "step": 798, + "valid_targets_mean": 15075.8, + "valid_targets_min": 6989 + }, + { + "epoch": 3.4008528784648187, + "grad_norm": 0.28293072370460143, + "learning_rate": 1.1295556406852488e-05, + "loss": 0.9970759749412537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23473231494426727, + "step": 799, + "valid_targets_mean": 14168.6, + "valid_targets_min": 1460 + }, + { + "epoch": 3.405117270788913, + "grad_norm": 0.26993063050031846, + "learning_rate": 1.1242076514603201e-05, + "loss": 0.9559547901153564, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21587030589580536, + "step": 800, + "valid_targets_mean": 14430.6, + "valid_targets_min": 2882 + }, + { + "epoch": 3.4093816631130065, + "grad_norm": 0.24685229128332098, + "learning_rate": 1.1188673988434831e-05, + "loss": 0.9920517206192017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24107736349105835, + "step": 801, + "valid_targets_mean": 13998.1, + "valid_targets_min": 3138 + }, + { + "epoch": 3.4136460554371, + "grad_norm": 0.2755427283976288, + "learning_rate": 1.1135349300096667e-05, + "loss": 0.9662362933158875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24288468062877655, + "step": 802, + "valid_targets_mean": 15185.6, + "valid_targets_min": 3257 + }, + { + "epoch": 3.417910447761194, + "grad_norm": 0.25239907237737214, + "learning_rate": 1.1082102920650397e-05, + "loss": 0.9824597835540771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22753871977329254, + "step": 803, + "valid_targets_mean": 14153.7, + "valid_targets_min": 4364 + }, + { + "epoch": 3.4221748400852876, + "grad_norm": 0.2562666347543531, + "learning_rate": 1.102893532046593e-05, + "loss": 1.0019625425338745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2592023015022278, + "step": 804, + "valid_targets_mean": 14353.4, + "valid_targets_min": 2195 + }, + { + "epoch": 3.4264392324093818, + "grad_norm": 0.28238824416216146, + "learning_rate": 1.0975846969217258e-05, + "loss": 0.968468189239502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25971686840057373, + "step": 805, + "valid_targets_mean": 14756.9, + "valid_targets_min": 7238 + }, + { + "epoch": 3.4307036247334755, + "grad_norm": 0.26599977569448535, + "learning_rate": 1.092283833587829e-05, + "loss": 0.983420729637146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25543704628944397, + "step": 806, + "valid_targets_mean": 14806.7, + "valid_targets_min": 4322 + }, + { + "epoch": 3.434968017057569, + "grad_norm": 0.2638984292339725, + "learning_rate": 1.086990988871873e-05, + "loss": 0.9687063097953796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23694062232971191, + "step": 807, + "valid_targets_mean": 14201.8, + "valid_targets_min": 2523 + }, + { + "epoch": 3.4392324093816633, + "grad_norm": 0.2622674577937183, + "learning_rate": 1.0817062095299929e-05, + "loss": 0.9997848272323608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2400442361831665, + "step": 808, + "valid_targets_mean": 13671.8, + "valid_targets_min": 1212 + }, + { + "epoch": 3.443496801705757, + "grad_norm": 0.2569999483268217, + "learning_rate": 1.0764295422470755e-05, + "loss": 0.9746481776237488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23871907591819763, + "step": 809, + "valid_targets_mean": 14831.8, + "valid_targets_min": 3113 + }, + { + "epoch": 3.4477611940298507, + "grad_norm": 0.2757061743608293, + "learning_rate": 1.0711610336363477e-05, + "loss": 0.994911253452301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25191304087638855, + "step": 810, + "valid_targets_mean": 14924.7, + "valid_targets_min": 4915 + }, + { + "epoch": 3.4520255863539444, + "grad_norm": 0.2549562317547253, + "learning_rate": 1.065900730238961e-05, + "loss": 0.9198431968688965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2329210340976715, + "step": 811, + "valid_targets_mean": 15488.8, + "valid_targets_min": 1409 + }, + { + "epoch": 3.4562899786780386, + "grad_norm": 0.2534486974336105, + "learning_rate": 1.0606486785235879e-05, + "loss": 0.985055148601532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24948279559612274, + "step": 812, + "valid_targets_mean": 14707.6, + "valid_targets_min": 7077 + }, + { + "epoch": 3.4605543710021323, + "grad_norm": 0.26322629067765024, + "learning_rate": 1.0554049248860045e-05, + "loss": 1.0189073085784912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.244399756193161, + "step": 813, + "valid_targets_mean": 13376.9, + "valid_targets_min": 2264 + }, + { + "epoch": 3.464818763326226, + "grad_norm": 0.25987012261112424, + "learning_rate": 1.0501695156486819e-05, + "loss": 0.9842698574066162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23711949586868286, + "step": 814, + "valid_targets_mean": 14910.6, + "valid_targets_min": 2733 + }, + { + "epoch": 3.4690831556503197, + "grad_norm": 0.24191801293970758, + "learning_rate": 1.0449424970603796e-05, + "loss": 0.946979820728302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22759191691875458, + "step": 815, + "valid_targets_mean": 14495.3, + "valid_targets_min": 2024 + }, + { + "epoch": 3.473347547974414, + "grad_norm": 0.2540091751033316, + "learning_rate": 1.0397239152957356e-05, + "loss": 0.9637709259986877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2339923232793808, + "step": 816, + "valid_targets_mean": 14073.7, + "valid_targets_min": 1239 + }, + { + "epoch": 3.4776119402985075, + "grad_norm": 0.24222044744241059, + "learning_rate": 1.034513816454858e-05, + "loss": 1.0322394371032715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2537650167942047, + "step": 817, + "valid_targets_mean": 14066.1, + "valid_targets_min": 1944 + }, + { + "epoch": 3.481876332622601, + "grad_norm": 0.2611709250817118, + "learning_rate": 1.0293122465629186e-05, + "loss": 0.9560979604721069, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24220697581768036, + "step": 818, + "valid_targets_mean": 14672.9, + "valid_targets_min": 3004 + }, + { + "epoch": 3.486140724946695, + "grad_norm": 0.26355777917374235, + "learning_rate": 1.0241192515697432e-05, + "loss": 0.9867568016052246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23415711522102356, + "step": 819, + "valid_targets_mean": 14388.9, + "valid_targets_min": 2995 + }, + { + "epoch": 3.490405117270789, + "grad_norm": 0.2371250728550352, + "learning_rate": 1.0189348773494135e-05, + "loss": 1.0202665328979492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2818511128425598, + "step": 820, + "valid_targets_mean": 14920.1, + "valid_targets_min": 4167 + }, + { + "epoch": 3.4946695095948828, + "grad_norm": 0.259705177489916, + "learning_rate": 1.0137591696998514e-05, + "loss": 0.9820563197135925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24459995329380035, + "step": 821, + "valid_targets_mean": 14204.5, + "valid_targets_min": 1495 + }, + { + "epoch": 3.4989339019189765, + "grad_norm": 0.265988138101001, + "learning_rate": 1.0085921743424225e-05, + "loss": 0.9641842842102051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2623577117919922, + "step": 822, + "valid_targets_mean": 14899.5, + "valid_targets_min": 4092 + }, + { + "epoch": 3.50319829424307, + "grad_norm": 0.2388400445828997, + "learning_rate": 1.0034339369215288e-05, + "loss": 1.0143928527832031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2637423872947693, + "step": 823, + "valid_targets_mean": 15294.1, + "valid_targets_min": 9094 + }, + { + "epoch": 3.5074626865671643, + "grad_norm": 0.2803895403000936, + "learning_rate": 9.982845030042068e-06, + "loss": 0.9518420696258545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2300497591495514, + "step": 824, + "valid_targets_mean": 14787.1, + "valid_targets_min": 3972 + }, + { + "epoch": 3.511727078891258, + "grad_norm": 0.24699293670834366, + "learning_rate": 9.931439180797237e-06, + "loss": 1.0005815029144287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2434898316860199, + "step": 825, + "valid_targets_mean": 15400.7, + "valid_targets_min": 8805 + }, + { + "epoch": 3.5159914712153517, + "grad_norm": 0.26869982611870297, + "learning_rate": 9.880122275591752e-06, + "loss": 0.9343856573104858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25968652963638306, + "step": 826, + "valid_targets_mean": 14202.7, + "valid_targets_min": 3132 + }, + { + "epoch": 3.520255863539446, + "grad_norm": 0.29846154633481226, + "learning_rate": 9.828894767750865e-06, + "loss": 1.0360630750656128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2631513178348541, + "step": 827, + "valid_targets_mean": 15087.4, + "valid_targets_min": 2986 + }, + { + "epoch": 3.5245202558635396, + "grad_norm": 0.231628468381195, + "learning_rate": 9.777757109810102e-06, + "loss": 0.9682170152664185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24099406599998474, + "step": 828, + "valid_targets_mean": 14234.5, + "valid_targets_min": 2212 + }, + { + "epoch": 3.5287846481876333, + "grad_norm": 0.25089908418380913, + "learning_rate": 9.726709753511275e-06, + "loss": 0.9408371448516846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24183693528175354, + "step": 829, + "valid_targets_mean": 15226.1, + "valid_targets_min": 8810 + }, + { + "epoch": 3.533049040511727, + "grad_norm": 0.27121917659055517, + "learning_rate": 9.675753149798474e-06, + "loss": 0.9771254658699036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24990615248680115, + "step": 830, + "valid_targets_mean": 14781.8, + "valid_targets_min": 6994 + }, + { + "epoch": 3.5373134328358207, + "grad_norm": 0.2502357665660496, + "learning_rate": 9.624887748814118e-06, + "loss": 1.0215736627578735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23912081122398376, + "step": 831, + "valid_targets_mean": 15214.8, + "valid_targets_min": 10179 + }, + { + "epoch": 3.541577825159915, + "grad_norm": 0.2600784963343472, + "learning_rate": 9.574113999894909e-06, + "loss": 0.9992471933364868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22977708280086517, + "step": 832, + "valid_targets_mean": 13955.2, + "valid_targets_min": 2242 + }, + { + "epoch": 3.5458422174840085, + "grad_norm": 0.2516815603897745, + "learning_rate": 9.523432351567979e-06, + "loss": 0.9578772783279419, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24671056866645813, + "step": 833, + "valid_targets_mean": 15254.7, + "valid_targets_min": 7740 + }, + { + "epoch": 3.550106609808102, + "grad_norm": 0.24192388857352504, + "learning_rate": 9.472843251546792e-06, + "loss": 1.0055651664733887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23401792347431183, + "step": 834, + "valid_targets_mean": 13342.5, + "valid_targets_min": 2585 + }, + { + "epoch": 3.5543710021321964, + "grad_norm": 0.27778145777402363, + "learning_rate": 9.422347146727294e-06, + "loss": 0.9918792843818665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24214953184127808, + "step": 835, + "valid_targets_mean": 14498.8, + "valid_targets_min": 1281 + }, + { + "epoch": 3.55863539445629, + "grad_norm": 0.2614800511266557, + "learning_rate": 9.371944483183912e-06, + "loss": 0.961499035358429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2294447422027588, + "step": 836, + "valid_targets_mean": 14546.1, + "valid_targets_min": 5910 + }, + { + "epoch": 3.5628997867803838, + "grad_norm": 0.24803139384138795, + "learning_rate": 9.321635706165635e-06, + "loss": 0.9757760763168335, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558143734931946, + "step": 837, + "valid_targets_mean": 15266.3, + "valid_targets_min": 8894 + }, + { + "epoch": 3.5671641791044775, + "grad_norm": 0.26421904120538203, + "learning_rate": 9.271421260092075e-06, + "loss": 0.9891831874847412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24245423078536987, + "step": 838, + "valid_targets_mean": 14179.2, + "valid_targets_min": 7842 + }, + { + "epoch": 3.571428571428571, + "grad_norm": 0.24120372404398885, + "learning_rate": 9.221301588549519e-06, + "loss": 0.942074179649353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25345689058303833, + "step": 839, + "valid_targets_mean": 14947.5, + "valid_targets_min": 9230 + }, + { + "epoch": 3.5756929637526653, + "grad_norm": 0.24432645883901663, + "learning_rate": 9.171277134287057e-06, + "loss": 0.9449573755264282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22649499773979187, + "step": 840, + "valid_targets_mean": 14274.2, + "valid_targets_min": 1554 + }, + { + "epoch": 3.579957356076759, + "grad_norm": 0.24438018740047743, + "learning_rate": 9.121348339212634e-06, + "loss": 1.0226428508758545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25364571809768677, + "step": 841, + "valid_targets_mean": 14984.0, + "valid_targets_min": 2854 + }, + { + "epoch": 3.5842217484008527, + "grad_norm": 0.23228082229773042, + "learning_rate": 9.07151564438916e-06, + "loss": 1.0022697448730469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2715730667114258, + "step": 842, + "valid_targets_mean": 14656.7, + "valid_targets_min": 6455 + }, + { + "epoch": 3.588486140724947, + "grad_norm": 0.22153785708655307, + "learning_rate": 9.021779490030611e-06, + "loss": 0.9454774260520935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22272752225399017, + "step": 843, + "valid_targets_mean": 14127.5, + "valid_targets_min": 934 + }, + { + "epoch": 3.5927505330490406, + "grad_norm": 0.23925613158013162, + "learning_rate": 8.972140315498119e-06, + "loss": 0.9721479415893555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2528744637966156, + "step": 844, + "valid_targets_mean": 15379.6, + "valid_targets_min": 8062 + }, + { + "epoch": 3.5970149253731343, + "grad_norm": 0.24416093619066498, + "learning_rate": 8.922598559296154e-06, + "loss": 0.9860814809799194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24769961833953857, + "step": 845, + "valid_targets_mean": 15095.0, + "valid_targets_min": 9098 + }, + { + "epoch": 3.6012793176972284, + "grad_norm": 0.24450290094743976, + "learning_rate": 8.873154659068582e-06, + "loss": 0.9304975271224976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2089354395866394, + "step": 846, + "valid_targets_mean": 14593.3, + "valid_targets_min": 4305 + }, + { + "epoch": 3.605543710021322, + "grad_norm": 0.2359163136679419, + "learning_rate": 8.823809051594816e-06, + "loss": 0.9832009673118591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23041662573814392, + "step": 847, + "valid_targets_mean": 13953.9, + "valid_targets_min": 1518 + }, + { + "epoch": 3.609808102345416, + "grad_norm": 0.2245217144041866, + "learning_rate": 8.774562172785988e-06, + "loss": 0.9642736315727234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2261798083782196, + "step": 848, + "valid_targets_mean": 14167.8, + "valid_targets_min": 2377 + }, + { + "epoch": 3.6140724946695095, + "grad_norm": 0.26510925350020154, + "learning_rate": 8.725414457681063e-06, + "loss": 1.0208244323730469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2765214443206787, + "step": 849, + "valid_targets_mean": 15571.1, + "valid_targets_min": 3899 + }, + { + "epoch": 3.6183368869936032, + "grad_norm": 0.23189319110965834, + "learning_rate": 8.676366340443017e-06, + "loss": 0.9406700134277344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266805171966553, + "step": 850, + "valid_targets_mean": 14893.3, + "valid_targets_min": 6060 + }, + { + "epoch": 3.6226012793176974, + "grad_norm": 0.2535988557752394, + "learning_rate": 8.627418254355e-06, + "loss": 0.9919254183769226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515752613544464, + "step": 851, + "valid_targets_mean": 14232.6, + "valid_targets_min": 1176 + }, + { + "epoch": 3.626865671641791, + "grad_norm": 0.23243500837942946, + "learning_rate": 8.578570631816474e-06, + "loss": 0.9726200103759766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23803666234016418, + "step": 852, + "valid_targets_mean": 14178.5, + "valid_targets_min": 7984 + }, + { + "epoch": 3.631130063965885, + "grad_norm": 0.24950532558501276, + "learning_rate": 8.529823904339472e-06, + "loss": 0.9699271321296692, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.251629114151001, + "step": 853, + "valid_targets_mean": 13938.4, + "valid_targets_min": 4176 + }, + { + "epoch": 3.635394456289979, + "grad_norm": 0.24895272568179946, + "learning_rate": 8.481178502544684e-06, + "loss": 0.9935591220855713, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22088086605072021, + "step": 854, + "valid_targets_mean": 14311.4, + "valid_targets_min": 3387 + }, + { + "epoch": 3.6396588486140726, + "grad_norm": 0.24976757519865808, + "learning_rate": 8.43263485615774e-06, + "loss": 0.9702616930007935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2503027319908142, + "step": 855, + "valid_targets_mean": 15099.9, + "valid_targets_min": 4158 + }, + { + "epoch": 3.6439232409381663, + "grad_norm": 0.2652806510317654, + "learning_rate": 8.384193394005372e-06, + "loss": 0.9816626310348511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25915277004241943, + "step": 856, + "valid_targets_mean": 14864.0, + "valid_targets_min": 9550 + }, + { + "epoch": 3.64818763326226, + "grad_norm": 0.24571537156487527, + "learning_rate": 8.33585454401161e-06, + "loss": 0.9883707761764526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23713959753513336, + "step": 857, + "valid_targets_mean": 14149.2, + "valid_targets_min": 5806 + }, + { + "epoch": 3.6524520255863537, + "grad_norm": 0.23524174221929256, + "learning_rate": 8.287618733194073e-06, + "loss": 0.9662632346153259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23696158826351166, + "step": 858, + "valid_targets_mean": 14875.3, + "valid_targets_min": 4261 + }, + { + "epoch": 3.656716417910448, + "grad_norm": 0.22667413416922827, + "learning_rate": 8.239486387660096e-06, + "loss": 0.9632445573806763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2446478307247162, + "step": 859, + "valid_targets_mean": 14762.0, + "valid_targets_min": 2243 + }, + { + "epoch": 3.6609808102345416, + "grad_norm": 0.24089377081144345, + "learning_rate": 8.191457932603052e-06, + "loss": 0.9559063911437988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2483818233013153, + "step": 860, + "valid_targets_mean": 14555.2, + "valid_targets_min": 2317 + }, + { + "epoch": 3.6652452025586353, + "grad_norm": 0.2588786545230032, + "learning_rate": 8.143533792298545e-06, + "loss": 0.9960157871246338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2394169569015503, + "step": 861, + "valid_targets_mean": 13622.2, + "valid_targets_min": 1189 + }, + { + "epoch": 3.6695095948827294, + "grad_norm": 0.2235205743092298, + "learning_rate": 8.095714390100698e-06, + "loss": 0.9713449478149414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25745829939842224, + "step": 862, + "valid_targets_mean": 14786.1, + "valid_targets_min": 5323 + }, + { + "epoch": 3.673773987206823, + "grad_norm": 0.22968864712171463, + "learning_rate": 8.048000148438375e-06, + "loss": 0.9706517457962036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2478504478931427, + "step": 863, + "valid_targets_mean": 14706.8, + "valid_targets_min": 2050 + }, + { + "epoch": 3.678038379530917, + "grad_norm": 0.22612857332196562, + "learning_rate": 8.000391488811485e-06, + "loss": 0.9807164669036865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24754057824611664, + "step": 864, + "valid_targets_mean": 14964.3, + "valid_targets_min": 4977 + }, + { + "epoch": 3.6823027718550105, + "grad_norm": 0.25539163939478027, + "learning_rate": 7.952888831787215e-06, + "loss": 1.0361860990524292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24390782415866852, + "step": 865, + "valid_targets_mean": 14916.1, + "valid_targets_min": 5985 + }, + { + "epoch": 3.6865671641791042, + "grad_norm": 0.2455547337289385, + "learning_rate": 7.905492596996391e-06, + "loss": 0.9568573236465454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23050349950790405, + "step": 866, + "valid_targets_mean": 14725.6, + "valid_targets_min": 6183 + }, + { + "epoch": 3.6908315565031984, + "grad_norm": 0.22769656736556496, + "learning_rate": 7.858203203129668e-06, + "loss": 0.9520964026451111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24754388630390167, + "step": 867, + "valid_targets_mean": 15242.0, + "valid_targets_min": 7757 + }, + { + "epoch": 3.695095948827292, + "grad_norm": 0.24458247633032892, + "learning_rate": 7.811021067933919e-06, + "loss": 0.9987329840660095, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24426715075969696, + "step": 868, + "valid_targets_mean": 14525.9, + "valid_targets_min": 1895 + }, + { + "epoch": 3.699360341151386, + "grad_norm": 0.24146868219803297, + "learning_rate": 7.763946608208504e-06, + "loss": 0.9650008678436279, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22406381368637085, + "step": 869, + "valid_targets_mean": 14157.7, + "valid_targets_min": 2294 + }, + { + "epoch": 3.70362473347548, + "grad_norm": 0.21233317092547138, + "learning_rate": 7.716980239801588e-06, + "loss": 0.9769718050956726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22000914812088013, + "step": 870, + "valid_targets_mean": 13162.8, + "valid_targets_min": 2704 + }, + { + "epoch": 3.7078891257995736, + "grad_norm": 0.25981603859930497, + "learning_rate": 7.670122377606495e-06, + "loss": 1.0230576992034912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25247544050216675, + "step": 871, + "valid_targets_mean": 13542.7, + "valid_targets_min": 1022 + }, + { + "epoch": 3.7121535181236673, + "grad_norm": 0.24120562129813655, + "learning_rate": 7.623373435557988e-06, + "loss": 0.9463640451431274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2375239133834839, + "step": 872, + "valid_targets_mean": 15127.0, + "valid_targets_min": 6449 + }, + { + "epoch": 3.716417910447761, + "grad_norm": 0.2245030678554318, + "learning_rate": 7.5767338266286775e-06, + "loss": 0.9786025285720825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23157241940498352, + "step": 873, + "valid_targets_mean": 13910.7, + "valid_targets_min": 5037 + }, + { + "epoch": 3.7206823027718547, + "grad_norm": 0.23288844831269276, + "learning_rate": 7.530203962825331e-06, + "loss": 0.9911025762557983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21122583746910095, + "step": 874, + "valid_targets_mean": 13119.2, + "valid_targets_min": 2490 + }, + { + "epoch": 3.724946695095949, + "grad_norm": 0.27222483816263005, + "learning_rate": 7.483784255185249e-06, + "loss": 0.9990006685256958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24723437428474426, + "step": 875, + "valid_targets_mean": 14785.9, + "valid_targets_min": 3874 + }, + { + "epoch": 3.7292110874200426, + "grad_norm": 0.2644117293321211, + "learning_rate": 7.437475113772632e-06, + "loss": 1.0078997611999512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25119057297706604, + "step": 876, + "valid_targets_mean": 15057.7, + "valid_targets_min": 9723 + }, + { + "epoch": 3.7334754797441363, + "grad_norm": 0.24724171688530638, + "learning_rate": 7.391276947674932e-06, + "loss": 0.9665708541870117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23952801525592804, + "step": 877, + "valid_targets_mean": 14631.8, + "valid_targets_min": 6130 + }, + { + "epoch": 3.7377398720682304, + "grad_norm": 0.28647957077023595, + "learning_rate": 7.345190164999307e-06, + "loss": 0.967820405960083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23053833842277527, + "step": 878, + "valid_targets_mean": 14925.0, + "valid_targets_min": 7657 + }, + { + "epoch": 3.742004264392324, + "grad_norm": 0.2327400527390114, + "learning_rate": 7.299215172868947e-06, + "loss": 0.9955414533615112, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2453860640525818, + "step": 879, + "valid_targets_mean": 13903.8, + "valid_targets_min": 1726 + }, + { + "epoch": 3.746268656716418, + "grad_norm": 0.2207936733218975, + "learning_rate": 7.2533523774194865e-06, + "loss": 0.9483203887939453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26135653257369995, + "step": 880, + "valid_targets_mean": 15312.8, + "valid_targets_min": 11925 + }, + { + "epoch": 3.750533049040512, + "grad_norm": 0.2300862632957973, + "learning_rate": 7.2076021837954616e-06, + "loss": 0.9806277751922607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23874348402023315, + "step": 881, + "valid_targets_mean": 14891.8, + "valid_targets_min": 1848 + }, + { + "epoch": 3.7547974413646057, + "grad_norm": 0.24870143297414857, + "learning_rate": 7.161964996146689e-06, + "loss": 1.046656608581543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24634422361850739, + "step": 882, + "valid_targets_mean": 14668.8, + "valid_targets_min": 4126 + }, + { + "epoch": 3.7590618336886994, + "grad_norm": 0.22147895784835617, + "learning_rate": 7.116441217624708e-06, + "loss": 0.9934045076370239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27002787590026855, + "step": 883, + "valid_targets_mean": 15408.2, + "valid_targets_min": 9820 + }, + { + "epoch": 3.763326226012793, + "grad_norm": 0.2446394869202921, + "learning_rate": 7.071031250379228e-06, + "loss": 0.996993899345398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24206006526947021, + "step": 884, + "valid_targets_mean": 14343.3, + "valid_targets_min": 4333 + }, + { + "epoch": 3.767590618336887, + "grad_norm": 0.24998670653421667, + "learning_rate": 7.0257354955545466e-06, + "loss": 1.007996916770935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24389483034610748, + "step": 885, + "valid_targets_mean": 15130.5, + "valid_targets_min": 7247 + }, + { + "epoch": 3.771855010660981, + "grad_norm": 0.2409640520795428, + "learning_rate": 6.980554353286066e-06, + "loss": 0.9907573461532593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25695574283599854, + "step": 886, + "valid_targets_mean": 14252.0, + "valid_targets_min": 2331 + }, + { + "epoch": 3.7761194029850746, + "grad_norm": 0.21630757494791464, + "learning_rate": 6.935488222696676e-06, + "loss": 0.9466689229011536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23345476388931274, + "step": 887, + "valid_targets_mean": 14437.1, + "valid_targets_min": 2454 + }, + { + "epoch": 3.7803837953091683, + "grad_norm": 0.21812104909145605, + "learning_rate": 6.890537501893302e-06, + "loss": 1.0114989280700684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26563623547554016, + "step": 888, + "valid_targets_mean": 15158.0, + "valid_targets_min": 5390 + }, + { + "epoch": 3.7846481876332625, + "grad_norm": 0.22583982632122657, + "learning_rate": 6.845702587963352e-06, + "loss": 0.9820560812950134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22860771417617798, + "step": 889, + "valid_targets_mean": 14585.2, + "valid_targets_min": 4294 + }, + { + "epoch": 3.788912579957356, + "grad_norm": 0.22781777077904905, + "learning_rate": 6.800983876971192e-06, + "loss": 1.0018254518508911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25380298495292664, + "step": 890, + "valid_targets_mean": 15159.2, + "valid_targets_min": 4537 + }, + { + "epoch": 3.79317697228145, + "grad_norm": 0.245946021804642, + "learning_rate": 6.756381763954718e-06, + "loss": 0.9879237413406372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2271505445241928, + "step": 891, + "valid_targets_mean": 14488.2, + "valid_targets_min": 4167 + }, + { + "epoch": 3.7974413646055436, + "grad_norm": 0.21935290451621, + "learning_rate": 6.7118966429217645e-06, + "loss": 0.9949040412902832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2252422273159027, + "step": 892, + "valid_targets_mean": 13439.8, + "valid_targets_min": 2234 + }, + { + "epoch": 3.8017057569296373, + "grad_norm": 0.2326597771006886, + "learning_rate": 6.667528906846714e-06, + "loss": 0.9881210923194885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2523738741874695, + "step": 893, + "valid_targets_mean": 15426.2, + "valid_targets_min": 7311 + }, + { + "epoch": 3.8059701492537314, + "grad_norm": 0.24603226496570071, + "learning_rate": 6.623278947666974e-06, + "loss": 1.0030843019485474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25523436069488525, + "step": 894, + "valid_targets_mean": 14742.0, + "valid_targets_min": 2248 + }, + { + "epoch": 3.810234541577825, + "grad_norm": 0.266631934837499, + "learning_rate": 6.579147156279538e-06, + "loss": 0.9971247315406799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2442849725484848, + "step": 895, + "valid_targets_mean": 14518.3, + "valid_targets_min": 5077 + }, + { + "epoch": 3.814498933901919, + "grad_norm": 0.22661067454202902, + "learning_rate": 6.535133922537513e-06, + "loss": 1.0048599243164062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25347477197647095, + "step": 896, + "valid_targets_mean": 15031.2, + "valid_targets_min": 3446 + }, + { + "epoch": 3.818763326226013, + "grad_norm": 0.24279285266836087, + "learning_rate": 6.491239635246709e-06, + "loss": 0.9471845626831055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25361740589141846, + "step": 897, + "valid_targets_mean": 14355.9, + "valid_targets_min": 3785 + }, + { + "epoch": 3.8230277185501067, + "grad_norm": 0.22447031258250963, + "learning_rate": 6.447464682162143e-06, + "loss": 0.9708333015441895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2511172890663147, + "step": 898, + "valid_targets_mean": 14254.9, + "valid_targets_min": 5724 + }, + { + "epoch": 3.8272921108742004, + "grad_norm": 0.2180126504875139, + "learning_rate": 6.403809449984704e-06, + "loss": 0.9517656564712524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22828862071037292, + "step": 899, + "valid_targets_mean": 13606.8, + "valid_targets_min": 2633 + }, + { + "epoch": 3.831556503198294, + "grad_norm": 0.22768302089677078, + "learning_rate": 6.3602743243576405e-06, + "loss": 1.0056332349777222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24649082124233246, + "step": 900, + "valid_targets_mean": 14836.6, + "valid_targets_min": 2264 + }, + { + "epoch": 3.835820895522388, + "grad_norm": 0.23360775368290362, + "learning_rate": 6.316859689863222e-06, + "loss": 0.979081928730011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2140704244375229, + "step": 901, + "valid_targets_mean": 14308.3, + "valid_targets_min": 5342 + }, + { + "epoch": 3.840085287846482, + "grad_norm": 0.22409249934660494, + "learning_rate": 6.273565930019316e-06, + "loss": 0.9827362298965454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22869783639907837, + "step": 902, + "valid_targets_mean": 13936.7, + "valid_targets_min": 1649 + }, + { + "epoch": 3.8443496801705757, + "grad_norm": 0.21947597286366416, + "learning_rate": 6.230393427276e-06, + "loss": 0.9563350677490234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23218029737472534, + "step": 903, + "valid_targets_mean": 15035.5, + "valid_targets_min": 6891 + }, + { + "epoch": 3.8486140724946694, + "grad_norm": 0.2294472946303094, + "learning_rate": 6.187342563012198e-06, + "loss": 0.9648277759552002, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22801150381565094, + "step": 904, + "valid_targets_mean": 15162.3, + "valid_targets_min": 7415 + }, + { + "epoch": 3.8528784648187635, + "grad_norm": 0.23831859235363342, + "learning_rate": 6.144413717532269e-06, + "loss": 0.9997645020484924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24504047632217407, + "step": 905, + "valid_targets_mean": 14434.5, + "valid_targets_min": 4254 + }, + { + "epoch": 3.857142857142857, + "grad_norm": 0.21316337592556336, + "learning_rate": 6.1016072700627106e-06, + "loss": 1.013451099395752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2638471722602844, + "step": 906, + "valid_targets_mean": 15279.8, + "valid_targets_min": 9661 + }, + { + "epoch": 3.861407249466951, + "grad_norm": 0.21560295735447713, + "learning_rate": 6.058923598748756e-06, + "loss": 0.9879148006439209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25007355213165283, + "step": 907, + "valid_targets_mean": 14704.1, + "valid_targets_min": 4873 + }, + { + "epoch": 3.8656716417910446, + "grad_norm": 0.22081513779623307, + "learning_rate": 6.016363080651066e-06, + "loss": 0.972460925579071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26785242557525635, + "step": 908, + "valid_targets_mean": 15310.9, + "valid_targets_min": 1417 + }, + { + "epoch": 3.8699360341151388, + "grad_norm": 0.215554369596329, + "learning_rate": 5.973926091742386e-06, + "loss": 0.9702866077423096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2544465959072113, + "step": 909, + "valid_targets_mean": 14615.7, + "valid_targets_min": 2662 + }, + { + "epoch": 3.8742004264392325, + "grad_norm": 0.2270168812979479, + "learning_rate": 5.931613006904196e-06, + "loss": 1.041187047958374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2589257061481476, + "step": 910, + "valid_targets_mean": 14657.2, + "valid_targets_min": 2024 + }, + { + "epoch": 3.878464818763326, + "grad_norm": 0.22359388614658796, + "learning_rate": 5.889424199923473e-06, + "loss": 1.0261526107788086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2758401036262512, + "step": 911, + "valid_targets_mean": 14530.1, + "valid_targets_min": 2574 + }, + { + "epoch": 3.88272921108742, + "grad_norm": 0.22384565420989688, + "learning_rate": 5.847360043489318e-06, + "loss": 0.9904541969299316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23720858991146088, + "step": 912, + "valid_targets_mean": 13643.0, + "valid_targets_min": 4384 + }, + { + "epoch": 3.886993603411514, + "grad_norm": 0.23154427689601847, + "learning_rate": 5.805420909189683e-06, + "loss": 0.966805636882782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23835605382919312, + "step": 913, + "valid_targets_mean": 13716.4, + "valid_targets_min": 1999 + }, + { + "epoch": 3.8912579957356077, + "grad_norm": 0.2143041362190162, + "learning_rate": 5.7636071675081076e-06, + "loss": 0.9406954646110535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22822436690330505, + "step": 914, + "valid_targets_mean": 14781.8, + "valid_targets_min": 4557 + }, + { + "epoch": 3.8955223880597014, + "grad_norm": 0.2239489404420758, + "learning_rate": 5.721919187820431e-06, + "loss": 0.966067910194397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23978188633918762, + "step": 915, + "valid_targets_mean": 14888.3, + "valid_targets_min": 1662 + }, + { + "epoch": 3.8997867803837956, + "grad_norm": 0.2246564522473887, + "learning_rate": 5.6803573383915265e-06, + "loss": 0.9739153981208801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23945488035678864, + "step": 916, + "valid_targets_mean": 14734.2, + "valid_targets_min": 6220 + }, + { + "epoch": 3.9040511727078893, + "grad_norm": 0.23128830802806832, + "learning_rate": 5.638921986372064e-06, + "loss": 0.9694392085075378, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2503975033760071, + "step": 917, + "valid_targets_mean": 14521.5, + "valid_targets_min": 4590 + }, + { + "epoch": 3.908315565031983, + "grad_norm": 0.22103661486834023, + "learning_rate": 5.5976134977952315e-06, + "loss": 0.9809643030166626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672732472419739, + "step": 918, + "valid_targets_mean": 14542.9, + "valid_targets_min": 4690 + }, + { + "epoch": 3.9125799573560767, + "grad_norm": 0.21739956435689023, + "learning_rate": 5.556432237573564e-06, + "loss": 0.9921892285346985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24295605719089508, + "step": 919, + "valid_targets_mean": 14229.2, + "valid_targets_min": 1722 + }, + { + "epoch": 3.9168443496801704, + "grad_norm": 0.22604645101729312, + "learning_rate": 5.5153785694956416e-06, + "loss": 0.956708550453186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24477112293243408, + "step": 920, + "valid_targets_mean": 14657.9, + "valid_targets_min": 5751 + }, + { + "epoch": 3.9211087420042645, + "grad_norm": 0.2404884445232945, + "learning_rate": 5.474452856222942e-06, + "loss": 1.0439990758895874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25359728932380676, + "step": 921, + "valid_targets_mean": 14060.7, + "valid_targets_min": 2813 + }, + { + "epoch": 3.925373134328358, + "grad_norm": 0.22032750526822323, + "learning_rate": 5.433655459286611e-06, + "loss": 0.9538367986679077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23562100529670715, + "step": 922, + "valid_targets_mean": 14302.5, + "valid_targets_min": 4541 + }, + { + "epoch": 3.929637526652452, + "grad_norm": 0.20552263463345719, + "learning_rate": 5.392986739084238e-06, + "loss": 1.000390887260437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24285882711410522, + "step": 923, + "valid_targets_mean": 14002.6, + "valid_targets_min": 3538 + }, + { + "epoch": 3.933901918976546, + "grad_norm": 0.22429960515785585, + "learning_rate": 5.352447054876755e-06, + "loss": 0.9766531586647034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22854523360729218, + "step": 924, + "valid_targets_mean": 13950.4, + "valid_targets_min": 1861 + }, + { + "epoch": 3.9381663113006398, + "grad_norm": 0.2415085567962944, + "learning_rate": 5.31203676478516e-06, + "loss": 1.0060484409332275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25534623861312866, + "step": 925, + "valid_targets_mean": 15236.5, + "valid_targets_min": 1413 + }, + { + "epoch": 3.9424307036247335, + "grad_norm": 0.22915738361343682, + "learning_rate": 5.271756225787434e-06, + "loss": 0.9796045422554016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25547105073928833, + "step": 926, + "valid_targets_mean": 15215.2, + "valid_targets_min": 6385 + }, + { + "epoch": 3.946695095948827, + "grad_norm": 0.25163106173324307, + "learning_rate": 5.231605793715348e-06, + "loss": 0.9589823484420776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22687679529190063, + "step": 927, + "valid_targets_mean": 14792.0, + "valid_targets_min": 6755 + }, + { + "epoch": 3.950959488272921, + "grad_norm": 0.24040509681493633, + "learning_rate": 5.191585823251335e-06, + "loss": 0.9927637577056885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22231948375701904, + "step": 928, + "valid_targets_mean": 14103.3, + "valid_targets_min": 1337 + }, + { + "epoch": 3.955223880597015, + "grad_norm": 0.22924082425507014, + "learning_rate": 5.151696667925348e-06, + "loss": 0.9865278005599976, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25896745920181274, + "step": 929, + "valid_targets_mean": 14469.6, + "valid_targets_min": 2411 + }, + { + "epoch": 3.9594882729211087, + "grad_norm": 0.19879146511999657, + "learning_rate": 5.111938680111732e-06, + "loss": 0.9674332737922668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2582390010356903, + "step": 930, + "valid_targets_mean": 14435.3, + "valid_targets_min": 2399 + }, + { + "epoch": 3.9637526652452024, + "grad_norm": 0.22126743586442613, + "learning_rate": 5.072312211026125e-06, + "loss": 1.012916922569275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2825927734375, + "step": 931, + "valid_targets_mean": 14951.0, + "valid_targets_min": 1843 + }, + { + "epoch": 3.9680170575692966, + "grad_norm": 0.21867166536576235, + "learning_rate": 5.032817610722369e-06, + "loss": 0.9658184051513672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23165291547775269, + "step": 932, + "valid_targets_mean": 14676.0, + "valid_targets_min": 5449 + }, + { + "epoch": 3.9722814498933903, + "grad_norm": 0.22364207275323592, + "learning_rate": 4.993455228089366e-06, + "loss": 1.0015931129455566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24776685237884521, + "step": 933, + "valid_targets_mean": 14040.2, + "valid_targets_min": 2181 + }, + { + "epoch": 3.976545842217484, + "grad_norm": 0.2042925699256337, + "learning_rate": 4.954225410848048e-06, + "loss": 1.0132436752319336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2558859586715698, + "step": 934, + "valid_targets_mean": 14669.7, + "valid_targets_min": 6432 + }, + { + "epoch": 3.9808102345415777, + "grad_norm": 0.22223779728157847, + "learning_rate": 4.915128505548284e-06, + "loss": 1.002284288406372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2467479109764099, + "step": 935, + "valid_targets_mean": 14697.2, + "valid_targets_min": 1573 + }, + { + "epoch": 3.9850746268656714, + "grad_norm": 0.22124383489771185, + "learning_rate": 4.8761648575658145e-06, + "loss": 0.9923639297485352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2363635003566742, + "step": 936, + "valid_targets_mean": 14532.8, + "valid_targets_min": 3243 + }, + { + "epoch": 3.9893390191897655, + "grad_norm": 0.21530875229060228, + "learning_rate": 4.837334811099217e-06, + "loss": 0.9767214059829712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2520718276500702, + "step": 937, + "valid_targets_mean": 14174.2, + "valid_targets_min": 3072 + }, + { + "epoch": 3.9936034115138592, + "grad_norm": 0.21172490339949648, + "learning_rate": 4.7986387091668365e-06, + "loss": 0.9671895503997803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24724167585372925, + "step": 938, + "valid_targets_mean": 14576.5, + "valid_targets_min": 6313 + }, + { + "epoch": 3.997867803837953, + "grad_norm": 0.22865146944208778, + "learning_rate": 4.760076893603791e-06, + "loss": 0.9730648994445801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24118828773498535, + "step": 939, + "valid_targets_mean": 14682.5, + "valid_targets_min": 3668 + }, + { + "epoch": 4.0, + "grad_norm": 0.26952511801055457, + "learning_rate": 4.721649705058926e-06, + "loss": 0.9118285179138184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4505543112754822, + "step": 940, + "valid_targets_mean": 15165.0, + "valid_targets_min": 6436 + }, + { + "epoch": 4.004264392324094, + "grad_norm": 0.2598615455809434, + "learning_rate": 4.683357482991819e-06, + "loss": 1.0042850971221924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2683258354663849, + "step": 941, + "valid_targets_mean": 14863.8, + "valid_targets_min": 2691 + }, + { + "epoch": 4.008528784648187, + "grad_norm": 0.2496957929931509, + "learning_rate": 4.645200565669776e-06, + "loss": 1.0006030797958374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2577746510505676, + "step": 942, + "valid_targets_mean": 15028.4, + "valid_targets_min": 3047 + }, + { + "epoch": 4.0127931769722816, + "grad_norm": 0.21150850214146644, + "learning_rate": 4.607179290164823e-06, + "loss": 1.0080355405807495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23704233765602112, + "step": 943, + "valid_targets_mean": 14554.3, + "valid_targets_min": 3704 + }, + { + "epoch": 4.017057569296376, + "grad_norm": 0.2375880728968116, + "learning_rate": 4.569293992350783e-06, + "loss": 0.9192696213722229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22098088264465332, + "step": 944, + "valid_targets_mean": 14898.5, + "valid_targets_min": 2985 + }, + { + "epoch": 4.021321961620469, + "grad_norm": 0.2206007314421585, + "learning_rate": 4.531545006900244e-06, + "loss": 0.9563088417053223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2254941761493683, + "step": 945, + "valid_targets_mean": 14250.9, + "valid_targets_min": 2251 + }, + { + "epoch": 4.025586353944563, + "grad_norm": 0.23986481760882775, + "learning_rate": 4.493932667281646e-06, + "loss": 0.9235143661499023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.213372141122818, + "step": 946, + "valid_targets_mean": 15698.6, + "valid_targets_min": 11052 + }, + { + "epoch": 4.029850746268656, + "grad_norm": 0.2214768351800122, + "learning_rate": 4.456457305756321e-06, + "loss": 0.9056645631790161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2257278859615326, + "step": 947, + "valid_targets_mean": 14185.8, + "valid_targets_min": 1860 + }, + { + "epoch": 4.0341151385927505, + "grad_norm": 0.23103840555371336, + "learning_rate": 4.419119253375557e-06, + "loss": 1.0127660036087036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2503008544445038, + "step": 948, + "valid_targets_mean": 14074.9, + "valid_targets_min": 2264 + }, + { + "epoch": 4.038379530916845, + "grad_norm": 0.249133136138053, + "learning_rate": 4.381918839977675e-06, + "loss": 1.013496994972229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24352091550827026, + "step": 949, + "valid_targets_mean": 14197.3, + "valid_targets_min": 4327 + }, + { + "epoch": 4.042643923240938, + "grad_norm": 0.2466218442362566, + "learning_rate": 4.344856394185122e-06, + "loss": 0.9960319995880127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23479261994361877, + "step": 950, + "valid_targets_mean": 14938.5, + "valid_targets_min": 5877 + }, + { + "epoch": 4.046908315565032, + "grad_norm": 0.22403008035660443, + "learning_rate": 4.307932243401538e-06, + "loss": 0.9637834429740906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22966812551021576, + "step": 951, + "valid_targets_mean": 14600.7, + "valid_targets_min": 1397 + }, + { + "epoch": 4.051172707889126, + "grad_norm": 0.2214812448472233, + "learning_rate": 4.271146713808927e-06, + "loss": 0.9625729322433472, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22703179717063904, + "step": 952, + "valid_targets_mean": 13891.4, + "valid_targets_min": 6092 + }, + { + "epoch": 4.0554371002132195, + "grad_norm": 0.23835313549674086, + "learning_rate": 4.234500130364698e-06, + "loss": 0.9672271013259888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22592604160308838, + "step": 953, + "valid_targets_mean": 13705.8, + "valid_targets_min": 3021 + }, + { + "epoch": 4.059701492537314, + "grad_norm": 0.2431168490906322, + "learning_rate": 4.197992816798851e-06, + "loss": 0.9610116481781006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24816054105758667, + "step": 954, + "valid_targets_mean": 15000.9, + "valid_targets_min": 9098 + }, + { + "epoch": 4.063965884861407, + "grad_norm": 0.21649784955636678, + "learning_rate": 4.161625095611101e-06, + "loss": 0.9844383001327515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22619038820266724, + "step": 955, + "valid_targets_mean": 13627.3, + "valid_targets_min": 2041 + }, + { + "epoch": 4.068230277185501, + "grad_norm": 0.21242720170627966, + "learning_rate": 4.125397288068007e-06, + "loss": 0.9343520998954773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2527115046977997, + "step": 956, + "valid_targets_mean": 15573.6, + "valid_targets_min": 8774 + }, + { + "epoch": 4.072494669509595, + "grad_norm": 0.23885201853505186, + "learning_rate": 4.089309714200187e-06, + "loss": 0.9805846214294434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23342227935791016, + "step": 957, + "valid_targets_mean": 14154.8, + "valid_targets_min": 4403 + }, + { + "epoch": 4.076759061833688, + "grad_norm": 0.2409175417470154, + "learning_rate": 4.0533626927994185e-06, + "loss": 0.9530068635940552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22673815488815308, + "step": 958, + "valid_targets_mean": 14209.4, + "valid_targets_min": 1429 + }, + { + "epoch": 4.081023454157783, + "grad_norm": 0.20095032940579705, + "learning_rate": 4.017556541415888e-06, + "loss": 0.9678086042404175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24230854213237762, + "step": 959, + "valid_targets_mean": 14519.5, + "valid_targets_min": 4361 + }, + { + "epoch": 4.085287846481877, + "grad_norm": 0.20738540785399912, + "learning_rate": 3.981891576355352e-06, + "loss": 0.9462642073631287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.256551057100296, + "step": 960, + "valid_targets_mean": 15968.7, + "valid_targets_min": 11170 + }, + { + "epoch": 4.08955223880597, + "grad_norm": 0.21889372697262374, + "learning_rate": 3.946368112676346e-06, + "loss": 0.9328886270523071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23367977142333984, + "step": 961, + "valid_targets_mean": 14337.7, + "valid_targets_min": 5469 + }, + { + "epoch": 4.093816631130064, + "grad_norm": 0.21906660866558375, + "learning_rate": 3.9109864641874166e-06, + "loss": 0.9877804517745972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2542395293712616, + "step": 962, + "valid_targets_mean": 14935.7, + "valid_targets_min": 8954 + }, + { + "epoch": 4.098081023454157, + "grad_norm": 0.20273824597016718, + "learning_rate": 3.875746943444316e-06, + "loss": 0.9596878290176392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2125156819820404, + "step": 963, + "valid_targets_mean": 14245.0, + "valid_targets_min": 1666 + }, + { + "epoch": 4.1023454157782515, + "grad_norm": 0.21324639176915003, + "learning_rate": 3.840649861747278e-06, + "loss": 0.9820946455001831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25652480125427246, + "step": 964, + "valid_targets_mean": 14939.0, + "valid_targets_min": 5564 + }, + { + "epoch": 4.106609808102346, + "grad_norm": 0.2271780304804354, + "learning_rate": 3.8056955291382667e-06, + "loss": 0.9720626473426819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2259848415851593, + "step": 965, + "valid_targets_mean": 14278.5, + "valid_targets_min": 2855 + }, + { + "epoch": 4.110874200426439, + "grad_norm": 0.21758482051590056, + "learning_rate": 3.7708842543981928e-06, + "loss": 1.0157244205474854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22084775567054749, + "step": 966, + "valid_targets_mean": 13830.6, + "valid_targets_min": 3608 + }, + { + "epoch": 4.115138592750533, + "grad_norm": 0.2178449259177168, + "learning_rate": 3.736216345044237e-06, + "loss": 0.9629996418952942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2448056936264038, + "step": 967, + "valid_targets_mean": 15243.3, + "valid_targets_min": 9348 + }, + { + "epoch": 4.119402985074627, + "grad_norm": 0.23068970986223994, + "learning_rate": 3.7016921073271084e-06, + "loss": 0.9766483902931213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25123000144958496, + "step": 968, + "valid_targets_mean": 14588.3, + "valid_targets_min": 5330 + }, + { + "epoch": 4.1236673773987205, + "grad_norm": 0.20603833187982268, + "learning_rate": 3.6673118462283453e-06, + "loss": 0.9764162302017212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2223636358976364, + "step": 969, + "valid_targets_mean": 14617.9, + "valid_targets_min": 7151 + }, + { + "epoch": 4.127931769722815, + "grad_norm": 0.21537174550511082, + "learning_rate": 3.6330758654576227e-06, + "loss": 0.987112283706665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2530418634414673, + "step": 970, + "valid_targets_mean": 15085.6, + "valid_targets_min": 5562 + }, + { + "epoch": 4.132196162046908, + "grad_norm": 0.21251058017992036, + "learning_rate": 3.598984467450055e-06, + "loss": 0.931121826171875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508315443992615, + "step": 971, + "valid_targets_mean": 15062.1, + "valid_targets_min": 7551 + }, + { + "epoch": 4.136460554371002, + "grad_norm": 0.19532450425042266, + "learning_rate": 3.565037953363546e-06, + "loss": 0.9461972713470459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24638235569000244, + "step": 972, + "valid_targets_mean": 15131.4, + "valid_targets_min": 4479 + }, + { + "epoch": 4.140724946695096, + "grad_norm": 0.21113282093820956, + "learning_rate": 3.5312366230761154e-06, + "loss": 0.9400416016578674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24616287648677826, + "step": 973, + "valid_targets_mean": 14582.4, + "valid_targets_min": 2490 + }, + { + "epoch": 4.144989339019189, + "grad_norm": 0.1978320467183934, + "learning_rate": 3.497580775183258e-06, + "loss": 0.9511521458625793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21084284782409668, + "step": 974, + "valid_targets_mean": 13831.5, + "valid_targets_min": 3354 + }, + { + "epoch": 4.149253731343284, + "grad_norm": 0.2010512956277587, + "learning_rate": 3.464070706995295e-06, + "loss": 1.0143787860870361, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2493945062160492, + "step": 975, + "valid_targets_mean": 15031.1, + "valid_targets_min": 2622 + }, + { + "epoch": 4.153518123667378, + "grad_norm": 0.2171565984670613, + "learning_rate": 3.4307067145347417e-06, + "loss": 0.9798712730407715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22808438539505005, + "step": 976, + "valid_targets_mean": 13718.6, + "valid_targets_min": 2764 + }, + { + "epoch": 4.157782515991471, + "grad_norm": 0.2100321812880822, + "learning_rate": 3.397489092533739e-06, + "loss": 0.9285037517547607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2307242751121521, + "step": 977, + "valid_targets_mean": 15139.5, + "valid_targets_min": 4248 + }, + { + "epoch": 4.162046908315565, + "grad_norm": 0.19920637644670364, + "learning_rate": 3.364418134431371e-06, + "loss": 0.9553232192993164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24909673631191254, + "step": 978, + "valid_targets_mean": 14692.8, + "valid_targets_min": 5739 + }, + { + "epoch": 4.166311300639659, + "grad_norm": 0.2098162057936045, + "learning_rate": 3.331494132371149e-06, + "loss": 1.0109907388687134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2588352560997009, + "step": 979, + "valid_targets_mean": 14593.1, + "valid_targets_min": 6091 + }, + { + "epoch": 4.1705756929637525, + "grad_norm": 0.20353019856800333, + "learning_rate": 3.2987173771983816e-06, + "loss": 0.9764183759689331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2452232539653778, + "step": 980, + "valid_targets_mean": 13589.6, + "valid_targets_min": 1670 + }, + { + "epoch": 4.174840085287847, + "grad_norm": 0.21252727700791268, + "learning_rate": 3.266088158457634e-06, + "loss": 0.994144856929779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23629000782966614, + "step": 981, + "valid_targets_mean": 15645.4, + "valid_targets_min": 1730 + }, + { + "epoch": 4.17910447761194, + "grad_norm": 0.20962826973153406, + "learning_rate": 3.233606764390147e-06, + "loss": 0.9840140342712402, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26156654953956604, + "step": 982, + "valid_targets_mean": 14633.2, + "valid_targets_min": 2488 + }, + { + "epoch": 4.183368869936034, + "grad_norm": 0.20671678278867006, + "learning_rate": 3.2012734819313127e-06, + "loss": 0.9591784477233887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2380722612142563, + "step": 983, + "valid_targets_mean": 14881.7, + "valid_targets_min": 2565 + }, + { + "epoch": 4.187633262260128, + "grad_norm": 0.20511799080332893, + "learning_rate": 3.1690885967081187e-06, + "loss": 0.9503059387207031, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23584482073783875, + "step": 984, + "valid_targets_mean": 14258.9, + "valid_targets_min": 4410 + }, + { + "epoch": 4.1918976545842215, + "grad_norm": 0.21463917773699412, + "learning_rate": 3.1370523930366393e-06, + "loss": 1.0117886066436768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27862632274627686, + "step": 985, + "valid_targets_mean": 15395.7, + "valid_targets_min": 7915 + }, + { + "epoch": 4.196162046908316, + "grad_norm": 0.2093726292598158, + "learning_rate": 3.105165153919525e-06, + "loss": 0.9581783413887024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24928846955299377, + "step": 986, + "valid_targets_mean": 14460.1, + "valid_targets_min": 7483 + }, + { + "epoch": 4.20042643923241, + "grad_norm": 0.2222992030682058, + "learning_rate": 3.073427161043492e-06, + "loss": 0.9946070313453674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24092058837413788, + "step": 987, + "valid_targets_mean": 14355.3, + "valid_targets_min": 1923 + }, + { + "epoch": 4.204690831556503, + "grad_norm": 0.20625953296432958, + "learning_rate": 3.0418386947768463e-06, + "loss": 0.9738461971282959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.262671560049057, + "step": 988, + "valid_targets_mean": 15095.2, + "valid_targets_min": 1015 + }, + { + "epoch": 4.208955223880597, + "grad_norm": 0.19665303464106015, + "learning_rate": 3.01040003416698e-06, + "loss": 0.9926748275756836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28288888931274414, + "step": 989, + "valid_targets_mean": 15240.2, + "valid_targets_min": 11135 + }, + { + "epoch": 4.21321961620469, + "grad_norm": 0.25115515156728374, + "learning_rate": 2.97911145693796e-06, + "loss": 0.9960339069366455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2687056064605713, + "step": 990, + "valid_targets_mean": 14847.2, + "valid_targets_min": 2936 + }, + { + "epoch": 4.217484008528785, + "grad_norm": 0.2106643095843318, + "learning_rate": 2.947973239488009e-06, + "loss": 0.9805059432983398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2532857656478882, + "step": 991, + "valid_targets_mean": 14551.3, + "valid_targets_min": 2467 + }, + { + "epoch": 4.221748400852879, + "grad_norm": 0.21502802400470067, + "learning_rate": 2.91698565688711e-06, + "loss": 0.9603044986724854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2556474506855011, + "step": 992, + "valid_targets_mean": 14250.3, + "valid_targets_min": 1495 + }, + { + "epoch": 4.226012793176972, + "grad_norm": 0.20760424832292068, + "learning_rate": 2.886148982874566e-06, + "loss": 0.9346092939376831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25037434697151184, + "step": 993, + "valid_targets_mean": 14932.4, + "valid_targets_min": 5263 + }, + { + "epoch": 4.230277185501066, + "grad_norm": 0.1998437758947691, + "learning_rate": 2.8554634898565668e-06, + "loss": 0.9752610325813293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23575522005558014, + "step": 994, + "valid_targets_mean": 14343.8, + "valid_targets_min": 2517 + }, + { + "epoch": 4.23454157782516, + "grad_norm": 0.21425012639834637, + "learning_rate": 2.824929448903806e-06, + "loss": 0.9946762323379517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22817380726337433, + "step": 995, + "valid_targets_mean": 15112.9, + "valid_targets_min": 4219 + }, + { + "epoch": 4.2388059701492535, + "grad_norm": 0.20532225386032876, + "learning_rate": 2.794547129749059e-06, + "loss": 0.9420288801193237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2333347499370575, + "step": 996, + "valid_targets_mean": 15241.3, + "valid_targets_min": 4205 + }, + { + "epoch": 4.243070362473348, + "grad_norm": 0.21482359658075506, + "learning_rate": 2.7643168007848255e-06, + "loss": 0.9856697916984558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23364725708961487, + "step": 997, + "valid_targets_mean": 13191.8, + "valid_targets_min": 1469 + }, + { + "epoch": 4.247334754797441, + "grad_norm": 0.19816002866144786, + "learning_rate": 2.734238729060956e-06, + "loss": 0.9388452172279358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23158328235149384, + "step": 998, + "valid_targets_mean": 14693.8, + "valid_targets_min": 6332 + }, + { + "epoch": 4.251599147121535, + "grad_norm": 0.19331845274108647, + "learning_rate": 2.7043131802822653e-06, + "loss": 0.9250015020370483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23296083509922028, + "step": 999, + "valid_targets_mean": 14107.6, + "valid_targets_min": 1448 + }, + { + "epoch": 4.255863539445629, + "grad_norm": 0.2201391523017833, + "learning_rate": 2.674540418806222e-06, + "loss": 0.9823698401451111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22890284657478333, + "step": 1000, + "valid_targets_mean": 13355.3, + "valid_targets_min": 1938 + }, + { + "epoch": 4.2601279317697225, + "grad_norm": 0.2032741949241354, + "learning_rate": 2.6449207076405857e-06, + "loss": 0.9904990792274475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25959524512290955, + "step": 1001, + "valid_targets_mean": 13930.4, + "valid_targets_min": 3569 + }, + { + "epoch": 4.264392324093817, + "grad_norm": 0.19940973362259065, + "learning_rate": 2.6154543084411035e-06, + "loss": 0.9995609521865845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23636648058891296, + "step": 1002, + "valid_targets_mean": 13908.2, + "valid_targets_min": 3123 + }, + { + "epoch": 4.268656716417911, + "grad_norm": 0.18849042085274592, + "learning_rate": 2.5861414815091834e-06, + "loss": 0.9617021083831787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23850838840007782, + "step": 1003, + "valid_targets_mean": 14161.7, + "valid_targets_min": 3380 + }, + { + "epoch": 4.272921108742004, + "grad_norm": 0.19094650959348478, + "learning_rate": 2.5569824857895987e-06, + "loss": 0.9622359275817871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2301647961139679, + "step": 1004, + "valid_targets_mean": 15462.8, + "valid_targets_min": 8361 + }, + { + "epoch": 4.277185501066098, + "grad_norm": 0.20207137277949228, + "learning_rate": 2.5279775788682083e-06, + "loss": 0.9702969789505005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22978103160858154, + "step": 1005, + "valid_targets_mean": 13786.1, + "valid_targets_min": 1132 + }, + { + "epoch": 4.281449893390192, + "grad_norm": 0.1980151959217989, + "learning_rate": 2.499127016969671e-06, + "loss": 0.997840404510498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2729083299636841, + "step": 1006, + "valid_targets_mean": 15124.5, + "valid_targets_min": 7044 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.19261362795010986, + "learning_rate": 2.4704310549551934e-06, + "loss": 0.9802528023719788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24483956396579742, + "step": 1007, + "valid_targets_mean": 14889.1, + "valid_targets_min": 2810 + }, + { + "epoch": 4.28997867803838, + "grad_norm": 0.19177431216588642, + "learning_rate": 2.441889946320266e-06, + "loss": 0.9607895612716675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22158975899219513, + "step": 1008, + "valid_targets_mean": 14288.4, + "valid_targets_min": 790 + }, + { + "epoch": 4.294243070362473, + "grad_norm": 0.20398516323125138, + "learning_rate": 2.4135039431924233e-06, + "loss": 1.0122029781341553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23977097868919373, + "step": 1009, + "valid_targets_mean": 14440.8, + "valid_targets_min": 2303 + }, + { + "epoch": 4.298507462686567, + "grad_norm": 0.19793787015176892, + "learning_rate": 2.3852732963290426e-06, + "loss": 0.9613388180732727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24488984048366547, + "step": 1010, + "valid_targets_mean": 15772.8, + "valid_targets_min": 11202 + }, + { + "epoch": 4.302771855010661, + "grad_norm": 0.19676291396618317, + "learning_rate": 2.3571982551150853e-06, + "loss": 0.9711207151412964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2269301414489746, + "step": 1011, + "valid_targets_mean": 14957.4, + "valid_targets_min": 1896 + }, + { + "epoch": 4.3070362473347545, + "grad_norm": 0.203833072396682, + "learning_rate": 2.329279067560937e-06, + "loss": 0.95209801197052, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23200687766075134, + "step": 1012, + "valid_targets_mean": 15725.0, + "valid_targets_min": 10802 + }, + { + "epoch": 4.311300639658849, + "grad_norm": 0.1964883187880883, + "learning_rate": 2.301515980300182e-06, + "loss": 1.010817289352417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24898159503936768, + "step": 1013, + "valid_targets_mean": 14878.5, + "valid_targets_min": 4121 + }, + { + "epoch": 4.315565031982943, + "grad_norm": 0.1898322181946224, + "learning_rate": 2.2739092385874527e-06, + "loss": 0.9674122333526611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24440470337867737, + "step": 1014, + "valid_targets_mean": 15091.1, + "valid_targets_min": 9708 + }, + { + "epoch": 4.319829424307036, + "grad_norm": 0.19301254137228901, + "learning_rate": 2.2464590862962443e-06, + "loss": 1.007246494293213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25044572353363037, + "step": 1015, + "valid_targets_mean": 15506.5, + "valid_targets_min": 11439 + }, + { + "epoch": 4.32409381663113, + "grad_norm": 0.20822308446025029, + "learning_rate": 2.219165765916769e-06, + "loss": 0.9655307531356812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2484723925590515, + "step": 1016, + "valid_targets_mean": 13985.8, + "valid_targets_min": 5122 + }, + { + "epoch": 4.3283582089552235, + "grad_norm": 0.1934116882562992, + "learning_rate": 2.192029518553798e-06, + "loss": 0.9374470710754395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24636460840702057, + "step": 1017, + "valid_targets_mean": 14612.4, + "valid_targets_min": 2367 + }, + { + "epoch": 4.332622601279318, + "grad_norm": 0.1882233968320926, + "learning_rate": 2.165050583924566e-06, + "loss": 0.9549652338027954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24371199309825897, + "step": 1018, + "valid_targets_mean": 14470.8, + "valid_targets_min": 1843 + }, + { + "epoch": 4.336886993603412, + "grad_norm": 0.19491440983190494, + "learning_rate": 2.1382292003566163e-06, + "loss": 0.9814637899398804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24736478924751282, + "step": 1019, + "valid_targets_mean": 14428.2, + "valid_targets_min": 2854 + }, + { + "epoch": 4.341151385927505, + "grad_norm": 0.19348582439512307, + "learning_rate": 2.1115656047857213e-06, + "loss": 0.9029624462127686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22049419581890106, + "step": 1020, + "valid_targets_mean": 14133.2, + "valid_targets_min": 6151 + }, + { + "epoch": 4.345415778251599, + "grad_norm": 0.19689546268055425, + "learning_rate": 2.0850600327537806e-06, + "loss": 0.9489182233810425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23130947351455688, + "step": 1021, + "valid_targets_mean": 13676.0, + "valid_targets_min": 1755 + }, + { + "epoch": 4.349680170575693, + "grad_norm": 0.21680025079180115, + "learning_rate": 2.058712718406719e-06, + "loss": 1.0014092922210693, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24298085272312164, + "step": 1022, + "valid_targets_mean": 14311.4, + "valid_targets_min": 3247 + }, + { + "epoch": 4.353944562899787, + "grad_norm": 0.19883892276598125, + "learning_rate": 2.032523894492471e-06, + "loss": 1.0021915435791016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2545217275619507, + "step": 1023, + "valid_targets_mean": 15110.4, + "valid_targets_min": 1532 + }, + { + "epoch": 4.358208955223881, + "grad_norm": 0.19658289092130304, + "learning_rate": 2.0064937923588634e-06, + "loss": 1.0034961700439453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25181353092193604, + "step": 1024, + "valid_targets_mean": 14426.9, + "valid_targets_min": 5173 + }, + { + "epoch": 4.362473347547974, + "grad_norm": 0.20280454994094435, + "learning_rate": 1.9806226419516195e-06, + "loss": 0.9881649017333984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2471427172422409, + "step": 1025, + "valid_targets_mean": 14459.1, + "valid_targets_min": 4839 + }, + { + "epoch": 4.366737739872068, + "grad_norm": 0.19758701399656536, + "learning_rate": 1.954910671812298e-06, + "loss": 0.9592493772506714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22283393144607544, + "step": 1026, + "valid_targets_mean": 14732.3, + "valid_targets_min": 1356 + }, + { + "epoch": 4.371002132196162, + "grad_norm": 0.20655855190403255, + "learning_rate": 1.9293581090762894e-06, + "loss": 0.9723584651947021, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2688632011413574, + "step": 1027, + "valid_targets_mean": 14489.7, + "valid_targets_min": 6514 + }, + { + "epoch": 4.3752665245202556, + "grad_norm": 0.18928942678209912, + "learning_rate": 1.9039651794708058e-06, + "loss": 0.9399305582046509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2241872400045395, + "step": 1028, + "valid_targets_mean": 14407.0, + "valid_targets_min": 4268 + }, + { + "epoch": 4.37953091684435, + "grad_norm": 0.19666747835129786, + "learning_rate": 1.8787321073128817e-06, + "loss": 0.9942531585693359, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27074459195137024, + "step": 1029, + "valid_targets_mean": 15288.0, + "valid_targets_min": 11404 + }, + { + "epoch": 4.383795309168444, + "grad_norm": 0.2101409121194208, + "learning_rate": 1.8536591155073958e-06, + "loss": 1.0341134071350098, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24674727022647858, + "step": 1030, + "valid_targets_mean": 14246.6, + "valid_targets_min": 2476 + }, + { + "epoch": 4.388059701492537, + "grad_norm": 0.18631607942909864, + "learning_rate": 1.8287464255451181e-06, + "loss": 0.9730774164199829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23140370845794678, + "step": 1031, + "valid_targets_mean": 14085.7, + "valid_targets_min": 4573 + }, + { + "epoch": 4.392324093816631, + "grad_norm": 0.19758703749563059, + "learning_rate": 1.803994257500714e-06, + "loss": 0.9657065272331238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2400664985179901, + "step": 1032, + "valid_targets_mean": 14661.8, + "valid_targets_min": 2977 + }, + { + "epoch": 4.396588486140725, + "grad_norm": 0.19551350846462764, + "learning_rate": 1.7794028300308474e-06, + "loss": 0.9861183762550354, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2520785927772522, + "step": 1033, + "valid_targets_mean": 14641.4, + "valid_targets_min": 1570 + }, + { + "epoch": 4.400852878464819, + "grad_norm": 0.19925587301074782, + "learning_rate": 1.7549723603722003e-06, + "loss": 0.9705492258071899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24231970310211182, + "step": 1034, + "valid_targets_mean": 15059.8, + "valid_targets_min": 4959 + }, + { + "epoch": 4.405117270788913, + "grad_norm": 0.19374055288057854, + "learning_rate": 1.730703064339605e-06, + "loss": 0.9851311445236206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25250244140625, + "step": 1035, + "valid_targets_mean": 15045.9, + "valid_targets_min": 6417 + }, + { + "epoch": 4.409381663113006, + "grad_norm": 0.19565910507434792, + "learning_rate": 1.7065951563241022e-06, + "loss": 0.9457612037658691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2272084653377533, + "step": 1036, + "valid_targets_mean": 14376.1, + "valid_targets_min": 2529 + }, + { + "epoch": 4.4136460554371, + "grad_norm": 0.20118356334045073, + "learning_rate": 1.682648849291051e-06, + "loss": 1.0145244598388672, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23717108368873596, + "step": 1037, + "valid_targets_mean": 14745.7, + "valid_targets_min": 4989 + }, + { + "epoch": 4.417910447761194, + "grad_norm": 0.21430267101742012, + "learning_rate": 1.6588643547782579e-06, + "loss": 1.0064364671707153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.246231347322464, + "step": 1038, + "valid_targets_mean": 14692.8, + "valid_targets_min": 6686 + }, + { + "epoch": 4.422174840085288, + "grad_norm": 0.21878191303956324, + "learning_rate": 1.6352418828941052e-06, + "loss": 0.9757519960403442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23531365394592285, + "step": 1039, + "valid_targets_mean": 14086.5, + "valid_targets_min": 5272 + }, + { + "epoch": 4.426439232409382, + "grad_norm": 0.20380700736786128, + "learning_rate": 1.6117816423156952e-06, + "loss": 0.9778440594673157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25534456968307495, + "step": 1040, + "valid_targets_mean": 14957.0, + "valid_targets_min": 8445 + }, + { + "epoch": 4.430703624733475, + "grad_norm": 0.19698958768813593, + "learning_rate": 1.5884838402870029e-06, + "loss": 0.9737083911895752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2419891208410263, + "step": 1041, + "valid_targets_mean": 14819.6, + "valid_targets_min": 1627 + }, + { + "epoch": 4.434968017057569, + "grad_norm": 0.19579630291077935, + "learning_rate": 1.5653486826170384e-06, + "loss": 0.98288494348526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24798744916915894, + "step": 1042, + "valid_targets_mean": 14243.4, + "valid_targets_min": 2334 + }, + { + "epoch": 4.439232409381663, + "grad_norm": 0.1908871461023108, + "learning_rate": 1.5423763736780583e-06, + "loss": 0.9392582774162292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22092154622077942, + "step": 1043, + "valid_targets_mean": 13959.9, + "valid_targets_min": 2249 + }, + { + "epoch": 4.443496801705757, + "grad_norm": 0.19569440179212702, + "learning_rate": 1.5195671164037173e-06, + "loss": 0.9626212120056152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25484955310821533, + "step": 1044, + "valid_targets_mean": 14064.9, + "valid_targets_min": 934 + }, + { + "epoch": 4.447761194029851, + "grad_norm": 0.2062871958262136, + "learning_rate": 1.496921112287315e-06, + "loss": 0.9724099636077881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24300584197044373, + "step": 1045, + "valid_targets_mean": 15046.3, + "valid_targets_min": 6694 + }, + { + "epoch": 4.452025586353945, + "grad_norm": 0.193355511893332, + "learning_rate": 1.4744385613799894e-06, + "loss": 0.9834390878677368, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2343788743019104, + "step": 1046, + "valid_targets_mean": 14326.0, + "valid_targets_min": 6455 + }, + { + "epoch": 4.456289978678038, + "grad_norm": 0.19879252661733948, + "learning_rate": 1.4521196622889644e-06, + "loss": 0.9382596611976624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2293034791946411, + "step": 1047, + "valid_targets_mean": 13610.3, + "valid_targets_min": 2201 + }, + { + "epoch": 4.460554371002132, + "grad_norm": 0.19757281174151833, + "learning_rate": 1.4299646121757892e-06, + "loss": 0.9946603178977966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2413906753063202, + "step": 1048, + "valid_targets_mean": 15320.3, + "valid_targets_min": 6751 + }, + { + "epoch": 4.464818763326226, + "grad_norm": 0.18561266334145715, + "learning_rate": 1.4079736067545912e-06, + "loss": 0.9563462734222412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24462030827999115, + "step": 1049, + "valid_targets_mean": 15228.7, + "valid_targets_min": 6663 + }, + { + "epoch": 4.46908315565032, + "grad_norm": 0.19151475827533837, + "learning_rate": 1.3861468402903634e-06, + "loss": 0.9560338258743286, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2419666349887848, + "step": 1050, + "valid_targets_mean": 14435.8, + "valid_targets_min": 2343 + }, + { + "epoch": 4.473347547974414, + "grad_norm": 0.1987630047053824, + "learning_rate": 1.3644845055972322e-06, + "loss": 0.9236841201782227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21292690932750702, + "step": 1051, + "valid_targets_mean": 14298.3, + "valid_targets_min": 5524 + }, + { + "epoch": 4.477611940298507, + "grad_norm": 0.19194131819944682, + "learning_rate": 1.3429867940367626e-06, + "loss": 0.9673594832420349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2729540169239044, + "step": 1052, + "valid_targets_mean": 15544.0, + "valid_targets_min": 8747 + }, + { + "epoch": 4.481876332622601, + "grad_norm": 0.192812446226727, + "learning_rate": 1.321653895516264e-06, + "loss": 0.9610645771026611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22623619437217712, + "step": 1053, + "valid_targets_mean": 14384.1, + "valid_targets_min": 2210 + }, + { + "epoch": 4.486140724946695, + "grad_norm": 0.19921613895103593, + "learning_rate": 1.3004859984871199e-06, + "loss": 0.9949779510498047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.238198384642601, + "step": 1054, + "valid_targets_mean": 14810.5, + "valid_targets_min": 3268 + }, + { + "epoch": 4.490405117270789, + "grad_norm": 0.2165221604049567, + "learning_rate": 1.279483289943102e-06, + "loss": 0.948443591594696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22628512978553772, + "step": 1055, + "valid_targets_mean": 14849.5, + "valid_targets_min": 5249 + }, + { + "epoch": 4.494669509594883, + "grad_norm": 0.20231454197466772, + "learning_rate": 1.2586459554187558e-06, + "loss": 0.9572373032569885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22259369492530823, + "step": 1056, + "valid_targets_mean": 13745.0, + "valid_targets_min": 2013 + }, + { + "epoch": 4.498933901918977, + "grad_norm": 0.18715519061479605, + "learning_rate": 1.2379741789877175e-06, + "loss": 0.9423749446868896, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22759225964546204, + "step": 1057, + "valid_targets_mean": 13998.2, + "valid_targets_min": 1699 + }, + { + "epoch": 4.50319829424307, + "grad_norm": 0.19620227290225017, + "learning_rate": 1.2174681432611245e-06, + "loss": 0.959303617477417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2522681951522827, + "step": 1058, + "valid_targets_mean": 15055.7, + "valid_targets_min": 5225 + }, + { + "epoch": 4.507462686567164, + "grad_norm": 0.1889099106230078, + "learning_rate": 1.1971280293859811e-06, + "loss": 0.9340790510177612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261797159910202, + "step": 1059, + "valid_targets_mean": 15610.4, + "valid_targets_min": 8812 + }, + { + "epoch": 4.5117270788912585, + "grad_norm": 0.18480707566845292, + "learning_rate": 1.17695401704357e-06, + "loss": 0.9508934020996094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23876410722732544, + "step": 1060, + "valid_targets_mean": 14518.6, + "valid_targets_min": 4628 + }, + { + "epoch": 4.515991471215352, + "grad_norm": 0.19695395466616883, + "learning_rate": 1.1569462844478552e-06, + "loss": 0.9892035722732544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2530513107776642, + "step": 1061, + "valid_targets_mean": 14293.0, + "valid_targets_min": 7136 + }, + { + "epoch": 4.520255863539446, + "grad_norm": 0.1882358661033066, + "learning_rate": 1.1371050083439107e-06, + "loss": 0.9638294577598572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2503116726875305, + "step": 1062, + "valid_targets_mean": 14417.9, + "valid_targets_min": 3406 + }, + { + "epoch": 4.524520255863539, + "grad_norm": 0.20777062678587696, + "learning_rate": 1.1174303640063622e-06, + "loss": 0.9722185134887695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25119921565055847, + "step": 1063, + "valid_targets_mean": 14916.6, + "valid_targets_min": 8177 + }, + { + "epoch": 4.528784648187633, + "grad_norm": 0.19873176172073242, + "learning_rate": 1.097922525237849e-06, + "loss": 0.991715133190155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24108682572841644, + "step": 1064, + "valid_targets_mean": 15386.4, + "valid_targets_min": 6365 + }, + { + "epoch": 4.533049040511727, + "grad_norm": 0.19890055604921067, + "learning_rate": 1.078581664367455e-06, + "loss": 0.9921541213989258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.263067364692688, + "step": 1065, + "valid_targets_mean": 14550.4, + "valid_targets_min": 4874 + }, + { + "epoch": 4.537313432835821, + "grad_norm": 0.19636402581022136, + "learning_rate": 1.0594079522492274e-06, + "loss": 0.9642506837844849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24342098832130432, + "step": 1066, + "valid_targets_mean": 14769.9, + "valid_targets_min": 7369 + }, + { + "epoch": 4.541577825159915, + "grad_norm": 0.18699904506849158, + "learning_rate": 1.040401558260633e-06, + "loss": 0.9328286647796631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22978878021240234, + "step": 1067, + "valid_targets_mean": 15161.9, + "valid_targets_min": 4365 + }, + { + "epoch": 4.545842217484008, + "grad_norm": 0.1986441092033793, + "learning_rate": 1.0215626503010911e-06, + "loss": 0.9636072516441345, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24951043725013733, + "step": 1068, + "valid_targets_mean": 15064.1, + "valid_targets_min": 5965 + }, + { + "epoch": 4.550106609808102, + "grad_norm": 0.1923777508292909, + "learning_rate": 1.002891394790475e-06, + "loss": 0.9635332822799683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2602200210094452, + "step": 1069, + "valid_targets_mean": 15075.1, + "valid_targets_min": 10306 + }, + { + "epoch": 4.554371002132196, + "grad_norm": 0.19845653439920236, + "learning_rate": 9.843879566676273e-07, + "loss": 0.9370394945144653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21623185276985168, + "step": 1070, + "valid_targets_mean": 14604.8, + "valid_targets_min": 7174 + }, + { + "epoch": 4.55863539445629, + "grad_norm": 0.1889495311593519, + "learning_rate": 9.660524993889386e-07, + "loss": 0.9778931736946106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24932819604873657, + "step": 1071, + "valid_targets_mean": 14791.7, + "valid_targets_min": 2852 + }, + { + "epoch": 4.562899786780384, + "grad_norm": 0.1998996367095556, + "learning_rate": 9.478851849268733e-07, + "loss": 1.0181862115859985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24299973249435425, + "step": 1072, + "valid_targets_mean": 15323.3, + "valid_targets_min": 4432 + }, + { + "epoch": 4.567164179104478, + "grad_norm": 0.1820641069793803, + "learning_rate": 9.298861737685527e-07, + "loss": 0.9861880540847778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25722503662109375, + "step": 1073, + "valid_targets_mean": 15189.2, + "valid_targets_min": 4444 + }, + { + "epoch": 4.571428571428571, + "grad_norm": 0.18153700558157096, + "learning_rate": 9.120556249143341e-07, + "loss": 0.9695085287094116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25585031509399414, + "step": 1074, + "valid_targets_mean": 15192.9, + "valid_targets_min": 6463 + }, + { + "epoch": 4.575692963752665, + "grad_norm": 0.19006038413199366, + "learning_rate": 8.943936958763988e-07, + "loss": 0.9866265058517456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.243064284324646, + "step": 1075, + "valid_targets_mean": 14832.0, + "valid_targets_min": 6920 + }, + { + "epoch": 4.5799573560767595, + "grad_norm": 0.19175574211225055, + "learning_rate": 8.769005426773836e-07, + "loss": 0.963053822517395, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25238820910453796, + "step": 1076, + "valid_targets_mean": 14330.1, + "valid_targets_min": 3132 + }, + { + "epoch": 4.584221748400853, + "grad_norm": 0.19962468737797834, + "learning_rate": 8.595763198489714e-07, + "loss": 0.9740028977394104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2691187858581543, + "step": 1077, + "valid_targets_mean": 15214.1, + "valid_targets_min": 2728 + }, + { + "epoch": 4.588486140724947, + "grad_norm": 0.19543672587397726, + "learning_rate": 8.42421180430546e-07, + "loss": 0.9532477855682373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24406878650188446, + "step": 1078, + "valid_targets_mean": 13688.9, + "valid_targets_min": 3217 + }, + { + "epoch": 4.59275053304904, + "grad_norm": 0.19673814011906443, + "learning_rate": 8.254352759678386e-07, + "loss": 0.9715753793716431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24512049555778503, + "step": 1079, + "valid_targets_mean": 14655.3, + "valid_targets_min": 1422 + }, + { + "epoch": 4.597014925373134, + "grad_norm": 0.1933483442932517, + "learning_rate": 8.086187565115877e-07, + "loss": 0.9847027063369751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25676167011260986, + "step": 1080, + "valid_targets_mean": 14677.1, + "valid_targets_min": 6142 + }, + { + "epoch": 4.601279317697228, + "grad_norm": 0.1923172399437775, + "learning_rate": 7.919717706162067e-07, + "loss": 0.9372127056121826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22452899813652039, + "step": 1081, + "valid_targets_mean": 15138.4, + "valid_targets_min": 4384 + }, + { + "epoch": 4.605543710021322, + "grad_norm": 0.18797774715751278, + "learning_rate": 7.754944653384777e-07, + "loss": 0.9522218704223633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23005110025405884, + "step": 1082, + "valid_targets_mean": 14424.0, + "valid_targets_min": 3680 + }, + { + "epoch": 4.609808102345416, + "grad_norm": 0.20412765681703315, + "learning_rate": 7.591869862362534e-07, + "loss": 0.9851837158203125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2484971135854721, + "step": 1083, + "valid_targets_mean": 13968.2, + "valid_targets_min": 2376 + }, + { + "epoch": 4.61407249466951, + "grad_norm": 0.18705295806126554, + "learning_rate": 7.430494773671682e-07, + "loss": 0.9888862371444702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22050131857395172, + "step": 1084, + "valid_targets_mean": 13599.2, + "valid_targets_min": 1005 + }, + { + "epoch": 4.618336886993603, + "grad_norm": 0.18103862779139593, + "learning_rate": 7.270820812873714e-07, + "loss": 0.9169750809669495, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22809740900993347, + "step": 1085, + "valid_targets_mean": 14565.4, + "valid_targets_min": 3230 + }, + { + "epoch": 4.622601279317697, + "grad_norm": 0.20129642792242997, + "learning_rate": 7.112849390502563e-07, + "loss": 0.9655887484550476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23568880558013916, + "step": 1086, + "valid_targets_mean": 14391.3, + "valid_targets_min": 5144 + }, + { + "epoch": 4.6268656716417915, + "grad_norm": 0.19291335895387385, + "learning_rate": 6.956581902052306e-07, + "loss": 0.9082891941070557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25022265315055847, + "step": 1087, + "valid_targets_mean": 14731.2, + "valid_targets_min": 4783 + }, + { + "epoch": 4.631130063965885, + "grad_norm": 0.18812618459759783, + "learning_rate": 6.802019727964593e-07, + "loss": 0.9885333180427551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24837151169776917, + "step": 1088, + "valid_targets_mean": 15128.8, + "valid_targets_min": 7616 + }, + { + "epoch": 4.635394456289979, + "grad_norm": 0.18344580845464492, + "learning_rate": 6.64916423361679e-07, + "loss": 0.9694564342498779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2605583667755127, + "step": 1089, + "valid_targets_mean": 15145.5, + "valid_targets_min": 2543 + }, + { + "epoch": 4.639658848614072, + "grad_norm": 0.19009505707846464, + "learning_rate": 6.498016769309567e-07, + "loss": 0.9988362193107605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25263822078704834, + "step": 1090, + "valid_targets_mean": 14835.9, + "valid_targets_min": 6481 + }, + { + "epoch": 4.643923240938166, + "grad_norm": 0.18706541614986036, + "learning_rate": 6.348578670255224e-07, + "loss": 0.9596368074417114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2152118682861328, + "step": 1091, + "valid_targets_mean": 14126.3, + "valid_targets_min": 2377 + }, + { + "epoch": 4.6481876332622605, + "grad_norm": 0.18810915341347045, + "learning_rate": 6.200851256565799e-07, + "loss": 0.9877229928970337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2393847107887268, + "step": 1092, + "valid_targets_mean": 15427.7, + "valid_targets_min": 5751 + }, + { + "epoch": 4.652452025586354, + "grad_norm": 0.19928457189939688, + "learning_rate": 6.054835833241357e-07, + "loss": 1.0273993015289307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2642330229282379, + "step": 1093, + "valid_targets_mean": 15013.9, + "valid_targets_min": 4037 + }, + { + "epoch": 4.656716417910448, + "grad_norm": 0.1889990280202245, + "learning_rate": 5.910533690158593e-07, + "loss": 0.940179705619812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23338699340820312, + "step": 1094, + "valid_targets_mean": 14892.3, + "valid_targets_min": 3271 + }, + { + "epoch": 4.660980810234541, + "grad_norm": 0.18391492586969269, + "learning_rate": 5.767946102059307e-07, + "loss": 0.9359939694404602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2344275861978531, + "step": 1095, + "valid_targets_mean": 14785.7, + "valid_targets_min": 8805 + }, + { + "epoch": 4.665245202558635, + "grad_norm": 0.19734063165504975, + "learning_rate": 5.627074328539173e-07, + "loss": 0.9584230780601501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23185734450817108, + "step": 1096, + "valid_targets_mean": 14196.5, + "valid_targets_min": 2432 + }, + { + "epoch": 4.669509594882729, + "grad_norm": 0.18300224884424285, + "learning_rate": 5.487919614036741e-07, + "loss": 0.9888830184936523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24649932980537415, + "step": 1097, + "valid_targets_mean": 14630.5, + "valid_targets_min": 927 + }, + { + "epoch": 4.673773987206823, + "grad_norm": 0.18499557400386807, + "learning_rate": 5.350483187822231e-07, + "loss": 0.9847787618637085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22201907634735107, + "step": 1098, + "valid_targets_mean": 14386.3, + "valid_targets_min": 2252 + }, + { + "epoch": 4.678038379530917, + "grad_norm": 0.19238368327234123, + "learning_rate": 5.214766263986848e-07, + "loss": 0.9672181606292725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23221909999847412, + "step": 1099, + "valid_targets_mean": 14210.4, + "valid_targets_min": 4534 + }, + { + "epoch": 4.682302771855011, + "grad_norm": 0.21105431763048013, + "learning_rate": 5.080770041431926e-07, + "loss": 0.9740506410598755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2598978281021118, + "step": 1100, + "valid_targets_mean": 14957.4, + "valid_targets_min": 7315 + }, + { + "epoch": 4.686567164179104, + "grad_norm": 0.19163944687988396, + "learning_rate": 4.948495703858492e-07, + "loss": 1.0037099123001099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24747025966644287, + "step": 1101, + "valid_targets_mean": 15043.2, + "valid_targets_min": 7809 + }, + { + "epoch": 4.690831556503198, + "grad_norm": 0.2028138879391803, + "learning_rate": 4.81794441975667e-07, + "loss": 0.9758346080780029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24678604304790497, + "step": 1102, + "valid_targets_mean": 14021.8, + "valid_targets_min": 4164 + }, + { + "epoch": 4.6950959488272925, + "grad_norm": 0.18419384239308548, + "learning_rate": 4.689117342395388e-07, + "loss": 0.9496323466300964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24128226935863495, + "step": 1103, + "valid_targets_mean": 14995.4, + "valid_targets_min": 7534 + }, + { + "epoch": 4.699360341151386, + "grad_norm": 0.21041750416967223, + "learning_rate": 4.5620156098122204e-07, + "loss": 0.9582512974739075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24600455164909363, + "step": 1104, + "valid_targets_mean": 15277.9, + "valid_targets_min": 5342 + }, + { + "epoch": 4.70362473347548, + "grad_norm": 0.18497727482724743, + "learning_rate": 4.4366403448033334e-07, + "loss": 0.9745293855667114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.241338849067688, + "step": 1105, + "valid_targets_mean": 14005.7, + "valid_targets_min": 3799 + }, + { + "epoch": 4.707889125799573, + "grad_norm": 0.19615234574871981, + "learning_rate": 4.3129926549136057e-07, + "loss": 0.9649718999862671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21115264296531677, + "step": 1106, + "valid_targets_mean": 13501.6, + "valid_targets_min": 1542 + }, + { + "epoch": 4.712153518123667, + "grad_norm": 0.1837567434240439, + "learning_rate": 4.191073632426701e-07, + "loss": 0.9577633738517761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2420632541179657, + "step": 1107, + "valid_targets_mean": 14789.1, + "valid_targets_min": 2956 + }, + { + "epoch": 4.7164179104477615, + "grad_norm": 0.20259924674259924, + "learning_rate": 4.0708843543555643e-07, + "loss": 0.9597268104553223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25918832421302795, + "step": 1108, + "valid_targets_mean": 14971.0, + "valid_targets_min": 7082 + }, + { + "epoch": 4.720682302771855, + "grad_norm": 0.1836845346354727, + "learning_rate": 3.95242588243292e-07, + "loss": 0.9578450322151184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24787406623363495, + "step": 1109, + "valid_targets_mean": 14803.8, + "valid_targets_min": 5534 + }, + { + "epoch": 4.724946695095949, + "grad_norm": 0.1899517621683741, + "learning_rate": 3.8356992631017e-07, + "loss": 0.9749801158905029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2569088637828827, + "step": 1110, + "valid_targets_mean": 14341.0, + "valid_targets_min": 4140 + }, + { + "epoch": 4.729211087420042, + "grad_norm": 0.18434897929537367, + "learning_rate": 3.720705527506008e-07, + "loss": 0.9631055593490601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22997114062309265, + "step": 1111, + "valid_targets_mean": 14496.9, + "valid_targets_min": 2722 + }, + { + "epoch": 4.733475479744136, + "grad_norm": 0.18472017417978503, + "learning_rate": 3.60744569148197e-07, + "loss": 0.9523324966430664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25216734409332275, + "step": 1112, + "valid_targets_mean": 15027.6, + "valid_targets_min": 8215 + }, + { + "epoch": 4.73773987206823, + "grad_norm": 0.18397508985131536, + "learning_rate": 3.4959207555485873e-07, + "loss": 0.9814821481704712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24376820027828217, + "step": 1113, + "valid_targets_mean": 14313.0, + "valid_targets_min": 1735 + }, + { + "epoch": 4.742004264392325, + "grad_norm": 0.1911943136789838, + "learning_rate": 3.3861317048992317e-07, + "loss": 0.9813393950462341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24430274963378906, + "step": 1114, + "valid_targets_mean": 14462.2, + "valid_targets_min": 4500 + }, + { + "epoch": 4.746268656716418, + "grad_norm": 0.1873194680136805, + "learning_rate": 3.278079509392562e-07, + "loss": 0.9734345078468323, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2578297555446625, + "step": 1115, + "valid_targets_mean": 15003.0, + "valid_targets_min": 1331 + }, + { + "epoch": 4.750533049040512, + "grad_norm": 0.1908834484987274, + "learning_rate": 3.171765123544224e-07, + "loss": 0.9803054332733154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26190507411956787, + "step": 1116, + "valid_targets_mean": 14364.8, + "valid_targets_min": 1532 + }, + { + "epoch": 4.754797441364605, + "grad_norm": 0.19667911628591056, + "learning_rate": 3.06718948651834e-07, + "loss": 0.9418925046920776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24983903765678406, + "step": 1117, + "valid_targets_mean": 15741.3, + "valid_targets_min": 9257 + }, + { + "epoch": 4.759061833688699, + "grad_norm": 0.1857840585589905, + "learning_rate": 2.964353522119168e-07, + "loss": 0.9536681771278381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23696255683898926, + "step": 1118, + "valid_targets_mean": 14265.1, + "valid_targets_min": 3473 + }, + { + "epoch": 4.7633262260127935, + "grad_norm": 0.18211523776490568, + "learning_rate": 2.863258138783032e-07, + "loss": 0.9456994533538818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2278173565864563, + "step": 1119, + "valid_targets_mean": 15495.7, + "valid_targets_min": 7691 + }, + { + "epoch": 4.767590618336887, + "grad_norm": 0.18772012264434046, + "learning_rate": 2.7639042295702245e-07, + "loss": 0.990449070930481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.255142480134964, + "step": 1120, + "valid_targets_mean": 14624.2, + "valid_targets_min": 2602 + }, + { + "epoch": 4.771855010660981, + "grad_norm": 0.17794325021838514, + "learning_rate": 2.666292672157056e-07, + "loss": 1.004270076751709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2521766722202301, + "step": 1121, + "valid_targets_mean": 15172.8, + "valid_targets_min": 4005 + }, + { + "epoch": 4.776119402985074, + "grad_norm": 0.18852994886358498, + "learning_rate": 2.570424328828325e-07, + "loss": 1.0067514181137085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.261335551738739, + "step": 1122, + "valid_targets_mean": 14962.2, + "valid_targets_min": 6855 + }, + { + "epoch": 4.780383795309168, + "grad_norm": 0.18114120226091554, + "learning_rate": 2.4763000464694377e-07, + "loss": 0.9487168192863464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24179330468177795, + "step": 1123, + "valid_targets_mean": 15655.1, + "valid_targets_min": 7801 + }, + { + "epoch": 4.7846481876332625, + "grad_norm": 0.1910696091037759, + "learning_rate": 2.383920656559102e-07, + "loss": 0.946317195892334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22754478454589844, + "step": 1124, + "valid_targets_mean": 14220.1, + "valid_targets_min": 1366 + }, + { + "epoch": 4.788912579957356, + "grad_norm": 0.1740135498338563, + "learning_rate": 2.2932869751619568e-07, + "loss": 0.923102855682373, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24337363243103027, + "step": 1125, + "valid_targets_mean": 14937.3, + "valid_targets_min": 4412 + }, + { + "epoch": 4.79317697228145, + "grad_norm": 0.19122995459784278, + "learning_rate": 2.2043998029212643e-07, + "loss": 0.9572536945343018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2463478446006775, + "step": 1126, + "valid_targets_mean": 14208.7, + "valid_targets_min": 3253 + }, + { + "epoch": 4.797441364605544, + "grad_norm": 0.182450314547117, + "learning_rate": 2.1172599250519398e-07, + "loss": 0.9736548662185669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.228672057390213, + "step": 1127, + "valid_targets_mean": 14014.3, + "valid_targets_min": 4944 + }, + { + "epoch": 4.801705756929637, + "grad_norm": 0.19054240714571696, + "learning_rate": 2.0318681113336013e-07, + "loss": 1.0034470558166504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28851258754730225, + "step": 1128, + "valid_targets_mean": 15012.4, + "valid_targets_min": 7985 + }, + { + "epoch": 4.8059701492537314, + "grad_norm": 0.1814757020638321, + "learning_rate": 1.9482251161037302e-07, + "loss": 0.9663975834846497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2275254726409912, + "step": 1129, + "valid_targets_mean": 15023.7, + "valid_targets_min": 4122 + }, + { + "epoch": 4.810234541577826, + "grad_norm": 0.18317281762468177, + "learning_rate": 1.866331678251032e-07, + "loss": 0.9956861734390259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2160797119140625, + "step": 1130, + "valid_targets_mean": 13762.8, + "valid_targets_min": 2047 + }, + { + "epoch": 4.814498933901919, + "grad_norm": 0.18636653048095259, + "learning_rate": 1.7861885212088869e-07, + "loss": 0.9476275444030762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2452719658613205, + "step": 1131, + "valid_targets_mean": 15130.3, + "valid_targets_min": 7622 + }, + { + "epoch": 4.818763326226013, + "grad_norm": 0.1870893164405279, + "learning_rate": 1.7077963529490204e-07, + "loss": 0.975817084312439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2513091564178467, + "step": 1132, + "valid_targets_mean": 14312.8, + "valid_targets_min": 5570 + }, + { + "epoch": 4.823027718550106, + "grad_norm": 0.18847022911078076, + "learning_rate": 1.6311558659751535e-07, + "loss": 0.9581156373023987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22693569958209991, + "step": 1133, + "valid_targets_mean": 14382.9, + "valid_targets_min": 1264 + }, + { + "epoch": 4.8272921108742, + "grad_norm": 0.18763662045198543, + "learning_rate": 1.5562677373169855e-07, + "loss": 0.9886241555213928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27293211221694946, + "step": 1134, + "valid_targets_mean": 14403.2, + "valid_targets_min": 2343 + }, + { + "epoch": 4.8315565031982945, + "grad_norm": 0.20077979460564824, + "learning_rate": 1.483132628524131e-07, + "loss": 1.0126842260360718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2457951009273529, + "step": 1135, + "valid_targets_mean": 13454.9, + "valid_targets_min": 1662 + }, + { + "epoch": 4.835820895522388, + "grad_norm": 0.18687809459757265, + "learning_rate": 1.4117511856603262e-07, + "loss": 0.964205801486969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24054095149040222, + "step": 1136, + "valid_targets_mean": 14659.2, + "valid_targets_min": 7141 + }, + { + "epoch": 4.840085287846482, + "grad_norm": 0.17951977462647573, + "learning_rate": 1.342124039297721e-07, + "loss": 0.9916234016418457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584913671016693, + "step": 1137, + "valid_targets_mean": 14581.5, + "valid_targets_min": 3649 + }, + { + "epoch": 4.844349680170575, + "grad_norm": 0.1876770296121893, + "learning_rate": 1.2742518045112396e-07, + "loss": 0.9750006198883057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25414639711380005, + "step": 1138, + "valid_targets_mean": 14580.6, + "valid_targets_min": 4957 + }, + { + "epoch": 4.848614072494669, + "grad_norm": 0.1814102798805498, + "learning_rate": 1.2081350808732518e-07, + "loss": 0.919538676738739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25270211696624756, + "step": 1139, + "valid_targets_mean": 14550.2, + "valid_targets_min": 5470 + }, + { + "epoch": 4.8528784648187635, + "grad_norm": 0.1950808693960726, + "learning_rate": 1.143774452448243e-07, + "loss": 1.0680432319641113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2641150951385498, + "step": 1140, + "valid_targets_mean": 14606.9, + "valid_targets_min": 2001 + }, + { + "epoch": 4.857142857142857, + "grad_norm": 0.17672726951039464, + "learning_rate": 1.0811704877875528e-07, + "loss": 0.9682325124740601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22097748517990112, + "step": 1141, + "valid_targets_mean": 14835.0, + "valid_targets_min": 2783 + }, + { + "epoch": 4.861407249466951, + "grad_norm": 0.1833672772740864, + "learning_rate": 1.0203237399245336e-07, + "loss": 0.9909142851829529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2378884106874466, + "step": 1142, + "valid_targets_mean": 14631.3, + "valid_targets_min": 3338 + }, + { + "epoch": 4.865671641791045, + "grad_norm": 0.18592331736700526, + "learning_rate": 9.612347463694882e-08, + "loss": 0.9562100172042847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24233251810073853, + "step": 1143, + "valid_targets_mean": 15098.5, + "valid_targets_min": 3303 + }, + { + "epoch": 4.869936034115138, + "grad_norm": 0.18082248229448622, + "learning_rate": 9.039040291050738e-08, + "loss": 0.9645106792449951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25318577885627747, + "step": 1144, + "valid_targets_mean": 15200.3, + "valid_targets_min": 3911 + }, + { + "epoch": 4.8742004264392325, + "grad_norm": 0.18353141910274656, + "learning_rate": 8.483320945815499e-08, + "loss": 0.9630197286605835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24674592912197113, + "step": 1145, + "valid_targets_mean": 15471.6, + "valid_targets_min": 2373 + }, + { + "epoch": 4.878464818763327, + "grad_norm": 0.18176404653949083, + "learning_rate": 7.945194337124262e-08, + "loss": 0.9523903131484985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24069157242774963, + "step": 1146, + "valid_targets_mean": 15206.4, + "valid_targets_min": 4402 + }, + { + "epoch": 4.88272921108742, + "grad_norm": 0.19201063070974406, + "learning_rate": 7.424665218700444e-08, + "loss": 1.0448331832885742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2784254550933838, + "step": 1147, + "valid_targets_mean": 15334.1, + "valid_targets_min": 9093 + }, + { + "epoch": 4.886993603411514, + "grad_norm": 0.1783617740470841, + "learning_rate": 6.921738188814254e-08, + "loss": 0.9594995379447937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2339986264705658, + "step": 1148, + "valid_targets_mean": 14965.5, + "valid_targets_min": 7527 + }, + { + "epoch": 4.891257995735607, + "grad_norm": 0.18561765833213933, + "learning_rate": 6.436417690241614e-08, + "loss": 0.9661248922348022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21553745865821838, + "step": 1149, + "valid_targets_mean": 13662.2, + "valid_targets_min": 1875 + }, + { + "epoch": 4.895522388059701, + "grad_norm": 0.18503637692193856, + "learning_rate": 5.968708010225532e-08, + "loss": 0.9736925363540649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.268303781747818, + "step": 1150, + "valid_targets_mean": 15144.8, + "valid_targets_min": 9892 + }, + { + "epoch": 4.899786780383796, + "grad_norm": 0.17701878705856514, + "learning_rate": 5.518613280437901e-08, + "loss": 0.9437923431396484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2272908240556717, + "step": 1151, + "valid_targets_mean": 14869.4, + "valid_targets_min": 2024 + }, + { + "epoch": 4.904051172707889, + "grad_norm": 0.181032855927092, + "learning_rate": 5.0861374769426433e-08, + "loss": 0.9633027911186218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2316833883523941, + "step": 1152, + "valid_targets_mean": 13918.9, + "valid_targets_min": 4828 + }, + { + "epoch": 4.908315565031983, + "grad_norm": 0.18263654557472672, + "learning_rate": 4.671284420161071e-08, + "loss": 0.9642020463943481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23549358546733856, + "step": 1153, + "valid_targets_mean": 14218.9, + "valid_targets_min": 4878 + }, + { + "epoch": 4.912579957356077, + "grad_norm": 0.17852479124594317, + "learning_rate": 4.274057774838136e-08, + "loss": 0.9545692205429077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23123939335346222, + "step": 1154, + "valid_targets_mean": 14188.5, + "valid_targets_min": 1968 + }, + { + "epoch": 4.91684434968017, + "grad_norm": 0.1762245137912565, + "learning_rate": 3.894461050010012e-08, + "loss": 0.9212028384208679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19921639561653137, + "step": 1155, + "valid_targets_mean": 13616.8, + "valid_targets_min": 1976 + }, + { + "epoch": 4.9211087420042645, + "grad_norm": 0.1895851250116108, + "learning_rate": 3.5324975989725615e-08, + "loss": 0.9664217233657837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2327403724193573, + "step": 1156, + "valid_targets_mean": 14298.6, + "valid_targets_min": 1530 + }, + { + "epoch": 4.925373134328359, + "grad_norm": 0.17718130578936006, + "learning_rate": 3.188170619252473e-08, + "loss": 0.9604615569114685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24611347913742065, + "step": 1157, + "valid_targets_mean": 15502.1, + "valid_targets_min": 10452 + }, + { + "epoch": 4.929637526652452, + "grad_norm": 0.17461171312398904, + "learning_rate": 2.8614831525786147e-08, + "loss": 0.9696751832962036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23947827517986298, + "step": 1158, + "valid_targets_mean": 15015.1, + "valid_targets_min": 8581 + }, + { + "epoch": 4.933901918976546, + "grad_norm": 0.1958379764084762, + "learning_rate": 2.552438084855613e-08, + "loss": 0.9708175659179688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23773235082626343, + "step": 1159, + "valid_targets_mean": 14752.8, + "valid_targets_min": 3297 + }, + { + "epoch": 4.938166311300639, + "grad_norm": 0.1879404170175882, + "learning_rate": 2.2610381461372068e-08, + "loss": 1.0133110284805298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2715913951396942, + "step": 1160, + "valid_targets_mean": 15038.8, + "valid_targets_min": 7635 + }, + { + "epoch": 4.9424307036247335, + "grad_norm": 0.1767207674354065, + "learning_rate": 1.987285910603598e-08, + "loss": 0.9827720522880554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24032725393772125, + "step": 1161, + "valid_targets_mean": 14799.3, + "valid_targets_min": 3960 + }, + { + "epoch": 4.946695095948828, + "grad_norm": 0.19329327315064376, + "learning_rate": 1.7311837965379164e-08, + "loss": 0.981905460357666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515614926815033, + "step": 1162, + "valid_targets_mean": 15367.5, + "valid_targets_min": 7569 + }, + { + "epoch": 4.950959488272921, + "grad_norm": 0.1801223332568921, + "learning_rate": 1.4927340663046798e-08, + "loss": 0.9775525331497192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.251977801322937, + "step": 1163, + "valid_targets_mean": 14676.9, + "valid_targets_min": 2093 + }, + { + "epoch": 4.955223880597015, + "grad_norm": 0.18051575173508805, + "learning_rate": 1.2719388263300325e-08, + "loss": 0.9846411943435669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23140552639961243, + "step": 1164, + "valid_targets_mean": 13634.5, + "valid_targets_min": 2356 + }, + { + "epoch": 4.959488272921108, + "grad_norm": 0.17883061175729384, + "learning_rate": 1.0688000270839827e-08, + "loss": 1.003123164176941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2499091625213623, + "step": 1165, + "valid_targets_mean": 14019.7, + "valid_targets_min": 1756 + }, + { + "epoch": 4.963752665245202, + "grad_norm": 0.20200364697082135, + "learning_rate": 8.833194630615271e-09, + "loss": 0.9979058504104614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2524784207344055, + "step": 1166, + "valid_targets_mean": 14548.3, + "valid_targets_min": 7809 + }, + { + "epoch": 4.968017057569297, + "grad_norm": 0.18544115356837396, + "learning_rate": 7.154987727682194e-09, + "loss": 0.9676626920700073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24133968353271484, + "step": 1167, + "valid_targets_mean": 14866.4, + "valid_targets_min": 4105 + }, + { + "epoch": 4.97228144989339, + "grad_norm": 0.17635779057501313, + "learning_rate": 5.6533943870462625e-09, + "loss": 0.9879981279373169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24625255167484283, + "step": 1168, + "valid_targets_mean": 14967.6, + "valid_targets_min": 3003 + }, + { + "epoch": 4.976545842217484, + "grad_norm": 0.1845314169679861, + "learning_rate": 4.328427873541152e-09, + "loss": 1.0250025987625122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2580295205116272, + "step": 1169, + "valid_targets_mean": 15066.1, + "valid_targets_min": 8759 + }, + { + "epoch": 4.980810234541578, + "grad_norm": 0.1861702009125637, + "learning_rate": 3.1800998917086432e-09, + "loss": 0.9731056094169617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2466808259487152, + "step": 1170, + "valid_targets_mean": 15083.5, + "valid_targets_min": 5835 + }, + { + "epoch": 4.985074626865671, + "grad_norm": 0.17734566957344822, + "learning_rate": 2.2084205856920393e-09, + "loss": 0.9649834632873535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2296658605337143, + "step": 1171, + "valid_targets_mean": 14655.2, + "valid_targets_min": 3045 + }, + { + "epoch": 4.9893390191897655, + "grad_norm": 0.1865006084861443, + "learning_rate": 1.4133985391473482e-09, + "loss": 1.0229482650756836, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2613421678543091, + "step": 1172, + "valid_targets_mean": 14752.4, + "valid_targets_min": 3992 + }, + { + "epoch": 4.99360341151386, + "grad_norm": 0.18027432476731153, + "learning_rate": 7.950407751722288e-10, + "loss": 0.9098262786865234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22018353641033173, + "step": 1173, + "valid_targets_mean": 14614.0, + "valid_targets_min": 4385 + }, + { + "epoch": 4.997867803837953, + "grad_norm": 0.17741020957006148, + "learning_rate": 3.5335275624159835e-10, + "loss": 0.9602517485618591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24667122960090637, + "step": 1174, + "valid_targets_mean": 15355.7, + "valid_targets_min": 8230 + }, + { + "epoch": 5.0, + "grad_norm": 0.2558608122090385, + "learning_rate": 8.833838415212014e-11, + "loss": 1.0513684749603271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4691426753997803, + "step": 1175, + "valid_targets_mean": 13499.5, + "valid_targets_min": 3345 + }, + { + "epoch": 5.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4691426753997803, + "step": 1175, + "total_flos": 1658037051588608.0, + "train_loss": 1.0214215231449046, + "train_runtime": 6105.894, + "train_samples_per_second": 24.566, + "train_steps_per_second": 0.192, + "valid_targets_mean": 13499.5, + "valid_targets_min": 3345 + } + ], + "logging_steps": 1, + "max_steps": 1175, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1658037051588608.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..a6f7502 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2985b9819b9dbe766cf58a530ff5ee92c9f38c34e4a09aa80a36eb24c5bc2175 +size 7953 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..28d78c3 Binary files /dev/null and b/training_loss.png differ