commit 91bb47112e4e5ad8e29a9a74a9c73e85ac7862d9 Author: ModelHub XC Date: Fri Jun 12 17:08:20 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: shuoxing/llama3-8b-full-pretrain-wash-c4-1-5m-bs4 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..85de97f --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +--- +library_name: transformers +license: llama3 +base_model: shuoxing/llama3-8b-full-pretrain-junk-tweet-1m-en-reproduce-bs8 +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: llama3-8b-full-pretrain-wash-c4-1-5m-bs4 + results: [] +--- + + + +# llama3-8b-full-pretrain-wash-c4-1-5m-bs4 + +This model is a fine-tuned version of [shuoxing/llama3-8b-full-pretrain-junk-tweet-1m-en-reproduce-bs8](https://huggingface.co/shuoxing/llama3-8b-full-pretrain-junk-tweet-1m-en-reproduce-bs8) on the c4_1_5m dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 1e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- total_train_batch_size: 4 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.1 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- Transformers 5.2.0 +- Pytorch 2.6.0+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..48c1c2b --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 6456127242240.0, + "train_loss": 1.6602046456561104, + "train_runtime": 2865.3381, + "train_samples_per_second": 3.4, + "train_steps_per_second": 0.85 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..a5b8bc3 --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pad_token_id": 128009, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_parameters": { + "rope_theta": 500000.0, + "rope_type": "default" + }, + "tie_word_embeddings": false, + "transformers_version": "5.2.0", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..eb23973 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128009 + ], + "max_length": 4096, + "pad_token_id": 128009, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "5.2.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..dd40cee --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7807d7cde3c2578fe5f331f1ca1cc3927e327a97acccde075df7d71a667c7a +size 16060556616 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..75e0e01 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,16 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "is_local": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "TokenizersBackend" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..48c1c2b --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 6456127242240.0, + "train_loss": 1.6602046456561104, + "train_runtime": 2865.3381, + "train_samples_per_second": 3.4, + "train_steps_per_second": 0.85 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..63bf4af --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,2437 @@ +{"current_steps": 1, "total_steps": 2436, "loss": 4.157936096191406, "lr": 0.0, "epoch": 0.0012315270935960591, "percentage": 0.04, "elapsed_time": "0:00:03", "remaining_time": "2:24:44"} +{"current_steps": 2, "total_steps": 2436, "loss": 3.8494455814361572, "lr": 4.098360655737705e-08, "epoch": 0.0024630541871921183, "percentage": 0.08, "elapsed_time": "0:00:04", "remaining_time": "1:40:59"} +{"current_steps": 3, "total_steps": 2436, "loss": 3.7497382164001465, "lr": 8.19672131147541e-08, "epoch": 0.003694581280788177, "percentage": 0.12, "elapsed_time": "0:00:06", "remaining_time": "1:21:51"} +{"current_steps": 4, "total_steps": 2436, "loss": 4.874395847320557, "lr": 1.2295081967213116e-07, "epoch": 0.0049261083743842365, "percentage": 0.16, "elapsed_time": "0:00:07", "remaining_time": "1:12:15"} +{"current_steps": 5, "total_steps": 2436, "loss": 5.729328155517578, "lr": 1.639344262295082e-07, "epoch": 0.006157635467980296, "percentage": 0.21, "elapsed_time": "0:00:08", "remaining_time": "1:06:29"} +{"current_steps": 6, "total_steps": 2436, "loss": 3.968146324157715, "lr": 2.0491803278688524e-07, "epoch": 0.007389162561576354, "percentage": 0.25, "elapsed_time": "0:00:09", "remaining_time": "1:02:39"} +{"current_steps": 7, "total_steps": 2436, "loss": 4.092198848724365, "lr": 2.459016393442623e-07, "epoch": 0.008620689655172414, "percentage": 0.29, "elapsed_time": "0:00:10", "remaining_time": "1:00:01"} +{"current_steps": 8, "total_steps": 2436, "loss": 3.4101109504699707, "lr": 2.8688524590163937e-07, "epoch": 0.009852216748768473, "percentage": 0.33, "elapsed_time": "0:00:11", "remaining_time": "0:57:56"} +{"current_steps": 9, "total_steps": 2436, "loss": 4.387180805206299, "lr": 3.278688524590164e-07, "epoch": 0.011083743842364532, "percentage": 0.37, "elapsed_time": "0:00:12", "remaining_time": "0:56:17"} +{"current_steps": 10, "total_steps": 2436, "loss": 3.4985814094543457, "lr": 3.6885245901639347e-07, "epoch": 0.012315270935960592, "percentage": 0.41, "elapsed_time": "0:00:13", "remaining_time": "0:55:04"} +{"current_steps": 11, "total_steps": 2436, "loss": 5.157108306884766, "lr": 4.0983606557377047e-07, "epoch": 0.013546798029556651, "percentage": 0.45, "elapsed_time": "0:00:14", "remaining_time": "0:54:08"} +{"current_steps": 12, "total_steps": 2436, "loss": 4.057161808013916, "lr": 4.508196721311476e-07, "epoch": 0.014778325123152709, "percentage": 0.49, "elapsed_time": "0:00:15", "remaining_time": "0:53:15"} +{"current_steps": 13, "total_steps": 2436, "loss": 4.237695693969727, "lr": 4.918032786885246e-07, "epoch": 0.01600985221674877, "percentage": 0.53, "elapsed_time": "0:00:16", "remaining_time": "0:52:28"} +{"current_steps": 14, "total_steps": 2436, "loss": 4.635364532470703, "lr": 5.327868852459017e-07, "epoch": 0.017241379310344827, "percentage": 0.57, "elapsed_time": "0:00:17", "remaining_time": "0:51:48"} +{"current_steps": 15, "total_steps": 2436, "loss": 3.3291709423065186, "lr": 5.737704918032787e-07, "epoch": 0.01847290640394089, "percentage": 0.62, "elapsed_time": "0:00:19", "remaining_time": "0:51:15"} +{"current_steps": 16, "total_steps": 2436, "loss": 3.8693442344665527, "lr": 6.147540983606558e-07, "epoch": 0.019704433497536946, "percentage": 0.66, "elapsed_time": "0:00:20", "remaining_time": "0:50:45"} +{"current_steps": 17, "total_steps": 2436, "loss": 3.4419002532958984, "lr": 6.557377049180328e-07, "epoch": 0.020935960591133004, "percentage": 0.7, "elapsed_time": "0:00:21", "remaining_time": "0:50:20"} +{"current_steps": 18, "total_steps": 2436, "loss": 3.8446784019470215, "lr": 6.967213114754098e-07, "epoch": 0.022167487684729065, "percentage": 0.74, "elapsed_time": "0:00:22", "remaining_time": "0:49:56"} +{"current_steps": 19, "total_steps": 2436, "loss": 3.5930001735687256, "lr": 7.377049180327869e-07, "epoch": 0.023399014778325122, "percentage": 0.78, "elapsed_time": "0:00:23", "remaining_time": "0:49:34"} +{"current_steps": 20, "total_steps": 2436, "loss": 3.638699531555176, "lr": 7.78688524590164e-07, "epoch": 0.024630541871921183, "percentage": 0.82, "elapsed_time": "0:00:24", "remaining_time": "0:49:15"} +{"current_steps": 21, "total_steps": 2436, "loss": 3.6789143085479736, "lr": 8.196721311475409e-07, "epoch": 0.02586206896551724, "percentage": 0.86, "elapsed_time": "0:00:25", "remaining_time": "0:49:00"} +{"current_steps": 22, "total_steps": 2436, "loss": 3.959703207015991, "lr": 8.606557377049181e-07, "epoch": 0.027093596059113302, "percentage": 0.9, "elapsed_time": "0:00:26", "remaining_time": "0:48:43"} +{"current_steps": 23, "total_steps": 2436, "loss": 3.8822054862976074, "lr": 9.016393442622952e-07, "epoch": 0.02832512315270936, "percentage": 0.94, "elapsed_time": "0:00:27", "remaining_time": "0:48:29"} +{"current_steps": 24, "total_steps": 2436, "loss": 3.8448376655578613, "lr": 9.426229508196721e-07, "epoch": 0.029556650246305417, "percentage": 0.99, "elapsed_time": "0:00:28", "remaining_time": "0:48:14"} +{"current_steps": 25, "total_steps": 2436, "loss": 3.372765064239502, "lr": 9.836065573770493e-07, "epoch": 0.03078817733990148, "percentage": 1.03, "elapsed_time": "0:00:29", "remaining_time": "0:48:02"} +{"current_steps": 26, "total_steps": 2436, "loss": 3.4989559650421143, "lr": 1.0245901639344263e-06, "epoch": 0.03201970443349754, "percentage": 1.07, "elapsed_time": "0:00:30", "remaining_time": "0:47:50"} +{"current_steps": 27, "total_steps": 2436, "loss": 3.6318516731262207, "lr": 1.0655737704918034e-06, "epoch": 0.0332512315270936, "percentage": 1.11, "elapsed_time": "0:00:32", "remaining_time": "0:47:38"} +{"current_steps": 28, "total_steps": 2436, "loss": 3.351621627807617, "lr": 1.1065573770491804e-06, "epoch": 0.034482758620689655, "percentage": 1.15, "elapsed_time": "0:00:33", "remaining_time": "0:47:27"} +{"current_steps": 29, "total_steps": 2436, "loss": 3.1978442668914795, "lr": 1.1475409836065575e-06, "epoch": 0.03571428571428571, "percentage": 1.19, "elapsed_time": "0:00:34", "remaining_time": "0:47:18"} +{"current_steps": 30, "total_steps": 2436, "loss": 4.033670902252197, "lr": 1.1885245901639345e-06, "epoch": 0.03694581280788178, "percentage": 1.23, "elapsed_time": "0:00:35", "remaining_time": "0:47:08"} +{"current_steps": 31, "total_steps": 2436, "loss": 3.626315116882324, "lr": 1.2295081967213116e-06, "epoch": 0.038177339901477834, "percentage": 1.27, "elapsed_time": "0:00:36", "remaining_time": "0:46:59"} +{"current_steps": 32, "total_steps": 2436, "loss": 3.385767936706543, "lr": 1.2704918032786886e-06, "epoch": 0.03940886699507389, "percentage": 1.31, "elapsed_time": "0:00:37", "remaining_time": "0:46:51"} +{"current_steps": 33, "total_steps": 2436, "loss": 3.946913719177246, "lr": 1.3114754098360657e-06, "epoch": 0.04064039408866995, "percentage": 1.35, "elapsed_time": "0:00:38", "remaining_time": "0:46:43"} +{"current_steps": 34, "total_steps": 2436, "loss": 3.3034565448760986, "lr": 1.352459016393443e-06, "epoch": 0.04187192118226601, "percentage": 1.4, "elapsed_time": "0:00:39", "remaining_time": "0:46:35"} +{"current_steps": 35, "total_steps": 2436, "loss": 3.2368359565734863, "lr": 1.3934426229508196e-06, "epoch": 0.04310344827586207, "percentage": 1.44, "elapsed_time": "0:00:40", "remaining_time": "0:46:29"} +{"current_steps": 36, "total_steps": 2436, "loss": 3.728569984436035, "lr": 1.4344262295081968e-06, "epoch": 0.04433497536945813, "percentage": 1.48, "elapsed_time": "0:00:41", "remaining_time": "0:46:23"} +{"current_steps": 37, "total_steps": 2436, "loss": 3.3756117820739746, "lr": 1.4754098360655739e-06, "epoch": 0.04556650246305419, "percentage": 1.52, "elapsed_time": "0:00:42", "remaining_time": "0:46:16"} +{"current_steps": 38, "total_steps": 2436, "loss": 3.399596691131592, "lr": 1.516393442622951e-06, "epoch": 0.046798029556650245, "percentage": 1.56, "elapsed_time": "0:00:43", "remaining_time": "0:46:10"} +{"current_steps": 39, "total_steps": 2436, "loss": 4.209182262420654, "lr": 1.557377049180328e-06, "epoch": 0.0480295566502463, "percentage": 1.6, "elapsed_time": "0:00:44", "remaining_time": "0:46:03"} +{"current_steps": 40, "total_steps": 2436, "loss": 2.797691822052002, "lr": 1.5983606557377053e-06, "epoch": 0.04926108374384237, "percentage": 1.64, "elapsed_time": "0:00:46", "remaining_time": "0:45:58"} +{"current_steps": 41, "total_steps": 2436, "loss": 3.630617141723633, "lr": 1.6393442622950819e-06, "epoch": 0.050492610837438424, "percentage": 1.68, "elapsed_time": "0:00:47", "remaining_time": "0:45:52"} +{"current_steps": 42, "total_steps": 2436, "loss": 3.182535171508789, "lr": 1.6803278688524592e-06, "epoch": 0.05172413793103448, "percentage": 1.72, "elapsed_time": "0:00:48", "remaining_time": "0:45:46"} +{"current_steps": 43, "total_steps": 2436, "loss": 3.554767370223999, "lr": 1.7213114754098362e-06, "epoch": 0.05295566502463054, "percentage": 1.77, "elapsed_time": "0:00:49", "remaining_time": "0:45:41"} +{"current_steps": 44, "total_steps": 2436, "loss": 3.675961494445801, "lr": 1.7622950819672133e-06, "epoch": 0.054187192118226604, "percentage": 1.81, "elapsed_time": "0:00:50", "remaining_time": "0:45:36"} +{"current_steps": 45, "total_steps": 2436, "loss": 3.346269369125366, "lr": 1.8032786885245903e-06, "epoch": 0.05541871921182266, "percentage": 1.85, "elapsed_time": "0:00:51", "remaining_time": "0:45:31"} +{"current_steps": 46, "total_steps": 2436, "loss": 3.4892683029174805, "lr": 1.8442622950819674e-06, "epoch": 0.05665024630541872, "percentage": 1.89, "elapsed_time": "0:00:52", "remaining_time": "0:45:26"} +{"current_steps": 47, "total_steps": 2436, "loss": 3.3602352142333984, "lr": 1.8852459016393442e-06, "epoch": 0.05788177339901478, "percentage": 1.93, "elapsed_time": "0:00:53", "remaining_time": "0:45:22"} +{"current_steps": 48, "total_steps": 2436, "loss": 3.301713228225708, "lr": 1.9262295081967215e-06, "epoch": 0.059113300492610835, "percentage": 1.97, "elapsed_time": "0:00:54", "remaining_time": "0:45:18"} +{"current_steps": 49, "total_steps": 2436, "loss": 3.7745046615600586, "lr": 1.9672131147540985e-06, "epoch": 0.0603448275862069, "percentage": 2.01, "elapsed_time": "0:00:55", "remaining_time": "0:45:14"} +{"current_steps": 50, "total_steps": 2436, "loss": 3.0452070236206055, "lr": 2.0081967213114756e-06, "epoch": 0.06157635467980296, "percentage": 2.05, "elapsed_time": "0:00:56", "remaining_time": "0:45:10"} +{"current_steps": 51, "total_steps": 2436, "loss": 3.7955079078674316, "lr": 2.0491803278688526e-06, "epoch": 0.06280788177339902, "percentage": 2.09, "elapsed_time": "0:00:57", "remaining_time": "0:45:06"} +{"current_steps": 52, "total_steps": 2436, "loss": 3.1644039154052734, "lr": 2.0901639344262297e-06, "epoch": 0.06403940886699508, "percentage": 2.13, "elapsed_time": "0:00:58", "remaining_time": "0:45:02"} +{"current_steps": 53, "total_steps": 2436, "loss": 3.2459874153137207, "lr": 2.1311475409836067e-06, "epoch": 0.06527093596059114, "percentage": 2.18, "elapsed_time": "0:01:00", "remaining_time": "0:44:59"} +{"current_steps": 54, "total_steps": 2436, "loss": 3.61742901802063, "lr": 2.1721311475409838e-06, "epoch": 0.0665024630541872, "percentage": 2.22, "elapsed_time": "0:01:01", "remaining_time": "0:44:55"} +{"current_steps": 55, "total_steps": 2436, "loss": 3.3136467933654785, "lr": 2.213114754098361e-06, "epoch": 0.06773399014778325, "percentage": 2.26, "elapsed_time": "0:01:02", "remaining_time": "0:44:52"} +{"current_steps": 56, "total_steps": 2436, "loss": 3.272696018218994, "lr": 2.254098360655738e-06, "epoch": 0.06896551724137931, "percentage": 2.3, "elapsed_time": "0:01:03", "remaining_time": "0:44:48"} +{"current_steps": 57, "total_steps": 2436, "loss": 3.041365385055542, "lr": 2.295081967213115e-06, "epoch": 0.07019704433497537, "percentage": 2.34, "elapsed_time": "0:01:04", "remaining_time": "0:44:45"} +{"current_steps": 58, "total_steps": 2436, "loss": 3.309293746948242, "lr": 2.336065573770492e-06, "epoch": 0.07142857142857142, "percentage": 2.38, "elapsed_time": "0:01:05", "remaining_time": "0:44:41"} +{"current_steps": 59, "total_steps": 2436, "loss": 3.4676990509033203, "lr": 2.377049180327869e-06, "epoch": 0.07266009852216748, "percentage": 2.42, "elapsed_time": "0:01:06", "remaining_time": "0:44:38"} +{"current_steps": 60, "total_steps": 2436, "loss": 2.8236446380615234, "lr": 2.418032786885246e-06, "epoch": 0.07389162561576355, "percentage": 2.46, "elapsed_time": "0:01:07", "remaining_time": "0:44:34"} +{"current_steps": 61, "total_steps": 2436, "loss": 2.716705083847046, "lr": 2.459016393442623e-06, "epoch": 0.07512315270935961, "percentage": 2.5, "elapsed_time": "0:01:08", "remaining_time": "0:44:32"} +{"current_steps": 62, "total_steps": 2436, "loss": 2.5590922832489014, "lr": 2.5e-06, "epoch": 0.07635467980295567, "percentage": 2.55, "elapsed_time": "0:01:09", "remaining_time": "0:44:29"} +{"current_steps": 63, "total_steps": 2436, "loss": 2.6987993717193604, "lr": 2.5409836065573773e-06, "epoch": 0.07758620689655173, "percentage": 2.59, "elapsed_time": "0:01:10", "remaining_time": "0:44:26"} +{"current_steps": 64, "total_steps": 2436, "loss": 4.340274810791016, "lr": 2.5819672131147543e-06, "epoch": 0.07881773399014778, "percentage": 2.63, "elapsed_time": "0:01:11", "remaining_time": "0:44:23"} +{"current_steps": 65, "total_steps": 2436, "loss": 4.166017532348633, "lr": 2.6229508196721314e-06, "epoch": 0.08004926108374384, "percentage": 2.67, "elapsed_time": "0:01:12", "remaining_time": "0:44:20"} +{"current_steps": 66, "total_steps": 2436, "loss": 2.664743185043335, "lr": 2.6639344262295084e-06, "epoch": 0.0812807881773399, "percentage": 2.71, "elapsed_time": "0:01:14", "remaining_time": "0:44:17"} +{"current_steps": 67, "total_steps": 2436, "loss": 3.4285409450531006, "lr": 2.704918032786886e-06, "epoch": 0.08251231527093596, "percentage": 2.75, "elapsed_time": "0:01:15", "remaining_time": "0:44:14"} +{"current_steps": 68, "total_steps": 2436, "loss": 2.613044023513794, "lr": 2.745901639344263e-06, "epoch": 0.08374384236453201, "percentage": 2.79, "elapsed_time": "0:01:16", "remaining_time": "0:44:12"} +{"current_steps": 69, "total_steps": 2436, "loss": 3.1923232078552246, "lr": 2.786885245901639e-06, "epoch": 0.08497536945812807, "percentage": 2.83, "elapsed_time": "0:01:17", "remaining_time": "0:44:09"} +{"current_steps": 70, "total_steps": 2436, "loss": 3.881509780883789, "lr": 2.8278688524590166e-06, "epoch": 0.08620689655172414, "percentage": 2.87, "elapsed_time": "0:01:18", "remaining_time": "0:44:06"} +{"current_steps": 71, "total_steps": 2436, "loss": 3.3872318267822266, "lr": 2.8688524590163937e-06, "epoch": 0.0874384236453202, "percentage": 2.91, "elapsed_time": "0:01:19", "remaining_time": "0:44:04"} +{"current_steps": 72, "total_steps": 2436, "loss": 3.1114461421966553, "lr": 2.9098360655737707e-06, "epoch": 0.08866995073891626, "percentage": 2.96, "elapsed_time": "0:01:20", "remaining_time": "0:44:01"} +{"current_steps": 73, "total_steps": 2436, "loss": 3.182547092437744, "lr": 2.9508196721311478e-06, "epoch": 0.08990147783251232, "percentage": 3.0, "elapsed_time": "0:01:21", "remaining_time": "0:43:59"} +{"current_steps": 74, "total_steps": 2436, "loss": 3.488222599029541, "lr": 2.991803278688525e-06, "epoch": 0.09113300492610837, "percentage": 3.04, "elapsed_time": "0:01:22", "remaining_time": "0:43:57"} +{"current_steps": 75, "total_steps": 2436, "loss": 3.0836119651794434, "lr": 3.032786885245902e-06, "epoch": 0.09236453201970443, "percentage": 3.08, "elapsed_time": "0:01:23", "remaining_time": "0:43:54"} +{"current_steps": 76, "total_steps": 2436, "loss": 2.965284824371338, "lr": 3.073770491803279e-06, "epoch": 0.09359605911330049, "percentage": 3.12, "elapsed_time": "0:01:24", "remaining_time": "0:43:52"} +{"current_steps": 77, "total_steps": 2436, "loss": 3.0366950035095215, "lr": 3.114754098360656e-06, "epoch": 0.09482758620689655, "percentage": 3.16, "elapsed_time": "0:01:25", "remaining_time": "0:43:50"} +{"current_steps": 78, "total_steps": 2436, "loss": 3.7336153984069824, "lr": 3.155737704918033e-06, "epoch": 0.0960591133004926, "percentage": 3.2, "elapsed_time": "0:01:26", "remaining_time": "0:43:48"} +{"current_steps": 79, "total_steps": 2436, "loss": 3.3801069259643555, "lr": 3.1967213114754105e-06, "epoch": 0.09729064039408868, "percentage": 3.24, "elapsed_time": "0:01:28", "remaining_time": "0:43:45"} +{"current_steps": 80, "total_steps": 2436, "loss": 3.1140761375427246, "lr": 3.2377049180327876e-06, "epoch": 0.09852216748768473, "percentage": 3.28, "elapsed_time": "0:01:29", "remaining_time": "0:43:43"} +{"current_steps": 81, "total_steps": 2436, "loss": 3.1199679374694824, "lr": 3.2786885245901638e-06, "epoch": 0.09975369458128079, "percentage": 3.33, "elapsed_time": "0:01:30", "remaining_time": "0:43:41"} +{"current_steps": 82, "total_steps": 2436, "loss": 2.919370651245117, "lr": 3.3196721311475413e-06, "epoch": 0.10098522167487685, "percentage": 3.37, "elapsed_time": "0:01:31", "remaining_time": "0:43:39"} +{"current_steps": 83, "total_steps": 2436, "loss": 3.0058987140655518, "lr": 3.3606557377049183e-06, "epoch": 0.1022167487684729, "percentage": 3.41, "elapsed_time": "0:01:32", "remaining_time": "0:43:37"} +{"current_steps": 84, "total_steps": 2436, "loss": 3.201812744140625, "lr": 3.4016393442622954e-06, "epoch": 0.10344827586206896, "percentage": 3.45, "elapsed_time": "0:01:33", "remaining_time": "0:43:35"} +{"current_steps": 85, "total_steps": 2436, "loss": 2.8387913703918457, "lr": 3.4426229508196724e-06, "epoch": 0.10467980295566502, "percentage": 3.49, "elapsed_time": "0:01:34", "remaining_time": "0:43:33"} +{"current_steps": 86, "total_steps": 2436, "loss": 3.565217971801758, "lr": 3.4836065573770495e-06, "epoch": 0.10591133004926108, "percentage": 3.53, "elapsed_time": "0:01:35", "remaining_time": "0:43:31"} +{"current_steps": 87, "total_steps": 2436, "loss": 3.563566207885742, "lr": 3.5245901639344265e-06, "epoch": 0.10714285714285714, "percentage": 3.57, "elapsed_time": "0:01:36", "remaining_time": "0:43:28"} +{"current_steps": 88, "total_steps": 2436, "loss": 3.3282840251922607, "lr": 3.5655737704918036e-06, "epoch": 0.10837438423645321, "percentage": 3.61, "elapsed_time": "0:01:37", "remaining_time": "0:43:27"} +{"current_steps": 89, "total_steps": 2436, "loss": 2.693999767303467, "lr": 3.6065573770491806e-06, "epoch": 0.10960591133004927, "percentage": 3.65, "elapsed_time": "0:01:38", "remaining_time": "0:43:25"} +{"current_steps": 90, "total_steps": 2436, "loss": 3.0820372104644775, "lr": 3.6475409836065577e-06, "epoch": 0.11083743842364532, "percentage": 3.69, "elapsed_time": "0:01:39", "remaining_time": "0:43:23"} +{"current_steps": 91, "total_steps": 2436, "loss": 2.5853302478790283, "lr": 3.6885245901639347e-06, "epoch": 0.11206896551724138, "percentage": 3.74, "elapsed_time": "0:01:40", "remaining_time": "0:43:21"} +{"current_steps": 92, "total_steps": 2436, "loss": 3.622239589691162, "lr": 3.729508196721312e-06, "epoch": 0.11330049261083744, "percentage": 3.78, "elapsed_time": "0:01:42", "remaining_time": "0:43:19"} +{"current_steps": 93, "total_steps": 2436, "loss": 3.269063949584961, "lr": 3.7704918032786884e-06, "epoch": 0.1145320197044335, "percentage": 3.82, "elapsed_time": "0:01:43", "remaining_time": "0:43:17"} +{"current_steps": 94, "total_steps": 2436, "loss": 2.932877540588379, "lr": 3.811475409836066e-06, "epoch": 0.11576354679802955, "percentage": 3.86, "elapsed_time": "0:01:44", "remaining_time": "0:43:15"} +{"current_steps": 95, "total_steps": 2436, "loss": 2.8118062019348145, "lr": 3.852459016393443e-06, "epoch": 0.11699507389162561, "percentage": 3.9, "elapsed_time": "0:01:45", "remaining_time": "0:43:13"} +{"current_steps": 96, "total_steps": 2436, "loss": 2.977217674255371, "lr": 3.8934426229508196e-06, "epoch": 0.11822660098522167, "percentage": 3.94, "elapsed_time": "0:01:46", "remaining_time": "0:43:11"} +{"current_steps": 97, "total_steps": 2436, "loss": 2.534318685531616, "lr": 3.934426229508197e-06, "epoch": 0.11945812807881774, "percentage": 3.98, "elapsed_time": "0:01:47", "remaining_time": "0:43:09"} +{"current_steps": 98, "total_steps": 2436, "loss": 2.888187885284424, "lr": 3.975409836065574e-06, "epoch": 0.1206896551724138, "percentage": 4.02, "elapsed_time": "0:01:48", "remaining_time": "0:43:08"} +{"current_steps": 99, "total_steps": 2436, "loss": 2.6558847427368164, "lr": 4.016393442622951e-06, "epoch": 0.12192118226600986, "percentage": 4.06, "elapsed_time": "0:01:49", "remaining_time": "0:43:06"} +{"current_steps": 100, "total_steps": 2436, "loss": 2.707779884338379, "lr": 4.057377049180329e-06, "epoch": 0.12315270935960591, "percentage": 4.11, "elapsed_time": "0:01:50", "remaining_time": "0:43:04"} +{"current_steps": 101, "total_steps": 2436, "loss": 3.2292768955230713, "lr": 4.098360655737705e-06, "epoch": 0.12438423645320197, "percentage": 4.15, "elapsed_time": "0:01:51", "remaining_time": "0:43:03"} +{"current_steps": 102, "total_steps": 2436, "loss": 2.9476242065429688, "lr": 4.139344262295083e-06, "epoch": 0.12561576354679804, "percentage": 4.19, "elapsed_time": "0:01:52", "remaining_time": "0:43:01"} +{"current_steps": 103, "total_steps": 2436, "loss": 2.9598989486694336, "lr": 4.180327868852459e-06, "epoch": 0.1268472906403941, "percentage": 4.23, "elapsed_time": "0:01:53", "remaining_time": "0:42:59"} +{"current_steps": 104, "total_steps": 2436, "loss": 2.593669891357422, "lr": 4.221311475409837e-06, "epoch": 0.12807881773399016, "percentage": 4.27, "elapsed_time": "0:01:54", "remaining_time": "0:42:57"} +{"current_steps": 105, "total_steps": 2436, "loss": 2.884164810180664, "lr": 4.2622950819672135e-06, "epoch": 0.12931034482758622, "percentage": 4.31, "elapsed_time": "0:01:56", "remaining_time": "0:42:56"} +{"current_steps": 106, "total_steps": 2436, "loss": 3.091454267501831, "lr": 4.30327868852459e-06, "epoch": 0.13054187192118227, "percentage": 4.35, "elapsed_time": "0:01:57", "remaining_time": "0:42:54"} +{"current_steps": 107, "total_steps": 2436, "loss": 2.913923740386963, "lr": 4.3442622950819676e-06, "epoch": 0.13177339901477833, "percentage": 4.39, "elapsed_time": "0:01:58", "remaining_time": "0:42:52"} +{"current_steps": 108, "total_steps": 2436, "loss": 2.779545307159424, "lr": 4.385245901639344e-06, "epoch": 0.1330049261083744, "percentage": 4.43, "elapsed_time": "0:01:59", "remaining_time": "0:42:50"} +{"current_steps": 109, "total_steps": 2436, "loss": 1.8711936473846436, "lr": 4.426229508196722e-06, "epoch": 0.13423645320197045, "percentage": 4.47, "elapsed_time": "0:02:00", "remaining_time": "0:42:49"} +{"current_steps": 110, "total_steps": 2436, "loss": 2.892902374267578, "lr": 4.467213114754098e-06, "epoch": 0.1354679802955665, "percentage": 4.52, "elapsed_time": "0:02:01", "remaining_time": "0:42:47"} +{"current_steps": 111, "total_steps": 2436, "loss": 3.0064496994018555, "lr": 4.508196721311476e-06, "epoch": 0.13669950738916256, "percentage": 4.56, "elapsed_time": "0:02:02", "remaining_time": "0:42:46"} +{"current_steps": 112, "total_steps": 2436, "loss": 2.8515172004699707, "lr": 4.549180327868853e-06, "epoch": 0.13793103448275862, "percentage": 4.6, "elapsed_time": "0:02:03", "remaining_time": "0:42:44"} +{"current_steps": 113, "total_steps": 2436, "loss": 3.2504403591156006, "lr": 4.59016393442623e-06, "epoch": 0.13916256157635468, "percentage": 4.64, "elapsed_time": "0:02:04", "remaining_time": "0:42:42"} +{"current_steps": 114, "total_steps": 2436, "loss": 2.67405366897583, "lr": 4.631147540983607e-06, "epoch": 0.14039408866995073, "percentage": 4.68, "elapsed_time": "0:02:05", "remaining_time": "0:42:41"} +{"current_steps": 115, "total_steps": 2436, "loss": 3.079516887664795, "lr": 4.672131147540984e-06, "epoch": 0.1416256157635468, "percentage": 4.72, "elapsed_time": "0:02:06", "remaining_time": "0:42:39"} +{"current_steps": 116, "total_steps": 2436, "loss": 2.8897287845611572, "lr": 4.7131147540983615e-06, "epoch": 0.14285714285714285, "percentage": 4.76, "elapsed_time": "0:02:07", "remaining_time": "0:42:37"} +{"current_steps": 117, "total_steps": 2436, "loss": 2.7275729179382324, "lr": 4.754098360655738e-06, "epoch": 0.1440886699507389, "percentage": 4.8, "elapsed_time": "0:02:08", "remaining_time": "0:42:36"} +{"current_steps": 118, "total_steps": 2436, "loss": 2.9996538162231445, "lr": 4.795081967213115e-06, "epoch": 0.14532019704433496, "percentage": 4.84, "elapsed_time": "0:02:10", "remaining_time": "0:42:34"} +{"current_steps": 119, "total_steps": 2436, "loss": 3.307245969772339, "lr": 4.836065573770492e-06, "epoch": 0.14655172413793102, "percentage": 4.89, "elapsed_time": "0:02:11", "remaining_time": "0:42:33"} +{"current_steps": 120, "total_steps": 2436, "loss": 3.475133180618286, "lr": 4.877049180327869e-06, "epoch": 0.1477832512315271, "percentage": 4.93, "elapsed_time": "0:02:12", "remaining_time": "0:42:31"} +{"current_steps": 121, "total_steps": 2436, "loss": 3.0947790145874023, "lr": 4.918032786885246e-06, "epoch": 0.14901477832512317, "percentage": 4.97, "elapsed_time": "0:02:13", "remaining_time": "0:42:30"} +{"current_steps": 122, "total_steps": 2436, "loss": 2.9675135612487793, "lr": 4.959016393442623e-06, "epoch": 0.15024630541871922, "percentage": 5.01, "elapsed_time": "0:02:14", "remaining_time": "0:42:28"} +{"current_steps": 123, "total_steps": 2436, "loss": 2.7873148918151855, "lr": 5e-06, "epoch": 0.15147783251231528, "percentage": 5.05, "elapsed_time": "0:02:15", "remaining_time": "0:42:27"} +{"current_steps": 124, "total_steps": 2436, "loss": 3.10044264793396, "lr": 5.040983606557377e-06, "epoch": 0.15270935960591134, "percentage": 5.09, "elapsed_time": "0:02:16", "remaining_time": "0:42:25"} +{"current_steps": 125, "total_steps": 2436, "loss": 3.755798101425171, "lr": 5.0819672131147545e-06, "epoch": 0.1539408866995074, "percentage": 5.13, "elapsed_time": "0:02:17", "remaining_time": "0:42:24"} +{"current_steps": 126, "total_steps": 2436, "loss": 3.0117135047912598, "lr": 5.122950819672131e-06, "epoch": 0.15517241379310345, "percentage": 5.17, "elapsed_time": "0:02:18", "remaining_time": "0:42:22"} +{"current_steps": 127, "total_steps": 2436, "loss": 2.944417953491211, "lr": 5.163934426229509e-06, "epoch": 0.1564039408866995, "percentage": 5.21, "elapsed_time": "0:02:19", "remaining_time": "0:42:21"} +{"current_steps": 128, "total_steps": 2436, "loss": 2.672874927520752, "lr": 5.204918032786885e-06, "epoch": 0.15763546798029557, "percentage": 5.25, "elapsed_time": "0:02:20", "remaining_time": "0:42:19"} +{"current_steps": 129, "total_steps": 2436, "loss": 2.7205734252929688, "lr": 5.245901639344263e-06, "epoch": 0.15886699507389163, "percentage": 5.3, "elapsed_time": "0:02:21", "remaining_time": "0:42:18"} +{"current_steps": 130, "total_steps": 2436, "loss": 2.883897304534912, "lr": 5.286885245901639e-06, "epoch": 0.16009852216748768, "percentage": 5.34, "elapsed_time": "0:02:22", "remaining_time": "0:42:16"} +{"current_steps": 131, "total_steps": 2436, "loss": 2.782104253768921, "lr": 5.327868852459017e-06, "epoch": 0.16133004926108374, "percentage": 5.38, "elapsed_time": "0:02:24", "remaining_time": "0:42:15"} +{"current_steps": 132, "total_steps": 2436, "loss": 2.6445870399475098, "lr": 5.3688524590163935e-06, "epoch": 0.1625615763546798, "percentage": 5.42, "elapsed_time": "0:02:25", "remaining_time": "0:42:13"} +{"current_steps": 133, "total_steps": 2436, "loss": 2.9319727420806885, "lr": 5.409836065573772e-06, "epoch": 0.16379310344827586, "percentage": 5.46, "elapsed_time": "0:02:26", "remaining_time": "0:42:11"} +{"current_steps": 134, "total_steps": 2436, "loss": 3.169668674468994, "lr": 5.4508196721311476e-06, "epoch": 0.16502463054187191, "percentage": 5.5, "elapsed_time": "0:02:27", "remaining_time": "0:42:10"} +{"current_steps": 135, "total_steps": 2436, "loss": 2.8588128089904785, "lr": 5.491803278688526e-06, "epoch": 0.16625615763546797, "percentage": 5.54, "elapsed_time": "0:02:28", "remaining_time": "0:42:08"} +{"current_steps": 136, "total_steps": 2436, "loss": 2.9894580841064453, "lr": 5.5327868852459025e-06, "epoch": 0.16748768472906403, "percentage": 5.58, "elapsed_time": "0:02:29", "remaining_time": "0:42:07"} +{"current_steps": 137, "total_steps": 2436, "loss": 2.5721185207366943, "lr": 5.573770491803278e-06, "epoch": 0.1687192118226601, "percentage": 5.62, "elapsed_time": "0:02:30", "remaining_time": "0:42:06"} +{"current_steps": 138, "total_steps": 2436, "loss": 2.927572727203369, "lr": 5.614754098360657e-06, "epoch": 0.16995073891625614, "percentage": 5.67, "elapsed_time": "0:02:31", "remaining_time": "0:42:04"} +{"current_steps": 139, "total_steps": 2436, "loss": 2.1956796646118164, "lr": 5.655737704918033e-06, "epoch": 0.17118226600985223, "percentage": 5.71, "elapsed_time": "0:02:32", "remaining_time": "0:42:03"} +{"current_steps": 140, "total_steps": 2436, "loss": 2.9739363193511963, "lr": 5.696721311475411e-06, "epoch": 0.1724137931034483, "percentage": 5.75, "elapsed_time": "0:02:33", "remaining_time": "0:42:02"} +{"current_steps": 141, "total_steps": 2436, "loss": 2.9413986206054688, "lr": 5.737704918032787e-06, "epoch": 0.17364532019704434, "percentage": 5.79, "elapsed_time": "0:02:34", "remaining_time": "0:42:00"} +{"current_steps": 142, "total_steps": 2436, "loss": 3.267493724822998, "lr": 5.778688524590165e-06, "epoch": 0.1748768472906404, "percentage": 5.83, "elapsed_time": "0:02:35", "remaining_time": "0:41:59"} +{"current_steps": 143, "total_steps": 2436, "loss": 3.355569362640381, "lr": 5.8196721311475415e-06, "epoch": 0.17610837438423646, "percentage": 5.87, "elapsed_time": "0:02:37", "remaining_time": "0:41:57"} +{"current_steps": 144, "total_steps": 2436, "loss": 1.9742871522903442, "lr": 5.860655737704919e-06, "epoch": 0.17733990147783252, "percentage": 5.91, "elapsed_time": "0:02:38", "remaining_time": "0:41:56"} +{"current_steps": 145, "total_steps": 2436, "loss": 3.571032762527466, "lr": 5.9016393442622956e-06, "epoch": 0.17857142857142858, "percentage": 5.95, "elapsed_time": "0:02:39", "remaining_time": "0:41:54"} +{"current_steps": 146, "total_steps": 2436, "loss": 3.3115599155426025, "lr": 5.942622950819673e-06, "epoch": 0.17980295566502463, "percentage": 5.99, "elapsed_time": "0:02:40", "remaining_time": "0:41:53"} +{"current_steps": 147, "total_steps": 2436, "loss": 2.781893730163574, "lr": 5.98360655737705e-06, "epoch": 0.1810344827586207, "percentage": 6.03, "elapsed_time": "0:02:41", "remaining_time": "0:41:51"} +{"current_steps": 148, "total_steps": 2436, "loss": 3.5053911209106445, "lr": 6.024590163934426e-06, "epoch": 0.18226600985221675, "percentage": 6.08, "elapsed_time": "0:02:42", "remaining_time": "0:41:50"} +{"current_steps": 149, "total_steps": 2436, "loss": 2.797297477722168, "lr": 6.065573770491804e-06, "epoch": 0.1834975369458128, "percentage": 6.12, "elapsed_time": "0:02:43", "remaining_time": "0:41:48"} +{"current_steps": 150, "total_steps": 2436, "loss": 2.995811939239502, "lr": 6.10655737704918e-06, "epoch": 0.18472906403940886, "percentage": 6.16, "elapsed_time": "0:02:44", "remaining_time": "0:41:47"} +{"current_steps": 151, "total_steps": 2436, "loss": 3.028142213821411, "lr": 6.147540983606558e-06, "epoch": 0.18596059113300492, "percentage": 6.2, "elapsed_time": "0:02:45", "remaining_time": "0:41:45"} +{"current_steps": 152, "total_steps": 2436, "loss": 3.092806816101074, "lr": 6.1885245901639345e-06, "epoch": 0.18719211822660098, "percentage": 6.24, "elapsed_time": "0:02:46", "remaining_time": "0:41:44"} +{"current_steps": 153, "total_steps": 2436, "loss": 3.4657726287841797, "lr": 6.229508196721312e-06, "epoch": 0.18842364532019704, "percentage": 6.28, "elapsed_time": "0:02:47", "remaining_time": "0:41:42"} +{"current_steps": 154, "total_steps": 2436, "loss": 2.888990879058838, "lr": 6.270491803278689e-06, "epoch": 0.1896551724137931, "percentage": 6.32, "elapsed_time": "0:02:48", "remaining_time": "0:41:41"} +{"current_steps": 155, "total_steps": 2436, "loss": 2.21640682220459, "lr": 6.311475409836066e-06, "epoch": 0.19088669950738915, "percentage": 6.36, "elapsed_time": "0:02:49", "remaining_time": "0:41:40"} +{"current_steps": 156, "total_steps": 2436, "loss": 3.1153030395507812, "lr": 6.352459016393443e-06, "epoch": 0.1921182266009852, "percentage": 6.4, "elapsed_time": "0:02:50", "remaining_time": "0:41:38"} +{"current_steps": 157, "total_steps": 2436, "loss": 3.5814146995544434, "lr": 6.393442622950821e-06, "epoch": 0.1933497536945813, "percentage": 6.44, "elapsed_time": "0:02:52", "remaining_time": "0:41:37"} +{"current_steps": 158, "total_steps": 2436, "loss": 3.3865175247192383, "lr": 6.434426229508197e-06, "epoch": 0.19458128078817735, "percentage": 6.49, "elapsed_time": "0:02:53", "remaining_time": "0:41:36"} +{"current_steps": 159, "total_steps": 2436, "loss": 3.2125191688537598, "lr": 6.475409836065575e-06, "epoch": 0.1958128078817734, "percentage": 6.53, "elapsed_time": "0:02:54", "remaining_time": "0:41:34"} +{"current_steps": 160, "total_steps": 2436, "loss": 3.137500286102295, "lr": 6.516393442622952e-06, "epoch": 0.19704433497536947, "percentage": 6.57, "elapsed_time": "0:02:55", "remaining_time": "0:41:33"} +{"current_steps": 161, "total_steps": 2436, "loss": 2.63275408744812, "lr": 6.5573770491803276e-06, "epoch": 0.19827586206896552, "percentage": 6.61, "elapsed_time": "0:02:56", "remaining_time": "0:41:32"} +{"current_steps": 162, "total_steps": 2436, "loss": 3.1714844703674316, "lr": 6.598360655737706e-06, "epoch": 0.19950738916256158, "percentage": 6.65, "elapsed_time": "0:02:57", "remaining_time": "0:41:31"} +{"current_steps": 163, "total_steps": 2436, "loss": 2.2414371967315674, "lr": 6.6393442622950825e-06, "epoch": 0.20073891625615764, "percentage": 6.69, "elapsed_time": "0:02:58", "remaining_time": "0:41:29"} +{"current_steps": 164, "total_steps": 2436, "loss": 2.4281110763549805, "lr": 6.68032786885246e-06, "epoch": 0.2019704433497537, "percentage": 6.73, "elapsed_time": "0:02:59", "remaining_time": "0:41:28"} +{"current_steps": 165, "total_steps": 2436, "loss": 2.6953632831573486, "lr": 6.721311475409837e-06, "epoch": 0.20320197044334976, "percentage": 6.77, "elapsed_time": "0:03:00", "remaining_time": "0:41:27"} +{"current_steps": 166, "total_steps": 2436, "loss": 2.7645516395568848, "lr": 6.762295081967214e-06, "epoch": 0.2044334975369458, "percentage": 6.81, "elapsed_time": "0:03:01", "remaining_time": "0:41:25"} +{"current_steps": 167, "total_steps": 2436, "loss": 2.676801919937134, "lr": 6.803278688524591e-06, "epoch": 0.20566502463054187, "percentage": 6.86, "elapsed_time": "0:03:02", "remaining_time": "0:41:24"} +{"current_steps": 168, "total_steps": 2436, "loss": 3.2417163848876953, "lr": 6.844262295081968e-06, "epoch": 0.20689655172413793, "percentage": 6.9, "elapsed_time": "0:03:03", "remaining_time": "0:41:23"} +{"current_steps": 169, "total_steps": 2436, "loss": 3.1967976093292236, "lr": 6.885245901639345e-06, "epoch": 0.20812807881773399, "percentage": 6.94, "elapsed_time": "0:03:05", "remaining_time": "0:41:22"} +{"current_steps": 170, "total_steps": 2436, "loss": 3.4212145805358887, "lr": 6.926229508196722e-06, "epoch": 0.20935960591133004, "percentage": 6.98, "elapsed_time": "0:03:06", "remaining_time": "0:41:20"} +{"current_steps": 171, "total_steps": 2436, "loss": 3.0731911659240723, "lr": 6.967213114754099e-06, "epoch": 0.2105911330049261, "percentage": 7.02, "elapsed_time": "0:03:07", "remaining_time": "0:41:19"} +{"current_steps": 172, "total_steps": 2436, "loss": 3.8659727573394775, "lr": 7.0081967213114756e-06, "epoch": 0.21182266009852216, "percentage": 7.06, "elapsed_time": "0:03:08", "remaining_time": "0:41:18"} +{"current_steps": 173, "total_steps": 2436, "loss": 3.036478042602539, "lr": 7.049180327868853e-06, "epoch": 0.21305418719211822, "percentage": 7.1, "elapsed_time": "0:03:09", "remaining_time": "0:41:16"} +{"current_steps": 174, "total_steps": 2436, "loss": 2.489211082458496, "lr": 7.09016393442623e-06, "epoch": 0.21428571428571427, "percentage": 7.14, "elapsed_time": "0:03:10", "remaining_time": "0:41:15"} +{"current_steps": 175, "total_steps": 2436, "loss": 3.8306775093078613, "lr": 7.131147540983607e-06, "epoch": 0.21551724137931033, "percentage": 7.18, "elapsed_time": "0:03:11", "remaining_time": "0:41:14"} +{"current_steps": 176, "total_steps": 2436, "loss": 3.0287742614746094, "lr": 7.172131147540984e-06, "epoch": 0.21674876847290642, "percentage": 7.22, "elapsed_time": "0:03:12", "remaining_time": "0:41:12"} +{"current_steps": 177, "total_steps": 2436, "loss": 2.767753839492798, "lr": 7.213114754098361e-06, "epoch": 0.21798029556650247, "percentage": 7.27, "elapsed_time": "0:03:13", "remaining_time": "0:41:11"} +{"current_steps": 178, "total_steps": 2436, "loss": 2.8400726318359375, "lr": 7.254098360655738e-06, "epoch": 0.21921182266009853, "percentage": 7.31, "elapsed_time": "0:03:14", "remaining_time": "0:41:10"} +{"current_steps": 179, "total_steps": 2436, "loss": 2.9013113975524902, "lr": 7.295081967213115e-06, "epoch": 0.2204433497536946, "percentage": 7.35, "elapsed_time": "0:03:15", "remaining_time": "0:41:09"} +{"current_steps": 180, "total_steps": 2436, "loss": 3.1111714839935303, "lr": 7.336065573770492e-06, "epoch": 0.22167487684729065, "percentage": 7.39, "elapsed_time": "0:03:16", "remaining_time": "0:41:07"} +{"current_steps": 181, "total_steps": 2436, "loss": 2.968287229537964, "lr": 7.3770491803278695e-06, "epoch": 0.2229064039408867, "percentage": 7.43, "elapsed_time": "0:03:17", "remaining_time": "0:41:06"} +{"current_steps": 182, "total_steps": 2436, "loss": 3.2560596466064453, "lr": 7.418032786885246e-06, "epoch": 0.22413793103448276, "percentage": 7.47, "elapsed_time": "0:03:19", "remaining_time": "0:41:05"} +{"current_steps": 183, "total_steps": 2436, "loss": 2.6196365356445312, "lr": 7.459016393442624e-06, "epoch": 0.22536945812807882, "percentage": 7.51, "elapsed_time": "0:03:20", "remaining_time": "0:41:03"} +{"current_steps": 184, "total_steps": 2436, "loss": 2.48789644241333, "lr": 7.500000000000001e-06, "epoch": 0.22660098522167488, "percentage": 7.55, "elapsed_time": "0:03:21", "remaining_time": "0:41:02"} +{"current_steps": 185, "total_steps": 2436, "loss": 3.492011308670044, "lr": 7.540983606557377e-06, "epoch": 0.22783251231527094, "percentage": 7.59, "elapsed_time": "0:03:22", "remaining_time": "0:41:01"} +{"current_steps": 186, "total_steps": 2436, "loss": 2.643688917160034, "lr": 7.581967213114755e-06, "epoch": 0.229064039408867, "percentage": 7.64, "elapsed_time": "0:03:23", "remaining_time": "0:41:00"} +{"current_steps": 187, "total_steps": 2436, "loss": 3.291731834411621, "lr": 7.622950819672132e-06, "epoch": 0.23029556650246305, "percentage": 7.68, "elapsed_time": "0:03:24", "remaining_time": "0:40:58"} +{"current_steps": 188, "total_steps": 2436, "loss": 2.9554359912872314, "lr": 7.66393442622951e-06, "epoch": 0.2315270935960591, "percentage": 7.72, "elapsed_time": "0:03:25", "remaining_time": "0:40:57"} +{"current_steps": 189, "total_steps": 2436, "loss": 2.634860038757324, "lr": 7.704918032786886e-06, "epoch": 0.23275862068965517, "percentage": 7.76, "elapsed_time": "0:03:26", "remaining_time": "0:40:56"} +{"current_steps": 190, "total_steps": 2436, "loss": 3.505284309387207, "lr": 7.745901639344263e-06, "epoch": 0.23399014778325122, "percentage": 7.8, "elapsed_time": "0:03:27", "remaining_time": "0:40:55"} +{"current_steps": 191, "total_steps": 2436, "loss": 2.8865461349487305, "lr": 7.786885245901639e-06, "epoch": 0.23522167487684728, "percentage": 7.84, "elapsed_time": "0:03:28", "remaining_time": "0:40:53"} +{"current_steps": 192, "total_steps": 2436, "loss": 2.804072618484497, "lr": 7.827868852459017e-06, "epoch": 0.23645320197044334, "percentage": 7.88, "elapsed_time": "0:03:29", "remaining_time": "0:40:52"} +{"current_steps": 193, "total_steps": 2436, "loss": 2.830981969833374, "lr": 7.868852459016394e-06, "epoch": 0.2376847290640394, "percentage": 7.92, "elapsed_time": "0:03:30", "remaining_time": "0:40:51"} +{"current_steps": 194, "total_steps": 2436, "loss": 2.2295336723327637, "lr": 7.909836065573772e-06, "epoch": 0.23891625615763548, "percentage": 7.96, "elapsed_time": "0:03:32", "remaining_time": "0:40:50"} +{"current_steps": 195, "total_steps": 2436, "loss": 2.338548183441162, "lr": 7.950819672131147e-06, "epoch": 0.24014778325123154, "percentage": 8.0, "elapsed_time": "0:03:33", "remaining_time": "0:40:48"} +{"current_steps": 196, "total_steps": 2436, "loss": 3.0856008529663086, "lr": 7.991803278688526e-06, "epoch": 0.2413793103448276, "percentage": 8.05, "elapsed_time": "0:03:34", "remaining_time": "0:40:47"} +{"current_steps": 197, "total_steps": 2436, "loss": 2.8032941818237305, "lr": 8.032786885245902e-06, "epoch": 0.24261083743842365, "percentage": 8.09, "elapsed_time": "0:03:35", "remaining_time": "0:40:46"} +{"current_steps": 198, "total_steps": 2436, "loss": 2.793623924255371, "lr": 8.073770491803279e-06, "epoch": 0.2438423645320197, "percentage": 8.13, "elapsed_time": "0:03:36", "remaining_time": "0:40:45"} +{"current_steps": 199, "total_steps": 2436, "loss": 3.316802740097046, "lr": 8.114754098360657e-06, "epoch": 0.24507389162561577, "percentage": 8.17, "elapsed_time": "0:03:37", "remaining_time": "0:40:44"} +{"current_steps": 200, "total_steps": 2436, "loss": 2.7230677604675293, "lr": 8.155737704918034e-06, "epoch": 0.24630541871921183, "percentage": 8.21, "elapsed_time": "0:03:38", "remaining_time": "0:40:42"} +{"current_steps": 201, "total_steps": 2436, "loss": 3.3343541622161865, "lr": 8.19672131147541e-06, "epoch": 0.24753694581280788, "percentage": 8.25, "elapsed_time": "0:03:39", "remaining_time": "0:40:41"} +{"current_steps": 202, "total_steps": 2436, "loss": 2.6796741485595703, "lr": 8.237704918032787e-06, "epoch": 0.24876847290640394, "percentage": 8.29, "elapsed_time": "0:03:40", "remaining_time": "0:40:40"} +{"current_steps": 203, "total_steps": 2436, "loss": 3.5974526405334473, "lr": 8.278688524590165e-06, "epoch": 0.25, "percentage": 8.33, "elapsed_time": "0:03:41", "remaining_time": "0:40:38"} +{"current_steps": 204, "total_steps": 2436, "loss": 2.2697930335998535, "lr": 8.319672131147542e-06, "epoch": 0.2512315270935961, "percentage": 8.37, "elapsed_time": "0:03:42", "remaining_time": "0:40:37"} +{"current_steps": 205, "total_steps": 2436, "loss": 3.692251682281494, "lr": 8.360655737704919e-06, "epoch": 0.2524630541871921, "percentage": 8.42, "elapsed_time": "0:03:43", "remaining_time": "0:40:36"} +{"current_steps": 206, "total_steps": 2436, "loss": 1.9303261041641235, "lr": 8.401639344262295e-06, "epoch": 0.2536945812807882, "percentage": 8.46, "elapsed_time": "0:03:44", "remaining_time": "0:40:35"} +{"current_steps": 207, "total_steps": 2436, "loss": 2.538956880569458, "lr": 8.442622950819674e-06, "epoch": 0.25492610837438423, "percentage": 8.5, "elapsed_time": "0:03:46", "remaining_time": "0:40:34"} +{"current_steps": 208, "total_steps": 2436, "loss": 2.777608633041382, "lr": 8.48360655737705e-06, "epoch": 0.2561576354679803, "percentage": 8.54, "elapsed_time": "0:03:47", "remaining_time": "0:40:32"} +{"current_steps": 209, "total_steps": 2436, "loss": 2.5776896476745605, "lr": 8.524590163934427e-06, "epoch": 0.25738916256157635, "percentage": 8.58, "elapsed_time": "0:03:48", "remaining_time": "0:40:31"} +{"current_steps": 210, "total_steps": 2436, "loss": 2.9199795722961426, "lr": 8.565573770491804e-06, "epoch": 0.25862068965517243, "percentage": 8.62, "elapsed_time": "0:03:49", "remaining_time": "0:40:30"} +{"current_steps": 211, "total_steps": 2436, "loss": 3.515129566192627, "lr": 8.60655737704918e-06, "epoch": 0.25985221674876846, "percentage": 8.66, "elapsed_time": "0:03:50", "remaining_time": "0:40:29"} +{"current_steps": 212, "total_steps": 2436, "loss": 2.5549678802490234, "lr": 8.647540983606559e-06, "epoch": 0.26108374384236455, "percentage": 8.7, "elapsed_time": "0:03:51", "remaining_time": "0:40:28"} +{"current_steps": 213, "total_steps": 2436, "loss": 2.769425630569458, "lr": 8.688524590163935e-06, "epoch": 0.2623152709359606, "percentage": 8.74, "elapsed_time": "0:03:52", "remaining_time": "0:40:27"} +{"current_steps": 214, "total_steps": 2436, "loss": 3.369231700897217, "lr": 8.729508196721312e-06, "epoch": 0.26354679802955666, "percentage": 8.78, "elapsed_time": "0:03:53", "remaining_time": "0:40:25"} +{"current_steps": 215, "total_steps": 2436, "loss": 2.942309856414795, "lr": 8.770491803278688e-06, "epoch": 0.2647783251231527, "percentage": 8.83, "elapsed_time": "0:03:54", "remaining_time": "0:40:24"} +{"current_steps": 216, "total_steps": 2436, "loss": 2.7516608238220215, "lr": 8.811475409836067e-06, "epoch": 0.2660098522167488, "percentage": 8.87, "elapsed_time": "0:03:55", "remaining_time": "0:40:23"} +{"current_steps": 217, "total_steps": 2436, "loss": 2.8445613384246826, "lr": 8.852459016393443e-06, "epoch": 0.2672413793103448, "percentage": 8.91, "elapsed_time": "0:03:56", "remaining_time": "0:40:21"} +{"current_steps": 218, "total_steps": 2436, "loss": 2.987518787384033, "lr": 8.893442622950822e-06, "epoch": 0.2684729064039409, "percentage": 8.95, "elapsed_time": "0:03:57", "remaining_time": "0:40:20"} +{"current_steps": 219, "total_steps": 2436, "loss": 3.2499587535858154, "lr": 8.934426229508197e-06, "epoch": 0.2697044334975369, "percentage": 8.99, "elapsed_time": "0:03:58", "remaining_time": "0:40:19"} +{"current_steps": 220, "total_steps": 2436, "loss": 2.926447868347168, "lr": 8.975409836065575e-06, "epoch": 0.270935960591133, "percentage": 9.03, "elapsed_time": "0:04:00", "remaining_time": "0:40:18"} +{"current_steps": 221, "total_steps": 2436, "loss": 2.8910017013549805, "lr": 9.016393442622952e-06, "epoch": 0.27216748768472904, "percentage": 9.07, "elapsed_time": "0:04:01", "remaining_time": "0:40:16"} +{"current_steps": 222, "total_steps": 2436, "loss": 3.0613536834716797, "lr": 9.057377049180328e-06, "epoch": 0.2733990147783251, "percentage": 9.11, "elapsed_time": "0:04:02", "remaining_time": "0:40:15"} +{"current_steps": 223, "total_steps": 2436, "loss": 2.829414129257202, "lr": 9.098360655737707e-06, "epoch": 0.2746305418719212, "percentage": 9.15, "elapsed_time": "0:04:03", "remaining_time": "0:40:14"} +{"current_steps": 224, "total_steps": 2436, "loss": 2.7085399627685547, "lr": 9.139344262295083e-06, "epoch": 0.27586206896551724, "percentage": 9.2, "elapsed_time": "0:04:04", "remaining_time": "0:40:13"} +{"current_steps": 225, "total_steps": 2436, "loss": 2.6637799739837646, "lr": 9.18032786885246e-06, "epoch": 0.2770935960591133, "percentage": 9.24, "elapsed_time": "0:04:05", "remaining_time": "0:40:12"} +{"current_steps": 226, "total_steps": 2436, "loss": 2.845503807067871, "lr": 9.221311475409836e-06, "epoch": 0.27832512315270935, "percentage": 9.28, "elapsed_time": "0:04:06", "remaining_time": "0:40:10"} +{"current_steps": 227, "total_steps": 2436, "loss": 2.954394817352295, "lr": 9.262295081967215e-06, "epoch": 0.27955665024630544, "percentage": 9.32, "elapsed_time": "0:04:07", "remaining_time": "0:40:09"} +{"current_steps": 228, "total_steps": 2436, "loss": 2.640540838241577, "lr": 9.303278688524591e-06, "epoch": 0.28078817733990147, "percentage": 9.36, "elapsed_time": "0:04:08", "remaining_time": "0:40:08"} +{"current_steps": 229, "total_steps": 2436, "loss": 2.806300163269043, "lr": 9.344262295081968e-06, "epoch": 0.28201970443349755, "percentage": 9.4, "elapsed_time": "0:04:09", "remaining_time": "0:40:07"} +{"current_steps": 230, "total_steps": 2436, "loss": 2.6030101776123047, "lr": 9.385245901639345e-06, "epoch": 0.2832512315270936, "percentage": 9.44, "elapsed_time": "0:04:10", "remaining_time": "0:40:05"} +{"current_steps": 231, "total_steps": 2436, "loss": 2.7991466522216797, "lr": 9.426229508196723e-06, "epoch": 0.28448275862068967, "percentage": 9.48, "elapsed_time": "0:04:11", "remaining_time": "0:40:04"} +{"current_steps": 232, "total_steps": 2436, "loss": 3.106261968612671, "lr": 9.4672131147541e-06, "epoch": 0.2857142857142857, "percentage": 9.52, "elapsed_time": "0:04:12", "remaining_time": "0:40:03"} +{"current_steps": 233, "total_steps": 2436, "loss": 3.2630815505981445, "lr": 9.508196721311476e-06, "epoch": 0.2869458128078818, "percentage": 9.56, "elapsed_time": "0:04:14", "remaining_time": "0:40:02"} +{"current_steps": 234, "total_steps": 2436, "loss": 3.1156482696533203, "lr": 9.549180327868853e-06, "epoch": 0.2881773399014778, "percentage": 9.61, "elapsed_time": "0:04:15", "remaining_time": "0:40:00"} +{"current_steps": 235, "total_steps": 2436, "loss": 3.1271071434020996, "lr": 9.59016393442623e-06, "epoch": 0.2894088669950739, "percentage": 9.65, "elapsed_time": "0:04:16", "remaining_time": "0:39:59"} +{"current_steps": 236, "total_steps": 2436, "loss": 2.536348342895508, "lr": 9.631147540983608e-06, "epoch": 0.29064039408866993, "percentage": 9.69, "elapsed_time": "0:04:17", "remaining_time": "0:39:58"} +{"current_steps": 237, "total_steps": 2436, "loss": 2.8055825233459473, "lr": 9.672131147540984e-06, "epoch": 0.291871921182266, "percentage": 9.73, "elapsed_time": "0:04:18", "remaining_time": "0:39:57"} +{"current_steps": 238, "total_steps": 2436, "loss": 2.9949395656585693, "lr": 9.713114754098361e-06, "epoch": 0.29310344827586204, "percentage": 9.77, "elapsed_time": "0:04:19", "remaining_time": "0:39:55"} +{"current_steps": 239, "total_steps": 2436, "loss": 2.125136137008667, "lr": 9.754098360655738e-06, "epoch": 0.29433497536945813, "percentage": 9.81, "elapsed_time": "0:04:20", "remaining_time": "0:39:54"} +{"current_steps": 240, "total_steps": 2436, "loss": 2.830984592437744, "lr": 9.795081967213116e-06, "epoch": 0.2955665024630542, "percentage": 9.85, "elapsed_time": "0:04:21", "remaining_time": "0:39:53"} +{"current_steps": 241, "total_steps": 2436, "loss": 3.2255706787109375, "lr": 9.836065573770493e-06, "epoch": 0.29679802955665024, "percentage": 9.89, "elapsed_time": "0:04:22", "remaining_time": "0:39:52"} +{"current_steps": 242, "total_steps": 2436, "loss": 2.686436653137207, "lr": 9.87704918032787e-06, "epoch": 0.29802955665024633, "percentage": 9.93, "elapsed_time": "0:04:23", "remaining_time": "0:39:50"} +{"current_steps": 243, "total_steps": 2436, "loss": 2.497978687286377, "lr": 9.918032786885246e-06, "epoch": 0.29926108374384236, "percentage": 9.98, "elapsed_time": "0:04:24", "remaining_time": "0:39:49"} +{"current_steps": 244, "total_steps": 2436, "loss": 3.308448076248169, "lr": 9.959016393442624e-06, "epoch": 0.30049261083743845, "percentage": 10.02, "elapsed_time": "0:04:25", "remaining_time": "0:39:48"} +{"current_steps": 245, "total_steps": 2436, "loss": 3.2378220558166504, "lr": 1e-05, "epoch": 0.3017241379310345, "percentage": 10.06, "elapsed_time": "0:04:26", "remaining_time": "0:39:47"} +{"current_steps": 246, "total_steps": 2436, "loss": 2.4129133224487305, "lr": 9.999994864785605e-06, "epoch": 0.30295566502463056, "percentage": 10.1, "elapsed_time": "0:04:28", "remaining_time": "0:39:46"} +{"current_steps": 247, "total_steps": 2436, "loss": 2.938180923461914, "lr": 9.99997945915297e-06, "epoch": 0.3041871921182266, "percentage": 10.14, "elapsed_time": "0:04:29", "remaining_time": "0:39:45"} +{"current_steps": 248, "total_steps": 2436, "loss": 2.5165305137634277, "lr": 9.999953783133733e-06, "epoch": 0.3054187192118227, "percentage": 10.18, "elapsed_time": "0:04:30", "remaining_time": "0:39:43"} +{"current_steps": 249, "total_steps": 2436, "loss": 3.425577163696289, "lr": 9.999917836780642e-06, "epoch": 0.3066502463054187, "percentage": 10.22, "elapsed_time": "0:04:31", "remaining_time": "0:39:42"} +{"current_steps": 250, "total_steps": 2436, "loss": 2.876093626022339, "lr": 9.999871620167532e-06, "epoch": 0.3078817733990148, "percentage": 10.26, "elapsed_time": "0:04:32", "remaining_time": "0:39:41"} +{"current_steps": 251, "total_steps": 2436, "loss": 2.9071428775787354, "lr": 9.999815133389334e-06, "epoch": 0.3091133004926108, "percentage": 10.3, "elapsed_time": "0:04:33", "remaining_time": "0:39:40"} +{"current_steps": 252, "total_steps": 2436, "loss": 2.998086452484131, "lr": 9.999748376562078e-06, "epoch": 0.3103448275862069, "percentage": 10.34, "elapsed_time": "0:04:34", "remaining_time": "0:39:39"} +{"current_steps": 253, "total_steps": 2436, "loss": 2.1193456649780273, "lr": 9.999671349822887e-06, "epoch": 0.31157635467980294, "percentage": 10.39, "elapsed_time": "0:04:35", "remaining_time": "0:39:37"} +{"current_steps": 254, "total_steps": 2436, "loss": 2.753380298614502, "lr": 9.999584053329983e-06, "epoch": 0.312807881773399, "percentage": 10.43, "elapsed_time": "0:04:36", "remaining_time": "0:39:36"} +{"current_steps": 255, "total_steps": 2436, "loss": 2.876704216003418, "lr": 9.999486487262677e-06, "epoch": 0.31403940886699505, "percentage": 10.47, "elapsed_time": "0:04:37", "remaining_time": "0:39:35"} +{"current_steps": 256, "total_steps": 2436, "loss": 3.0882208347320557, "lr": 9.999378651821381e-06, "epoch": 0.31527093596059114, "percentage": 10.51, "elapsed_time": "0:04:38", "remaining_time": "0:39:34"} +{"current_steps": 257, "total_steps": 2436, "loss": 3.155285120010376, "lr": 9.999260547227599e-06, "epoch": 0.31650246305418717, "percentage": 10.55, "elapsed_time": "0:04:39", "remaining_time": "0:39:33"} +{"current_steps": 258, "total_steps": 2436, "loss": 2.7646055221557617, "lr": 9.999132173723923e-06, "epoch": 0.31773399014778325, "percentage": 10.59, "elapsed_time": "0:04:40", "remaining_time": "0:39:31"} +{"current_steps": 259, "total_steps": 2436, "loss": 2.7237563133239746, "lr": 9.998993531574048e-06, "epoch": 0.31896551724137934, "percentage": 10.63, "elapsed_time": "0:04:42", "remaining_time": "0:39:30"} +{"current_steps": 260, "total_steps": 2436, "loss": 3.3845739364624023, "lr": 9.998844621062755e-06, "epoch": 0.32019704433497537, "percentage": 10.67, "elapsed_time": "0:04:43", "remaining_time": "0:39:29"} +{"current_steps": 261, "total_steps": 2436, "loss": 3.8065264225006104, "lr": 9.998685442495921e-06, "epoch": 0.32142857142857145, "percentage": 10.71, "elapsed_time": "0:04:44", "remaining_time": "0:39:28"} +{"current_steps": 262, "total_steps": 2436, "loss": 2.8899989128112793, "lr": 9.998515996200508e-06, "epoch": 0.3226600985221675, "percentage": 10.76, "elapsed_time": "0:04:45", "remaining_time": "0:39:27"} +{"current_steps": 263, "total_steps": 2436, "loss": 3.253079414367676, "lr": 9.998336282524579e-06, "epoch": 0.32389162561576357, "percentage": 10.8, "elapsed_time": "0:04:46", "remaining_time": "0:39:27"} +{"current_steps": 264, "total_steps": 2436, "loss": 3.346510648727417, "lr": 9.998146301837274e-06, "epoch": 0.3251231527093596, "percentage": 10.84, "elapsed_time": "0:04:47", "remaining_time": "0:39:26"} +{"current_steps": 265, "total_steps": 2436, "loss": 3.4698657989501953, "lr": 9.997946054528837e-06, "epoch": 0.3263546798029557, "percentage": 10.88, "elapsed_time": "0:04:48", "remaining_time": "0:39:26"} +{"current_steps": 266, "total_steps": 2436, "loss": 3.174567699432373, "lr": 9.99773554101059e-06, "epoch": 0.3275862068965517, "percentage": 10.92, "elapsed_time": "0:04:49", "remaining_time": "0:39:24"} +{"current_steps": 267, "total_steps": 2436, "loss": 2.5275719165802, "lr": 9.997514761714946e-06, "epoch": 0.3288177339901478, "percentage": 10.96, "elapsed_time": "0:04:50", "remaining_time": "0:39:23"} +{"current_steps": 268, "total_steps": 2436, "loss": 2.9102673530578613, "lr": 9.997283717095403e-06, "epoch": 0.33004926108374383, "percentage": 11.0, "elapsed_time": "0:04:52", "remaining_time": "0:39:22"} +{"current_steps": 269, "total_steps": 2436, "loss": 2.865558624267578, "lr": 9.99704240762655e-06, "epoch": 0.3312807881773399, "percentage": 11.04, "elapsed_time": "0:04:53", "remaining_time": "0:39:21"} +{"current_steps": 270, "total_steps": 2436, "loss": 2.749305248260498, "lr": 9.996790833804053e-06, "epoch": 0.33251231527093594, "percentage": 11.08, "elapsed_time": "0:04:54", "remaining_time": "0:39:20"} +{"current_steps": 271, "total_steps": 2436, "loss": 2.0590691566467285, "lr": 9.996528996144668e-06, "epoch": 0.33374384236453203, "percentage": 11.12, "elapsed_time": "0:04:55", "remaining_time": "0:39:18"} +{"current_steps": 272, "total_steps": 2436, "loss": 3.0421628952026367, "lr": 9.996256895186234e-06, "epoch": 0.33497536945812806, "percentage": 11.17, "elapsed_time": "0:04:56", "remaining_time": "0:39:17"} +{"current_steps": 273, "total_steps": 2436, "loss": 2.8302841186523438, "lr": 9.995974531487668e-06, "epoch": 0.33620689655172414, "percentage": 11.21, "elapsed_time": "0:04:57", "remaining_time": "0:39:16"} +{"current_steps": 274, "total_steps": 2436, "loss": 2.7192673683166504, "lr": 9.995681905628968e-06, "epoch": 0.3374384236453202, "percentage": 11.25, "elapsed_time": "0:04:58", "remaining_time": "0:39:15"} +{"current_steps": 275, "total_steps": 2436, "loss": 2.3330166339874268, "lr": 9.995379018211215e-06, "epoch": 0.33866995073891626, "percentage": 11.29, "elapsed_time": "0:04:59", "remaining_time": "0:39:14"} +{"current_steps": 276, "total_steps": 2436, "loss": 2.5359480381011963, "lr": 9.995065869856566e-06, "epoch": 0.3399014778325123, "percentage": 11.33, "elapsed_time": "0:05:00", "remaining_time": "0:39:13"} +{"current_steps": 277, "total_steps": 2436, "loss": 3.049252986907959, "lr": 9.994742461208251e-06, "epoch": 0.3411330049261084, "percentage": 11.37, "elapsed_time": "0:05:01", "remaining_time": "0:39:11"} +{"current_steps": 278, "total_steps": 2436, "loss": 3.3440940380096436, "lr": 9.994408792930584e-06, "epoch": 0.34236453201970446, "percentage": 11.41, "elapsed_time": "0:05:02", "remaining_time": "0:39:10"} +{"current_steps": 279, "total_steps": 2436, "loss": 3.038376808166504, "lr": 9.994064865708944e-06, "epoch": 0.3435960591133005, "percentage": 11.45, "elapsed_time": "0:05:03", "remaining_time": "0:39:09"} +{"current_steps": 280, "total_steps": 2436, "loss": 3.6074423789978027, "lr": 9.993710680249788e-06, "epoch": 0.3448275862068966, "percentage": 11.49, "elapsed_time": "0:05:04", "remaining_time": "0:39:08"} +{"current_steps": 281, "total_steps": 2436, "loss": 2.686741352081299, "lr": 9.993346237280646e-06, "epoch": 0.3460591133004926, "percentage": 11.54, "elapsed_time": "0:05:06", "remaining_time": "0:39:06"} +{"current_steps": 282, "total_steps": 2436, "loss": 2.4198198318481445, "lr": 9.992971537550112e-06, "epoch": 0.3472906403940887, "percentage": 11.58, "elapsed_time": "0:05:07", "remaining_time": "0:39:05"} +{"current_steps": 283, "total_steps": 2436, "loss": 2.8091788291931152, "lr": 9.992586581827853e-06, "epoch": 0.3485221674876847, "percentage": 11.62, "elapsed_time": "0:05:08", "remaining_time": "0:39:04"} +{"current_steps": 284, "total_steps": 2436, "loss": 3.0199592113494873, "lr": 9.992191370904599e-06, "epoch": 0.3497536945812808, "percentage": 11.66, "elapsed_time": "0:05:09", "remaining_time": "0:39:03"} +{"current_steps": 285, "total_steps": 2436, "loss": 2.6372945308685303, "lr": 9.991785905592149e-06, "epoch": 0.35098522167487683, "percentage": 11.7, "elapsed_time": "0:05:10", "remaining_time": "0:39:02"} +{"current_steps": 286, "total_steps": 2436, "loss": 2.9127607345581055, "lr": 9.991370186723363e-06, "epoch": 0.3522167487684729, "percentage": 11.74, "elapsed_time": "0:05:11", "remaining_time": "0:39:00"} +{"current_steps": 287, "total_steps": 2436, "loss": 2.464376926422119, "lr": 9.990944215152166e-06, "epoch": 0.35344827586206895, "percentage": 11.78, "elapsed_time": "0:05:12", "remaining_time": "0:38:59"} +{"current_steps": 288, "total_steps": 2436, "loss": 2.8306374549865723, "lr": 9.990507991753535e-06, "epoch": 0.35467980295566504, "percentage": 11.82, "elapsed_time": "0:05:13", "remaining_time": "0:38:58"} +{"current_steps": 289, "total_steps": 2436, "loss": 2.9181313514709473, "lr": 9.990061517423513e-06, "epoch": 0.35591133004926107, "percentage": 11.86, "elapsed_time": "0:05:14", "remaining_time": "0:38:57"} +{"current_steps": 290, "total_steps": 2436, "loss": 3.1937739849090576, "lr": 9.989604793079198e-06, "epoch": 0.35714285714285715, "percentage": 11.9, "elapsed_time": "0:05:15", "remaining_time": "0:38:56"} +{"current_steps": 291, "total_steps": 2436, "loss": 4.190927028656006, "lr": 9.989137819658738e-06, "epoch": 0.3583743842364532, "percentage": 11.95, "elapsed_time": "0:05:16", "remaining_time": "0:38:54"} +{"current_steps": 292, "total_steps": 2436, "loss": 2.8343558311462402, "lr": 9.988660598121337e-06, "epoch": 0.35960591133004927, "percentage": 11.99, "elapsed_time": "0:05:17", "remaining_time": "0:38:53"} +{"current_steps": 293, "total_steps": 2436, "loss": 3.741821050643921, "lr": 9.988173129447251e-06, "epoch": 0.3608374384236453, "percentage": 12.03, "elapsed_time": "0:05:18", "remaining_time": "0:38:52"} +{"current_steps": 294, "total_steps": 2436, "loss": 2.484419345855713, "lr": 9.98767541463778e-06, "epoch": 0.3620689655172414, "percentage": 12.07, "elapsed_time": "0:05:19", "remaining_time": "0:38:51"} +{"current_steps": 295, "total_steps": 2436, "loss": 2.671337127685547, "lr": 9.987167454715277e-06, "epoch": 0.3633004926108374, "percentage": 12.11, "elapsed_time": "0:05:21", "remaining_time": "0:38:50"} +{"current_steps": 296, "total_steps": 2436, "loss": 3.118803024291992, "lr": 9.986649250723129e-06, "epoch": 0.3645320197044335, "percentage": 12.15, "elapsed_time": "0:05:22", "remaining_time": "0:38:49"} +{"current_steps": 297, "total_steps": 2436, "loss": 3.10141658782959, "lr": 9.986120803725776e-06, "epoch": 0.3657635467980296, "percentage": 12.19, "elapsed_time": "0:05:23", "remaining_time": "0:38:47"} +{"current_steps": 298, "total_steps": 2436, "loss": 2.7978734970092773, "lr": 9.985582114808693e-06, "epoch": 0.3669950738916256, "percentage": 12.23, "elapsed_time": "0:05:24", "remaining_time": "0:38:46"} +{"current_steps": 299, "total_steps": 2436, "loss": 2.5770411491394043, "lr": 9.985033185078392e-06, "epoch": 0.3682266009852217, "percentage": 12.27, "elapsed_time": "0:05:25", "remaining_time": "0:38:45"} +{"current_steps": 300, "total_steps": 2436, "loss": 3.0273873805999756, "lr": 9.984474015662421e-06, "epoch": 0.3694581280788177, "percentage": 12.32, "elapsed_time": "0:05:26", "remaining_time": "0:38:44"} +{"current_steps": 301, "total_steps": 2436, "loss": 2.9202780723571777, "lr": 9.983904607709365e-06, "epoch": 0.3706896551724138, "percentage": 12.36, "elapsed_time": "0:05:27", "remaining_time": "0:38:43"} +{"current_steps": 302, "total_steps": 2436, "loss": 2.9816439151763916, "lr": 9.983324962388835e-06, "epoch": 0.37192118226600984, "percentage": 12.4, "elapsed_time": "0:05:28", "remaining_time": "0:38:42"} +{"current_steps": 303, "total_steps": 2436, "loss": 2.5605852603912354, "lr": 9.982735080891471e-06, "epoch": 0.3731527093596059, "percentage": 12.44, "elapsed_time": "0:05:29", "remaining_time": "0:38:40"} +{"current_steps": 304, "total_steps": 2436, "loss": 2.9378490447998047, "lr": 9.982134964428942e-06, "epoch": 0.37438423645320196, "percentage": 12.48, "elapsed_time": "0:05:30", "remaining_time": "0:38:39"} +{"current_steps": 305, "total_steps": 2436, "loss": 2.410521984100342, "lr": 9.981524614233938e-06, "epoch": 0.37561576354679804, "percentage": 12.52, "elapsed_time": "0:05:31", "remaining_time": "0:38:38"} +{"current_steps": 306, "total_steps": 2436, "loss": 2.381927013397217, "lr": 9.98090403156017e-06, "epoch": 0.3768472906403941, "percentage": 12.56, "elapsed_time": "0:05:32", "remaining_time": "0:38:37"} +{"current_steps": 307, "total_steps": 2436, "loss": 3.1156816482543945, "lr": 9.98027321768237e-06, "epoch": 0.37807881773399016, "percentage": 12.6, "elapsed_time": "0:05:33", "remaining_time": "0:38:36"} +{"current_steps": 308, "total_steps": 2436, "loss": 3.2660152912139893, "lr": 9.97963217389628e-06, "epoch": 0.3793103448275862, "percentage": 12.64, "elapsed_time": "0:05:35", "remaining_time": "0:38:35"} +{"current_steps": 309, "total_steps": 2436, "loss": 3.1832613945007324, "lr": 9.978980901518663e-06, "epoch": 0.3805418719211823, "percentage": 12.68, "elapsed_time": "0:05:36", "remaining_time": "0:38:33"} +{"current_steps": 310, "total_steps": 2436, "loss": 2.719600200653076, "lr": 9.978319401887287e-06, "epoch": 0.3817733990147783, "percentage": 12.73, "elapsed_time": "0:05:37", "remaining_time": "0:38:32"} +{"current_steps": 311, "total_steps": 2436, "loss": 2.652092456817627, "lr": 9.977647676360927e-06, "epoch": 0.3830049261083744, "percentage": 12.77, "elapsed_time": "0:05:38", "remaining_time": "0:38:31"} +{"current_steps": 312, "total_steps": 2436, "loss": 2.5932788848876953, "lr": 9.976965726319369e-06, "epoch": 0.3842364532019704, "percentage": 12.81, "elapsed_time": "0:05:39", "remaining_time": "0:38:30"} +{"current_steps": 313, "total_steps": 2436, "loss": 2.558863401412964, "lr": 9.976273553163393e-06, "epoch": 0.3854679802955665, "percentage": 12.85, "elapsed_time": "0:05:40", "remaining_time": "0:38:29"} +{"current_steps": 314, "total_steps": 2436, "loss": 3.1973023414611816, "lr": 9.975571158314783e-06, "epoch": 0.3866995073891626, "percentage": 12.89, "elapsed_time": "0:05:41", "remaining_time": "0:38:28"} +{"current_steps": 315, "total_steps": 2436, "loss": 3.286236524581909, "lr": 9.974858543216319e-06, "epoch": 0.3879310344827586, "percentage": 12.93, "elapsed_time": "0:05:42", "remaining_time": "0:38:26"} +{"current_steps": 316, "total_steps": 2436, "loss": 3.5159969329833984, "lr": 9.974135709331774e-06, "epoch": 0.3891625615763547, "percentage": 12.97, "elapsed_time": "0:05:43", "remaining_time": "0:38:25"} +{"current_steps": 317, "total_steps": 2436, "loss": 2.647761821746826, "lr": 9.973402658145908e-06, "epoch": 0.39039408866995073, "percentage": 13.01, "elapsed_time": "0:05:44", "remaining_time": "0:38:24"} +{"current_steps": 318, "total_steps": 2436, "loss": 2.8499808311462402, "lr": 9.972659391164473e-06, "epoch": 0.3916256157635468, "percentage": 13.05, "elapsed_time": "0:05:45", "remaining_time": "0:38:23"} +{"current_steps": 319, "total_steps": 2436, "loss": 2.332852840423584, "lr": 9.971905909914206e-06, "epoch": 0.39285714285714285, "percentage": 13.1, "elapsed_time": "0:05:46", "remaining_time": "0:38:22"} +{"current_steps": 320, "total_steps": 2436, "loss": 2.627098560333252, "lr": 9.971142215942817e-06, "epoch": 0.39408866995073893, "percentage": 13.14, "elapsed_time": "0:05:47", "remaining_time": "0:38:21"} +{"current_steps": 321, "total_steps": 2436, "loss": 2.302323341369629, "lr": 9.970368310819e-06, "epoch": 0.39532019704433496, "percentage": 13.18, "elapsed_time": "0:05:49", "remaining_time": "0:38:20"} +{"current_steps": 322, "total_steps": 2436, "loss": 2.6783509254455566, "lr": 9.969584196132427e-06, "epoch": 0.39655172413793105, "percentage": 13.22, "elapsed_time": "0:05:50", "remaining_time": "0:38:19"} +{"current_steps": 323, "total_steps": 2436, "loss": 2.9487061500549316, "lr": 9.96878987349373e-06, "epoch": 0.3977832512315271, "percentage": 13.26, "elapsed_time": "0:05:51", "remaining_time": "0:38:17"} +{"current_steps": 324, "total_steps": 2436, "loss": 2.5883233547210693, "lr": 9.967985344534521e-06, "epoch": 0.39901477832512317, "percentage": 13.3, "elapsed_time": "0:05:52", "remaining_time": "0:38:16"} +{"current_steps": 325, "total_steps": 2436, "loss": 3.125821590423584, "lr": 9.96717061090737e-06, "epoch": 0.4002463054187192, "percentage": 13.34, "elapsed_time": "0:05:53", "remaining_time": "0:38:15"} +{"current_steps": 326, "total_steps": 2436, "loss": 2.829881191253662, "lr": 9.966345674285808e-06, "epoch": 0.4014778325123153, "percentage": 13.38, "elapsed_time": "0:05:54", "remaining_time": "0:38:14"} +{"current_steps": 327, "total_steps": 2436, "loss": 2.5988128185272217, "lr": 9.965510536364329e-06, "epoch": 0.4027093596059113, "percentage": 13.42, "elapsed_time": "0:05:55", "remaining_time": "0:38:13"} +{"current_steps": 328, "total_steps": 2436, "loss": 2.158940315246582, "lr": 9.964665198858375e-06, "epoch": 0.4039408866995074, "percentage": 13.46, "elapsed_time": "0:05:56", "remaining_time": "0:38:12"} +{"current_steps": 329, "total_steps": 2436, "loss": 2.716994285583496, "lr": 9.96380966350434e-06, "epoch": 0.4051724137931034, "percentage": 13.51, "elapsed_time": "0:05:57", "remaining_time": "0:38:11"} +{"current_steps": 330, "total_steps": 2436, "loss": 3.1283516883850098, "lr": 9.962943932059573e-06, "epoch": 0.4064039408866995, "percentage": 13.55, "elapsed_time": "0:05:58", "remaining_time": "0:38:09"} +{"current_steps": 331, "total_steps": 2436, "loss": 3.0957908630371094, "lr": 9.962068006302357e-06, "epoch": 0.40763546798029554, "percentage": 13.59, "elapsed_time": "0:05:59", "remaining_time": "0:38:08"} +{"current_steps": 332, "total_steps": 2436, "loss": 2.3027350902557373, "lr": 9.961181888031917e-06, "epoch": 0.4088669950738916, "percentage": 13.63, "elapsed_time": "0:06:00", "remaining_time": "0:38:07"} +{"current_steps": 333, "total_steps": 2436, "loss": 2.956791877746582, "lr": 9.960285579068419e-06, "epoch": 0.4100985221674877, "percentage": 13.67, "elapsed_time": "0:06:02", "remaining_time": "0:38:06"} +{"current_steps": 334, "total_steps": 2436, "loss": 2.5689826011657715, "lr": 9.959379081252958e-06, "epoch": 0.41133004926108374, "percentage": 13.71, "elapsed_time": "0:06:03", "remaining_time": "0:38:05"} +{"current_steps": 335, "total_steps": 2436, "loss": 3.1086199283599854, "lr": 9.958462396447556e-06, "epoch": 0.4125615763546798, "percentage": 13.75, "elapsed_time": "0:06:04", "remaining_time": "0:38:04"} +{"current_steps": 336, "total_steps": 2436, "loss": 3.134901285171509, "lr": 9.957535526535165e-06, "epoch": 0.41379310344827586, "percentage": 13.79, "elapsed_time": "0:06:05", "remaining_time": "0:38:03"} +{"current_steps": 337, "total_steps": 2436, "loss": 2.642225742340088, "lr": 9.956598473419652e-06, "epoch": 0.41502463054187194, "percentage": 13.83, "elapsed_time": "0:06:06", "remaining_time": "0:38:01"} +{"current_steps": 338, "total_steps": 2436, "loss": 2.828200340270996, "lr": 9.95565123902581e-06, "epoch": 0.41625615763546797, "percentage": 13.88, "elapsed_time": "0:06:07", "remaining_time": "0:38:00"} +{"current_steps": 339, "total_steps": 2436, "loss": 2.751354217529297, "lr": 9.954693825299333e-06, "epoch": 0.41748768472906406, "percentage": 13.92, "elapsed_time": "0:06:08", "remaining_time": "0:37:59"} +{"current_steps": 340, "total_steps": 2436, "loss": 2.818434715270996, "lr": 9.953726234206835e-06, "epoch": 0.4187192118226601, "percentage": 13.96, "elapsed_time": "0:06:09", "remaining_time": "0:37:58"} +{"current_steps": 341, "total_steps": 2436, "loss": 2.8631365299224854, "lr": 9.95274846773583e-06, "epoch": 0.41995073891625617, "percentage": 14.0, "elapsed_time": "0:06:10", "remaining_time": "0:37:57"} +{"current_steps": 342, "total_steps": 2436, "loss": 2.387998580932617, "lr": 9.951760527894733e-06, "epoch": 0.4211822660098522, "percentage": 14.04, "elapsed_time": "0:06:11", "remaining_time": "0:37:56"} +{"current_steps": 343, "total_steps": 2436, "loss": 2.366614580154419, "lr": 9.950762416712862e-06, "epoch": 0.4224137931034483, "percentage": 14.08, "elapsed_time": "0:06:12", "remaining_time": "0:37:55"} +{"current_steps": 344, "total_steps": 2436, "loss": 2.4502060413360596, "lr": 9.949754136240416e-06, "epoch": 0.4236453201970443, "percentage": 14.12, "elapsed_time": "0:06:13", "remaining_time": "0:37:54"} +{"current_steps": 345, "total_steps": 2436, "loss": 2.47091007232666, "lr": 9.948735688548496e-06, "epoch": 0.4248768472906404, "percentage": 14.16, "elapsed_time": "0:06:15", "remaining_time": "0:37:53"} +{"current_steps": 346, "total_steps": 2436, "loss": 3.0400021076202393, "lr": 9.947707075729076e-06, "epoch": 0.42610837438423643, "percentage": 14.2, "elapsed_time": "0:06:16", "remaining_time": "0:37:51"} +{"current_steps": 347, "total_steps": 2436, "loss": 2.622288227081299, "lr": 9.946668299895017e-06, "epoch": 0.4273399014778325, "percentage": 14.24, "elapsed_time": "0:06:17", "remaining_time": "0:37:50"} +{"current_steps": 348, "total_steps": 2436, "loss": 3.3773419857025146, "lr": 9.945619363180054e-06, "epoch": 0.42857142857142855, "percentage": 14.29, "elapsed_time": "0:06:18", "remaining_time": "0:37:49"} +{"current_steps": 349, "total_steps": 2436, "loss": 3.279005527496338, "lr": 9.944560267738792e-06, "epoch": 0.42980295566502463, "percentage": 14.33, "elapsed_time": "0:06:19", "remaining_time": "0:37:48"} +{"current_steps": 350, "total_steps": 2436, "loss": 2.8206255435943604, "lr": 9.943491015746704e-06, "epoch": 0.43103448275862066, "percentage": 14.37, "elapsed_time": "0:06:20", "remaining_time": "0:37:47"} +{"current_steps": 351, "total_steps": 2436, "loss": 3.312700033187866, "lr": 9.942411609400127e-06, "epoch": 0.43226600985221675, "percentage": 14.41, "elapsed_time": "0:06:21", "remaining_time": "0:37:46"} +{"current_steps": 352, "total_steps": 2436, "loss": 2.580315113067627, "lr": 9.941322050916251e-06, "epoch": 0.43349753694581283, "percentage": 14.45, "elapsed_time": "0:06:22", "remaining_time": "0:37:45"} +{"current_steps": 353, "total_steps": 2436, "loss": 2.8339614868164062, "lr": 9.940222342533126e-06, "epoch": 0.43472906403940886, "percentage": 14.49, "elapsed_time": "0:06:23", "remaining_time": "0:37:43"} +{"current_steps": 354, "total_steps": 2436, "loss": 2.582752227783203, "lr": 9.939112486509644e-06, "epoch": 0.43596059113300495, "percentage": 14.53, "elapsed_time": "0:06:24", "remaining_time": "0:37:42"} +{"current_steps": 355, "total_steps": 2436, "loss": 2.9355309009552, "lr": 9.937992485125547e-06, "epoch": 0.437192118226601, "percentage": 14.57, "elapsed_time": "0:06:25", "remaining_time": "0:37:41"} +{"current_steps": 356, "total_steps": 2436, "loss": 2.796612024307251, "lr": 9.936862340681412e-06, "epoch": 0.43842364532019706, "percentage": 14.61, "elapsed_time": "0:06:26", "remaining_time": "0:37:40"} +{"current_steps": 357, "total_steps": 2436, "loss": 2.6307716369628906, "lr": 9.935722055498655e-06, "epoch": 0.4396551724137931, "percentage": 14.66, "elapsed_time": "0:06:27", "remaining_time": "0:37:39"} +{"current_steps": 358, "total_steps": 2436, "loss": 2.8603620529174805, "lr": 9.934571631919518e-06, "epoch": 0.4408866995073892, "percentage": 14.7, "elapsed_time": "0:06:29", "remaining_time": "0:37:38"} +{"current_steps": 359, "total_steps": 2436, "loss": 3.1397266387939453, "lr": 9.933411072307071e-06, "epoch": 0.4421182266009852, "percentage": 14.74, "elapsed_time": "0:06:30", "remaining_time": "0:37:37"} +{"current_steps": 360, "total_steps": 2436, "loss": 2.5362772941589355, "lr": 9.9322403790452e-06, "epoch": 0.4433497536945813, "percentage": 14.78, "elapsed_time": "0:06:31", "remaining_time": "0:37:36"} +{"current_steps": 361, "total_steps": 2436, "loss": 2.7547712326049805, "lr": 9.931059554538613e-06, "epoch": 0.4445812807881773, "percentage": 14.82, "elapsed_time": "0:06:32", "remaining_time": "0:37:34"} +{"current_steps": 362, "total_steps": 2436, "loss": 3.144801139831543, "lr": 9.929868601212822e-06, "epoch": 0.4458128078817734, "percentage": 14.86, "elapsed_time": "0:06:33", "remaining_time": "0:37:33"} +{"current_steps": 363, "total_steps": 2436, "loss": 2.600550889968872, "lr": 9.928667521514149e-06, "epoch": 0.44704433497536944, "percentage": 14.9, "elapsed_time": "0:06:34", "remaining_time": "0:37:32"} +{"current_steps": 364, "total_steps": 2436, "loss": 2.176116704940796, "lr": 9.927456317909711e-06, "epoch": 0.4482758620689655, "percentage": 14.94, "elapsed_time": "0:06:35", "remaining_time": "0:37:31"} +{"current_steps": 365, "total_steps": 2436, "loss": 3.1918365955352783, "lr": 9.92623499288743e-06, "epoch": 0.44950738916256155, "percentage": 14.98, "elapsed_time": "0:06:36", "remaining_time": "0:37:30"} +{"current_steps": 366, "total_steps": 2436, "loss": 2.6937577724456787, "lr": 9.92500354895601e-06, "epoch": 0.45073891625615764, "percentage": 15.02, "elapsed_time": "0:06:37", "remaining_time": "0:37:29"} +{"current_steps": 367, "total_steps": 2436, "loss": 3.6490774154663086, "lr": 9.92376198864494e-06, "epoch": 0.45197044334975367, "percentage": 15.07, "elapsed_time": "0:06:38", "remaining_time": "0:37:27"} +{"current_steps": 368, "total_steps": 2436, "loss": 3.0342392921447754, "lr": 9.922510314504493e-06, "epoch": 0.45320197044334976, "percentage": 15.11, "elapsed_time": "0:06:39", "remaining_time": "0:37:26"} +{"current_steps": 369, "total_steps": 2436, "loss": 3.175008773803711, "lr": 9.921248529105716e-06, "epoch": 0.4544334975369458, "percentage": 15.15, "elapsed_time": "0:06:40", "remaining_time": "0:37:25"} +{"current_steps": 370, "total_steps": 2436, "loss": 1.9000710248947144, "lr": 9.919976635040425e-06, "epoch": 0.45566502463054187, "percentage": 15.19, "elapsed_time": "0:06:41", "remaining_time": "0:37:24"} +{"current_steps": 371, "total_steps": 2436, "loss": 3.5248589515686035, "lr": 9.918694634921195e-06, "epoch": 0.45689655172413796, "percentage": 15.23, "elapsed_time": "0:06:43", "remaining_time": "0:37:23"} +{"current_steps": 372, "total_steps": 2436, "loss": 2.869842529296875, "lr": 9.91740253138137e-06, "epoch": 0.458128078817734, "percentage": 15.27, "elapsed_time": "0:06:44", "remaining_time": "0:37:22"} +{"current_steps": 373, "total_steps": 2436, "loss": 1.9380724430084229, "lr": 9.916100327075038e-06, "epoch": 0.45935960591133007, "percentage": 15.31, "elapsed_time": "0:06:45", "remaining_time": "0:37:21"} +{"current_steps": 374, "total_steps": 2436, "loss": 2.2112460136413574, "lr": 9.914788024677039e-06, "epoch": 0.4605911330049261, "percentage": 15.35, "elapsed_time": "0:06:46", "remaining_time": "0:37:20"} +{"current_steps": 375, "total_steps": 2436, "loss": 3.1283068656921387, "lr": 9.913465626882954e-06, "epoch": 0.4618226600985222, "percentage": 15.39, "elapsed_time": "0:06:47", "remaining_time": "0:37:19"} +{"current_steps": 376, "total_steps": 2436, "loss": 2.692117929458618, "lr": 9.912133136409103e-06, "epoch": 0.4630541871921182, "percentage": 15.44, "elapsed_time": "0:06:48", "remaining_time": "0:37:17"} +{"current_steps": 377, "total_steps": 2436, "loss": 3.047241687774658, "lr": 9.910790555992536e-06, "epoch": 0.4642857142857143, "percentage": 15.48, "elapsed_time": "0:06:49", "remaining_time": "0:37:16"} +{"current_steps": 378, "total_steps": 2436, "loss": 3.0103232860565186, "lr": 9.909437888391025e-06, "epoch": 0.46551724137931033, "percentage": 15.52, "elapsed_time": "0:06:50", "remaining_time": "0:37:15"} +{"current_steps": 379, "total_steps": 2436, "loss": 2.8296966552734375, "lr": 9.908075136383068e-06, "epoch": 0.4667487684729064, "percentage": 15.56, "elapsed_time": "0:06:51", "remaining_time": "0:37:14"} +{"current_steps": 380, "total_steps": 2436, "loss": 2.818819999694824, "lr": 9.906702302767876e-06, "epoch": 0.46798029556650245, "percentage": 15.6, "elapsed_time": "0:06:52", "remaining_time": "0:37:13"} +{"current_steps": 381, "total_steps": 2436, "loss": 3.6281867027282715, "lr": 9.905319390365364e-06, "epoch": 0.46921182266009853, "percentage": 15.64, "elapsed_time": "0:06:53", "remaining_time": "0:37:12"} +{"current_steps": 382, "total_steps": 2436, "loss": 2.7123236656188965, "lr": 9.903926402016153e-06, "epoch": 0.47044334975369456, "percentage": 15.68, "elapsed_time": "0:06:54", "remaining_time": "0:37:10"} +{"current_steps": 383, "total_steps": 2436, "loss": 2.69736909866333, "lr": 9.902523340581562e-06, "epoch": 0.47167487684729065, "percentage": 15.72, "elapsed_time": "0:06:55", "remaining_time": "0:37:09"} +{"current_steps": 384, "total_steps": 2436, "loss": 3.088184118270874, "lr": 9.901110208943599e-06, "epoch": 0.4729064039408867, "percentage": 15.76, "elapsed_time": "0:06:57", "remaining_time": "0:37:08"} +{"current_steps": 385, "total_steps": 2436, "loss": 2.606736183166504, "lr": 9.899687010004956e-06, "epoch": 0.47413793103448276, "percentage": 15.8, "elapsed_time": "0:06:58", "remaining_time": "0:37:07"} +{"current_steps": 386, "total_steps": 2436, "loss": 2.684105157852173, "lr": 9.898253746689007e-06, "epoch": 0.4753694581280788, "percentage": 15.85, "elapsed_time": "0:06:59", "remaining_time": "0:37:06"} +{"current_steps": 387, "total_steps": 2436, "loss": 2.8739280700683594, "lr": 9.896810421939797e-06, "epoch": 0.4766009852216749, "percentage": 15.89, "elapsed_time": "0:07:00", "remaining_time": "0:37:05"} +{"current_steps": 388, "total_steps": 2436, "loss": 2.835542917251587, "lr": 9.895357038722043e-06, "epoch": 0.47783251231527096, "percentage": 15.93, "elapsed_time": "0:07:01", "remaining_time": "0:37:04"} +{"current_steps": 389, "total_steps": 2436, "loss": 2.855287551879883, "lr": 9.893893600021112e-06, "epoch": 0.479064039408867, "percentage": 15.97, "elapsed_time": "0:07:02", "remaining_time": "0:37:02"} +{"current_steps": 390, "total_steps": 2436, "loss": 2.8026838302612305, "lr": 9.892420108843038e-06, "epoch": 0.4802955665024631, "percentage": 16.01, "elapsed_time": "0:07:03", "remaining_time": "0:37:01"} +{"current_steps": 391, "total_steps": 2436, "loss": 3.1150124073028564, "lr": 9.890936568214493e-06, "epoch": 0.4815270935960591, "percentage": 16.05, "elapsed_time": "0:07:04", "remaining_time": "0:37:00"} +{"current_steps": 392, "total_steps": 2436, "loss": 2.578108072280884, "lr": 9.889442981182802e-06, "epoch": 0.4827586206896552, "percentage": 16.09, "elapsed_time": "0:07:05", "remaining_time": "0:36:59"} +{"current_steps": 393, "total_steps": 2436, "loss": 2.7470006942749023, "lr": 9.88793935081592e-06, "epoch": 0.4839901477832512, "percentage": 16.13, "elapsed_time": "0:07:06", "remaining_time": "0:36:58"} +{"current_steps": 394, "total_steps": 2436, "loss": 2.9015283584594727, "lr": 9.88642568020243e-06, "epoch": 0.4852216748768473, "percentage": 16.17, "elapsed_time": "0:07:07", "remaining_time": "0:36:57"} +{"current_steps": 395, "total_steps": 2436, "loss": 3.79250431060791, "lr": 9.884901972451542e-06, "epoch": 0.48645320197044334, "percentage": 16.22, "elapsed_time": "0:07:08", "remaining_time": "0:36:56"} +{"current_steps": 396, "total_steps": 2436, "loss": 3.0748767852783203, "lr": 9.883368230693082e-06, "epoch": 0.4876847290640394, "percentage": 16.26, "elapsed_time": "0:07:09", "remaining_time": "0:36:54"} +{"current_steps": 397, "total_steps": 2436, "loss": 2.822726011276245, "lr": 9.881824458077491e-06, "epoch": 0.48891625615763545, "percentage": 16.3, "elapsed_time": "0:07:11", "remaining_time": "0:36:53"} +{"current_steps": 398, "total_steps": 2436, "loss": 2.7966151237487793, "lr": 9.880270657775806e-06, "epoch": 0.49014778325123154, "percentage": 16.34, "elapsed_time": "0:07:12", "remaining_time": "0:36:52"} +{"current_steps": 399, "total_steps": 2436, "loss": 2.8517651557922363, "lr": 9.878706832979668e-06, "epoch": 0.49137931034482757, "percentage": 16.38, "elapsed_time": "0:07:13", "remaining_time": "0:36:51"} +{"current_steps": 400, "total_steps": 2436, "loss": 2.7754080295562744, "lr": 9.877132986901306e-06, "epoch": 0.49261083743842365, "percentage": 16.42, "elapsed_time": "0:07:14", "remaining_time": "0:36:50"} +{"current_steps": 401, "total_steps": 2436, "loss": 2.9478702545166016, "lr": 9.875549122773536e-06, "epoch": 0.4938423645320197, "percentage": 16.46, "elapsed_time": "0:07:15", "remaining_time": "0:36:49"} +{"current_steps": 402, "total_steps": 2436, "loss": 2.9535412788391113, "lr": 9.87395524384975e-06, "epoch": 0.49507389162561577, "percentage": 16.5, "elapsed_time": "0:07:16", "remaining_time": "0:36:48"} +{"current_steps": 403, "total_steps": 2436, "loss": 3.415161609649658, "lr": 9.872351353403912e-06, "epoch": 0.4963054187192118, "percentage": 16.54, "elapsed_time": "0:07:17", "remaining_time": "0:36:47"} +{"current_steps": 404, "total_steps": 2436, "loss": 2.573082447052002, "lr": 9.870737454730552e-06, "epoch": 0.4975369458128079, "percentage": 16.58, "elapsed_time": "0:07:18", "remaining_time": "0:36:45"} +{"current_steps": 405, "total_steps": 2436, "loss": 2.4743850231170654, "lr": 9.869113551144754e-06, "epoch": 0.4987684729064039, "percentage": 16.63, "elapsed_time": "0:07:19", "remaining_time": "0:36:44"} +{"current_steps": 406, "total_steps": 2436, "loss": 2.6644279956817627, "lr": 9.867479645982158e-06, "epoch": 0.5, "percentage": 16.67, "elapsed_time": "0:07:20", "remaining_time": "0:36:43"} +{"current_steps": 407, "total_steps": 2436, "loss": 2.7798032760620117, "lr": 9.865835742598942e-06, "epoch": 0.5012315270935961, "percentage": 16.71, "elapsed_time": "0:07:21", "remaining_time": "0:36:42"} +{"current_steps": 408, "total_steps": 2436, "loss": 3.939884662628174, "lr": 9.864181844371828e-06, "epoch": 0.5024630541871922, "percentage": 16.75, "elapsed_time": "0:07:22", "remaining_time": "0:36:41"} +{"current_steps": 409, "total_steps": 2436, "loss": 2.8093104362487793, "lr": 9.86251795469806e-06, "epoch": 0.5036945812807881, "percentage": 16.79, "elapsed_time": "0:07:23", "remaining_time": "0:36:40"} +{"current_steps": 410, "total_steps": 2436, "loss": 2.1494715213775635, "lr": 9.860844076995416e-06, "epoch": 0.5049261083743842, "percentage": 16.83, "elapsed_time": "0:07:25", "remaining_time": "0:36:39"} +{"current_steps": 411, "total_steps": 2436, "loss": 2.964136838912964, "lr": 9.85916021470218e-06, "epoch": 0.5061576354679803, "percentage": 16.87, "elapsed_time": "0:07:26", "remaining_time": "0:36:38"} +{"current_steps": 412, "total_steps": 2436, "loss": 2.641287088394165, "lr": 9.857466371277152e-06, "epoch": 0.5073891625615764, "percentage": 16.91, "elapsed_time": "0:07:27", "remaining_time": "0:36:36"} +{"current_steps": 413, "total_steps": 2436, "loss": 2.454512357711792, "lr": 9.85576255019963e-06, "epoch": 0.5086206896551724, "percentage": 16.95, "elapsed_time": "0:07:28", "remaining_time": "0:36:35"} +{"current_steps": 414, "total_steps": 2436, "loss": 2.4566071033477783, "lr": 9.85404875496941e-06, "epoch": 0.5098522167487685, "percentage": 17.0, "elapsed_time": "0:07:29", "remaining_time": "0:36:34"} +{"current_steps": 415, "total_steps": 2436, "loss": 2.7254204750061035, "lr": 9.852324989106772e-06, "epoch": 0.5110837438423645, "percentage": 17.04, "elapsed_time": "0:07:30", "remaining_time": "0:36:33"} +{"current_steps": 416, "total_steps": 2436, "loss": 2.743382215499878, "lr": 9.850591256152483e-06, "epoch": 0.5123152709359606, "percentage": 17.08, "elapsed_time": "0:07:31", "remaining_time": "0:36:32"} +{"current_steps": 417, "total_steps": 2436, "loss": 3.376046657562256, "lr": 9.848847559667774e-06, "epoch": 0.5135467980295566, "percentage": 17.12, "elapsed_time": "0:07:32", "remaining_time": "0:36:31"} +{"current_steps": 418, "total_steps": 2436, "loss": 2.73980975151062, "lr": 9.847093903234351e-06, "epoch": 0.5147783251231527, "percentage": 17.16, "elapsed_time": "0:07:33", "remaining_time": "0:36:30"} +{"current_steps": 419, "total_steps": 2436, "loss": 2.7565903663635254, "lr": 9.845330290454373e-06, "epoch": 0.5160098522167488, "percentage": 17.2, "elapsed_time": "0:07:34", "remaining_time": "0:36:29"} +{"current_steps": 420, "total_steps": 2436, "loss": 2.9061315059661865, "lr": 9.843556724950454e-06, "epoch": 0.5172413793103449, "percentage": 17.24, "elapsed_time": "0:07:35", "remaining_time": "0:36:27"} +{"current_steps": 421, "total_steps": 2436, "loss": 3.1584839820861816, "lr": 9.841773210365646e-06, "epoch": 0.5184729064039408, "percentage": 17.28, "elapsed_time": "0:07:36", "remaining_time": "0:36:26"} +{"current_steps": 422, "total_steps": 2436, "loss": 3.300762176513672, "lr": 9.839979750363443e-06, "epoch": 0.5197044334975369, "percentage": 17.32, "elapsed_time": "0:07:37", "remaining_time": "0:36:25"} +{"current_steps": 423, "total_steps": 2436, "loss": 2.5202269554138184, "lr": 9.838176348627768e-06, "epoch": 0.520935960591133, "percentage": 17.36, "elapsed_time": "0:07:39", "remaining_time": "0:36:24"} +{"current_steps": 424, "total_steps": 2436, "loss": 3.9240634441375732, "lr": 9.83636300886296e-06, "epoch": 0.5221674876847291, "percentage": 17.41, "elapsed_time": "0:07:40", "remaining_time": "0:36:23"} +{"current_steps": 425, "total_steps": 2436, "loss": 3.1783556938171387, "lr": 9.834539734793774e-06, "epoch": 0.5233990147783252, "percentage": 17.45, "elapsed_time": "0:07:41", "remaining_time": "0:36:22"} +{"current_steps": 426, "total_steps": 2436, "loss": 2.787106513977051, "lr": 9.832706530165372e-06, "epoch": 0.5246305418719212, "percentage": 17.49, "elapsed_time": "0:07:42", "remaining_time": "0:36:20"} +{"current_steps": 427, "total_steps": 2436, "loss": 3.270280599594116, "lr": 9.830863398743313e-06, "epoch": 0.5258620689655172, "percentage": 17.53, "elapsed_time": "0:07:43", "remaining_time": "0:36:19"} +{"current_steps": 428, "total_steps": 2436, "loss": 3.0135059356689453, "lr": 9.829010344313548e-06, "epoch": 0.5270935960591133, "percentage": 17.57, "elapsed_time": "0:07:44", "remaining_time": "0:36:18"} +{"current_steps": 429, "total_steps": 2436, "loss": 2.989795207977295, "lr": 9.82714737068241e-06, "epoch": 0.5283251231527094, "percentage": 17.61, "elapsed_time": "0:07:45", "remaining_time": "0:36:17"} +{"current_steps": 430, "total_steps": 2436, "loss": 2.5208187103271484, "lr": 9.825274481676605e-06, "epoch": 0.5295566502463054, "percentage": 17.65, "elapsed_time": "0:07:46", "remaining_time": "0:36:16"} +{"current_steps": 431, "total_steps": 2436, "loss": 3.1890928745269775, "lr": 9.82339168114321e-06, "epoch": 0.5307881773399015, "percentage": 17.69, "elapsed_time": "0:07:47", "remaining_time": "0:36:15"} +{"current_steps": 432, "total_steps": 2436, "loss": 3.0655789375305176, "lr": 9.821498972949657e-06, "epoch": 0.5320197044334976, "percentage": 17.73, "elapsed_time": "0:07:48", "remaining_time": "0:36:14"} +{"current_steps": 433, "total_steps": 2436, "loss": 2.611284017562866, "lr": 9.81959636098373e-06, "epoch": 0.5332512315270936, "percentage": 17.78, "elapsed_time": "0:07:49", "remaining_time": "0:36:13"} +{"current_steps": 434, "total_steps": 2436, "loss": 2.863576889038086, "lr": 9.817683849153561e-06, "epoch": 0.5344827586206896, "percentage": 17.82, "elapsed_time": "0:07:50", "remaining_time": "0:36:12"} +{"current_steps": 435, "total_steps": 2436, "loss": 2.6186623573303223, "lr": 9.815761441387609e-06, "epoch": 0.5357142857142857, "percentage": 17.86, "elapsed_time": "0:07:51", "remaining_time": "0:36:10"} +{"current_steps": 436, "total_steps": 2436, "loss": 1.3848458528518677, "lr": 9.813829141634666e-06, "epoch": 0.5369458128078818, "percentage": 17.9, "elapsed_time": "0:07:53", "remaining_time": "0:36:09"} +{"current_steps": 437, "total_steps": 2436, "loss": 3.00791597366333, "lr": 9.811886953863841e-06, "epoch": 0.5381773399014779, "percentage": 17.94, "elapsed_time": "0:07:54", "remaining_time": "0:36:08"} +{"current_steps": 438, "total_steps": 2436, "loss": 2.8431854248046875, "lr": 9.809934882064555e-06, "epoch": 0.5394088669950738, "percentage": 17.98, "elapsed_time": "0:07:55", "remaining_time": "0:36:07"} +{"current_steps": 439, "total_steps": 2436, "loss": 2.3595449924468994, "lr": 9.807972930246531e-06, "epoch": 0.5406403940886699, "percentage": 18.02, "elapsed_time": "0:07:56", "remaining_time": "0:36:06"} +{"current_steps": 440, "total_steps": 2436, "loss": 2.55434250831604, "lr": 9.806001102439789e-06, "epoch": 0.541871921182266, "percentage": 18.06, "elapsed_time": "0:07:57", "remaining_time": "0:36:05"} +{"current_steps": 441, "total_steps": 2436, "loss": 2.4509990215301514, "lr": 9.804019402694627e-06, "epoch": 0.5431034482758621, "percentage": 18.1, "elapsed_time": "0:07:58", "remaining_time": "0:36:04"} +{"current_steps": 442, "total_steps": 2436, "loss": 2.825401782989502, "lr": 9.802027835081628e-06, "epoch": 0.5443349753694581, "percentage": 18.14, "elapsed_time": "0:07:59", "remaining_time": "0:36:03"} +{"current_steps": 443, "total_steps": 2436, "loss": 2.7315573692321777, "lr": 9.800026403691643e-06, "epoch": 0.5455665024630542, "percentage": 18.19, "elapsed_time": "0:08:00", "remaining_time": "0:36:01"} +{"current_steps": 444, "total_steps": 2436, "loss": 3.1359333992004395, "lr": 9.798015112635786e-06, "epoch": 0.5467980295566502, "percentage": 18.23, "elapsed_time": "0:08:01", "remaining_time": "0:36:00"} +{"current_steps": 445, "total_steps": 2436, "loss": 3.2884740829467773, "lr": 9.795993966045418e-06, "epoch": 0.5480295566502463, "percentage": 18.27, "elapsed_time": "0:08:02", "remaining_time": "0:35:59"} +{"current_steps": 446, "total_steps": 2436, "loss": 2.8281359672546387, "lr": 9.793962968072149e-06, "epoch": 0.5492610837438424, "percentage": 18.31, "elapsed_time": "0:08:03", "remaining_time": "0:35:58"} +{"current_steps": 447, "total_steps": 2436, "loss": 2.633974313735962, "lr": 9.791922122887823e-06, "epoch": 0.5504926108374384, "percentage": 18.35, "elapsed_time": "0:08:04", "remaining_time": "0:35:57"} +{"current_steps": 448, "total_steps": 2436, "loss": 2.1651690006256104, "lr": 9.78987143468451e-06, "epoch": 0.5517241379310345, "percentage": 18.39, "elapsed_time": "0:08:05", "remaining_time": "0:35:56"} +{"current_steps": 449, "total_steps": 2436, "loss": 3.011908531188965, "lr": 9.7878109076745e-06, "epoch": 0.5529556650246306, "percentage": 18.43, "elapsed_time": "0:08:07", "remaining_time": "0:35:55"} +{"current_steps": 450, "total_steps": 2436, "loss": 3.121683359146118, "lr": 9.785740546090293e-06, "epoch": 0.5541871921182266, "percentage": 18.47, "elapsed_time": "0:08:08", "remaining_time": "0:35:54"} +{"current_steps": 451, "total_steps": 2436, "loss": 2.9901375770568848, "lr": 9.783660354184589e-06, "epoch": 0.5554187192118226, "percentage": 18.51, "elapsed_time": "0:08:09", "remaining_time": "0:35:53"} +{"current_steps": 452, "total_steps": 2436, "loss": 3.1121528148651123, "lr": 9.78157033623028e-06, "epoch": 0.5566502463054187, "percentage": 18.56, "elapsed_time": "0:08:10", "remaining_time": "0:35:52"} +{"current_steps": 453, "total_steps": 2436, "loss": 2.9811508655548096, "lr": 9.779470496520442e-06, "epoch": 0.5578817733990148, "percentage": 18.6, "elapsed_time": "0:08:11", "remaining_time": "0:35:50"} +{"current_steps": 454, "total_steps": 2436, "loss": 2.8219947814941406, "lr": 9.777360839368327e-06, "epoch": 0.5591133004926109, "percentage": 18.64, "elapsed_time": "0:08:12", "remaining_time": "0:35:49"} +{"current_steps": 455, "total_steps": 2436, "loss": 2.870987892150879, "lr": 9.77524136910735e-06, "epoch": 0.5603448275862069, "percentage": 18.68, "elapsed_time": "0:08:13", "remaining_time": "0:35:48"} +{"current_steps": 456, "total_steps": 2436, "loss": 3.1902365684509277, "lr": 9.773112090091084e-06, "epoch": 0.5615763546798029, "percentage": 18.72, "elapsed_time": "0:08:14", "remaining_time": "0:35:47"} +{"current_steps": 457, "total_steps": 2436, "loss": 3.3052220344543457, "lr": 9.770973006693256e-06, "epoch": 0.562807881773399, "percentage": 18.76, "elapsed_time": "0:08:15", "remaining_time": "0:35:46"} +{"current_steps": 458, "total_steps": 2436, "loss": 2.3376049995422363, "lr": 9.76882412330772e-06, "epoch": 0.5640394088669951, "percentage": 18.8, "elapsed_time": "0:08:16", "remaining_time": "0:35:45"} +{"current_steps": 459, "total_steps": 2436, "loss": 2.8364970684051514, "lr": 9.766665444348472e-06, "epoch": 0.5652709359605911, "percentage": 18.84, "elapsed_time": "0:08:17", "remaining_time": "0:35:44"} +{"current_steps": 460, "total_steps": 2436, "loss": 2.582505702972412, "lr": 9.76449697424962e-06, "epoch": 0.5665024630541872, "percentage": 18.88, "elapsed_time": "0:08:18", "remaining_time": "0:35:43"} +{"current_steps": 461, "total_steps": 2436, "loss": 2.485147476196289, "lr": 9.76231871746539e-06, "epoch": 0.5677339901477833, "percentage": 18.92, "elapsed_time": "0:08:19", "remaining_time": "0:35:42"} +{"current_steps": 462, "total_steps": 2436, "loss": 3.0910027027130127, "lr": 9.760130678470106e-06, "epoch": 0.5689655172413793, "percentage": 18.97, "elapsed_time": "0:08:21", "remaining_time": "0:35:40"} +{"current_steps": 463, "total_steps": 2436, "loss": 3.3621506690979004, "lr": 9.757932861758188e-06, "epoch": 0.5701970443349754, "percentage": 19.01, "elapsed_time": "0:08:22", "remaining_time": "0:35:39"} +{"current_steps": 464, "total_steps": 2436, "loss": 2.8310019969940186, "lr": 9.755725271844142e-06, "epoch": 0.5714285714285714, "percentage": 19.05, "elapsed_time": "0:08:23", "remaining_time": "0:35:38"} +{"current_steps": 465, "total_steps": 2436, "loss": 2.797703742980957, "lr": 9.753507913262548e-06, "epoch": 0.5726600985221675, "percentage": 19.09, "elapsed_time": "0:08:24", "remaining_time": "0:35:37"} +{"current_steps": 466, "total_steps": 2436, "loss": 2.6609878540039062, "lr": 9.751280790568047e-06, "epoch": 0.5738916256157636, "percentage": 19.13, "elapsed_time": "0:08:25", "remaining_time": "0:35:36"} +{"current_steps": 467, "total_steps": 2436, "loss": 2.778043508529663, "lr": 9.749043908335343e-06, "epoch": 0.5751231527093597, "percentage": 19.17, "elapsed_time": "0:08:26", "remaining_time": "0:35:35"} +{"current_steps": 468, "total_steps": 2436, "loss": 2.8315014839172363, "lr": 9.74679727115918e-06, "epoch": 0.5763546798029556, "percentage": 19.21, "elapsed_time": "0:08:27", "remaining_time": "0:35:34"} +{"current_steps": 469, "total_steps": 2436, "loss": 3.3902840614318848, "lr": 9.744540883654348e-06, "epoch": 0.5775862068965517, "percentage": 19.25, "elapsed_time": "0:08:28", "remaining_time": "0:35:33"} +{"current_steps": 470, "total_steps": 2436, "loss": 3.53080153465271, "lr": 9.742274750455659e-06, "epoch": 0.5788177339901478, "percentage": 19.29, "elapsed_time": "0:08:29", "remaining_time": "0:35:31"} +{"current_steps": 471, "total_steps": 2436, "loss": 2.270110845565796, "lr": 9.739998876217943e-06, "epoch": 0.5800492610837439, "percentage": 19.33, "elapsed_time": "0:08:30", "remaining_time": "0:35:30"} +{"current_steps": 472, "total_steps": 2436, "loss": 2.7059872150421143, "lr": 9.737713265616043e-06, "epoch": 0.5812807881773399, "percentage": 19.38, "elapsed_time": "0:08:31", "remaining_time": "0:35:29"} +{"current_steps": 473, "total_steps": 2436, "loss": 4.328514575958252, "lr": 9.735417923344798e-06, "epoch": 0.5825123152709359, "percentage": 19.42, "elapsed_time": "0:08:32", "remaining_time": "0:35:28"} +{"current_steps": 474, "total_steps": 2436, "loss": 3.2155938148498535, "lr": 9.73311285411904e-06, "epoch": 0.583743842364532, "percentage": 19.46, "elapsed_time": "0:08:33", "remaining_time": "0:35:27"} +{"current_steps": 475, "total_steps": 2436, "loss": 2.277022361755371, "lr": 9.730798062673575e-06, "epoch": 0.5849753694581281, "percentage": 19.5, "elapsed_time": "0:08:35", "remaining_time": "0:35:26"} +{"current_steps": 476, "total_steps": 2436, "loss": 2.794111490249634, "lr": 9.728473553763186e-06, "epoch": 0.5862068965517241, "percentage": 19.54, "elapsed_time": "0:08:36", "remaining_time": "0:35:25"} +{"current_steps": 477, "total_steps": 2436, "loss": 3.00388765335083, "lr": 9.726139332162613e-06, "epoch": 0.5874384236453202, "percentage": 19.58, "elapsed_time": "0:08:37", "remaining_time": "0:35:24"} +{"current_steps": 478, "total_steps": 2436, "loss": 2.5355563163757324, "lr": 9.723795402666549e-06, "epoch": 0.5886699507389163, "percentage": 19.62, "elapsed_time": "0:08:38", "remaining_time": "0:35:22"} +{"current_steps": 479, "total_steps": 2436, "loss": 3.2441415786743164, "lr": 9.721441770089621e-06, "epoch": 0.5899014778325123, "percentage": 19.66, "elapsed_time": "0:08:39", "remaining_time": "0:35:21"} +{"current_steps": 480, "total_steps": 2436, "loss": 2.826803207397461, "lr": 9.719078439266399e-06, "epoch": 0.5911330049261084, "percentage": 19.7, "elapsed_time": "0:08:40", "remaining_time": "0:35:20"} +{"current_steps": 481, "total_steps": 2436, "loss": 2.5396804809570312, "lr": 9.716705415051362e-06, "epoch": 0.5923645320197044, "percentage": 19.75, "elapsed_time": "0:08:41", "remaining_time": "0:35:19"} +{"current_steps": 482, "total_steps": 2436, "loss": 2.85546875, "lr": 9.714322702318908e-06, "epoch": 0.5935960591133005, "percentage": 19.79, "elapsed_time": "0:08:42", "remaining_time": "0:35:18"} +{"current_steps": 483, "total_steps": 2436, "loss": 3.217014789581299, "lr": 9.711930305963333e-06, "epoch": 0.5948275862068966, "percentage": 19.83, "elapsed_time": "0:08:43", "remaining_time": "0:35:17"} +{"current_steps": 484, "total_steps": 2436, "loss": 2.781094551086426, "lr": 9.70952823089882e-06, "epoch": 0.5960591133004927, "percentage": 19.87, "elapsed_time": "0:08:44", "remaining_time": "0:35:16"} +{"current_steps": 485, "total_steps": 2436, "loss": 2.617154121398926, "lr": 9.707116482059447e-06, "epoch": 0.5972906403940886, "percentage": 19.91, "elapsed_time": "0:08:45", "remaining_time": "0:35:15"} +{"current_steps": 486, "total_steps": 2436, "loss": 2.601886510848999, "lr": 9.704695064399143e-06, "epoch": 0.5985221674876847, "percentage": 19.95, "elapsed_time": "0:08:46", "remaining_time": "0:35:13"} +{"current_steps": 487, "total_steps": 2436, "loss": 2.9616146087646484, "lr": 9.702263982891712e-06, "epoch": 0.5997536945812808, "percentage": 19.99, "elapsed_time": "0:08:47", "remaining_time": "0:35:12"} +{"current_steps": 488, "total_steps": 2436, "loss": 2.8881943225860596, "lr": 9.699823242530803e-06, "epoch": 0.6009852216748769, "percentage": 20.03, "elapsed_time": "0:08:49", "remaining_time": "0:35:11"} +{"current_steps": 489, "total_steps": 2436, "loss": 2.6718311309814453, "lr": 9.697372848329905e-06, "epoch": 0.6022167487684729, "percentage": 20.07, "elapsed_time": "0:08:50", "remaining_time": "0:35:10"} +{"current_steps": 490, "total_steps": 2436, "loss": 2.959104537963867, "lr": 9.69491280532234e-06, "epoch": 0.603448275862069, "percentage": 20.11, "elapsed_time": "0:08:51", "remaining_time": "0:35:09"} +{"current_steps": 491, "total_steps": 2436, "loss": 2.085991621017456, "lr": 9.692443118561248e-06, "epoch": 0.604679802955665, "percentage": 20.16, "elapsed_time": "0:08:52", "remaining_time": "0:35:08"} +{"current_steps": 492, "total_steps": 2436, "loss": 4.498569488525391, "lr": 9.689963793119574e-06, "epoch": 0.6059113300492611, "percentage": 20.2, "elapsed_time": "0:08:53", "remaining_time": "0:35:07"} +{"current_steps": 493, "total_steps": 2436, "loss": 2.7837424278259277, "lr": 9.68747483409007e-06, "epoch": 0.6071428571428571, "percentage": 20.24, "elapsed_time": "0:08:54", "remaining_time": "0:35:06"} +{"current_steps": 494, "total_steps": 2436, "loss": 2.637524366378784, "lr": 9.684976246585264e-06, "epoch": 0.6083743842364532, "percentage": 20.28, "elapsed_time": "0:08:55", "remaining_time": "0:35:05"} +{"current_steps": 495, "total_steps": 2436, "loss": 2.765727996826172, "lr": 9.682468035737475e-06, "epoch": 0.6096059113300493, "percentage": 20.32, "elapsed_time": "0:08:56", "remaining_time": "0:35:04"} +{"current_steps": 496, "total_steps": 2436, "loss": 2.825129270553589, "lr": 9.679950206698782e-06, "epoch": 0.6108374384236454, "percentage": 20.36, "elapsed_time": "0:08:57", "remaining_time": "0:35:02"} +{"current_steps": 497, "total_steps": 2436, "loss": 2.733224630355835, "lr": 9.677422764641021e-06, "epoch": 0.6120689655172413, "percentage": 20.4, "elapsed_time": "0:08:58", "remaining_time": "0:35:01"} +{"current_steps": 498, "total_steps": 2436, "loss": 3.6287670135498047, "lr": 9.674885714755773e-06, "epoch": 0.6133004926108374, "percentage": 20.44, "elapsed_time": "0:08:59", "remaining_time": "0:35:00"} +{"current_steps": 499, "total_steps": 2436, "loss": 2.38788104057312, "lr": 9.672339062254359e-06, "epoch": 0.6145320197044335, "percentage": 20.48, "elapsed_time": "0:09:00", "remaining_time": "0:34:59"} +{"current_steps": 500, "total_steps": 2436, "loss": 2.942269802093506, "lr": 9.66978281236782e-06, "epoch": 0.6157635467980296, "percentage": 20.53, "elapsed_time": "0:09:01", "remaining_time": "0:34:58"} +{"current_steps": 501, "total_steps": 2436, "loss": 2.4100990295410156, "lr": 9.667216970346916e-06, "epoch": 0.6169950738916257, "percentage": 20.57, "elapsed_time": "0:09:03", "remaining_time": "0:34:57"} +{"current_steps": 502, "total_steps": 2436, "loss": 2.3959155082702637, "lr": 9.6646415414621e-06, "epoch": 0.6182266009852216, "percentage": 20.61, "elapsed_time": "0:09:04", "remaining_time": "0:34:56"} +{"current_steps": 503, "total_steps": 2436, "loss": 2.93027925491333, "lr": 9.662056531003528e-06, "epoch": 0.6194581280788177, "percentage": 20.65, "elapsed_time": "0:09:05", "remaining_time": "0:34:55"} +{"current_steps": 504, "total_steps": 2436, "loss": 3.164715528488159, "lr": 9.659461944281035e-06, "epoch": 0.6206896551724138, "percentage": 20.69, "elapsed_time": "0:09:06", "remaining_time": "0:34:54"} +{"current_steps": 505, "total_steps": 2436, "loss": 2.634587287902832, "lr": 9.656857786624119e-06, "epoch": 0.6219211822660099, "percentage": 20.73, "elapsed_time": "0:09:07", "remaining_time": "0:34:52"} +{"current_steps": 506, "total_steps": 2436, "loss": 3.5667788982391357, "lr": 9.654244063381948e-06, "epoch": 0.6231527093596059, "percentage": 20.77, "elapsed_time": "0:09:08", "remaining_time": "0:34:51"} +{"current_steps": 507, "total_steps": 2436, "loss": 2.9383740425109863, "lr": 9.651620779923332e-06, "epoch": 0.624384236453202, "percentage": 20.81, "elapsed_time": "0:09:09", "remaining_time": "0:34:50"} +{"current_steps": 508, "total_steps": 2436, "loss": 2.7658987045288086, "lr": 9.648987941636719e-06, "epoch": 0.625615763546798, "percentage": 20.85, "elapsed_time": "0:09:10", "remaining_time": "0:34:49"} +{"current_steps": 509, "total_steps": 2436, "loss": 3.3089890480041504, "lr": 9.646345553930187e-06, "epoch": 0.6268472906403941, "percentage": 20.89, "elapsed_time": "0:09:11", "remaining_time": "0:34:48"} +{"current_steps": 510, "total_steps": 2436, "loss": 2.6208066940307617, "lr": 9.643693622231426e-06, "epoch": 0.6280788177339901, "percentage": 20.94, "elapsed_time": "0:09:12", "remaining_time": "0:34:47"} +{"current_steps": 511, "total_steps": 2436, "loss": 2.7099995613098145, "lr": 9.64103215198773e-06, "epoch": 0.6293103448275862, "percentage": 20.98, "elapsed_time": "0:09:13", "remaining_time": "0:34:46"} +{"current_steps": 512, "total_steps": 2436, "loss": 2.894531488418579, "lr": 9.638361148665989e-06, "epoch": 0.6305418719211823, "percentage": 21.02, "elapsed_time": "0:09:14", "remaining_time": "0:34:45"} +{"current_steps": 513, "total_steps": 2436, "loss": 3.1289191246032715, "lr": 9.63568061775267e-06, "epoch": 0.6317733990147784, "percentage": 21.06, "elapsed_time": "0:09:16", "remaining_time": "0:34:44"} +{"current_steps": 514, "total_steps": 2436, "loss": 2.954707145690918, "lr": 9.632990564753817e-06, "epoch": 0.6330049261083743, "percentage": 21.1, "elapsed_time": "0:09:17", "remaining_time": "0:34:43"} +{"current_steps": 515, "total_steps": 2436, "loss": 2.93411922454834, "lr": 9.630290995195028e-06, "epoch": 0.6342364532019704, "percentage": 21.14, "elapsed_time": "0:09:18", "remaining_time": "0:34:42"} +{"current_steps": 516, "total_steps": 2436, "loss": 2.637021541595459, "lr": 9.62758191462145e-06, "epoch": 0.6354679802955665, "percentage": 21.18, "elapsed_time": "0:09:19", "remaining_time": "0:34:41"} +{"current_steps": 517, "total_steps": 2436, "loss": 3.020066261291504, "lr": 9.624863328597767e-06, "epoch": 0.6366995073891626, "percentage": 21.22, "elapsed_time": "0:09:20", "remaining_time": "0:34:39"} +{"current_steps": 518, "total_steps": 2436, "loss": 2.5983335971832275, "lr": 9.622135242708188e-06, "epoch": 0.6379310344827587, "percentage": 21.26, "elapsed_time": "0:09:21", "remaining_time": "0:34:38"} +{"current_steps": 519, "total_steps": 2436, "loss": 2.714207410812378, "lr": 9.619397662556434e-06, "epoch": 0.6391625615763546, "percentage": 21.31, "elapsed_time": "0:09:22", "remaining_time": "0:34:37"} +{"current_steps": 520, "total_steps": 2436, "loss": 2.8505520820617676, "lr": 9.616650593765733e-06, "epoch": 0.6403940886699507, "percentage": 21.35, "elapsed_time": "0:09:23", "remaining_time": "0:34:36"} +{"current_steps": 521, "total_steps": 2436, "loss": 2.8081271648406982, "lr": 9.613894041978795e-06, "epoch": 0.6416256157635468, "percentage": 21.39, "elapsed_time": "0:09:24", "remaining_time": "0:34:35"} +{"current_steps": 522, "total_steps": 2436, "loss": 3.106411933898926, "lr": 9.611128012857818e-06, "epoch": 0.6428571428571429, "percentage": 21.43, "elapsed_time": "0:09:25", "remaining_time": "0:34:34"} +{"current_steps": 523, "total_steps": 2436, "loss": 3.087594985961914, "lr": 9.60835251208446e-06, "epoch": 0.6440886699507389, "percentage": 21.47, "elapsed_time": "0:09:26", "remaining_time": "0:34:33"} +{"current_steps": 524, "total_steps": 2436, "loss": 2.7104361057281494, "lr": 9.60556754535984e-06, "epoch": 0.645320197044335, "percentage": 21.51, "elapsed_time": "0:09:27", "remaining_time": "0:34:32"} +{"current_steps": 525, "total_steps": 2436, "loss": 2.8562324047088623, "lr": 9.602773118404518e-06, "epoch": 0.646551724137931, "percentage": 21.55, "elapsed_time": "0:09:28", "remaining_time": "0:34:31"} +{"current_steps": 526, "total_steps": 2436, "loss": 3.282554864883423, "lr": 9.599969236958485e-06, "epoch": 0.6477832512315271, "percentage": 21.59, "elapsed_time": "0:09:30", "remaining_time": "0:34:29"} +{"current_steps": 527, "total_steps": 2436, "loss": 2.623101234436035, "lr": 9.597155906781154e-06, "epoch": 0.6490147783251231, "percentage": 21.63, "elapsed_time": "0:09:31", "remaining_time": "0:34:28"} +{"current_steps": 528, "total_steps": 2436, "loss": 2.889674663543701, "lr": 9.59433313365135e-06, "epoch": 0.6502463054187192, "percentage": 21.67, "elapsed_time": "0:09:32", "remaining_time": "0:34:27"} +{"current_steps": 529, "total_steps": 2436, "loss": 2.787289619445801, "lr": 9.591500923367287e-06, "epoch": 0.6514778325123153, "percentage": 21.72, "elapsed_time": "0:09:33", "remaining_time": "0:34:26"} +{"current_steps": 530, "total_steps": 2436, "loss": 2.879824161529541, "lr": 9.58865928174657e-06, "epoch": 0.6527093596059114, "percentage": 21.76, "elapsed_time": "0:09:34", "remaining_time": "0:34:25"} +{"current_steps": 531, "total_steps": 2436, "loss": 2.967193126678467, "lr": 9.585808214626173e-06, "epoch": 0.6539408866995073, "percentage": 21.8, "elapsed_time": "0:09:35", "remaining_time": "0:34:24"} +{"current_steps": 532, "total_steps": 2436, "loss": 3.1004772186279297, "lr": 9.582947727862433e-06, "epoch": 0.6551724137931034, "percentage": 21.84, "elapsed_time": "0:09:36", "remaining_time": "0:34:23"} +{"current_steps": 533, "total_steps": 2436, "loss": 2.69935941696167, "lr": 9.580077827331038e-06, "epoch": 0.6564039408866995, "percentage": 21.88, "elapsed_time": "0:09:37", "remaining_time": "0:34:22"} +{"current_steps": 534, "total_steps": 2436, "loss": 3.2806637287139893, "lr": 9.577198518927005e-06, "epoch": 0.6576354679802956, "percentage": 21.92, "elapsed_time": "0:09:38", "remaining_time": "0:34:21"} +{"current_steps": 535, "total_steps": 2436, "loss": 3.050356149673462, "lr": 9.574309808564682e-06, "epoch": 0.6588669950738916, "percentage": 21.96, "elapsed_time": "0:09:39", "remaining_time": "0:34:20"} +{"current_steps": 536, "total_steps": 2436, "loss": 2.8415322303771973, "lr": 9.57141170217773e-06, "epoch": 0.6600985221674877, "percentage": 22.0, "elapsed_time": "0:09:40", "remaining_time": "0:34:18"} +{"current_steps": 537, "total_steps": 2436, "loss": 2.5309085845947266, "lr": 9.568504205719106e-06, "epoch": 0.6613300492610837, "percentage": 22.04, "elapsed_time": "0:09:41", "remaining_time": "0:34:17"} +{"current_steps": 538, "total_steps": 2436, "loss": 3.5695877075195312, "lr": 9.565587325161056e-06, "epoch": 0.6625615763546798, "percentage": 22.09, "elapsed_time": "0:09:42", "remaining_time": "0:34:16"} +{"current_steps": 539, "total_steps": 2436, "loss": 2.7938594818115234, "lr": 9.562661066495108e-06, "epoch": 0.6637931034482759, "percentage": 22.13, "elapsed_time": "0:09:44", "remaining_time": "0:34:15"} +{"current_steps": 540, "total_steps": 2436, "loss": 2.8548948764801025, "lr": 9.559725435732042e-06, "epoch": 0.6650246305418719, "percentage": 22.17, "elapsed_time": "0:09:45", "remaining_time": "0:34:14"} +{"current_steps": 541, "total_steps": 2436, "loss": 3.054051399230957, "lr": 9.556780438901899e-06, "epoch": 0.666256157635468, "percentage": 22.21, "elapsed_time": "0:09:46", "remaining_time": "0:34:13"} +{"current_steps": 542, "total_steps": 2436, "loss": 3.566359281539917, "lr": 9.553826082053951e-06, "epoch": 0.6674876847290641, "percentage": 22.25, "elapsed_time": "0:09:47", "remaining_time": "0:34:12"} +{"current_steps": 543, "total_steps": 2436, "loss": 2.8619909286499023, "lr": 9.550862371256705e-06, "epoch": 0.6687192118226601, "percentage": 22.29, "elapsed_time": "0:09:48", "remaining_time": "0:34:11"} +{"current_steps": 544, "total_steps": 2436, "loss": 3.0177836418151855, "lr": 9.547889312597877e-06, "epoch": 0.6699507389162561, "percentage": 22.33, "elapsed_time": "0:09:49", "remaining_time": "0:34:10"} +{"current_steps": 545, "total_steps": 2436, "loss": 1.9943304061889648, "lr": 9.544906912184383e-06, "epoch": 0.6711822660098522, "percentage": 22.37, "elapsed_time": "0:09:50", "remaining_time": "0:34:08"} +{"current_steps": 546, "total_steps": 2436, "loss": 2.650038957595825, "lr": 9.541915176142326e-06, "epoch": 0.6724137931034483, "percentage": 22.41, "elapsed_time": "0:09:51", "remaining_time": "0:34:07"} +{"current_steps": 547, "total_steps": 2436, "loss": 2.826953411102295, "lr": 9.538914110616995e-06, "epoch": 0.6736453201970444, "percentage": 22.45, "elapsed_time": "0:09:52", "remaining_time": "0:34:06"} +{"current_steps": 548, "total_steps": 2436, "loss": 2.770202159881592, "lr": 9.53590372177283e-06, "epoch": 0.6748768472906403, "percentage": 22.5, "elapsed_time": "0:09:53", "remaining_time": "0:34:05"} +{"current_steps": 549, "total_steps": 2436, "loss": 2.0859670639038086, "lr": 9.532884015793432e-06, "epoch": 0.6761083743842364, "percentage": 22.54, "elapsed_time": "0:09:54", "remaining_time": "0:34:04"} +{"current_steps": 550, "total_steps": 2436, "loss": 2.7557499408721924, "lr": 9.529854998881534e-06, "epoch": 0.6773399014778325, "percentage": 22.58, "elapsed_time": "0:09:55", "remaining_time": "0:34:03"} +{"current_steps": 551, "total_steps": 2436, "loss": 2.710692882537842, "lr": 9.526816677258995e-06, "epoch": 0.6785714285714286, "percentage": 22.62, "elapsed_time": "0:09:57", "remaining_time": "0:34:02"} +{"current_steps": 552, "total_steps": 2436, "loss": 3.055102825164795, "lr": 9.523769057166791e-06, "epoch": 0.6798029556650246, "percentage": 22.66, "elapsed_time": "0:09:58", "remaining_time": "0:34:01"} +{"current_steps": 553, "total_steps": 2436, "loss": 2.606031894683838, "lr": 9.520712144864997e-06, "epoch": 0.6810344827586207, "percentage": 22.7, "elapsed_time": "0:09:59", "remaining_time": "0:34:00"} +{"current_steps": 554, "total_steps": 2436, "loss": 2.9099555015563965, "lr": 9.517645946632766e-06, "epoch": 0.6822660098522167, "percentage": 22.74, "elapsed_time": "0:10:00", "remaining_time": "0:33:59"} +{"current_steps": 555, "total_steps": 2436, "loss": 2.7148189544677734, "lr": 9.514570468768338e-06, "epoch": 0.6834975369458128, "percentage": 22.78, "elapsed_time": "0:10:01", "remaining_time": "0:33:58"} +{"current_steps": 556, "total_steps": 2436, "loss": 2.528857707977295, "lr": 9.511485717589006e-06, "epoch": 0.6847290640394089, "percentage": 22.82, "elapsed_time": "0:10:02", "remaining_time": "0:33:56"} +{"current_steps": 557, "total_steps": 2436, "loss": 2.814006805419922, "lr": 9.508391699431114e-06, "epoch": 0.6859605911330049, "percentage": 22.87, "elapsed_time": "0:10:03", "remaining_time": "0:33:55"} +{"current_steps": 558, "total_steps": 2436, "loss": 3.3046352863311768, "lr": 9.50528842065004e-06, "epoch": 0.687192118226601, "percentage": 22.91, "elapsed_time": "0:10:04", "remaining_time": "0:33:54"} +{"current_steps": 559, "total_steps": 2436, "loss": 3.1519320011138916, "lr": 9.502175887620188e-06, "epoch": 0.6884236453201971, "percentage": 22.95, "elapsed_time": "0:10:05", "remaining_time": "0:33:53"} +{"current_steps": 560, "total_steps": 2436, "loss": 2.2819509506225586, "lr": 9.499054106734963e-06, "epoch": 0.6896551724137931, "percentage": 22.99, "elapsed_time": "0:10:06", "remaining_time": "0:33:52"} +{"current_steps": 561, "total_steps": 2436, "loss": 2.7894287109375, "lr": 9.495923084406773e-06, "epoch": 0.6908866995073891, "percentage": 23.03, "elapsed_time": "0:10:07", "remaining_time": "0:33:51"} +{"current_steps": 562, "total_steps": 2436, "loss": 3.233968734741211, "lr": 9.492782827067006e-06, "epoch": 0.6921182266009852, "percentage": 23.07, "elapsed_time": "0:10:08", "remaining_time": "0:33:50"} +{"current_steps": 563, "total_steps": 2436, "loss": 2.594421863555908, "lr": 9.48963334116602e-06, "epoch": 0.6933497536945813, "percentage": 23.11, "elapsed_time": "0:10:09", "remaining_time": "0:33:49"} +{"current_steps": 564, "total_steps": 2436, "loss": 3.181318759918213, "lr": 9.486474633173129e-06, "epoch": 0.6945812807881774, "percentage": 23.15, "elapsed_time": "0:10:11", "remaining_time": "0:33:48"} +{"current_steps": 565, "total_steps": 2436, "loss": 3.2115392684936523, "lr": 9.48330670957659e-06, "epoch": 0.6958128078817734, "percentage": 23.19, "elapsed_time": "0:10:12", "remaining_time": "0:33:46"} +{"current_steps": 566, "total_steps": 2436, "loss": 2.408634901046753, "lr": 9.480129576883592e-06, "epoch": 0.6970443349753694, "percentage": 23.23, "elapsed_time": "0:10:13", "remaining_time": "0:33:45"} +{"current_steps": 567, "total_steps": 2436, "loss": 2.9304041862487793, "lr": 9.476943241620233e-06, "epoch": 0.6982758620689655, "percentage": 23.28, "elapsed_time": "0:10:14", "remaining_time": "0:33:44"} +{"current_steps": 568, "total_steps": 2436, "loss": 2.75127911567688, "lr": 9.473747710331524e-06, "epoch": 0.6995073891625616, "percentage": 23.32, "elapsed_time": "0:10:15", "remaining_time": "0:33:43"} +{"current_steps": 569, "total_steps": 2436, "loss": 3.3793530464172363, "lr": 9.470542989581357e-06, "epoch": 0.7007389162561576, "percentage": 23.36, "elapsed_time": "0:10:16", "remaining_time": "0:33:42"} +{"current_steps": 570, "total_steps": 2436, "loss": 3.001579999923706, "lr": 9.467329085952505e-06, "epoch": 0.7019704433497537, "percentage": 23.4, "elapsed_time": "0:10:17", "remaining_time": "0:33:41"} +{"current_steps": 571, "total_steps": 2436, "loss": 2.063443422317505, "lr": 9.464106006046602e-06, "epoch": 0.7032019704433498, "percentage": 23.44, "elapsed_time": "0:10:18", "remaining_time": "0:33:40"} +{"current_steps": 572, "total_steps": 2436, "loss": 3.079399585723877, "lr": 9.460873756484128e-06, "epoch": 0.7044334975369458, "percentage": 23.48, "elapsed_time": "0:10:19", "remaining_time": "0:33:39"} +{"current_steps": 573, "total_steps": 2436, "loss": 2.6499621868133545, "lr": 9.457632343904404e-06, "epoch": 0.7056650246305419, "percentage": 23.52, "elapsed_time": "0:10:20", "remaining_time": "0:33:38"} +{"current_steps": 574, "total_steps": 2436, "loss": 2.848517656326294, "lr": 9.454381774965567e-06, "epoch": 0.7068965517241379, "percentage": 23.56, "elapsed_time": "0:10:21", "remaining_time": "0:33:37"} +{"current_steps": 575, "total_steps": 2436, "loss": 2.936286687850952, "lr": 9.451122056344564e-06, "epoch": 0.708128078817734, "percentage": 23.6, "elapsed_time": "0:10:22", "remaining_time": "0:33:35"} +{"current_steps": 576, "total_steps": 2436, "loss": 2.315443515777588, "lr": 9.44785319473714e-06, "epoch": 0.7093596059113301, "percentage": 23.65, "elapsed_time": "0:10:23", "remaining_time": "0:33:34"} +{"current_steps": 577, "total_steps": 2436, "loss": 3.121138334274292, "lr": 9.444575196857814e-06, "epoch": 0.7105911330049262, "percentage": 23.69, "elapsed_time": "0:10:25", "remaining_time": "0:33:33"} +{"current_steps": 578, "total_steps": 2436, "loss": 3.326282501220703, "lr": 9.441288069439876e-06, "epoch": 0.7118226600985221, "percentage": 23.73, "elapsed_time": "0:10:26", "remaining_time": "0:33:32"} +{"current_steps": 579, "total_steps": 2436, "loss": 2.8816466331481934, "lr": 9.437991819235366e-06, "epoch": 0.7130541871921182, "percentage": 23.77, "elapsed_time": "0:10:27", "remaining_time": "0:33:31"} +{"current_steps": 580, "total_steps": 2436, "loss": 3.6819610595703125, "lr": 9.434686453015067e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "0:10:28", "remaining_time": "0:33:30"} +{"current_steps": 581, "total_steps": 2436, "loss": 2.904045343399048, "lr": 9.431371977568483e-06, "epoch": 0.7155172413793104, "percentage": 23.85, "elapsed_time": "0:10:29", "remaining_time": "0:33:29"} +{"current_steps": 582, "total_steps": 2436, "loss": 3.5356435775756836, "lr": 9.428048399703831e-06, "epoch": 0.7167487684729064, "percentage": 23.89, "elapsed_time": "0:10:30", "remaining_time": "0:33:28"} +{"current_steps": 583, "total_steps": 2436, "loss": 2.4456870555877686, "lr": 9.424715726248027e-06, "epoch": 0.7179802955665024, "percentage": 23.93, "elapsed_time": "0:10:31", "remaining_time": "0:33:27"} +{"current_steps": 584, "total_steps": 2436, "loss": 2.5000674724578857, "lr": 9.421373964046665e-06, "epoch": 0.7192118226600985, "percentage": 23.97, "elapsed_time": "0:10:32", "remaining_time": "0:33:26"} +{"current_steps": 585, "total_steps": 2436, "loss": 2.856738567352295, "lr": 9.418023119964012e-06, "epoch": 0.7204433497536946, "percentage": 24.01, "elapsed_time": "0:10:33", "remaining_time": "0:33:24"} +{"current_steps": 586, "total_steps": 2436, "loss": 2.623438835144043, "lr": 9.414663200882991e-06, "epoch": 0.7216748768472906, "percentage": 24.06, "elapsed_time": "0:10:34", "remaining_time": "0:33:23"} +{"current_steps": 587, "total_steps": 2436, "loss": 2.987426996231079, "lr": 9.411294213705162e-06, "epoch": 0.7229064039408867, "percentage": 24.1, "elapsed_time": "0:10:35", "remaining_time": "0:33:22"} +{"current_steps": 588, "total_steps": 2436, "loss": 2.8868589401245117, "lr": 9.407916165350713e-06, "epoch": 0.7241379310344828, "percentage": 24.14, "elapsed_time": "0:10:36", "remaining_time": "0:33:21"} +{"current_steps": 589, "total_steps": 2436, "loss": 2.878659725189209, "lr": 9.404529062758447e-06, "epoch": 0.7253694581280788, "percentage": 24.18, "elapsed_time": "0:10:38", "remaining_time": "0:33:20"} +{"current_steps": 590, "total_steps": 2436, "loss": 3.197636127471924, "lr": 9.401132912885764e-06, "epoch": 0.7266009852216748, "percentage": 24.22, "elapsed_time": "0:10:39", "remaining_time": "0:33:19"} +{"current_steps": 591, "total_steps": 2436, "loss": 2.8974030017852783, "lr": 9.397727722708643e-06, "epoch": 0.7278325123152709, "percentage": 24.26, "elapsed_time": "0:10:40", "remaining_time": "0:33:18"} +{"current_steps": 592, "total_steps": 2436, "loss": 2.558945894241333, "lr": 9.39431349922164e-06, "epoch": 0.729064039408867, "percentage": 24.3, "elapsed_time": "0:10:41", "remaining_time": "0:33:17"} +{"current_steps": 593, "total_steps": 2436, "loss": 1.0518803596496582, "lr": 9.390890249437863e-06, "epoch": 0.7302955665024631, "percentage": 24.34, "elapsed_time": "0:10:42", "remaining_time": "0:33:16"} +{"current_steps": 594, "total_steps": 2436, "loss": 3.5599231719970703, "lr": 9.38745798038896e-06, "epoch": 0.7315270935960592, "percentage": 24.38, "elapsed_time": "0:10:43", "remaining_time": "0:33:15"} +{"current_steps": 595, "total_steps": 2436, "loss": 3.1517539024353027, "lr": 9.384016699125102e-06, "epoch": 0.7327586206896551, "percentage": 24.43, "elapsed_time": "0:10:44", "remaining_time": "0:33:14"} +{"current_steps": 596, "total_steps": 2436, "loss": 2.809019088745117, "lr": 9.380566412714982e-06, "epoch": 0.7339901477832512, "percentage": 24.47, "elapsed_time": "0:10:45", "remaining_time": "0:33:12"} +{"current_steps": 597, "total_steps": 2436, "loss": 3.2317776679992676, "lr": 9.377107128245782e-06, "epoch": 0.7352216748768473, "percentage": 24.51, "elapsed_time": "0:10:46", "remaining_time": "0:33:11"} +{"current_steps": 598, "total_steps": 2436, "loss": 2.7792513370513916, "lr": 9.373638852823166e-06, "epoch": 0.7364532019704434, "percentage": 24.55, "elapsed_time": "0:10:47", "remaining_time": "0:33:10"} +{"current_steps": 599, "total_steps": 2436, "loss": 2.75253963470459, "lr": 9.370161593571274e-06, "epoch": 0.7376847290640394, "percentage": 24.59, "elapsed_time": "0:10:48", "remaining_time": "0:33:09"} +{"current_steps": 600, "total_steps": 2436, "loss": 3.381519317626953, "lr": 9.36667535763269e-06, "epoch": 0.7389162561576355, "percentage": 24.63, "elapsed_time": "0:10:49", "remaining_time": "0:33:08"} +{"current_steps": 601, "total_steps": 2436, "loss": 2.62427020072937, "lr": 9.363180152168448e-06, "epoch": 0.7401477832512315, "percentage": 24.67, "elapsed_time": "0:10:50", "remaining_time": "0:33:07"} +{"current_steps": 602, "total_steps": 2436, "loss": 2.4824719429016113, "lr": 9.359675984357992e-06, "epoch": 0.7413793103448276, "percentage": 24.71, "elapsed_time": "0:10:51", "remaining_time": "0:33:06"} +{"current_steps": 603, "total_steps": 2436, "loss": 2.8167097568511963, "lr": 9.356162861399188e-06, "epoch": 0.7426108374384236, "percentage": 24.75, "elapsed_time": "0:10:53", "remaining_time": "0:33:05"} +{"current_steps": 604, "total_steps": 2436, "loss": 2.9545063972473145, "lr": 9.352640790508291e-06, "epoch": 0.7438423645320197, "percentage": 24.79, "elapsed_time": "0:10:54", "remaining_time": "0:33:04"} +{"current_steps": 605, "total_steps": 2436, "loss": 2.833635091781616, "lr": 9.349109778919938e-06, "epoch": 0.7450738916256158, "percentage": 24.84, "elapsed_time": "0:10:55", "remaining_time": "0:33:02"} +{"current_steps": 606, "total_steps": 2436, "loss": 2.775730609893799, "lr": 9.345569833887124e-06, "epoch": 0.7463054187192119, "percentage": 24.88, "elapsed_time": "0:10:56", "remaining_time": "0:33:01"} +{"current_steps": 607, "total_steps": 2436, "loss": 2.652602195739746, "lr": 9.342020962681206e-06, "epoch": 0.7475369458128078, "percentage": 24.92, "elapsed_time": "0:10:57", "remaining_time": "0:33:00"} +{"current_steps": 608, "total_steps": 2436, "loss": 2.7008144855499268, "lr": 9.338463172591868e-06, "epoch": 0.7487684729064039, "percentage": 24.96, "elapsed_time": "0:10:58", "remaining_time": "0:32:59"} +{"current_steps": 609, "total_steps": 2436, "loss": 2.7525248527526855, "lr": 9.334896470927115e-06, "epoch": 0.75, "percentage": 25.0, "elapsed_time": "0:10:59", "remaining_time": "0:32:58"} +{"current_steps": 610, "total_steps": 2436, "loss": 3.446526527404785, "lr": 9.331320865013257e-06, "epoch": 0.7512315270935961, "percentage": 25.04, "elapsed_time": "0:11:00", "remaining_time": "0:32:57"} +{"current_steps": 611, "total_steps": 2436, "loss": 3.0489022731781006, "lr": 9.327736362194899e-06, "epoch": 0.7524630541871922, "percentage": 25.08, "elapsed_time": "0:11:01", "remaining_time": "0:32:56"} +{"current_steps": 612, "total_steps": 2436, "loss": 2.840083360671997, "lr": 9.324142969834916e-06, "epoch": 0.7536945812807881, "percentage": 25.12, "elapsed_time": "0:11:02", "remaining_time": "0:32:55"} +{"current_steps": 613, "total_steps": 2436, "loss": 2.878903388977051, "lr": 9.32054069531444e-06, "epoch": 0.7549261083743842, "percentage": 25.16, "elapsed_time": "0:11:03", "remaining_time": "0:32:54"} +{"current_steps": 614, "total_steps": 2436, "loss": 2.568045139312744, "lr": 9.316929546032855e-06, "epoch": 0.7561576354679803, "percentage": 25.21, "elapsed_time": "0:11:04", "remaining_time": "0:32:53"} +{"current_steps": 615, "total_steps": 2436, "loss": 2.8981618881225586, "lr": 9.313309529407773e-06, "epoch": 0.7573891625615764, "percentage": 25.25, "elapsed_time": "0:11:06", "remaining_time": "0:32:52"} +{"current_steps": 616, "total_steps": 2436, "loss": 3.3486928939819336, "lr": 9.309680652875015e-06, "epoch": 0.7586206896551724, "percentage": 25.29, "elapsed_time": "0:11:07", "remaining_time": "0:32:50"} +{"current_steps": 617, "total_steps": 2436, "loss": 3.1101677417755127, "lr": 9.306042923888607e-06, "epoch": 0.7598522167487685, "percentage": 25.33, "elapsed_time": "0:11:08", "remaining_time": "0:32:49"} +{"current_steps": 618, "total_steps": 2436, "loss": 2.5806779861450195, "lr": 9.302396349920756e-06, "epoch": 0.7610837438423645, "percentage": 25.37, "elapsed_time": "0:11:09", "remaining_time": "0:32:48"} +{"current_steps": 619, "total_steps": 2436, "loss": 2.678412437438965, "lr": 9.298740938461835e-06, "epoch": 0.7623152709359606, "percentage": 25.41, "elapsed_time": "0:11:10", "remaining_time": "0:32:47"} +{"current_steps": 620, "total_steps": 2436, "loss": 2.62287974357605, "lr": 9.295076697020378e-06, "epoch": 0.7635467980295566, "percentage": 25.45, "elapsed_time": "0:11:11", "remaining_time": "0:32:46"} +{"current_steps": 621, "total_steps": 2436, "loss": 3.0267720222473145, "lr": 9.291403633123046e-06, "epoch": 0.7647783251231527, "percentage": 25.49, "elapsed_time": "0:11:12", "remaining_time": "0:32:45"} +{"current_steps": 622, "total_steps": 2436, "loss": 3.147644281387329, "lr": 9.287721754314629e-06, "epoch": 0.7660098522167488, "percentage": 25.53, "elapsed_time": "0:11:13", "remaining_time": "0:32:44"} +{"current_steps": 623, "total_steps": 2436, "loss": 3.159574031829834, "lr": 9.284031068158023e-06, "epoch": 0.7672413793103449, "percentage": 25.57, "elapsed_time": "0:11:14", "remaining_time": "0:32:43"} +{"current_steps": 624, "total_steps": 2436, "loss": 2.6432247161865234, "lr": 9.280331582234212e-06, "epoch": 0.7684729064039408, "percentage": 25.62, "elapsed_time": "0:11:15", "remaining_time": "0:32:42"} +{"current_steps": 625, "total_steps": 2436, "loss": 3.2058279514312744, "lr": 9.27662330414226e-06, "epoch": 0.7697044334975369, "percentage": 25.66, "elapsed_time": "0:11:16", "remaining_time": "0:32:41"} +{"current_steps": 626, "total_steps": 2436, "loss": 2.787260055541992, "lr": 9.272906241499285e-06, "epoch": 0.770935960591133, "percentage": 25.7, "elapsed_time": "0:11:17", "remaining_time": "0:32:39"} +{"current_steps": 627, "total_steps": 2436, "loss": 2.5751729011535645, "lr": 9.269180401940455e-06, "epoch": 0.7721674876847291, "percentage": 25.74, "elapsed_time": "0:11:18", "remaining_time": "0:32:38"} +{"current_steps": 628, "total_steps": 2436, "loss": 2.7433929443359375, "lr": 9.265445793118962e-06, "epoch": 0.7733990147783252, "percentage": 25.78, "elapsed_time": "0:11:19", "remaining_time": "0:32:37"} +{"current_steps": 629, "total_steps": 2436, "loss": 2.771510124206543, "lr": 9.261702422706014e-06, "epoch": 0.7746305418719212, "percentage": 25.82, "elapsed_time": "0:11:21", "remaining_time": "0:32:36"} +{"current_steps": 630, "total_steps": 2436, "loss": 2.873830795288086, "lr": 9.257950298390815e-06, "epoch": 0.7758620689655172, "percentage": 25.86, "elapsed_time": "0:11:22", "remaining_time": "0:32:35"} +{"current_steps": 631, "total_steps": 2436, "loss": 2.7849340438842773, "lr": 9.254189427880548e-06, "epoch": 0.7770935960591133, "percentage": 25.9, "elapsed_time": "0:11:23", "remaining_time": "0:32:34"} +{"current_steps": 632, "total_steps": 2436, "loss": 3.1721668243408203, "lr": 9.250419818900366e-06, "epoch": 0.7783251231527094, "percentage": 25.94, "elapsed_time": "0:11:24", "remaining_time": "0:32:33"} +{"current_steps": 633, "total_steps": 2436, "loss": 2.7493605613708496, "lr": 9.24664147919337e-06, "epoch": 0.7795566502463054, "percentage": 25.99, "elapsed_time": "0:11:25", "remaining_time": "0:32:32"} +{"current_steps": 634, "total_steps": 2436, "loss": 2.470233917236328, "lr": 9.242854416520591e-06, "epoch": 0.7807881773399015, "percentage": 26.03, "elapsed_time": "0:11:26", "remaining_time": "0:32:31"} +{"current_steps": 635, "total_steps": 2436, "loss": 2.7109014987945557, "lr": 9.239058638660983e-06, "epoch": 0.7820197044334976, "percentage": 26.07, "elapsed_time": "0:11:27", "remaining_time": "0:32:29"} +{"current_steps": 636, "total_steps": 2436, "loss": 3.0344791412353516, "lr": 9.235254153411394e-06, "epoch": 0.7832512315270936, "percentage": 26.11, "elapsed_time": "0:11:28", "remaining_time": "0:32:28"} +{"current_steps": 637, "total_steps": 2436, "loss": 2.381561279296875, "lr": 9.231440968586572e-06, "epoch": 0.7844827586206896, "percentage": 26.15, "elapsed_time": "0:11:29", "remaining_time": "0:32:27"} +{"current_steps": 638, "total_steps": 2436, "loss": 1.716524362564087, "lr": 9.227619092019116e-06, "epoch": 0.7857142857142857, "percentage": 26.19, "elapsed_time": "0:11:30", "remaining_time": "0:32:26"} +{"current_steps": 639, "total_steps": 2436, "loss": 2.591820240020752, "lr": 9.223788531559495e-06, "epoch": 0.7869458128078818, "percentage": 26.23, "elapsed_time": "0:11:31", "remaining_time": "0:32:25"} +{"current_steps": 640, "total_steps": 2436, "loss": 3.0194711685180664, "lr": 9.219949295076006e-06, "epoch": 0.7881773399014779, "percentage": 26.27, "elapsed_time": "0:11:32", "remaining_time": "0:32:24"} +{"current_steps": 641, "total_steps": 2436, "loss": 2.852489471435547, "lr": 9.216101390454771e-06, "epoch": 0.7894088669950738, "percentage": 26.31, "elapsed_time": "0:11:33", "remaining_time": "0:32:23"} +{"current_steps": 642, "total_steps": 2436, "loss": 3.1419005393981934, "lr": 9.212244825599714e-06, "epoch": 0.7906403940886699, "percentage": 26.35, "elapsed_time": "0:11:35", "remaining_time": "0:32:22"} +{"current_steps": 643, "total_steps": 2436, "loss": 2.8307576179504395, "lr": 9.208379608432552e-06, "epoch": 0.791871921182266, "percentage": 26.4, "elapsed_time": "0:11:36", "remaining_time": "0:32:21"} +{"current_steps": 644, "total_steps": 2436, "loss": 2.581083297729492, "lr": 9.204505746892772e-06, "epoch": 0.7931034482758621, "percentage": 26.44, "elapsed_time": "0:11:37", "remaining_time": "0:32:20"} +{"current_steps": 645, "total_steps": 2436, "loss": 2.868973731994629, "lr": 9.200623248937619e-06, "epoch": 0.7943349753694581, "percentage": 26.48, "elapsed_time": "0:11:38", "remaining_time": "0:32:19"} +{"current_steps": 646, "total_steps": 2436, "loss": 2.8063859939575195, "lr": 9.196732122542073e-06, "epoch": 0.7955665024630542, "percentage": 26.52, "elapsed_time": "0:11:39", "remaining_time": "0:32:17"} +{"current_steps": 647, "total_steps": 2436, "loss": 2.990504264831543, "lr": 9.192832375698845e-06, "epoch": 0.7967980295566502, "percentage": 26.56, "elapsed_time": "0:11:40", "remaining_time": "0:32:16"} +{"current_steps": 648, "total_steps": 2436, "loss": 2.390320301055908, "lr": 9.18892401641835e-06, "epoch": 0.7980295566502463, "percentage": 26.6, "elapsed_time": "0:11:41", "remaining_time": "0:32:15"} +{"current_steps": 649, "total_steps": 2436, "loss": 2.671368360519409, "lr": 9.185007052728689e-06, "epoch": 0.7992610837438424, "percentage": 26.64, "elapsed_time": "0:11:42", "remaining_time": "0:32:14"} +{"current_steps": 650, "total_steps": 2436, "loss": 3.259225845336914, "lr": 9.181081492675645e-06, "epoch": 0.8004926108374384, "percentage": 26.68, "elapsed_time": "0:11:43", "remaining_time": "0:32:13"} +{"current_steps": 651, "total_steps": 2436, "loss": 2.6810710430145264, "lr": 9.177147344322651e-06, "epoch": 0.8017241379310345, "percentage": 26.72, "elapsed_time": "0:11:44", "remaining_time": "0:32:12"} +{"current_steps": 652, "total_steps": 2436, "loss": 2.833371162414551, "lr": 9.173204615750792e-06, "epoch": 0.8029556650246306, "percentage": 26.77, "elapsed_time": "0:11:45", "remaining_time": "0:32:11"} +{"current_steps": 653, "total_steps": 2436, "loss": 2.3488945960998535, "lr": 9.169253315058764e-06, "epoch": 0.8041871921182266, "percentage": 26.81, "elapsed_time": "0:11:46", "remaining_time": "0:32:10"} +{"current_steps": 654, "total_steps": 2436, "loss": 2.609282970428467, "lr": 9.165293450362882e-06, "epoch": 0.8054187192118226, "percentage": 26.85, "elapsed_time": "0:11:47", "remaining_time": "0:32:09"} +{"current_steps": 655, "total_steps": 2436, "loss": 2.536142587661743, "lr": 9.161325029797044e-06, "epoch": 0.8066502463054187, "percentage": 26.89, "elapsed_time": "0:11:49", "remaining_time": "0:32:08"} +{"current_steps": 656, "total_steps": 2436, "loss": 2.7175073623657227, "lr": 9.157348061512728e-06, "epoch": 0.8078817733990148, "percentage": 26.93, "elapsed_time": "0:11:50", "remaining_time": "0:32:06"} +{"current_steps": 657, "total_steps": 2436, "loss": 2.99211049079895, "lr": 9.153362553678967e-06, "epoch": 0.8091133004926109, "percentage": 26.97, "elapsed_time": "0:11:51", "remaining_time": "0:32:05"} +{"current_steps": 658, "total_steps": 2436, "loss": 2.9390807151794434, "lr": 9.149368514482337e-06, "epoch": 0.8103448275862069, "percentage": 27.01, "elapsed_time": "0:11:52", "remaining_time": "0:32:04"} +{"current_steps": 659, "total_steps": 2436, "loss": 3.0422894954681396, "lr": 9.145365952126937e-06, "epoch": 0.8115763546798029, "percentage": 27.05, "elapsed_time": "0:11:53", "remaining_time": "0:32:03"} +{"current_steps": 660, "total_steps": 2436, "loss": 3.0573301315307617, "lr": 9.141354874834372e-06, "epoch": 0.812807881773399, "percentage": 27.09, "elapsed_time": "0:11:54", "remaining_time": "0:32:02"} +{"current_steps": 661, "total_steps": 2436, "loss": 2.5086781978607178, "lr": 9.13733529084374e-06, "epoch": 0.8140394088669951, "percentage": 27.13, "elapsed_time": "0:11:55", "remaining_time": "0:32:01"} +{"current_steps": 662, "total_steps": 2436, "loss": 2.858813762664795, "lr": 9.13330720841161e-06, "epoch": 0.8152709359605911, "percentage": 27.18, "elapsed_time": "0:11:56", "remaining_time": "0:32:00"} +{"current_steps": 663, "total_steps": 2436, "loss": 2.6715052127838135, "lr": 9.129270635812013e-06, "epoch": 0.8165024630541872, "percentage": 27.22, "elapsed_time": "0:11:57", "remaining_time": "0:31:59"} +{"current_steps": 664, "total_steps": 2436, "loss": 3.18508243560791, "lr": 9.125225581336408e-06, "epoch": 0.8177339901477833, "percentage": 27.26, "elapsed_time": "0:11:58", "remaining_time": "0:31:58"} +{"current_steps": 665, "total_steps": 2436, "loss": 3.0426509380340576, "lr": 9.12117205329369e-06, "epoch": 0.8189655172413793, "percentage": 27.3, "elapsed_time": "0:11:59", "remaining_time": "0:31:56"} +{"current_steps": 666, "total_steps": 2436, "loss": 2.8654000759124756, "lr": 9.11711006001015e-06, "epoch": 0.8201970443349754, "percentage": 27.34, "elapsed_time": "0:12:00", "remaining_time": "0:31:55"} +{"current_steps": 667, "total_steps": 2436, "loss": 3.141207695007324, "lr": 9.113039609829472e-06, "epoch": 0.8214285714285714, "percentage": 27.38, "elapsed_time": "0:12:01", "remaining_time": "0:31:54"} +{"current_steps": 668, "total_steps": 2436, "loss": 2.3188462257385254, "lr": 9.108960711112709e-06, "epoch": 0.8226600985221675, "percentage": 27.42, "elapsed_time": "0:12:03", "remaining_time": "0:31:53"} +{"current_steps": 669, "total_steps": 2436, "loss": 2.785968542098999, "lr": 9.104873372238269e-06, "epoch": 0.8238916256157636, "percentage": 27.46, "elapsed_time": "0:12:04", "remaining_time": "0:31:52"} +{"current_steps": 670, "total_steps": 2436, "loss": 3.0693092346191406, "lr": 9.100777601601896e-06, "epoch": 0.8251231527093597, "percentage": 27.5, "elapsed_time": "0:12:05", "remaining_time": "0:31:51"} +{"current_steps": 671, "total_steps": 2436, "loss": 3.038943290710449, "lr": 9.096673407616656e-06, "epoch": 0.8263546798029556, "percentage": 27.55, "elapsed_time": "0:12:06", "remaining_time": "0:31:50"} +{"current_steps": 672, "total_steps": 2436, "loss": 3.259847640991211, "lr": 9.092560798712913e-06, "epoch": 0.8275862068965517, "percentage": 27.59, "elapsed_time": "0:12:07", "remaining_time": "0:31:49"} +{"current_steps": 673, "total_steps": 2436, "loss": 2.8227295875549316, "lr": 9.08843978333832e-06, "epoch": 0.8288177339901478, "percentage": 27.63, "elapsed_time": "0:12:08", "remaining_time": "0:31:48"} +{"current_steps": 674, "total_steps": 2436, "loss": 3.373309850692749, "lr": 9.084310369957795e-06, "epoch": 0.8300492610837439, "percentage": 27.67, "elapsed_time": "0:12:09", "remaining_time": "0:31:47"} +{"current_steps": 675, "total_steps": 2436, "loss": 3.2833662033081055, "lr": 9.08017256705351e-06, "epoch": 0.8312807881773399, "percentage": 27.71, "elapsed_time": "0:12:10", "remaining_time": "0:31:46"} +{"current_steps": 676, "total_steps": 2436, "loss": 2.7175965309143066, "lr": 9.076026383124863e-06, "epoch": 0.8325123152709359, "percentage": 27.75, "elapsed_time": "0:12:11", "remaining_time": "0:31:44"} +{"current_steps": 677, "total_steps": 2436, "loss": 2.594611167907715, "lr": 9.071871826688472e-06, "epoch": 0.833743842364532, "percentage": 27.79, "elapsed_time": "0:12:12", "remaining_time": "0:31:43"} +{"current_steps": 678, "total_steps": 2436, "loss": 2.8605175018310547, "lr": 9.067708906278155e-06, "epoch": 0.8349753694581281, "percentage": 27.83, "elapsed_time": "0:12:13", "remaining_time": "0:31:42"} +{"current_steps": 679, "total_steps": 2436, "loss": 2.1438748836517334, "lr": 9.063537630444903e-06, "epoch": 0.8362068965517241, "percentage": 27.87, "elapsed_time": "0:12:14", "remaining_time": "0:31:41"} +{"current_steps": 680, "total_steps": 2436, "loss": 2.8170299530029297, "lr": 9.05935800775688e-06, "epoch": 0.8374384236453202, "percentage": 27.91, "elapsed_time": "0:12:15", "remaining_time": "0:31:40"} +{"current_steps": 681, "total_steps": 2436, "loss": 1.7328954935073853, "lr": 9.055170046799386e-06, "epoch": 0.8386699507389163, "percentage": 27.96, "elapsed_time": "0:12:17", "remaining_time": "0:31:39"} +{"current_steps": 682, "total_steps": 2436, "loss": 2.8324766159057617, "lr": 9.050973756174852e-06, "epoch": 0.8399014778325123, "percentage": 28.0, "elapsed_time": "0:12:18", "remaining_time": "0:31:38"} +{"current_steps": 683, "total_steps": 2436, "loss": 2.805690288543701, "lr": 9.046769144502818e-06, "epoch": 0.8411330049261084, "percentage": 28.04, "elapsed_time": "0:12:19", "remaining_time": "0:31:37"} +{"current_steps": 684, "total_steps": 2436, "loss": 2.1270194053649902, "lr": 9.04255622041992e-06, "epoch": 0.8423645320197044, "percentage": 28.08, "elapsed_time": "0:12:20", "remaining_time": "0:31:36"} +{"current_steps": 685, "total_steps": 2436, "loss": 2.8757829666137695, "lr": 9.038334992579863e-06, "epoch": 0.8435960591133005, "percentage": 28.12, "elapsed_time": "0:12:21", "remaining_time": "0:31:35"} +{"current_steps": 686, "total_steps": 2436, "loss": 2.84549617767334, "lr": 9.034105469653412e-06, "epoch": 0.8448275862068966, "percentage": 28.16, "elapsed_time": "0:12:22", "remaining_time": "0:31:34"} +{"current_steps": 687, "total_steps": 2436, "loss": 2.4058642387390137, "lr": 9.029867660328369e-06, "epoch": 0.8460591133004927, "percentage": 28.2, "elapsed_time": "0:12:23", "remaining_time": "0:31:32"} +{"current_steps": 688, "total_steps": 2436, "loss": 3.2583184242248535, "lr": 9.025621573309559e-06, "epoch": 0.8472906403940886, "percentage": 28.24, "elapsed_time": "0:12:24", "remaining_time": "0:31:31"} +{"current_steps": 689, "total_steps": 2436, "loss": 2.951143264770508, "lr": 9.021367217318808e-06, "epoch": 0.8485221674876847, "percentage": 28.28, "elapsed_time": "0:12:25", "remaining_time": "0:31:30"} +{"current_steps": 690, "total_steps": 2436, "loss": 3.0142836570739746, "lr": 9.017104601094927e-06, "epoch": 0.8497536945812808, "percentage": 28.33, "elapsed_time": "0:12:26", "remaining_time": "0:31:29"} +{"current_steps": 691, "total_steps": 2436, "loss": 2.7629013061523438, "lr": 9.012833733393697e-06, "epoch": 0.8509852216748769, "percentage": 28.37, "elapsed_time": "0:12:27", "remaining_time": "0:31:28"} +{"current_steps": 692, "total_steps": 2436, "loss": 2.6153712272644043, "lr": 9.008554622987845e-06, "epoch": 0.8522167487684729, "percentage": 28.41, "elapsed_time": "0:12:28", "remaining_time": "0:31:27"} +{"current_steps": 693, "total_steps": 2436, "loss": 2.7227087020874023, "lr": 9.004267278667032e-06, "epoch": 0.853448275862069, "percentage": 28.45, "elapsed_time": "0:12:30", "remaining_time": "0:31:26"} +{"current_steps": 694, "total_steps": 2436, "loss": 2.7320899963378906, "lr": 8.999971709237832e-06, "epoch": 0.854679802955665, "percentage": 28.49, "elapsed_time": "0:12:31", "remaining_time": "0:31:25"} +{"current_steps": 695, "total_steps": 2436, "loss": 2.4416356086730957, "lr": 8.99566792352371e-06, "epoch": 0.8559113300492611, "percentage": 28.53, "elapsed_time": "0:12:32", "remaining_time": "0:31:24"} +{"current_steps": 696, "total_steps": 2436, "loss": 3.251642942428589, "lr": 8.991355930365013e-06, "epoch": 0.8571428571428571, "percentage": 28.57, "elapsed_time": "0:12:33", "remaining_time": "0:31:23"} +{"current_steps": 697, "total_steps": 2436, "loss": 2.9292666912078857, "lr": 8.987035738618943e-06, "epoch": 0.8583743842364532, "percentage": 28.61, "elapsed_time": "0:12:34", "remaining_time": "0:31:22"} +{"current_steps": 698, "total_steps": 2436, "loss": 2.804452896118164, "lr": 8.982707357159549e-06, "epoch": 0.8596059113300493, "percentage": 28.65, "elapsed_time": "0:12:35", "remaining_time": "0:31:20"} +{"current_steps": 699, "total_steps": 2436, "loss": 2.4997687339782715, "lr": 8.978370794877691e-06, "epoch": 0.8608374384236454, "percentage": 28.69, "elapsed_time": "0:12:36", "remaining_time": "0:31:19"} +{"current_steps": 700, "total_steps": 2436, "loss": 2.459716558456421, "lr": 8.974026060681044e-06, "epoch": 0.8620689655172413, "percentage": 28.74, "elapsed_time": "0:12:37", "remaining_time": "0:31:18"} +{"current_steps": 701, "total_steps": 2436, "loss": 2.57291316986084, "lr": 8.969673163494063e-06, "epoch": 0.8633004926108374, "percentage": 28.78, "elapsed_time": "0:12:38", "remaining_time": "0:31:17"} +{"current_steps": 702, "total_steps": 2436, "loss": 2.6452269554138184, "lr": 8.965312112257973e-06, "epoch": 0.8645320197044335, "percentage": 28.82, "elapsed_time": "0:12:39", "remaining_time": "0:31:16"} +{"current_steps": 703, "total_steps": 2436, "loss": 2.4361040592193604, "lr": 8.960942915930749e-06, "epoch": 0.8657635467980296, "percentage": 28.86, "elapsed_time": "0:12:40", "remaining_time": "0:31:15"} +{"current_steps": 704, "total_steps": 2436, "loss": 2.819046974182129, "lr": 8.956565583487092e-06, "epoch": 0.8669950738916257, "percentage": 28.9, "elapsed_time": "0:12:41", "remaining_time": "0:31:14"} +{"current_steps": 705, "total_steps": 2436, "loss": 3.536510944366455, "lr": 8.952180123918419e-06, "epoch": 0.8682266009852216, "percentage": 28.94, "elapsed_time": "0:12:42", "remaining_time": "0:31:13"} +{"current_steps": 706, "total_steps": 2436, "loss": 3.340855121612549, "lr": 8.94778654623284e-06, "epoch": 0.8694581280788177, "percentage": 28.98, "elapsed_time": "0:12:43", "remaining_time": "0:31:12"} +{"current_steps": 707, "total_steps": 2436, "loss": 2.7881288528442383, "lr": 8.94338485945514e-06, "epoch": 0.8706896551724138, "percentage": 29.02, "elapsed_time": "0:12:45", "remaining_time": "0:31:11"} +{"current_steps": 708, "total_steps": 2436, "loss": 3.119422197341919, "lr": 8.938975072626762e-06, "epoch": 0.8719211822660099, "percentage": 29.06, "elapsed_time": "0:12:46", "remaining_time": "0:31:09"} +{"current_steps": 709, "total_steps": 2436, "loss": 2.694553852081299, "lr": 8.934557194805787e-06, "epoch": 0.8731527093596059, "percentage": 29.11, "elapsed_time": "0:12:47", "remaining_time": "0:31:08"} +{"current_steps": 710, "total_steps": 2436, "loss": 2.7162301540374756, "lr": 8.930131235066914e-06, "epoch": 0.874384236453202, "percentage": 29.15, "elapsed_time": "0:12:48", "remaining_time": "0:31:07"} +{"current_steps": 711, "total_steps": 2436, "loss": 2.4017574787139893, "lr": 8.925697202501442e-06, "epoch": 0.875615763546798, "percentage": 29.19, "elapsed_time": "0:12:49", "remaining_time": "0:31:06"} +{"current_steps": 712, "total_steps": 2436, "loss": 2.491663932800293, "lr": 8.92125510621726e-06, "epoch": 0.8768472906403941, "percentage": 29.23, "elapsed_time": "0:12:50", "remaining_time": "0:31:05"} +{"current_steps": 713, "total_steps": 2436, "loss": 3.09323787689209, "lr": 8.916804955338807e-06, "epoch": 0.8780788177339901, "percentage": 29.27, "elapsed_time": "0:12:51", "remaining_time": "0:31:04"} +{"current_steps": 714, "total_steps": 2436, "loss": 3.0273964405059814, "lr": 8.91234675900708e-06, "epoch": 0.8793103448275862, "percentage": 29.31, "elapsed_time": "0:12:52", "remaining_time": "0:31:03"} +{"current_steps": 715, "total_steps": 2436, "loss": 2.5009701251983643, "lr": 8.907880526379594e-06, "epoch": 0.8805418719211823, "percentage": 29.35, "elapsed_time": "0:12:53", "remaining_time": "0:31:02"} +{"current_steps": 716, "total_steps": 2436, "loss": 2.7629752159118652, "lr": 8.903406266630374e-06, "epoch": 0.8817733990147784, "percentage": 29.39, "elapsed_time": "0:12:54", "remaining_time": "0:31:01"} +{"current_steps": 717, "total_steps": 2436, "loss": 2.5285563468933105, "lr": 8.898923988949936e-06, "epoch": 0.8830049261083743, "percentage": 29.43, "elapsed_time": "0:12:55", "remaining_time": "0:31:00"} +{"current_steps": 718, "total_steps": 2436, "loss": 2.6903738975524902, "lr": 8.89443370254526e-06, "epoch": 0.8842364532019704, "percentage": 29.47, "elapsed_time": "0:12:56", "remaining_time": "0:30:58"} +{"current_steps": 719, "total_steps": 2436, "loss": 2.8083925247192383, "lr": 8.88993541663978e-06, "epoch": 0.8854679802955665, "percentage": 29.52, "elapsed_time": "0:12:57", "remaining_time": "0:30:57"} +{"current_steps": 720, "total_steps": 2436, "loss": 3.0920486450195312, "lr": 8.885429140473361e-06, "epoch": 0.8866995073891626, "percentage": 29.56, "elapsed_time": "0:12:59", "remaining_time": "0:30:56"} +{"current_steps": 721, "total_steps": 2436, "loss": 2.7464776039123535, "lr": 8.880914883302278e-06, "epoch": 0.8879310344827587, "percentage": 29.6, "elapsed_time": "0:13:00", "remaining_time": "0:30:55"} +{"current_steps": 722, "total_steps": 2436, "loss": 2.7022242546081543, "lr": 8.876392654399208e-06, "epoch": 0.8891625615763546, "percentage": 29.64, "elapsed_time": "0:13:01", "remaining_time": "0:30:54"} +{"current_steps": 723, "total_steps": 2436, "loss": 3.202090263366699, "lr": 8.871862463053193e-06, "epoch": 0.8903940886699507, "percentage": 29.68, "elapsed_time": "0:13:02", "remaining_time": "0:30:53"} +{"current_steps": 724, "total_steps": 2436, "loss": 2.792590856552124, "lr": 8.867324318569637e-06, "epoch": 0.8916256157635468, "percentage": 29.72, "elapsed_time": "0:13:03", "remaining_time": "0:30:52"} +{"current_steps": 725, "total_steps": 2436, "loss": 2.8918404579162598, "lr": 8.862778230270276e-06, "epoch": 0.8928571428571429, "percentage": 29.76, "elapsed_time": "0:13:04", "remaining_time": "0:30:51"} +{"current_steps": 726, "total_steps": 2436, "loss": 2.881380081176758, "lr": 8.858224207493165e-06, "epoch": 0.8940886699507389, "percentage": 29.8, "elapsed_time": "0:13:05", "remaining_time": "0:30:50"} +{"current_steps": 727, "total_steps": 2436, "loss": 2.7197518348693848, "lr": 8.85366225959266e-06, "epoch": 0.895320197044335, "percentage": 29.84, "elapsed_time": "0:13:06", "remaining_time": "0:30:49"} +{"current_steps": 728, "total_steps": 2436, "loss": 2.8458380699157715, "lr": 8.849092395939388e-06, "epoch": 0.896551724137931, "percentage": 29.89, "elapsed_time": "0:13:07", "remaining_time": "0:30:48"} +{"current_steps": 729, "total_steps": 2436, "loss": 2.5815629959106445, "lr": 8.844514625920246e-06, "epoch": 0.8977832512315271, "percentage": 29.93, "elapsed_time": "0:13:08", "remaining_time": "0:30:46"} +{"current_steps": 730, "total_steps": 2436, "loss": 2.388244867324829, "lr": 8.839928958938364e-06, "epoch": 0.8990147783251231, "percentage": 29.97, "elapsed_time": "0:13:09", "remaining_time": "0:30:45"} +{"current_steps": 731, "total_steps": 2436, "loss": 2.678809404373169, "lr": 8.835335404413096e-06, "epoch": 0.9002463054187192, "percentage": 30.01, "elapsed_time": "0:13:10", "remaining_time": "0:30:44"} +{"current_steps": 732, "total_steps": 2436, "loss": 3.4926984310150146, "lr": 8.830733971779996e-06, "epoch": 0.9014778325123153, "percentage": 30.05, "elapsed_time": "0:13:11", "remaining_time": "0:30:43"} +{"current_steps": 733, "total_steps": 2436, "loss": 3.143955707550049, "lr": 8.826124670490804e-06, "epoch": 0.9027093596059114, "percentage": 30.09, "elapsed_time": "0:13:13", "remaining_time": "0:30:42"} +{"current_steps": 734, "total_steps": 2436, "loss": 2.30763840675354, "lr": 8.821507510013416e-06, "epoch": 0.9039408866995073, "percentage": 30.13, "elapsed_time": "0:13:14", "remaining_time": "0:30:41"} +{"current_steps": 735, "total_steps": 2436, "loss": 3.2019965648651123, "lr": 8.816882499831877e-06, "epoch": 0.9051724137931034, "percentage": 30.17, "elapsed_time": "0:13:15", "remaining_time": "0:30:40"} +{"current_steps": 736, "total_steps": 2436, "loss": 2.5554118156433105, "lr": 8.812249649446357e-06, "epoch": 0.9064039408866995, "percentage": 30.21, "elapsed_time": "0:13:16", "remaining_time": "0:30:39"} +{"current_steps": 737, "total_steps": 2436, "loss": 2.6560721397399902, "lr": 8.807608968373123e-06, "epoch": 0.9076354679802956, "percentage": 30.25, "elapsed_time": "0:13:17", "remaining_time": "0:30:38"} +{"current_steps": 738, "total_steps": 2436, "loss": 3.2792091369628906, "lr": 8.802960466144537e-06, "epoch": 0.9088669950738916, "percentage": 30.3, "elapsed_time": "0:13:18", "remaining_time": "0:30:37"} +{"current_steps": 739, "total_steps": 2436, "loss": 2.4306914806365967, "lr": 8.798304152309019e-06, "epoch": 0.9100985221674877, "percentage": 30.34, "elapsed_time": "0:13:19", "remaining_time": "0:30:36"} +{"current_steps": 740, "total_steps": 2436, "loss": 2.791334867477417, "lr": 8.793640036431036e-06, "epoch": 0.9113300492610837, "percentage": 30.38, "elapsed_time": "0:13:20", "remaining_time": "0:30:34"} +{"current_steps": 741, "total_steps": 2436, "loss": 2.8516879081726074, "lr": 8.788968128091084e-06, "epoch": 0.9125615763546798, "percentage": 30.42, "elapsed_time": "0:13:21", "remaining_time": "0:30:33"} +{"current_steps": 742, "total_steps": 2436, "loss": 2.783674716949463, "lr": 8.784288436885663e-06, "epoch": 0.9137931034482759, "percentage": 30.46, "elapsed_time": "0:13:22", "remaining_time": "0:30:32"} +{"current_steps": 743, "total_steps": 2436, "loss": 2.538564443588257, "lr": 8.779600972427257e-06, "epoch": 0.9150246305418719, "percentage": 30.5, "elapsed_time": "0:13:23", "remaining_time": "0:30:31"} +{"current_steps": 744, "total_steps": 2436, "loss": 2.603914260864258, "lr": 8.774905744344326e-06, "epoch": 0.916256157635468, "percentage": 30.54, "elapsed_time": "0:13:24", "remaining_time": "0:30:30"} +{"current_steps": 745, "total_steps": 2436, "loss": 2.6232197284698486, "lr": 8.770202762281267e-06, "epoch": 0.9174876847290641, "percentage": 30.58, "elapsed_time": "0:13:26", "remaining_time": "0:30:29"} +{"current_steps": 746, "total_steps": 2436, "loss": 2.586906671524048, "lr": 8.765492035898406e-06, "epoch": 0.9187192118226601, "percentage": 30.62, "elapsed_time": "0:13:27", "remaining_time": "0:30:28"} +{"current_steps": 747, "total_steps": 2436, "loss": 3.019075870513916, "lr": 8.760773574871985e-06, "epoch": 0.9199507389162561, "percentage": 30.67, "elapsed_time": "0:13:28", "remaining_time": "0:30:27"} +{"current_steps": 748, "total_steps": 2436, "loss": 2.6554617881774902, "lr": 8.756047388894123e-06, "epoch": 0.9211822660098522, "percentage": 30.71, "elapsed_time": "0:13:29", "remaining_time": "0:30:26"} +{"current_steps": 749, "total_steps": 2436, "loss": 3.3622567653656006, "lr": 8.751313487672815e-06, "epoch": 0.9224137931034483, "percentage": 30.75, "elapsed_time": "0:13:30", "remaining_time": "0:30:25"} +{"current_steps": 750, "total_steps": 2436, "loss": 2.748253345489502, "lr": 8.746571880931896e-06, "epoch": 0.9236453201970444, "percentage": 30.79, "elapsed_time": "0:13:31", "remaining_time": "0:30:23"} +{"current_steps": 751, "total_steps": 2436, "loss": 3.358571767807007, "lr": 8.741822578411036e-06, "epoch": 0.9248768472906403, "percentage": 30.83, "elapsed_time": "0:13:32", "remaining_time": "0:30:22"} +{"current_steps": 752, "total_steps": 2436, "loss": 2.707146167755127, "lr": 8.737065589865709e-06, "epoch": 0.9261083743842364, "percentage": 30.87, "elapsed_time": "0:13:33", "remaining_time": "0:30:21"} +{"current_steps": 753, "total_steps": 2436, "loss": 2.782027006149292, "lr": 8.732300925067177e-06, "epoch": 0.9273399014778325, "percentage": 30.91, "elapsed_time": "0:13:34", "remaining_time": "0:30:20"} +{"current_steps": 754, "total_steps": 2436, "loss": 2.758582830429077, "lr": 8.727528593802469e-06, "epoch": 0.9285714285714286, "percentage": 30.95, "elapsed_time": "0:13:35", "remaining_time": "0:30:19"} +{"current_steps": 755, "total_steps": 2436, "loss": 2.798398971557617, "lr": 8.722748605874365e-06, "epoch": 0.9298029556650246, "percentage": 30.99, "elapsed_time": "0:13:36", "remaining_time": "0:30:18"} +{"current_steps": 756, "total_steps": 2436, "loss": 2.8893141746520996, "lr": 8.717960971101367e-06, "epoch": 0.9310344827586207, "percentage": 31.03, "elapsed_time": "0:13:37", "remaining_time": "0:30:17"} +{"current_steps": 757, "total_steps": 2436, "loss": 2.8260703086853027, "lr": 8.71316569931769e-06, "epoch": 0.9322660098522167, "percentage": 31.08, "elapsed_time": "0:13:38", "remaining_time": "0:30:16"} +{"current_steps": 758, "total_steps": 2436, "loss": 2.8373727798461914, "lr": 8.708362800373235e-06, "epoch": 0.9334975369458128, "percentage": 31.12, "elapsed_time": "0:13:40", "remaining_time": "0:30:15"} +{"current_steps": 759, "total_steps": 2436, "loss": 2.7638840675354004, "lr": 8.703552284133565e-06, "epoch": 0.9347290640394089, "percentage": 31.16, "elapsed_time": "0:13:41", "remaining_time": "0:30:14"} +{"current_steps": 760, "total_steps": 2436, "loss": 3.436288833618164, "lr": 8.698734160479892e-06, "epoch": 0.9359605911330049, "percentage": 31.2, "elapsed_time": "0:13:42", "remaining_time": "0:30:13"} +{"current_steps": 761, "total_steps": 2436, "loss": 2.9463398456573486, "lr": 8.69390843930906e-06, "epoch": 0.937192118226601, "percentage": 31.24, "elapsed_time": "0:13:43", "remaining_time": "0:30:11"} +{"current_steps": 762, "total_steps": 2436, "loss": 2.8301844596862793, "lr": 8.68907513053351e-06, "epoch": 0.9384236453201971, "percentage": 31.28, "elapsed_time": "0:13:44", "remaining_time": "0:30:10"} +{"current_steps": 763, "total_steps": 2436, "loss": 2.329922676086426, "lr": 8.684234244081274e-06, "epoch": 0.9396551724137931, "percentage": 31.32, "elapsed_time": "0:13:45", "remaining_time": "0:30:09"} +{"current_steps": 764, "total_steps": 2436, "loss": 2.2752580642700195, "lr": 8.67938578989595e-06, "epoch": 0.9408866995073891, "percentage": 31.36, "elapsed_time": "0:13:46", "remaining_time": "0:30:08"} +{"current_steps": 765, "total_steps": 2436, "loss": 2.549682378768921, "lr": 8.674529777936674e-06, "epoch": 0.9421182266009852, "percentage": 31.4, "elapsed_time": "0:13:47", "remaining_time": "0:30:07"} +{"current_steps": 766, "total_steps": 2436, "loss": 2.177875518798828, "lr": 8.669666218178114e-06, "epoch": 0.9433497536945813, "percentage": 31.44, "elapsed_time": "0:13:48", "remaining_time": "0:30:06"} +{"current_steps": 767, "total_steps": 2436, "loss": 3.4030704498291016, "lr": 8.66479512061044e-06, "epoch": 0.9445812807881774, "percentage": 31.49, "elapsed_time": "0:13:49", "remaining_time": "0:30:05"} +{"current_steps": 768, "total_steps": 2436, "loss": 2.8890881538391113, "lr": 8.659916495239302e-06, "epoch": 0.9458128078817734, "percentage": 31.53, "elapsed_time": "0:13:50", "remaining_time": "0:30:04"} +{"current_steps": 769, "total_steps": 2436, "loss": 2.6665287017822266, "lr": 8.655030352085816e-06, "epoch": 0.9470443349753694, "percentage": 31.57, "elapsed_time": "0:13:51", "remaining_time": "0:30:03"} +{"current_steps": 770, "total_steps": 2436, "loss": 2.8044798374176025, "lr": 8.650136701186537e-06, "epoch": 0.9482758620689655, "percentage": 31.61, "elapsed_time": "0:13:52", "remaining_time": "0:30:02"} +{"current_steps": 771, "total_steps": 2436, "loss": 2.809295654296875, "lr": 8.645235552593447e-06, "epoch": 0.9495073891625616, "percentage": 31.65, "elapsed_time": "0:13:53", "remaining_time": "0:30:01"} +{"current_steps": 772, "total_steps": 2436, "loss": 2.66239070892334, "lr": 8.640326916373923e-06, "epoch": 0.9507389162561576, "percentage": 31.69, "elapsed_time": "0:13:55", "remaining_time": "0:29:59"} +{"current_steps": 773, "total_steps": 2436, "loss": 3.0714645385742188, "lr": 8.635410802610724e-06, "epoch": 0.9519704433497537, "percentage": 31.73, "elapsed_time": "0:13:56", "remaining_time": "0:29:58"} +{"current_steps": 774, "total_steps": 2436, "loss": 2.5254178047180176, "lr": 8.630487221401974e-06, "epoch": 0.9532019704433498, "percentage": 31.77, "elapsed_time": "0:13:57", "remaining_time": "0:29:57"} +{"current_steps": 775, "total_steps": 2436, "loss": 2.4160585403442383, "lr": 8.625556182861126e-06, "epoch": 0.9544334975369458, "percentage": 31.81, "elapsed_time": "0:13:58", "remaining_time": "0:29:56"} +{"current_steps": 776, "total_steps": 2436, "loss": 2.972367763519287, "lr": 8.620617697116957e-06, "epoch": 0.9556650246305419, "percentage": 31.86, "elapsed_time": "0:13:59", "remaining_time": "0:29:55"} +{"current_steps": 777, "total_steps": 2436, "loss": 2.9206340312957764, "lr": 8.615671774313543e-06, "epoch": 0.9568965517241379, "percentage": 31.9, "elapsed_time": "0:14:00", "remaining_time": "0:29:54"} +{"current_steps": 778, "total_steps": 2436, "loss": 3.192002296447754, "lr": 8.61071842461023e-06, "epoch": 0.958128078817734, "percentage": 31.94, "elapsed_time": "0:14:01", "remaining_time": "0:29:53"} +{"current_steps": 779, "total_steps": 2436, "loss": 3.0840883255004883, "lr": 8.605757658181626e-06, "epoch": 0.9593596059113301, "percentage": 31.98, "elapsed_time": "0:14:02", "remaining_time": "0:29:52"} +{"current_steps": 780, "total_steps": 2436, "loss": 3.344426155090332, "lr": 8.60078948521757e-06, "epoch": 0.9605911330049262, "percentage": 32.02, "elapsed_time": "0:14:03", "remaining_time": "0:29:51"} +{"current_steps": 781, "total_steps": 2436, "loss": 2.887132406234741, "lr": 8.595813915923113e-06, "epoch": 0.9618226600985221, "percentage": 32.06, "elapsed_time": "0:14:04", "remaining_time": "0:29:50"} +{"current_steps": 782, "total_steps": 2436, "loss": 2.354299306869507, "lr": 8.590830960518502e-06, "epoch": 0.9630541871921182, "percentage": 32.1, "elapsed_time": "0:14:05", "remaining_time": "0:29:49"} +{"current_steps": 783, "total_steps": 2436, "loss": 2.574817657470703, "lr": 8.585840629239158e-06, "epoch": 0.9642857142857143, "percentage": 32.14, "elapsed_time": "0:14:06", "remaining_time": "0:29:47"} +{"current_steps": 784, "total_steps": 2436, "loss": 2.3363120555877686, "lr": 8.580842932335644e-06, "epoch": 0.9655172413793104, "percentage": 32.18, "elapsed_time": "0:14:08", "remaining_time": "0:29:46"} +{"current_steps": 785, "total_steps": 2436, "loss": 2.452828884124756, "lr": 8.575837880073663e-06, "epoch": 0.9667487684729064, "percentage": 32.22, "elapsed_time": "0:14:09", "remaining_time": "0:29:45"} +{"current_steps": 786, "total_steps": 2436, "loss": 2.8182177543640137, "lr": 8.57082548273402e-06, "epoch": 0.9679802955665024, "percentage": 32.27, "elapsed_time": "0:14:10", "remaining_time": "0:29:44"} +{"current_steps": 787, "total_steps": 2436, "loss": 3.2871310710906982, "lr": 8.565805750612607e-06, "epoch": 0.9692118226600985, "percentage": 32.31, "elapsed_time": "0:14:11", "remaining_time": "0:29:43"} +{"current_steps": 788, "total_steps": 2436, "loss": 2.959153175354004, "lr": 8.560778694020387e-06, "epoch": 0.9704433497536946, "percentage": 32.35, "elapsed_time": "0:14:12", "remaining_time": "0:29:42"} +{"current_steps": 789, "total_steps": 2436, "loss": 2.859107732772827, "lr": 8.555744323283364e-06, "epoch": 0.9716748768472906, "percentage": 32.39, "elapsed_time": "0:14:13", "remaining_time": "0:29:41"} +{"current_steps": 790, "total_steps": 2436, "loss": 2.8537421226501465, "lr": 8.550702648742566e-06, "epoch": 0.9729064039408867, "percentage": 32.43, "elapsed_time": "0:14:14", "remaining_time": "0:29:40"} +{"current_steps": 791, "total_steps": 2436, "loss": 2.77693772315979, "lr": 8.545653680754029e-06, "epoch": 0.9741379310344828, "percentage": 32.47, "elapsed_time": "0:14:15", "remaining_time": "0:29:39"} +{"current_steps": 792, "total_steps": 2436, "loss": 2.6960999965667725, "lr": 8.540597429688761e-06, "epoch": 0.9753694581280788, "percentage": 32.51, "elapsed_time": "0:14:16", "remaining_time": "0:29:38"} +{"current_steps": 793, "total_steps": 2436, "loss": 3.3942298889160156, "lr": 8.535533905932739e-06, "epoch": 0.9766009852216748, "percentage": 32.55, "elapsed_time": "0:14:17", "remaining_time": "0:29:37"} +{"current_steps": 794, "total_steps": 2436, "loss": 2.8664398193359375, "lr": 8.530463119886871e-06, "epoch": 0.9778325123152709, "percentage": 32.59, "elapsed_time": "0:14:18", "remaining_time": "0:29:35"} +{"current_steps": 795, "total_steps": 2436, "loss": 3.023148536682129, "lr": 8.525385081966992e-06, "epoch": 0.979064039408867, "percentage": 32.64, "elapsed_time": "0:14:19", "remaining_time": "0:29:34"} +{"current_steps": 796, "total_steps": 2436, "loss": 2.7858657836914062, "lr": 8.520299802603826e-06, "epoch": 0.9802955665024631, "percentage": 32.68, "elapsed_time": "0:14:20", "remaining_time": "0:29:33"} +{"current_steps": 797, "total_steps": 2436, "loss": 2.4665451049804688, "lr": 8.515207292242969e-06, "epoch": 0.9815270935960592, "percentage": 32.72, "elapsed_time": "0:14:22", "remaining_time": "0:29:32"} +{"current_steps": 798, "total_steps": 2436, "loss": 2.412269115447998, "lr": 8.510107561344876e-06, "epoch": 0.9827586206896551, "percentage": 32.76, "elapsed_time": "0:14:23", "remaining_time": "0:29:31"} +{"current_steps": 799, "total_steps": 2436, "loss": 3.08200740814209, "lr": 8.505000620384834e-06, "epoch": 0.9839901477832512, "percentage": 32.8, "elapsed_time": "0:14:24", "remaining_time": "0:29:30"} +{"current_steps": 800, "total_steps": 2436, "loss": 2.851126194000244, "lr": 8.499886479852935e-06, "epoch": 0.9852216748768473, "percentage": 32.84, "elapsed_time": "0:14:25", "remaining_time": "0:29:29"} +{"current_steps": 801, "total_steps": 2436, "loss": 2.7692008018493652, "lr": 8.494765150254063e-06, "epoch": 0.9864532019704434, "percentage": 32.88, "elapsed_time": "0:14:26", "remaining_time": "0:29:28"} +{"current_steps": 802, "total_steps": 2436, "loss": 2.045649290084839, "lr": 8.489636642107867e-06, "epoch": 0.9876847290640394, "percentage": 32.92, "elapsed_time": "0:14:27", "remaining_time": "0:29:27"} +{"current_steps": 803, "total_steps": 2436, "loss": 3.0901870727539062, "lr": 8.484500965948746e-06, "epoch": 0.9889162561576355, "percentage": 32.96, "elapsed_time": "0:14:28", "remaining_time": "0:29:26"} +{"current_steps": 804, "total_steps": 2436, "loss": 4.652253150939941, "lr": 8.479358132325815e-06, "epoch": 0.9901477832512315, "percentage": 33.0, "elapsed_time": "0:14:29", "remaining_time": "0:29:25"} +{"current_steps": 805, "total_steps": 2436, "loss": 3.992189884185791, "lr": 8.474208151802898e-06, "epoch": 0.9913793103448276, "percentage": 33.05, "elapsed_time": "0:14:30", "remaining_time": "0:29:23"} +{"current_steps": 806, "total_steps": 2436, "loss": 2.7150464057922363, "lr": 8.469051034958496e-06, "epoch": 0.9926108374384236, "percentage": 33.09, "elapsed_time": "0:14:31", "remaining_time": "0:29:22"} +{"current_steps": 807, "total_steps": 2436, "loss": 2.807770013809204, "lr": 8.46388679238577e-06, "epoch": 0.9938423645320197, "percentage": 33.13, "elapsed_time": "0:14:32", "remaining_time": "0:29:21"} +{"current_steps": 808, "total_steps": 2436, "loss": 2.386625289916992, "lr": 8.458715434692515e-06, "epoch": 0.9950738916256158, "percentage": 33.17, "elapsed_time": "0:14:33", "remaining_time": "0:29:20"} +{"current_steps": 809, "total_steps": 2436, "loss": 2.585855484008789, "lr": 8.453536972501146e-06, "epoch": 0.9963054187192119, "percentage": 33.21, "elapsed_time": "0:14:34", "remaining_time": "0:29:19"} +{"current_steps": 810, "total_steps": 2436, "loss": 1.9756630659103394, "lr": 8.448351416448664e-06, "epoch": 0.9975369458128078, "percentage": 33.25, "elapsed_time": "0:14:36", "remaining_time": "0:29:18"} +{"current_steps": 811, "total_steps": 2436, "loss": 2.844794511795044, "lr": 8.443158777186652e-06, "epoch": 0.9987684729064039, "percentage": 33.29, "elapsed_time": "0:14:37", "remaining_time": "0:29:17"} +{"current_steps": 812, "total_steps": 2436, "loss": 2.8835721015930176, "lr": 8.437959065381232e-06, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:14:38", "remaining_time": "0:29:16"} +{"current_steps": 813, "total_steps": 2436, "loss": 1.4173179864883423, "lr": 8.432752291713058e-06, "epoch": 1.001231527093596, "percentage": 33.37, "elapsed_time": "0:15:51", "remaining_time": "0:31:39"} +{"current_steps": 814, "total_steps": 2436, "loss": 1.3743655681610107, "lr": 8.427538466877294e-06, "epoch": 1.0024630541871922, "percentage": 33.42, "elapsed_time": "0:15:52", "remaining_time": "0:31:38"} +{"current_steps": 815, "total_steps": 2436, "loss": 1.448968768119812, "lr": 8.422317601583576e-06, "epoch": 1.0036945812807883, "percentage": 33.46, "elapsed_time": "0:15:53", "remaining_time": "0:31:37"} +{"current_steps": 816, "total_steps": 2436, "loss": 1.4555410146713257, "lr": 8.417089706556015e-06, "epoch": 1.0049261083743843, "percentage": 33.5, "elapsed_time": "0:15:54", "remaining_time": "0:31:35"} +{"current_steps": 817, "total_steps": 2436, "loss": 1.3096075057983398, "lr": 8.411854792533154e-06, "epoch": 1.0061576354679802, "percentage": 33.54, "elapsed_time": "0:15:56", "remaining_time": "0:31:34"} +{"current_steps": 818, "total_steps": 2436, "loss": 1.8452348709106445, "lr": 8.406612870267957e-06, "epoch": 1.0073891625615763, "percentage": 33.58, "elapsed_time": "0:15:57", "remaining_time": "0:31:33"} +{"current_steps": 819, "total_steps": 2436, "loss": 1.6339285373687744, "lr": 8.401363950527777e-06, "epoch": 1.0086206896551724, "percentage": 33.62, "elapsed_time": "0:15:58", "remaining_time": "0:31:31"} +{"current_steps": 820, "total_steps": 2436, "loss": 1.714133381843567, "lr": 8.39610804409435e-06, "epoch": 1.0098522167487685, "percentage": 33.66, "elapsed_time": "0:15:59", "remaining_time": "0:31:30"} +{"current_steps": 821, "total_steps": 2436, "loss": 1.7810550928115845, "lr": 8.390845161763756e-06, "epoch": 1.0110837438423645, "percentage": 33.7, "elapsed_time": "0:16:00", "remaining_time": "0:31:29"} +{"current_steps": 822, "total_steps": 2436, "loss": 1.2523250579833984, "lr": 8.385575314346408e-06, "epoch": 1.0123152709359606, "percentage": 33.74, "elapsed_time": "0:16:01", "remaining_time": "0:31:27"} +{"current_steps": 823, "total_steps": 2436, "loss": 1.4618515968322754, "lr": 8.380298512667023e-06, "epoch": 1.0135467980295567, "percentage": 33.78, "elapsed_time": "0:16:02", "remaining_time": "0:31:26"} +{"current_steps": 824, "total_steps": 2436, "loss": 1.5188508033752441, "lr": 8.375014767564606e-06, "epoch": 1.0147783251231528, "percentage": 33.83, "elapsed_time": "0:16:03", "remaining_time": "0:31:25"} +{"current_steps": 825, "total_steps": 2436, "loss": 1.3847301006317139, "lr": 8.369724089892423e-06, "epoch": 1.0160098522167487, "percentage": 33.87, "elapsed_time": "0:16:04", "remaining_time": "0:31:23"} +{"current_steps": 826, "total_steps": 2436, "loss": 1.2926149368286133, "lr": 8.364426490517978e-06, "epoch": 1.0172413793103448, "percentage": 33.91, "elapsed_time": "0:16:05", "remaining_time": "0:31:22"} +{"current_steps": 827, "total_steps": 2436, "loss": 2.3063907623291016, "lr": 8.359121980322992e-06, "epoch": 1.0184729064039408, "percentage": 33.95, "elapsed_time": "0:16:06", "remaining_time": "0:31:21"} +{"current_steps": 828, "total_steps": 2436, "loss": 1.8268505334854126, "lr": 8.353810570203392e-06, "epoch": 1.019704433497537, "percentage": 33.99, "elapsed_time": "0:16:07", "remaining_time": "0:31:19"} +{"current_steps": 829, "total_steps": 2436, "loss": 1.7018903493881226, "lr": 8.34849227106926e-06, "epoch": 1.020935960591133, "percentage": 34.03, "elapsed_time": "0:16:09", "remaining_time": "0:31:18"} +{"current_steps": 830, "total_steps": 2436, "loss": 1.228044867515564, "lr": 8.343167093844847e-06, "epoch": 1.022167487684729, "percentage": 34.07, "elapsed_time": "0:16:10", "remaining_time": "0:31:17"} +{"current_steps": 831, "total_steps": 2436, "loss": 1.8953372240066528, "lr": 8.337835049468517e-06, "epoch": 1.0233990147783252, "percentage": 34.11, "elapsed_time": "0:16:11", "remaining_time": "0:31:15"} +{"current_steps": 832, "total_steps": 2436, "loss": 2.2595765590667725, "lr": 8.332496148892748e-06, "epoch": 1.0246305418719213, "percentage": 34.15, "elapsed_time": "0:16:12", "remaining_time": "0:31:14"} +{"current_steps": 833, "total_steps": 2436, "loss": 1.9772108793258667, "lr": 8.327150403084105e-06, "epoch": 1.0258620689655173, "percentage": 34.2, "elapsed_time": "0:16:13", "remaining_time": "0:31:13"} +{"current_steps": 834, "total_steps": 2436, "loss": 1.6397690773010254, "lr": 8.321797823023201e-06, "epoch": 1.0270935960591132, "percentage": 34.24, "elapsed_time": "0:16:14", "remaining_time": "0:31:11"} +{"current_steps": 835, "total_steps": 2436, "loss": 1.8092628717422485, "lr": 8.3164384197047e-06, "epoch": 1.0283251231527093, "percentage": 34.28, "elapsed_time": "0:16:15", "remaining_time": "0:31:10"} +{"current_steps": 836, "total_steps": 2436, "loss": 1.4974594116210938, "lr": 8.311072204137272e-06, "epoch": 1.0295566502463054, "percentage": 34.32, "elapsed_time": "0:16:16", "remaining_time": "0:31:09"} +{"current_steps": 837, "total_steps": 2436, "loss": 1.6198664903640747, "lr": 8.305699187343586e-06, "epoch": 1.0307881773399015, "percentage": 34.36, "elapsed_time": "0:16:17", "remaining_time": "0:31:07"} +{"current_steps": 838, "total_steps": 2436, "loss": 1.3746960163116455, "lr": 8.300319380360278e-06, "epoch": 1.0320197044334976, "percentage": 34.4, "elapsed_time": "0:16:18", "remaining_time": "0:31:06"} +{"current_steps": 839, "total_steps": 2436, "loss": 1.6171293258666992, "lr": 8.294932794237936e-06, "epoch": 1.0332512315270936, "percentage": 34.44, "elapsed_time": "0:16:19", "remaining_time": "0:31:05"} +{"current_steps": 840, "total_steps": 2436, "loss": 1.569738507270813, "lr": 8.289539440041066e-06, "epoch": 1.0344827586206897, "percentage": 34.48, "elapsed_time": "0:16:20", "remaining_time": "0:31:03"} +{"current_steps": 841, "total_steps": 2436, "loss": 1.2823517322540283, "lr": 8.284139328848083e-06, "epoch": 1.0357142857142858, "percentage": 34.52, "elapsed_time": "0:16:21", "remaining_time": "0:31:02"} +{"current_steps": 842, "total_steps": 2436, "loss": 1.646303415298462, "lr": 8.278732471751275e-06, "epoch": 1.0369458128078817, "percentage": 34.56, "elapsed_time": "0:16:23", "remaining_time": "0:31:01"} +{"current_steps": 843, "total_steps": 2436, "loss": 1.1557375192642212, "lr": 8.273318879856794e-06, "epoch": 1.0381773399014778, "percentage": 34.61, "elapsed_time": "0:16:24", "remaining_time": "0:30:59"} +{"current_steps": 844, "total_steps": 2436, "loss": 1.8793773651123047, "lr": 8.26789856428462e-06, "epoch": 1.0394088669950738, "percentage": 34.65, "elapsed_time": "0:16:25", "remaining_time": "0:30:58"} +{"current_steps": 845, "total_steps": 2436, "loss": 1.8577170372009277, "lr": 8.262471536168547e-06, "epoch": 1.04064039408867, "percentage": 34.69, "elapsed_time": "0:16:26", "remaining_time": "0:30:57"} +{"current_steps": 846, "total_steps": 2436, "loss": 1.6104650497436523, "lr": 8.257037806656156e-06, "epoch": 1.041871921182266, "percentage": 34.73, "elapsed_time": "0:16:27", "remaining_time": "0:30:55"} +{"current_steps": 847, "total_steps": 2436, "loss": 1.5425922870635986, "lr": 8.251597386908791e-06, "epoch": 1.043103448275862, "percentage": 34.77, "elapsed_time": "0:16:28", "remaining_time": "0:30:54"} +{"current_steps": 848, "total_steps": 2436, "loss": 1.681383728981018, "lr": 8.246150288101544e-06, "epoch": 1.0443349753694582, "percentage": 34.81, "elapsed_time": "0:16:29", "remaining_time": "0:30:53"} +{"current_steps": 849, "total_steps": 2436, "loss": 1.7646219730377197, "lr": 8.240696521423221e-06, "epoch": 1.0455665024630543, "percentage": 34.85, "elapsed_time": "0:16:30", "remaining_time": "0:30:51"} +{"current_steps": 850, "total_steps": 2436, "loss": 1.445223331451416, "lr": 8.23523609807633e-06, "epoch": 1.0467980295566504, "percentage": 34.89, "elapsed_time": "0:16:31", "remaining_time": "0:30:50"} +{"current_steps": 851, "total_steps": 2436, "loss": 0.9492518901824951, "lr": 8.229769029277044e-06, "epoch": 1.0480295566502462, "percentage": 34.93, "elapsed_time": "0:16:32", "remaining_time": "0:30:49"} +{"current_steps": 852, "total_steps": 2436, "loss": 1.33433997631073, "lr": 8.224295326255194e-06, "epoch": 1.0492610837438423, "percentage": 34.98, "elapsed_time": "0:16:33", "remaining_time": "0:30:47"} +{"current_steps": 853, "total_steps": 2436, "loss": 1.712221384048462, "lr": 8.218815000254233e-06, "epoch": 1.0504926108374384, "percentage": 35.02, "elapsed_time": "0:16:34", "remaining_time": "0:30:46"} +{"current_steps": 854, "total_steps": 2436, "loss": 2.256254196166992, "lr": 8.213328062531223e-06, "epoch": 1.0517241379310345, "percentage": 35.06, "elapsed_time": "0:16:36", "remaining_time": "0:30:45"} +{"current_steps": 855, "total_steps": 2436, "loss": 1.1827871799468994, "lr": 8.207834524356804e-06, "epoch": 1.0529556650246306, "percentage": 35.1, "elapsed_time": "0:16:37", "remaining_time": "0:30:43"} +{"current_steps": 856, "total_steps": 2436, "loss": 1.831944465637207, "lr": 8.202334397015173e-06, "epoch": 1.0541871921182266, "percentage": 35.14, "elapsed_time": "0:16:38", "remaining_time": "0:30:42"} +{"current_steps": 857, "total_steps": 2436, "loss": 1.4239716529846191, "lr": 8.196827691804066e-06, "epoch": 1.0554187192118227, "percentage": 35.18, "elapsed_time": "0:16:39", "remaining_time": "0:30:41"} +{"current_steps": 858, "total_steps": 2436, "loss": 1.4468379020690918, "lr": 8.191314420034728e-06, "epoch": 1.0566502463054188, "percentage": 35.22, "elapsed_time": "0:16:40", "remaining_time": "0:30:39"} +{"current_steps": 859, "total_steps": 2436, "loss": 1.5082018375396729, "lr": 8.185794593031889e-06, "epoch": 1.0578817733990147, "percentage": 35.26, "elapsed_time": "0:16:41", "remaining_time": "0:30:38"} +{"current_steps": 860, "total_steps": 2436, "loss": 1.7838118076324463, "lr": 8.180268222133748e-06, "epoch": 1.0591133004926108, "percentage": 35.3, "elapsed_time": "0:16:42", "remaining_time": "0:30:37"} +{"current_steps": 861, "total_steps": 2436, "loss": 2.0072226524353027, "lr": 8.174735318691946e-06, "epoch": 1.0603448275862069, "percentage": 35.34, "elapsed_time": "0:16:43", "remaining_time": "0:30:35"} +{"current_steps": 862, "total_steps": 2436, "loss": 1.521295189857483, "lr": 8.16919589407154e-06, "epoch": 1.061576354679803, "percentage": 35.39, "elapsed_time": "0:16:44", "remaining_time": "0:30:34"} +{"current_steps": 863, "total_steps": 2436, "loss": 1.790357232093811, "lr": 8.163649959650983e-06, "epoch": 1.062807881773399, "percentage": 35.43, "elapsed_time": "0:16:45", "remaining_time": "0:30:33"} +{"current_steps": 864, "total_steps": 2436, "loss": 1.602294683456421, "lr": 8.1580975268221e-06, "epoch": 1.064039408866995, "percentage": 35.47, "elapsed_time": "0:16:46", "remaining_time": "0:30:31"} +{"current_steps": 865, "total_steps": 2436, "loss": 1.4220796823501587, "lr": 8.152538606990065e-06, "epoch": 1.0652709359605912, "percentage": 35.51, "elapsed_time": "0:16:47", "remaining_time": "0:30:30"} +{"current_steps": 866, "total_steps": 2436, "loss": 1.5728261470794678, "lr": 8.146973211573378e-06, "epoch": 1.0665024630541873, "percentage": 35.55, "elapsed_time": "0:16:49", "remaining_time": "0:30:29"} +{"current_steps": 867, "total_steps": 2436, "loss": 1.4759845733642578, "lr": 8.141401352003834e-06, "epoch": 1.0677339901477834, "percentage": 35.59, "elapsed_time": "0:16:50", "remaining_time": "0:30:27"} +{"current_steps": 868, "total_steps": 2436, "loss": 1.0524405241012573, "lr": 8.135823039726513e-06, "epoch": 1.0689655172413792, "percentage": 35.63, "elapsed_time": "0:16:51", "remaining_time": "0:30:26"} +{"current_steps": 869, "total_steps": 2436, "loss": 1.538460373878479, "lr": 8.130238286199747e-06, "epoch": 1.0701970443349753, "percentage": 35.67, "elapsed_time": "0:16:52", "remaining_time": "0:30:25"} +{"current_steps": 870, "total_steps": 2436, "loss": 1.1455146074295044, "lr": 8.124647102895098e-06, "epoch": 1.0714285714285714, "percentage": 35.71, "elapsed_time": "0:16:53", "remaining_time": "0:30:24"} +{"current_steps": 871, "total_steps": 2436, "loss": 1.5209722518920898, "lr": 8.119049501297336e-06, "epoch": 1.0726600985221675, "percentage": 35.76, "elapsed_time": "0:16:54", "remaining_time": "0:30:22"} +{"current_steps": 872, "total_steps": 2436, "loss": 1.359959602355957, "lr": 8.113445492904416e-06, "epoch": 1.0738916256157636, "percentage": 35.8, "elapsed_time": "0:16:55", "remaining_time": "0:30:21"} +{"current_steps": 873, "total_steps": 2436, "loss": 0.7508935928344727, "lr": 8.107835089227446e-06, "epoch": 1.0751231527093597, "percentage": 35.84, "elapsed_time": "0:16:56", "remaining_time": "0:30:20"} +{"current_steps": 874, "total_steps": 2436, "loss": 1.1200660467147827, "lr": 8.102218301790686e-06, "epoch": 1.0763546798029557, "percentage": 35.88, "elapsed_time": "0:16:57", "remaining_time": "0:30:18"} +{"current_steps": 875, "total_steps": 2436, "loss": 1.4502555131912231, "lr": 8.096595142131491e-06, "epoch": 1.0775862068965518, "percentage": 35.92, "elapsed_time": "0:16:58", "remaining_time": "0:30:17"} +{"current_steps": 876, "total_steps": 2436, "loss": 1.4533472061157227, "lr": 8.090965621800317e-06, "epoch": 1.0788177339901477, "percentage": 35.96, "elapsed_time": "0:16:59", "remaining_time": "0:30:16"} +{"current_steps": 877, "total_steps": 2436, "loss": 1.3467981815338135, "lr": 8.085329752360683e-06, "epoch": 1.0800492610837438, "percentage": 36.0, "elapsed_time": "0:17:00", "remaining_time": "0:30:14"} +{"current_steps": 878, "total_steps": 2436, "loss": 1.5720915794372559, "lr": 8.079687545389144e-06, "epoch": 1.0812807881773399, "percentage": 36.04, "elapsed_time": "0:17:02", "remaining_time": "0:30:13"} +{"current_steps": 879, "total_steps": 2436, "loss": 0.9794504642486572, "lr": 8.074039012475277e-06, "epoch": 1.082512315270936, "percentage": 36.08, "elapsed_time": "0:17:03", "remaining_time": "0:30:12"} +{"current_steps": 880, "total_steps": 2436, "loss": 1.8581080436706543, "lr": 8.068384165221657e-06, "epoch": 1.083743842364532, "percentage": 36.12, "elapsed_time": "0:17:04", "remaining_time": "0:30:10"} +{"current_steps": 881, "total_steps": 2436, "loss": 1.5307658910751343, "lr": 8.062723015243821e-06, "epoch": 1.0849753694581281, "percentage": 36.17, "elapsed_time": "0:17:05", "remaining_time": "0:30:09"} +{"current_steps": 882, "total_steps": 2436, "loss": 2.7890782356262207, "lr": 8.05705557417026e-06, "epoch": 1.0862068965517242, "percentage": 36.21, "elapsed_time": "0:17:06", "remaining_time": "0:30:08"} +{"current_steps": 883, "total_steps": 2436, "loss": 1.7938904762268066, "lr": 8.051381853642385e-06, "epoch": 1.0874384236453203, "percentage": 36.25, "elapsed_time": "0:17:07", "remaining_time": "0:30:07"} +{"current_steps": 884, "total_steps": 2436, "loss": 1.7228388786315918, "lr": 8.0457018653145e-06, "epoch": 1.0886699507389164, "percentage": 36.29, "elapsed_time": "0:17:08", "remaining_time": "0:30:05"} +{"current_steps": 885, "total_steps": 2436, "loss": 1.2761911153793335, "lr": 8.04001562085379e-06, "epoch": 1.0899014778325122, "percentage": 36.33, "elapsed_time": "0:17:09", "remaining_time": "0:30:04"} +{"current_steps": 886, "total_steps": 2436, "loss": 1.2001762390136719, "lr": 8.034323131940288e-06, "epoch": 1.0911330049261083, "percentage": 36.37, "elapsed_time": "0:17:10", "remaining_time": "0:30:03"} +{"current_steps": 887, "total_steps": 2436, "loss": 1.0602792501449585, "lr": 8.028624410266856e-06, "epoch": 1.0923645320197044, "percentage": 36.41, "elapsed_time": "0:17:11", "remaining_time": "0:30:01"} +{"current_steps": 888, "total_steps": 2436, "loss": 1.6093053817749023, "lr": 8.022919467539157e-06, "epoch": 1.0935960591133005, "percentage": 36.45, "elapsed_time": "0:17:12", "remaining_time": "0:30:00"} +{"current_steps": 889, "total_steps": 2436, "loss": 1.3845837116241455, "lr": 8.017208315475633e-06, "epoch": 1.0948275862068966, "percentage": 36.49, "elapsed_time": "0:17:13", "remaining_time": "0:29:59"} +{"current_steps": 890, "total_steps": 2436, "loss": 1.170523762702942, "lr": 8.011490965807479e-06, "epoch": 1.0960591133004927, "percentage": 36.54, "elapsed_time": "0:17:14", "remaining_time": "0:29:57"} +{"current_steps": 891, "total_steps": 2436, "loss": 2.2524640560150146, "lr": 8.005767430278619e-06, "epoch": 1.0972906403940887, "percentage": 36.58, "elapsed_time": "0:17:16", "remaining_time": "0:29:56"} +{"current_steps": 892, "total_steps": 2436, "loss": 1.900492787361145, "lr": 8.00003772064569e-06, "epoch": 1.0985221674876848, "percentage": 36.62, "elapsed_time": "0:17:17", "remaining_time": "0:29:55"} +{"current_steps": 893, "total_steps": 2436, "loss": 1.9371180534362793, "lr": 7.994301848678006e-06, "epoch": 1.0997536945812807, "percentage": 36.66, "elapsed_time": "0:17:18", "remaining_time": "0:29:53"} +{"current_steps": 894, "total_steps": 2436, "loss": 0.5737314224243164, "lr": 7.98855982615754e-06, "epoch": 1.1009852216748768, "percentage": 36.7, "elapsed_time": "0:17:19", "remaining_time": "0:29:52"} +{"current_steps": 895, "total_steps": 2436, "loss": 1.9806501865386963, "lr": 7.982811664878897e-06, "epoch": 1.1022167487684729, "percentage": 36.74, "elapsed_time": "0:17:20", "remaining_time": "0:29:51"} +{"current_steps": 896, "total_steps": 2436, "loss": 1.0362755060195923, "lr": 7.977057376649295e-06, "epoch": 1.103448275862069, "percentage": 36.78, "elapsed_time": "0:17:21", "remaining_time": "0:29:49"} +{"current_steps": 897, "total_steps": 2436, "loss": 1.70633864402771, "lr": 7.971296973288534e-06, "epoch": 1.104679802955665, "percentage": 36.82, "elapsed_time": "0:17:22", "remaining_time": "0:29:48"} +{"current_steps": 898, "total_steps": 2436, "loss": 1.787100911140442, "lr": 7.965530466628977e-06, "epoch": 1.1059113300492611, "percentage": 36.86, "elapsed_time": "0:17:23", "remaining_time": "0:29:47"} +{"current_steps": 899, "total_steps": 2436, "loss": 1.725630283355713, "lr": 7.959757868515526e-06, "epoch": 1.1071428571428572, "percentage": 36.9, "elapsed_time": "0:17:24", "remaining_time": "0:29:46"} +{"current_steps": 900, "total_steps": 2436, "loss": 1.216347575187683, "lr": 7.953979190805587e-06, "epoch": 1.1083743842364533, "percentage": 36.95, "elapsed_time": "0:17:25", "remaining_time": "0:29:44"} +{"current_steps": 901, "total_steps": 2436, "loss": 1.4683033227920532, "lr": 7.948194445369065e-06, "epoch": 1.1096059113300494, "percentage": 36.99, "elapsed_time": "0:17:26", "remaining_time": "0:29:43"} +{"current_steps": 902, "total_steps": 2436, "loss": 1.1516010761260986, "lr": 7.942403644088319e-06, "epoch": 1.1108374384236452, "percentage": 37.03, "elapsed_time": "0:17:27", "remaining_time": "0:29:42"} +{"current_steps": 903, "total_steps": 2436, "loss": 1.9040346145629883, "lr": 7.936606798858154e-06, "epoch": 1.1120689655172413, "percentage": 37.07, "elapsed_time": "0:17:29", "remaining_time": "0:29:40"} +{"current_steps": 904, "total_steps": 2436, "loss": 1.3092480897903442, "lr": 7.930803921585787e-06, "epoch": 1.1133004926108374, "percentage": 37.11, "elapsed_time": "0:17:30", "remaining_time": "0:29:39"} +{"current_steps": 905, "total_steps": 2436, "loss": 1.5384130477905273, "lr": 7.924995024190825e-06, "epoch": 1.1145320197044335, "percentage": 37.15, "elapsed_time": "0:17:31", "remaining_time": "0:29:38"} +{"current_steps": 906, "total_steps": 2436, "loss": 1.537634015083313, "lr": 7.91918011860524e-06, "epoch": 1.1157635467980296, "percentage": 37.19, "elapsed_time": "0:17:32", "remaining_time": "0:29:36"} +{"current_steps": 907, "total_steps": 2436, "loss": 1.7487473487854004, "lr": 7.91335921677335e-06, "epoch": 1.1169950738916257, "percentage": 37.23, "elapsed_time": "0:17:33", "remaining_time": "0:29:35"} +{"current_steps": 908, "total_steps": 2436, "loss": 2.079786539077759, "lr": 7.907532330651784e-06, "epoch": 1.1182266009852218, "percentage": 37.27, "elapsed_time": "0:17:34", "remaining_time": "0:29:34"} +{"current_steps": 909, "total_steps": 2436, "loss": 1.8143104314804077, "lr": 7.901699472209467e-06, "epoch": 1.1194581280788178, "percentage": 37.32, "elapsed_time": "0:17:35", "remaining_time": "0:29:33"} +{"current_steps": 910, "total_steps": 2436, "loss": 1.532914161682129, "lr": 7.89586065342759e-06, "epoch": 1.1206896551724137, "percentage": 37.36, "elapsed_time": "0:17:36", "remaining_time": "0:29:31"} +{"current_steps": 911, "total_steps": 2436, "loss": 1.2643623352050781, "lr": 7.890015886299587e-06, "epoch": 1.1219211822660098, "percentage": 37.4, "elapsed_time": "0:17:37", "remaining_time": "0:29:30"} +{"current_steps": 912, "total_steps": 2436, "loss": 1.9245643615722656, "lr": 7.884165182831112e-06, "epoch": 1.1231527093596059, "percentage": 37.44, "elapsed_time": "0:17:38", "remaining_time": "0:29:29"} +{"current_steps": 913, "total_steps": 2436, "loss": 1.7177766561508179, "lr": 7.878308555040012e-06, "epoch": 1.124384236453202, "percentage": 37.48, "elapsed_time": "0:17:39", "remaining_time": "0:29:27"} +{"current_steps": 914, "total_steps": 2436, "loss": 1.8152745962142944, "lr": 7.872446014956302e-06, "epoch": 1.125615763546798, "percentage": 37.52, "elapsed_time": "0:17:40", "remaining_time": "0:29:26"} +{"current_steps": 915, "total_steps": 2436, "loss": 1.1599400043487549, "lr": 7.86657757462214e-06, "epoch": 1.1268472906403941, "percentage": 37.56, "elapsed_time": "0:17:41", "remaining_time": "0:29:25"} +{"current_steps": 916, "total_steps": 2436, "loss": 2.191415786743164, "lr": 7.860703246091808e-06, "epoch": 1.1280788177339902, "percentage": 37.6, "elapsed_time": "0:17:43", "remaining_time": "0:29:23"} +{"current_steps": 917, "total_steps": 2436, "loss": 1.395401120185852, "lr": 7.85482304143168e-06, "epoch": 1.1293103448275863, "percentage": 37.64, "elapsed_time": "0:17:44", "remaining_time": "0:29:22"} +{"current_steps": 918, "total_steps": 2436, "loss": 1.3161064386367798, "lr": 7.848936972720203e-06, "epoch": 1.1305418719211824, "percentage": 37.68, "elapsed_time": "0:17:45", "remaining_time": "0:29:21"} +{"current_steps": 919, "total_steps": 2436, "loss": 1.1442368030548096, "lr": 7.843045052047863e-06, "epoch": 1.1317733990147782, "percentage": 37.73, "elapsed_time": "0:17:46", "remaining_time": "0:29:20"} +{"current_steps": 920, "total_steps": 2436, "loss": 1.7718126773834229, "lr": 7.837147291517172e-06, "epoch": 1.1330049261083743, "percentage": 37.77, "elapsed_time": "0:17:47", "remaining_time": "0:29:18"} +{"current_steps": 921, "total_steps": 2436, "loss": 0.8722761869430542, "lr": 7.831243703242636e-06, "epoch": 1.1342364532019704, "percentage": 37.81, "elapsed_time": "0:17:48", "remaining_time": "0:29:17"} +{"current_steps": 922, "total_steps": 2436, "loss": 1.5427806377410889, "lr": 7.825334299350733e-06, "epoch": 1.1354679802955665, "percentage": 37.85, "elapsed_time": "0:17:49", "remaining_time": "0:29:16"} +{"current_steps": 923, "total_steps": 2436, "loss": 1.1668936014175415, "lr": 7.819419091979884e-06, "epoch": 1.1366995073891626, "percentage": 37.89, "elapsed_time": "0:17:50", "remaining_time": "0:29:14"} +{"current_steps": 924, "total_steps": 2436, "loss": 1.1266424655914307, "lr": 7.813498093280432e-06, "epoch": 1.1379310344827587, "percentage": 37.93, "elapsed_time": "0:17:51", "remaining_time": "0:29:13"} +{"current_steps": 925, "total_steps": 2436, "loss": 1.493699550628662, "lr": 7.807571315414616e-06, "epoch": 1.1391625615763548, "percentage": 37.97, "elapsed_time": "0:17:52", "remaining_time": "0:29:12"} +{"current_steps": 926, "total_steps": 2436, "loss": 1.6297705173492432, "lr": 7.801638770556547e-06, "epoch": 1.1403940886699506, "percentage": 38.01, "elapsed_time": "0:17:53", "remaining_time": "0:29:10"} +{"current_steps": 927, "total_steps": 2436, "loss": 2.0215024948120117, "lr": 7.795700470892177e-06, "epoch": 1.1416256157635467, "percentage": 38.05, "elapsed_time": "0:17:54", "remaining_time": "0:29:09"} +{"current_steps": 928, "total_steps": 2436, "loss": 1.6887433528900146, "lr": 7.78975642861929e-06, "epoch": 1.1428571428571428, "percentage": 38.1, "elapsed_time": "0:17:55", "remaining_time": "0:29:08"} +{"current_steps": 929, "total_steps": 2436, "loss": 1.3021103143692017, "lr": 7.783806655947454e-06, "epoch": 1.1440886699507389, "percentage": 38.14, "elapsed_time": "0:17:57", "remaining_time": "0:29:07"} +{"current_steps": 930, "total_steps": 2436, "loss": 1.2565847635269165, "lr": 7.777851165098012e-06, "epoch": 1.145320197044335, "percentage": 38.18, "elapsed_time": "0:17:58", "remaining_time": "0:29:05"} +{"current_steps": 931, "total_steps": 2436, "loss": 2.616732358932495, "lr": 7.771889968304054e-06, "epoch": 1.146551724137931, "percentage": 38.22, "elapsed_time": "0:17:59", "remaining_time": "0:29:04"} +{"current_steps": 932, "total_steps": 2436, "loss": 1.4130675792694092, "lr": 7.765923077810389e-06, "epoch": 1.1477832512315271, "percentage": 38.26, "elapsed_time": "0:18:00", "remaining_time": "0:29:03"} +{"current_steps": 933, "total_steps": 2436, "loss": 1.4476386308670044, "lr": 7.759950505873523e-06, "epoch": 1.1490147783251232, "percentage": 38.3, "elapsed_time": "0:18:01", "remaining_time": "0:29:01"} +{"current_steps": 934, "total_steps": 2436, "loss": 2.25156307220459, "lr": 7.753972264761629e-06, "epoch": 1.1502463054187193, "percentage": 38.34, "elapsed_time": "0:18:02", "remaining_time": "0:29:00"} +{"current_steps": 935, "total_steps": 2436, "loss": 1.5051602125167847, "lr": 7.747988366754529e-06, "epoch": 1.1514778325123154, "percentage": 38.38, "elapsed_time": "0:18:03", "remaining_time": "0:28:59"} +{"current_steps": 936, "total_steps": 2436, "loss": 1.6275739669799805, "lr": 7.74199882414366e-06, "epoch": 1.1527093596059113, "percentage": 38.42, "elapsed_time": "0:18:04", "remaining_time": "0:28:58"} +{"current_steps": 937, "total_steps": 2436, "loss": 1.595947504043579, "lr": 7.736003649232058e-06, "epoch": 1.1539408866995073, "percentage": 38.46, "elapsed_time": "0:18:05", "remaining_time": "0:28:56"} +{"current_steps": 938, "total_steps": 2436, "loss": 1.4467124938964844, "lr": 7.730002854334328e-06, "epoch": 1.1551724137931034, "percentage": 38.51, "elapsed_time": "0:18:06", "remaining_time": "0:28:55"} +{"current_steps": 939, "total_steps": 2436, "loss": 1.2888911962509155, "lr": 7.723996451776615e-06, "epoch": 1.1564039408866995, "percentage": 38.55, "elapsed_time": "0:18:07", "remaining_time": "0:28:54"} +{"current_steps": 940, "total_steps": 2436, "loss": 1.2005081176757812, "lr": 7.717984453896585e-06, "epoch": 1.1576354679802956, "percentage": 38.59, "elapsed_time": "0:18:08", "remaining_time": "0:28:53"} +{"current_steps": 941, "total_steps": 2436, "loss": 1.5737872123718262, "lr": 7.711966873043396e-06, "epoch": 1.1588669950738917, "percentage": 38.63, "elapsed_time": "0:18:09", "remaining_time": "0:28:51"} +{"current_steps": 942, "total_steps": 2436, "loss": 1.929309368133545, "lr": 7.705943721577679e-06, "epoch": 1.1600985221674878, "percentage": 38.67, "elapsed_time": "0:18:11", "remaining_time": "0:28:50"} +{"current_steps": 943, "total_steps": 2436, "loss": 1.2395710945129395, "lr": 7.699915011871502e-06, "epoch": 1.1613300492610836, "percentage": 38.71, "elapsed_time": "0:18:12", "remaining_time": "0:28:49"} +{"current_steps": 944, "total_steps": 2436, "loss": 1.5058845281600952, "lr": 7.693880756308349e-06, "epoch": 1.1625615763546797, "percentage": 38.75, "elapsed_time": "0:18:13", "remaining_time": "0:28:47"} +{"current_steps": 945, "total_steps": 2436, "loss": 1.1811325550079346, "lr": 7.687840967283102e-06, "epoch": 1.1637931034482758, "percentage": 38.79, "elapsed_time": "0:18:14", "remaining_time": "0:28:46"} +{"current_steps": 946, "total_steps": 2436, "loss": 1.0631262063980103, "lr": 7.681795657202004e-06, "epoch": 1.1650246305418719, "percentage": 38.83, "elapsed_time": "0:18:15", "remaining_time": "0:28:45"} +{"current_steps": 947, "total_steps": 2436, "loss": 1.8445112705230713, "lr": 7.675744838482641e-06, "epoch": 1.166256157635468, "percentage": 38.88, "elapsed_time": "0:18:16", "remaining_time": "0:28:43"} +{"current_steps": 948, "total_steps": 2436, "loss": 0.4735199511051178, "lr": 7.669688523553913e-06, "epoch": 1.167487684729064, "percentage": 38.92, "elapsed_time": "0:18:17", "remaining_time": "0:28:42"} +{"current_steps": 949, "total_steps": 2436, "loss": 2.7862026691436768, "lr": 7.66362672485601e-06, "epoch": 1.1687192118226601, "percentage": 38.96, "elapsed_time": "0:18:18", "remaining_time": "0:28:41"} +{"current_steps": 950, "total_steps": 2436, "loss": 2.1690142154693604, "lr": 7.657559454840386e-06, "epoch": 1.1699507389162562, "percentage": 39.0, "elapsed_time": "0:18:19", "remaining_time": "0:28:40"} +{"current_steps": 951, "total_steps": 2436, "loss": 1.7143161296844482, "lr": 7.651486725969736e-06, "epoch": 1.1711822660098523, "percentage": 39.04, "elapsed_time": "0:18:20", "remaining_time": "0:28:38"} +{"current_steps": 952, "total_steps": 2436, "loss": 1.5288606882095337, "lr": 7.645408550717966e-06, "epoch": 1.1724137931034484, "percentage": 39.08, "elapsed_time": "0:18:21", "remaining_time": "0:28:37"} +{"current_steps": 953, "total_steps": 2436, "loss": 1.8929002285003662, "lr": 7.639324941570165e-06, "epoch": 1.1736453201970443, "percentage": 39.12, "elapsed_time": "0:18:22", "remaining_time": "0:28:36"} +{"current_steps": 954, "total_steps": 2436, "loss": 1.5853391885757446, "lr": 7.633235911022592e-06, "epoch": 1.1748768472906403, "percentage": 39.16, "elapsed_time": "0:18:24", "remaining_time": "0:28:35"} +{"current_steps": 955, "total_steps": 2436, "loss": 1.1136324405670166, "lr": 7.627141471582635e-06, "epoch": 1.1761083743842364, "percentage": 39.2, "elapsed_time": "0:18:25", "remaining_time": "0:28:33"} +{"current_steps": 956, "total_steps": 2436, "loss": 1.868667721748352, "lr": 7.6210416357687975e-06, "epoch": 1.1773399014778325, "percentage": 39.24, "elapsed_time": "0:18:26", "remaining_time": "0:28:32"} +{"current_steps": 957, "total_steps": 2436, "loss": 1.5594688653945923, "lr": 7.614936416110668e-06, "epoch": 1.1785714285714286, "percentage": 39.29, "elapsed_time": "0:18:27", "remaining_time": "0:28:31"} +{"current_steps": 958, "total_steps": 2436, "loss": 2.3145830631256104, "lr": 7.6088258251488845e-06, "epoch": 1.1798029556650247, "percentage": 39.33, "elapsed_time": "0:18:28", "remaining_time": "0:28:29"} +{"current_steps": 959, "total_steps": 2436, "loss": 1.1473604440689087, "lr": 7.6027098754351306e-06, "epoch": 1.1810344827586208, "percentage": 39.37, "elapsed_time": "0:18:29", "remaining_time": "0:28:28"} +{"current_steps": 960, "total_steps": 2436, "loss": 2.2835638523101807, "lr": 7.596588579532087e-06, "epoch": 1.1822660098522166, "percentage": 39.41, "elapsed_time": "0:18:30", "remaining_time": "0:28:27"} +{"current_steps": 961, "total_steps": 2436, "loss": 1.8787577152252197, "lr": 7.590461950013424e-06, "epoch": 1.1834975369458127, "percentage": 39.45, "elapsed_time": "0:18:31", "remaining_time": "0:28:26"} +{"current_steps": 962, "total_steps": 2436, "loss": 2.114804983139038, "lr": 7.584329999463763e-06, "epoch": 1.1847290640394088, "percentage": 39.49, "elapsed_time": "0:18:32", "remaining_time": "0:28:24"} +{"current_steps": 963, "total_steps": 2436, "loss": 1.288927435874939, "lr": 7.578192740478656e-06, "epoch": 1.185960591133005, "percentage": 39.53, "elapsed_time": "0:18:33", "remaining_time": "0:28:23"} +{"current_steps": 964, "total_steps": 2436, "loss": 1.929607629776001, "lr": 7.572050185664558e-06, "epoch": 1.187192118226601, "percentage": 39.57, "elapsed_time": "0:18:34", "remaining_time": "0:28:22"} +{"current_steps": 965, "total_steps": 2436, "loss": 0.5397343039512634, "lr": 7.565902347638806e-06, "epoch": 1.188423645320197, "percentage": 39.61, "elapsed_time": "0:18:35", "remaining_time": "0:28:20"} +{"current_steps": 966, "total_steps": 2436, "loss": 1.1908174753189087, "lr": 7.559749239029584e-06, "epoch": 1.1896551724137931, "percentage": 39.66, "elapsed_time": "0:18:36", "remaining_time": "0:28:19"} +{"current_steps": 967, "total_steps": 2436, "loss": 1.624518632888794, "lr": 7.553590872475909e-06, "epoch": 1.1908866995073892, "percentage": 39.7, "elapsed_time": "0:18:37", "remaining_time": "0:28:18"} +{"current_steps": 968, "total_steps": 2436, "loss": 1.3011376857757568, "lr": 7.547427260627586e-06, "epoch": 1.1921182266009853, "percentage": 39.74, "elapsed_time": "0:18:39", "remaining_time": "0:28:17"} +{"current_steps": 969, "total_steps": 2436, "loss": 1.2930490970611572, "lr": 7.541258416145212e-06, "epoch": 1.1933497536945814, "percentage": 39.78, "elapsed_time": "0:18:40", "remaining_time": "0:28:15"} +{"current_steps": 970, "total_steps": 2436, "loss": 1.34272038936615, "lr": 7.535084351700117e-06, "epoch": 1.1945812807881773, "percentage": 39.82, "elapsed_time": "0:18:41", "remaining_time": "0:28:14"} +{"current_steps": 971, "total_steps": 2436, "loss": 1.2804269790649414, "lr": 7.528905079974358e-06, "epoch": 1.1958128078817734, "percentage": 39.86, "elapsed_time": "0:18:42", "remaining_time": "0:28:13"} +{"current_steps": 972, "total_steps": 2436, "loss": 1.7138396501541138, "lr": 7.522720613660691e-06, "epoch": 1.1970443349753694, "percentage": 39.9, "elapsed_time": "0:18:43", "remaining_time": "0:28:11"} +{"current_steps": 973, "total_steps": 2436, "loss": 1.7358574867248535, "lr": 7.5165309654625405e-06, "epoch": 1.1982758620689655, "percentage": 39.94, "elapsed_time": "0:18:44", "remaining_time": "0:28:10"} +{"current_steps": 974, "total_steps": 2436, "loss": 1.0514552593231201, "lr": 7.510336148093975e-06, "epoch": 1.1995073891625616, "percentage": 39.98, "elapsed_time": "0:18:45", "remaining_time": "0:28:09"} +{"current_steps": 975, "total_steps": 2436, "loss": 1.7314313650131226, "lr": 7.504136174279679e-06, "epoch": 1.2007389162561577, "percentage": 40.02, "elapsed_time": "0:18:46", "remaining_time": "0:28:08"} +{"current_steps": 976, "total_steps": 2436, "loss": 1.0069202184677124, "lr": 7.4979310567549315e-06, "epoch": 1.2019704433497538, "percentage": 40.07, "elapsed_time": "0:18:47", "remaining_time": "0:28:06"} +{"current_steps": 977, "total_steps": 2436, "loss": 1.1851680278778076, "lr": 7.491720808265576e-06, "epoch": 1.2032019704433496, "percentage": 40.11, "elapsed_time": "0:18:48", "remaining_time": "0:28:05"} +{"current_steps": 978, "total_steps": 2436, "loss": 1.355776309967041, "lr": 7.485505441567995e-06, "epoch": 1.2044334975369457, "percentage": 40.15, "elapsed_time": "0:18:49", "remaining_time": "0:28:04"} +{"current_steps": 979, "total_steps": 2436, "loss": 1.5034677982330322, "lr": 7.4792849694290846e-06, "epoch": 1.2056650246305418, "percentage": 40.19, "elapsed_time": "0:18:50", "remaining_time": "0:28:03"} +{"current_steps": 980, "total_steps": 2436, "loss": 1.9321900606155396, "lr": 7.473059404626229e-06, "epoch": 1.206896551724138, "percentage": 40.23, "elapsed_time": "0:18:51", "remaining_time": "0:28:01"} +{"current_steps": 981, "total_steps": 2436, "loss": 1.4899095296859741, "lr": 7.466828759947271e-06, "epoch": 1.208128078817734, "percentage": 40.27, "elapsed_time": "0:18:53", "remaining_time": "0:28:00"} +{"current_steps": 982, "total_steps": 2436, "loss": 1.9984737634658813, "lr": 7.46059304819049e-06, "epoch": 1.20935960591133, "percentage": 40.31, "elapsed_time": "0:18:54", "remaining_time": "0:27:59"} +{"current_steps": 983, "total_steps": 2436, "loss": 1.7756625413894653, "lr": 7.454352282164572e-06, "epoch": 1.2105911330049262, "percentage": 40.35, "elapsed_time": "0:18:55", "remaining_time": "0:27:58"} +{"current_steps": 984, "total_steps": 2436, "loss": 1.47117018699646, "lr": 7.448106474688588e-06, "epoch": 1.2118226600985222, "percentage": 40.39, "elapsed_time": "0:18:56", "remaining_time": "0:27:56"} +{"current_steps": 985, "total_steps": 2436, "loss": 1.3485603332519531, "lr": 7.441855638591958e-06, "epoch": 1.2130541871921183, "percentage": 40.44, "elapsed_time": "0:18:57", "remaining_time": "0:27:55"} +{"current_steps": 986, "total_steps": 2436, "loss": 1.3982055187225342, "lr": 7.435599786714438e-06, "epoch": 1.2142857142857142, "percentage": 40.48, "elapsed_time": "0:18:58", "remaining_time": "0:27:54"} +{"current_steps": 987, "total_steps": 2436, "loss": 1.4942795038223267, "lr": 7.429338931906085e-06, "epoch": 1.2155172413793103, "percentage": 40.52, "elapsed_time": "0:18:59", "remaining_time": "0:27:52"} +{"current_steps": 988, "total_steps": 2436, "loss": 2.227587938308716, "lr": 7.423073087027228e-06, "epoch": 1.2167487684729064, "percentage": 40.56, "elapsed_time": "0:19:00", "remaining_time": "0:27:51"} +{"current_steps": 989, "total_steps": 2436, "loss": 1.523234486579895, "lr": 7.416802264948455e-06, "epoch": 1.2179802955665024, "percentage": 40.6, "elapsed_time": "0:19:01", "remaining_time": "0:27:50"} +{"current_steps": 990, "total_steps": 2436, "loss": 3.9873814582824707, "lr": 7.410526478550568e-06, "epoch": 1.2192118226600985, "percentage": 40.64, "elapsed_time": "0:19:02", "remaining_time": "0:27:49"} +{"current_steps": 991, "total_steps": 2436, "loss": 1.279615044593811, "lr": 7.404245740724573e-06, "epoch": 1.2204433497536946, "percentage": 40.68, "elapsed_time": "0:19:03", "remaining_time": "0:27:47"} +{"current_steps": 992, "total_steps": 2436, "loss": 0.9347010850906372, "lr": 7.3979600643716435e-06, "epoch": 1.2216748768472907, "percentage": 40.72, "elapsed_time": "0:19:04", "remaining_time": "0:27:46"} +{"current_steps": 993, "total_steps": 2436, "loss": 1.9017002582550049, "lr": 7.391669462403096e-06, "epoch": 1.2229064039408868, "percentage": 40.76, "elapsed_time": "0:19:06", "remaining_time": "0:27:45"} +{"current_steps": 994, "total_steps": 2436, "loss": 1.7247897386550903, "lr": 7.385373947740369e-06, "epoch": 1.2241379310344827, "percentage": 40.8, "elapsed_time": "0:19:07", "remaining_time": "0:27:44"} +{"current_steps": 995, "total_steps": 2436, "loss": 0.7111251950263977, "lr": 7.379073533314988e-06, "epoch": 1.2253694581280787, "percentage": 40.85, "elapsed_time": "0:19:08", "remaining_time": "0:27:42"} +{"current_steps": 996, "total_steps": 2436, "loss": 0.9086591601371765, "lr": 7.372768232068544e-06, "epoch": 1.2266009852216748, "percentage": 40.89, "elapsed_time": "0:19:09", "remaining_time": "0:27:41"} +{"current_steps": 997, "total_steps": 2436, "loss": 1.6426423788070679, "lr": 7.366458056952668e-06, "epoch": 1.227832512315271, "percentage": 40.93, "elapsed_time": "0:19:10", "remaining_time": "0:27:40"} +{"current_steps": 998, "total_steps": 2436, "loss": 1.2501566410064697, "lr": 7.360143020929e-06, "epoch": 1.229064039408867, "percentage": 40.97, "elapsed_time": "0:19:11", "remaining_time": "0:27:39"} +{"current_steps": 999, "total_steps": 2436, "loss": 2.263824939727783, "lr": 7.353823136969167e-06, "epoch": 1.230295566502463, "percentage": 41.01, "elapsed_time": "0:19:12", "remaining_time": "0:27:37"} +{"current_steps": 1000, "total_steps": 2436, "loss": 1.3503868579864502, "lr": 7.34749841805475e-06, "epoch": 1.2315270935960592, "percentage": 41.05, "elapsed_time": "0:19:13", "remaining_time": "0:27:36"} +{"current_steps": 1001, "total_steps": 2436, "loss": 1.2844277620315552, "lr": 7.341168877177267e-06, "epoch": 1.2327586206896552, "percentage": 41.09, "elapsed_time": "0:19:14", "remaining_time": "0:27:35"} +{"current_steps": 1002, "total_steps": 2436, "loss": 1.823725700378418, "lr": 7.3348345273381365e-06, "epoch": 1.2339901477832513, "percentage": 41.13, "elapsed_time": "0:19:15", "remaining_time": "0:27:34"} +{"current_steps": 1003, "total_steps": 2436, "loss": 1.8349339962005615, "lr": 7.328495381548655e-06, "epoch": 1.2352216748768472, "percentage": 41.17, "elapsed_time": "0:19:16", "remaining_time": "0:27:32"} +{"current_steps": 1004, "total_steps": 2436, "loss": 1.431024432182312, "lr": 7.322151452829972e-06, "epoch": 1.2364532019704433, "percentage": 41.22, "elapsed_time": "0:19:17", "remaining_time": "0:27:31"} +{"current_steps": 1005, "total_steps": 2436, "loss": 0.8406596183776855, "lr": 7.315802754213062e-06, "epoch": 1.2376847290640394, "percentage": 41.26, "elapsed_time": "0:19:18", "remaining_time": "0:27:30"} +{"current_steps": 1006, "total_steps": 2436, "loss": 1.7037804126739502, "lr": 7.309449298738696e-06, "epoch": 1.2389162561576355, "percentage": 41.3, "elapsed_time": "0:19:20", "remaining_time": "0:27:28"} +{"current_steps": 1007, "total_steps": 2436, "loss": 1.4264461994171143, "lr": 7.303091099457418e-06, "epoch": 1.2401477832512315, "percentage": 41.34, "elapsed_time": "0:19:21", "remaining_time": "0:27:27"} +{"current_steps": 1008, "total_steps": 2436, "loss": 2.502678632736206, "lr": 7.296728169429511e-06, "epoch": 1.2413793103448276, "percentage": 41.38, "elapsed_time": "0:19:22", "remaining_time": "0:27:26"} +{"current_steps": 1009, "total_steps": 2436, "loss": 1.5582114458084106, "lr": 7.290360521724984e-06, "epoch": 1.2426108374384237, "percentage": 41.42, "elapsed_time": "0:19:23", "remaining_time": "0:27:25"} +{"current_steps": 1010, "total_steps": 2436, "loss": 1.494875192642212, "lr": 7.283988169423526e-06, "epoch": 1.2438423645320198, "percentage": 41.46, "elapsed_time": "0:19:24", "remaining_time": "0:27:23"} +{"current_steps": 1011, "total_steps": 2436, "loss": 1.886913776397705, "lr": 7.277611125614499e-06, "epoch": 1.2450738916256157, "percentage": 41.5, "elapsed_time": "0:19:25", "remaining_time": "0:27:22"} +{"current_steps": 1012, "total_steps": 2436, "loss": 1.8913657665252686, "lr": 7.271229403396896e-06, "epoch": 1.2463054187192117, "percentage": 41.54, "elapsed_time": "0:19:26", "remaining_time": "0:27:21"} +{"current_steps": 1013, "total_steps": 2436, "loss": 1.1614234447479248, "lr": 7.264843015879321e-06, "epoch": 1.2475369458128078, "percentage": 41.58, "elapsed_time": "0:19:27", "remaining_time": "0:27:20"} +{"current_steps": 1014, "total_steps": 2436, "loss": 1.6838147640228271, "lr": 7.258451976179967e-06, "epoch": 1.248768472906404, "percentage": 41.63, "elapsed_time": "0:19:28", "remaining_time": "0:27:18"} +{"current_steps": 1015, "total_steps": 2436, "loss": 1.1039239168167114, "lr": 7.25205629742657e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "0:19:29", "remaining_time": "0:27:17"} +{"current_steps": 1016, "total_steps": 2436, "loss": 1.519346833229065, "lr": 7.245655992756406e-06, "epoch": 1.251231527093596, "percentage": 41.71, "elapsed_time": "0:19:30", "remaining_time": "0:27:16"} +{"current_steps": 1017, "total_steps": 2436, "loss": 1.0175197124481201, "lr": 7.2392510753162516e-06, "epoch": 1.2524630541871922, "percentage": 41.75, "elapsed_time": "0:19:31", "remaining_time": "0:27:15"} +{"current_steps": 1018, "total_steps": 2436, "loss": 0.9778202772140503, "lr": 7.232841558262354e-06, "epoch": 1.2536945812807883, "percentage": 41.79, "elapsed_time": "0:19:33", "remaining_time": "0:27:13"} +{"current_steps": 1019, "total_steps": 2436, "loss": 1.8379024267196655, "lr": 7.226427454760412e-06, "epoch": 1.2549261083743843, "percentage": 41.83, "elapsed_time": "0:19:34", "remaining_time": "0:27:12"} +{"current_steps": 1020, "total_steps": 2436, "loss": 1.8412721157073975, "lr": 7.2200087779855435e-06, "epoch": 1.2561576354679804, "percentage": 41.87, "elapsed_time": "0:19:35", "remaining_time": "0:27:11"} +{"current_steps": 1021, "total_steps": 2436, "loss": 1.8508501052856445, "lr": 7.213585541122261e-06, "epoch": 1.2573891625615763, "percentage": 41.91, "elapsed_time": "0:19:36", "remaining_time": "0:27:10"} +{"current_steps": 1022, "total_steps": 2436, "loss": 1.3070871829986572, "lr": 7.207157757364445e-06, "epoch": 1.2586206896551724, "percentage": 41.95, "elapsed_time": "0:19:37", "remaining_time": "0:27:08"} +{"current_steps": 1023, "total_steps": 2436, "loss": 2.1278223991394043, "lr": 7.200725439915314e-06, "epoch": 1.2598522167487685, "percentage": 42.0, "elapsed_time": "0:19:38", "remaining_time": "0:27:07"} +{"current_steps": 1024, "total_steps": 2436, "loss": 1.0636892318725586, "lr": 7.194288601987398e-06, "epoch": 1.2610837438423645, "percentage": 42.04, "elapsed_time": "0:19:39", "remaining_time": "0:27:06"} +{"current_steps": 1025, "total_steps": 2436, "loss": 1.7365200519561768, "lr": 7.187847256802518e-06, "epoch": 1.2623152709359606, "percentage": 42.08, "elapsed_time": "0:19:40", "remaining_time": "0:27:05"} +{"current_steps": 1026, "total_steps": 2436, "loss": 1.792116403579712, "lr": 7.181401417591746e-06, "epoch": 1.2635467980295567, "percentage": 42.12, "elapsed_time": "0:19:41", "remaining_time": "0:27:03"} +{"current_steps": 1027, "total_steps": 2436, "loss": 1.3348667621612549, "lr": 7.174951097595389e-06, "epoch": 1.2647783251231526, "percentage": 42.16, "elapsed_time": "0:19:42", "remaining_time": "0:27:02"} +{"current_steps": 1028, "total_steps": 2436, "loss": 1.677919626235962, "lr": 7.168496310062959e-06, "epoch": 1.2660098522167487, "percentage": 42.2, "elapsed_time": "0:19:43", "remaining_time": "0:27:01"} +{"current_steps": 1029, "total_steps": 2436, "loss": 1.1518199443817139, "lr": 7.162037068253141e-06, "epoch": 1.2672413793103448, "percentage": 42.24, "elapsed_time": "0:19:44", "remaining_time": "0:27:00"} +{"current_steps": 1030, "total_steps": 2436, "loss": 2.1126716136932373, "lr": 7.155573385433772e-06, "epoch": 1.2684729064039408, "percentage": 42.28, "elapsed_time": "0:19:45", "remaining_time": "0:26:58"} +{"current_steps": 1031, "total_steps": 2436, "loss": 1.3222094774246216, "lr": 7.149105274881815e-06, "epoch": 1.269704433497537, "percentage": 42.32, "elapsed_time": "0:19:47", "remaining_time": "0:26:57"} +{"current_steps": 1032, "total_steps": 2436, "loss": 0.8843763470649719, "lr": 7.1426327498833174e-06, "epoch": 1.270935960591133, "percentage": 42.36, "elapsed_time": "0:19:48", "remaining_time": "0:26:56"} +{"current_steps": 1033, "total_steps": 2436, "loss": 1.3091545104980469, "lr": 7.136155823733405e-06, "epoch": 1.272167487684729, "percentage": 42.41, "elapsed_time": "0:19:49", "remaining_time": "0:26:55"} +{"current_steps": 1034, "total_steps": 2436, "loss": 1.4408364295959473, "lr": 7.129674509736237e-06, "epoch": 1.2733990147783252, "percentage": 42.45, "elapsed_time": "0:19:50", "remaining_time": "0:26:53"} +{"current_steps": 1035, "total_steps": 2436, "loss": 1.330906867980957, "lr": 7.12318882120499e-06, "epoch": 1.2746305418719213, "percentage": 42.49, "elapsed_time": "0:19:51", "remaining_time": "0:26:52"} +{"current_steps": 1036, "total_steps": 2436, "loss": 1.9561724662780762, "lr": 7.116698771461825e-06, "epoch": 1.2758620689655173, "percentage": 42.53, "elapsed_time": "0:19:52", "remaining_time": "0:26:51"} +{"current_steps": 1037, "total_steps": 2436, "loss": 2.185842275619507, "lr": 7.110204373837857e-06, "epoch": 1.2770935960591134, "percentage": 42.57, "elapsed_time": "0:19:53", "remaining_time": "0:26:50"} +{"current_steps": 1038, "total_steps": 2436, "loss": 1.724360466003418, "lr": 7.1037056416731395e-06, "epoch": 1.2783251231527093, "percentage": 42.61, "elapsed_time": "0:19:54", "remaining_time": "0:26:48"} +{"current_steps": 1039, "total_steps": 2436, "loss": 1.179841160774231, "lr": 7.097202588316625e-06, "epoch": 1.2795566502463054, "percentage": 42.65, "elapsed_time": "0:19:55", "remaining_time": "0:26:47"} +{"current_steps": 1040, "total_steps": 2436, "loss": 1.6783604621887207, "lr": 7.090695227126141e-06, "epoch": 1.2807881773399015, "percentage": 42.69, "elapsed_time": "0:19:56", "remaining_time": "0:26:46"} +{"current_steps": 1041, "total_steps": 2436, "loss": 1.761925220489502, "lr": 7.084183571468368e-06, "epoch": 1.2820197044334976, "percentage": 42.73, "elapsed_time": "0:19:57", "remaining_time": "0:26:45"} +{"current_steps": 1042, "total_steps": 2436, "loss": 0.9297729134559631, "lr": 7.077667634718801e-06, "epoch": 1.2832512315270936, "percentage": 42.78, "elapsed_time": "0:19:58", "remaining_time": "0:26:43"} +{"current_steps": 1043, "total_steps": 2436, "loss": 1.6091060638427734, "lr": 7.071147430261738e-06, "epoch": 1.2844827586206897, "percentage": 42.82, "elapsed_time": "0:19:59", "remaining_time": "0:26:42"} +{"current_steps": 1044, "total_steps": 2436, "loss": 1.280853509902954, "lr": 7.064622971490234e-06, "epoch": 1.2857142857142856, "percentage": 42.86, "elapsed_time": "0:20:01", "remaining_time": "0:26:41"} +{"current_steps": 1045, "total_steps": 2436, "loss": 2.4095635414123535, "lr": 7.058094271806091e-06, "epoch": 1.2869458128078817, "percentage": 42.9, "elapsed_time": "0:20:02", "remaining_time": "0:26:40"} +{"current_steps": 1046, "total_steps": 2436, "loss": 1.7969441413879395, "lr": 7.051561344619814e-06, "epoch": 1.2881773399014778, "percentage": 42.94, "elapsed_time": "0:20:03", "remaining_time": "0:26:38"} +{"current_steps": 1047, "total_steps": 2436, "loss": 2.4331698417663574, "lr": 7.045024203350598e-06, "epoch": 1.2894088669950738, "percentage": 42.98, "elapsed_time": "0:20:04", "remaining_time": "0:26:37"} +{"current_steps": 1048, "total_steps": 2436, "loss": 1.336733341217041, "lr": 7.0384828614262905e-06, "epoch": 1.29064039408867, "percentage": 43.02, "elapsed_time": "0:20:05", "remaining_time": "0:26:36"} +{"current_steps": 1049, "total_steps": 2436, "loss": 1.2959213256835938, "lr": 7.031937332283367e-06, "epoch": 1.291871921182266, "percentage": 43.06, "elapsed_time": "0:20:06", "remaining_time": "0:26:35"} +{"current_steps": 1050, "total_steps": 2436, "loss": 1.0095289945602417, "lr": 7.025387629366912e-06, "epoch": 1.293103448275862, "percentage": 43.1, "elapsed_time": "0:20:07", "remaining_time": "0:26:33"} +{"current_steps": 1051, "total_steps": 2436, "loss": 1.8314733505249023, "lr": 7.018833766130571e-06, "epoch": 1.2943349753694582, "percentage": 43.14, "elapsed_time": "0:20:08", "remaining_time": "0:26:32"} +{"current_steps": 1052, "total_steps": 2436, "loss": 1.121436595916748, "lr": 7.012275756036544e-06, "epoch": 1.2955665024630543, "percentage": 43.19, "elapsed_time": "0:20:09", "remaining_time": "0:26:31"} +{"current_steps": 1053, "total_steps": 2436, "loss": 1.5652289390563965, "lr": 7.0057136125555456e-06, "epoch": 1.2967980295566504, "percentage": 43.23, "elapsed_time": "0:20:10", "remaining_time": "0:26:30"} +{"current_steps": 1054, "total_steps": 2436, "loss": 1.1146215200424194, "lr": 6.999147349166779e-06, "epoch": 1.2980295566502464, "percentage": 43.27, "elapsed_time": "0:20:11", "remaining_time": "0:26:28"} +{"current_steps": 1055, "total_steps": 2436, "loss": 2.400024175643921, "lr": 6.9925769793579165e-06, "epoch": 1.2992610837438423, "percentage": 43.31, "elapsed_time": "0:20:12", "remaining_time": "0:26:27"} +{"current_steps": 1056, "total_steps": 2436, "loss": 1.7114648818969727, "lr": 6.986002516625058e-06, "epoch": 1.3004926108374384, "percentage": 43.35, "elapsed_time": "0:20:13", "remaining_time": "0:26:26"} +{"current_steps": 1057, "total_steps": 2436, "loss": 1.5338797569274902, "lr": 6.979423974472714e-06, "epoch": 1.3017241379310345, "percentage": 43.39, "elapsed_time": "0:20:15", "remaining_time": "0:26:25"} +{"current_steps": 1058, "total_steps": 2436, "loss": 1.078460931777954, "lr": 6.972841366413777e-06, "epoch": 1.3029556650246306, "percentage": 43.43, "elapsed_time": "0:20:16", "remaining_time": "0:26:23"} +{"current_steps": 1059, "total_steps": 2436, "loss": 1.5467915534973145, "lr": 6.966254705969484e-06, "epoch": 1.3041871921182266, "percentage": 43.47, "elapsed_time": "0:20:17", "remaining_time": "0:26:22"} +{"current_steps": 1060, "total_steps": 2436, "loss": 1.2715568542480469, "lr": 6.959664006669404e-06, "epoch": 1.3054187192118227, "percentage": 43.51, "elapsed_time": "0:20:18", "remaining_time": "0:26:21"} +{"current_steps": 1061, "total_steps": 2436, "loss": 1.887066125869751, "lr": 6.953069282051397e-06, "epoch": 1.3066502463054186, "percentage": 43.56, "elapsed_time": "0:20:19", "remaining_time": "0:26:20"} +{"current_steps": 1062, "total_steps": 2436, "loss": 1.419116497039795, "lr": 6.946470545661593e-06, "epoch": 1.3078817733990147, "percentage": 43.6, "elapsed_time": "0:20:20", "remaining_time": "0:26:19"} +{"current_steps": 1063, "total_steps": 2436, "loss": 1.3843079805374146, "lr": 6.939867811054365e-06, "epoch": 1.3091133004926108, "percentage": 43.64, "elapsed_time": "0:20:21", "remaining_time": "0:26:17"} +{"current_steps": 1064, "total_steps": 2436, "loss": 2.5894885063171387, "lr": 6.9332610917922915e-06, "epoch": 1.3103448275862069, "percentage": 43.68, "elapsed_time": "0:20:22", "remaining_time": "0:26:16"} +{"current_steps": 1065, "total_steps": 2436, "loss": 1.6600944995880127, "lr": 6.9266504014461425e-06, "epoch": 1.311576354679803, "percentage": 43.72, "elapsed_time": "0:20:23", "remaining_time": "0:26:15"} +{"current_steps": 1066, "total_steps": 2436, "loss": 1.7698057889938354, "lr": 6.920035753594845e-06, "epoch": 1.312807881773399, "percentage": 43.76, "elapsed_time": "0:20:24", "remaining_time": "0:26:14"} +{"current_steps": 1067, "total_steps": 2436, "loss": 1.5610848665237427, "lr": 6.913417161825449e-06, "epoch": 1.314039408866995, "percentage": 43.8, "elapsed_time": "0:20:25", "remaining_time": "0:26:12"} +{"current_steps": 1068, "total_steps": 2436, "loss": 1.6380643844604492, "lr": 6.906794639733114e-06, "epoch": 1.3152709359605912, "percentage": 43.84, "elapsed_time": "0:20:26", "remaining_time": "0:26:11"} +{"current_steps": 1069, "total_steps": 2436, "loss": 1.390014410018921, "lr": 6.900168200921065e-06, "epoch": 1.3165024630541873, "percentage": 43.88, "elapsed_time": "0:20:28", "remaining_time": "0:26:10"} +{"current_steps": 1070, "total_steps": 2436, "loss": 1.6589158773422241, "lr": 6.893537859000576e-06, "epoch": 1.3177339901477834, "percentage": 43.92, "elapsed_time": "0:20:29", "remaining_time": "0:26:09"} +{"current_steps": 1071, "total_steps": 2436, "loss": 1.5524673461914062, "lr": 6.886903627590938e-06, "epoch": 1.3189655172413794, "percentage": 43.97, "elapsed_time": "0:20:30", "remaining_time": "0:26:07"} +{"current_steps": 1072, "total_steps": 2436, "loss": 2.0204474925994873, "lr": 6.880265520319434e-06, "epoch": 1.3201970443349753, "percentage": 44.01, "elapsed_time": "0:20:31", "remaining_time": "0:26:06"} +{"current_steps": 1073, "total_steps": 2436, "loss": 1.7947957515716553, "lr": 6.8736235508213024e-06, "epoch": 1.3214285714285714, "percentage": 44.05, "elapsed_time": "0:20:32", "remaining_time": "0:26:05"} +{"current_steps": 1074, "total_steps": 2436, "loss": 1.6154756546020508, "lr": 6.866977732739719e-06, "epoch": 1.3226600985221675, "percentage": 44.09, "elapsed_time": "0:20:33", "remaining_time": "0:26:04"} +{"current_steps": 1075, "total_steps": 2436, "loss": 1.419677734375, "lr": 6.860328079725764e-06, "epoch": 1.3238916256157636, "percentage": 44.13, "elapsed_time": "0:20:34", "remaining_time": "0:26:02"} +{"current_steps": 1076, "total_steps": 2436, "loss": 2.2221052646636963, "lr": 6.853674605438395e-06, "epoch": 1.3251231527093597, "percentage": 44.17, "elapsed_time": "0:20:35", "remaining_time": "0:26:01"} +{"current_steps": 1077, "total_steps": 2436, "loss": 1.6474840641021729, "lr": 6.84701732354442e-06, "epoch": 1.3263546798029557, "percentage": 44.21, "elapsed_time": "0:20:36", "remaining_time": "0:26:00"} +{"current_steps": 1078, "total_steps": 2436, "loss": 2.035231828689575, "lr": 6.840356247718466e-06, "epoch": 1.3275862068965516, "percentage": 44.25, "elapsed_time": "0:20:37", "remaining_time": "0:25:59"} +{"current_steps": 1079, "total_steps": 2436, "loss": 1.5675947666168213, "lr": 6.8336913916429515e-06, "epoch": 1.3288177339901477, "percentage": 44.29, "elapsed_time": "0:20:38", "remaining_time": "0:25:57"} +{"current_steps": 1080, "total_steps": 2436, "loss": 1.2241394519805908, "lr": 6.827022769008068e-06, "epoch": 1.3300492610837438, "percentage": 44.33, "elapsed_time": "0:20:39", "remaining_time": "0:25:56"} +{"current_steps": 1081, "total_steps": 2436, "loss": 1.3507403135299683, "lr": 6.820350393511732e-06, "epoch": 1.3312807881773399, "percentage": 44.38, "elapsed_time": "0:20:40", "remaining_time": "0:25:55"} +{"current_steps": 1082, "total_steps": 2436, "loss": 2.256551504135132, "lr": 6.81367427885958e-06, "epoch": 1.332512315270936, "percentage": 44.42, "elapsed_time": "0:20:42", "remaining_time": "0:25:54"} +{"current_steps": 1083, "total_steps": 2436, "loss": 1.6412163972854614, "lr": 6.806994438764922e-06, "epoch": 1.333743842364532, "percentage": 44.46, "elapsed_time": "0:20:43", "remaining_time": "0:25:53"} +{"current_steps": 1084, "total_steps": 2436, "loss": 1.500988483428955, "lr": 6.8003108869487225e-06, "epoch": 1.3349753694581281, "percentage": 44.5, "elapsed_time": "0:20:44", "remaining_time": "0:25:51"} +{"current_steps": 1085, "total_steps": 2436, "loss": 1.4661070108413696, "lr": 6.79362363713957e-06, "epoch": 1.3362068965517242, "percentage": 44.54, "elapsed_time": "0:20:45", "remaining_time": "0:25:50"} +{"current_steps": 1086, "total_steps": 2436, "loss": 1.42755126953125, "lr": 6.786932703073648e-06, "epoch": 1.3374384236453203, "percentage": 44.58, "elapsed_time": "0:20:46", "remaining_time": "0:25:49"} +{"current_steps": 1087, "total_steps": 2436, "loss": 1.165806531906128, "lr": 6.780238098494711e-06, "epoch": 1.3386699507389164, "percentage": 44.62, "elapsed_time": "0:20:47", "remaining_time": "0:25:48"} +{"current_steps": 1088, "total_steps": 2436, "loss": 1.3795387744903564, "lr": 6.773539837154051e-06, "epoch": 1.3399014778325122, "percentage": 44.66, "elapsed_time": "0:20:48", "remaining_time": "0:25:46"} +{"current_steps": 1089, "total_steps": 2436, "loss": 1.3203850984573364, "lr": 6.766837932810468e-06, "epoch": 1.3411330049261083, "percentage": 44.7, "elapsed_time": "0:20:49", "remaining_time": "0:25:45"} +{"current_steps": 1090, "total_steps": 2436, "loss": 1.645883321762085, "lr": 6.7601323992302525e-06, "epoch": 1.3423645320197044, "percentage": 44.75, "elapsed_time": "0:20:50", "remaining_time": "0:25:44"} +{"current_steps": 1091, "total_steps": 2436, "loss": 1.6904821395874023, "lr": 6.7534232501871425e-06, "epoch": 1.3435960591133005, "percentage": 44.79, "elapsed_time": "0:20:51", "remaining_time": "0:25:43"} +{"current_steps": 1092, "total_steps": 2436, "loss": 1.332162618637085, "lr": 6.7467104994623066e-06, "epoch": 1.3448275862068966, "percentage": 44.83, "elapsed_time": "0:20:52", "remaining_time": "0:25:41"} +{"current_steps": 1093, "total_steps": 2436, "loss": 1.4389145374298096, "lr": 6.7399941608443096e-06, "epoch": 1.3460591133004927, "percentage": 44.87, "elapsed_time": "0:20:53", "remaining_time": "0:25:40"} +{"current_steps": 1094, "total_steps": 2436, "loss": 1.6597908735275269, "lr": 6.733274248129089e-06, "epoch": 1.3472906403940887, "percentage": 44.91, "elapsed_time": "0:20:55", "remaining_time": "0:25:39"} +{"current_steps": 1095, "total_steps": 2436, "loss": 0.9520257711410522, "lr": 6.72655077511992e-06, "epoch": 1.3485221674876846, "percentage": 44.95, "elapsed_time": "0:20:56", "remaining_time": "0:25:38"} +{"current_steps": 1096, "total_steps": 2436, "loss": 1.4488117694854736, "lr": 6.719823755627393e-06, "epoch": 1.3497536945812807, "percentage": 44.99, "elapsed_time": "0:20:57", "remaining_time": "0:25:37"} +{"current_steps": 1097, "total_steps": 2436, "loss": 1.5133984088897705, "lr": 6.713093203469384e-06, "epoch": 1.3509852216748768, "percentage": 45.03, "elapsed_time": "0:20:58", "remaining_time": "0:25:35"} +{"current_steps": 1098, "total_steps": 2436, "loss": 1.846522569656372, "lr": 6.7063591324710234e-06, "epoch": 1.3522167487684729, "percentage": 45.07, "elapsed_time": "0:20:59", "remaining_time": "0:25:34"} +{"current_steps": 1099, "total_steps": 2436, "loss": 0.9724826812744141, "lr": 6.6996215564646705e-06, "epoch": 1.353448275862069, "percentage": 45.11, "elapsed_time": "0:21:00", "remaining_time": "0:25:33"} +{"current_steps": 1100, "total_steps": 2436, "loss": 1.24728262424469, "lr": 6.692880489289885e-06, "epoch": 1.354679802955665, "percentage": 45.16, "elapsed_time": "0:21:01", "remaining_time": "0:25:32"} +{"current_steps": 1101, "total_steps": 2436, "loss": 1.5332872867584229, "lr": 6.686135944793395e-06, "epoch": 1.3559113300492611, "percentage": 45.2, "elapsed_time": "0:21:02", "remaining_time": "0:25:30"} +{"current_steps": 1102, "total_steps": 2436, "loss": 1.5978163480758667, "lr": 6.679387936829076e-06, "epoch": 1.3571428571428572, "percentage": 45.24, "elapsed_time": "0:21:03", "remaining_time": "0:25:29"} +{"current_steps": 1103, "total_steps": 2436, "loss": 2.05710506439209, "lr": 6.672636479257912e-06, "epoch": 1.3583743842364533, "percentage": 45.28, "elapsed_time": "0:21:04", "remaining_time": "0:25:28"} +{"current_steps": 1104, "total_steps": 2436, "loss": 1.667812466621399, "lr": 6.665881585947981e-06, "epoch": 1.3596059113300494, "percentage": 45.32, "elapsed_time": "0:21:05", "remaining_time": "0:25:27"} +{"current_steps": 1105, "total_steps": 2436, "loss": 1.3053381443023682, "lr": 6.659123270774406e-06, "epoch": 1.3608374384236452, "percentage": 45.36, "elapsed_time": "0:21:06", "remaining_time": "0:25:26"} +{"current_steps": 1106, "total_steps": 2436, "loss": 1.5228716135025024, "lr": 6.652361547619352e-06, "epoch": 1.3620689655172413, "percentage": 45.4, "elapsed_time": "0:21:07", "remaining_time": "0:25:24"} +{"current_steps": 1107, "total_steps": 2436, "loss": 1.3818378448486328, "lr": 6.645596430371976e-06, "epoch": 1.3633004926108374, "percentage": 45.44, "elapsed_time": "0:21:09", "remaining_time": "0:25:23"} +{"current_steps": 1108, "total_steps": 2436, "loss": 1.217841386795044, "lr": 6.6388279329284065e-06, "epoch": 1.3645320197044335, "percentage": 45.48, "elapsed_time": "0:21:10", "remaining_time": "0:25:22"} +{"current_steps": 1109, "total_steps": 2436, "loss": 1.4309210777282715, "lr": 6.632056069191723e-06, "epoch": 1.3657635467980296, "percentage": 45.53, "elapsed_time": "0:21:11", "remaining_time": "0:25:21"} +{"current_steps": 1110, "total_steps": 2436, "loss": 1.3015059232711792, "lr": 6.6252808530719095e-06, "epoch": 1.3669950738916257, "percentage": 45.57, "elapsed_time": "0:21:12", "remaining_time": "0:25:19"} +{"current_steps": 1111, "total_steps": 2436, "loss": 1.2734256982803345, "lr": 6.618502298485844e-06, "epoch": 1.3682266009852218, "percentage": 45.61, "elapsed_time": "0:21:13", "remaining_time": "0:25:18"} +{"current_steps": 1112, "total_steps": 2436, "loss": 1.907172441482544, "lr": 6.611720419357257e-06, "epoch": 1.3694581280788176, "percentage": 45.65, "elapsed_time": "0:21:14", "remaining_time": "0:25:17"} +{"current_steps": 1113, "total_steps": 2436, "loss": 1.1207606792449951, "lr": 6.604935229616711e-06, "epoch": 1.3706896551724137, "percentage": 45.69, "elapsed_time": "0:21:15", "remaining_time": "0:25:16"} +{"current_steps": 1114, "total_steps": 2436, "loss": 2.3231239318847656, "lr": 6.598146743201568e-06, "epoch": 1.3719211822660098, "percentage": 45.73, "elapsed_time": "0:21:16", "remaining_time": "0:25:14"} +{"current_steps": 1115, "total_steps": 2436, "loss": 1.1395865678787231, "lr": 6.5913549740559606e-06, "epoch": 1.3731527093596059, "percentage": 45.77, "elapsed_time": "0:21:17", "remaining_time": "0:25:13"} +{"current_steps": 1116, "total_steps": 2436, "loss": 3.1981747150421143, "lr": 6.584559936130763e-06, "epoch": 1.374384236453202, "percentage": 45.81, "elapsed_time": "0:21:18", "remaining_time": "0:25:12"} +{"current_steps": 1117, "total_steps": 2436, "loss": 1.7495319843292236, "lr": 6.57776164338357e-06, "epoch": 1.375615763546798, "percentage": 45.85, "elapsed_time": "0:21:19", "remaining_time": "0:25:11"} +{"current_steps": 1118, "total_steps": 2436, "loss": 1.3304778337478638, "lr": 6.570960109778655e-06, "epoch": 1.3768472906403941, "percentage": 45.89, "elapsed_time": "0:21:20", "remaining_time": "0:25:10"} +{"current_steps": 1119, "total_steps": 2436, "loss": 1.6510775089263916, "lr": 6.564155349286952e-06, "epoch": 1.3780788177339902, "percentage": 45.94, "elapsed_time": "0:21:21", "remaining_time": "0:25:08"} +{"current_steps": 1120, "total_steps": 2436, "loss": 1.3382967710494995, "lr": 6.557347375886022e-06, "epoch": 1.3793103448275863, "percentage": 45.98, "elapsed_time": "0:21:23", "remaining_time": "0:25:07"} +{"current_steps": 1121, "total_steps": 2436, "loss": 1.418992042541504, "lr": 6.550536203560028e-06, "epoch": 1.3805418719211824, "percentage": 46.02, "elapsed_time": "0:21:24", "remaining_time": "0:25:06"} +{"current_steps": 1122, "total_steps": 2436, "loss": 1.4815843105316162, "lr": 6.543721846299701e-06, "epoch": 1.3817733990147782, "percentage": 46.06, "elapsed_time": "0:21:25", "remaining_time": "0:25:05"} +{"current_steps": 1123, "total_steps": 2436, "loss": 0.9823303818702698, "lr": 6.536904318102314e-06, "epoch": 1.3830049261083743, "percentage": 46.1, "elapsed_time": "0:21:26", "remaining_time": "0:25:03"} +{"current_steps": 1124, "total_steps": 2436, "loss": 1.4959704875946045, "lr": 6.530083632971658e-06, "epoch": 1.3842364532019704, "percentage": 46.14, "elapsed_time": "0:21:27", "remaining_time": "0:25:02"} +{"current_steps": 1125, "total_steps": 2436, "loss": 1.3141142129898071, "lr": 6.523259804918001e-06, "epoch": 1.3854679802955665, "percentage": 46.18, "elapsed_time": "0:21:28", "remaining_time": "0:25:01"} +{"current_steps": 1126, "total_steps": 2436, "loss": 1.60225248336792, "lr": 6.516432847958074e-06, "epoch": 1.3866995073891626, "percentage": 46.22, "elapsed_time": "0:21:29", "remaining_time": "0:25:00"} +{"current_steps": 1127, "total_steps": 2436, "loss": 1.7774362564086914, "lr": 6.509602776115029e-06, "epoch": 1.3879310344827587, "percentage": 46.26, "elapsed_time": "0:21:30", "remaining_time": "0:24:58"} +{"current_steps": 1128, "total_steps": 2436, "loss": 1.3750693798065186, "lr": 6.502769603418423e-06, "epoch": 1.3891625615763548, "percentage": 46.31, "elapsed_time": "0:21:31", "remaining_time": "0:24:57"} +{"current_steps": 1129, "total_steps": 2436, "loss": 1.0850452184677124, "lr": 6.4959333439041775e-06, "epoch": 1.3903940886699506, "percentage": 46.35, "elapsed_time": "0:21:32", "remaining_time": "0:24:56"} +{"current_steps": 1130, "total_steps": 2436, "loss": 1.7440909147262573, "lr": 6.489094011614553e-06, "epoch": 1.3916256157635467, "percentage": 46.39, "elapsed_time": "0:21:33", "remaining_time": "0:24:55"} +{"current_steps": 1131, "total_steps": 2436, "loss": 1.5904752016067505, "lr": 6.482251620598129e-06, "epoch": 1.3928571428571428, "percentage": 46.43, "elapsed_time": "0:21:34", "remaining_time": "0:24:54"} +{"current_steps": 1132, "total_steps": 2436, "loss": 1.4864649772644043, "lr": 6.47540618490976e-06, "epoch": 1.3940886699507389, "percentage": 46.47, "elapsed_time": "0:21:35", "remaining_time": "0:24:52"} +{"current_steps": 1133, "total_steps": 2436, "loss": 1.3869491815567017, "lr": 6.4685577186105595e-06, "epoch": 1.395320197044335, "percentage": 46.51, "elapsed_time": "0:21:37", "remaining_time": "0:24:51"} +{"current_steps": 1134, "total_steps": 2436, "loss": 1.1635327339172363, "lr": 6.461706235767866e-06, "epoch": 1.396551724137931, "percentage": 46.55, "elapsed_time": "0:21:38", "remaining_time": "0:24:50"} +{"current_steps": 1135, "total_steps": 2436, "loss": 1.4063032865524292, "lr": 6.45485175045521e-06, "epoch": 1.3977832512315271, "percentage": 46.59, "elapsed_time": "0:21:39", "remaining_time": "0:24:49"} +{"current_steps": 1136, "total_steps": 2436, "loss": 2.2259998321533203, "lr": 6.447994276752293e-06, "epoch": 1.3990147783251232, "percentage": 46.63, "elapsed_time": "0:21:40", "remaining_time": "0:24:47"} +{"current_steps": 1137, "total_steps": 2436, "loss": 1.2302110195159912, "lr": 6.441133828744954e-06, "epoch": 1.4002463054187193, "percentage": 46.67, "elapsed_time": "0:21:41", "remaining_time": "0:24:46"} +{"current_steps": 1138, "total_steps": 2436, "loss": 1.2579622268676758, "lr": 6.434270420525144e-06, "epoch": 1.4014778325123154, "percentage": 46.72, "elapsed_time": "0:21:42", "remaining_time": "0:24:45"} +{"current_steps": 1139, "total_steps": 2436, "loss": 1.6761397123336792, "lr": 6.427404066190889e-06, "epoch": 1.4027093596059113, "percentage": 46.76, "elapsed_time": "0:21:43", "remaining_time": "0:24:44"} +{"current_steps": 1140, "total_steps": 2436, "loss": 1.3933346271514893, "lr": 6.4205347798462704e-06, "epoch": 1.4039408866995073, "percentage": 46.8, "elapsed_time": "0:21:44", "remaining_time": "0:24:43"} +{"current_steps": 1141, "total_steps": 2436, "loss": 1.9914003610610962, "lr": 6.413662575601391e-06, "epoch": 1.4051724137931034, "percentage": 46.84, "elapsed_time": "0:21:45", "remaining_time": "0:24:41"} +{"current_steps": 1142, "total_steps": 2436, "loss": 1.9921746253967285, "lr": 6.406787467572348e-06, "epoch": 1.4064039408866995, "percentage": 46.88, "elapsed_time": "0:21:46", "remaining_time": "0:24:40"} +{"current_steps": 1143, "total_steps": 2436, "loss": 1.6050479412078857, "lr": 6.3999094698812055e-06, "epoch": 1.4076354679802956, "percentage": 46.92, "elapsed_time": "0:21:47", "remaining_time": "0:24:39"} +{"current_steps": 1144, "total_steps": 2436, "loss": 1.7796251773834229, "lr": 6.393028596655958e-06, "epoch": 1.4088669950738917, "percentage": 46.96, "elapsed_time": "0:21:48", "remaining_time": "0:24:38"} +{"current_steps": 1145, "total_steps": 2436, "loss": 1.7936886548995972, "lr": 6.386144862030508e-06, "epoch": 1.4100985221674878, "percentage": 47.0, "elapsed_time": "0:21:49", "remaining_time": "0:24:36"} +{"current_steps": 1146, "total_steps": 2436, "loss": 1.9030745029449463, "lr": 6.37925828014464e-06, "epoch": 1.4113300492610836, "percentage": 47.04, "elapsed_time": "0:21:51", "remaining_time": "0:24:35"} +{"current_steps": 1147, "total_steps": 2436, "loss": 1.4446496963500977, "lr": 6.3723688651439806e-06, "epoch": 1.4125615763546797, "percentage": 47.09, "elapsed_time": "0:21:52", "remaining_time": "0:24:34"} +{"current_steps": 1148, "total_steps": 2436, "loss": 1.5683763027191162, "lr": 6.365476631179982e-06, "epoch": 1.4137931034482758, "percentage": 47.13, "elapsed_time": "0:21:53", "remaining_time": "0:24:33"} +{"current_steps": 1149, "total_steps": 2436, "loss": 1.4594917297363281, "lr": 6.358581592409881e-06, "epoch": 1.4150246305418719, "percentage": 47.17, "elapsed_time": "0:21:54", "remaining_time": "0:24:32"} +{"current_steps": 1150, "total_steps": 2436, "loss": 2.1706323623657227, "lr": 6.351683762996681e-06, "epoch": 1.416256157635468, "percentage": 47.21, "elapsed_time": "0:21:55", "remaining_time": "0:24:30"} +{"current_steps": 1151, "total_steps": 2436, "loss": 1.835425853729248, "lr": 6.344783157109114e-06, "epoch": 1.417487684729064, "percentage": 47.25, "elapsed_time": "0:21:56", "remaining_time": "0:24:29"} +{"current_steps": 1152, "total_steps": 2436, "loss": 1.1789867877960205, "lr": 6.337879788921615e-06, "epoch": 1.4187192118226601, "percentage": 47.29, "elapsed_time": "0:21:57", "remaining_time": "0:24:28"} +{"current_steps": 1153, "total_steps": 2436, "loss": 1.9750418663024902, "lr": 6.3309736726142965e-06, "epoch": 1.4199507389162562, "percentage": 47.33, "elapsed_time": "0:21:58", "remaining_time": "0:24:27"} +{"current_steps": 1154, "total_steps": 2436, "loss": 1.4960027933120728, "lr": 6.324064822372913e-06, "epoch": 1.4211822660098523, "percentage": 47.37, "elapsed_time": "0:21:59", "remaining_time": "0:24:25"} +{"current_steps": 1155, "total_steps": 2436, "loss": 1.12904691696167, "lr": 6.317153252388834e-06, "epoch": 1.4224137931034484, "percentage": 47.41, "elapsed_time": "0:22:00", "remaining_time": "0:24:24"} +{"current_steps": 1156, "total_steps": 2436, "loss": 1.30333411693573, "lr": 6.31023897685902e-06, "epoch": 1.4236453201970443, "percentage": 47.45, "elapsed_time": "0:22:01", "remaining_time": "0:24:23"} +{"current_steps": 1157, "total_steps": 2436, "loss": 2.5257434844970703, "lr": 6.303322009985984e-06, "epoch": 1.4248768472906403, "percentage": 47.5, "elapsed_time": "0:22:02", "remaining_time": "0:24:22"} +{"current_steps": 1158, "total_steps": 2436, "loss": 0.9684423208236694, "lr": 6.296402365977767e-06, "epoch": 1.4261083743842364, "percentage": 47.54, "elapsed_time": "0:22:03", "remaining_time": "0:24:21"} +{"current_steps": 1159, "total_steps": 2436, "loss": 1.457876443862915, "lr": 6.289480059047915e-06, "epoch": 1.4273399014778325, "percentage": 47.58, "elapsed_time": "0:22:04", "remaining_time": "0:24:19"} +{"current_steps": 1160, "total_steps": 2436, "loss": 1.5206713676452637, "lr": 6.282555103415438e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:22:06", "remaining_time": "0:24:18"} +{"current_steps": 1161, "total_steps": 2436, "loss": 1.680644154548645, "lr": 6.27562751330479e-06, "epoch": 1.4298029556650247, "percentage": 47.66, "elapsed_time": "0:22:07", "remaining_time": "0:24:17"} +{"current_steps": 1162, "total_steps": 2436, "loss": 1.3704997301101685, "lr": 6.268697302945835e-06, "epoch": 1.4310344827586206, "percentage": 47.7, "elapsed_time": "0:22:08", "remaining_time": "0:24:16"} +{"current_steps": 1163, "total_steps": 2436, "loss": 1.3250343799591064, "lr": 6.261764486573816e-06, "epoch": 1.4322660098522166, "percentage": 47.74, "elapsed_time": "0:22:09", "remaining_time": "0:24:15"} +{"current_steps": 1164, "total_steps": 2436, "loss": 1.8659427165985107, "lr": 6.254829078429336e-06, "epoch": 1.4334975369458127, "percentage": 47.78, "elapsed_time": "0:22:10", "remaining_time": "0:24:13"} +{"current_steps": 1165, "total_steps": 2436, "loss": 2.043597936630249, "lr": 6.247891092758319e-06, "epoch": 1.4347290640394088, "percentage": 47.82, "elapsed_time": "0:22:11", "remaining_time": "0:24:12"} +{"current_steps": 1166, "total_steps": 2436, "loss": 1.5634403228759766, "lr": 6.24095054381198e-06, "epoch": 1.435960591133005, "percentage": 47.87, "elapsed_time": "0:22:12", "remaining_time": "0:24:11"} +{"current_steps": 1167, "total_steps": 2436, "loss": 1.1179373264312744, "lr": 6.2340074458468014e-06, "epoch": 1.437192118226601, "percentage": 47.91, "elapsed_time": "0:22:13", "remaining_time": "0:24:10"} +{"current_steps": 1168, "total_steps": 2436, "loss": 0.8013179302215576, "lr": 6.227061813124504e-06, "epoch": 1.438423645320197, "percentage": 47.95, "elapsed_time": "0:22:14", "remaining_time": "0:24:08"} +{"current_steps": 1169, "total_steps": 2436, "loss": 1.3435392379760742, "lr": 6.220113659912012e-06, "epoch": 1.4396551724137931, "percentage": 47.99, "elapsed_time": "0:22:15", "remaining_time": "0:24:07"} +{"current_steps": 1170, "total_steps": 2436, "loss": 1.39387845993042, "lr": 6.213163000481428e-06, "epoch": 1.4408866995073892, "percentage": 48.03, "elapsed_time": "0:22:16", "remaining_time": "0:24:06"} +{"current_steps": 1171, "total_steps": 2436, "loss": 1.760462760925293, "lr": 6.206209849110001e-06, "epoch": 1.4421182266009853, "percentage": 48.07, "elapsed_time": "0:22:17", "remaining_time": "0:24:05"} +{"current_steps": 1172, "total_steps": 2436, "loss": 1.0812432765960693, "lr": 6.1992542200801035e-06, "epoch": 1.4433497536945814, "percentage": 48.11, "elapsed_time": "0:22:19", "remaining_time": "0:24:04"} +{"current_steps": 1173, "total_steps": 2436, "loss": 1.7997616529464722, "lr": 6.1922961276791925e-06, "epoch": 1.4445812807881773, "percentage": 48.15, "elapsed_time": "0:22:20", "remaining_time": "0:24:02"} +{"current_steps": 1174, "total_steps": 2436, "loss": 1.773369550704956, "lr": 6.1853355861997854e-06, "epoch": 1.4458128078817734, "percentage": 48.19, "elapsed_time": "0:22:21", "remaining_time": "0:24:01"} +{"current_steps": 1175, "total_steps": 2436, "loss": 1.9488962888717651, "lr": 6.1783726099394324e-06, "epoch": 1.4470443349753694, "percentage": 48.23, "elapsed_time": "0:22:22", "remaining_time": "0:24:00"} +{"current_steps": 1176, "total_steps": 2436, "loss": 1.6990149021148682, "lr": 6.171407213200683e-06, "epoch": 1.4482758620689655, "percentage": 48.28, "elapsed_time": "0:22:23", "remaining_time": "0:23:59"} +{"current_steps": 1177, "total_steps": 2436, "loss": 1.4307571649551392, "lr": 6.164439410291061e-06, "epoch": 1.4495073891625616, "percentage": 48.32, "elapsed_time": "0:22:24", "remaining_time": "0:23:58"} +{"current_steps": 1178, "total_steps": 2436, "loss": 1.3966443538665771, "lr": 6.157469215523031e-06, "epoch": 1.4507389162561577, "percentage": 48.36, "elapsed_time": "0:22:25", "remaining_time": "0:23:56"} +{"current_steps": 1179, "total_steps": 2436, "loss": 1.2959253787994385, "lr": 6.150496643213969e-06, "epoch": 1.4519704433497536, "percentage": 48.4, "elapsed_time": "0:22:26", "remaining_time": "0:23:55"} +{"current_steps": 1180, "total_steps": 2436, "loss": 1.4992142915725708, "lr": 6.143521707686137e-06, "epoch": 1.4532019704433496, "percentage": 48.44, "elapsed_time": "0:22:27", "remaining_time": "0:23:54"} +{"current_steps": 1181, "total_steps": 2436, "loss": 1.8196167945861816, "lr": 6.136544423266651e-06, "epoch": 1.4544334975369457, "percentage": 48.48, "elapsed_time": "0:22:28", "remaining_time": "0:23:53"} +{"current_steps": 1182, "total_steps": 2436, "loss": 1.4129021167755127, "lr": 6.129564804287454e-06, "epoch": 1.4556650246305418, "percentage": 48.52, "elapsed_time": "0:22:29", "remaining_time": "0:23:51"} +{"current_steps": 1183, "total_steps": 2436, "loss": 1.2009403705596924, "lr": 6.122582865085278e-06, "epoch": 1.456896551724138, "percentage": 48.56, "elapsed_time": "0:22:30", "remaining_time": "0:23:50"} +{"current_steps": 1184, "total_steps": 2436, "loss": 1.698556661605835, "lr": 6.115598620001627e-06, "epoch": 1.458128078817734, "percentage": 48.6, "elapsed_time": "0:22:31", "remaining_time": "0:23:49"} +{"current_steps": 1185, "total_steps": 2436, "loss": 1.5819299221038818, "lr": 6.108612083382739e-06, "epoch": 1.45935960591133, "percentage": 48.65, "elapsed_time": "0:22:32", "remaining_time": "0:23:48"} +{"current_steps": 1186, "total_steps": 2436, "loss": 1.374379277229309, "lr": 6.101623269579558e-06, "epoch": 1.4605911330049262, "percentage": 48.69, "elapsed_time": "0:22:34", "remaining_time": "0:23:47"} +{"current_steps": 1187, "total_steps": 2436, "loss": 1.2765707969665527, "lr": 6.094632192947711e-06, "epoch": 1.4618226600985222, "percentage": 48.73, "elapsed_time": "0:22:35", "remaining_time": "0:23:45"} +{"current_steps": 1188, "total_steps": 2436, "loss": 1.2740705013275146, "lr": 6.087638867847465e-06, "epoch": 1.4630541871921183, "percentage": 48.77, "elapsed_time": "0:22:36", "remaining_time": "0:23:44"} +{"current_steps": 1189, "total_steps": 2436, "loss": 1.6713453531265259, "lr": 6.08064330864371e-06, "epoch": 1.4642857142857144, "percentage": 48.81, "elapsed_time": "0:22:37", "remaining_time": "0:23:43"} +{"current_steps": 1190, "total_steps": 2436, "loss": 1.6606531143188477, "lr": 6.073645529705926e-06, "epoch": 1.4655172413793103, "percentage": 48.85, "elapsed_time": "0:22:38", "remaining_time": "0:23:42"} +{"current_steps": 1191, "total_steps": 2436, "loss": 1.6029870510101318, "lr": 6.066645545408149e-06, "epoch": 1.4667487684729064, "percentage": 48.89, "elapsed_time": "0:22:39", "remaining_time": "0:23:41"} +{"current_steps": 1192, "total_steps": 2436, "loss": 1.5884819030761719, "lr": 6.0596433701289506e-06, "epoch": 1.4679802955665024, "percentage": 48.93, "elapsed_time": "0:22:40", "remaining_time": "0:23:39"} +{"current_steps": 1193, "total_steps": 2436, "loss": 1.060668706893921, "lr": 6.052639018251394e-06, "epoch": 1.4692118226600985, "percentage": 48.97, "elapsed_time": "0:22:41", "remaining_time": "0:23:38"} +{"current_steps": 1194, "total_steps": 2436, "loss": 1.6251329183578491, "lr": 6.045632504163024e-06, "epoch": 1.4704433497536946, "percentage": 49.01, "elapsed_time": "0:22:42", "remaining_time": "0:23:37"} +{"current_steps": 1195, "total_steps": 2436, "loss": 1.2369989156723022, "lr": 6.03862384225582e-06, "epoch": 1.4716748768472907, "percentage": 49.06, "elapsed_time": "0:22:43", "remaining_time": "0:23:36"} +{"current_steps": 1196, "total_steps": 2436, "loss": 1.7742527723312378, "lr": 6.0316130469261705e-06, "epoch": 1.4729064039408866, "percentage": 49.1, "elapsed_time": "0:22:44", "remaining_time": "0:23:35"} +{"current_steps": 1197, "total_steps": 2436, "loss": 2.166492223739624, "lr": 6.024600132574855e-06, "epoch": 1.4741379310344827, "percentage": 49.14, "elapsed_time": "0:22:45", "remaining_time": "0:23:33"} +{"current_steps": 1198, "total_steps": 2436, "loss": 1.8031083345413208, "lr": 6.017585113606999e-06, "epoch": 1.4753694581280787, "percentage": 49.18, "elapsed_time": "0:22:46", "remaining_time": "0:23:32"} +{"current_steps": 1199, "total_steps": 2436, "loss": 1.9966365098953247, "lr": 6.010568004432055e-06, "epoch": 1.4766009852216748, "percentage": 49.22, "elapsed_time": "0:22:48", "remaining_time": "0:23:31"} +{"current_steps": 1200, "total_steps": 2436, "loss": 1.0125515460968018, "lr": 6.0035488194637645e-06, "epoch": 1.477832512315271, "percentage": 49.26, "elapsed_time": "0:22:49", "remaining_time": "0:23:30"} +{"current_steps": 1201, "total_steps": 2436, "loss": 1.1396842002868652, "lr": 5.9965275731201364e-06, "epoch": 1.479064039408867, "percentage": 49.3, "elapsed_time": "0:22:50", "remaining_time": "0:23:29"} +{"current_steps": 1202, "total_steps": 2436, "loss": 1.8030388355255127, "lr": 5.9895042798234125e-06, "epoch": 1.480295566502463, "percentage": 49.34, "elapsed_time": "0:22:51", "remaining_time": "0:23:27"} +{"current_steps": 1203, "total_steps": 2436, "loss": 1.4132026433944702, "lr": 5.982478954000042e-06, "epoch": 1.4815270935960592, "percentage": 49.38, "elapsed_time": "0:22:52", "remaining_time": "0:23:26"} +{"current_steps": 1204, "total_steps": 2436, "loss": 1.3726825714111328, "lr": 5.975451610080643e-06, "epoch": 1.4827586206896552, "percentage": 49.43, "elapsed_time": "0:22:53", "remaining_time": "0:23:25"} +{"current_steps": 1205, "total_steps": 2436, "loss": 2.3436193466186523, "lr": 5.968422262499983e-06, "epoch": 1.4839901477832513, "percentage": 49.47, "elapsed_time": "0:22:54", "remaining_time": "0:23:24"} +{"current_steps": 1206, "total_steps": 2436, "loss": 1.4617420434951782, "lr": 5.961390925696947e-06, "epoch": 1.4852216748768474, "percentage": 49.51, "elapsed_time": "0:22:55", "remaining_time": "0:23:23"} +{"current_steps": 1207, "total_steps": 2436, "loss": 1.8050814867019653, "lr": 5.9543576141145035e-06, "epoch": 1.4864532019704433, "percentage": 49.55, "elapsed_time": "0:22:56", "remaining_time": "0:23:21"} +{"current_steps": 1208, "total_steps": 2436, "loss": 1.3426543474197388, "lr": 5.947322342199674e-06, "epoch": 1.4876847290640394, "percentage": 49.59, "elapsed_time": "0:22:57", "remaining_time": "0:23:20"} +{"current_steps": 1209, "total_steps": 2436, "loss": 1.6211771965026855, "lr": 5.940285124403517e-06, "epoch": 1.4889162561576355, "percentage": 49.63, "elapsed_time": "0:22:58", "remaining_time": "0:23:19"} +{"current_steps": 1210, "total_steps": 2436, "loss": 2.695863723754883, "lr": 5.933245975181074e-06, "epoch": 1.4901477832512315, "percentage": 49.67, "elapsed_time": "0:22:59", "remaining_time": "0:23:18"} +{"current_steps": 1211, "total_steps": 2436, "loss": 1.2743788957595825, "lr": 5.926204908991366e-06, "epoch": 1.4913793103448276, "percentage": 49.71, "elapsed_time": "0:23:01", "remaining_time": "0:23:17"} +{"current_steps": 1212, "total_steps": 2436, "loss": 1.652765154838562, "lr": 5.919161940297346e-06, "epoch": 1.4926108374384237, "percentage": 49.75, "elapsed_time": "0:23:02", "remaining_time": "0:23:15"} +{"current_steps": 1213, "total_steps": 2436, "loss": 1.3720670938491821, "lr": 5.912117083565874e-06, "epoch": 1.4938423645320196, "percentage": 49.79, "elapsed_time": "0:23:03", "remaining_time": "0:23:14"} +{"current_steps": 1214, "total_steps": 2436, "loss": 1.222616195678711, "lr": 5.905070353267692e-06, "epoch": 1.4950738916256157, "percentage": 49.84, "elapsed_time": "0:23:04", "remaining_time": "0:23:13"} +{"current_steps": 1215, "total_steps": 2436, "loss": 1.4626069068908691, "lr": 5.898021763877388e-06, "epoch": 1.4963054187192117, "percentage": 49.88, "elapsed_time": "0:23:05", "remaining_time": "0:23:12"} +{"current_steps": 1216, "total_steps": 2436, "loss": 1.7813634872436523, "lr": 5.890971329873366e-06, "epoch": 1.4975369458128078, "percentage": 49.92, "elapsed_time": "0:23:06", "remaining_time": "0:23:11"} +{"current_steps": 1217, "total_steps": 2436, "loss": 0.5114675760269165, "lr": 5.883919065737827e-06, "epoch": 1.498768472906404, "percentage": 49.96, "elapsed_time": "0:23:07", "remaining_time": "0:23:09"} +{"current_steps": 1218, "total_steps": 2436, "loss": 1.6000962257385254, "lr": 5.876864985956722e-06, "epoch": 1.5, "percentage": 50.0, "elapsed_time": "0:23:08", "remaining_time": "0:23:08"} +{"current_steps": 1219, "total_steps": 2436, "loss": 1.5674512386322021, "lr": 5.869809105019738e-06, "epoch": 1.501231527093596, "percentage": 50.04, "elapsed_time": "0:23:09", "remaining_time": "0:23:07"} +{"current_steps": 1220, "total_steps": 2436, "loss": 1.7963311672210693, "lr": 5.8627514374202596e-06, "epoch": 1.5024630541871922, "percentage": 50.08, "elapsed_time": "0:23:10", "remaining_time": "0:23:06"} +{"current_steps": 1221, "total_steps": 2436, "loss": 1.1649596691131592, "lr": 5.85569199765534e-06, "epoch": 1.5036945812807883, "percentage": 50.12, "elapsed_time": "0:23:11", "remaining_time": "0:23:05"} +{"current_steps": 1222, "total_steps": 2436, "loss": 1.140197992324829, "lr": 5.848630800225678e-06, "epoch": 1.5049261083743843, "percentage": 50.16, "elapsed_time": "0:23:12", "remaining_time": "0:23:03"} +{"current_steps": 1223, "total_steps": 2436, "loss": 1.865435242652893, "lr": 5.841567859635572e-06, "epoch": 1.5061576354679804, "percentage": 50.21, "elapsed_time": "0:23:14", "remaining_time": "0:23:02"} +{"current_steps": 1224, "total_steps": 2436, "loss": 1.457642912864685, "lr": 5.834503190392912e-06, "epoch": 1.5073891625615765, "percentage": 50.25, "elapsed_time": "0:23:15", "remaining_time": "0:23:01"} +{"current_steps": 1225, "total_steps": 2436, "loss": 1.3783336877822876, "lr": 5.827436807009133e-06, "epoch": 1.5086206896551724, "percentage": 50.29, "elapsed_time": "0:23:16", "remaining_time": "0:23:00"} +{"current_steps": 1226, "total_steps": 2436, "loss": 1.939549207687378, "lr": 5.8203687239991935e-06, "epoch": 1.5098522167487685, "percentage": 50.33, "elapsed_time": "0:23:17", "remaining_time": "0:22:59"} +{"current_steps": 1227, "total_steps": 2436, "loss": 1.3607597351074219, "lr": 5.813298955881542e-06, "epoch": 1.5110837438423645, "percentage": 50.37, "elapsed_time": "0:23:18", "remaining_time": "0:22:57"} +{"current_steps": 1228, "total_steps": 2436, "loss": 0.81966233253479, "lr": 5.806227517178089e-06, "epoch": 1.5123152709359606, "percentage": 50.41, "elapsed_time": "0:23:19", "remaining_time": "0:22:56"} +{"current_steps": 1229, "total_steps": 2436, "loss": 0.9481602311134338, "lr": 5.799154422414174e-06, "epoch": 1.5135467980295565, "percentage": 50.45, "elapsed_time": "0:23:20", "remaining_time": "0:22:55"} +{"current_steps": 1230, "total_steps": 2436, "loss": 1.3550889492034912, "lr": 5.79207968611854e-06, "epoch": 1.5147783251231526, "percentage": 50.49, "elapsed_time": "0:23:21", "remaining_time": "0:22:54"} +{"current_steps": 1231, "total_steps": 2436, "loss": 2.022425889968872, "lr": 5.785003322823307e-06, "epoch": 1.5160098522167487, "percentage": 50.53, "elapsed_time": "0:23:22", "remaining_time": "0:22:53"} +{"current_steps": 1232, "total_steps": 2436, "loss": 1.5649950504302979, "lr": 5.777925347063927e-06, "epoch": 1.5172413793103448, "percentage": 50.57, "elapsed_time": "0:23:23", "remaining_time": "0:22:51"} +{"current_steps": 1233, "total_steps": 2436, "loss": 1.9720977544784546, "lr": 5.7708457733791715e-06, "epoch": 1.5184729064039408, "percentage": 50.62, "elapsed_time": "0:23:24", "remaining_time": "0:22:50"} +{"current_steps": 1234, "total_steps": 2436, "loss": 1.0029213428497314, "lr": 5.763764616311089e-06, "epoch": 1.519704433497537, "percentage": 50.66, "elapsed_time": "0:23:25", "remaining_time": "0:22:49"} +{"current_steps": 1235, "total_steps": 2436, "loss": 1.8926727771759033, "lr": 5.756681890404987e-06, "epoch": 1.520935960591133, "percentage": 50.7, "elapsed_time": "0:23:27", "remaining_time": "0:22:48"} +{"current_steps": 1236, "total_steps": 2436, "loss": 1.462761402130127, "lr": 5.749597610209392e-06, "epoch": 1.522167487684729, "percentage": 50.74, "elapsed_time": "0:23:28", "remaining_time": "0:22:47"} +{"current_steps": 1237, "total_steps": 2436, "loss": 2.1467416286468506, "lr": 5.7425117902760195e-06, "epoch": 1.5233990147783252, "percentage": 50.78, "elapsed_time": "0:23:29", "remaining_time": "0:22:45"} +{"current_steps": 1238, "total_steps": 2436, "loss": 1.191473364830017, "lr": 5.7354244451597545e-06, "epoch": 1.5246305418719213, "percentage": 50.82, "elapsed_time": "0:23:30", "remaining_time": "0:22:44"} +{"current_steps": 1239, "total_steps": 2436, "loss": 0.896723210811615, "lr": 5.72833558941861e-06, "epoch": 1.5258620689655173, "percentage": 50.86, "elapsed_time": "0:23:31", "remaining_time": "0:22:43"} +{"current_steps": 1240, "total_steps": 2436, "loss": 0.8741526007652283, "lr": 5.721245237613704e-06, "epoch": 1.5270935960591134, "percentage": 50.9, "elapsed_time": "0:23:32", "remaining_time": "0:22:42"} +{"current_steps": 1241, "total_steps": 2436, "loss": 1.6330994367599487, "lr": 5.714153404309228e-06, "epoch": 1.5283251231527095, "percentage": 50.94, "elapsed_time": "0:23:33", "remaining_time": "0:22:41"} +{"current_steps": 1242, "total_steps": 2436, "loss": 2.2386982440948486, "lr": 5.707060104072415e-06, "epoch": 1.5295566502463054, "percentage": 50.99, "elapsed_time": "0:23:34", "remaining_time": "0:22:39"} +{"current_steps": 1243, "total_steps": 2436, "loss": 1.5266145467758179, "lr": 5.6999653514735124e-06, "epoch": 1.5307881773399015, "percentage": 51.03, "elapsed_time": "0:23:35", "remaining_time": "0:22:38"} +{"current_steps": 1244, "total_steps": 2436, "loss": 1.4918262958526611, "lr": 5.6928691610857515e-06, "epoch": 1.5320197044334976, "percentage": 51.07, "elapsed_time": "0:23:36", "remaining_time": "0:22:37"} +{"current_steps": 1245, "total_steps": 2436, "loss": 1.241945743560791, "lr": 5.685771547485312e-06, "epoch": 1.5332512315270936, "percentage": 51.11, "elapsed_time": "0:23:37", "remaining_time": "0:22:36"} +{"current_steps": 1246, "total_steps": 2436, "loss": 1.1569273471832275, "lr": 5.678672525251304e-06, "epoch": 1.5344827586206895, "percentage": 51.15, "elapsed_time": "0:23:38", "remaining_time": "0:22:35"} +{"current_steps": 1247, "total_steps": 2436, "loss": 1.946014404296875, "lr": 5.671572108965729e-06, "epoch": 1.5357142857142856, "percentage": 51.19, "elapsed_time": "0:23:39", "remaining_time": "0:22:33"} +{"current_steps": 1248, "total_steps": 2436, "loss": 1.8601741790771484, "lr": 5.664470313213448e-06, "epoch": 1.5369458128078817, "percentage": 51.23, "elapsed_time": "0:23:41", "remaining_time": "0:22:32"} +{"current_steps": 1249, "total_steps": 2436, "loss": 1.7164549827575684, "lr": 5.65736715258216e-06, "epoch": 1.5381773399014778, "percentage": 51.27, "elapsed_time": "0:23:42", "remaining_time": "0:22:31"} +{"current_steps": 1250, "total_steps": 2436, "loss": 2.0459697246551514, "lr": 5.650262641662367e-06, "epoch": 1.5394088669950738, "percentage": 51.31, "elapsed_time": "0:23:43", "remaining_time": "0:22:30"} +{"current_steps": 1251, "total_steps": 2436, "loss": 1.4485859870910645, "lr": 5.643156795047343e-06, "epoch": 1.54064039408867, "percentage": 51.35, "elapsed_time": "0:23:44", "remaining_time": "0:22:29"} +{"current_steps": 1252, "total_steps": 2436, "loss": 1.8672525882720947, "lr": 5.6360496273331055e-06, "epoch": 1.541871921182266, "percentage": 51.4, "elapsed_time": "0:23:45", "remaining_time": "0:22:27"} +{"current_steps": 1253, "total_steps": 2436, "loss": 1.4309324026107788, "lr": 5.628941153118388e-06, "epoch": 1.543103448275862, "percentage": 51.44, "elapsed_time": "0:23:46", "remaining_time": "0:22:26"} +{"current_steps": 1254, "total_steps": 2436, "loss": 1.8784745931625366, "lr": 5.621831387004603e-06, "epoch": 1.5443349753694582, "percentage": 51.48, "elapsed_time": "0:23:47", "remaining_time": "0:22:25"} +{"current_steps": 1255, "total_steps": 2436, "loss": 2.109992027282715, "lr": 5.6147203435958246e-06, "epoch": 1.5455665024630543, "percentage": 51.52, "elapsed_time": "0:23:48", "remaining_time": "0:22:24"} +{"current_steps": 1256, "total_steps": 2436, "loss": 1.5892071723937988, "lr": 5.607608037498742e-06, "epoch": 1.5467980295566504, "percentage": 51.56, "elapsed_time": "0:23:49", "remaining_time": "0:22:23"} +{"current_steps": 1257, "total_steps": 2436, "loss": 1.3583379983901978, "lr": 5.600494483322643e-06, "epoch": 1.5480295566502464, "percentage": 51.6, "elapsed_time": "0:23:50", "remaining_time": "0:22:21"} +{"current_steps": 1258, "total_steps": 2436, "loss": 2.126896381378174, "lr": 5.593379695679378e-06, "epoch": 1.5492610837438425, "percentage": 51.64, "elapsed_time": "0:23:51", "remaining_time": "0:22:20"} +{"current_steps": 1259, "total_steps": 2436, "loss": 1.7454299926757812, "lr": 5.586263689183332e-06, "epoch": 1.5504926108374384, "percentage": 51.68, "elapsed_time": "0:23:52", "remaining_time": "0:22:19"} +{"current_steps": 1260, "total_steps": 2436, "loss": 1.1533763408660889, "lr": 5.5791464784513905e-06, "epoch": 1.5517241379310345, "percentage": 51.72, "elapsed_time": "0:23:53", "remaining_time": "0:22:18"} +{"current_steps": 1261, "total_steps": 2436, "loss": 1.4818049669265747, "lr": 5.572028078102917e-06, "epoch": 1.5529556650246306, "percentage": 51.77, "elapsed_time": "0:23:55", "remaining_time": "0:22:17"} +{"current_steps": 1262, "total_steps": 2436, "loss": 1.7103283405303955, "lr": 5.564908502759714e-06, "epoch": 1.5541871921182266, "percentage": 51.81, "elapsed_time": "0:23:56", "remaining_time": "0:22:15"} +{"current_steps": 1263, "total_steps": 2436, "loss": 2.1653401851654053, "lr": 5.557787767046001e-06, "epoch": 1.5554187192118225, "percentage": 51.85, "elapsed_time": "0:23:57", "remaining_time": "0:22:14"} +{"current_steps": 1264, "total_steps": 2436, "loss": 1.3127275705337524, "lr": 5.55066588558838e-06, "epoch": 1.5566502463054186, "percentage": 51.89, "elapsed_time": "0:23:58", "remaining_time": "0:22:13"} +{"current_steps": 1265, "total_steps": 2436, "loss": 1.0865871906280518, "lr": 5.543542873015806e-06, "epoch": 1.5578817733990147, "percentage": 51.93, "elapsed_time": "0:23:59", "remaining_time": "0:22:12"} +{"current_steps": 1266, "total_steps": 2436, "loss": 1.341281533241272, "lr": 5.536418743959559e-06, "epoch": 1.5591133004926108, "percentage": 51.97, "elapsed_time": "0:24:00", "remaining_time": "0:22:11"} +{"current_steps": 1267, "total_steps": 2436, "loss": 1.1612720489501953, "lr": 5.529293513053207e-06, "epoch": 1.5603448275862069, "percentage": 52.01, "elapsed_time": "0:24:01", "remaining_time": "0:22:10"} +{"current_steps": 1268, "total_steps": 2436, "loss": 1.7491642236709595, "lr": 5.522167194932588e-06, "epoch": 1.561576354679803, "percentage": 52.05, "elapsed_time": "0:24:02", "remaining_time": "0:22:08"} +{"current_steps": 1269, "total_steps": 2436, "loss": 1.8244414329528809, "lr": 5.515039804235772e-06, "epoch": 1.562807881773399, "percentage": 52.09, "elapsed_time": "0:24:03", "remaining_time": "0:22:07"} +{"current_steps": 1270, "total_steps": 2436, "loss": 1.6449997425079346, "lr": 5.50791135560303e-06, "epoch": 1.564039408866995, "percentage": 52.13, "elapsed_time": "0:24:04", "remaining_time": "0:22:06"} +{"current_steps": 1271, "total_steps": 2436, "loss": 1.258559226989746, "lr": 5.5007818636768055e-06, "epoch": 1.5652709359605912, "percentage": 52.18, "elapsed_time": "0:24:05", "remaining_time": "0:22:05"} +{"current_steps": 1272, "total_steps": 2436, "loss": 2.075775146484375, "lr": 5.493651343101686e-06, "epoch": 1.5665024630541873, "percentage": 52.22, "elapsed_time": "0:24:06", "remaining_time": "0:22:04"} +{"current_steps": 1273, "total_steps": 2436, "loss": 1.8196138143539429, "lr": 5.486519808524374e-06, "epoch": 1.5677339901477834, "percentage": 52.26, "elapsed_time": "0:24:07", "remaining_time": "0:22:02"} +{"current_steps": 1274, "total_steps": 2436, "loss": 1.129037618637085, "lr": 5.479387274593653e-06, "epoch": 1.5689655172413794, "percentage": 52.3, "elapsed_time": "0:24:09", "remaining_time": "0:22:01"} +{"current_steps": 1275, "total_steps": 2436, "loss": 1.7367748022079468, "lr": 5.472253755960358e-06, "epoch": 1.5701970443349755, "percentage": 52.34, "elapsed_time": "0:24:10", "remaining_time": "0:22:00"} +{"current_steps": 1276, "total_steps": 2436, "loss": 1.9274532794952393, "lr": 5.4651192672773475e-06, "epoch": 1.5714285714285714, "percentage": 52.38, "elapsed_time": "0:24:11", "remaining_time": "0:21:59"} +{"current_steps": 1277, "total_steps": 2436, "loss": 1.4018654823303223, "lr": 5.457983823199475e-06, "epoch": 1.5726600985221675, "percentage": 52.42, "elapsed_time": "0:24:12", "remaining_time": "0:21:58"} +{"current_steps": 1278, "total_steps": 2436, "loss": 1.383131504058838, "lr": 5.450847438383555e-06, "epoch": 1.5738916256157636, "percentage": 52.46, "elapsed_time": "0:24:13", "remaining_time": "0:21:56"} +{"current_steps": 1279, "total_steps": 2436, "loss": 1.277740716934204, "lr": 5.443710127488331e-06, "epoch": 1.5751231527093597, "percentage": 52.5, "elapsed_time": "0:24:14", "remaining_time": "0:21:55"} +{"current_steps": 1280, "total_steps": 2436, "loss": 1.507627010345459, "lr": 5.4365719051744556e-06, "epoch": 1.5763546798029555, "percentage": 52.55, "elapsed_time": "0:24:15", "remaining_time": "0:21:54"} +{"current_steps": 1281, "total_steps": 2436, "loss": 1.609743595123291, "lr": 5.429432786104446e-06, "epoch": 1.5775862068965516, "percentage": 52.59, "elapsed_time": "0:24:16", "remaining_time": "0:21:53"} +{"current_steps": 1282, "total_steps": 2436, "loss": 3.7705276012420654, "lr": 5.422292784942666e-06, "epoch": 1.5788177339901477, "percentage": 52.63, "elapsed_time": "0:24:17", "remaining_time": "0:21:52"} +{"current_steps": 1283, "total_steps": 2436, "loss": 1.5003160238265991, "lr": 5.415151916355292e-06, "epoch": 1.5800492610837438, "percentage": 52.67, "elapsed_time": "0:24:18", "remaining_time": "0:21:50"} +{"current_steps": 1284, "total_steps": 2436, "loss": 2.2466366291046143, "lr": 5.408010195010278e-06, "epoch": 1.5812807881773399, "percentage": 52.71, "elapsed_time": "0:24:19", "remaining_time": "0:21:49"} +{"current_steps": 1285, "total_steps": 2436, "loss": 1.0722277164459229, "lr": 5.400867635577335e-06, "epoch": 1.582512315270936, "percentage": 52.75, "elapsed_time": "0:24:20", "remaining_time": "0:21:48"} +{"current_steps": 1286, "total_steps": 2436, "loss": 1.3113644123077393, "lr": 5.3937242527278885e-06, "epoch": 1.583743842364532, "percentage": 52.79, "elapsed_time": "0:24:22", "remaining_time": "0:21:47"} +{"current_steps": 1287, "total_steps": 2436, "loss": 1.4688694477081299, "lr": 5.3865800611350634e-06, "epoch": 1.5849753694581281, "percentage": 52.83, "elapsed_time": "0:24:23", "remaining_time": "0:21:46"} +{"current_steps": 1288, "total_steps": 2436, "loss": 1.3646764755249023, "lr": 5.379435075473641e-06, "epoch": 1.5862068965517242, "percentage": 52.87, "elapsed_time": "0:24:24", "remaining_time": "0:21:45"} +{"current_steps": 1289, "total_steps": 2436, "loss": 1.6248177289962769, "lr": 5.372289310420032e-06, "epoch": 1.5874384236453203, "percentage": 52.91, "elapsed_time": "0:24:25", "remaining_time": "0:21:43"} +{"current_steps": 1290, "total_steps": 2436, "loss": 1.5507471561431885, "lr": 5.365142780652255e-06, "epoch": 1.5886699507389164, "percentage": 52.96, "elapsed_time": "0:24:26", "remaining_time": "0:21:42"} +{"current_steps": 1291, "total_steps": 2436, "loss": 1.2866086959838867, "lr": 5.35799550084989e-06, "epoch": 1.5899014778325125, "percentage": 53.0, "elapsed_time": "0:24:27", "remaining_time": "0:21:41"} +{"current_steps": 1292, "total_steps": 2436, "loss": 2.336108684539795, "lr": 5.350847485694067e-06, "epoch": 1.5911330049261085, "percentage": 53.04, "elapsed_time": "0:24:28", "remaining_time": "0:21:40"} +{"current_steps": 1293, "total_steps": 2436, "loss": 1.6604368686676025, "lr": 5.343698749867421e-06, "epoch": 1.5923645320197044, "percentage": 53.08, "elapsed_time": "0:24:29", "remaining_time": "0:21:39"} +{"current_steps": 1294, "total_steps": 2436, "loss": 1.2169203758239746, "lr": 5.336549308054066e-06, "epoch": 1.5935960591133005, "percentage": 53.12, "elapsed_time": "0:24:30", "remaining_time": "0:21:37"} +{"current_steps": 1295, "total_steps": 2436, "loss": 1.546027421951294, "lr": 5.329399174939572e-06, "epoch": 1.5948275862068966, "percentage": 53.16, "elapsed_time": "0:24:31", "remaining_time": "0:21:36"} +{"current_steps": 1296, "total_steps": 2436, "loss": 1.1372979879379272, "lr": 5.3222483652109235e-06, "epoch": 1.5960591133004927, "percentage": 53.2, "elapsed_time": "0:24:32", "remaining_time": "0:21:35"} +{"current_steps": 1297, "total_steps": 2436, "loss": 1.3435921669006348, "lr": 5.315096893556497e-06, "epoch": 1.5972906403940885, "percentage": 53.24, "elapsed_time": "0:24:33", "remaining_time": "0:21:34"} +{"current_steps": 1298, "total_steps": 2436, "loss": 1.522647500038147, "lr": 5.307944774666029e-06, "epoch": 1.5985221674876846, "percentage": 53.28, "elapsed_time": "0:24:35", "remaining_time": "0:21:33"} +{"current_steps": 1299, "total_steps": 2436, "loss": 2.0829434394836426, "lr": 5.300792023230587e-06, "epoch": 1.5997536945812807, "percentage": 53.33, "elapsed_time": "0:24:36", "remaining_time": "0:21:32"} +{"current_steps": 1300, "total_steps": 2436, "loss": 1.761828064918518, "lr": 5.2936386539425325e-06, "epoch": 1.6009852216748768, "percentage": 53.37, "elapsed_time": "0:24:37", "remaining_time": "0:21:30"} +{"current_steps": 1301, "total_steps": 2436, "loss": 2.4108588695526123, "lr": 5.2864846814955e-06, "epoch": 1.6022167487684729, "percentage": 53.41, "elapsed_time": "0:24:38", "remaining_time": "0:21:29"} +{"current_steps": 1302, "total_steps": 2436, "loss": 1.626701831817627, "lr": 5.279330120584365e-06, "epoch": 1.603448275862069, "percentage": 53.45, "elapsed_time": "0:24:39", "remaining_time": "0:21:28"} +{"current_steps": 1303, "total_steps": 2436, "loss": 1.2424887418746948, "lr": 5.272174985905207e-06, "epoch": 1.604679802955665, "percentage": 53.49, "elapsed_time": "0:24:40", "remaining_time": "0:21:27"} +{"current_steps": 1304, "total_steps": 2436, "loss": 2.149031639099121, "lr": 5.2650192921552845e-06, "epoch": 1.6059113300492611, "percentage": 53.53, "elapsed_time": "0:24:41", "remaining_time": "0:21:26"} +{"current_steps": 1305, "total_steps": 2436, "loss": 2.6947379112243652, "lr": 5.257863054033012e-06, "epoch": 1.6071428571428572, "percentage": 53.57, "elapsed_time": "0:24:42", "remaining_time": "0:21:24"} +{"current_steps": 1306, "total_steps": 2436, "loss": 1.665069818496704, "lr": 5.25070628623791e-06, "epoch": 1.6083743842364533, "percentage": 53.61, "elapsed_time": "0:24:43", "remaining_time": "0:21:23"} +{"current_steps": 1307, "total_steps": 2436, "loss": 1.3887734413146973, "lr": 5.243549003470599e-06, "epoch": 1.6096059113300494, "percentage": 53.65, "elapsed_time": "0:24:44", "remaining_time": "0:21:22"} +{"current_steps": 1308, "total_steps": 2436, "loss": 1.340559720993042, "lr": 5.236391220432745e-06, "epoch": 1.6108374384236455, "percentage": 53.69, "elapsed_time": "0:24:45", "remaining_time": "0:21:21"} +{"current_steps": 1309, "total_steps": 2436, "loss": 1.1291146278381348, "lr": 5.229232951827054e-06, "epoch": 1.6120689655172413, "percentage": 53.74, "elapsed_time": "0:24:46", "remaining_time": "0:21:20"} +{"current_steps": 1310, "total_steps": 2436, "loss": 1.8375647068023682, "lr": 5.222074212357221e-06, "epoch": 1.6133004926108374, "percentage": 53.78, "elapsed_time": "0:24:47", "remaining_time": "0:21:18"} +{"current_steps": 1311, "total_steps": 2436, "loss": 1.3299870491027832, "lr": 5.2149150167279106e-06, "epoch": 1.6145320197044335, "percentage": 53.82, "elapsed_time": "0:24:49", "remaining_time": "0:21:17"} +{"current_steps": 1312, "total_steps": 2436, "loss": 1.1574440002441406, "lr": 5.2077553796447254e-06, "epoch": 1.6157635467980296, "percentage": 53.86, "elapsed_time": "0:24:50", "remaining_time": "0:21:16"} +{"current_steps": 1313, "total_steps": 2436, "loss": 1.8118785619735718, "lr": 5.200595315814174e-06, "epoch": 1.6169950738916257, "percentage": 53.9, "elapsed_time": "0:24:51", "remaining_time": "0:21:15"} +{"current_steps": 1314, "total_steps": 2436, "loss": 1.333923101425171, "lr": 5.19343483994364e-06, "epoch": 1.6182266009852215, "percentage": 53.94, "elapsed_time": "0:24:52", "remaining_time": "0:21:14"} +{"current_steps": 1315, "total_steps": 2436, "loss": 1.2107478380203247, "lr": 5.18627396674136e-06, "epoch": 1.6194581280788176, "percentage": 53.98, "elapsed_time": "0:24:53", "remaining_time": "0:21:13"} +{"current_steps": 1316, "total_steps": 2436, "loss": 1.662817120552063, "lr": 5.1791127109163734e-06, "epoch": 1.6206896551724137, "percentage": 54.02, "elapsed_time": "0:24:54", "remaining_time": "0:21:11"} +{"current_steps": 1317, "total_steps": 2436, "loss": 1.7790195941925049, "lr": 5.17195108717852e-06, "epoch": 1.6219211822660098, "percentage": 54.06, "elapsed_time": "0:24:55", "remaining_time": "0:21:10"} +{"current_steps": 1318, "total_steps": 2436, "loss": 1.5893058776855469, "lr": 5.164789110238387e-06, "epoch": 1.6231527093596059, "percentage": 54.11, "elapsed_time": "0:24:56", "remaining_time": "0:21:09"} +{"current_steps": 1319, "total_steps": 2436, "loss": 1.256395936012268, "lr": 5.15762679480729e-06, "epoch": 1.624384236453202, "percentage": 54.15, "elapsed_time": "0:24:57", "remaining_time": "0:21:08"} +{"current_steps": 1320, "total_steps": 2436, "loss": 1.3061628341674805, "lr": 5.150464155597239e-06, "epoch": 1.625615763546798, "percentage": 54.19, "elapsed_time": "0:24:58", "remaining_time": "0:21:07"} +{"current_steps": 1321, "total_steps": 2436, "loss": 1.4399319887161255, "lr": 5.143301207320909e-06, "epoch": 1.6268472906403941, "percentage": 54.23, "elapsed_time": "0:24:59", "remaining_time": "0:21:05"} +{"current_steps": 1322, "total_steps": 2436, "loss": 1.2071207761764526, "lr": 5.136137964691609e-06, "epoch": 1.6280788177339902, "percentage": 54.27, "elapsed_time": "0:25:00", "remaining_time": "0:21:04"} +{"current_steps": 1323, "total_steps": 2436, "loss": 2.2784008979797363, "lr": 5.128974442423254e-06, "epoch": 1.6293103448275863, "percentage": 54.31, "elapsed_time": "0:25:01", "remaining_time": "0:21:03"} +{"current_steps": 1324, "total_steps": 2436, "loss": 1.3703962564468384, "lr": 5.121810655230336e-06, "epoch": 1.6305418719211824, "percentage": 54.35, "elapsed_time": "0:25:03", "remaining_time": "0:21:02"} +{"current_steps": 1325, "total_steps": 2436, "loss": 0.6955282688140869, "lr": 5.114646617827884e-06, "epoch": 1.6317733990147785, "percentage": 54.39, "elapsed_time": "0:25:04", "remaining_time": "0:21:01"} +{"current_steps": 1326, "total_steps": 2436, "loss": 1.5774227380752563, "lr": 5.107482344931448e-06, "epoch": 1.6330049261083743, "percentage": 54.43, "elapsed_time": "0:25:05", "remaining_time": "0:21:00"} +{"current_steps": 1327, "total_steps": 2436, "loss": 1.6811349391937256, "lr": 5.100317851257057e-06, "epoch": 1.6342364532019704, "percentage": 54.47, "elapsed_time": "0:25:06", "remaining_time": "0:20:58"} +{"current_steps": 1328, "total_steps": 2436, "loss": 1.563596487045288, "lr": 5.093153151521196e-06, "epoch": 1.6354679802955665, "percentage": 54.52, "elapsed_time": "0:25:07", "remaining_time": "0:20:57"} +{"current_steps": 1329, "total_steps": 2436, "loss": 1.44309401512146, "lr": 5.085988260440776e-06, "epoch": 1.6366995073891626, "percentage": 54.56, "elapsed_time": "0:25:08", "remaining_time": "0:20:56"} +{"current_steps": 1330, "total_steps": 2436, "loss": 1.5392205715179443, "lr": 5.0788231927330924e-06, "epoch": 1.6379310344827587, "percentage": 54.6, "elapsed_time": "0:25:09", "remaining_time": "0:20:55"} +{"current_steps": 1331, "total_steps": 2436, "loss": 0.9557719826698303, "lr": 5.0716579631158124e-06, "epoch": 1.6391625615763545, "percentage": 54.64, "elapsed_time": "0:25:10", "remaining_time": "0:20:54"} +{"current_steps": 1332, "total_steps": 2436, "loss": 1.1032493114471436, "lr": 5.064492586306931e-06, "epoch": 1.6403940886699506, "percentage": 54.68, "elapsed_time": "0:25:11", "remaining_time": "0:20:52"} +{"current_steps": 1333, "total_steps": 2436, "loss": 1.4907091856002808, "lr": 5.057327077024745e-06, "epoch": 1.6416256157635467, "percentage": 54.72, "elapsed_time": "0:25:12", "remaining_time": "0:20:51"} +{"current_steps": 1334, "total_steps": 2436, "loss": 1.4919164180755615, "lr": 5.050161449987828e-06, "epoch": 1.6428571428571428, "percentage": 54.76, "elapsed_time": "0:25:13", "remaining_time": "0:20:50"} +{"current_steps": 1335, "total_steps": 2436, "loss": 2.177396297454834, "lr": 5.0429957199149905e-06, "epoch": 1.6440886699507389, "percentage": 54.8, "elapsed_time": "0:25:14", "remaining_time": "0:20:49"} +{"current_steps": 1336, "total_steps": 2436, "loss": 1.2386332750320435, "lr": 5.035829901525258e-06, "epoch": 1.645320197044335, "percentage": 54.84, "elapsed_time": "0:25:15", "remaining_time": "0:20:48"} +{"current_steps": 1337, "total_steps": 2436, "loss": 1.2984986305236816, "lr": 5.028664009537835e-06, "epoch": 1.646551724137931, "percentage": 54.89, "elapsed_time": "0:25:17", "remaining_time": "0:20:47"} +{"current_steps": 1338, "total_steps": 2436, "loss": 1.1399617195129395, "lr": 5.021498058672076e-06, "epoch": 1.6477832512315271, "percentage": 54.93, "elapsed_time": "0:25:18", "remaining_time": "0:20:45"} +{"current_steps": 1339, "total_steps": 2436, "loss": 1.9816789627075195, "lr": 5.014332063647462e-06, "epoch": 1.6490147783251232, "percentage": 54.97, "elapsed_time": "0:25:19", "remaining_time": "0:20:44"} +{"current_steps": 1340, "total_steps": 2436, "loss": 1.4210541248321533, "lr": 5.007166039183561e-06, "epoch": 1.6502463054187193, "percentage": 55.01, "elapsed_time": "0:25:20", "remaining_time": "0:20:43"} +{"current_steps": 1341, "total_steps": 2436, "loss": 1.5061390399932861, "lr": 5e-06, "epoch": 1.6514778325123154, "percentage": 55.05, "elapsed_time": "0:25:21", "remaining_time": "0:20:42"} +{"current_steps": 1342, "total_steps": 2436, "loss": 1.4701118469238281, "lr": 4.99283396081644e-06, "epoch": 1.6527093596059115, "percentage": 55.09, "elapsed_time": "0:25:22", "remaining_time": "0:20:41"} +{"current_steps": 1343, "total_steps": 2436, "loss": 1.4879779815673828, "lr": 4.985667936352538e-06, "epoch": 1.6539408866995073, "percentage": 55.13, "elapsed_time": "0:25:23", "remaining_time": "0:20:39"} +{"current_steps": 1344, "total_steps": 2436, "loss": 1.51373291015625, "lr": 4.978501941327926e-06, "epoch": 1.6551724137931034, "percentage": 55.17, "elapsed_time": "0:25:24", "remaining_time": "0:20:38"} +{"current_steps": 1345, "total_steps": 2436, "loss": 1.5439019203186035, "lr": 4.971335990462168e-06, "epoch": 1.6564039408866995, "percentage": 55.21, "elapsed_time": "0:25:25", "remaining_time": "0:20:37"} +{"current_steps": 1346, "total_steps": 2436, "loss": 1.7145721912384033, "lr": 4.964170098474744e-06, "epoch": 1.6576354679802956, "percentage": 55.25, "elapsed_time": "0:25:26", "remaining_time": "0:20:36"} +{"current_steps": 1347, "total_steps": 2436, "loss": 1.6367833614349365, "lr": 4.95700428008501e-06, "epoch": 1.6588669950738915, "percentage": 55.3, "elapsed_time": "0:25:27", "remaining_time": "0:20:35"} +{"current_steps": 1348, "total_steps": 2436, "loss": 1.4300103187561035, "lr": 4.949838550012172e-06, "epoch": 1.6600985221674875, "percentage": 55.34, "elapsed_time": "0:25:28", "remaining_time": "0:20:34"} +{"current_steps": 1349, "total_steps": 2436, "loss": 2.0569915771484375, "lr": 4.942672922975255e-06, "epoch": 1.6613300492610836, "percentage": 55.38, "elapsed_time": "0:25:30", "remaining_time": "0:20:32"} +{"current_steps": 1350, "total_steps": 2436, "loss": 1.1028980016708374, "lr": 4.935507413693071e-06, "epoch": 1.6625615763546797, "percentage": 55.42, "elapsed_time": "0:25:31", "remaining_time": "0:20:31"} +{"current_steps": 1351, "total_steps": 2436, "loss": 1.6323003768920898, "lr": 4.928342036884189e-06, "epoch": 1.6637931034482758, "percentage": 55.46, "elapsed_time": "0:25:32", "remaining_time": "0:20:30"} +{"current_steps": 1352, "total_steps": 2436, "loss": 1.5050472021102905, "lr": 4.921176807266909e-06, "epoch": 1.6650246305418719, "percentage": 55.5, "elapsed_time": "0:25:33", "remaining_time": "0:20:29"} +{"current_steps": 1353, "total_steps": 2436, "loss": 1.3893849849700928, "lr": 4.914011739559225e-06, "epoch": 1.666256157635468, "percentage": 55.54, "elapsed_time": "0:25:34", "remaining_time": "0:20:28"} +{"current_steps": 1354, "total_steps": 2436, "loss": 1.1478514671325684, "lr": 4.906846848478803e-06, "epoch": 1.667487684729064, "percentage": 55.58, "elapsed_time": "0:25:35", "remaining_time": "0:20:27"} +{"current_steps": 1355, "total_steps": 2436, "loss": 1.2397665977478027, "lr": 4.899682148742944e-06, "epoch": 1.6687192118226601, "percentage": 55.62, "elapsed_time": "0:25:36", "remaining_time": "0:20:25"} +{"current_steps": 1356, "total_steps": 2436, "loss": 1.1658974885940552, "lr": 4.892517655068555e-06, "epoch": 1.6699507389162562, "percentage": 55.67, "elapsed_time": "0:25:37", "remaining_time": "0:20:24"} +{"current_steps": 1357, "total_steps": 2436, "loss": 1.7130283117294312, "lr": 4.8853533821721175e-06, "epoch": 1.6711822660098523, "percentage": 55.71, "elapsed_time": "0:25:38", "remaining_time": "0:20:23"} +{"current_steps": 1358, "total_steps": 2436, "loss": 0.9516315460205078, "lr": 4.878189344769666e-06, "epoch": 1.6724137931034484, "percentage": 55.75, "elapsed_time": "0:25:39", "remaining_time": "0:20:22"} +{"current_steps": 1359, "total_steps": 2436, "loss": 1.143174171447754, "lr": 4.871025557576747e-06, "epoch": 1.6736453201970445, "percentage": 55.79, "elapsed_time": "0:25:40", "remaining_time": "0:20:21"} +{"current_steps": 1360, "total_steps": 2436, "loss": 1.7117831707000732, "lr": 4.863862035308392e-06, "epoch": 1.6748768472906403, "percentage": 55.83, "elapsed_time": "0:25:41", "remaining_time": "0:20:19"} +{"current_steps": 1361, "total_steps": 2436, "loss": 2.507868528366089, "lr": 4.8566987926790946e-06, "epoch": 1.6761083743842364, "percentage": 55.87, "elapsed_time": "0:25:43", "remaining_time": "0:20:18"} +{"current_steps": 1362, "total_steps": 2436, "loss": 1.476400375366211, "lr": 4.849535844402762e-06, "epoch": 1.6773399014778325, "percentage": 55.91, "elapsed_time": "0:25:44", "remaining_time": "0:20:17"} +{"current_steps": 1363, "total_steps": 2436, "loss": 1.3162943124771118, "lr": 4.8423732051927115e-06, "epoch": 1.6785714285714286, "percentage": 55.95, "elapsed_time": "0:25:45", "remaining_time": "0:20:16"} +{"current_steps": 1364, "total_steps": 2436, "loss": 2.2291440963745117, "lr": 4.835210889761614e-06, "epoch": 1.6798029556650245, "percentage": 55.99, "elapsed_time": "0:25:46", "remaining_time": "0:20:15"} +{"current_steps": 1365, "total_steps": 2436, "loss": 1.2231886386871338, "lr": 4.82804891282148e-06, "epoch": 1.6810344827586206, "percentage": 56.03, "elapsed_time": "0:25:47", "remaining_time": "0:20:14"} +{"current_steps": 1366, "total_steps": 2436, "loss": 1.3799304962158203, "lr": 4.820887289083629e-06, "epoch": 1.6822660098522166, "percentage": 56.08, "elapsed_time": "0:25:48", "remaining_time": "0:20:12"} +{"current_steps": 1367, "total_steps": 2436, "loss": 1.856811761856079, "lr": 4.813726033258643e-06, "epoch": 1.6834975369458127, "percentage": 56.12, "elapsed_time": "0:25:49", "remaining_time": "0:20:11"} +{"current_steps": 1368, "total_steps": 2436, "loss": 1.5948967933654785, "lr": 4.80656516005636e-06, "epoch": 1.6847290640394088, "percentage": 56.16, "elapsed_time": "0:25:50", "remaining_time": "0:20:10"} +{"current_steps": 1369, "total_steps": 2436, "loss": 1.5035887956619263, "lr": 4.799404684185828e-06, "epoch": 1.685960591133005, "percentage": 56.2, "elapsed_time": "0:25:51", "remaining_time": "0:20:09"} +{"current_steps": 1370, "total_steps": 2436, "loss": 1.4715675115585327, "lr": 4.792244620355275e-06, "epoch": 1.687192118226601, "percentage": 56.24, "elapsed_time": "0:25:52", "remaining_time": "0:20:08"} +{"current_steps": 1371, "total_steps": 2436, "loss": 1.393894076347351, "lr": 4.78508498327209e-06, "epoch": 1.688423645320197, "percentage": 56.28, "elapsed_time": "0:25:53", "remaining_time": "0:20:07"} +{"current_steps": 1372, "total_steps": 2436, "loss": 1.8458061218261719, "lr": 4.777925787642781e-06, "epoch": 1.6896551724137931, "percentage": 56.32, "elapsed_time": "0:25:54", "remaining_time": "0:20:05"} +{"current_steps": 1373, "total_steps": 2436, "loss": 1.0604429244995117, "lr": 4.770767048172948e-06, "epoch": 1.6908866995073892, "percentage": 56.36, "elapsed_time": "0:25:55", "remaining_time": "0:20:04"} +{"current_steps": 1374, "total_steps": 2436, "loss": 1.3261964321136475, "lr": 4.7636087795672565e-06, "epoch": 1.6921182266009853, "percentage": 56.4, "elapsed_time": "0:25:57", "remaining_time": "0:20:03"} +{"current_steps": 1375, "total_steps": 2436, "loss": 1.6243900060653687, "lr": 4.756450996529403e-06, "epoch": 1.6933497536945814, "percentage": 56.44, "elapsed_time": "0:25:58", "remaining_time": "0:20:02"} +{"current_steps": 1376, "total_steps": 2436, "loss": 1.8087639808654785, "lr": 4.749293713762091e-06, "epoch": 1.6945812807881775, "percentage": 56.49, "elapsed_time": "0:25:59", "remaining_time": "0:20:01"} +{"current_steps": 1377, "total_steps": 2436, "loss": 1.9180892705917358, "lr": 4.742136945966991e-06, "epoch": 1.6958128078817734, "percentage": 56.53, "elapsed_time": "0:26:00", "remaining_time": "0:19:59"} +{"current_steps": 1378, "total_steps": 2436, "loss": 1.6797364950180054, "lr": 4.734980707844716e-06, "epoch": 1.6970443349753694, "percentage": 56.57, "elapsed_time": "0:26:01", "remaining_time": "0:19:58"} +{"current_steps": 1379, "total_steps": 2436, "loss": 0.9649052023887634, "lr": 4.727825014094795e-06, "epoch": 1.6982758620689655, "percentage": 56.61, "elapsed_time": "0:26:02", "remaining_time": "0:19:57"} +{"current_steps": 1380, "total_steps": 2436, "loss": 1.4185916185379028, "lr": 4.720669879415637e-06, "epoch": 1.6995073891625616, "percentage": 56.65, "elapsed_time": "0:26:03", "remaining_time": "0:19:56"} +{"current_steps": 1381, "total_steps": 2436, "loss": 1.8681238889694214, "lr": 4.713515318504501e-06, "epoch": 1.7007389162561575, "percentage": 56.69, "elapsed_time": "0:26:04", "remaining_time": "0:19:55"} +{"current_steps": 1382, "total_steps": 2436, "loss": 1.2830915451049805, "lr": 4.706361346057468e-06, "epoch": 1.7019704433497536, "percentage": 56.73, "elapsed_time": "0:26:05", "remaining_time": "0:19:54"} +{"current_steps": 1383, "total_steps": 2436, "loss": 1.0888878107070923, "lr": 4.699207976769416e-06, "epoch": 1.7032019704433496, "percentage": 56.77, "elapsed_time": "0:26:06", "remaining_time": "0:19:52"} +{"current_steps": 1384, "total_steps": 2436, "loss": 1.4439440965652466, "lr": 4.692055225333972e-06, "epoch": 1.7044334975369457, "percentage": 56.81, "elapsed_time": "0:26:07", "remaining_time": "0:19:51"} +{"current_steps": 1385, "total_steps": 2436, "loss": 1.0282858610153198, "lr": 4.684903106443504e-06, "epoch": 1.7056650246305418, "percentage": 56.86, "elapsed_time": "0:26:08", "remaining_time": "0:19:50"} +{"current_steps": 1386, "total_steps": 2436, "loss": 1.6842533349990845, "lr": 4.677751634789078e-06, "epoch": 1.706896551724138, "percentage": 56.9, "elapsed_time": "0:26:10", "remaining_time": "0:19:49"} +{"current_steps": 1387, "total_steps": 2436, "loss": 1.5473763942718506, "lr": 4.670600825060429e-06, "epoch": 1.708128078817734, "percentage": 56.94, "elapsed_time": "0:26:11", "remaining_time": "0:19:48"} +{"current_steps": 1388, "total_steps": 2436, "loss": 1.839112401008606, "lr": 4.663450691945936e-06, "epoch": 1.70935960591133, "percentage": 56.98, "elapsed_time": "0:26:12", "remaining_time": "0:19:47"} +{"current_steps": 1389, "total_steps": 2436, "loss": 1.5349544286727905, "lr": 4.656301250132581e-06, "epoch": 1.7105911330049262, "percentage": 57.02, "elapsed_time": "0:26:13", "remaining_time": "0:19:45"} +{"current_steps": 1390, "total_steps": 2436, "loss": 1.5788905620574951, "lr": 4.649152514305934e-06, "epoch": 1.7118226600985222, "percentage": 57.06, "elapsed_time": "0:26:14", "remaining_time": "0:19:44"} +{"current_steps": 1391, "total_steps": 2436, "loss": 1.4541325569152832, "lr": 4.6420044991501104e-06, "epoch": 1.7130541871921183, "percentage": 57.1, "elapsed_time": "0:26:15", "remaining_time": "0:19:43"} +{"current_steps": 1392, "total_steps": 2436, "loss": 1.8231902122497559, "lr": 4.634857219347746e-06, "epoch": 1.7142857142857144, "percentage": 57.14, "elapsed_time": "0:26:16", "remaining_time": "0:19:42"} +{"current_steps": 1393, "total_steps": 2436, "loss": 1.6302368640899658, "lr": 4.627710689579968e-06, "epoch": 1.7155172413793105, "percentage": 57.18, "elapsed_time": "0:26:17", "remaining_time": "0:19:41"} +{"current_steps": 1394, "total_steps": 2436, "loss": 1.497374415397644, "lr": 4.62056492452636e-06, "epoch": 1.7167487684729064, "percentage": 57.22, "elapsed_time": "0:26:18", "remaining_time": "0:19:40"} +{"current_steps": 1395, "total_steps": 2436, "loss": 1.1390448808670044, "lr": 4.613419938864937e-06, "epoch": 1.7179802955665024, "percentage": 57.27, "elapsed_time": "0:26:19", "remaining_time": "0:19:38"} +{"current_steps": 1396, "total_steps": 2436, "loss": 1.4320652484893799, "lr": 4.606275747272112e-06, "epoch": 1.7192118226600985, "percentage": 57.31, "elapsed_time": "0:26:20", "remaining_time": "0:19:37"} +{"current_steps": 1397, "total_steps": 2436, "loss": 1.2651784420013428, "lr": 4.599132364422666e-06, "epoch": 1.7204433497536946, "percentage": 57.35, "elapsed_time": "0:26:21", "remaining_time": "0:19:36"} +{"current_steps": 1398, "total_steps": 2436, "loss": 1.719766616821289, "lr": 4.5919898049897225e-06, "epoch": 1.7216748768472905, "percentage": 57.39, "elapsed_time": "0:26:22", "remaining_time": "0:19:35"} +{"current_steps": 1399, "total_steps": 2436, "loss": 1.707594394683838, "lr": 4.58484808364471e-06, "epoch": 1.7229064039408866, "percentage": 57.43, "elapsed_time": "0:26:24", "remaining_time": "0:19:34"} +{"current_steps": 1400, "total_steps": 2436, "loss": 1.4608323574066162, "lr": 4.5777072150573355e-06, "epoch": 1.7241379310344827, "percentage": 57.47, "elapsed_time": "0:26:25", "remaining_time": "0:19:32"} +{"current_steps": 1401, "total_steps": 2436, "loss": 1.5542428493499756, "lr": 4.570567213895555e-06, "epoch": 1.7253694581280787, "percentage": 57.51, "elapsed_time": "0:26:26", "remaining_time": "0:19:31"} +{"current_steps": 1402, "total_steps": 2436, "loss": 1.2282288074493408, "lr": 4.563428094825546e-06, "epoch": 1.7266009852216748, "percentage": 57.55, "elapsed_time": "0:26:27", "remaining_time": "0:19:30"} +{"current_steps": 1403, "total_steps": 2436, "loss": 1.1870850324630737, "lr": 4.556289872511669e-06, "epoch": 1.727832512315271, "percentage": 57.59, "elapsed_time": "0:26:28", "remaining_time": "0:19:29"} +{"current_steps": 1404, "total_steps": 2436, "loss": 1.8125461339950562, "lr": 4.549152561616445e-06, "epoch": 1.729064039408867, "percentage": 57.64, "elapsed_time": "0:26:29", "remaining_time": "0:19:28"} +{"current_steps": 1405, "total_steps": 2436, "loss": 1.4419995546340942, "lr": 4.542016176800527e-06, "epoch": 1.730295566502463, "percentage": 57.68, "elapsed_time": "0:26:30", "remaining_time": "0:19:27"} +{"current_steps": 1406, "total_steps": 2436, "loss": 1.8834543228149414, "lr": 4.534880732722653e-06, "epoch": 1.7315270935960592, "percentage": 57.72, "elapsed_time": "0:26:31", "remaining_time": "0:19:25"} +{"current_steps": 1407, "total_steps": 2436, "loss": 1.120203971862793, "lr": 4.527746244039644e-06, "epoch": 1.7327586206896552, "percentage": 57.76, "elapsed_time": "0:26:32", "remaining_time": "0:19:24"} +{"current_steps": 1408, "total_steps": 2436, "loss": 0.9131630659103394, "lr": 4.5206127254063495e-06, "epoch": 1.7339901477832513, "percentage": 57.8, "elapsed_time": "0:26:33", "remaining_time": "0:19:23"} +{"current_steps": 1409, "total_steps": 2436, "loss": 1.86919367313385, "lr": 4.513480191475627e-06, "epoch": 1.7352216748768474, "percentage": 57.84, "elapsed_time": "0:26:34", "remaining_time": "0:19:22"} +{"current_steps": 1410, "total_steps": 2436, "loss": 1.6573272943496704, "lr": 4.506348656898316e-06, "epoch": 1.7364532019704435, "percentage": 57.88, "elapsed_time": "0:26:35", "remaining_time": "0:19:21"} +{"current_steps": 1411, "total_steps": 2436, "loss": 1.2864340543746948, "lr": 4.499218136323197e-06, "epoch": 1.7376847290640394, "percentage": 57.92, "elapsed_time": "0:26:36", "remaining_time": "0:19:20"} +{"current_steps": 1412, "total_steps": 2436, "loss": 1.5519993305206299, "lr": 4.492088644396972e-06, "epoch": 1.7389162561576355, "percentage": 57.96, "elapsed_time": "0:26:38", "remaining_time": "0:19:18"} +{"current_steps": 1413, "total_steps": 2436, "loss": 1.7556722164154053, "lr": 4.4849601957642295e-06, "epoch": 1.7401477832512315, "percentage": 58.0, "elapsed_time": "0:26:39", "remaining_time": "0:19:17"} +{"current_steps": 1414, "total_steps": 2436, "loss": 1.6349589824676514, "lr": 4.477832805067412e-06, "epoch": 1.7413793103448276, "percentage": 58.05, "elapsed_time": "0:26:40", "remaining_time": "0:19:16"} +{"current_steps": 1415, "total_steps": 2436, "loss": 1.3583035469055176, "lr": 4.470706486946797e-06, "epoch": 1.7426108374384235, "percentage": 58.09, "elapsed_time": "0:26:41", "remaining_time": "0:19:15"} +{"current_steps": 1416, "total_steps": 2436, "loss": 1.5367932319641113, "lr": 4.463581256040445e-06, "epoch": 1.7438423645320196, "percentage": 58.13, "elapsed_time": "0:26:42", "remaining_time": "0:19:14"} +{"current_steps": 1417, "total_steps": 2436, "loss": 1.5078128576278687, "lr": 4.456457126984196e-06, "epoch": 1.7450738916256157, "percentage": 58.17, "elapsed_time": "0:26:43", "remaining_time": "0:19:13"} +{"current_steps": 1418, "total_steps": 2436, "loss": 1.8653573989868164, "lr": 4.449334114411622e-06, "epoch": 1.7463054187192117, "percentage": 58.21, "elapsed_time": "0:26:44", "remaining_time": "0:19:11"} +{"current_steps": 1419, "total_steps": 2436, "loss": 1.1381313800811768, "lr": 4.4422122329539996e-06, "epoch": 1.7475369458128078, "percentage": 58.25, "elapsed_time": "0:26:45", "remaining_time": "0:19:10"} +{"current_steps": 1420, "total_steps": 2436, "loss": 1.4135184288024902, "lr": 4.435091497240287e-06, "epoch": 1.748768472906404, "percentage": 58.29, "elapsed_time": "0:26:46", "remaining_time": "0:19:09"} +{"current_steps": 1421, "total_steps": 2436, "loss": 1.2186479568481445, "lr": 4.427971921897086e-06, "epoch": 1.75, "percentage": 58.33, "elapsed_time": "0:26:47", "remaining_time": "0:19:08"} +{"current_steps": 1422, "total_steps": 2436, "loss": 1.3139259815216064, "lr": 4.420853521548611e-06, "epoch": 1.751231527093596, "percentage": 58.37, "elapsed_time": "0:26:48", "remaining_time": "0:19:07"} +{"current_steps": 1423, "total_steps": 2436, "loss": 2.0143887996673584, "lr": 4.413736310816669e-06, "epoch": 1.7524630541871922, "percentage": 58.42, "elapsed_time": "0:26:49", "remaining_time": "0:19:06"} +{"current_steps": 1424, "total_steps": 2436, "loss": 1.5800344944000244, "lr": 4.4066203043206226e-06, "epoch": 1.7536945812807883, "percentage": 58.46, "elapsed_time": "0:26:50", "remaining_time": "0:19:04"} +{"current_steps": 1425, "total_steps": 2436, "loss": 1.449183702468872, "lr": 4.399505516677358e-06, "epoch": 1.7549261083743843, "percentage": 58.5, "elapsed_time": "0:26:52", "remaining_time": "0:19:03"} +{"current_steps": 1426, "total_steps": 2436, "loss": 0.6957097053527832, "lr": 4.3923919625012605e-06, "epoch": 1.7561576354679804, "percentage": 58.54, "elapsed_time": "0:26:53", "remaining_time": "0:19:02"} +{"current_steps": 1427, "total_steps": 2436, "loss": 1.0665647983551025, "lr": 4.385279656404178e-06, "epoch": 1.7573891625615765, "percentage": 58.58, "elapsed_time": "0:26:54", "remaining_time": "0:19:01"} +{"current_steps": 1428, "total_steps": 2436, "loss": 1.2771016359329224, "lr": 4.3781686129953975e-06, "epoch": 1.7586206896551724, "percentage": 58.62, "elapsed_time": "0:26:55", "remaining_time": "0:19:00"} +{"current_steps": 1429, "total_steps": 2436, "loss": 1.4222235679626465, "lr": 4.371058846881614e-06, "epoch": 1.7598522167487685, "percentage": 58.66, "elapsed_time": "0:26:56", "remaining_time": "0:18:59"} +{"current_steps": 1430, "total_steps": 2436, "loss": 2.1237497329711914, "lr": 4.363950372666896e-06, "epoch": 1.7610837438423645, "percentage": 58.7, "elapsed_time": "0:26:57", "remaining_time": "0:18:57"} +{"current_steps": 1431, "total_steps": 2436, "loss": 1.3875718116760254, "lr": 4.356843204952657e-06, "epoch": 1.7623152709359606, "percentage": 58.74, "elapsed_time": "0:26:58", "remaining_time": "0:18:56"} +{"current_steps": 1432, "total_steps": 2436, "loss": 1.2585203647613525, "lr": 4.349737358337635e-06, "epoch": 1.7635467980295565, "percentage": 58.78, "elapsed_time": "0:26:59", "remaining_time": "0:18:55"} +{"current_steps": 1433, "total_steps": 2436, "loss": 1.3183746337890625, "lr": 4.3426328474178405e-06, "epoch": 1.7647783251231526, "percentage": 58.83, "elapsed_time": "0:27:00", "remaining_time": "0:18:54"} +{"current_steps": 1434, "total_steps": 2436, "loss": 1.7174941301345825, "lr": 4.335529686786554e-06, "epoch": 1.7660098522167487, "percentage": 58.87, "elapsed_time": "0:27:01", "remaining_time": "0:18:53"} +{"current_steps": 1435, "total_steps": 2436, "loss": 1.9503614902496338, "lr": 4.328427891034273e-06, "epoch": 1.7672413793103448, "percentage": 58.91, "elapsed_time": "0:27:02", "remaining_time": "0:18:51"} +{"current_steps": 1436, "total_steps": 2436, "loss": 1.3797223567962646, "lr": 4.321327474748697e-06, "epoch": 1.7684729064039408, "percentage": 58.95, "elapsed_time": "0:27:03", "remaining_time": "0:18:50"} +{"current_steps": 1437, "total_steps": 2436, "loss": 1.4113730192184448, "lr": 4.3142284525146915e-06, "epoch": 1.769704433497537, "percentage": 58.99, "elapsed_time": "0:27:04", "remaining_time": "0:18:49"} +{"current_steps": 1438, "total_steps": 2436, "loss": 2.383976697921753, "lr": 4.307130838914252e-06, "epoch": 1.770935960591133, "percentage": 59.03, "elapsed_time": "0:27:06", "remaining_time": "0:18:48"} +{"current_steps": 1439, "total_steps": 2436, "loss": 1.7687448263168335, "lr": 4.300034648526489e-06, "epoch": 1.772167487684729, "percentage": 59.07, "elapsed_time": "0:27:07", "remaining_time": "0:18:47"} +{"current_steps": 1440, "total_steps": 2436, "loss": 1.5130079984664917, "lr": 4.292939895927587e-06, "epoch": 1.7733990147783252, "percentage": 59.11, "elapsed_time": "0:27:08", "remaining_time": "0:18:46"} +{"current_steps": 1441, "total_steps": 2436, "loss": 1.0863475799560547, "lr": 4.2858465956907726e-06, "epoch": 1.7746305418719213, "percentage": 59.15, "elapsed_time": "0:27:09", "remaining_time": "0:18:44"} +{"current_steps": 1442, "total_steps": 2436, "loss": 1.1504137516021729, "lr": 4.278754762386297e-06, "epoch": 1.7758620689655173, "percentage": 59.2, "elapsed_time": "0:27:10", "remaining_time": "0:18:43"} +{"current_steps": 1443, "total_steps": 2436, "loss": 1.1227596998214722, "lr": 4.271664410581392e-06, "epoch": 1.7770935960591134, "percentage": 59.24, "elapsed_time": "0:27:11", "remaining_time": "0:18:42"} +{"current_steps": 1444, "total_steps": 2436, "loss": 1.4501817226409912, "lr": 4.264575554840248e-06, "epoch": 1.7783251231527095, "percentage": 59.28, "elapsed_time": "0:27:12", "remaining_time": "0:18:41"} +{"current_steps": 1445, "total_steps": 2436, "loss": 0.48442721366882324, "lr": 4.257488209723981e-06, "epoch": 1.7795566502463054, "percentage": 59.32, "elapsed_time": "0:27:13", "remaining_time": "0:18:40"} +{"current_steps": 1446, "total_steps": 2436, "loss": 1.218263864517212, "lr": 4.25040238979061e-06, "epoch": 1.7807881773399015, "percentage": 59.36, "elapsed_time": "0:27:14", "remaining_time": "0:18:39"} +{"current_steps": 1447, "total_steps": 2436, "loss": 1.1711516380310059, "lr": 4.243318109595014e-06, "epoch": 1.7820197044334976, "percentage": 59.4, "elapsed_time": "0:27:15", "remaining_time": "0:18:37"} +{"current_steps": 1448, "total_steps": 2436, "loss": 1.3575153350830078, "lr": 4.2362353836889126e-06, "epoch": 1.7832512315270936, "percentage": 59.44, "elapsed_time": "0:27:16", "remaining_time": "0:18:36"} +{"current_steps": 1449, "total_steps": 2436, "loss": 2.6967573165893555, "lr": 4.229154226620832e-06, "epoch": 1.7844827586206895, "percentage": 59.48, "elapsed_time": "0:27:17", "remaining_time": "0:18:35"} +{"current_steps": 1450, "total_steps": 2436, "loss": 2.2812700271606445, "lr": 4.2220746529360745e-06, "epoch": 1.7857142857142856, "percentage": 59.52, "elapsed_time": "0:27:18", "remaining_time": "0:18:34"} +{"current_steps": 1451, "total_steps": 2436, "loss": 1.2746225595474243, "lr": 4.2149966771766945e-06, "epoch": 1.7869458128078817, "percentage": 59.56, "elapsed_time": "0:27:19", "remaining_time": "0:18:33"} +{"current_steps": 1452, "total_steps": 2436, "loss": 1.4866999387741089, "lr": 4.207920313881459e-06, "epoch": 1.7881773399014778, "percentage": 59.61, "elapsed_time": "0:27:21", "remaining_time": "0:18:32"} +{"current_steps": 1453, "total_steps": 2436, "loss": 1.4830021858215332, "lr": 4.200845577585827e-06, "epoch": 1.7894088669950738, "percentage": 59.65, "elapsed_time": "0:27:22", "remaining_time": "0:18:30"} +{"current_steps": 1454, "total_steps": 2436, "loss": 2.5529747009277344, "lr": 4.193772482821914e-06, "epoch": 1.79064039408867, "percentage": 59.69, "elapsed_time": "0:27:23", "remaining_time": "0:18:29"} +{"current_steps": 1455, "total_steps": 2436, "loss": 1.413874626159668, "lr": 4.186701044118459e-06, "epoch": 1.791871921182266, "percentage": 59.73, "elapsed_time": "0:27:24", "remaining_time": "0:18:28"} +{"current_steps": 1456, "total_steps": 2436, "loss": 2.1567163467407227, "lr": 4.179631276000807e-06, "epoch": 1.793103448275862, "percentage": 59.77, "elapsed_time": "0:27:25", "remaining_time": "0:18:27"} +{"current_steps": 1457, "total_steps": 2436, "loss": 1.851858139038086, "lr": 4.1725631929908684e-06, "epoch": 1.7943349753694582, "percentage": 59.81, "elapsed_time": "0:27:26", "remaining_time": "0:18:26"} +{"current_steps": 1458, "total_steps": 2436, "loss": 1.2765101194381714, "lr": 4.165496809607089e-06, "epoch": 1.7955665024630543, "percentage": 59.85, "elapsed_time": "0:27:27", "remaining_time": "0:18:25"} +{"current_steps": 1459, "total_steps": 2436, "loss": 1.9869401454925537, "lr": 4.158432140364431e-06, "epoch": 1.7967980295566504, "percentage": 59.89, "elapsed_time": "0:27:28", "remaining_time": "0:18:23"} +{"current_steps": 1460, "total_steps": 2436, "loss": 1.5319430828094482, "lr": 4.151369199774325e-06, "epoch": 1.7980295566502464, "percentage": 59.93, "elapsed_time": "0:27:29", "remaining_time": "0:18:22"} +{"current_steps": 1461, "total_steps": 2436, "loss": 1.487468957901001, "lr": 4.1443080023446605e-06, "epoch": 1.7992610837438425, "percentage": 59.98, "elapsed_time": "0:27:30", "remaining_time": "0:18:21"} +{"current_steps": 1462, "total_steps": 2436, "loss": 1.6152423620224, "lr": 4.137248562579742e-06, "epoch": 1.8004926108374384, "percentage": 60.02, "elapsed_time": "0:27:31", "remaining_time": "0:18:20"} +{"current_steps": 1463, "total_steps": 2436, "loss": 1.5262070894241333, "lr": 4.130190894980262e-06, "epoch": 1.8017241379310345, "percentage": 60.06, "elapsed_time": "0:27:32", "remaining_time": "0:18:19"} +{"current_steps": 1464, "total_steps": 2436, "loss": 1.6697289943695068, "lr": 4.123135014043279e-06, "epoch": 1.8029556650246306, "percentage": 60.1, "elapsed_time": "0:27:33", "remaining_time": "0:18:18"} +{"current_steps": 1465, "total_steps": 2436, "loss": 1.470789909362793, "lr": 4.116080934262175e-06, "epoch": 1.8041871921182266, "percentage": 60.14, "elapsed_time": "0:27:35", "remaining_time": "0:18:16"} +{"current_steps": 1466, "total_steps": 2436, "loss": 1.62421715259552, "lr": 4.109028670126635e-06, "epoch": 1.8054187192118225, "percentage": 60.18, "elapsed_time": "0:27:36", "remaining_time": "0:18:15"} +{"current_steps": 1467, "total_steps": 2436, "loss": 2.1249561309814453, "lr": 4.101978236122613e-06, "epoch": 1.8066502463054186, "percentage": 60.22, "elapsed_time": "0:27:37", "remaining_time": "0:18:14"} +{"current_steps": 1468, "total_steps": 2436, "loss": 1.3368217945098877, "lr": 4.094929646732309e-06, "epoch": 1.8078817733990147, "percentage": 60.26, "elapsed_time": "0:27:38", "remaining_time": "0:18:13"} +{"current_steps": 1469, "total_steps": 2436, "loss": 0.8684915900230408, "lr": 4.087882916434126e-06, "epoch": 1.8091133004926108, "percentage": 60.3, "elapsed_time": "0:27:39", "remaining_time": "0:18:12"} +{"current_steps": 1470, "total_steps": 2436, "loss": 1.6997764110565186, "lr": 4.080838059702656e-06, "epoch": 1.8103448275862069, "percentage": 60.34, "elapsed_time": "0:27:40", "remaining_time": "0:18:11"} +{"current_steps": 1471, "total_steps": 2436, "loss": 0.8933043479919434, "lr": 4.0737950910086354e-06, "epoch": 1.811576354679803, "percentage": 60.39, "elapsed_time": "0:27:41", "remaining_time": "0:18:09"} +{"current_steps": 1472, "total_steps": 2436, "loss": 1.689558982849121, "lr": 4.0667540248189265e-06, "epoch": 1.812807881773399, "percentage": 60.43, "elapsed_time": "0:27:42", "remaining_time": "0:18:08"} +{"current_steps": 1473, "total_steps": 2436, "loss": 1.797630786895752, "lr": 4.059714875596486e-06, "epoch": 1.814039408866995, "percentage": 60.47, "elapsed_time": "0:27:43", "remaining_time": "0:18:07"} +{"current_steps": 1474, "total_steps": 2436, "loss": 2.023120164871216, "lr": 4.052677657800327e-06, "epoch": 1.8152709359605912, "percentage": 60.51, "elapsed_time": "0:27:44", "remaining_time": "0:18:06"} +{"current_steps": 1475, "total_steps": 2436, "loss": 1.5412349700927734, "lr": 4.045642385885497e-06, "epoch": 1.8165024630541873, "percentage": 60.55, "elapsed_time": "0:27:45", "remaining_time": "0:18:05"} +{"current_steps": 1476, "total_steps": 2436, "loss": 0.786411464214325, "lr": 4.038609074303055e-06, "epoch": 1.8177339901477834, "percentage": 60.59, "elapsed_time": "0:27:46", "remaining_time": "0:18:04"} +{"current_steps": 1477, "total_steps": 2436, "loss": 1.3470659255981445, "lr": 4.0315777375000185e-06, "epoch": 1.8189655172413794, "percentage": 60.63, "elapsed_time": "0:27:48", "remaining_time": "0:18:03"} +{"current_steps": 1478, "total_steps": 2436, "loss": 1.3983774185180664, "lr": 4.02454838991936e-06, "epoch": 1.8201970443349755, "percentage": 60.67, "elapsed_time": "0:27:49", "remaining_time": "0:18:01"} +{"current_steps": 1479, "total_steps": 2436, "loss": 1.9945271015167236, "lr": 4.017521045999961e-06, "epoch": 1.8214285714285714, "percentage": 60.71, "elapsed_time": "0:27:50", "remaining_time": "0:18:00"} +{"current_steps": 1480, "total_steps": 2436, "loss": 1.6103991270065308, "lr": 4.0104957201765874e-06, "epoch": 1.8226600985221675, "percentage": 60.76, "elapsed_time": "0:27:51", "remaining_time": "0:17:59"} +{"current_steps": 1481, "total_steps": 2436, "loss": 1.2794644832611084, "lr": 4.003472426879866e-06, "epoch": 1.8238916256157636, "percentage": 60.8, "elapsed_time": "0:27:52", "remaining_time": "0:17:58"} +{"current_steps": 1482, "total_steps": 2436, "loss": 1.4485671520233154, "lr": 3.996451180536237e-06, "epoch": 1.8251231527093597, "percentage": 60.84, "elapsed_time": "0:27:53", "remaining_time": "0:17:57"} +{"current_steps": 1483, "total_steps": 2436, "loss": 1.1264885663986206, "lr": 3.989431995567947e-06, "epoch": 1.8263546798029555, "percentage": 60.88, "elapsed_time": "0:27:54", "remaining_time": "0:17:56"} +{"current_steps": 1484, "total_steps": 2436, "loss": 1.7849301099777222, "lr": 3.982414886393002e-06, "epoch": 1.8275862068965516, "percentage": 60.92, "elapsed_time": "0:27:55", "remaining_time": "0:17:54"} +{"current_steps": 1485, "total_steps": 2436, "loss": 2.4955849647521973, "lr": 3.975399867425146e-06, "epoch": 1.8288177339901477, "percentage": 60.96, "elapsed_time": "0:27:56", "remaining_time": "0:17:53"} +{"current_steps": 1486, "total_steps": 2436, "loss": 1.3440265655517578, "lr": 3.96838695307383e-06, "epoch": 1.8300492610837438, "percentage": 61.0, "elapsed_time": "0:27:57", "remaining_time": "0:17:52"} +{"current_steps": 1487, "total_steps": 2436, "loss": 1.7565090656280518, "lr": 3.961376157744183e-06, "epoch": 1.8312807881773399, "percentage": 61.04, "elapsed_time": "0:27:58", "remaining_time": "0:17:51"} +{"current_steps": 1488, "total_steps": 2436, "loss": 2.086646318435669, "lr": 3.954367495836978e-06, "epoch": 1.832512315270936, "percentage": 61.08, "elapsed_time": "0:27:59", "remaining_time": "0:17:50"} +{"current_steps": 1489, "total_steps": 2436, "loss": 2.0356874465942383, "lr": 3.947360981748607e-06, "epoch": 1.833743842364532, "percentage": 61.12, "elapsed_time": "0:28:00", "remaining_time": "0:17:49"} +{"current_steps": 1490, "total_steps": 2436, "loss": 1.3129501342773438, "lr": 3.940356629871051e-06, "epoch": 1.8349753694581281, "percentage": 61.17, "elapsed_time": "0:28:02", "remaining_time": "0:17:47"} +{"current_steps": 1491, "total_steps": 2436, "loss": 1.468184471130371, "lr": 3.933354454591851e-06, "epoch": 1.8362068965517242, "percentage": 61.21, "elapsed_time": "0:28:03", "remaining_time": "0:17:46"} +{"current_steps": 1492, "total_steps": 2436, "loss": 1.4110320806503296, "lr": 3.926354470294077e-06, "epoch": 1.8374384236453203, "percentage": 61.25, "elapsed_time": "0:28:04", "remaining_time": "0:17:45"} +{"current_steps": 1493, "total_steps": 2436, "loss": 1.0595703125, "lr": 3.9193566913562915e-06, "epoch": 1.8386699507389164, "percentage": 61.29, "elapsed_time": "0:28:05", "remaining_time": "0:17:44"} +{"current_steps": 1494, "total_steps": 2436, "loss": 1.628462791442871, "lr": 3.912361132152537e-06, "epoch": 1.8399014778325125, "percentage": 61.33, "elapsed_time": "0:28:06", "remaining_time": "0:17:43"} +{"current_steps": 1495, "total_steps": 2436, "loss": 1.3903121948242188, "lr": 3.9053678070522904e-06, "epoch": 1.8411330049261085, "percentage": 61.37, "elapsed_time": "0:28:07", "remaining_time": "0:17:42"} +{"current_steps": 1496, "total_steps": 2436, "loss": 1.6935603618621826, "lr": 3.898376730420442e-06, "epoch": 1.8423645320197044, "percentage": 61.41, "elapsed_time": "0:28:08", "remaining_time": "0:17:40"} +{"current_steps": 1497, "total_steps": 2436, "loss": 1.2785383462905884, "lr": 3.891387916617261e-06, "epoch": 1.8435960591133005, "percentage": 61.45, "elapsed_time": "0:28:09", "remaining_time": "0:17:39"} +{"current_steps": 1498, "total_steps": 2436, "loss": 0.9488393068313599, "lr": 3.884401379998375e-06, "epoch": 1.8448275862068966, "percentage": 61.49, "elapsed_time": "0:28:10", "remaining_time": "0:17:38"} +{"current_steps": 1499, "total_steps": 2436, "loss": 1.7822269201278687, "lr": 3.877417134914724e-06, "epoch": 1.8460591133004927, "percentage": 61.54, "elapsed_time": "0:28:11", "remaining_time": "0:17:37"} +{"current_steps": 1500, "total_steps": 2436, "loss": 2.0112462043762207, "lr": 3.870435195712547e-06, "epoch": 1.8472906403940885, "percentage": 61.58, "elapsed_time": "0:28:12", "remaining_time": "0:17:36"} +{"current_steps": 1501, "total_steps": 2436, "loss": 1.3558632135391235, "lr": 3.863455576733349e-06, "epoch": 1.8485221674876846, "percentage": 61.62, "elapsed_time": "0:28:13", "remaining_time": "0:17:35"} +{"current_steps": 1502, "total_steps": 2436, "loss": 1.34049391746521, "lr": 3.856478292313864e-06, "epoch": 1.8497536945812807, "percentage": 61.66, "elapsed_time": "0:28:14", "remaining_time": "0:17:33"} +{"current_steps": 1503, "total_steps": 2436, "loss": 1.5048649311065674, "lr": 3.849503356786034e-06, "epoch": 1.8509852216748768, "percentage": 61.7, "elapsed_time": "0:28:16", "remaining_time": "0:17:32"} +{"current_steps": 1504, "total_steps": 2436, "loss": 1.595820426940918, "lr": 3.842530784476971e-06, "epoch": 1.8522167487684729, "percentage": 61.74, "elapsed_time": "0:28:17", "remaining_time": "0:17:31"} +{"current_steps": 1505, "total_steps": 2436, "loss": 1.4003782272338867, "lr": 3.83556058970894e-06, "epoch": 1.853448275862069, "percentage": 61.78, "elapsed_time": "0:28:18", "remaining_time": "0:17:30"} +{"current_steps": 1506, "total_steps": 2436, "loss": 1.6082279682159424, "lr": 3.828592786799318e-06, "epoch": 1.854679802955665, "percentage": 61.82, "elapsed_time": "0:28:19", "remaining_time": "0:17:29"} +{"current_steps": 1507, "total_steps": 2436, "loss": 1.7311087846755981, "lr": 3.821627390060568e-06, "epoch": 1.8559113300492611, "percentage": 61.86, "elapsed_time": "0:28:20", "remaining_time": "0:17:28"} +{"current_steps": 1508, "total_steps": 2436, "loss": 1.2369680404663086, "lr": 3.8146644138002154e-06, "epoch": 1.8571428571428572, "percentage": 61.9, "elapsed_time": "0:28:21", "remaining_time": "0:17:27"} +{"current_steps": 1509, "total_steps": 2436, "loss": 0.8267203569412231, "lr": 3.807703872320809e-06, "epoch": 1.8583743842364533, "percentage": 61.95, "elapsed_time": "0:28:22", "remaining_time": "0:17:25"} +{"current_steps": 1510, "total_steps": 2436, "loss": 1.310041904449463, "lr": 3.8007457799198977e-06, "epoch": 1.8596059113300494, "percentage": 61.99, "elapsed_time": "0:28:23", "remaining_time": "0:17:24"} +{"current_steps": 1511, "total_steps": 2436, "loss": 1.483811378479004, "lr": 3.79379015089e-06, "epoch": 1.8608374384236455, "percentage": 62.03, "elapsed_time": "0:28:24", "remaining_time": "0:17:23"} +{"current_steps": 1512, "total_steps": 2436, "loss": 1.7339284420013428, "lr": 3.7868369995185734e-06, "epoch": 1.8620689655172413, "percentage": 62.07, "elapsed_time": "0:28:25", "remaining_time": "0:17:22"} +{"current_steps": 1513, "total_steps": 2436, "loss": 0.8915985822677612, "lr": 3.7798863400879894e-06, "epoch": 1.8633004926108374, "percentage": 62.11, "elapsed_time": "0:28:26", "remaining_time": "0:17:21"} +{"current_steps": 1514, "total_steps": 2436, "loss": 2.3413619995117188, "lr": 3.7729381868754985e-06, "epoch": 1.8645320197044335, "percentage": 62.15, "elapsed_time": "0:28:27", "remaining_time": "0:17:20"} +{"current_steps": 1515, "total_steps": 2436, "loss": 1.422214388847351, "lr": 3.7659925541532006e-06, "epoch": 1.8657635467980296, "percentage": 62.19, "elapsed_time": "0:28:28", "remaining_time": "0:17:18"} +{"current_steps": 1516, "total_steps": 2436, "loss": 1.435701847076416, "lr": 3.759049456188022e-06, "epoch": 1.8669950738916257, "percentage": 62.23, "elapsed_time": "0:28:30", "remaining_time": "0:17:17"} +{"current_steps": 1517, "total_steps": 2436, "loss": 1.0702649354934692, "lr": 3.752108907241682e-06, "epoch": 1.8682266009852215, "percentage": 62.27, "elapsed_time": "0:28:31", "remaining_time": "0:17:16"} +{"current_steps": 1518, "total_steps": 2436, "loss": 1.3625175952911377, "lr": 3.7451709215706643e-06, "epoch": 1.8694581280788176, "percentage": 62.32, "elapsed_time": "0:28:32", "remaining_time": "0:17:15"} +{"current_steps": 1519, "total_steps": 2436, "loss": 0.6707335710525513, "lr": 3.738235513426184e-06, "epoch": 1.8706896551724137, "percentage": 62.36, "elapsed_time": "0:28:33", "remaining_time": "0:17:14"} +{"current_steps": 1520, "total_steps": 2436, "loss": 0.9573410749435425, "lr": 3.7313026970541687e-06, "epoch": 1.8719211822660098, "percentage": 62.4, "elapsed_time": "0:28:34", "remaining_time": "0:17:13"} +{"current_steps": 1521, "total_steps": 2436, "loss": 1.625769853591919, "lr": 3.7243724866952114e-06, "epoch": 1.8731527093596059, "percentage": 62.44, "elapsed_time": "0:28:35", "remaining_time": "0:17:11"} +{"current_steps": 1522, "total_steps": 2436, "loss": 1.2327096462249756, "lr": 3.717444896584562e-06, "epoch": 1.874384236453202, "percentage": 62.48, "elapsed_time": "0:28:36", "remaining_time": "0:17:10"} +{"current_steps": 1523, "total_steps": 2436, "loss": 1.9436770677566528, "lr": 3.710519940952085e-06, "epoch": 1.875615763546798, "percentage": 62.52, "elapsed_time": "0:28:37", "remaining_time": "0:17:09"} +{"current_steps": 1524, "total_steps": 2436, "loss": 1.260964274406433, "lr": 3.703597634022232e-06, "epoch": 1.8768472906403941, "percentage": 62.56, "elapsed_time": "0:28:38", "remaining_time": "0:17:08"} +{"current_steps": 1525, "total_steps": 2436, "loss": 0.9448941946029663, "lr": 3.6966779900140193e-06, "epoch": 1.8780788177339902, "percentage": 62.6, "elapsed_time": "0:28:39", "remaining_time": "0:17:07"} +{"current_steps": 1526, "total_steps": 2436, "loss": 1.0470240116119385, "lr": 3.689761023140981e-06, "epoch": 1.8793103448275863, "percentage": 62.64, "elapsed_time": "0:28:40", "remaining_time": "0:17:06"} +{"current_steps": 1527, "total_steps": 2436, "loss": 1.290519118309021, "lr": 3.6828467476111664e-06, "epoch": 1.8805418719211824, "percentage": 62.68, "elapsed_time": "0:28:41", "remaining_time": "0:17:05"} +{"current_steps": 1528, "total_steps": 2436, "loss": 1.6617997884750366, "lr": 3.675935177627088e-06, "epoch": 1.8817733990147785, "percentage": 62.73, "elapsed_time": "0:28:42", "remaining_time": "0:17:03"} +{"current_steps": 1529, "total_steps": 2436, "loss": 2.624133825302124, "lr": 3.6690263273857035e-06, "epoch": 1.8830049261083743, "percentage": 62.77, "elapsed_time": "0:28:44", "remaining_time": "0:17:02"} +{"current_steps": 1530, "total_steps": 2436, "loss": 1.189339518547058, "lr": 3.662120211078385e-06, "epoch": 1.8842364532019704, "percentage": 62.81, "elapsed_time": "0:28:45", "remaining_time": "0:17:01"} +{"current_steps": 1531, "total_steps": 2436, "loss": 1.2045223712921143, "lr": 3.6552168428908886e-06, "epoch": 1.8854679802955665, "percentage": 62.85, "elapsed_time": "0:28:46", "remaining_time": "0:17:00"} +{"current_steps": 1532, "total_steps": 2436, "loss": 1.4260770082473755, "lr": 3.648316237003321e-06, "epoch": 1.8866995073891626, "percentage": 62.89, "elapsed_time": "0:28:47", "remaining_time": "0:16:59"} +{"current_steps": 1533, "total_steps": 2436, "loss": 1.1973135471343994, "lr": 3.6414184075901206e-06, "epoch": 1.8879310344827587, "percentage": 62.93, "elapsed_time": "0:28:48", "remaining_time": "0:16:58"} +{"current_steps": 1534, "total_steps": 2436, "loss": 1.4474105834960938, "lr": 3.6345233688200195e-06, "epoch": 1.8891625615763545, "percentage": 62.97, "elapsed_time": "0:28:49", "remaining_time": "0:16:56"} +{"current_steps": 1535, "total_steps": 2436, "loss": 1.5732392072677612, "lr": 3.62763113485602e-06, "epoch": 1.8903940886699506, "percentage": 63.01, "elapsed_time": "0:28:50", "remaining_time": "0:16:55"} +{"current_steps": 1536, "total_steps": 2436, "loss": 1.992612361907959, "lr": 3.6207417198553624e-06, "epoch": 1.8916256157635467, "percentage": 63.05, "elapsed_time": "0:28:51", "remaining_time": "0:16:54"} +{"current_steps": 1537, "total_steps": 2436, "loss": 1.8015589714050293, "lr": 3.6138551379694936e-06, "epoch": 1.8928571428571428, "percentage": 63.1, "elapsed_time": "0:28:52", "remaining_time": "0:16:53"} +{"current_steps": 1538, "total_steps": 2436, "loss": 1.1887943744659424, "lr": 3.606971403344044e-06, "epoch": 1.8940886699507389, "percentage": 63.14, "elapsed_time": "0:28:53", "remaining_time": "0:16:52"} +{"current_steps": 1539, "total_steps": 2436, "loss": 1.035568118095398, "lr": 3.6000905301187953e-06, "epoch": 1.895320197044335, "percentage": 63.18, "elapsed_time": "0:28:54", "remaining_time": "0:16:51"} +{"current_steps": 1540, "total_steps": 2436, "loss": 1.8441094160079956, "lr": 3.5932125324276524e-06, "epoch": 1.896551724137931, "percentage": 63.22, "elapsed_time": "0:28:55", "remaining_time": "0:16:49"} +{"current_steps": 1541, "total_steps": 2436, "loss": 2.7305843830108643, "lr": 3.586337424398609e-06, "epoch": 1.8977832512315271, "percentage": 63.26, "elapsed_time": "0:28:56", "remaining_time": "0:16:48"} +{"current_steps": 1542, "total_steps": 2436, "loss": 2.1233139038085938, "lr": 3.579465220153733e-06, "epoch": 1.8990147783251232, "percentage": 63.3, "elapsed_time": "0:28:58", "remaining_time": "0:16:47"} +{"current_steps": 1543, "total_steps": 2436, "loss": 1.232177495956421, "lr": 3.5725959338091133e-06, "epoch": 1.9002463054187193, "percentage": 63.34, "elapsed_time": "0:28:59", "remaining_time": "0:16:46"} +{"current_steps": 1544, "total_steps": 2436, "loss": 1.89857017993927, "lr": 3.565729579474858e-06, "epoch": 1.9014778325123154, "percentage": 63.38, "elapsed_time": "0:29:00", "remaining_time": "0:16:45"} +{"current_steps": 1545, "total_steps": 2436, "loss": 1.1281499862670898, "lr": 3.5588661712550464e-06, "epoch": 1.9027093596059115, "percentage": 63.42, "elapsed_time": "0:29:01", "remaining_time": "0:16:44"} +{"current_steps": 1546, "total_steps": 2436, "loss": 1.2526335716247559, "lr": 3.5520057232477073e-06, "epoch": 1.9039408866995073, "percentage": 63.46, "elapsed_time": "0:29:02", "remaining_time": "0:16:43"} +{"current_steps": 1547, "total_steps": 2436, "loss": 1.8187229633331299, "lr": 3.545148249544793e-06, "epoch": 1.9051724137931034, "percentage": 63.51, "elapsed_time": "0:29:03", "remaining_time": "0:16:41"} +{"current_steps": 1548, "total_steps": 2436, "loss": 2.5140726566314697, "lr": 3.5382937642321356e-06, "epoch": 1.9064039408866995, "percentage": 63.55, "elapsed_time": "0:29:04", "remaining_time": "0:16:40"} +{"current_steps": 1549, "total_steps": 2436, "loss": 1.4403750896453857, "lr": 3.5314422813894413e-06, "epoch": 1.9076354679802956, "percentage": 63.59, "elapsed_time": "0:29:05", "remaining_time": "0:16:39"} +{"current_steps": 1550, "total_steps": 2436, "loss": 2.1372480392456055, "lr": 3.524593815090241e-06, "epoch": 1.9088669950738915, "percentage": 63.63, "elapsed_time": "0:29:06", "remaining_time": "0:16:38"} +{"current_steps": 1551, "total_steps": 2436, "loss": 1.3283928632736206, "lr": 3.517748379401872e-06, "epoch": 1.9100985221674875, "percentage": 63.67, "elapsed_time": "0:29:07", "remaining_time": "0:16:37"} +{"current_steps": 1552, "total_steps": 2436, "loss": 0.915777325630188, "lr": 3.510905988385449e-06, "epoch": 1.9113300492610836, "percentage": 63.71, "elapsed_time": "0:29:08", "remaining_time": "0:16:36"} +{"current_steps": 1553, "total_steps": 2436, "loss": 1.4235864877700806, "lr": 3.5040666560958246e-06, "epoch": 1.9125615763546797, "percentage": 63.75, "elapsed_time": "0:29:09", "remaining_time": "0:16:34"} +{"current_steps": 1554, "total_steps": 2436, "loss": 1.0727063417434692, "lr": 3.497230396581579e-06, "epoch": 1.9137931034482758, "percentage": 63.79, "elapsed_time": "0:29:10", "remaining_time": "0:16:33"} +{"current_steps": 1555, "total_steps": 2436, "loss": 1.2492493391036987, "lr": 3.4903972238849727e-06, "epoch": 1.9150246305418719, "percentage": 63.83, "elapsed_time": "0:29:12", "remaining_time": "0:16:32"} +{"current_steps": 1556, "total_steps": 2436, "loss": 1.855743408203125, "lr": 3.483567152041928e-06, "epoch": 1.916256157635468, "percentage": 63.88, "elapsed_time": "0:29:13", "remaining_time": "0:16:31"} +{"current_steps": 1557, "total_steps": 2436, "loss": 1.2882115840911865, "lr": 3.4767401950820003e-06, "epoch": 1.917487684729064, "percentage": 63.92, "elapsed_time": "0:29:14", "remaining_time": "0:16:30"} +{"current_steps": 1558, "total_steps": 2436, "loss": 1.0586508512496948, "lr": 3.469916367028345e-06, "epoch": 1.9187192118226601, "percentage": 63.96, "elapsed_time": "0:29:15", "remaining_time": "0:16:29"} +{"current_steps": 1559, "total_steps": 2436, "loss": 1.6678158044815063, "lr": 3.4630956818976875e-06, "epoch": 1.9199507389162562, "percentage": 64.0, "elapsed_time": "0:29:16", "remaining_time": "0:16:28"} +{"current_steps": 1560, "total_steps": 2436, "loss": 1.242276906967163, "lr": 3.4562781537003e-06, "epoch": 1.9211822660098523, "percentage": 64.04, "elapsed_time": "0:29:17", "remaining_time": "0:16:26"} +{"current_steps": 1561, "total_steps": 2436, "loss": 1.1909584999084473, "lr": 3.4494637964399723e-06, "epoch": 1.9224137931034484, "percentage": 64.08, "elapsed_time": "0:29:18", "remaining_time": "0:16:25"} +{"current_steps": 1562, "total_steps": 2436, "loss": 1.7636524438858032, "lr": 3.4426526241139778e-06, "epoch": 1.9236453201970445, "percentage": 64.12, "elapsed_time": "0:29:19", "remaining_time": "0:16:24"} +{"current_steps": 1563, "total_steps": 2436, "loss": 1.709825873374939, "lr": 3.4358446507130503e-06, "epoch": 1.9248768472906403, "percentage": 64.16, "elapsed_time": "0:29:20", "remaining_time": "0:16:23"} +{"current_steps": 1564, "total_steps": 2436, "loss": 1.0826925039291382, "lr": 3.4290398902213473e-06, "epoch": 1.9261083743842364, "percentage": 64.2, "elapsed_time": "0:29:21", "remaining_time": "0:16:22"} +{"current_steps": 1565, "total_steps": 2436, "loss": 1.2868252992630005, "lr": 3.4222383566164314e-06, "epoch": 1.9273399014778325, "percentage": 64.24, "elapsed_time": "0:29:22", "remaining_time": "0:16:21"} +{"current_steps": 1566, "total_steps": 2436, "loss": 1.9238274097442627, "lr": 3.4154400638692376e-06, "epoch": 1.9285714285714286, "percentage": 64.29, "elapsed_time": "0:29:23", "remaining_time": "0:16:19"} +{"current_steps": 1567, "total_steps": 2436, "loss": 1.615818977355957, "lr": 3.408645025944042e-06, "epoch": 1.9298029556650245, "percentage": 64.33, "elapsed_time": "0:29:25", "remaining_time": "0:16:18"} +{"current_steps": 1568, "total_steps": 2436, "loss": 1.124712586402893, "lr": 3.4018532567984326e-06, "epoch": 1.9310344827586206, "percentage": 64.37, "elapsed_time": "0:29:26", "remaining_time": "0:16:17"} +{"current_steps": 1569, "total_steps": 2436, "loss": 1.0411077737808228, "lr": 3.3950647703832907e-06, "epoch": 1.9322660098522166, "percentage": 64.41, "elapsed_time": "0:29:27", "remaining_time": "0:16:16"} +{"current_steps": 1570, "total_steps": 2436, "loss": 1.4247188568115234, "lr": 3.3882795806427437e-06, "epoch": 1.9334975369458127, "percentage": 64.45, "elapsed_time": "0:29:28", "remaining_time": "0:16:15"} +{"current_steps": 1571, "total_steps": 2436, "loss": 1.9558757543563843, "lr": 3.3814977015141576e-06, "epoch": 1.9347290640394088, "percentage": 64.49, "elapsed_time": "0:29:29", "remaining_time": "0:16:14"} +{"current_steps": 1572, "total_steps": 2436, "loss": 1.4765770435333252, "lr": 3.3747191469280917e-06, "epoch": 1.935960591133005, "percentage": 64.53, "elapsed_time": "0:29:30", "remaining_time": "0:16:13"} +{"current_steps": 1573, "total_steps": 2436, "loss": 1.2025914192199707, "lr": 3.3679439308082777e-06, "epoch": 1.937192118226601, "percentage": 64.57, "elapsed_time": "0:29:31", "remaining_time": "0:16:11"} +{"current_steps": 1574, "total_steps": 2436, "loss": 1.938293695449829, "lr": 3.361172067071595e-06, "epoch": 1.938423645320197, "percentage": 64.61, "elapsed_time": "0:29:32", "remaining_time": "0:16:10"} +{"current_steps": 1575, "total_steps": 2436, "loss": 1.9626538753509521, "lr": 3.3544035696280264e-06, "epoch": 1.9396551724137931, "percentage": 64.66, "elapsed_time": "0:29:33", "remaining_time": "0:16:09"} +{"current_steps": 1576, "total_steps": 2436, "loss": 2.4771430492401123, "lr": 3.34763845238065e-06, "epoch": 1.9408866995073892, "percentage": 64.7, "elapsed_time": "0:29:34", "remaining_time": "0:16:08"} +{"current_steps": 1577, "total_steps": 2436, "loss": 1.5694981813430786, "lr": 3.340876729225595e-06, "epoch": 1.9421182266009853, "percentage": 64.74, "elapsed_time": "0:29:35", "remaining_time": "0:16:07"} +{"current_steps": 1578, "total_steps": 2436, "loss": 1.3358147144317627, "lr": 3.334118414052021e-06, "epoch": 1.9433497536945814, "percentage": 64.78, "elapsed_time": "0:29:36", "remaining_time": "0:16:06"} +{"current_steps": 1579, "total_steps": 2436, "loss": 1.6929140090942383, "lr": 3.327363520742087e-06, "epoch": 1.9445812807881775, "percentage": 64.82, "elapsed_time": "0:29:37", "remaining_time": "0:16:04"} +{"current_steps": 1580, "total_steps": 2436, "loss": 1.1454588174819946, "lr": 3.320612063170926e-06, "epoch": 1.9458128078817734, "percentage": 64.86, "elapsed_time": "0:29:39", "remaining_time": "0:16:03"} +{"current_steps": 1581, "total_steps": 2436, "loss": 1.3037209510803223, "lr": 3.313864055206607e-06, "epoch": 1.9470443349753694, "percentage": 64.9, "elapsed_time": "0:29:40", "remaining_time": "0:16:02"} +{"current_steps": 1582, "total_steps": 2436, "loss": 1.2016770839691162, "lr": 3.3071195107101163e-06, "epoch": 1.9482758620689655, "percentage": 64.94, "elapsed_time": "0:29:41", "remaining_time": "0:16:01"} +{"current_steps": 1583, "total_steps": 2436, "loss": 1.5525718927383423, "lr": 3.3003784435353304e-06, "epoch": 1.9495073891625616, "percentage": 64.98, "elapsed_time": "0:29:42", "remaining_time": "0:16:00"} +{"current_steps": 1584, "total_steps": 2436, "loss": 1.293796420097351, "lr": 3.293640867528978e-06, "epoch": 1.9507389162561575, "percentage": 65.02, "elapsed_time": "0:29:43", "remaining_time": "0:15:59"} +{"current_steps": 1585, "total_steps": 2436, "loss": 1.544161081314087, "lr": 3.2869067965306178e-06, "epoch": 1.9519704433497536, "percentage": 65.07, "elapsed_time": "0:29:44", "remaining_time": "0:15:58"} +{"current_steps": 1586, "total_steps": 2436, "loss": 1.584174633026123, "lr": 3.2801762443726087e-06, "epoch": 1.9532019704433496, "percentage": 65.11, "elapsed_time": "0:29:45", "remaining_time": "0:15:56"} +{"current_steps": 1587, "total_steps": 2436, "loss": 1.4985432624816895, "lr": 3.273449224880081e-06, "epoch": 1.9544334975369457, "percentage": 65.15, "elapsed_time": "0:29:46", "remaining_time": "0:15:55"} +{"current_steps": 1588, "total_steps": 2436, "loss": 1.4310071468353271, "lr": 3.2667257518709124e-06, "epoch": 1.9556650246305418, "percentage": 65.19, "elapsed_time": "0:29:47", "remaining_time": "0:15:54"} +{"current_steps": 1589, "total_steps": 2436, "loss": 1.2174272537231445, "lr": 3.260005839155691e-06, "epoch": 1.956896551724138, "percentage": 65.23, "elapsed_time": "0:29:48", "remaining_time": "0:15:53"} +{"current_steps": 1590, "total_steps": 2436, "loss": 1.4618067741394043, "lr": 3.2532895005376943e-06, "epoch": 1.958128078817734, "percentage": 65.27, "elapsed_time": "0:29:49", "remaining_time": "0:15:52"} +{"current_steps": 1591, "total_steps": 2436, "loss": 1.2786412239074707, "lr": 3.2465767498128596e-06, "epoch": 1.95935960591133, "percentage": 65.31, "elapsed_time": "0:29:50", "remaining_time": "0:15:51"} +{"current_steps": 1592, "total_steps": 2436, "loss": 1.152226209640503, "lr": 3.2398676007697495e-06, "epoch": 1.9605911330049262, "percentage": 65.35, "elapsed_time": "0:29:52", "remaining_time": "0:15:50"} +{"current_steps": 1593, "total_steps": 2436, "loss": 1.8345131874084473, "lr": 3.233162067189533e-06, "epoch": 1.9618226600985222, "percentage": 65.39, "elapsed_time": "0:29:53", "remaining_time": "0:15:48"} +{"current_steps": 1594, "total_steps": 2436, "loss": 1.310433030128479, "lr": 3.2264601628459513e-06, "epoch": 1.9630541871921183, "percentage": 65.44, "elapsed_time": "0:29:54", "remaining_time": "0:15:47"} +{"current_steps": 1595, "total_steps": 2436, "loss": 2.3967676162719727, "lr": 3.2197619015052893e-06, "epoch": 1.9642857142857144, "percentage": 65.48, "elapsed_time": "0:29:55", "remaining_time": "0:15:46"} +{"current_steps": 1596, "total_steps": 2436, "loss": 1.7937273979187012, "lr": 3.2130672969263543e-06, "epoch": 1.9655172413793105, "percentage": 65.52, "elapsed_time": "0:29:56", "remaining_time": "0:15:45"} +{"current_steps": 1597, "total_steps": 2436, "loss": 2.0265514850616455, "lr": 3.206376362860432e-06, "epoch": 1.9667487684729064, "percentage": 65.56, "elapsed_time": "0:29:57", "remaining_time": "0:15:44"} +{"current_steps": 1598, "total_steps": 2436, "loss": 1.9514051675796509, "lr": 3.1996891130512796e-06, "epoch": 1.9679802955665024, "percentage": 65.6, "elapsed_time": "0:29:58", "remaining_time": "0:15:43"} +{"current_steps": 1599, "total_steps": 2436, "loss": 1.4068338871002197, "lr": 3.1930055612350795e-06, "epoch": 1.9692118226600985, "percentage": 65.64, "elapsed_time": "0:29:59", "remaining_time": "0:15:41"} +{"current_steps": 1600, "total_steps": 2436, "loss": 1.9438577890396118, "lr": 3.18632572114042e-06, "epoch": 1.9704433497536946, "percentage": 65.68, "elapsed_time": "0:30:00", "remaining_time": "0:15:40"} +{"current_steps": 1601, "total_steps": 2436, "loss": 1.432902455329895, "lr": 3.1796496064882677e-06, "epoch": 1.9716748768472905, "percentage": 65.72, "elapsed_time": "0:30:01", "remaining_time": "0:15:39"} +{"current_steps": 1602, "total_steps": 2436, "loss": 1.6505646705627441, "lr": 3.172977230991935e-06, "epoch": 1.9729064039408866, "percentage": 65.76, "elapsed_time": "0:30:02", "remaining_time": "0:15:38"} +{"current_steps": 1603, "total_steps": 2436, "loss": 2.332062005996704, "lr": 3.1663086083570493e-06, "epoch": 1.9741379310344827, "percentage": 65.8, "elapsed_time": "0:30:03", "remaining_time": "0:15:37"} +{"current_steps": 1604, "total_steps": 2436, "loss": 1.737352967262268, "lr": 3.159643752281536e-06, "epoch": 1.9753694581280787, "percentage": 65.85, "elapsed_time": "0:30:04", "remaining_time": "0:15:36"} +{"current_steps": 1605, "total_steps": 2436, "loss": 1.5183820724487305, "lr": 3.152982676455581e-06, "epoch": 1.9766009852216748, "percentage": 65.89, "elapsed_time": "0:30:06", "remaining_time": "0:15:35"} +{"current_steps": 1606, "total_steps": 2436, "loss": 1.5560420751571655, "lr": 3.1463253945616056e-06, "epoch": 1.977832512315271, "percentage": 65.93, "elapsed_time": "0:30:07", "remaining_time": "0:15:33"} +{"current_steps": 1607, "total_steps": 2436, "loss": 2.2159786224365234, "lr": 3.1396719202742375e-06, "epoch": 1.979064039408867, "percentage": 65.97, "elapsed_time": "0:30:08", "remaining_time": "0:15:32"} +{"current_steps": 1608, "total_steps": 2436, "loss": 3.4431471824645996, "lr": 3.133022267260283e-06, "epoch": 1.980295566502463, "percentage": 66.01, "elapsed_time": "0:30:09", "remaining_time": "0:15:31"} +{"current_steps": 1609, "total_steps": 2436, "loss": 1.0674099922180176, "lr": 3.1263764491786984e-06, "epoch": 1.9815270935960592, "percentage": 66.05, "elapsed_time": "0:30:10", "remaining_time": "0:15:30"} +{"current_steps": 1610, "total_steps": 2436, "loss": 1.2427492141723633, "lr": 3.1197344796805675e-06, "epoch": 1.9827586206896552, "percentage": 66.09, "elapsed_time": "0:30:11", "remaining_time": "0:15:29"} +{"current_steps": 1611, "total_steps": 2436, "loss": 1.5895799398422241, "lr": 3.1130963724090626e-06, "epoch": 1.9839901477832513, "percentage": 66.13, "elapsed_time": "0:30:12", "remaining_time": "0:15:28"} +{"current_steps": 1612, "total_steps": 2436, "loss": 1.3781355619430542, "lr": 3.1064621409994245e-06, "epoch": 1.9852216748768474, "percentage": 66.17, "elapsed_time": "0:30:13", "remaining_time": "0:15:27"} +{"current_steps": 1613, "total_steps": 2436, "loss": 1.3307732343673706, "lr": 3.0998317990789378e-06, "epoch": 1.9864532019704435, "percentage": 66.22, "elapsed_time": "0:30:14", "remaining_time": "0:15:25"} +{"current_steps": 1614, "total_steps": 2436, "loss": 1.340241551399231, "lr": 3.0932053602668876e-06, "epoch": 1.9876847290640394, "percentage": 66.26, "elapsed_time": "0:30:15", "remaining_time": "0:15:24"} +{"current_steps": 1615, "total_steps": 2436, "loss": 1.5866634845733643, "lr": 3.0865828381745515e-06, "epoch": 1.9889162561576355, "percentage": 66.3, "elapsed_time": "0:30:16", "remaining_time": "0:15:23"} +{"current_steps": 1616, "total_steps": 2436, "loss": 1.363608717918396, "lr": 3.0799642464051573e-06, "epoch": 1.9901477832512315, "percentage": 66.34, "elapsed_time": "0:30:17", "remaining_time": "0:15:22"} +{"current_steps": 1617, "total_steps": 2436, "loss": 0.8918144106864929, "lr": 3.0733495985538575e-06, "epoch": 1.9913793103448276, "percentage": 66.38, "elapsed_time": "0:30:18", "remaining_time": "0:15:21"} +{"current_steps": 1618, "total_steps": 2436, "loss": 1.4538538455963135, "lr": 3.0667389082077114e-06, "epoch": 1.9926108374384235, "percentage": 66.42, "elapsed_time": "0:30:20", "remaining_time": "0:15:20"} +{"current_steps": 1619, "total_steps": 2436, "loss": 1.6913137435913086, "lr": 3.0601321889456378e-06, "epoch": 1.9938423645320196, "percentage": 66.46, "elapsed_time": "0:30:21", "remaining_time": "0:15:18"} +{"current_steps": 1620, "total_steps": 2436, "loss": 1.4266109466552734, "lr": 3.0535294543384074e-06, "epoch": 1.9950738916256157, "percentage": 66.5, "elapsed_time": "0:30:22", "remaining_time": "0:15:17"} +{"current_steps": 1621, "total_steps": 2436, "loss": 1.2479441165924072, "lr": 3.046930717948604e-06, "epoch": 1.9963054187192117, "percentage": 66.54, "elapsed_time": "0:30:23", "remaining_time": "0:15:16"} +{"current_steps": 1622, "total_steps": 2436, "loss": 2.138500213623047, "lr": 3.0403359933305965e-06, "epoch": 1.9975369458128078, "percentage": 66.58, "elapsed_time": "0:30:24", "remaining_time": "0:15:15"} +{"current_steps": 1623, "total_steps": 2436, "loss": 1.7762420177459717, "lr": 3.033745294030517e-06, "epoch": 1.998768472906404, "percentage": 66.63, "elapsed_time": "0:30:25", "remaining_time": "0:15:14"} +{"current_steps": 1624, "total_steps": 2436, "loss": 0.858219563961029, "lr": 3.0271586335862258e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:30:26", "remaining_time": "0:15:13"} +{"current_steps": 1625, "total_steps": 2436, "loss": 0.5493918657302856, "lr": 3.0205760255272874e-06, "epoch": 2.001231527093596, "percentage": 66.71, "elapsed_time": "0:31:50", "remaining_time": "0:15:53"} +{"current_steps": 1626, "total_steps": 2436, "loss": 0.25155016779899597, "lr": 3.013997483374944e-06, "epoch": 2.002463054187192, "percentage": 66.75, "elapsed_time": "0:31:51", "remaining_time": "0:15:52"} +{"current_steps": 1627, "total_steps": 2436, "loss": 0.7727752923965454, "lr": 3.007423020642084e-06, "epoch": 2.0036945812807883, "percentage": 66.79, "elapsed_time": "0:31:52", "remaining_time": "0:15:50"} +{"current_steps": 1628, "total_steps": 2436, "loss": 0.43595510721206665, "lr": 3.0008526508332216e-06, "epoch": 2.0049261083743843, "percentage": 66.83, "elapsed_time": "0:31:53", "remaining_time": "0:15:49"} +{"current_steps": 1629, "total_steps": 2436, "loss": 0.3856297433376312, "lr": 2.9942863874444565e-06, "epoch": 2.0061576354679804, "percentage": 66.87, "elapsed_time": "0:31:54", "remaining_time": "0:15:48"} +{"current_steps": 1630, "total_steps": 2436, "loss": 0.8458558917045593, "lr": 2.987724243963458e-06, "epoch": 2.0073891625615765, "percentage": 66.91, "elapsed_time": "0:31:55", "remaining_time": "0:15:47"} +{"current_steps": 1631, "total_steps": 2436, "loss": 0.46873772144317627, "lr": 2.981166233869429e-06, "epoch": 2.0086206896551726, "percentage": 66.95, "elapsed_time": "0:31:56", "remaining_time": "0:15:46"} +{"current_steps": 1632, "total_steps": 2436, "loss": 0.42779290676116943, "lr": 2.9746123706330886e-06, "epoch": 2.0098522167487687, "percentage": 67.0, "elapsed_time": "0:31:57", "remaining_time": "0:15:44"} +{"current_steps": 1633, "total_steps": 2436, "loss": 0.627717912197113, "lr": 2.9680626677166324e-06, "epoch": 2.0110837438423643, "percentage": 67.04, "elapsed_time": "0:31:59", "remaining_time": "0:15:43"} +{"current_steps": 1634, "total_steps": 2436, "loss": 1.0879265069961548, "lr": 2.9615171385737107e-06, "epoch": 2.0123152709359604, "percentage": 67.08, "elapsed_time": "0:32:00", "remaining_time": "0:15:42"} +{"current_steps": 1635, "total_steps": 2436, "loss": 0.6282559037208557, "lr": 2.9549757966494053e-06, "epoch": 2.0135467980295565, "percentage": 67.12, "elapsed_time": "0:32:01", "remaining_time": "0:15:41"} +{"current_steps": 1636, "total_steps": 2436, "loss": 0.5774171352386475, "lr": 2.9484386553801875e-06, "epoch": 2.0147783251231526, "percentage": 67.16, "elapsed_time": "0:32:02", "remaining_time": "0:15:39"} +{"current_steps": 1637, "total_steps": 2436, "loss": 0.38788995146751404, "lr": 2.9419057281939106e-06, "epoch": 2.0160098522167487, "percentage": 67.2, "elapsed_time": "0:32:03", "remaining_time": "0:15:38"} +{"current_steps": 1638, "total_steps": 2436, "loss": 1.1726861000061035, "lr": 2.935377028509766e-06, "epoch": 2.0172413793103448, "percentage": 67.24, "elapsed_time": "0:32:04", "remaining_time": "0:15:37"} +{"current_steps": 1639, "total_steps": 2436, "loss": 0.7854858636856079, "lr": 2.9288525697382623e-06, "epoch": 2.018472906403941, "percentage": 67.28, "elapsed_time": "0:32:05", "remaining_time": "0:15:36"} +{"current_steps": 1640, "total_steps": 2436, "loss": 0.25507253408432007, "lr": 2.922332365281201e-06, "epoch": 2.019704433497537, "percentage": 67.32, "elapsed_time": "0:32:06", "remaining_time": "0:15:35"} +{"current_steps": 1641, "total_steps": 2436, "loss": 0.5835862755775452, "lr": 2.9158164285316356e-06, "epoch": 2.020935960591133, "percentage": 67.36, "elapsed_time": "0:32:07", "remaining_time": "0:15:33"} +{"current_steps": 1642, "total_steps": 2436, "loss": 0.49123138189315796, "lr": 2.9093047728738604e-06, "epoch": 2.022167487684729, "percentage": 67.41, "elapsed_time": "0:32:08", "remaining_time": "0:15:32"} +{"current_steps": 1643, "total_steps": 2436, "loss": 0.20273317396640778, "lr": 2.9027974116833756e-06, "epoch": 2.023399014778325, "percentage": 67.45, "elapsed_time": "0:32:09", "remaining_time": "0:15:31"} +{"current_steps": 1644, "total_steps": 2436, "loss": 0.46980565786361694, "lr": 2.896294358326862e-06, "epoch": 2.0246305418719213, "percentage": 67.49, "elapsed_time": "0:32:10", "remaining_time": "0:15:30"} +{"current_steps": 1645, "total_steps": 2436, "loss": 0.23243547976016998, "lr": 2.889795626162143e-06, "epoch": 2.0258620689655173, "percentage": 67.53, "elapsed_time": "0:32:12", "remaining_time": "0:15:29"} +{"current_steps": 1646, "total_steps": 2436, "loss": 1.3259830474853516, "lr": 2.883301228538178e-06, "epoch": 2.0270935960591134, "percentage": 67.57, "elapsed_time": "0:32:13", "remaining_time": "0:15:27"} +{"current_steps": 1647, "total_steps": 2436, "loss": 0.3021068274974823, "lr": 2.8768111787950105e-06, "epoch": 2.0283251231527095, "percentage": 67.61, "elapsed_time": "0:32:14", "remaining_time": "0:15:26"} +{"current_steps": 1648, "total_steps": 2436, "loss": 0.3854427933692932, "lr": 2.8703254902637646e-06, "epoch": 2.0295566502463056, "percentage": 67.65, "elapsed_time": "0:32:15", "remaining_time": "0:15:25"} +{"current_steps": 1649, "total_steps": 2436, "loss": 0.3356427848339081, "lr": 2.8638441762665957e-06, "epoch": 2.0307881773399017, "percentage": 67.69, "elapsed_time": "0:32:16", "remaining_time": "0:15:24"} +{"current_steps": 1650, "total_steps": 2436, "loss": 0.4785861372947693, "lr": 2.857367250116682e-06, "epoch": 2.0320197044334973, "percentage": 67.73, "elapsed_time": "0:32:17", "remaining_time": "0:15:22"} +{"current_steps": 1651, "total_steps": 2436, "loss": 0.1944020539522171, "lr": 2.8508947251181885e-06, "epoch": 2.0332512315270934, "percentage": 67.78, "elapsed_time": "0:32:18", "remaining_time": "0:15:21"} +{"current_steps": 1652, "total_steps": 2436, "loss": 0.29677248001098633, "lr": 2.8444266145662284e-06, "epoch": 2.0344827586206895, "percentage": 67.82, "elapsed_time": "0:32:19", "remaining_time": "0:15:20"} +{"current_steps": 1653, "total_steps": 2436, "loss": 1.517862319946289, "lr": 2.8379629317468604e-06, "epoch": 2.0357142857142856, "percentage": 67.86, "elapsed_time": "0:32:20", "remaining_time": "0:15:19"} +{"current_steps": 1654, "total_steps": 2436, "loss": 0.5191118717193604, "lr": 2.8315036899370442e-06, "epoch": 2.0369458128078817, "percentage": 67.9, "elapsed_time": "0:32:21", "remaining_time": "0:15:18"} +{"current_steps": 1655, "total_steps": 2436, "loss": 0.42354950308799744, "lr": 2.825048902404612e-06, "epoch": 2.0381773399014778, "percentage": 67.94, "elapsed_time": "0:32:22", "remaining_time": "0:15:16"} +{"current_steps": 1656, "total_steps": 2436, "loss": 0.6974557638168335, "lr": 2.818598582408255e-06, "epoch": 2.039408866995074, "percentage": 67.98, "elapsed_time": "0:32:24", "remaining_time": "0:15:15"} +{"current_steps": 1657, "total_steps": 2436, "loss": 0.8337801694869995, "lr": 2.8121527431974838e-06, "epoch": 2.04064039408867, "percentage": 68.02, "elapsed_time": "0:32:25", "remaining_time": "0:15:14"} +{"current_steps": 1658, "total_steps": 2436, "loss": 0.48300114274024963, "lr": 2.805711398012604e-06, "epoch": 2.041871921182266, "percentage": 68.06, "elapsed_time": "0:32:26", "remaining_time": "0:15:13"} +{"current_steps": 1659, "total_steps": 2436, "loss": 0.2231900542974472, "lr": 2.799274560084688e-06, "epoch": 2.043103448275862, "percentage": 68.1, "elapsed_time": "0:32:27", "remaining_time": "0:15:12"} +{"current_steps": 1660, "total_steps": 2436, "loss": 0.7431713342666626, "lr": 2.7928422426355554e-06, "epoch": 2.044334975369458, "percentage": 68.14, "elapsed_time": "0:32:28", "remaining_time": "0:15:10"} +{"current_steps": 1661, "total_steps": 2436, "loss": 0.5905585289001465, "lr": 2.7864144588777403e-06, "epoch": 2.0455665024630543, "percentage": 68.19, "elapsed_time": "0:32:29", "remaining_time": "0:15:09"} +{"current_steps": 1662, "total_steps": 2436, "loss": 0.5379045009613037, "lr": 2.779991222014459e-06, "epoch": 2.0467980295566504, "percentage": 68.23, "elapsed_time": "0:32:30", "remaining_time": "0:15:08"} +{"current_steps": 1663, "total_steps": 2436, "loss": 0.4073173403739929, "lr": 2.77357254523959e-06, "epoch": 2.0480295566502464, "percentage": 68.27, "elapsed_time": "0:32:31", "remaining_time": "0:15:07"} +{"current_steps": 1664, "total_steps": 2436, "loss": 0.37792834639549255, "lr": 2.767158441737646e-06, "epoch": 2.0492610837438425, "percentage": 68.31, "elapsed_time": "0:32:32", "remaining_time": "0:15:05"} +{"current_steps": 1665, "total_steps": 2436, "loss": 0.5250200629234314, "lr": 2.7607489246837505e-06, "epoch": 2.0504926108374386, "percentage": 68.35, "elapsed_time": "0:32:33", "remaining_time": "0:15:04"} +{"current_steps": 1666, "total_steps": 2436, "loss": 0.7716425061225891, "lr": 2.754344007243594e-06, "epoch": 2.0517241379310347, "percentage": 68.39, "elapsed_time": "0:32:34", "remaining_time": "0:15:03"} +{"current_steps": 1667, "total_steps": 2436, "loss": 0.6505113244056702, "lr": 2.74794370257343e-06, "epoch": 2.0529556650246303, "percentage": 68.43, "elapsed_time": "0:32:36", "remaining_time": "0:15:02"} +{"current_steps": 1668, "total_steps": 2436, "loss": 1.237591028213501, "lr": 2.741548023820037e-06, "epoch": 2.0541871921182264, "percentage": 68.47, "elapsed_time": "0:32:37", "remaining_time": "0:15:01"} +{"current_steps": 1669, "total_steps": 2436, "loss": 0.33151859045028687, "lr": 2.7351569841206792e-06, "epoch": 2.0554187192118225, "percentage": 68.51, "elapsed_time": "0:32:38", "remaining_time": "0:14:59"} +{"current_steps": 1670, "total_steps": 2436, "loss": 0.42522889375686646, "lr": 2.728770596603105e-06, "epoch": 2.0566502463054186, "percentage": 68.56, "elapsed_time": "0:32:39", "remaining_time": "0:14:58"} +{"current_steps": 1671, "total_steps": 2436, "loss": 0.3359280824661255, "lr": 2.722388874385503e-06, "epoch": 2.0578817733990147, "percentage": 68.6, "elapsed_time": "0:32:40", "remaining_time": "0:14:57"} +{"current_steps": 1672, "total_steps": 2436, "loss": 0.23182198405265808, "lr": 2.716011830576475e-06, "epoch": 2.0591133004926108, "percentage": 68.64, "elapsed_time": "0:32:41", "remaining_time": "0:14:56"} +{"current_steps": 1673, "total_steps": 2436, "loss": 0.30262982845306396, "lr": 2.7096394782750186e-06, "epoch": 2.060344827586207, "percentage": 68.68, "elapsed_time": "0:32:42", "remaining_time": "0:14:55"} +{"current_steps": 1674, "total_steps": 2436, "loss": 0.23311859369277954, "lr": 2.7032718305704887e-06, "epoch": 2.061576354679803, "percentage": 68.72, "elapsed_time": "0:32:43", "remaining_time": "0:14:53"} +{"current_steps": 1675, "total_steps": 2436, "loss": 0.6328019499778748, "lr": 2.696908900542584e-06, "epoch": 2.062807881773399, "percentage": 68.76, "elapsed_time": "0:32:44", "remaining_time": "0:14:52"} +{"current_steps": 1676, "total_steps": 2436, "loss": 0.30473750829696655, "lr": 2.690550701261304e-06, "epoch": 2.064039408866995, "percentage": 68.8, "elapsed_time": "0:32:45", "remaining_time": "0:14:51"} +{"current_steps": 1677, "total_steps": 2436, "loss": 0.2824372947216034, "lr": 2.684197245786938e-06, "epoch": 2.065270935960591, "percentage": 68.84, "elapsed_time": "0:32:46", "remaining_time": "0:14:50"} +{"current_steps": 1678, "total_steps": 2436, "loss": 0.3543265163898468, "lr": 2.677848547170029e-06, "epoch": 2.0665024630541873, "percentage": 68.88, "elapsed_time": "0:32:47", "remaining_time": "0:14:48"} +{"current_steps": 1679, "total_steps": 2436, "loss": 0.6176484823226929, "lr": 2.671504618451348e-06, "epoch": 2.0677339901477834, "percentage": 68.92, "elapsed_time": "0:32:49", "remaining_time": "0:14:47"} +{"current_steps": 1680, "total_steps": 2436, "loss": 0.5290611386299133, "lr": 2.665165472661866e-06, "epoch": 2.0689655172413794, "percentage": 68.97, "elapsed_time": "0:32:50", "remaining_time": "0:14:46"} +{"current_steps": 1681, "total_steps": 2436, "loss": 0.5321454405784607, "lr": 2.658831122822735e-06, "epoch": 2.0701970443349755, "percentage": 69.01, "elapsed_time": "0:32:51", "remaining_time": "0:14:45"} +{"current_steps": 1682, "total_steps": 2436, "loss": 0.27902156114578247, "lr": 2.6525015819452504e-06, "epoch": 2.0714285714285716, "percentage": 69.05, "elapsed_time": "0:32:52", "remaining_time": "0:14:44"} +{"current_steps": 1683, "total_steps": 2436, "loss": 0.46582847833633423, "lr": 2.6461768630308326e-06, "epoch": 2.0726600985221673, "percentage": 69.09, "elapsed_time": "0:32:53", "remaining_time": "0:14:42"} +{"current_steps": 1684, "total_steps": 2436, "loss": 0.651951014995575, "lr": 2.6398569790710007e-06, "epoch": 2.0738916256157633, "percentage": 69.13, "elapsed_time": "0:32:54", "remaining_time": "0:14:41"} +{"current_steps": 1685, "total_steps": 2436, "loss": 0.36612239480018616, "lr": 2.633541943047334e-06, "epoch": 2.0751231527093594, "percentage": 69.17, "elapsed_time": "0:32:55", "remaining_time": "0:14:40"} +{"current_steps": 1686, "total_steps": 2436, "loss": 0.22278031706809998, "lr": 2.6272317679314573e-06, "epoch": 2.0763546798029555, "percentage": 69.21, "elapsed_time": "0:32:56", "remaining_time": "0:14:39"} +{"current_steps": 1687, "total_steps": 2436, "loss": 0.33012956380844116, "lr": 2.620926466685013e-06, "epoch": 2.0775862068965516, "percentage": 69.25, "elapsed_time": "0:32:57", "remaining_time": "0:14:38"} +{"current_steps": 1688, "total_steps": 2436, "loss": 0.7396690845489502, "lr": 2.6146260522596334e-06, "epoch": 2.0788177339901477, "percentage": 69.29, "elapsed_time": "0:32:58", "remaining_time": "0:14:36"} +{"current_steps": 1689, "total_steps": 2436, "loss": 0.8257578611373901, "lr": 2.608330537596907e-06, "epoch": 2.0800492610837438, "percentage": 69.33, "elapsed_time": "0:33:00", "remaining_time": "0:14:35"} +{"current_steps": 1690, "total_steps": 2436, "loss": 0.4538348317146301, "lr": 2.6020399356283586e-06, "epoch": 2.08128078817734, "percentage": 69.38, "elapsed_time": "0:33:01", "remaining_time": "0:14:34"} +{"current_steps": 1691, "total_steps": 2436, "loss": 0.992777943611145, "lr": 2.595754259275428e-06, "epoch": 2.082512315270936, "percentage": 69.42, "elapsed_time": "0:33:02", "remaining_time": "0:14:33"} +{"current_steps": 1692, "total_steps": 2436, "loss": 0.346379816532135, "lr": 2.589473521449434e-06, "epoch": 2.083743842364532, "percentage": 69.46, "elapsed_time": "0:33:03", "remaining_time": "0:14:32"} +{"current_steps": 1693, "total_steps": 2436, "loss": 0.4523533284664154, "lr": 2.583197735051546e-06, "epoch": 2.084975369458128, "percentage": 69.5, "elapsed_time": "0:33:04", "remaining_time": "0:14:30"} +{"current_steps": 1694, "total_steps": 2436, "loss": 0.11842907965183258, "lr": 2.576926912972771e-06, "epoch": 2.086206896551724, "percentage": 69.54, "elapsed_time": "0:33:05", "remaining_time": "0:14:29"} +{"current_steps": 1695, "total_steps": 2436, "loss": 0.381897896528244, "lr": 2.5706610680939186e-06, "epoch": 2.0874384236453203, "percentage": 69.58, "elapsed_time": "0:33:06", "remaining_time": "0:14:28"} +{"current_steps": 1696, "total_steps": 2436, "loss": 0.3824227452278137, "lr": 2.564400213285564e-06, "epoch": 2.0886699507389164, "percentage": 69.62, "elapsed_time": "0:33:07", "remaining_time": "0:14:27"} +{"current_steps": 1697, "total_steps": 2436, "loss": 0.4153192639350891, "lr": 2.5581443614080433e-06, "epoch": 2.0899014778325125, "percentage": 69.66, "elapsed_time": "0:33:08", "remaining_time": "0:14:26"} +{"current_steps": 1698, "total_steps": 2436, "loss": 0.3284783959388733, "lr": 2.5518935253114153e-06, "epoch": 2.0911330049261085, "percentage": 69.7, "elapsed_time": "0:33:09", "remaining_time": "0:14:24"} +{"current_steps": 1699, "total_steps": 2436, "loss": 0.7730638980865479, "lr": 2.545647717835428e-06, "epoch": 2.0923645320197046, "percentage": 69.75, "elapsed_time": "0:33:10", "remaining_time": "0:14:23"} +{"current_steps": 1700, "total_steps": 2436, "loss": 0.31647253036499023, "lr": 2.539406951809512e-06, "epoch": 2.0935960591133007, "percentage": 69.79, "elapsed_time": "0:33:12", "remaining_time": "0:14:22"} +{"current_steps": 1701, "total_steps": 2436, "loss": 0.5977708101272583, "lr": 2.53317124005273e-06, "epoch": 2.0948275862068964, "percentage": 69.83, "elapsed_time": "0:33:13", "remaining_time": "0:14:21"} +{"current_steps": 1702, "total_steps": 2436, "loss": 0.2646758556365967, "lr": 2.5269405953737735e-06, "epoch": 2.0960591133004924, "percentage": 69.87, "elapsed_time": "0:33:14", "remaining_time": "0:14:20"} +{"current_steps": 1703, "total_steps": 2436, "loss": 0.5242122411727905, "lr": 2.5207150305709167e-06, "epoch": 2.0972906403940885, "percentage": 69.91, "elapsed_time": "0:33:15", "remaining_time": "0:14:18"} +{"current_steps": 1704, "total_steps": 2436, "loss": 0.43271976709365845, "lr": 2.5144945584320056e-06, "epoch": 2.0985221674876846, "percentage": 69.95, "elapsed_time": "0:33:16", "remaining_time": "0:14:17"} +{"current_steps": 1705, "total_steps": 2436, "loss": 0.902009904384613, "lr": 2.5082791917344256e-06, "epoch": 2.0997536945812807, "percentage": 69.99, "elapsed_time": "0:33:17", "remaining_time": "0:14:16"} +{"current_steps": 1706, "total_steps": 2436, "loss": 0.5218071937561035, "lr": 2.5020689432450706e-06, "epoch": 2.100985221674877, "percentage": 70.03, "elapsed_time": "0:33:18", "remaining_time": "0:14:15"} +{"current_steps": 1707, "total_steps": 2436, "loss": 0.7475143671035767, "lr": 2.495863825720322e-06, "epoch": 2.102216748768473, "percentage": 70.07, "elapsed_time": "0:33:19", "remaining_time": "0:14:13"} +{"current_steps": 1708, "total_steps": 2436, "loss": 0.31655290722846985, "lr": 2.4896638519060257e-06, "epoch": 2.103448275862069, "percentage": 70.11, "elapsed_time": "0:33:20", "remaining_time": "0:14:12"} +{"current_steps": 1709, "total_steps": 2436, "loss": 0.30808842182159424, "lr": 2.4834690345374608e-06, "epoch": 2.104679802955665, "percentage": 70.16, "elapsed_time": "0:33:21", "remaining_time": "0:14:11"} +{"current_steps": 1710, "total_steps": 2436, "loss": 0.7037611603736877, "lr": 2.477279386339309e-06, "epoch": 2.105911330049261, "percentage": 70.2, "elapsed_time": "0:33:22", "remaining_time": "0:14:10"} +{"current_steps": 1711, "total_steps": 2436, "loss": 0.4699273407459259, "lr": 2.471094920025644e-06, "epoch": 2.107142857142857, "percentage": 70.24, "elapsed_time": "0:33:23", "remaining_time": "0:14:09"} +{"current_steps": 1712, "total_steps": 2436, "loss": 0.5032830238342285, "lr": 2.4649156482998873e-06, "epoch": 2.1083743842364533, "percentage": 70.28, "elapsed_time": "0:33:24", "remaining_time": "0:14:07"} +{"current_steps": 1713, "total_steps": 2436, "loss": 1.2563080787658691, "lr": 2.45874158385479e-06, "epoch": 2.1096059113300494, "percentage": 70.32, "elapsed_time": "0:33:26", "remaining_time": "0:14:06"} +{"current_steps": 1714, "total_steps": 2436, "loss": 0.29728978872299194, "lr": 2.4525727393724136e-06, "epoch": 2.1108374384236455, "percentage": 70.36, "elapsed_time": "0:33:27", "remaining_time": "0:14:05"} +{"current_steps": 1715, "total_steps": 2436, "loss": 0.2391032576560974, "lr": 2.446409127524094e-06, "epoch": 2.1120689655172415, "percentage": 70.4, "elapsed_time": "0:33:28", "remaining_time": "0:14:04"} +{"current_steps": 1716, "total_steps": 2436, "loss": 0.4612117409706116, "lr": 2.4402507609704163e-06, "epoch": 2.1133004926108376, "percentage": 70.44, "elapsed_time": "0:33:29", "remaining_time": "0:14:03"} +{"current_steps": 1717, "total_steps": 2436, "loss": 0.36539849638938904, "lr": 2.4340976523611957e-06, "epoch": 2.1145320197044333, "percentage": 70.48, "elapsed_time": "0:33:30", "remaining_time": "0:14:01"} +{"current_steps": 1718, "total_steps": 2436, "loss": 0.2918080687522888, "lr": 2.427949814335443e-06, "epoch": 2.1157635467980294, "percentage": 70.53, "elapsed_time": "0:33:31", "remaining_time": "0:14:00"} +{"current_steps": 1719, "total_steps": 2436, "loss": 0.4508627653121948, "lr": 2.4218072595213467e-06, "epoch": 2.1169950738916254, "percentage": 70.57, "elapsed_time": "0:33:32", "remaining_time": "0:13:59"} +{"current_steps": 1720, "total_steps": 2436, "loss": 0.43477705121040344, "lr": 2.4156700005362384e-06, "epoch": 2.1182266009852215, "percentage": 70.61, "elapsed_time": "0:33:33", "remaining_time": "0:13:58"} +{"current_steps": 1721, "total_steps": 2436, "loss": 0.36739200353622437, "lr": 2.409538049986576e-06, "epoch": 2.1194581280788176, "percentage": 70.65, "elapsed_time": "0:33:34", "remaining_time": "0:13:57"} +{"current_steps": 1722, "total_steps": 2436, "loss": 0.722801923751831, "lr": 2.403411420467916e-06, "epoch": 2.1206896551724137, "percentage": 70.69, "elapsed_time": "0:33:35", "remaining_time": "0:13:55"} +{"current_steps": 1723, "total_steps": 2436, "loss": 0.3729158043861389, "lr": 2.3972901245648724e-06, "epoch": 2.12192118226601, "percentage": 70.73, "elapsed_time": "0:33:36", "remaining_time": "0:13:54"} +{"current_steps": 1724, "total_steps": 2436, "loss": 0.741644024848938, "lr": 2.3911741748511163e-06, "epoch": 2.123152709359606, "percentage": 70.77, "elapsed_time": "0:33:37", "remaining_time": "0:13:53"} +{"current_steps": 1725, "total_steps": 2436, "loss": 0.21925917267799377, "lr": 2.385063583889335e-06, "epoch": 2.124384236453202, "percentage": 70.81, "elapsed_time": "0:33:39", "remaining_time": "0:13:52"} +{"current_steps": 1726, "total_steps": 2436, "loss": 0.3161308765411377, "lr": 2.378958364231202e-06, "epoch": 2.125615763546798, "percentage": 70.85, "elapsed_time": "0:33:40", "remaining_time": "0:13:50"} +{"current_steps": 1727, "total_steps": 2436, "loss": 0.2520957887172699, "lr": 2.3728585284173646e-06, "epoch": 2.126847290640394, "percentage": 70.89, "elapsed_time": "0:33:41", "remaining_time": "0:13:49"} +{"current_steps": 1728, "total_steps": 2436, "loss": 0.5538915991783142, "lr": 2.3667640889774096e-06, "epoch": 2.12807881773399, "percentage": 70.94, "elapsed_time": "0:33:42", "remaining_time": "0:13:48"} +{"current_steps": 1729, "total_steps": 2436, "loss": 0.5438660979270935, "lr": 2.3606750584298375e-06, "epoch": 2.1293103448275863, "percentage": 70.98, "elapsed_time": "0:33:43", "remaining_time": "0:13:47"} +{"current_steps": 1730, "total_steps": 2436, "loss": 0.39724698662757874, "lr": 2.3545914492820366e-06, "epoch": 2.1305418719211824, "percentage": 71.02, "elapsed_time": "0:33:44", "remaining_time": "0:13:46"} +{"current_steps": 1731, "total_steps": 2436, "loss": 0.3480866551399231, "lr": 2.348513274030264e-06, "epoch": 2.1317733990147785, "percentage": 71.06, "elapsed_time": "0:33:45", "remaining_time": "0:13:44"} +{"current_steps": 1732, "total_steps": 2436, "loss": 0.9076392650604248, "lr": 2.3424405451596143e-06, "epoch": 2.1330049261083746, "percentage": 71.1, "elapsed_time": "0:33:46", "remaining_time": "0:13:43"} +{"current_steps": 1733, "total_steps": 2436, "loss": 0.19863876700401306, "lr": 2.3363732751439926e-06, "epoch": 2.1342364532019706, "percentage": 71.14, "elapsed_time": "0:33:47", "remaining_time": "0:13:42"} +{"current_steps": 1734, "total_steps": 2436, "loss": 0.5347404479980469, "lr": 2.3303114764460887e-06, "epoch": 2.1354679802955667, "percentage": 71.18, "elapsed_time": "0:33:48", "remaining_time": "0:13:41"} +{"current_steps": 1735, "total_steps": 2436, "loss": 0.4876821041107178, "lr": 2.32425516151736e-06, "epoch": 2.1366995073891624, "percentage": 71.22, "elapsed_time": "0:33:49", "remaining_time": "0:13:40"} +{"current_steps": 1736, "total_steps": 2436, "loss": 0.24914954602718353, "lr": 2.3182043427979973e-06, "epoch": 2.1379310344827585, "percentage": 71.26, "elapsed_time": "0:33:50", "remaining_time": "0:13:38"} +{"current_steps": 1737, "total_steps": 2436, "loss": 0.5773565769195557, "lr": 2.3121590327168987e-06, "epoch": 2.1391625615763545, "percentage": 71.31, "elapsed_time": "0:33:51", "remaining_time": "0:13:37"} +{"current_steps": 1738, "total_steps": 2436, "loss": 0.7779598832130432, "lr": 2.30611924369165e-06, "epoch": 2.1403940886699506, "percentage": 71.35, "elapsed_time": "0:33:53", "remaining_time": "0:13:36"} +{"current_steps": 1739, "total_steps": 2436, "loss": 0.27866464853286743, "lr": 2.3000849881285016e-06, "epoch": 2.1416256157635467, "percentage": 71.39, "elapsed_time": "0:33:54", "remaining_time": "0:13:35"} +{"current_steps": 1740, "total_steps": 2436, "loss": 0.5243108868598938, "lr": 2.2940562784223224e-06, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "0:33:55", "remaining_time": "0:13:34"} +{"current_steps": 1741, "total_steps": 2436, "loss": 0.6560786366462708, "lr": 2.2880331269566043e-06, "epoch": 2.144088669950739, "percentage": 71.47, "elapsed_time": "0:33:56", "remaining_time": "0:13:32"} +{"current_steps": 1742, "total_steps": 2436, "loss": 0.6339880228042603, "lr": 2.282015546103418e-06, "epoch": 2.145320197044335, "percentage": 71.51, "elapsed_time": "0:33:57", "remaining_time": "0:13:31"} +{"current_steps": 1743, "total_steps": 2436, "loss": 0.2517808973789215, "lr": 2.2760035482233868e-06, "epoch": 2.146551724137931, "percentage": 71.55, "elapsed_time": "0:33:58", "remaining_time": "0:13:30"} +{"current_steps": 1744, "total_steps": 2436, "loss": 0.40347909927368164, "lr": 2.269997145665674e-06, "epoch": 2.147783251231527, "percentage": 71.59, "elapsed_time": "0:33:59", "remaining_time": "0:13:29"} +{"current_steps": 1745, "total_steps": 2436, "loss": 0.4681488573551178, "lr": 2.263996350767942e-06, "epoch": 2.149014778325123, "percentage": 71.63, "elapsed_time": "0:34:00", "remaining_time": "0:13:28"} +{"current_steps": 1746, "total_steps": 2436, "loss": 0.6371068954467773, "lr": 2.2580011758563418e-06, "epoch": 2.1502463054187193, "percentage": 71.67, "elapsed_time": "0:34:01", "remaining_time": "0:13:26"} +{"current_steps": 1747, "total_steps": 2436, "loss": 0.4741581678390503, "lr": 2.2520116332454726e-06, "epoch": 2.1514778325123154, "percentage": 71.72, "elapsed_time": "0:34:02", "remaining_time": "0:13:25"} +{"current_steps": 1748, "total_steps": 2436, "loss": 0.3354438543319702, "lr": 2.2460277352383713e-06, "epoch": 2.1527093596059115, "percentage": 71.76, "elapsed_time": "0:34:03", "remaining_time": "0:13:24"} +{"current_steps": 1749, "total_steps": 2436, "loss": 0.593233585357666, "lr": 2.240049494126479e-06, "epoch": 2.1539408866995076, "percentage": 71.8, "elapsed_time": "0:34:04", "remaining_time": "0:13:23"} +{"current_steps": 1750, "total_steps": 2436, "loss": 0.32123100757598877, "lr": 2.234076922189613e-06, "epoch": 2.1551724137931036, "percentage": 71.84, "elapsed_time": "0:34:05", "remaining_time": "0:13:21"} +{"current_steps": 1751, "total_steps": 2436, "loss": 1.0594584941864014, "lr": 2.2281100316959476e-06, "epoch": 2.1564039408866993, "percentage": 71.88, "elapsed_time": "0:34:06", "remaining_time": "0:13:20"} +{"current_steps": 1752, "total_steps": 2436, "loss": 0.8586208820343018, "lr": 2.2221488349019903e-06, "epoch": 2.1576354679802954, "percentage": 71.92, "elapsed_time": "0:34:08", "remaining_time": "0:13:19"} +{"current_steps": 1753, "total_steps": 2436, "loss": 0.38074642419815063, "lr": 2.2161933440525474e-06, "epoch": 2.1588669950738915, "percentage": 71.96, "elapsed_time": "0:34:09", "remaining_time": "0:13:18"} +{"current_steps": 1754, "total_steps": 2436, "loss": 0.28768736124038696, "lr": 2.21024357138071e-06, "epoch": 2.1600985221674875, "percentage": 72.0, "elapsed_time": "0:34:10", "remaining_time": "0:13:17"} +{"current_steps": 1755, "total_steps": 2436, "loss": 1.1843211650848389, "lr": 2.2042995291078227e-06, "epoch": 2.1613300492610836, "percentage": 72.04, "elapsed_time": "0:34:11", "remaining_time": "0:13:15"} +{"current_steps": 1756, "total_steps": 2436, "loss": 0.7616925835609436, "lr": 2.1983612294434563e-06, "epoch": 2.1625615763546797, "percentage": 72.09, "elapsed_time": "0:34:12", "remaining_time": "0:13:14"} +{"current_steps": 1757, "total_steps": 2436, "loss": 0.4518227279186249, "lr": 2.192428684585386e-06, "epoch": 2.163793103448276, "percentage": 72.13, "elapsed_time": "0:34:13", "remaining_time": "0:13:13"} +{"current_steps": 1758, "total_steps": 2436, "loss": 0.9173997640609741, "lr": 2.1865019067195685e-06, "epoch": 2.165024630541872, "percentage": 72.17, "elapsed_time": "0:34:14", "remaining_time": "0:13:12"} +{"current_steps": 1759, "total_steps": 2436, "loss": 0.4044645428657532, "lr": 2.180580908020117e-06, "epoch": 2.166256157635468, "percentage": 72.21, "elapsed_time": "0:34:15", "remaining_time": "0:13:11"} +{"current_steps": 1760, "total_steps": 2436, "loss": 0.7771418690681458, "lr": 2.174665700649267e-06, "epoch": 2.167487684729064, "percentage": 72.25, "elapsed_time": "0:34:16", "remaining_time": "0:13:09"} +{"current_steps": 1761, "total_steps": 2436, "loss": 0.39461982250213623, "lr": 2.1687562967573645e-06, "epoch": 2.16871921182266, "percentage": 72.29, "elapsed_time": "0:34:17", "remaining_time": "0:13:08"} +{"current_steps": 1762, "total_steps": 2436, "loss": 0.2924491763114929, "lr": 2.1628527084828283e-06, "epoch": 2.1699507389162562, "percentage": 72.33, "elapsed_time": "0:34:18", "remaining_time": "0:13:07"} +{"current_steps": 1763, "total_steps": 2436, "loss": 0.2507514953613281, "lr": 2.156954947952139e-06, "epoch": 2.1711822660098523, "percentage": 72.37, "elapsed_time": "0:34:19", "remaining_time": "0:13:06"} +{"current_steps": 1764, "total_steps": 2436, "loss": 0.44257861375808716, "lr": 2.151063027279798e-06, "epoch": 2.1724137931034484, "percentage": 72.41, "elapsed_time": "0:34:21", "remaining_time": "0:13:05"} +{"current_steps": 1765, "total_steps": 2436, "loss": 0.2863251268863678, "lr": 2.1451769585683196e-06, "epoch": 2.1736453201970445, "percentage": 72.45, "elapsed_time": "0:34:22", "remaining_time": "0:13:03"} +{"current_steps": 1766, "total_steps": 2436, "loss": 0.6882431507110596, "lr": 2.139296753908195e-06, "epoch": 2.1748768472906406, "percentage": 72.5, "elapsed_time": "0:34:23", "remaining_time": "0:13:02"} +{"current_steps": 1767, "total_steps": 2436, "loss": 0.8318816423416138, "lr": 2.1334224253778628e-06, "epoch": 2.1761083743842367, "percentage": 72.54, "elapsed_time": "0:34:24", "remaining_time": "0:13:01"} +{"current_steps": 1768, "total_steps": 2436, "loss": 0.3899531364440918, "lr": 2.1275539850437006e-06, "epoch": 2.1773399014778327, "percentage": 72.58, "elapsed_time": "0:34:25", "remaining_time": "0:13:00"} +{"current_steps": 1769, "total_steps": 2436, "loss": 0.6424532532691956, "lr": 2.1216914449599905e-06, "epoch": 2.1785714285714284, "percentage": 72.62, "elapsed_time": "0:34:26", "remaining_time": "0:12:59"} +{"current_steps": 1770, "total_steps": 2436, "loss": 0.6676028370857239, "lr": 2.1158348171688888e-06, "epoch": 2.1798029556650245, "percentage": 72.66, "elapsed_time": "0:34:27", "remaining_time": "0:12:57"} +{"current_steps": 1771, "total_steps": 2436, "loss": 0.4219639301300049, "lr": 2.109984113700413e-06, "epoch": 2.1810344827586206, "percentage": 72.7, "elapsed_time": "0:34:28", "remaining_time": "0:12:56"} +{"current_steps": 1772, "total_steps": 2436, "loss": 0.32283568382263184, "lr": 2.1041393465724114e-06, "epoch": 2.1822660098522166, "percentage": 72.74, "elapsed_time": "0:34:29", "remaining_time": "0:12:55"} +{"current_steps": 1773, "total_steps": 2436, "loss": 0.26172614097595215, "lr": 2.0983005277905348e-06, "epoch": 2.1834975369458127, "percentage": 72.78, "elapsed_time": "0:34:30", "remaining_time": "0:12:54"} +{"current_steps": 1774, "total_steps": 2436, "loss": 0.585732638835907, "lr": 2.092467669348217e-06, "epoch": 2.184729064039409, "percentage": 72.82, "elapsed_time": "0:34:31", "remaining_time": "0:12:53"} +{"current_steps": 1775, "total_steps": 2436, "loss": 0.42734187841415405, "lr": 2.0866407832266506e-06, "epoch": 2.185960591133005, "percentage": 72.87, "elapsed_time": "0:34:32", "remaining_time": "0:12:51"} +{"current_steps": 1776, "total_steps": 2436, "loss": 0.24151989817619324, "lr": 2.0808198813947606e-06, "epoch": 2.187192118226601, "percentage": 72.91, "elapsed_time": "0:34:34", "remaining_time": "0:12:50"} +{"current_steps": 1777, "total_steps": 2436, "loss": 0.12940426170825958, "lr": 2.0750049758091778e-06, "epoch": 2.188423645320197, "percentage": 72.95, "elapsed_time": "0:34:35", "remaining_time": "0:12:49"} +{"current_steps": 1778, "total_steps": 2436, "loss": 0.7501548528671265, "lr": 2.0691960784142143e-06, "epoch": 2.189655172413793, "percentage": 72.99, "elapsed_time": "0:34:36", "remaining_time": "0:12:48"} +{"current_steps": 1779, "total_steps": 2436, "loss": 0.43730083107948303, "lr": 2.063393201141846e-06, "epoch": 2.1908866995073892, "percentage": 73.03, "elapsed_time": "0:34:37", "remaining_time": "0:12:47"} +{"current_steps": 1780, "total_steps": 2436, "loss": 0.3335978388786316, "lr": 2.0575963559116823e-06, "epoch": 2.1921182266009853, "percentage": 73.07, "elapsed_time": "0:34:38", "remaining_time": "0:12:45"} +{"current_steps": 1781, "total_steps": 2436, "loss": 0.3262137174606323, "lr": 2.0518055546309362e-06, "epoch": 2.1933497536945814, "percentage": 73.11, "elapsed_time": "0:34:39", "remaining_time": "0:12:44"} +{"current_steps": 1782, "total_steps": 2436, "loss": 0.3336663544178009, "lr": 2.0460208091944122e-06, "epoch": 2.1945812807881775, "percentage": 73.15, "elapsed_time": "0:34:40", "remaining_time": "0:12:43"} +{"current_steps": 1783, "total_steps": 2436, "loss": 0.6050255298614502, "lr": 2.0402421314844774e-06, "epoch": 2.1958128078817736, "percentage": 73.19, "elapsed_time": "0:34:41", "remaining_time": "0:12:42"} +{"current_steps": 1784, "total_steps": 2436, "loss": 0.33584898710250854, "lr": 2.0344695333710234e-06, "epoch": 2.1970443349753697, "percentage": 73.23, "elapsed_time": "0:34:42", "remaining_time": "0:12:41"} +{"current_steps": 1785, "total_steps": 2436, "loss": 0.4711458683013916, "lr": 2.0287030267114665e-06, "epoch": 2.1982758620689653, "percentage": 73.28, "elapsed_time": "0:34:43", "remaining_time": "0:12:39"} +{"current_steps": 1786, "total_steps": 2436, "loss": 0.6127311587333679, "lr": 2.0229426233507067e-06, "epoch": 2.1995073891625614, "percentage": 73.32, "elapsed_time": "0:34:44", "remaining_time": "0:12:38"} +{"current_steps": 1787, "total_steps": 2436, "loss": 0.7195362448692322, "lr": 2.0171883351211038e-06, "epoch": 2.2007389162561575, "percentage": 73.36, "elapsed_time": "0:34:45", "remaining_time": "0:12:37"} +{"current_steps": 1788, "total_steps": 2436, "loss": 1.412251591682434, "lr": 2.0114401738424618e-06, "epoch": 2.2019704433497536, "percentage": 73.4, "elapsed_time": "0:34:47", "remaining_time": "0:12:36"} +{"current_steps": 1789, "total_steps": 2436, "loss": 0.48954465985298157, "lr": 2.0056981513219944e-06, "epoch": 2.2032019704433496, "percentage": 73.44, "elapsed_time": "0:34:48", "remaining_time": "0:12:35"} +{"current_steps": 1790, "total_steps": 2436, "loss": 0.32414451241493225, "lr": 1.999962279354311e-06, "epoch": 2.2044334975369457, "percentage": 73.48, "elapsed_time": "0:34:49", "remaining_time": "0:12:33"} +{"current_steps": 1791, "total_steps": 2436, "loss": 0.4072822034358978, "lr": 1.9942325697213817e-06, "epoch": 2.205665024630542, "percentage": 73.52, "elapsed_time": "0:34:50", "remaining_time": "0:12:32"} +{"current_steps": 1792, "total_steps": 2436, "loss": 0.25958192348480225, "lr": 1.988509034192522e-06, "epoch": 2.206896551724138, "percentage": 73.56, "elapsed_time": "0:34:51", "remaining_time": "0:12:31"} +{"current_steps": 1793, "total_steps": 2436, "loss": 0.2943662405014038, "lr": 1.9827916845243687e-06, "epoch": 2.208128078817734, "percentage": 73.6, "elapsed_time": "0:34:52", "remaining_time": "0:12:30"} +{"current_steps": 1794, "total_steps": 2436, "loss": 0.6713488698005676, "lr": 1.9770805324608446e-06, "epoch": 2.20935960591133, "percentage": 73.65, "elapsed_time": "0:34:53", "remaining_time": "0:12:29"} +{"current_steps": 1795, "total_steps": 2436, "loss": 0.5103387236595154, "lr": 1.971375589733145e-06, "epoch": 2.210591133004926, "percentage": 73.69, "elapsed_time": "0:34:54", "remaining_time": "0:12:27"} +{"current_steps": 1796, "total_steps": 2436, "loss": 0.4981153905391693, "lr": 1.965676868059714e-06, "epoch": 2.2118226600985222, "percentage": 73.73, "elapsed_time": "0:34:55", "remaining_time": "0:12:26"} +{"current_steps": 1797, "total_steps": 2436, "loss": 0.2828434407711029, "lr": 1.9599843791462123e-06, "epoch": 2.2130541871921183, "percentage": 73.77, "elapsed_time": "0:34:56", "remaining_time": "0:12:25"} +{"current_steps": 1798, "total_steps": 2436, "loss": 0.36899659037590027, "lr": 1.9542981346855015e-06, "epoch": 2.2142857142857144, "percentage": 73.81, "elapsed_time": "0:34:57", "remaining_time": "0:12:24"} +{"current_steps": 1799, "total_steps": 2436, "loss": 0.46039581298828125, "lr": 1.9486181463576176e-06, "epoch": 2.2155172413793105, "percentage": 73.85, "elapsed_time": "0:34:58", "remaining_time": "0:12:23"} +{"current_steps": 1800, "total_steps": 2436, "loss": 0.611553966999054, "lr": 1.942944425829741e-06, "epoch": 2.2167487684729066, "percentage": 73.89, "elapsed_time": "0:34:59", "remaining_time": "0:12:21"} +{"current_steps": 1801, "total_steps": 2436, "loss": 0.23928876221179962, "lr": 1.937276984756179e-06, "epoch": 2.2179802955665027, "percentage": 73.93, "elapsed_time": "0:35:00", "remaining_time": "0:12:20"} +{"current_steps": 1802, "total_steps": 2436, "loss": 0.3270934820175171, "lr": 1.9316158347783436e-06, "epoch": 2.2192118226600988, "percentage": 73.97, "elapsed_time": "0:35:02", "remaining_time": "0:12:19"} +{"current_steps": 1803, "total_steps": 2436, "loss": 0.30926424264907837, "lr": 1.925960987524724e-06, "epoch": 2.2204433497536944, "percentage": 74.01, "elapsed_time": "0:35:03", "remaining_time": "0:12:18"} +{"current_steps": 1804, "total_steps": 2436, "loss": 0.6049486994743347, "lr": 1.9203124546108583e-06, "epoch": 2.2216748768472905, "percentage": 74.06, "elapsed_time": "0:35:04", "remaining_time": "0:12:17"} +{"current_steps": 1805, "total_steps": 2436, "loss": 0.7592355012893677, "lr": 1.91467024763932e-06, "epoch": 2.2229064039408866, "percentage": 74.1, "elapsed_time": "0:35:05", "remaining_time": "0:12:15"} +{"current_steps": 1806, "total_steps": 2436, "loss": 0.26057887077331543, "lr": 1.9090343781996828e-06, "epoch": 2.2241379310344827, "percentage": 74.14, "elapsed_time": "0:35:06", "remaining_time": "0:12:14"} +{"current_steps": 1807, "total_steps": 2436, "loss": 0.4014609754085541, "lr": 1.9034048578685099e-06, "epoch": 2.2253694581280787, "percentage": 74.18, "elapsed_time": "0:35:07", "remaining_time": "0:12:13"} +{"current_steps": 1808, "total_steps": 2436, "loss": 0.26397138833999634, "lr": 1.897781698209315e-06, "epoch": 2.226600985221675, "percentage": 74.22, "elapsed_time": "0:35:08", "remaining_time": "0:12:12"} +{"current_steps": 1809, "total_steps": 2436, "loss": 0.8727256059646606, "lr": 1.8921649107725525e-06, "epoch": 2.227832512315271, "percentage": 74.26, "elapsed_time": "0:35:09", "remaining_time": "0:12:11"} +{"current_steps": 1810, "total_steps": 2436, "loss": 0.45729875564575195, "lr": 1.8865545070955882e-06, "epoch": 2.229064039408867, "percentage": 74.3, "elapsed_time": "0:35:10", "remaining_time": "0:12:09"} +{"current_steps": 1811, "total_steps": 2436, "loss": 0.3261849880218506, "lr": 1.880950498702666e-06, "epoch": 2.230295566502463, "percentage": 74.34, "elapsed_time": "0:35:11", "remaining_time": "0:12:08"} +{"current_steps": 1812, "total_steps": 2436, "loss": 0.682532787322998, "lr": 1.875352897104903e-06, "epoch": 2.231527093596059, "percentage": 74.38, "elapsed_time": "0:35:12", "remaining_time": "0:12:07"} +{"current_steps": 1813, "total_steps": 2436, "loss": 0.4255359470844269, "lr": 1.8697617138002545e-06, "epoch": 2.2327586206896552, "percentage": 74.43, "elapsed_time": "0:35:13", "remaining_time": "0:12:06"} +{"current_steps": 1814, "total_steps": 2436, "loss": 0.3307432234287262, "lr": 1.8641769602734872e-06, "epoch": 2.2339901477832513, "percentage": 74.47, "elapsed_time": "0:35:14", "remaining_time": "0:12:05"} +{"current_steps": 1815, "total_steps": 2436, "loss": 0.26837313175201416, "lr": 1.8585986479961653e-06, "epoch": 2.2352216748768474, "percentage": 74.51, "elapsed_time": "0:35:16", "remaining_time": "0:12:04"} +{"current_steps": 1816, "total_steps": 2436, "loss": 0.5036531686782837, "lr": 1.8530267884266228e-06, "epoch": 2.2364532019704435, "percentage": 74.55, "elapsed_time": "0:35:17", "remaining_time": "0:12:02"} +{"current_steps": 1817, "total_steps": 2436, "loss": 0.4444383680820465, "lr": 1.8474613930099356e-06, "epoch": 2.2376847290640396, "percentage": 74.59, "elapsed_time": "0:35:18", "remaining_time": "0:12:01"} +{"current_steps": 1818, "total_steps": 2436, "loss": 0.24592629075050354, "lr": 1.8419024731779e-06, "epoch": 2.2389162561576357, "percentage": 74.63, "elapsed_time": "0:35:19", "remaining_time": "0:12:00"} +{"current_steps": 1819, "total_steps": 2436, "loss": 0.9310093522071838, "lr": 1.8363500403490175e-06, "epoch": 2.2401477832512313, "percentage": 74.67, "elapsed_time": "0:35:20", "remaining_time": "0:11:59"} +{"current_steps": 1820, "total_steps": 2436, "loss": 0.3252318799495697, "lr": 1.8308041059284621e-06, "epoch": 2.2413793103448274, "percentage": 74.71, "elapsed_time": "0:35:21", "remaining_time": "0:11:58"} +{"current_steps": 1821, "total_steps": 2436, "loss": 0.44218361377716064, "lr": 1.8252646813080566e-06, "epoch": 2.2426108374384235, "percentage": 74.75, "elapsed_time": "0:35:22", "remaining_time": "0:11:56"} +{"current_steps": 1822, "total_steps": 2436, "loss": 0.631632924079895, "lr": 1.8197317778662533e-06, "epoch": 2.2438423645320196, "percentage": 74.79, "elapsed_time": "0:35:23", "remaining_time": "0:11:55"} +{"current_steps": 1823, "total_steps": 2436, "loss": 0.2570488154888153, "lr": 1.814205406968112e-06, "epoch": 2.2450738916256157, "percentage": 74.84, "elapsed_time": "0:35:24", "remaining_time": "0:11:54"} +{"current_steps": 1824, "total_steps": 2436, "loss": 0.6113500595092773, "lr": 1.8086855799652737e-06, "epoch": 2.2463054187192117, "percentage": 74.88, "elapsed_time": "0:35:25", "remaining_time": "0:11:53"} +{"current_steps": 1825, "total_steps": 2436, "loss": 0.5997953414916992, "lr": 1.8031723081959334e-06, "epoch": 2.247536945812808, "percentage": 74.92, "elapsed_time": "0:35:26", "remaining_time": "0:11:52"} +{"current_steps": 1826, "total_steps": 2436, "loss": 0.501262903213501, "lr": 1.7976656029848271e-06, "epoch": 2.248768472906404, "percentage": 74.96, "elapsed_time": "0:35:27", "remaining_time": "0:11:50"} +{"current_steps": 1827, "total_steps": 2436, "loss": 0.9116629362106323, "lr": 1.792165475643199e-06, "epoch": 2.25, "percentage": 75.0, "elapsed_time": "0:35:29", "remaining_time": "0:11:49"} +{"current_steps": 1828, "total_steps": 2436, "loss": 0.3302918076515198, "lr": 1.786671937468779e-06, "epoch": 2.251231527093596, "percentage": 75.04, "elapsed_time": "0:35:30", "remaining_time": "0:11:48"} +{"current_steps": 1829, "total_steps": 2436, "loss": 0.26528751850128174, "lr": 1.7811849997457681e-06, "epoch": 2.252463054187192, "percentage": 75.08, "elapsed_time": "0:35:31", "remaining_time": "0:11:47"} +{"current_steps": 1830, "total_steps": 2436, "loss": 0.25929901003837585, "lr": 1.775704673744809e-06, "epoch": 2.2536945812807883, "percentage": 75.12, "elapsed_time": "0:35:32", "remaining_time": "0:11:46"} +{"current_steps": 1831, "total_steps": 2436, "loss": 0.4980836808681488, "lr": 1.7702309707229576e-06, "epoch": 2.2549261083743843, "percentage": 75.16, "elapsed_time": "0:35:33", "remaining_time": "0:11:44"} +{"current_steps": 1832, "total_steps": 2436, "loss": 0.5196325182914734, "lr": 1.764763901923673e-06, "epoch": 2.2561576354679804, "percentage": 75.21, "elapsed_time": "0:35:34", "remaining_time": "0:11:43"} +{"current_steps": 1833, "total_steps": 2436, "loss": 0.20513209700584412, "lr": 1.7593034785767788e-06, "epoch": 2.2573891625615765, "percentage": 75.25, "elapsed_time": "0:35:35", "remaining_time": "0:11:42"} +{"current_steps": 1834, "total_steps": 2436, "loss": 0.3052961826324463, "lr": 1.753849711898457e-06, "epoch": 2.2586206896551726, "percentage": 75.29, "elapsed_time": "0:35:36", "remaining_time": "0:11:41"} +{"current_steps": 1835, "total_steps": 2436, "loss": 0.32289302349090576, "lr": 1.7484026130912097e-06, "epoch": 2.2598522167487687, "percentage": 75.33, "elapsed_time": "0:35:37", "remaining_time": "0:11:40"} +{"current_steps": 1836, "total_steps": 2436, "loss": 0.5892568826675415, "lr": 1.742962193343845e-06, "epoch": 2.2610837438423648, "percentage": 75.37, "elapsed_time": "0:35:38", "remaining_time": "0:11:38"} +{"current_steps": 1837, "total_steps": 2436, "loss": 0.24824300408363342, "lr": 1.737528463831456e-06, "epoch": 2.2623152709359604, "percentage": 75.41, "elapsed_time": "0:35:39", "remaining_time": "0:11:37"} +{"current_steps": 1838, "total_steps": 2436, "loss": 0.23833397030830383, "lr": 1.7321014357153815e-06, "epoch": 2.2635467980295565, "percentage": 75.45, "elapsed_time": "0:35:40", "remaining_time": "0:11:36"} +{"current_steps": 1839, "total_steps": 2436, "loss": 0.4855925738811493, "lr": 1.726681120143207e-06, "epoch": 2.2647783251231526, "percentage": 75.49, "elapsed_time": "0:35:41", "remaining_time": "0:11:35"} +{"current_steps": 1840, "total_steps": 2436, "loss": 0.44992727041244507, "lr": 1.7212675282487269e-06, "epoch": 2.2660098522167487, "percentage": 75.53, "elapsed_time": "0:35:43", "remaining_time": "0:11:34"} +{"current_steps": 1841, "total_steps": 2436, "loss": 0.41251128911972046, "lr": 1.7158606711519193e-06, "epoch": 2.2672413793103448, "percentage": 75.57, "elapsed_time": "0:35:44", "remaining_time": "0:11:32"} +{"current_steps": 1842, "total_steps": 2436, "loss": 0.4418972134590149, "lr": 1.7104605599589353e-06, "epoch": 2.268472906403941, "percentage": 75.62, "elapsed_time": "0:35:45", "remaining_time": "0:11:31"} +{"current_steps": 1843, "total_steps": 2436, "loss": 0.4425298571586609, "lr": 1.7050672057620666e-06, "epoch": 2.269704433497537, "percentage": 75.66, "elapsed_time": "0:35:46", "remaining_time": "0:11:30"} +{"current_steps": 1844, "total_steps": 2436, "loss": 0.3141231834888458, "lr": 1.6996806196397243e-06, "epoch": 2.270935960591133, "percentage": 75.7, "elapsed_time": "0:35:47", "remaining_time": "0:11:29"} +{"current_steps": 1845, "total_steps": 2436, "loss": 0.2843426764011383, "lr": 1.6943008126564164e-06, "epoch": 2.272167487684729, "percentage": 75.74, "elapsed_time": "0:35:48", "remaining_time": "0:11:28"} +{"current_steps": 1846, "total_steps": 2436, "loss": 0.36104702949523926, "lr": 1.6889277958627293e-06, "epoch": 2.273399014778325, "percentage": 75.78, "elapsed_time": "0:35:49", "remaining_time": "0:11:26"} +{"current_steps": 1847, "total_steps": 2436, "loss": 0.3061131536960602, "lr": 1.6835615802953026e-06, "epoch": 2.2746305418719213, "percentage": 75.82, "elapsed_time": "0:35:50", "remaining_time": "0:11:25"} +{"current_steps": 1848, "total_steps": 2436, "loss": 0.26009926199913025, "lr": 1.6782021769768015e-06, "epoch": 2.2758620689655173, "percentage": 75.86, "elapsed_time": "0:35:51", "remaining_time": "0:11:24"} +{"current_steps": 1849, "total_steps": 2436, "loss": 0.33785128593444824, "lr": 1.6728495969158976e-06, "epoch": 2.2770935960591134, "percentage": 75.9, "elapsed_time": "0:35:52", "remaining_time": "0:11:23"} +{"current_steps": 1850, "total_steps": 2436, "loss": 0.675277829170227, "lr": 1.6675038511072518e-06, "epoch": 2.2783251231527095, "percentage": 75.94, "elapsed_time": "0:35:53", "remaining_time": "0:11:22"} +{"current_steps": 1851, "total_steps": 2436, "loss": 0.30536460876464844, "lr": 1.6621649505314853e-06, "epoch": 2.2795566502463056, "percentage": 75.99, "elapsed_time": "0:35:54", "remaining_time": "0:11:21"} +{"current_steps": 1852, "total_steps": 2436, "loss": 0.483297735452652, "lr": 1.6568329061551552e-06, "epoch": 2.2807881773399012, "percentage": 76.03, "elapsed_time": "0:35:55", "remaining_time": "0:11:19"} +{"current_steps": 1853, "total_steps": 2436, "loss": 1.2728561162948608, "lr": 1.6515077289307391e-06, "epoch": 2.2820197044334973, "percentage": 76.07, "elapsed_time": "0:35:57", "remaining_time": "0:11:18"} +{"current_steps": 1854, "total_steps": 2436, "loss": 1.2634159326553345, "lr": 1.6461894297966113e-06, "epoch": 2.2832512315270934, "percentage": 76.11, "elapsed_time": "0:35:58", "remaining_time": "0:11:17"} +{"current_steps": 1855, "total_steps": 2436, "loss": 0.2823532819747925, "lr": 1.640878019677008e-06, "epoch": 2.2844827586206895, "percentage": 76.15, "elapsed_time": "0:35:59", "remaining_time": "0:11:16"} +{"current_steps": 1856, "total_steps": 2436, "loss": 0.34143221378326416, "lr": 1.6355735094820236e-06, "epoch": 2.2857142857142856, "percentage": 76.19, "elapsed_time": "0:36:00", "remaining_time": "0:11:15"} +{"current_steps": 1857, "total_steps": 2436, "loss": 1.6820435523986816, "lr": 1.6302759101075788e-06, "epoch": 2.2869458128078817, "percentage": 76.23, "elapsed_time": "0:36:01", "remaining_time": "0:11:13"} +{"current_steps": 1858, "total_steps": 2436, "loss": 0.5194296836853027, "lr": 1.6249852324353943e-06, "epoch": 2.2881773399014778, "percentage": 76.27, "elapsed_time": "0:36:02", "remaining_time": "0:11:12"} +{"current_steps": 1859, "total_steps": 2436, "loss": 0.5637781023979187, "lr": 1.619701487332978e-06, "epoch": 2.289408866995074, "percentage": 76.31, "elapsed_time": "0:36:03", "remaining_time": "0:11:11"} +{"current_steps": 1860, "total_steps": 2436, "loss": 0.34875303506851196, "lr": 1.6144246856535933e-06, "epoch": 2.29064039408867, "percentage": 76.35, "elapsed_time": "0:36:04", "remaining_time": "0:11:10"} +{"current_steps": 1861, "total_steps": 2436, "loss": 1.098509430885315, "lr": 1.609154838236246e-06, "epoch": 2.291871921182266, "percentage": 76.4, "elapsed_time": "0:36:05", "remaining_time": "0:11:09"} +{"current_steps": 1862, "total_steps": 2436, "loss": 0.28303658962249756, "lr": 1.603891955905652e-06, "epoch": 2.293103448275862, "percentage": 76.44, "elapsed_time": "0:36:06", "remaining_time": "0:11:07"} +{"current_steps": 1863, "total_steps": 2436, "loss": 0.2923981547355652, "lr": 1.5986360494722237e-06, "epoch": 2.294334975369458, "percentage": 76.48, "elapsed_time": "0:36:07", "remaining_time": "0:11:06"} +{"current_steps": 1864, "total_steps": 2436, "loss": 0.7381842136383057, "lr": 1.5933871297320458e-06, "epoch": 2.2955665024630543, "percentage": 76.52, "elapsed_time": "0:36:08", "remaining_time": "0:11:05"} +{"current_steps": 1865, "total_steps": 2436, "loss": 0.3092786371707916, "lr": 1.5881452074668474e-06, "epoch": 2.2967980295566504, "percentage": 76.56, "elapsed_time": "0:36:09", "remaining_time": "0:11:04"} +{"current_steps": 1866, "total_steps": 2436, "loss": 0.23155847191810608, "lr": 1.5829102934439855e-06, "epoch": 2.2980295566502464, "percentage": 76.6, "elapsed_time": "0:36:11", "remaining_time": "0:11:03"} +{"current_steps": 1867, "total_steps": 2436, "loss": 0.28587496280670166, "lr": 1.577682398416424e-06, "epoch": 2.2992610837438425, "percentage": 76.64, "elapsed_time": "0:36:12", "remaining_time": "0:11:01"} +{"current_steps": 1868, "total_steps": 2436, "loss": 0.28047090768814087, "lr": 1.572461533122709e-06, "epoch": 2.3004926108374386, "percentage": 76.68, "elapsed_time": "0:36:13", "remaining_time": "0:11:00"} +{"current_steps": 1869, "total_steps": 2436, "loss": 0.23015758395195007, "lr": 1.567247708286942e-06, "epoch": 2.3017241379310347, "percentage": 76.72, "elapsed_time": "0:36:14", "remaining_time": "0:10:59"} +{"current_steps": 1870, "total_steps": 2436, "loss": 0.4323405623435974, "lr": 1.5620409346187697e-06, "epoch": 2.302955665024631, "percentage": 76.77, "elapsed_time": "0:36:15", "remaining_time": "0:10:58"} +{"current_steps": 1871, "total_steps": 2436, "loss": 0.23572880029678345, "lr": 1.5568412228133506e-06, "epoch": 2.3041871921182264, "percentage": 76.81, "elapsed_time": "0:36:16", "remaining_time": "0:10:57"} +{"current_steps": 1872, "total_steps": 2436, "loss": 0.3727877140045166, "lr": 1.5516485835513368e-06, "epoch": 2.3054187192118225, "percentage": 76.85, "elapsed_time": "0:36:17", "remaining_time": "0:10:56"} +{"current_steps": 1873, "total_steps": 2436, "loss": 0.45042985677719116, "lr": 1.5464630274988558e-06, "epoch": 2.3066502463054186, "percentage": 76.89, "elapsed_time": "0:36:18", "remaining_time": "0:10:54"} +{"current_steps": 1874, "total_steps": 2436, "loss": 0.2898573875427246, "lr": 1.5412845653074871e-06, "epoch": 2.3078817733990147, "percentage": 76.93, "elapsed_time": "0:36:19", "remaining_time": "0:10:53"} +{"current_steps": 1875, "total_steps": 2436, "loss": 0.5285981893539429, "lr": 1.5361132076142316e-06, "epoch": 2.3091133004926108, "percentage": 76.97, "elapsed_time": "0:36:20", "remaining_time": "0:10:52"} +{"current_steps": 1876, "total_steps": 2436, "loss": 0.32582932710647583, "lr": 1.5309489650415056e-06, "epoch": 2.310344827586207, "percentage": 77.01, "elapsed_time": "0:36:21", "remaining_time": "0:10:51"} +{"current_steps": 1877, "total_steps": 2436, "loss": 0.2169458121061325, "lr": 1.5257918481971028e-06, "epoch": 2.311576354679803, "percentage": 77.05, "elapsed_time": "0:36:22", "remaining_time": "0:10:50"} +{"current_steps": 1878, "total_steps": 2436, "loss": 0.618523359298706, "lr": 1.5206418676741868e-06, "epoch": 2.312807881773399, "percentage": 77.09, "elapsed_time": "0:36:23", "remaining_time": "0:10:48"} +{"current_steps": 1879, "total_steps": 2436, "loss": 0.7014099359512329, "lr": 1.515499034051256e-06, "epoch": 2.314039408866995, "percentage": 77.13, "elapsed_time": "0:36:24", "remaining_time": "0:10:47"} +{"current_steps": 1880, "total_steps": 2436, "loss": 0.44798558950424194, "lr": 1.510363357892133e-06, "epoch": 2.315270935960591, "percentage": 77.18, "elapsed_time": "0:36:26", "remaining_time": "0:10:46"} +{"current_steps": 1881, "total_steps": 2436, "loss": 0.4824434220790863, "lr": 1.50523484974594e-06, "epoch": 2.3165024630541873, "percentage": 77.22, "elapsed_time": "0:36:27", "remaining_time": "0:10:45"} +{"current_steps": 1882, "total_steps": 2436, "loss": 0.16904819011688232, "lr": 1.5001135201470673e-06, "epoch": 2.3177339901477834, "percentage": 77.26, "elapsed_time": "0:36:28", "remaining_time": "0:10:44"} +{"current_steps": 1883, "total_steps": 2436, "loss": 0.8792778253555298, "lr": 1.4949993796151675e-06, "epoch": 2.3189655172413794, "percentage": 77.3, "elapsed_time": "0:36:29", "remaining_time": "0:10:42"} +{"current_steps": 1884, "total_steps": 2436, "loss": 0.6592487096786499, "lr": 1.4898924386551256e-06, "epoch": 2.3201970443349755, "percentage": 77.34, "elapsed_time": "0:36:30", "remaining_time": "0:10:41"} +{"current_steps": 1885, "total_steps": 2436, "loss": 1.6036354303359985, "lr": 1.4847927077570324e-06, "epoch": 2.3214285714285716, "percentage": 77.38, "elapsed_time": "0:36:31", "remaining_time": "0:10:40"} +{"current_steps": 1886, "total_steps": 2436, "loss": 0.34490981698036194, "lr": 1.4797001973961755e-06, "epoch": 2.3226600985221673, "percentage": 77.42, "elapsed_time": "0:36:32", "remaining_time": "0:10:39"} +{"current_steps": 1887, "total_steps": 2436, "loss": 0.3186146914958954, "lr": 1.4746149180330082e-06, "epoch": 2.3238916256157633, "percentage": 77.46, "elapsed_time": "0:36:33", "remaining_time": "0:10:38"} +{"current_steps": 1888, "total_steps": 2436, "loss": 0.5050108432769775, "lr": 1.4695368801131293e-06, "epoch": 2.3251231527093594, "percentage": 77.5, "elapsed_time": "0:36:34", "remaining_time": "0:10:37"} +{"current_steps": 1889, "total_steps": 2436, "loss": 0.3541644215583801, "lr": 1.4644660940672628e-06, "epoch": 2.3263546798029555, "percentage": 77.55, "elapsed_time": "0:36:35", "remaining_time": "0:10:35"} +{"current_steps": 1890, "total_steps": 2436, "loss": 0.3495083749294281, "lr": 1.4594025703112397e-06, "epoch": 2.3275862068965516, "percentage": 77.59, "elapsed_time": "0:36:36", "remaining_time": "0:10:34"} +{"current_steps": 1891, "total_steps": 2436, "loss": 0.9918674826622009, "lr": 1.4543463192459728e-06, "epoch": 2.3288177339901477, "percentage": 77.63, "elapsed_time": "0:36:37", "remaining_time": "0:10:33"} +{"current_steps": 1892, "total_steps": 2436, "loss": 0.9601753950119019, "lr": 1.4492973512574348e-06, "epoch": 2.3300492610837438, "percentage": 77.67, "elapsed_time": "0:36:39", "remaining_time": "0:10:32"} +{"current_steps": 1893, "total_steps": 2436, "loss": 0.48341238498687744, "lr": 1.4442556767166371e-06, "epoch": 2.33128078817734, "percentage": 77.71, "elapsed_time": "0:36:40", "remaining_time": "0:10:31"} +{"current_steps": 1894, "total_steps": 2436, "loss": 0.38372108340263367, "lr": 1.4392213059796133e-06, "epoch": 2.332512315270936, "percentage": 77.75, "elapsed_time": "0:36:41", "remaining_time": "0:10:29"} +{"current_steps": 1895, "total_steps": 2436, "loss": 0.45662760734558105, "lr": 1.4341942493873934e-06, "epoch": 2.333743842364532, "percentage": 77.79, "elapsed_time": "0:36:42", "remaining_time": "0:10:28"} +{"current_steps": 1896, "total_steps": 2436, "loss": 0.6601132154464722, "lr": 1.4291745172659804e-06, "epoch": 2.334975369458128, "percentage": 77.83, "elapsed_time": "0:36:43", "remaining_time": "0:10:27"} +{"current_steps": 1897, "total_steps": 2436, "loss": 0.7569577097892761, "lr": 1.4241621199263362e-06, "epoch": 2.336206896551724, "percentage": 77.87, "elapsed_time": "0:36:44", "remaining_time": "0:10:26"} +{"current_steps": 1898, "total_steps": 2436, "loss": 0.7162508964538574, "lr": 1.4191570676643573e-06, "epoch": 2.3374384236453203, "percentage": 77.91, "elapsed_time": "0:36:45", "remaining_time": "0:10:25"} +{"current_steps": 1899, "total_steps": 2436, "loss": 0.6121374368667603, "lr": 1.4141593707608441e-06, "epoch": 2.3386699507389164, "percentage": 77.96, "elapsed_time": "0:36:46", "remaining_time": "0:10:23"} +{"current_steps": 1900, "total_steps": 2436, "loss": 0.550343930721283, "lr": 1.4091690394814989e-06, "epoch": 2.3399014778325125, "percentage": 78.0, "elapsed_time": "0:36:47", "remaining_time": "0:10:22"} +{"current_steps": 1901, "total_steps": 2436, "loss": 0.644547700881958, "lr": 1.40418608407689e-06, "epoch": 2.3411330049261085, "percentage": 78.04, "elapsed_time": "0:36:48", "remaining_time": "0:10:21"} +{"current_steps": 1902, "total_steps": 2436, "loss": 0.463761568069458, "lr": 1.3992105147824326e-06, "epoch": 2.3423645320197046, "percentage": 78.08, "elapsed_time": "0:36:49", "remaining_time": "0:10:20"} +{"current_steps": 1903, "total_steps": 2436, "loss": 0.5593357682228088, "lr": 1.3942423418183764e-06, "epoch": 2.3435960591133007, "percentage": 78.12, "elapsed_time": "0:36:50", "remaining_time": "0:10:19"} +{"current_steps": 1904, "total_steps": 2436, "loss": 0.5090635418891907, "lr": 1.3892815753897708e-06, "epoch": 2.344827586206897, "percentage": 78.16, "elapsed_time": "0:36:51", "remaining_time": "0:10:18"} +{"current_steps": 1905, "total_steps": 2436, "loss": 0.4595394432544708, "lr": 1.3843282256864599e-06, "epoch": 2.3460591133004924, "percentage": 78.2, "elapsed_time": "0:36:52", "remaining_time": "0:10:16"} +{"current_steps": 1906, "total_steps": 2436, "loss": 0.8381729125976562, "lr": 1.379382302883044e-06, "epoch": 2.3472906403940885, "percentage": 78.24, "elapsed_time": "0:36:54", "remaining_time": "0:10:15"} +{"current_steps": 1907, "total_steps": 2436, "loss": 0.37937110662460327, "lr": 1.3744438171388752e-06, "epoch": 2.3485221674876846, "percentage": 78.28, "elapsed_time": "0:36:55", "remaining_time": "0:10:14"} +{"current_steps": 1908, "total_steps": 2436, "loss": 0.4255325496196747, "lr": 1.3695127785980279e-06, "epoch": 2.3497536945812807, "percentage": 78.33, "elapsed_time": "0:36:56", "remaining_time": "0:10:13"} +{"current_steps": 1909, "total_steps": 2436, "loss": 1.1354942321777344, "lr": 1.3645891973892772e-06, "epoch": 2.350985221674877, "percentage": 78.37, "elapsed_time": "0:36:57", "remaining_time": "0:10:12"} +{"current_steps": 1910, "total_steps": 2436, "loss": 0.30018460750579834, "lr": 1.359673083626079e-06, "epoch": 2.352216748768473, "percentage": 78.41, "elapsed_time": "0:36:58", "remaining_time": "0:10:10"} +{"current_steps": 1911, "total_steps": 2436, "loss": 0.22174029052257538, "lr": 1.3547644474065557e-06, "epoch": 2.353448275862069, "percentage": 78.45, "elapsed_time": "0:36:59", "remaining_time": "0:10:09"} +{"current_steps": 1912, "total_steps": 2436, "loss": 0.27310076355934143, "lr": 1.349863298813464e-06, "epoch": 2.354679802955665, "percentage": 78.49, "elapsed_time": "0:37:00", "remaining_time": "0:10:08"} +{"current_steps": 1913, "total_steps": 2436, "loss": 0.39454638957977295, "lr": 1.3449696479141855e-06, "epoch": 2.355911330049261, "percentage": 78.53, "elapsed_time": "0:37:01", "remaining_time": "0:10:07"} +{"current_steps": 1914, "total_steps": 2436, "loss": 0.39921119809150696, "lr": 1.3400835047606997e-06, "epoch": 2.357142857142857, "percentage": 78.57, "elapsed_time": "0:37:02", "remaining_time": "0:10:06"} +{"current_steps": 1915, "total_steps": 2436, "loss": 0.45110660791397095, "lr": 1.3352048793895623e-06, "epoch": 2.3583743842364533, "percentage": 78.61, "elapsed_time": "0:37:03", "remaining_time": "0:10:05"} +{"current_steps": 1916, "total_steps": 2436, "loss": 0.5453286170959473, "lr": 1.330333781821887e-06, "epoch": 2.3596059113300494, "percentage": 78.65, "elapsed_time": "0:37:04", "remaining_time": "0:10:03"} +{"current_steps": 1917, "total_steps": 2436, "loss": 0.21928450465202332, "lr": 1.325470222063327e-06, "epoch": 2.3608374384236455, "percentage": 78.69, "elapsed_time": "0:37:05", "remaining_time": "0:10:02"} +{"current_steps": 1918, "total_steps": 2436, "loss": 0.8491370677947998, "lr": 1.3206142101040525e-06, "epoch": 2.3620689655172415, "percentage": 78.74, "elapsed_time": "0:37:07", "remaining_time": "0:10:01"} +{"current_steps": 1919, "total_steps": 2436, "loss": 0.5052551031112671, "lr": 1.3157657559187264e-06, "epoch": 2.363300492610837, "percentage": 78.78, "elapsed_time": "0:37:08", "remaining_time": "0:10:00"} +{"current_steps": 1920, "total_steps": 2436, "loss": 1.0034559965133667, "lr": 1.3109248694664917e-06, "epoch": 2.3645320197044333, "percentage": 78.82, "elapsed_time": "0:37:09", "remaining_time": "0:09:59"} +{"current_steps": 1921, "total_steps": 2436, "loss": 0.3685661554336548, "lr": 1.3060915606909413e-06, "epoch": 2.3657635467980294, "percentage": 78.86, "elapsed_time": "0:37:10", "remaining_time": "0:09:57"} +{"current_steps": 1922, "total_steps": 2436, "loss": 0.33304983377456665, "lr": 1.301265839520109e-06, "epoch": 2.3669950738916254, "percentage": 78.9, "elapsed_time": "0:37:11", "remaining_time": "0:09:56"} +{"current_steps": 1923, "total_steps": 2436, "loss": 1.3396000862121582, "lr": 1.2964477158664367e-06, "epoch": 2.3682266009852215, "percentage": 78.94, "elapsed_time": "0:37:12", "remaining_time": "0:09:55"} +{"current_steps": 1924, "total_steps": 2436, "loss": 0.3852962851524353, "lr": 1.2916371996267656e-06, "epoch": 2.3694581280788176, "percentage": 78.98, "elapsed_time": "0:37:13", "remaining_time": "0:09:54"} +{"current_steps": 1925, "total_steps": 2436, "loss": 0.5070800185203552, "lr": 1.2868343006823113e-06, "epoch": 2.3706896551724137, "percentage": 79.02, "elapsed_time": "0:37:14", "remaining_time": "0:09:53"} +{"current_steps": 1926, "total_steps": 2436, "loss": 0.1917571723461151, "lr": 1.2820390288986345e-06, "epoch": 2.37192118226601, "percentage": 79.06, "elapsed_time": "0:37:15", "remaining_time": "0:09:51"} +{"current_steps": 1927, "total_steps": 2436, "loss": 0.19884659349918365, "lr": 1.2772513941256371e-06, "epoch": 2.373152709359606, "percentage": 79.11, "elapsed_time": "0:37:16", "remaining_time": "0:09:50"} +{"current_steps": 1928, "total_steps": 2436, "loss": 0.27710244059562683, "lr": 1.2724714061975335e-06, "epoch": 2.374384236453202, "percentage": 79.15, "elapsed_time": "0:37:17", "remaining_time": "0:09:49"} +{"current_steps": 1929, "total_steps": 2436, "loss": 0.7216998338699341, "lr": 1.2676990749328255e-06, "epoch": 2.375615763546798, "percentage": 79.19, "elapsed_time": "0:37:18", "remaining_time": "0:09:48"} +{"current_steps": 1930, "total_steps": 2436, "loss": 0.35512983798980713, "lr": 1.262934410134292e-06, "epoch": 2.376847290640394, "percentage": 79.23, "elapsed_time": "0:37:19", "remaining_time": "0:09:47"} +{"current_steps": 1931, "total_steps": 2436, "loss": 0.21548208594322205, "lr": 1.2581774215889653e-06, "epoch": 2.37807881773399, "percentage": 79.27, "elapsed_time": "0:37:21", "remaining_time": "0:09:46"} +{"current_steps": 1932, "total_steps": 2436, "loss": 0.7191505432128906, "lr": 1.2534281190681059e-06, "epoch": 2.3793103448275863, "percentage": 79.31, "elapsed_time": "0:37:22", "remaining_time": "0:09:44"} +{"current_steps": 1933, "total_steps": 2436, "loss": 0.5658040046691895, "lr": 1.2486865123271868e-06, "epoch": 2.3805418719211824, "percentage": 79.35, "elapsed_time": "0:37:23", "remaining_time": "0:09:43"} +{"current_steps": 1934, "total_steps": 2436, "loss": 0.42820805311203003, "lr": 1.243952611105877e-06, "epoch": 2.3817733990147785, "percentage": 79.39, "elapsed_time": "0:37:24", "remaining_time": "0:09:42"} +{"current_steps": 1935, "total_steps": 2436, "loss": 0.3223640024662018, "lr": 1.2392264251280167e-06, "epoch": 2.3830049261083746, "percentage": 79.43, "elapsed_time": "0:37:25", "remaining_time": "0:09:41"} +{"current_steps": 1936, "total_steps": 2436, "loss": 0.5262437462806702, "lr": 1.2345079641015955e-06, "epoch": 2.3842364532019706, "percentage": 79.47, "elapsed_time": "0:37:26", "remaining_time": "0:09:40"} +{"current_steps": 1937, "total_steps": 2436, "loss": 0.32022416591644287, "lr": 1.2297972377187361e-06, "epoch": 2.3854679802955667, "percentage": 79.52, "elapsed_time": "0:37:27", "remaining_time": "0:09:39"} +{"current_steps": 1938, "total_steps": 2436, "loss": 0.76932692527771, "lr": 1.2250942556556754e-06, "epoch": 2.386699507389163, "percentage": 79.56, "elapsed_time": "0:37:28", "remaining_time": "0:09:37"} +{"current_steps": 1939, "total_steps": 2436, "loss": 0.23026564717292786, "lr": 1.2203990275727435e-06, "epoch": 2.3879310344827585, "percentage": 79.6, "elapsed_time": "0:37:29", "remaining_time": "0:09:36"} +{"current_steps": 1940, "total_steps": 2436, "loss": 0.4533492624759674, "lr": 1.2157115631143384e-06, "epoch": 2.3891625615763545, "percentage": 79.64, "elapsed_time": "0:37:30", "remaining_time": "0:09:35"} +{"current_steps": 1941, "total_steps": 2436, "loss": 0.6235211491584778, "lr": 1.211031871908916e-06, "epoch": 2.3903940886699506, "percentage": 79.68, "elapsed_time": "0:37:32", "remaining_time": "0:09:34"} +{"current_steps": 1942, "total_steps": 2436, "loss": 0.2519042193889618, "lr": 1.206359963568966e-06, "epoch": 2.3916256157635467, "percentage": 79.72, "elapsed_time": "0:37:33", "remaining_time": "0:09:33"} +{"current_steps": 1943, "total_steps": 2436, "loss": 0.3229137659072876, "lr": 1.201695847690983e-06, "epoch": 2.392857142857143, "percentage": 79.76, "elapsed_time": "0:37:34", "remaining_time": "0:09:31"} +{"current_steps": 1944, "total_steps": 2436, "loss": 0.19324302673339844, "lr": 1.1970395338554642e-06, "epoch": 2.394088669950739, "percentage": 79.8, "elapsed_time": "0:37:35", "remaining_time": "0:09:30"} +{"current_steps": 1945, "total_steps": 2436, "loss": 0.6342459917068481, "lr": 1.1923910316268783e-06, "epoch": 2.395320197044335, "percentage": 79.84, "elapsed_time": "0:37:36", "remaining_time": "0:09:29"} +{"current_steps": 1946, "total_steps": 2436, "loss": 0.3010944724082947, "lr": 1.1877503505536453e-06, "epoch": 2.396551724137931, "percentage": 79.89, "elapsed_time": "0:37:37", "remaining_time": "0:09:28"} +{"current_steps": 1947, "total_steps": 2436, "loss": 0.40499716997146606, "lr": 1.183117500168125e-06, "epoch": 2.397783251231527, "percentage": 79.93, "elapsed_time": "0:37:38", "remaining_time": "0:09:27"} +{"current_steps": 1948, "total_steps": 2436, "loss": 0.9692997336387634, "lr": 1.1784924899865856e-06, "epoch": 2.399014778325123, "percentage": 79.97, "elapsed_time": "0:37:39", "remaining_time": "0:09:26"} +{"current_steps": 1949, "total_steps": 2436, "loss": 0.3848229646682739, "lr": 1.1738753295091986e-06, "epoch": 2.4002463054187193, "percentage": 80.01, "elapsed_time": "0:37:40", "remaining_time": "0:09:24"} +{"current_steps": 1950, "total_steps": 2436, "loss": 0.4472384750843048, "lr": 1.169266028220004e-06, "epoch": 2.4014778325123154, "percentage": 80.05, "elapsed_time": "0:37:41", "remaining_time": "0:09:23"} +{"current_steps": 1951, "total_steps": 2436, "loss": 0.21374854445457458, "lr": 1.164664595586904e-06, "epoch": 2.4027093596059115, "percentage": 80.09, "elapsed_time": "0:37:42", "remaining_time": "0:09:22"} +{"current_steps": 1952, "total_steps": 2436, "loss": 0.4789981544017792, "lr": 1.1600710410616367e-06, "epoch": 2.4039408866995076, "percentage": 80.13, "elapsed_time": "0:37:43", "remaining_time": "0:09:21"} +{"current_steps": 1953, "total_steps": 2436, "loss": 0.6235543489456177, "lr": 1.1554853740797556e-06, "epoch": 2.405172413793103, "percentage": 80.17, "elapsed_time": "0:37:44", "remaining_time": "0:09:20"} +{"current_steps": 1954, "total_steps": 2436, "loss": 0.42575669288635254, "lr": 1.1509076040606127e-06, "epoch": 2.4064039408866993, "percentage": 80.21, "elapsed_time": "0:37:46", "remaining_time": "0:09:18"} +{"current_steps": 1955, "total_steps": 2436, "loss": 0.22154280543327332, "lr": 1.1463377404073433e-06, "epoch": 2.4076354679802954, "percentage": 80.25, "elapsed_time": "0:37:47", "remaining_time": "0:09:17"} +{"current_steps": 1956, "total_steps": 2436, "loss": 0.5722556114196777, "lr": 1.1417757925068362e-06, "epoch": 2.4088669950738915, "percentage": 80.3, "elapsed_time": "0:37:48", "remaining_time": "0:09:16"} +{"current_steps": 1957, "total_steps": 2436, "loss": 0.6502832174301147, "lr": 1.137221769729725e-06, "epoch": 2.4100985221674875, "percentage": 80.34, "elapsed_time": "0:37:49", "remaining_time": "0:09:15"} +{"current_steps": 1958, "total_steps": 2436, "loss": 0.41717976331710815, "lr": 1.132675681430364e-06, "epoch": 2.4113300492610836, "percentage": 80.38, "elapsed_time": "0:37:50", "remaining_time": "0:09:14"} +{"current_steps": 1959, "total_steps": 2436, "loss": 0.3705020248889923, "lr": 1.1281375369468078e-06, "epoch": 2.4125615763546797, "percentage": 80.42, "elapsed_time": "0:37:51", "remaining_time": "0:09:13"} +{"current_steps": 1960, "total_steps": 2436, "loss": 0.8128242492675781, "lr": 1.1236073456007928e-06, "epoch": 2.413793103448276, "percentage": 80.46, "elapsed_time": "0:37:52", "remaining_time": "0:09:11"} +{"current_steps": 1961, "total_steps": 2436, "loss": 0.7350403070449829, "lr": 1.1190851166977218e-06, "epoch": 2.415024630541872, "percentage": 80.5, "elapsed_time": "0:37:53", "remaining_time": "0:09:10"} +{"current_steps": 1962, "total_steps": 2436, "loss": 0.5837904214859009, "lr": 1.1145708595266418e-06, "epoch": 2.416256157635468, "percentage": 80.54, "elapsed_time": "0:37:54", "remaining_time": "0:09:09"} +{"current_steps": 1963, "total_steps": 2436, "loss": 0.436983585357666, "lr": 1.1100645833602231e-06, "epoch": 2.417487684729064, "percentage": 80.58, "elapsed_time": "0:37:55", "remaining_time": "0:09:08"} +{"current_steps": 1964, "total_steps": 2436, "loss": 0.4708068370819092, "lr": 1.105566297454742e-06, "epoch": 2.41871921182266, "percentage": 80.62, "elapsed_time": "0:37:56", "remaining_time": "0:09:07"} +{"current_steps": 1965, "total_steps": 2436, "loss": 0.37972012162208557, "lr": 1.1010760110500652e-06, "epoch": 2.4199507389162562, "percentage": 80.67, "elapsed_time": "0:37:57", "remaining_time": "0:09:05"} +{"current_steps": 1966, "total_steps": 2436, "loss": 0.3167269229888916, "lr": 1.0965937333696264e-06, "epoch": 2.4211822660098523, "percentage": 80.71, "elapsed_time": "0:37:58", "remaining_time": "0:09:04"} +{"current_steps": 1967, "total_steps": 2436, "loss": 0.3407049775123596, "lr": 1.0921194736204066e-06, "epoch": 2.4224137931034484, "percentage": 80.75, "elapsed_time": "0:38:00", "remaining_time": "0:09:03"} +{"current_steps": 1968, "total_steps": 2436, "loss": 0.7673642635345459, "lr": 1.0876532409929208e-06, "epoch": 2.4236453201970445, "percentage": 80.79, "elapsed_time": "0:38:01", "remaining_time": "0:09:02"} +{"current_steps": 1969, "total_steps": 2436, "loss": 0.3029213845729828, "lr": 1.083195044661195e-06, "epoch": 2.4248768472906406, "percentage": 80.83, "elapsed_time": "0:38:02", "remaining_time": "0:09:01"} +{"current_steps": 1970, "total_steps": 2436, "loss": 0.5143488049507141, "lr": 1.0787448937827428e-06, "epoch": 2.4261083743842367, "percentage": 80.87, "elapsed_time": "0:38:03", "remaining_time": "0:09:00"} +{"current_steps": 1971, "total_steps": 2436, "loss": 0.5086369514465332, "lr": 1.0743027974985576e-06, "epoch": 2.4273399014778327, "percentage": 80.91, "elapsed_time": "0:38:04", "remaining_time": "0:08:58"} +{"current_steps": 1972, "total_steps": 2436, "loss": 0.7999781966209412, "lr": 1.069868764933088e-06, "epoch": 2.4285714285714284, "percentage": 80.95, "elapsed_time": "0:38:05", "remaining_time": "0:08:57"} +{"current_steps": 1973, "total_steps": 2436, "loss": 0.2686223089694977, "lr": 1.065442805194214e-06, "epoch": 2.4298029556650245, "percentage": 80.99, "elapsed_time": "0:38:06", "remaining_time": "0:08:56"} +{"current_steps": 1974, "total_steps": 2436, "loss": 0.2520446181297302, "lr": 1.0610249273732393e-06, "epoch": 2.4310344827586206, "percentage": 81.03, "elapsed_time": "0:38:07", "remaining_time": "0:08:55"} +{"current_steps": 1975, "total_steps": 2436, "loss": 0.28887757658958435, "lr": 1.056615140544861e-06, "epoch": 2.4322660098522166, "percentage": 81.08, "elapsed_time": "0:38:08", "remaining_time": "0:08:54"} +{"current_steps": 1976, "total_steps": 2436, "loss": 0.3709273338317871, "lr": 1.0522134537671625e-06, "epoch": 2.4334975369458127, "percentage": 81.12, "elapsed_time": "0:38:09", "remaining_time": "0:08:53"} +{"current_steps": 1977, "total_steps": 2436, "loss": 0.6718100309371948, "lr": 1.0478198760815833e-06, "epoch": 2.434729064039409, "percentage": 81.16, "elapsed_time": "0:38:10", "remaining_time": "0:08:51"} +{"current_steps": 1978, "total_steps": 2436, "loss": 0.17143529653549194, "lr": 1.0434344165129095e-06, "epoch": 2.435960591133005, "percentage": 81.2, "elapsed_time": "0:38:11", "remaining_time": "0:08:50"} +{"current_steps": 1979, "total_steps": 2436, "loss": 0.7128796577453613, "lr": 1.0390570840692527e-06, "epoch": 2.437192118226601, "percentage": 81.24, "elapsed_time": "0:38:12", "remaining_time": "0:08:49"} +{"current_steps": 1980, "total_steps": 2436, "loss": 0.24575555324554443, "lr": 1.034687887742028e-06, "epoch": 2.438423645320197, "percentage": 81.28, "elapsed_time": "0:38:14", "remaining_time": "0:08:48"} +{"current_steps": 1981, "total_steps": 2436, "loss": 0.5631250739097595, "lr": 1.0303268365059383e-06, "epoch": 2.439655172413793, "percentage": 81.32, "elapsed_time": "0:38:15", "remaining_time": "0:08:47"} +{"current_steps": 1982, "total_steps": 2436, "loss": 0.3094029128551483, "lr": 1.0259739393189573e-06, "epoch": 2.4408866995073892, "percentage": 81.36, "elapsed_time": "0:38:16", "remaining_time": "0:08:45"} +{"current_steps": 1983, "total_steps": 2436, "loss": 0.4754146635532379, "lr": 1.021629205122311e-06, "epoch": 2.4421182266009853, "percentage": 81.4, "elapsed_time": "0:38:17", "remaining_time": "0:08:44"} +{"current_steps": 1984, "total_steps": 2436, "loss": 0.18599992990493774, "lr": 1.0172926428404527e-06, "epoch": 2.4433497536945814, "percentage": 81.44, "elapsed_time": "0:38:18", "remaining_time": "0:08:43"} +{"current_steps": 1985, "total_steps": 2436, "loss": 0.3831806480884552, "lr": 1.0129642613810576e-06, "epoch": 2.4445812807881775, "percentage": 81.49, "elapsed_time": "0:38:19", "remaining_time": "0:08:42"} +{"current_steps": 1986, "total_steps": 2436, "loss": 0.7717353105545044, "lr": 1.008644069634989e-06, "epoch": 2.4458128078817736, "percentage": 81.53, "elapsed_time": "0:38:20", "remaining_time": "0:08:41"} +{"current_steps": 1987, "total_steps": 2436, "loss": 0.3248934745788574, "lr": 1.0043320764762915e-06, "epoch": 2.447044334975369, "percentage": 81.57, "elapsed_time": "0:38:21", "remaining_time": "0:08:40"} +{"current_steps": 1988, "total_steps": 2436, "loss": 0.27836111187934875, "lr": 1.0000282907621694e-06, "epoch": 2.4482758620689653, "percentage": 81.61, "elapsed_time": "0:38:22", "remaining_time": "0:08:38"} +{"current_steps": 1989, "total_steps": 2436, "loss": 0.20251630246639252, "lr": 9.957327213329687e-07, "epoch": 2.4495073891625614, "percentage": 81.65, "elapsed_time": "0:38:23", "remaining_time": "0:08:37"} +{"current_steps": 1990, "total_steps": 2436, "loss": 0.6009274125099182, "lr": 9.914453770121557e-07, "epoch": 2.4507389162561575, "percentage": 81.69, "elapsed_time": "0:38:24", "remaining_time": "0:08:36"} +{"current_steps": 1991, "total_steps": 2436, "loss": 0.3312684893608093, "lr": 9.871662666063054e-07, "epoch": 2.4519704433497536, "percentage": 81.73, "elapsed_time": "0:38:25", "remaining_time": "0:08:35"} +{"current_steps": 1992, "total_steps": 2436, "loss": 0.38521629571914673, "lr": 9.828953989050744e-07, "epoch": 2.4532019704433496, "percentage": 81.77, "elapsed_time": "0:38:26", "remaining_time": "0:08:34"} +{"current_steps": 1993, "total_steps": 2436, "loss": 0.2508774995803833, "lr": 9.786327826811942e-07, "epoch": 2.4544334975369457, "percentage": 81.81, "elapsed_time": "0:38:28", "remaining_time": "0:08:33"} +{"current_steps": 1994, "total_steps": 2436, "loss": 0.36097291111946106, "lr": 9.743784266904422e-07, "epoch": 2.455665024630542, "percentage": 81.86, "elapsed_time": "0:38:29", "remaining_time": "0:08:31"} +{"current_steps": 1995, "total_steps": 2436, "loss": 0.6703237295150757, "lr": 9.701323396716312e-07, "epoch": 2.456896551724138, "percentage": 81.9, "elapsed_time": "0:38:30", "remaining_time": "0:08:30"} +{"current_steps": 1996, "total_steps": 2436, "loss": 0.9553302526473999, "lr": 9.6589453034659e-07, "epoch": 2.458128078817734, "percentage": 81.94, "elapsed_time": "0:38:31", "remaining_time": "0:08:29"} +{"current_steps": 1997, "total_steps": 2436, "loss": 0.3288821578025818, "lr": 9.616650074201383e-07, "epoch": 2.45935960591133, "percentage": 81.98, "elapsed_time": "0:38:32", "remaining_time": "0:08:28"} +{"current_steps": 1998, "total_steps": 2436, "loss": 0.3195754885673523, "lr": 9.574437795800806e-07, "epoch": 2.460591133004926, "percentage": 82.02, "elapsed_time": "0:38:33", "remaining_time": "0:08:27"} +{"current_steps": 1999, "total_steps": 2436, "loss": 0.26505401730537415, "lr": 9.532308554971831e-07, "epoch": 2.4618226600985222, "percentage": 82.06, "elapsed_time": "0:38:34", "remaining_time": "0:08:25"} +{"current_steps": 2000, "total_steps": 2436, "loss": 0.43558627367019653, "lr": 9.490262438251496e-07, "epoch": 2.4630541871921183, "percentage": 82.1, "elapsed_time": "0:38:35", "remaining_time": "0:08:24"} +{"current_steps": 2001, "total_steps": 2436, "loss": 0.3582439720630646, "lr": 9.44829953200615e-07, "epoch": 2.4642857142857144, "percentage": 82.14, "elapsed_time": "0:38:36", "remaining_time": "0:08:23"} +{"current_steps": 2002, "total_steps": 2436, "loss": 0.7142423987388611, "lr": 9.406419922431214e-07, "epoch": 2.4655172413793105, "percentage": 82.18, "elapsed_time": "0:38:37", "remaining_time": "0:08:22"} +{"current_steps": 2003, "total_steps": 2436, "loss": 0.24947094917297363, "lr": 9.364623695550979e-07, "epoch": 2.4667487684729066, "percentage": 82.22, "elapsed_time": "0:38:38", "remaining_time": "0:08:21"} +{"current_steps": 2004, "total_steps": 2436, "loss": 1.0376765727996826, "lr": 9.322910937218471e-07, "epoch": 2.4679802955665027, "percentage": 82.27, "elapsed_time": "0:38:39", "remaining_time": "0:08:20"} +{"current_steps": 2005, "total_steps": 2436, "loss": 0.39291733503341675, "lr": 9.281281733115288e-07, "epoch": 2.4692118226600988, "percentage": 82.31, "elapsed_time": "0:38:41", "remaining_time": "0:08:18"} +{"current_steps": 2006, "total_steps": 2436, "loss": 1.1038362979888916, "lr": 9.239736168751395e-07, "epoch": 2.4704433497536944, "percentage": 82.35, "elapsed_time": "0:38:42", "remaining_time": "0:08:17"} +{"current_steps": 2007, "total_steps": 2436, "loss": 0.8542830944061279, "lr": 9.198274329464929e-07, "epoch": 2.4716748768472905, "percentage": 82.39, "elapsed_time": "0:38:43", "remaining_time": "0:08:16"} +{"current_steps": 2008, "total_steps": 2436, "loss": 0.807994544506073, "lr": 9.156896300422053e-07, "epoch": 2.4729064039408866, "percentage": 82.43, "elapsed_time": "0:38:44", "remaining_time": "0:08:15"} +{"current_steps": 2009, "total_steps": 2436, "loss": 0.17016081511974335, "lr": 9.115602166616805e-07, "epoch": 2.4741379310344827, "percentage": 82.47, "elapsed_time": "0:38:45", "remaining_time": "0:08:14"} +{"current_steps": 2010, "total_steps": 2436, "loss": 0.7831156849861145, "lr": 9.07439201287088e-07, "epoch": 2.4753694581280787, "percentage": 82.51, "elapsed_time": "0:38:46", "remaining_time": "0:08:13"} +{"current_steps": 2011, "total_steps": 2436, "loss": 0.5146660804748535, "lr": 9.033265923833446e-07, "epoch": 2.476600985221675, "percentage": 82.55, "elapsed_time": "0:38:47", "remaining_time": "0:08:11"} +{"current_steps": 2012, "total_steps": 2436, "loss": 0.5641926527023315, "lr": 8.992223983981035e-07, "epoch": 2.477832512315271, "percentage": 82.59, "elapsed_time": "0:38:48", "remaining_time": "0:08:10"} +{"current_steps": 2013, "total_steps": 2436, "loss": 0.2155514359474182, "lr": 8.951266277617326e-07, "epoch": 2.479064039408867, "percentage": 82.64, "elapsed_time": "0:38:49", "remaining_time": "0:08:09"} +{"current_steps": 2014, "total_steps": 2436, "loss": 0.28125351667404175, "lr": 8.91039288887292e-07, "epoch": 2.480295566502463, "percentage": 82.68, "elapsed_time": "0:38:50", "remaining_time": "0:08:08"} +{"current_steps": 2015, "total_steps": 2436, "loss": 0.5349509716033936, "lr": 8.869603901705287e-07, "epoch": 2.481527093596059, "percentage": 82.72, "elapsed_time": "0:38:51", "remaining_time": "0:08:07"} +{"current_steps": 2016, "total_steps": 2436, "loss": 0.43747422099113464, "lr": 8.82889939989851e-07, "epoch": 2.4827586206896552, "percentage": 82.76, "elapsed_time": "0:38:52", "remaining_time": "0:08:06"} +{"current_steps": 2017, "total_steps": 2436, "loss": 0.4629102647304535, "lr": 8.78827946706311e-07, "epoch": 2.4839901477832513, "percentage": 82.8, "elapsed_time": "0:38:53", "remaining_time": "0:08:04"} +{"current_steps": 2018, "total_steps": 2436, "loss": 0.41271477937698364, "lr": 8.747744186635932e-07, "epoch": 2.4852216748768474, "percentage": 82.84, "elapsed_time": "0:38:55", "remaining_time": "0:08:03"} +{"current_steps": 2019, "total_steps": 2436, "loss": 0.27247580885887146, "lr": 8.707293641879888e-07, "epoch": 2.4864532019704435, "percentage": 82.88, "elapsed_time": "0:38:56", "remaining_time": "0:08:02"} +{"current_steps": 2020, "total_steps": 2436, "loss": 1.4255273342132568, "lr": 8.666927915883905e-07, "epoch": 2.4876847290640396, "percentage": 82.92, "elapsed_time": "0:38:57", "remaining_time": "0:08:01"} +{"current_steps": 2021, "total_steps": 2436, "loss": 0.8762021660804749, "lr": 8.626647091562612e-07, "epoch": 2.4889162561576352, "percentage": 82.96, "elapsed_time": "0:38:58", "remaining_time": "0:08:00"} +{"current_steps": 2022, "total_steps": 2436, "loss": 0.43475109338760376, "lr": 8.586451251656286e-07, "epoch": 2.4901477832512313, "percentage": 83.0, "elapsed_time": "0:38:59", "remaining_time": "0:07:58"} +{"current_steps": 2023, "total_steps": 2436, "loss": 0.16091346740722656, "lr": 8.546340478730647e-07, "epoch": 2.4913793103448274, "percentage": 83.05, "elapsed_time": "0:39:00", "remaining_time": "0:07:57"} +{"current_steps": 2024, "total_steps": 2436, "loss": 0.491144061088562, "lr": 8.506314855176651e-07, "epoch": 2.4926108374384235, "percentage": 83.09, "elapsed_time": "0:39:01", "remaining_time": "0:07:56"} +{"current_steps": 2025, "total_steps": 2436, "loss": 0.792976438999176, "lr": 8.466374463210348e-07, "epoch": 2.4938423645320196, "percentage": 83.13, "elapsed_time": "0:39:02", "remaining_time": "0:07:55"} +{"current_steps": 2026, "total_steps": 2436, "loss": 0.8023815155029297, "lr": 8.426519384872733e-07, "epoch": 2.4950738916256157, "percentage": 83.17, "elapsed_time": "0:39:03", "remaining_time": "0:07:54"} +{"current_steps": 2027, "total_steps": 2436, "loss": 0.7008549571037292, "lr": 8.386749702029578e-07, "epoch": 2.4963054187192117, "percentage": 83.21, "elapsed_time": "0:39:04", "remaining_time": "0:07:53"} +{"current_steps": 2028, "total_steps": 2436, "loss": 0.3158326745033264, "lr": 8.347065496371193e-07, "epoch": 2.497536945812808, "percentage": 83.25, "elapsed_time": "0:39:05", "remaining_time": "0:07:51"} +{"current_steps": 2029, "total_steps": 2436, "loss": 0.4847475588321686, "lr": 8.307466849412365e-07, "epoch": 2.498768472906404, "percentage": 83.29, "elapsed_time": "0:39:06", "remaining_time": "0:07:50"} +{"current_steps": 2030, "total_steps": 2436, "loss": 0.42490729689598083, "lr": 8.2679538424921e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:39:08", "remaining_time": "0:07:49"} +{"current_steps": 2031, "total_steps": 2436, "loss": 0.4303053021430969, "lr": 8.228526556773486e-07, "epoch": 2.501231527093596, "percentage": 83.37, "elapsed_time": "0:39:09", "remaining_time": "0:07:48"} +{"current_steps": 2032, "total_steps": 2436, "loss": 0.20669305324554443, "lr": 8.18918507324356e-07, "epoch": 2.502463054187192, "percentage": 83.42, "elapsed_time": "0:39:10", "remaining_time": "0:07:47"} +{"current_steps": 2033, "total_steps": 2436, "loss": 0.4146193265914917, "lr": 8.149929472713126e-07, "epoch": 2.5036945812807883, "percentage": 83.46, "elapsed_time": "0:39:11", "remaining_time": "0:07:46"} +{"current_steps": 2034, "total_steps": 2436, "loss": 0.2852465510368347, "lr": 8.110759835816518e-07, "epoch": 2.5049261083743843, "percentage": 83.5, "elapsed_time": "0:39:12", "remaining_time": "0:07:44"} +{"current_steps": 2035, "total_steps": 2436, "loss": 0.5811144113540649, "lr": 8.071676243011556e-07, "epoch": 2.5061576354679804, "percentage": 83.54, "elapsed_time": "0:39:13", "remaining_time": "0:07:43"} +{"current_steps": 2036, "total_steps": 2436, "loss": 0.6767745614051819, "lr": 8.032678774579272e-07, "epoch": 2.5073891625615765, "percentage": 83.58, "elapsed_time": "0:39:14", "remaining_time": "0:07:42"} +{"current_steps": 2037, "total_steps": 2436, "loss": 0.5063849687576294, "lr": 7.993767510623834e-07, "epoch": 2.5086206896551726, "percentage": 83.62, "elapsed_time": "0:39:15", "remaining_time": "0:07:41"} +{"current_steps": 2038, "total_steps": 2436, "loss": 0.534786581993103, "lr": 7.954942531072285e-07, "epoch": 2.5098522167487687, "percentage": 83.66, "elapsed_time": "0:39:16", "remaining_time": "0:07:40"} +{"current_steps": 2039, "total_steps": 2436, "loss": 0.45122361183166504, "lr": 7.91620391567448e-07, "epoch": 2.5110837438423648, "percentage": 83.7, "elapsed_time": "0:39:17", "remaining_time": "0:07:39"} +{"current_steps": 2040, "total_steps": 2436, "loss": 0.2832280099391937, "lr": 7.877551744002881e-07, "epoch": 2.512315270935961, "percentage": 83.74, "elapsed_time": "0:39:18", "remaining_time": "0:07:37"} +{"current_steps": 2041, "total_steps": 2436, "loss": 0.8926963806152344, "lr": 7.838986095452311e-07, "epoch": 2.5135467980295565, "percentage": 83.78, "elapsed_time": "0:39:19", "remaining_time": "0:07:36"} +{"current_steps": 2042, "total_steps": 2436, "loss": 0.9263632893562317, "lr": 7.800507049239947e-07, "epoch": 2.5147783251231526, "percentage": 83.83, "elapsed_time": "0:39:20", "remaining_time": "0:07:35"} +{"current_steps": 2043, "total_steps": 2436, "loss": 0.3994196653366089, "lr": 7.762114684405064e-07, "epoch": 2.5160098522167487, "percentage": 83.87, "elapsed_time": "0:39:22", "remaining_time": "0:07:34"} +{"current_steps": 2044, "total_steps": 2436, "loss": 0.3273079991340637, "lr": 7.723809079808842e-07, "epoch": 2.5172413793103448, "percentage": 83.91, "elapsed_time": "0:39:23", "remaining_time": "0:07:33"} +{"current_steps": 2045, "total_steps": 2436, "loss": 0.4566258192062378, "lr": 7.685590314134294e-07, "epoch": 2.518472906403941, "percentage": 83.95, "elapsed_time": "0:39:24", "remaining_time": "0:07:32"} +{"current_steps": 2046, "total_steps": 2436, "loss": 0.4199177026748657, "lr": 7.647458465886055e-07, "epoch": 2.519704433497537, "percentage": 83.99, "elapsed_time": "0:39:25", "remaining_time": "0:07:30"} +{"current_steps": 2047, "total_steps": 2436, "loss": 0.2789694666862488, "lr": 7.609413613390199e-07, "epoch": 2.520935960591133, "percentage": 84.03, "elapsed_time": "0:39:26", "remaining_time": "0:07:29"} +{"current_steps": 2048, "total_steps": 2436, "loss": 0.39359426498413086, "lr": 7.571455834794095e-07, "epoch": 2.522167487684729, "percentage": 84.07, "elapsed_time": "0:39:27", "remaining_time": "0:07:28"} +{"current_steps": 2049, "total_steps": 2436, "loss": 0.38510677218437195, "lr": 7.533585208066302e-07, "epoch": 2.523399014778325, "percentage": 84.11, "elapsed_time": "0:39:28", "remaining_time": "0:07:27"} +{"current_steps": 2050, "total_steps": 2436, "loss": 1.0861276388168335, "lr": 7.495801810996334e-07, "epoch": 2.5246305418719213, "percentage": 84.15, "elapsed_time": "0:39:29", "remaining_time": "0:07:26"} +{"current_steps": 2051, "total_steps": 2436, "loss": 0.35866010189056396, "lr": 7.458105721194525e-07, "epoch": 2.5258620689655173, "percentage": 84.2, "elapsed_time": "0:39:30", "remaining_time": "0:07:25"} +{"current_steps": 2052, "total_steps": 2436, "loss": 0.3436219394207001, "lr": 7.420497016091866e-07, "epoch": 2.5270935960591134, "percentage": 84.24, "elapsed_time": "0:39:31", "remaining_time": "0:07:23"} +{"current_steps": 2053, "total_steps": 2436, "loss": 0.3687105178833008, "lr": 7.382975772939866e-07, "epoch": 2.5283251231527095, "percentage": 84.28, "elapsed_time": "0:39:32", "remaining_time": "0:07:22"} +{"current_steps": 2054, "total_steps": 2436, "loss": 0.32671070098876953, "lr": 7.34554206881039e-07, "epoch": 2.529556650246305, "percentage": 84.32, "elapsed_time": "0:39:33", "remaining_time": "0:07:21"} +{"current_steps": 2055, "total_steps": 2436, "loss": 0.7302184104919434, "lr": 7.308195980595462e-07, "epoch": 2.5307881773399012, "percentage": 84.36, "elapsed_time": "0:39:35", "remaining_time": "0:07:20"} +{"current_steps": 2056, "total_steps": 2436, "loss": 0.7430564761161804, "lr": 7.270937585007149e-07, "epoch": 2.5320197044334973, "percentage": 84.4, "elapsed_time": "0:39:36", "remaining_time": "0:07:19"} +{"current_steps": 2057, "total_steps": 2436, "loss": 0.305151104927063, "lr": 7.233766958577421e-07, "epoch": 2.5332512315270934, "percentage": 84.44, "elapsed_time": "0:39:37", "remaining_time": "0:07:17"} +{"current_steps": 2058, "total_steps": 2436, "loss": 0.4311235547065735, "lr": 7.196684177657887e-07, "epoch": 2.5344827586206895, "percentage": 84.48, "elapsed_time": "0:39:38", "remaining_time": "0:07:16"} +{"current_steps": 2059, "total_steps": 2436, "loss": 0.29697108268737793, "lr": 7.159689318419777e-07, "epoch": 2.5357142857142856, "percentage": 84.52, "elapsed_time": "0:39:39", "remaining_time": "0:07:15"} +{"current_steps": 2060, "total_steps": 2436, "loss": 0.5012999176979065, "lr": 7.122782456853722e-07, "epoch": 2.5369458128078817, "percentage": 84.56, "elapsed_time": "0:39:40", "remaining_time": "0:07:14"} +{"current_steps": 2061, "total_steps": 2436, "loss": 0.24754227697849274, "lr": 7.085963668769552e-07, "epoch": 2.5381773399014778, "percentage": 84.61, "elapsed_time": "0:39:41", "remaining_time": "0:07:13"} +{"current_steps": 2062, "total_steps": 2436, "loss": 0.1311894953250885, "lr": 7.049233029796243e-07, "epoch": 2.539408866995074, "percentage": 84.65, "elapsed_time": "0:39:42", "remaining_time": "0:07:12"} +{"current_steps": 2063, "total_steps": 2436, "loss": 0.3458009958267212, "lr": 7.012590615381654e-07, "epoch": 2.54064039408867, "percentage": 84.69, "elapsed_time": "0:39:43", "remaining_time": "0:07:10"} +{"current_steps": 2064, "total_steps": 2436, "loss": 0.6216360330581665, "lr": 6.976036500792466e-07, "epoch": 2.541871921182266, "percentage": 84.73, "elapsed_time": "0:39:44", "remaining_time": "0:07:09"} +{"current_steps": 2065, "total_steps": 2436, "loss": 0.41114604473114014, "lr": 6.939570761113939e-07, "epoch": 2.543103448275862, "percentage": 84.77, "elapsed_time": "0:39:45", "remaining_time": "0:07:08"} +{"current_steps": 2066, "total_steps": 2436, "loss": 0.35362619161605835, "lr": 6.903193471249853e-07, "epoch": 2.544334975369458, "percentage": 84.81, "elapsed_time": "0:39:46", "remaining_time": "0:07:07"} +{"current_steps": 2067, "total_steps": 2436, "loss": 1.7280857563018799, "lr": 6.866904705922284e-07, "epoch": 2.5455665024630543, "percentage": 84.85, "elapsed_time": "0:39:47", "remaining_time": "0:07:06"} +{"current_steps": 2068, "total_steps": 2436, "loss": 1.3645777702331543, "lr": 6.830704539671462e-07, "epoch": 2.5467980295566504, "percentage": 84.89, "elapsed_time": "0:39:49", "remaining_time": "0:07:05"} +{"current_steps": 2069, "total_steps": 2436, "loss": 0.46488872170448303, "lr": 6.794593046855613e-07, "epoch": 2.5480295566502464, "percentage": 84.93, "elapsed_time": "0:39:50", "remaining_time": "0:07:03"} +{"current_steps": 2070, "total_steps": 2436, "loss": 0.9913250803947449, "lr": 6.758570301650869e-07, "epoch": 2.5492610837438425, "percentage": 84.98, "elapsed_time": "0:39:51", "remaining_time": "0:07:02"} +{"current_steps": 2071, "total_steps": 2436, "loss": 0.8180273771286011, "lr": 6.722636378051011e-07, "epoch": 2.5504926108374386, "percentage": 85.02, "elapsed_time": "0:39:52", "remaining_time": "0:07:01"} +{"current_steps": 2072, "total_steps": 2436, "loss": 0.5234679579734802, "lr": 6.686791349867422e-07, "epoch": 2.5517241379310347, "percentage": 85.06, "elapsed_time": "0:39:53", "remaining_time": "0:07:00"} +{"current_steps": 2073, "total_steps": 2436, "loss": 0.08975313603878021, "lr": 6.651035290728858e-07, "epoch": 2.552955665024631, "percentage": 85.1, "elapsed_time": "0:39:54", "remaining_time": "0:06:59"} +{"current_steps": 2074, "total_steps": 2436, "loss": 0.35545456409454346, "lr": 6.615368274081335e-07, "epoch": 2.554187192118227, "percentage": 85.14, "elapsed_time": "0:39:55", "remaining_time": "0:06:58"} +{"current_steps": 2075, "total_steps": 2436, "loss": 1.192006230354309, "lr": 6.579790373187944e-07, "epoch": 2.5554187192118225, "percentage": 85.18, "elapsed_time": "0:39:56", "remaining_time": "0:06:56"} +{"current_steps": 2076, "total_steps": 2436, "loss": 0.35069915652275085, "lr": 6.54430166112876e-07, "epoch": 2.5566502463054186, "percentage": 85.22, "elapsed_time": "0:39:57", "remaining_time": "0:06:55"} +{"current_steps": 2077, "total_steps": 2436, "loss": 0.20691820979118347, "lr": 6.508902210800649e-07, "epoch": 2.5578817733990147, "percentage": 85.26, "elapsed_time": "0:39:58", "remaining_time": "0:06:54"} +{"current_steps": 2078, "total_steps": 2436, "loss": 0.4561042785644531, "lr": 6.473592094917092e-07, "epoch": 2.5591133004926108, "percentage": 85.3, "elapsed_time": "0:39:59", "remaining_time": "0:06:53"} +{"current_steps": 2079, "total_steps": 2436, "loss": 0.32198822498321533, "lr": 6.43837138600813e-07, "epoch": 2.560344827586207, "percentage": 85.34, "elapsed_time": "0:40:00", "remaining_time": "0:06:52"} +{"current_steps": 2080, "total_steps": 2436, "loss": 0.35681653022766113, "lr": 6.403240156420087e-07, "epoch": 2.561576354679803, "percentage": 85.39, "elapsed_time": "0:40:01", "remaining_time": "0:06:51"} +{"current_steps": 2081, "total_steps": 2436, "loss": 0.5826268196105957, "lr": 6.36819847831554e-07, "epoch": 2.562807881773399, "percentage": 85.43, "elapsed_time": "0:40:03", "remaining_time": "0:06:49"} +{"current_steps": 2082, "total_steps": 2436, "loss": 0.23084279894828796, "lr": 6.333246423673096e-07, "epoch": 2.564039408866995, "percentage": 85.47, "elapsed_time": "0:40:04", "remaining_time": "0:06:48"} +{"current_steps": 2083, "total_steps": 2436, "loss": 0.5527750253677368, "lr": 6.298384064287261e-07, "epoch": 2.565270935960591, "percentage": 85.51, "elapsed_time": "0:40:05", "remaining_time": "0:06:47"} +{"current_steps": 2084, "total_steps": 2436, "loss": 0.4125085175037384, "lr": 6.263611471768349e-07, "epoch": 2.5665024630541873, "percentage": 85.55, "elapsed_time": "0:40:06", "remaining_time": "0:06:46"} +{"current_steps": 2085, "total_steps": 2436, "loss": 0.37431174516677856, "lr": 6.228928717542205e-07, "epoch": 2.5677339901477834, "percentage": 85.59, "elapsed_time": "0:40:07", "remaining_time": "0:06:45"} +{"current_steps": 2086, "total_steps": 2436, "loss": 0.17119471728801727, "lr": 6.194335872850188e-07, "epoch": 2.5689655172413794, "percentage": 85.63, "elapsed_time": "0:40:08", "remaining_time": "0:06:44"} +{"current_steps": 2087, "total_steps": 2436, "loss": 0.9465748071670532, "lr": 6.159833008748988e-07, "epoch": 2.5701970443349755, "percentage": 85.67, "elapsed_time": "0:40:09", "remaining_time": "0:06:42"} +{"current_steps": 2088, "total_steps": 2436, "loss": 0.48980847001075745, "lr": 6.125420196110426e-07, "epoch": 2.571428571428571, "percentage": 85.71, "elapsed_time": "0:40:10", "remaining_time": "0:06:41"} +{"current_steps": 2089, "total_steps": 2436, "loss": 0.7195557951927185, "lr": 6.091097505621374e-07, "epoch": 2.5726600985221673, "percentage": 85.76, "elapsed_time": "0:40:11", "remaining_time": "0:06:40"} +{"current_steps": 2090, "total_steps": 2436, "loss": 1.83125638961792, "lr": 6.056865007783602e-07, "epoch": 2.5738916256157633, "percentage": 85.8, "elapsed_time": "0:40:12", "remaining_time": "0:06:39"} +{"current_steps": 2091, "total_steps": 2436, "loss": 0.3298517167568207, "lr": 6.022722772913581e-07, "epoch": 2.5751231527093594, "percentage": 85.84, "elapsed_time": "0:40:13", "remaining_time": "0:06:38"} +{"current_steps": 2092, "total_steps": 2436, "loss": 0.47125905752182007, "lr": 5.988670871142377e-07, "epoch": 2.5763546798029555, "percentage": 85.88, "elapsed_time": "0:40:14", "remaining_time": "0:06:37"} +{"current_steps": 2093, "total_steps": 2436, "loss": 0.288496196269989, "lr": 5.954709372415524e-07, "epoch": 2.5775862068965516, "percentage": 85.92, "elapsed_time": "0:40:15", "remaining_time": "0:06:35"} +{"current_steps": 2094, "total_steps": 2436, "loss": 0.3627285957336426, "lr": 5.920838346492874e-07, "epoch": 2.5788177339901477, "percentage": 85.96, "elapsed_time": "0:40:17", "remaining_time": "0:06:34"} +{"current_steps": 2095, "total_steps": 2436, "loss": 0.7072806358337402, "lr": 5.887057862948403e-07, "epoch": 2.5800492610837438, "percentage": 86.0, "elapsed_time": "0:40:18", "remaining_time": "0:06:33"} +{"current_steps": 2096, "total_steps": 2436, "loss": 0.3386034071445465, "lr": 5.853367991170106e-07, "epoch": 2.58128078817734, "percentage": 86.04, "elapsed_time": "0:40:19", "remaining_time": "0:06:32"} +{"current_steps": 2097, "total_steps": 2436, "loss": 0.4901737570762634, "lr": 5.819768800359882e-07, "epoch": 2.582512315270936, "percentage": 86.08, "elapsed_time": "0:40:20", "remaining_time": "0:06:31"} +{"current_steps": 2098, "total_steps": 2436, "loss": 1.683629035949707, "lr": 5.786260359533369e-07, "epoch": 2.583743842364532, "percentage": 86.12, "elapsed_time": "0:40:21", "remaining_time": "0:06:30"} +{"current_steps": 2099, "total_steps": 2436, "loss": 0.4275779128074646, "lr": 5.752842737519743e-07, "epoch": 2.584975369458128, "percentage": 86.17, "elapsed_time": "0:40:22", "remaining_time": "0:06:28"} +{"current_steps": 2100, "total_steps": 2436, "loss": 0.6892256736755371, "lr": 5.7195160029617e-07, "epoch": 2.586206896551724, "percentage": 86.21, "elapsed_time": "0:40:23", "remaining_time": "0:06:27"} +{"current_steps": 2101, "total_steps": 2436, "loss": 0.6548988819122314, "lr": 5.686280224315189e-07, "epoch": 2.5874384236453203, "percentage": 86.25, "elapsed_time": "0:40:24", "remaining_time": "0:06:26"} +{"current_steps": 2102, "total_steps": 2436, "loss": 0.4431142807006836, "lr": 5.653135469849347e-07, "epoch": 2.5886699507389164, "percentage": 86.29, "elapsed_time": "0:40:25", "remaining_time": "0:06:25"} +{"current_steps": 2103, "total_steps": 2436, "loss": 0.5730191469192505, "lr": 5.62008180764635e-07, "epoch": 2.5899014778325125, "percentage": 86.33, "elapsed_time": "0:40:26", "remaining_time": "0:06:24"} +{"current_steps": 2104, "total_steps": 2436, "loss": 0.8734421730041504, "lr": 5.587119305601263e-07, "epoch": 2.5911330049261085, "percentage": 86.37, "elapsed_time": "0:40:27", "remaining_time": "0:06:23"} +{"current_steps": 2105, "total_steps": 2436, "loss": 0.30810514092445374, "lr": 5.554248031421872e-07, "epoch": 2.5923645320197046, "percentage": 86.41, "elapsed_time": "0:40:29", "remaining_time": "0:06:21"} +{"current_steps": 2106, "total_steps": 2436, "loss": 0.5941227078437805, "lr": 5.521468052628615e-07, "epoch": 2.5935960591133007, "percentage": 86.45, "elapsed_time": "0:40:30", "remaining_time": "0:06:20"} +{"current_steps": 2107, "total_steps": 2436, "loss": 0.32648181915283203, "lr": 5.488779436554359e-07, "epoch": 2.594827586206897, "percentage": 86.49, "elapsed_time": "0:40:31", "remaining_time": "0:06:19"} +{"current_steps": 2108, "total_steps": 2436, "loss": 0.2934610843658447, "lr": 5.456182250344349e-07, "epoch": 2.596059113300493, "percentage": 86.54, "elapsed_time": "0:40:32", "remaining_time": "0:06:18"} +{"current_steps": 2109, "total_steps": 2436, "loss": 0.20387941598892212, "lr": 5.423676560955976e-07, "epoch": 2.5972906403940885, "percentage": 86.58, "elapsed_time": "0:40:33", "remaining_time": "0:06:17"} +{"current_steps": 2110, "total_steps": 2436, "loss": 0.6115235090255737, "lr": 5.391262435158722e-07, "epoch": 2.5985221674876846, "percentage": 86.62, "elapsed_time": "0:40:34", "remaining_time": "0:06:16"} +{"current_steps": 2111, "total_steps": 2436, "loss": 0.45280611515045166, "lr": 5.358939939534002e-07, "epoch": 2.5997536945812807, "percentage": 86.66, "elapsed_time": "0:40:35", "remaining_time": "0:06:14"} +{"current_steps": 2112, "total_steps": 2436, "loss": 0.29169538617134094, "lr": 5.326709140474962e-07, "epoch": 2.600985221674877, "percentage": 86.7, "elapsed_time": "0:40:36", "remaining_time": "0:06:13"} +{"current_steps": 2113, "total_steps": 2436, "loss": 0.4924798011779785, "lr": 5.294570104186436e-07, "epoch": 2.602216748768473, "percentage": 86.74, "elapsed_time": "0:40:37", "remaining_time": "0:06:12"} +{"current_steps": 2114, "total_steps": 2436, "loss": 0.6751348376274109, "lr": 5.262522896684774e-07, "epoch": 2.603448275862069, "percentage": 86.78, "elapsed_time": "0:40:38", "remaining_time": "0:06:11"} +{"current_steps": 2115, "total_steps": 2436, "loss": 0.6676002740859985, "lr": 5.230567583797674e-07, "epoch": 2.604679802955665, "percentage": 86.82, "elapsed_time": "0:40:39", "remaining_time": "0:06:10"} +{"current_steps": 2116, "total_steps": 2436, "loss": 0.3112475275993347, "lr": 5.198704231164093e-07, "epoch": 2.605911330049261, "percentage": 86.86, "elapsed_time": "0:40:40", "remaining_time": "0:06:09"} +{"current_steps": 2117, "total_steps": 2436, "loss": 0.5024739503860474, "lr": 5.166932904234101e-07, "epoch": 2.607142857142857, "percentage": 86.9, "elapsed_time": "0:40:41", "remaining_time": "0:06:07"} +{"current_steps": 2118, "total_steps": 2436, "loss": 2.6769824028015137, "lr": 5.135253668268724e-07, "epoch": 2.6083743842364533, "percentage": 86.95, "elapsed_time": "0:40:43", "remaining_time": "0:06:06"} +{"current_steps": 2119, "total_steps": 2436, "loss": 0.4120222330093384, "lr": 5.103666588339812e-07, "epoch": 2.6096059113300494, "percentage": 86.99, "elapsed_time": "0:40:44", "remaining_time": "0:06:05"} +{"current_steps": 2120, "total_steps": 2436, "loss": 0.3238741457462311, "lr": 5.072171729329944e-07, "epoch": 2.6108374384236455, "percentage": 87.03, "elapsed_time": "0:40:45", "remaining_time": "0:06:04"} +{"current_steps": 2121, "total_steps": 2436, "loss": 0.41853106021881104, "lr": 5.040769155932285e-07, "epoch": 2.612068965517241, "percentage": 87.07, "elapsed_time": "0:40:46", "remaining_time": "0:06:03"} +{"current_steps": 2122, "total_steps": 2436, "loss": 0.5511228442192078, "lr": 5.00945893265039e-07, "epoch": 2.613300492610837, "percentage": 87.11, "elapsed_time": "0:40:47", "remaining_time": "0:06:02"} +{"current_steps": 2123, "total_steps": 2436, "loss": 0.6076939105987549, "lr": 4.978241123798133e-07, "epoch": 2.6145320197044333, "percentage": 87.15, "elapsed_time": "0:40:48", "remaining_time": "0:06:00"} +{"current_steps": 2124, "total_steps": 2436, "loss": 0.32137832045555115, "lr": 4.94711579349959e-07, "epoch": 2.6157635467980294, "percentage": 87.19, "elapsed_time": "0:40:49", "remaining_time": "0:05:59"} +{"current_steps": 2125, "total_steps": 2436, "loss": 0.2919730246067047, "lr": 4.916083005688865e-07, "epoch": 2.6169950738916254, "percentage": 87.23, "elapsed_time": "0:40:50", "remaining_time": "0:05:58"} +{"current_steps": 2126, "total_steps": 2436, "loss": 0.3521897792816162, "lr": 4.885142824109946e-07, "epoch": 2.6182266009852215, "percentage": 87.27, "elapsed_time": "0:40:51", "remaining_time": "0:05:57"} +{"current_steps": 2127, "total_steps": 2436, "loss": 0.5645777583122253, "lr": 4.85429531231662e-07, "epoch": 2.6194581280788176, "percentage": 87.32, "elapsed_time": "0:40:52", "remaining_time": "0:05:56"} +{"current_steps": 2128, "total_steps": 2436, "loss": 0.21364668011665344, "lr": 4.823540533672355e-07, "epoch": 2.6206896551724137, "percentage": 87.36, "elapsed_time": "0:40:53", "remaining_time": "0:05:55"} +{"current_steps": 2129, "total_steps": 2436, "loss": 0.3472633957862854, "lr": 4.792878551350055e-07, "epoch": 2.62192118226601, "percentage": 87.4, "elapsed_time": "0:40:54", "remaining_time": "0:05:53"} +{"current_steps": 2130, "total_steps": 2436, "loss": 0.2312706857919693, "lr": 4.7623094283320905e-07, "epoch": 2.623152709359606, "percentage": 87.44, "elapsed_time": "0:40:55", "remaining_time": "0:05:52"} +{"current_steps": 2131, "total_steps": 2436, "loss": 0.4227292835712433, "lr": 4.7318332274100595e-07, "epoch": 2.624384236453202, "percentage": 87.48, "elapsed_time": "0:40:57", "remaining_time": "0:05:51"} +{"current_steps": 2132, "total_steps": 2436, "loss": 0.4835679531097412, "lr": 4.701450011184677e-07, "epoch": 2.625615763546798, "percentage": 87.52, "elapsed_time": "0:40:58", "remaining_time": "0:05:50"} +{"current_steps": 2133, "total_steps": 2436, "loss": 0.30153489112854004, "lr": 4.671159842065698e-07, "epoch": 2.626847290640394, "percentage": 87.56, "elapsed_time": "0:40:59", "remaining_time": "0:05:49"} +{"current_steps": 2134, "total_steps": 2436, "loss": 0.19820570945739746, "lr": 4.640962782271707e-07, "epoch": 2.62807881773399, "percentage": 87.6, "elapsed_time": "0:41:00", "remaining_time": "0:05:48"} +{"current_steps": 2135, "total_steps": 2436, "loss": 0.5798308253288269, "lr": 4.6108588938300725e-07, "epoch": 2.6293103448275863, "percentage": 87.64, "elapsed_time": "0:41:01", "remaining_time": "0:05:47"} +{"current_steps": 2136, "total_steps": 2436, "loss": 0.4840395450592041, "lr": 4.5808482385767407e-07, "epoch": 2.6305418719211824, "percentage": 87.68, "elapsed_time": "0:41:02", "remaining_time": "0:05:45"} +{"current_steps": 2137, "total_steps": 2436, "loss": 0.33036884665489197, "lr": 4.5509308781561846e-07, "epoch": 2.6317733990147785, "percentage": 87.73, "elapsed_time": "0:41:03", "remaining_time": "0:05:44"} +{"current_steps": 2138, "total_steps": 2436, "loss": 0.4032250642776489, "lr": 4.521106874021242e-07, "epoch": 2.6330049261083746, "percentage": 87.77, "elapsed_time": "0:41:04", "remaining_time": "0:05:43"} +{"current_steps": 2139, "total_steps": 2436, "loss": 0.5196541547775269, "lr": 4.4913762874329527e-07, "epoch": 2.6342364532019706, "percentage": 87.81, "elapsed_time": "0:41:05", "remaining_time": "0:05:42"} +{"current_steps": 2140, "total_steps": 2436, "loss": 0.5049697160720825, "lr": 4.4617391794604946e-07, "epoch": 2.6354679802955667, "percentage": 87.85, "elapsed_time": "0:41:07", "remaining_time": "0:05:41"} +{"current_steps": 2141, "total_steps": 2436, "loss": 0.6910302639007568, "lr": 4.4321956109810327e-07, "epoch": 2.636699507389163, "percentage": 87.89, "elapsed_time": "0:41:08", "remaining_time": "0:05:40"} +{"current_steps": 2142, "total_steps": 2436, "loss": 0.8860565423965454, "lr": 4.4027456426796014e-07, "epoch": 2.637931034482759, "percentage": 87.93, "elapsed_time": "0:41:09", "remaining_time": "0:05:38"} +{"current_steps": 2143, "total_steps": 2436, "loss": 0.3347795307636261, "lr": 4.3733893350489386e-07, "epoch": 2.6391625615763545, "percentage": 87.97, "elapsed_time": "0:41:10", "remaining_time": "0:05:37"} +{"current_steps": 2144, "total_steps": 2436, "loss": 0.5979218482971191, "lr": 4.344126748389438e-07, "epoch": 2.6403940886699506, "percentage": 88.01, "elapsed_time": "0:41:11", "remaining_time": "0:05:36"} +{"current_steps": 2145, "total_steps": 2436, "loss": 0.6724722385406494, "lr": 4.314957942808956e-07, "epoch": 2.6416256157635467, "percentage": 88.05, "elapsed_time": "0:41:12", "remaining_time": "0:05:35"} +{"current_steps": 2146, "total_steps": 2436, "loss": 0.23655423521995544, "lr": 4.2858829782227107e-07, "epoch": 2.642857142857143, "percentage": 88.1, "elapsed_time": "0:41:13", "remaining_time": "0:05:34"} +{"current_steps": 2147, "total_steps": 2436, "loss": 0.7535929679870605, "lr": 4.2569019143531845e-07, "epoch": 2.644088669950739, "percentage": 88.14, "elapsed_time": "0:41:14", "remaining_time": "0:05:33"} +{"current_steps": 2148, "total_steps": 2436, "loss": 0.5065590143203735, "lr": 4.228014810729963e-07, "epoch": 2.645320197044335, "percentage": 88.18, "elapsed_time": "0:41:15", "remaining_time": "0:05:31"} +{"current_steps": 2149, "total_steps": 2436, "loss": 0.8232078552246094, "lr": 4.199221726689634e-07, "epoch": 2.646551724137931, "percentage": 88.22, "elapsed_time": "0:41:16", "remaining_time": "0:05:30"} +{"current_steps": 2150, "total_steps": 2436, "loss": 0.3928985595703125, "lr": 4.170522721375669e-07, "epoch": 2.647783251231527, "percentage": 88.26, "elapsed_time": "0:41:18", "remaining_time": "0:05:29"} +{"current_steps": 2151, "total_steps": 2436, "loss": 0.6924771070480347, "lr": 4.1419178537382756e-07, "epoch": 2.649014778325123, "percentage": 88.3, "elapsed_time": "0:41:19", "remaining_time": "0:05:28"} +{"current_steps": 2152, "total_steps": 2436, "loss": 0.3323458135128021, "lr": 4.1134071825343124e-07, "epoch": 2.6502463054187193, "percentage": 88.34, "elapsed_time": "0:41:20", "remaining_time": "0:05:27"} +{"current_steps": 2153, "total_steps": 2436, "loss": 0.6068896651268005, "lr": 4.0849907663271346e-07, "epoch": 2.6514778325123154, "percentage": 88.38, "elapsed_time": "0:41:21", "remaining_time": "0:05:26"} +{"current_steps": 2154, "total_steps": 2436, "loss": 0.2112211287021637, "lr": 4.0566686634865016e-07, "epoch": 2.6527093596059115, "percentage": 88.42, "elapsed_time": "0:41:22", "remaining_time": "0:05:24"} +{"current_steps": 2155, "total_steps": 2436, "loss": 0.3340219259262085, "lr": 4.028440932188465e-07, "epoch": 2.653940886699507, "percentage": 88.46, "elapsed_time": "0:41:23", "remaining_time": "0:05:23"} +{"current_steps": 2156, "total_steps": 2436, "loss": 0.4172120690345764, "lr": 4.0003076304151624e-07, "epoch": 2.655172413793103, "percentage": 88.51, "elapsed_time": "0:41:24", "remaining_time": "0:05:22"} +{"current_steps": 2157, "total_steps": 2436, "loss": 0.3891775608062744, "lr": 3.972268815954833e-07, "epoch": 2.6564039408866993, "percentage": 88.55, "elapsed_time": "0:41:25", "remaining_time": "0:05:21"} +{"current_steps": 2158, "total_steps": 2436, "loss": 0.4906957149505615, "lr": 3.944324546401607e-07, "epoch": 2.6576354679802954, "percentage": 88.59, "elapsed_time": "0:41:26", "remaining_time": "0:05:20"} +{"current_steps": 2159, "total_steps": 2436, "loss": 0.8216167688369751, "lr": 3.916474879155402e-07, "epoch": 2.6588669950738915, "percentage": 88.63, "elapsed_time": "0:41:27", "remaining_time": "0:05:19"} +{"current_steps": 2160, "total_steps": 2436, "loss": 0.2030409872531891, "lr": 3.8887198714218255e-07, "epoch": 2.6600985221674875, "percentage": 88.67, "elapsed_time": "0:41:28", "remaining_time": "0:05:18"} +{"current_steps": 2161, "total_steps": 2436, "loss": 0.24565047025680542, "lr": 3.8610595802120564e-07, "epoch": 2.6613300492610836, "percentage": 88.71, "elapsed_time": "0:41:29", "remaining_time": "0:05:16"} +{"current_steps": 2162, "total_steps": 2436, "loss": 0.3111516833305359, "lr": 3.833494062342691e-07, "epoch": 2.6625615763546797, "percentage": 88.75, "elapsed_time": "0:41:31", "remaining_time": "0:05:15"} +{"current_steps": 2163, "total_steps": 2436, "loss": 0.32978883385658264, "lr": 3.8060233744356634e-07, "epoch": 2.663793103448276, "percentage": 88.79, "elapsed_time": "0:41:32", "remaining_time": "0:05:14"} +{"current_steps": 2164, "total_steps": 2436, "loss": 0.5468876361846924, "lr": 3.7786475729181314e-07, "epoch": 2.665024630541872, "percentage": 88.83, "elapsed_time": "0:41:33", "remaining_time": "0:05:13"} +{"current_steps": 2165, "total_steps": 2436, "loss": 0.25511908531188965, "lr": 3.751366714022342e-07, "epoch": 2.666256157635468, "percentage": 88.88, "elapsed_time": "0:41:34", "remaining_time": "0:05:12"} +{"current_steps": 2166, "total_steps": 2436, "loss": 0.9938629269599915, "lr": 3.724180853785514e-07, "epoch": 2.667487684729064, "percentage": 88.92, "elapsed_time": "0:41:35", "remaining_time": "0:05:11"} +{"current_steps": 2167, "total_steps": 2436, "loss": 0.4233144223690033, "lr": 3.6970900480497287e-07, "epoch": 2.66871921182266, "percentage": 88.96, "elapsed_time": "0:41:36", "remaining_time": "0:05:09"} +{"current_steps": 2168, "total_steps": 2436, "loss": 0.39373546838760376, "lr": 3.6700943524618284e-07, "epoch": 2.6699507389162562, "percentage": 89.0, "elapsed_time": "0:41:37", "remaining_time": "0:05:08"} +{"current_steps": 2169, "total_steps": 2436, "loss": 0.40346717834472656, "lr": 3.643193822473301e-07, "epoch": 2.6711822660098523, "percentage": 89.04, "elapsed_time": "0:41:38", "remaining_time": "0:05:07"} +{"current_steps": 2170, "total_steps": 2436, "loss": 0.35343194007873535, "lr": 3.616388513340124e-07, "epoch": 2.6724137931034484, "percentage": 89.08, "elapsed_time": "0:41:39", "remaining_time": "0:05:06"} +{"current_steps": 2171, "total_steps": 2436, "loss": 0.38300061225891113, "lr": 3.5896784801227046e-07, "epoch": 2.6736453201970445, "percentage": 89.12, "elapsed_time": "0:41:40", "remaining_time": "0:05:05"} +{"current_steps": 2172, "total_steps": 2436, "loss": 0.5319961905479431, "lr": 3.56306377768576e-07, "epoch": 2.6748768472906406, "percentage": 89.16, "elapsed_time": "0:41:41", "remaining_time": "0:05:04"} +{"current_steps": 2173, "total_steps": 2436, "loss": 0.45474281907081604, "lr": 3.5365444606981434e-07, "epoch": 2.6761083743842367, "percentage": 89.2, "elapsed_time": "0:41:42", "remaining_time": "0:05:02"} +{"current_steps": 2174, "total_steps": 2436, "loss": 0.41422080993652344, "lr": 3.5101205836328144e-07, "epoch": 2.6773399014778327, "percentage": 89.24, "elapsed_time": "0:41:43", "remaining_time": "0:05:01"} +{"current_steps": 2175, "total_steps": 2436, "loss": 0.5486617088317871, "lr": 3.4837922007667e-07, "epoch": 2.678571428571429, "percentage": 89.29, "elapsed_time": "0:41:44", "remaining_time": "0:05:00"} +{"current_steps": 2176, "total_steps": 2436, "loss": 0.27931463718414307, "lr": 3.4575593661805296e-07, "epoch": 2.6798029556650245, "percentage": 89.33, "elapsed_time": "0:41:46", "remaining_time": "0:04:59"} +{"current_steps": 2177, "total_steps": 2436, "loss": 0.45936134457588196, "lr": 3.4314221337588217e-07, "epoch": 2.6810344827586206, "percentage": 89.37, "elapsed_time": "0:41:47", "remaining_time": "0:04:58"} +{"current_steps": 2178, "total_steps": 2436, "loss": 0.5659298896789551, "lr": 3.405380557189669e-07, "epoch": 2.6822660098522166, "percentage": 89.41, "elapsed_time": "0:41:48", "remaining_time": "0:04:57"} +{"current_steps": 2179, "total_steps": 2436, "loss": 0.3952332139015198, "lr": 3.379434689964728e-07, "epoch": 2.6834975369458127, "percentage": 89.45, "elapsed_time": "0:41:49", "remaining_time": "0:04:55"} +{"current_steps": 2180, "total_steps": 2436, "loss": 0.36344432830810547, "lr": 3.3535845853790105e-07, "epoch": 2.684729064039409, "percentage": 89.49, "elapsed_time": "0:41:50", "remaining_time": "0:04:54"} +{"current_steps": 2181, "total_steps": 2436, "loss": 0.29526573419570923, "lr": 3.3278302965308593e-07, "epoch": 2.685960591133005, "percentage": 89.53, "elapsed_time": "0:41:51", "remaining_time": "0:04:53"} +{"current_steps": 2182, "total_steps": 2436, "loss": 0.35098952054977417, "lr": 3.3021718763218025e-07, "epoch": 2.687192118226601, "percentage": 89.57, "elapsed_time": "0:41:52", "remaining_time": "0:04:52"} +{"current_steps": 2183, "total_steps": 2436, "loss": 0.9407736659049988, "lr": 3.276609377456419e-07, "epoch": 2.688423645320197, "percentage": 89.61, "elapsed_time": "0:41:53", "remaining_time": "0:04:51"} +{"current_steps": 2184, "total_steps": 2436, "loss": 0.29226356744766235, "lr": 3.2511428524422793e-07, "epoch": 2.689655172413793, "percentage": 89.66, "elapsed_time": "0:41:54", "remaining_time": "0:04:50"} +{"current_steps": 2185, "total_steps": 2436, "loss": 0.78415846824646, "lr": 3.2257723535898177e-07, "epoch": 2.6908866995073892, "percentage": 89.7, "elapsed_time": "0:41:55", "remaining_time": "0:04:48"} +{"current_steps": 2186, "total_steps": 2436, "loss": 0.22600015997886658, "lr": 3.200497933012198e-07, "epoch": 2.6921182266009853, "percentage": 89.74, "elapsed_time": "0:41:56", "remaining_time": "0:04:47"} +{"current_steps": 2187, "total_steps": 2436, "loss": 0.3907809853553772, "lr": 3.1753196426252573e-07, "epoch": 2.6933497536945814, "percentage": 89.78, "elapsed_time": "0:41:57", "remaining_time": "0:04:46"} +{"current_steps": 2188, "total_steps": 2436, "loss": 0.7056915760040283, "lr": 3.150237534147366e-07, "epoch": 2.6945812807881775, "percentage": 89.82, "elapsed_time": "0:41:58", "remaining_time": "0:04:45"} +{"current_steps": 2189, "total_steps": 2436, "loss": 0.35921359062194824, "lr": 3.125251659099332e-07, "epoch": 2.695812807881773, "percentage": 89.86, "elapsed_time": "0:42:00", "remaining_time": "0:04:44"} +{"current_steps": 2190, "total_steps": 2436, "loss": 0.17715278267860413, "lr": 3.1003620688042636e-07, "epoch": 2.697044334975369, "percentage": 89.9, "elapsed_time": "0:42:01", "remaining_time": "0:04:43"} +{"current_steps": 2191, "total_steps": 2436, "loss": 0.20512376725673676, "lr": 3.0755688143875253e-07, "epoch": 2.6982758620689653, "percentage": 89.94, "elapsed_time": "0:42:02", "remaining_time": "0:04:42"} +{"current_steps": 2192, "total_steps": 2436, "loss": 0.38939356803894043, "lr": 3.050871946776596e-07, "epoch": 2.6995073891625614, "percentage": 89.98, "elapsed_time": "0:42:03", "remaining_time": "0:04:40"} +{"current_steps": 2193, "total_steps": 2436, "loss": 0.3292514681816101, "lr": 3.026271516700946e-07, "epoch": 2.7007389162561575, "percentage": 90.02, "elapsed_time": "0:42:04", "remaining_time": "0:04:39"} +{"current_steps": 2194, "total_steps": 2436, "loss": 0.2732661962509155, "lr": 3.0017675746919883e-07, "epoch": 2.7019704433497536, "percentage": 90.07, "elapsed_time": "0:42:05", "remaining_time": "0:04:38"} +{"current_steps": 2195, "total_steps": 2436, "loss": 0.3058941960334778, "lr": 2.9773601710828937e-07, "epoch": 2.7032019704433496, "percentage": 90.11, "elapsed_time": "0:42:06", "remaining_time": "0:04:37"} +{"current_steps": 2196, "total_steps": 2436, "loss": 0.7454397082328796, "lr": 2.953049356008586e-07, "epoch": 2.7044334975369457, "percentage": 90.15, "elapsed_time": "0:42:07", "remaining_time": "0:04:36"} +{"current_steps": 2197, "total_steps": 2436, "loss": 0.3679504692554474, "lr": 2.928835179405548e-07, "epoch": 2.705665024630542, "percentage": 90.19, "elapsed_time": "0:42:08", "remaining_time": "0:04:35"} +{"current_steps": 2198, "total_steps": 2436, "loss": 0.2241794466972351, "lr": 2.9047176910117824e-07, "epoch": 2.706896551724138, "percentage": 90.23, "elapsed_time": "0:42:09", "remaining_time": "0:04:33"} +{"current_steps": 2199, "total_steps": 2436, "loss": 0.19927407801151276, "lr": 2.8806969403666897e-07, "epoch": 2.708128078817734, "percentage": 90.27, "elapsed_time": "0:42:10", "remaining_time": "0:04:32"} +{"current_steps": 2200, "total_steps": 2436, "loss": 0.2808955907821655, "lr": 2.856772976810929e-07, "epoch": 2.70935960591133, "percentage": 90.31, "elapsed_time": "0:42:11", "remaining_time": "0:04:31"} +{"current_steps": 2201, "total_steps": 2436, "loss": 0.7279784083366394, "lr": 2.8329458494863846e-07, "epoch": 2.710591133004926, "percentage": 90.35, "elapsed_time": "0:42:12", "remaining_time": "0:04:30"} +{"current_steps": 2202, "total_steps": 2436, "loss": 0.47690945863723755, "lr": 2.809215607336024e-07, "epoch": 2.7118226600985222, "percentage": 90.39, "elapsed_time": "0:42:14", "remaining_time": "0:04:29"} +{"current_steps": 2203, "total_steps": 2436, "loss": 0.1997358649969101, "lr": 2.7855822991037895e-07, "epoch": 2.7130541871921183, "percentage": 90.44, "elapsed_time": "0:42:15", "remaining_time": "0:04:28"} +{"current_steps": 2204, "total_steps": 2436, "loss": 0.3269602954387665, "lr": 2.762045973334526e-07, "epoch": 2.7142857142857144, "percentage": 90.48, "elapsed_time": "0:42:16", "remaining_time": "0:04:26"} +{"current_steps": 2205, "total_steps": 2436, "loss": 0.5450934767723083, "lr": 2.738606678373873e-07, "epoch": 2.7155172413793105, "percentage": 90.52, "elapsed_time": "0:42:17", "remaining_time": "0:04:25"} +{"current_steps": 2206, "total_steps": 2436, "loss": 0.4732050895690918, "lr": 2.7152644623681503e-07, "epoch": 2.7167487684729066, "percentage": 90.56, "elapsed_time": "0:42:18", "remaining_time": "0:04:24"} +{"current_steps": 2207, "total_steps": 2436, "loss": 0.26588505506515503, "lr": 2.6920193732642594e-07, "epoch": 2.7179802955665027, "percentage": 90.6, "elapsed_time": "0:42:19", "remaining_time": "0:04:23"} +{"current_steps": 2208, "total_steps": 2436, "loss": 0.09280772507190704, "lr": 2.668871458809613e-07, "epoch": 2.7192118226600988, "percentage": 90.64, "elapsed_time": "0:42:20", "remaining_time": "0:04:22"} +{"current_steps": 2209, "total_steps": 2436, "loss": 0.3763241767883301, "lr": 2.6458207665520266e-07, "epoch": 2.720443349753695, "percentage": 90.68, "elapsed_time": "0:42:21", "remaining_time": "0:04:21"} +{"current_steps": 2210, "total_steps": 2436, "loss": 0.46730220317840576, "lr": 2.6228673438395804e-07, "epoch": 2.7216748768472905, "percentage": 90.72, "elapsed_time": "0:42:22", "remaining_time": "0:04:20"} +{"current_steps": 2211, "total_steps": 2436, "loss": 0.42677825689315796, "lr": 2.600011237820577e-07, "epoch": 2.7229064039408866, "percentage": 90.76, "elapsed_time": "0:42:23", "remaining_time": "0:04:18"} +{"current_steps": 2212, "total_steps": 2436, "loss": 0.4460552930831909, "lr": 2.577252495443422e-07, "epoch": 2.7241379310344827, "percentage": 90.8, "elapsed_time": "0:42:24", "remaining_time": "0:04:17"} +{"current_steps": 2213, "total_steps": 2436, "loss": 0.5031150579452515, "lr": 2.5545911634565266e-07, "epoch": 2.7253694581280787, "percentage": 90.85, "elapsed_time": "0:42:25", "remaining_time": "0:04:16"} +{"current_steps": 2214, "total_steps": 2436, "loss": 0.18559831380844116, "lr": 2.5320272884081955e-07, "epoch": 2.726600985221675, "percentage": 90.89, "elapsed_time": "0:42:26", "remaining_time": "0:04:15"} +{"current_steps": 2215, "total_steps": 2436, "loss": 0.2087395340204239, "lr": 2.5095609166465805e-07, "epoch": 2.727832512315271, "percentage": 90.93, "elapsed_time": "0:42:28", "remaining_time": "0:04:14"} +{"current_steps": 2216, "total_steps": 2436, "loss": 0.21503375470638275, "lr": 2.4871920943195404e-07, "epoch": 2.729064039408867, "percentage": 90.97, "elapsed_time": "0:42:29", "remaining_time": "0:04:13"} +{"current_steps": 2217, "total_steps": 2436, "loss": 0.20347240567207336, "lr": 2.4649208673745317e-07, "epoch": 2.730295566502463, "percentage": 91.01, "elapsed_time": "0:42:30", "remaining_time": "0:04:11"} +{"current_steps": 2218, "total_steps": 2436, "loss": 0.20019523799419403, "lr": 2.442747281558572e-07, "epoch": 2.731527093596059, "percentage": 91.05, "elapsed_time": "0:42:31", "remaining_time": "0:04:10"} +{"current_steps": 2219, "total_steps": 2436, "loss": 0.6672437191009521, "lr": 2.420671382418122e-07, "epoch": 2.7327586206896552, "percentage": 91.09, "elapsed_time": "0:42:32", "remaining_time": "0:04:09"} +{"current_steps": 2220, "total_steps": 2436, "loss": 0.28304070234298706, "lr": 2.398693215298953e-07, "epoch": 2.7339901477832513, "percentage": 91.13, "elapsed_time": "0:42:33", "remaining_time": "0:04:08"} +{"current_steps": 2221, "total_steps": 2436, "loss": 0.7915571331977844, "lr": 2.3768128253461253e-07, "epoch": 2.7352216748768474, "percentage": 91.17, "elapsed_time": "0:42:34", "remaining_time": "0:04:07"} +{"current_steps": 2222, "total_steps": 2436, "loss": 0.2920302152633667, "lr": 2.3550302575038154e-07, "epoch": 2.7364532019704435, "percentage": 91.22, "elapsed_time": "0:42:35", "remaining_time": "0:04:06"} +{"current_steps": 2223, "total_steps": 2436, "loss": 0.7924119830131531, "lr": 2.333345556515304e-07, "epoch": 2.737684729064039, "percentage": 91.26, "elapsed_time": "0:42:36", "remaining_time": "0:04:04"} +{"current_steps": 2224, "total_steps": 2436, "loss": 2.4264345169067383, "lr": 2.311758766922806e-07, "epoch": 2.7389162561576352, "percentage": 91.3, "elapsed_time": "0:42:37", "remaining_time": "0:04:03"} +{"current_steps": 2225, "total_steps": 2436, "loss": 0.6286523342132568, "lr": 2.290269933067457e-07, "epoch": 2.7401477832512313, "percentage": 91.34, "elapsed_time": "0:42:38", "remaining_time": "0:04:02"} +{"current_steps": 2226, "total_steps": 2436, "loss": 0.4733774662017822, "lr": 2.2688790990891606e-07, "epoch": 2.7413793103448274, "percentage": 91.38, "elapsed_time": "0:42:39", "remaining_time": "0:04:01"} +{"current_steps": 2227, "total_steps": 2436, "loss": 0.41262203454971313, "lr": 2.2475863089265193e-07, "epoch": 2.7426108374384235, "percentage": 91.42, "elapsed_time": "0:42:40", "remaining_time": "0:04:00"} +{"current_steps": 2228, "total_steps": 2436, "loss": 0.9069987535476685, "lr": 2.2263916063167523e-07, "epoch": 2.7438423645320196, "percentage": 91.46, "elapsed_time": "0:42:42", "remaining_time": "0:03:59"} +{"current_steps": 2229, "total_steps": 2436, "loss": 0.33371949195861816, "lr": 2.205295034795596e-07, "epoch": 2.7450738916256157, "percentage": 91.5, "elapsed_time": "0:42:43", "remaining_time": "0:03:58"} +{"current_steps": 2230, "total_steps": 2436, "loss": 0.2515576183795929, "lr": 2.1842966376972142e-07, "epoch": 2.7463054187192117, "percentage": 91.54, "elapsed_time": "0:42:44", "remaining_time": "0:03:56"} +{"current_steps": 2231, "total_steps": 2436, "loss": 0.5854448080062866, "lr": 2.1633964581541212e-07, "epoch": 2.747536945812808, "percentage": 91.58, "elapsed_time": "0:42:45", "remaining_time": "0:03:55"} +{"current_steps": 2232, "total_steps": 2436, "loss": 0.36172378063201904, "lr": 2.1425945390970816e-07, "epoch": 2.748768472906404, "percentage": 91.63, "elapsed_time": "0:42:46", "remaining_time": "0:03:54"} +{"current_steps": 2233, "total_steps": 2436, "loss": 0.8217978477478027, "lr": 2.1218909232550156e-07, "epoch": 2.75, "percentage": 91.67, "elapsed_time": "0:42:47", "remaining_time": "0:03:53"} +{"current_steps": 2234, "total_steps": 2436, "loss": 0.5560616850852966, "lr": 2.1012856531549163e-07, "epoch": 2.751231527093596, "percentage": 91.71, "elapsed_time": "0:42:48", "remaining_time": "0:03:52"} +{"current_steps": 2235, "total_steps": 2436, "loss": 0.3503821790218353, "lr": 2.0807787711217887e-07, "epoch": 2.752463054187192, "percentage": 91.75, "elapsed_time": "0:42:49", "remaining_time": "0:03:51"} +{"current_steps": 2236, "total_steps": 2436, "loss": 0.6000460982322693, "lr": 2.0603703192785264e-07, "epoch": 2.7536945812807883, "percentage": 91.79, "elapsed_time": "0:42:50", "remaining_time": "0:03:49"} +{"current_steps": 2237, "total_steps": 2436, "loss": 0.20410886406898499, "lr": 2.0400603395458408e-07, "epoch": 2.7549261083743843, "percentage": 91.83, "elapsed_time": "0:42:51", "remaining_time": "0:03:48"} +{"current_steps": 2238, "total_steps": 2436, "loss": 0.2497151494026184, "lr": 2.0198488736421607e-07, "epoch": 2.7561576354679804, "percentage": 91.87, "elapsed_time": "0:42:52", "remaining_time": "0:03:47"} +{"current_steps": 2239, "total_steps": 2436, "loss": 0.2881111800670624, "lr": 1.999735963083571e-07, "epoch": 2.7573891625615765, "percentage": 91.91, "elapsed_time": "0:42:53", "remaining_time": "0:03:46"} +{"current_steps": 2240, "total_steps": 2436, "loss": 0.38934653997421265, "lr": 1.9797216491837356e-07, "epoch": 2.7586206896551726, "percentage": 91.95, "elapsed_time": "0:42:54", "remaining_time": "0:03:45"} +{"current_steps": 2241, "total_steps": 2436, "loss": 0.3553803563117981, "lr": 1.9598059730537465e-07, "epoch": 2.7598522167487687, "percentage": 92.0, "elapsed_time": "0:42:56", "remaining_time": "0:03:44"} +{"current_steps": 2242, "total_steps": 2436, "loss": 0.3653762936592102, "lr": 1.9399889756021196e-07, "epoch": 2.7610837438423648, "percentage": 92.04, "elapsed_time": "0:42:57", "remaining_time": "0:03:42"} +{"current_steps": 2243, "total_steps": 2436, "loss": 0.2600834369659424, "lr": 1.9202706975346875e-07, "epoch": 2.762315270935961, "percentage": 92.08, "elapsed_time": "0:42:58", "remaining_time": "0:03:41"} +{"current_steps": 2244, "total_steps": 2436, "loss": 0.4601256847381592, "lr": 1.9006511793544458e-07, "epoch": 2.7635467980295565, "percentage": 92.12, "elapsed_time": "0:42:59", "remaining_time": "0:03:40"} +{"current_steps": 2245, "total_steps": 2436, "loss": 0.33677470684051514, "lr": 1.881130461361591e-07, "epoch": 2.7647783251231526, "percentage": 92.16, "elapsed_time": "0:43:00", "remaining_time": "0:03:39"} +{"current_steps": 2246, "total_steps": 2436, "loss": 0.8099600672721863, "lr": 1.8617085836533544e-07, "epoch": 2.7660098522167487, "percentage": 92.2, "elapsed_time": "0:43:01", "remaining_time": "0:03:38"} +{"current_steps": 2247, "total_steps": 2436, "loss": 0.6992620229721069, "lr": 1.8423855861239238e-07, "epoch": 2.7672413793103448, "percentage": 92.24, "elapsed_time": "0:43:02", "remaining_time": "0:03:37"} +{"current_steps": 2248, "total_steps": 2436, "loss": 0.3640286326408386, "lr": 1.8231615084644105e-07, "epoch": 2.768472906403941, "percentage": 92.28, "elapsed_time": "0:43:03", "remaining_time": "0:03:36"} +{"current_steps": 2249, "total_steps": 2436, "loss": 0.2996286451816559, "lr": 1.8040363901627001e-07, "epoch": 2.769704433497537, "percentage": 92.32, "elapsed_time": "0:43:04", "remaining_time": "0:03:34"} +{"current_steps": 2250, "total_steps": 2436, "loss": 0.43687328696250916, "lr": 1.7850102705034455e-07, "epoch": 2.770935960591133, "percentage": 92.36, "elapsed_time": "0:43:05", "remaining_time": "0:03:33"} +{"current_steps": 2251, "total_steps": 2436, "loss": 0.7942696809768677, "lr": 1.7660831885679074e-07, "epoch": 2.772167487684729, "percentage": 92.41, "elapsed_time": "0:43:06", "remaining_time": "0:03:32"} +{"current_steps": 2252, "total_steps": 2436, "loss": 1.1030818223953247, "lr": 1.747255183233948e-07, "epoch": 2.773399014778325, "percentage": 92.45, "elapsed_time": "0:43:07", "remaining_time": "0:03:31"} +{"current_steps": 2253, "total_steps": 2436, "loss": 0.5030316114425659, "lr": 1.7285262931759084e-07, "epoch": 2.7746305418719213, "percentage": 92.49, "elapsed_time": "0:43:08", "remaining_time": "0:03:30"} +{"current_steps": 2254, "total_steps": 2436, "loss": 0.6707223653793335, "lr": 1.7098965568645264e-07, "epoch": 2.7758620689655173, "percentage": 92.53, "elapsed_time": "0:43:10", "remaining_time": "0:03:29"} +{"current_steps": 2255, "total_steps": 2436, "loss": 0.2983396351337433, "lr": 1.6913660125668806e-07, "epoch": 2.7770935960591134, "percentage": 92.57, "elapsed_time": "0:43:11", "remaining_time": "0:03:27"} +{"current_steps": 2256, "total_steps": 2436, "loss": 0.6233869791030884, "lr": 1.6729346983462957e-07, "epoch": 2.7783251231527095, "percentage": 92.61, "elapsed_time": "0:43:12", "remaining_time": "0:03:26"} +{"current_steps": 2257, "total_steps": 2436, "loss": 0.2838573455810547, "lr": 1.654602652062276e-07, "epoch": 2.779556650246305, "percentage": 92.65, "elapsed_time": "0:43:13", "remaining_time": "0:03:25"} +{"current_steps": 2258, "total_steps": 2436, "loss": 0.516904354095459, "lr": 1.636369911370417e-07, "epoch": 2.7807881773399012, "percentage": 92.69, "elapsed_time": "0:43:14", "remaining_time": "0:03:24"} +{"current_steps": 2259, "total_steps": 2436, "loss": 0.2637355625629425, "lr": 1.6182365137223266e-07, "epoch": 2.7820197044334973, "percentage": 92.73, "elapsed_time": "0:43:15", "remaining_time": "0:03:23"} +{"current_steps": 2260, "total_steps": 2436, "loss": 0.2973381280899048, "lr": 1.600202496365566e-07, "epoch": 2.7832512315270934, "percentage": 92.78, "elapsed_time": "0:43:16", "remaining_time": "0:03:22"} +{"current_steps": 2261, "total_steps": 2436, "loss": 0.731842041015625, "lr": 1.5822678963435479e-07, "epoch": 2.7844827586206895, "percentage": 92.82, "elapsed_time": "0:43:17", "remaining_time": "0:03:21"} +{"current_steps": 2262, "total_steps": 2436, "loss": 0.9091979265213013, "lr": 1.564432750495476e-07, "epoch": 2.7857142857142856, "percentage": 92.86, "elapsed_time": "0:43:18", "remaining_time": "0:03:19"} +{"current_steps": 2263, "total_steps": 2436, "loss": 0.9223085641860962, "lr": 1.5466970954562786e-07, "epoch": 2.7869458128078817, "percentage": 92.9, "elapsed_time": "0:43:19", "remaining_time": "0:03:18"} +{"current_steps": 2264, "total_steps": 2436, "loss": 0.35786327719688416, "lr": 1.5290609676564982e-07, "epoch": 2.7881773399014778, "percentage": 92.94, "elapsed_time": "0:43:20", "remaining_time": "0:03:17"} +{"current_steps": 2265, "total_steps": 2436, "loss": 0.7312544584274292, "lr": 1.5115244033222732e-07, "epoch": 2.789408866995074, "percentage": 92.98, "elapsed_time": "0:43:21", "remaining_time": "0:03:16"} +{"current_steps": 2266, "total_steps": 2436, "loss": 0.8420913219451904, "lr": 1.4940874384751947e-07, "epoch": 2.79064039408867, "percentage": 93.02, "elapsed_time": "0:43:22", "remaining_time": "0:03:15"} +{"current_steps": 2267, "total_steps": 2436, "loss": 0.3239392042160034, "lr": 1.47675010893229e-07, "epoch": 2.791871921182266, "percentage": 93.06, "elapsed_time": "0:43:24", "remaining_time": "0:03:14"} +{"current_steps": 2268, "total_steps": 2436, "loss": 0.3498873710632324, "lr": 1.4595124503059165e-07, "epoch": 2.793103448275862, "percentage": 93.1, "elapsed_time": "0:43:25", "remaining_time": "0:03:12"} +{"current_steps": 2269, "total_steps": 2436, "loss": 0.22733798623085022, "lr": 1.4423744980037068e-07, "epoch": 2.794334975369458, "percentage": 93.14, "elapsed_time": "0:43:26", "remaining_time": "0:03:11"} +{"current_steps": 2270, "total_steps": 2436, "loss": 0.2721923291683197, "lr": 1.425336287228496e-07, "epoch": 2.7955665024630543, "percentage": 93.19, "elapsed_time": "0:43:27", "remaining_time": "0:03:10"} +{"current_steps": 2271, "total_steps": 2436, "loss": 0.344375342130661, "lr": 1.408397852978205e-07, "epoch": 2.7967980295566504, "percentage": 93.23, "elapsed_time": "0:43:28", "remaining_time": "0:03:09"} +{"current_steps": 2272, "total_steps": 2436, "loss": 0.4529953896999359, "lr": 1.391559230045847e-07, "epoch": 2.7980295566502464, "percentage": 93.27, "elapsed_time": "0:43:29", "remaining_time": "0:03:08"} +{"current_steps": 2273, "total_steps": 2436, "loss": 0.1639999896287918, "lr": 1.3748204530193987e-07, "epoch": 2.7992610837438425, "percentage": 93.31, "elapsed_time": "0:43:30", "remaining_time": "0:03:07"} +{"current_steps": 2274, "total_steps": 2436, "loss": 0.23326484858989716, "lr": 1.3581815562817402e-07, "epoch": 2.8004926108374386, "percentage": 93.35, "elapsed_time": "0:43:31", "remaining_time": "0:03:06"} +{"current_steps": 2275, "total_steps": 2436, "loss": 0.22694149613380432, "lr": 1.341642574010582e-07, "epoch": 2.8017241379310347, "percentage": 93.39, "elapsed_time": "0:43:32", "remaining_time": "0:03:04"} +{"current_steps": 2276, "total_steps": 2436, "loss": 0.3588021993637085, "lr": 1.3252035401784324e-07, "epoch": 2.802955665024631, "percentage": 93.43, "elapsed_time": "0:43:34", "remaining_time": "0:03:03"} +{"current_steps": 2277, "total_steps": 2436, "loss": 0.4335256516933441, "lr": 1.3088644885524637e-07, "epoch": 2.804187192118227, "percentage": 93.47, "elapsed_time": "0:43:35", "remaining_time": "0:03:02"} +{"current_steps": 2278, "total_steps": 2436, "loss": 0.1874769926071167, "lr": 1.2926254526944904e-07, "epoch": 2.8054187192118225, "percentage": 93.51, "elapsed_time": "0:43:36", "remaining_time": "0:03:01"} +{"current_steps": 2279, "total_steps": 2436, "loss": 0.3144474923610687, "lr": 1.27648646596088e-07, "epoch": 2.8066502463054186, "percentage": 93.56, "elapsed_time": "0:43:37", "remaining_time": "0:03:00"} +{"current_steps": 2280, "total_steps": 2436, "loss": 0.7241795063018799, "lr": 1.2604475615025092e-07, "epoch": 2.8078817733990147, "percentage": 93.6, "elapsed_time": "0:43:38", "remaining_time": "0:02:59"} +{"current_steps": 2281, "total_steps": 2436, "loss": 0.5169468522071838, "lr": 1.2445087722646576e-07, "epoch": 2.8091133004926108, "percentage": 93.64, "elapsed_time": "0:43:39", "remaining_time": "0:02:57"} +{"current_steps": 2282, "total_steps": 2436, "loss": 1.6869860887527466, "lr": 1.228670130986953e-07, "epoch": 2.810344827586207, "percentage": 93.68, "elapsed_time": "0:43:40", "remaining_time": "0:02:56"} +{"current_steps": 2283, "total_steps": 2436, "loss": 0.47550255060195923, "lr": 1.212931670203338e-07, "epoch": 2.811576354679803, "percentage": 93.72, "elapsed_time": "0:43:41", "remaining_time": "0:02:55"} +{"current_steps": 2284, "total_steps": 2436, "loss": 0.2437782883644104, "lr": 1.197293422241952e-07, "epoch": 2.812807881773399, "percentage": 93.76, "elapsed_time": "0:43:42", "remaining_time": "0:02:54"} +{"current_steps": 2285, "total_steps": 2436, "loss": 0.37867432832717896, "lr": 1.1817554192251002e-07, "epoch": 2.814039408866995, "percentage": 93.8, "elapsed_time": "0:43:43", "remaining_time": "0:02:53"} +{"current_steps": 2286, "total_steps": 2436, "loss": 0.8604614734649658, "lr": 1.1663176930691744e-07, "epoch": 2.815270935960591, "percentage": 93.84, "elapsed_time": "0:43:44", "remaining_time": "0:02:52"} +{"current_steps": 2287, "total_steps": 2436, "loss": 1.1947153806686401, "lr": 1.1509802754845978e-07, "epoch": 2.8165024630541873, "percentage": 93.88, "elapsed_time": "0:43:46", "remaining_time": "0:02:51"} +{"current_steps": 2288, "total_steps": 2436, "loss": 0.30131372809410095, "lr": 1.1357431979757194e-07, "epoch": 2.8177339901477834, "percentage": 93.92, "elapsed_time": "0:43:47", "remaining_time": "0:02:49"} +{"current_steps": 2289, "total_steps": 2436, "loss": 0.47112587094306946, "lr": 1.1206064918408143e-07, "epoch": 2.8189655172413794, "percentage": 93.97, "elapsed_time": "0:43:48", "remaining_time": "0:02:48"} +{"current_steps": 2290, "total_steps": 2436, "loss": 0.2062550187110901, "lr": 1.1055701881719838e-07, "epoch": 2.8201970443349755, "percentage": 94.01, "elapsed_time": "0:43:49", "remaining_time": "0:02:47"} +{"current_steps": 2291, "total_steps": 2436, "loss": 0.30918222665786743, "lr": 1.0906343178550715e-07, "epoch": 2.821428571428571, "percentage": 94.05, "elapsed_time": "0:43:50", "remaining_time": "0:02:46"} +{"current_steps": 2292, "total_steps": 2436, "loss": 0.46675896644592285, "lr": 1.0757989115696421e-07, "epoch": 2.8226600985221673, "percentage": 94.09, "elapsed_time": "0:43:51", "remaining_time": "0:02:45"} +{"current_steps": 2293, "total_steps": 2436, "loss": 0.2514066696166992, "lr": 1.0610639997888917e-07, "epoch": 2.8238916256157633, "percentage": 94.13, "elapsed_time": "0:43:52", "remaining_time": "0:02:44"} +{"current_steps": 2294, "total_steps": 2436, "loss": 0.37799739837646484, "lr": 1.0464296127795926e-07, "epoch": 2.8251231527093594, "percentage": 94.17, "elapsed_time": "0:43:53", "remaining_time": "0:02:43"} +{"current_steps": 2295, "total_steps": 2436, "loss": 1.170919418334961, "lr": 1.0318957806020269e-07, "epoch": 2.8263546798029555, "percentage": 94.21, "elapsed_time": "0:43:54", "remaining_time": "0:02:41"} +{"current_steps": 2296, "total_steps": 2436, "loss": 0.34683138132095337, "lr": 1.0174625331099363e-07, "epoch": 2.8275862068965516, "percentage": 94.25, "elapsed_time": "0:43:55", "remaining_time": "0:02:40"} +{"current_steps": 2297, "total_steps": 2436, "loss": 0.24154211580753326, "lr": 1.0031298999504557e-07, "epoch": 2.8288177339901477, "percentage": 94.29, "elapsed_time": "0:43:56", "remaining_time": "0:02:39"} +{"current_steps": 2298, "total_steps": 2436, "loss": 0.3270137906074524, "lr": 9.888979105640295e-08, "epoch": 2.8300492610837438, "percentage": 94.33, "elapsed_time": "0:43:57", "remaining_time": "0:02:38"} +{"current_steps": 2299, "total_steps": 2436, "loss": 0.33205774426460266, "lr": 9.747665941843953e-08, "epoch": 2.83128078817734, "percentage": 94.38, "elapsed_time": "0:43:58", "remaining_time": "0:02:37"} +{"current_steps": 2300, "total_steps": 2436, "loss": 1.5672454833984375, "lr": 9.607359798384785e-08, "epoch": 2.832512315270936, "percentage": 94.42, "elapsed_time": "0:44:00", "remaining_time": "0:02:36"} +{"current_steps": 2301, "total_steps": 2436, "loss": 0.1868615597486496, "lr": 9.468060963463754e-08, "epoch": 2.833743842364532, "percentage": 94.46, "elapsed_time": "0:44:01", "remaining_time": "0:02:34"} +{"current_steps": 2302, "total_steps": 2436, "loss": 0.3485974371433258, "lr": 9.329769723212478e-08, "epoch": 2.834975369458128, "percentage": 94.5, "elapsed_time": "0:44:02", "remaining_time": "0:02:33"} +{"current_steps": 2303, "total_steps": 2436, "loss": 0.5702242851257324, "lr": 9.192486361693175e-08, "epoch": 2.836206896551724, "percentage": 94.54, "elapsed_time": "0:44:03", "remaining_time": "0:02:32"} +{"current_steps": 2304, "total_steps": 2436, "loss": 0.7004730105400085, "lr": 9.056211160897555e-08, "epoch": 2.8374384236453203, "percentage": 94.58, "elapsed_time": "0:44:04", "remaining_time": "0:02:31"} +{"current_steps": 2305, "total_steps": 2436, "loss": 0.29311710596084595, "lr": 8.920944400746589e-08, "epoch": 2.8386699507389164, "percentage": 94.62, "elapsed_time": "0:44:05", "remaining_time": "0:02:30"} +{"current_steps": 2306, "total_steps": 2436, "loss": 0.18041157722473145, "lr": 8.786686359089747e-08, "epoch": 2.8399014778325125, "percentage": 94.66, "elapsed_time": "0:44:06", "remaining_time": "0:02:29"} +{"current_steps": 2307, "total_steps": 2436, "loss": 0.2873387634754181, "lr": 8.653437311704648e-08, "epoch": 2.8411330049261085, "percentage": 94.7, "elapsed_time": "0:44:07", "remaining_time": "0:02:28"} +{"current_steps": 2308, "total_steps": 2436, "loss": 0.23781178891658783, "lr": 8.521197532296188e-08, "epoch": 2.8423645320197046, "percentage": 94.75, "elapsed_time": "0:44:08", "remaining_time": "0:02:26"} +{"current_steps": 2309, "total_steps": 2436, "loss": 0.5913131833076477, "lr": 8.38996729249636e-08, "epoch": 2.8435960591133007, "percentage": 94.79, "elapsed_time": "0:44:09", "remaining_time": "0:02:25"} +{"current_steps": 2310, "total_steps": 2436, "loss": 0.9139914512634277, "lr": 8.259746861863094e-08, "epoch": 2.844827586206897, "percentage": 94.83, "elapsed_time": "0:44:10", "remaining_time": "0:02:24"} +{"current_steps": 2311, "total_steps": 2436, "loss": 0.22883841395378113, "lr": 8.130536507880538e-08, "epoch": 2.846059113300493, "percentage": 94.87, "elapsed_time": "0:44:11", "remaining_time": "0:02:23"} +{"current_steps": 2312, "total_steps": 2436, "loss": 0.6467199325561523, "lr": 8.002336495957664e-08, "epoch": 2.8472906403940885, "percentage": 94.91, "elapsed_time": "0:44:13", "remaining_time": "0:02:22"} +{"current_steps": 2313, "total_steps": 2436, "loss": 0.48100385069847107, "lr": 7.875147089428436e-08, "epoch": 2.8485221674876846, "percentage": 94.95, "elapsed_time": "0:44:14", "remaining_time": "0:02:21"} +{"current_steps": 2314, "total_steps": 2436, "loss": 0.22535499930381775, "lr": 7.748968549550761e-08, "epoch": 2.8497536945812807, "percentage": 94.99, "elapsed_time": "0:44:15", "remaining_time": "0:02:19"} +{"current_steps": 2315, "total_steps": 2436, "loss": 0.7971012592315674, "lr": 7.623801135506148e-08, "epoch": 2.850985221674877, "percentage": 95.03, "elapsed_time": "0:44:16", "remaining_time": "0:02:18"} +{"current_steps": 2316, "total_steps": 2436, "loss": 0.6965846419334412, "lr": 7.499645104399156e-08, "epoch": 2.852216748768473, "percentage": 95.07, "elapsed_time": "0:44:17", "remaining_time": "0:02:17"} +{"current_steps": 2317, "total_steps": 2436, "loss": 0.2827698588371277, "lr": 7.376500711257062e-08, "epoch": 2.853448275862069, "percentage": 95.11, "elapsed_time": "0:44:18", "remaining_time": "0:02:16"} +{"current_steps": 2318, "total_steps": 2436, "loss": 0.4453064203262329, "lr": 7.254368209028862e-08, "epoch": 2.854679802955665, "percentage": 95.16, "elapsed_time": "0:44:19", "remaining_time": "0:02:15"} +{"current_steps": 2319, "total_steps": 2436, "loss": 0.5363994836807251, "lr": 7.133247848585268e-08, "epoch": 2.855911330049261, "percentage": 95.2, "elapsed_time": "0:44:20", "remaining_time": "0:02:14"} +{"current_steps": 2320, "total_steps": 2436, "loss": 0.33071067929267883, "lr": 7.013139878717934e-08, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "0:44:21", "remaining_time": "0:02:13"} +{"current_steps": 2321, "total_steps": 2436, "loss": 0.6118582487106323, "lr": 6.894044546138845e-08, "epoch": 2.8583743842364533, "percentage": 95.28, "elapsed_time": "0:44:22", "remaining_time": "0:02:11"} +{"current_steps": 2322, "total_steps": 2436, "loss": 0.4941851496696472, "lr": 6.775962095480037e-08, "epoch": 2.8596059113300494, "percentage": 95.32, "elapsed_time": "0:44:23", "remaining_time": "0:02:10"} +{"current_steps": 2323, "total_steps": 2436, "loss": 0.9043294191360474, "lr": 6.65889276929299e-08, "epoch": 2.8608374384236455, "percentage": 95.36, "elapsed_time": "0:44:24", "remaining_time": "0:02:09"} +{"current_steps": 2324, "total_steps": 2436, "loss": 0.5352662801742554, "lr": 6.542836808048181e-08, "epoch": 2.862068965517241, "percentage": 95.4, "elapsed_time": "0:44:25", "remaining_time": "0:02:08"} +{"current_steps": 2325, "total_steps": 2436, "loss": 0.622706413269043, "lr": 6.427794450134529e-08, "epoch": 2.863300492610837, "percentage": 95.44, "elapsed_time": "0:44:27", "remaining_time": "0:02:07"} +{"current_steps": 2326, "total_steps": 2436, "loss": 0.32065168023109436, "lr": 6.313765931858785e-08, "epoch": 2.8645320197044333, "percentage": 95.48, "elapsed_time": "0:44:28", "remaining_time": "0:02:06"} +{"current_steps": 2327, "total_steps": 2436, "loss": 0.5308477878570557, "lr": 6.200751487445367e-08, "epoch": 2.8657635467980294, "percentage": 95.53, "elapsed_time": "0:44:29", "remaining_time": "0:02:05"} +{"current_steps": 2328, "total_steps": 2436, "loss": 0.4006965756416321, "lr": 6.088751349035693e-08, "epoch": 2.8669950738916254, "percentage": 95.57, "elapsed_time": "0:44:30", "remaining_time": "0:02:03"} +{"current_steps": 2329, "total_steps": 2436, "loss": 0.29346001148223877, "lr": 5.977765746687569e-08, "epoch": 2.8682266009852215, "percentage": 95.61, "elapsed_time": "0:44:31", "remaining_time": "0:02:02"} +{"current_steps": 2330, "total_steps": 2436, "loss": 0.17921757698059082, "lr": 5.8677949083749686e-08, "epoch": 2.8694581280788176, "percentage": 95.65, "elapsed_time": "0:44:32", "remaining_time": "0:02:01"} +{"current_steps": 2331, "total_steps": 2436, "loss": 0.3909390866756439, "lr": 5.758839059987531e-08, "epoch": 2.8706896551724137, "percentage": 95.69, "elapsed_time": "0:44:33", "remaining_time": "0:02:00"} +{"current_steps": 2332, "total_steps": 2436, "loss": 0.2947097420692444, "lr": 5.650898425329676e-08, "epoch": 2.87192118226601, "percentage": 95.73, "elapsed_time": "0:44:34", "remaining_time": "0:01:59"} +{"current_steps": 2333, "total_steps": 2436, "loss": 0.27580755949020386, "lr": 5.5439732261209356e-08, "epoch": 2.873152709359606, "percentage": 95.77, "elapsed_time": "0:44:35", "remaining_time": "0:01:58"} +{"current_steps": 2334, "total_steps": 2436, "loss": 0.5352618098258972, "lr": 5.438063681994732e-08, "epoch": 2.874384236453202, "percentage": 95.81, "elapsed_time": "0:44:36", "remaining_time": "0:01:56"} +{"current_steps": 2335, "total_steps": 2436, "loss": 0.4425346553325653, "lr": 5.333170010498434e-08, "epoch": 2.875615763546798, "percentage": 95.85, "elapsed_time": "0:44:37", "remaining_time": "0:01:55"} +{"current_steps": 2336, "total_steps": 2436, "loss": 0.3107433319091797, "lr": 5.229292427092525e-08, "epoch": 2.876847290640394, "percentage": 95.89, "elapsed_time": "0:44:38", "remaining_time": "0:01:54"} +{"current_steps": 2337, "total_steps": 2436, "loss": 0.8459264039993286, "lr": 5.126431145150546e-08, "epoch": 2.87807881773399, "percentage": 95.94, "elapsed_time": "0:44:39", "remaining_time": "0:01:53"} +{"current_steps": 2338, "total_steps": 2436, "loss": 0.6122205257415771, "lr": 5.024586375958429e-08, "epoch": 2.8793103448275863, "percentage": 95.98, "elapsed_time": "0:44:41", "remaining_time": "0:01:52"} +{"current_steps": 2339, "total_steps": 2436, "loss": 0.28234463930130005, "lr": 4.9237583287139454e-08, "epoch": 2.8805418719211824, "percentage": 96.02, "elapsed_time": "0:44:42", "remaining_time": "0:01:51"} +{"current_steps": 2340, "total_steps": 2436, "loss": 0.26258403062820435, "lr": 4.823947210526647e-08, "epoch": 2.8817733990147785, "percentage": 96.06, "elapsed_time": "0:44:43", "remaining_time": "0:01:50"} +{"current_steps": 2341, "total_steps": 2436, "loss": 0.16676993668079376, "lr": 4.72515322641709e-08, "epoch": 2.8830049261083746, "percentage": 96.1, "elapsed_time": "0:44:44", "remaining_time": "0:01:48"} +{"current_steps": 2342, "total_steps": 2436, "loss": 0.5982980132102966, "lr": 4.627376579316667e-08, "epoch": 2.8842364532019706, "percentage": 96.14, "elapsed_time": "0:44:45", "remaining_time": "0:01:47"} +{"current_steps": 2343, "total_steps": 2436, "loss": 0.3576871156692505, "lr": 4.530617470066834e-08, "epoch": 2.8854679802955667, "percentage": 96.18, "elapsed_time": "0:44:46", "remaining_time": "0:01:46"} +{"current_steps": 2344, "total_steps": 2436, "loss": 0.22213858366012573, "lr": 4.4348760974192715e-08, "epoch": 2.886699507389163, "percentage": 96.22, "elapsed_time": "0:44:47", "remaining_time": "0:01:45"} +{"current_steps": 2345, "total_steps": 2436, "loss": 0.7075624465942383, "lr": 4.340152658034835e-08, "epoch": 2.887931034482759, "percentage": 96.26, "elapsed_time": "0:44:48", "remaining_time": "0:01:44"} +{"current_steps": 2346, "total_steps": 2436, "loss": 0.35476282238960266, "lr": 4.246447346483662e-08, "epoch": 2.8891625615763545, "percentage": 96.31, "elapsed_time": "0:44:49", "remaining_time": "0:01:43"} +{"current_steps": 2347, "total_steps": 2436, "loss": 0.4569534659385681, "lr": 4.153760355244507e-08, "epoch": 2.8903940886699506, "percentage": 96.35, "elapsed_time": "0:44:50", "remaining_time": "0:01:42"} +{"current_steps": 2348, "total_steps": 2436, "loss": 0.8425757884979248, "lr": 4.062091874704355e-08, "epoch": 2.8916256157635467, "percentage": 96.39, "elapsed_time": "0:44:51", "remaining_time": "0:01:40"} +{"current_steps": 2349, "total_steps": 2436, "loss": 0.6543349623680115, "lr": 3.971442093158195e-08, "epoch": 2.892857142857143, "percentage": 96.43, "elapsed_time": "0:44:52", "remaining_time": "0:01:39"} +{"current_steps": 2350, "total_steps": 2436, "loss": 0.4949587285518646, "lr": 3.8818111968083607e-08, "epoch": 2.894088669950739, "percentage": 96.47, "elapsed_time": "0:44:53", "remaining_time": "0:01:38"} +{"current_steps": 2351, "total_steps": 2436, "loss": 1.0205111503601074, "lr": 3.7931993697644664e-08, "epoch": 2.895320197044335, "percentage": 96.51, "elapsed_time": "0:44:55", "remaining_time": "0:01:37"} +{"current_steps": 2352, "total_steps": 2436, "loss": 0.429599404335022, "lr": 3.7056067940427484e-08, "epoch": 2.896551724137931, "percentage": 96.55, "elapsed_time": "0:44:56", "remaining_time": "0:01:36"} +{"current_steps": 2353, "total_steps": 2436, "loss": 0.6471319198608398, "lr": 3.6190336495659504e-08, "epoch": 2.897783251231527, "percentage": 96.59, "elapsed_time": "0:44:57", "remaining_time": "0:01:35"} +{"current_steps": 2354, "total_steps": 2436, "loss": 0.6227458715438843, "lr": 3.533480114162713e-08, "epoch": 2.899014778325123, "percentage": 96.63, "elapsed_time": "0:44:58", "remaining_time": "0:01:33"} +{"current_steps": 2355, "total_steps": 2436, "loss": 0.35620149970054626, "lr": 3.448946363567296e-08, "epoch": 2.9002463054187193, "percentage": 96.67, "elapsed_time": "0:44:59", "remaining_time": "0:01:32"} +{"current_steps": 2356, "total_steps": 2436, "loss": 0.41157659888267517, "lr": 3.365432571419247e-08, "epoch": 2.9014778325123154, "percentage": 96.72, "elapsed_time": "0:45:00", "remaining_time": "0:01:31"} +{"current_steps": 2357, "total_steps": 2436, "loss": 0.39660418033599854, "lr": 3.282938909263122e-08, "epoch": 2.9027093596059115, "percentage": 96.76, "elapsed_time": "0:45:01", "remaining_time": "0:01:30"} +{"current_steps": 2358, "total_steps": 2436, "loss": 0.37891146540641785, "lr": 3.201465546547988e-08, "epoch": 2.903940886699507, "percentage": 96.8, "elapsed_time": "0:45:02", "remaining_time": "0:01:29"} +{"current_steps": 2359, "total_steps": 2436, "loss": 0.4459425210952759, "lr": 3.121012650627031e-08, "epoch": 2.905172413793103, "percentage": 96.84, "elapsed_time": "0:45:03", "remaining_time": "0:01:28"} +{"current_steps": 2360, "total_steps": 2436, "loss": 0.4933587610721588, "lr": 3.041580386757448e-08, "epoch": 2.9064039408866993, "percentage": 96.88, "elapsed_time": "0:45:04", "remaining_time": "0:01:27"} +{"current_steps": 2361, "total_steps": 2436, "loss": 0.16229723393917084, "lr": 2.9631689180999457e-08, "epoch": 2.9076354679802954, "percentage": 96.92, "elapsed_time": "0:45:05", "remaining_time": "0:01:25"} +{"current_steps": 2362, "total_steps": 2436, "loss": 0.4784936308860779, "lr": 2.885778405718409e-08, "epoch": 2.9088669950738915, "percentage": 96.96, "elapsed_time": "0:45:06", "remaining_time": "0:01:24"} +{"current_steps": 2363, "total_steps": 2436, "loss": 0.6622560620307922, "lr": 2.8094090085795112e-08, "epoch": 2.9100985221674875, "percentage": 97.0, "elapsed_time": "0:45:07", "remaining_time": "0:01:23"} +{"current_steps": 2364, "total_steps": 2436, "loss": 0.3672278821468353, "lr": 2.7340608835526584e-08, "epoch": 2.9113300492610836, "percentage": 97.04, "elapsed_time": "0:45:09", "remaining_time": "0:01:22"} +{"current_steps": 2365, "total_steps": 2436, "loss": 0.3247770667076111, "lr": 2.6597341854092685e-08, "epoch": 2.9125615763546797, "percentage": 97.09, "elapsed_time": "0:45:10", "remaining_time": "0:01:21"} +{"current_steps": 2366, "total_steps": 2436, "loss": 0.3467229902744293, "lr": 2.586429066822771e-08, "epoch": 2.913793103448276, "percentage": 97.13, "elapsed_time": "0:45:11", "remaining_time": "0:01:20"} +{"current_steps": 2367, "total_steps": 2436, "loss": 0.6725019812583923, "lr": 2.514145678368163e-08, "epoch": 2.915024630541872, "percentage": 97.17, "elapsed_time": "0:45:12", "remaining_time": "0:01:19"} +{"current_steps": 2368, "total_steps": 2436, "loss": 0.6760755777359009, "lr": 2.4428841685217863e-08, "epoch": 2.916256157635468, "percentage": 97.21, "elapsed_time": "0:45:13", "remaining_time": "0:01:17"} +{"current_steps": 2369, "total_steps": 2436, "loss": 0.5354422330856323, "lr": 2.3726446836608298e-08, "epoch": 2.917487684729064, "percentage": 97.25, "elapsed_time": "0:45:14", "remaining_time": "0:01:16"} +{"current_steps": 2370, "total_steps": 2436, "loss": 0.3656280040740967, "lr": 2.3034273680632157e-08, "epoch": 2.91871921182266, "percentage": 97.29, "elapsed_time": "0:45:15", "remaining_time": "0:01:15"} +{"current_steps": 2371, "total_steps": 2436, "loss": 0.28186920285224915, "lr": 2.235232363907269e-08, "epoch": 2.9199507389162562, "percentage": 97.33, "elapsed_time": "0:45:16", "remaining_time": "0:01:14"} +{"current_steps": 2372, "total_steps": 2436, "loss": 0.31556010246276855, "lr": 2.168059811271439e-08, "epoch": 2.9211822660098523, "percentage": 97.37, "elapsed_time": "0:45:17", "remaining_time": "0:01:13"} +{"current_steps": 2373, "total_steps": 2436, "loss": 0.33978280425071716, "lr": 2.101909848133743e-08, "epoch": 2.9224137931034484, "percentage": 97.41, "elapsed_time": "0:45:18", "remaining_time": "0:01:12"} +{"current_steps": 2374, "total_steps": 2436, "loss": 0.5645813941955566, "lr": 2.0367826103720457e-08, "epoch": 2.9236453201970445, "percentage": 97.45, "elapsed_time": "0:45:19", "remaining_time": "0:01:11"} +{"current_steps": 2375, "total_steps": 2436, "loss": 0.21976767480373383, "lr": 1.9726782317632255e-08, "epoch": 2.9248768472906406, "percentage": 97.5, "elapsed_time": "0:45:20", "remaining_time": "0:01:09"} +{"current_steps": 2376, "total_steps": 2436, "loss": 0.6068276166915894, "lr": 1.9095968439830637e-08, "epoch": 2.9261083743842367, "percentage": 97.54, "elapsed_time": "0:45:22", "remaining_time": "0:01:08"} +{"current_steps": 2377, "total_steps": 2436, "loss": 0.2844882607460022, "lr": 1.8475385766063002e-08, "epoch": 2.9273399014778327, "percentage": 97.58, "elapsed_time": "0:45:23", "remaining_time": "0:01:07"} +{"current_steps": 2378, "total_steps": 2436, "loss": 1.1885827779769897, "lr": 1.786503557105912e-08, "epoch": 2.928571428571429, "percentage": 97.62, "elapsed_time": "0:45:24", "remaining_time": "0:01:06"} +{"current_steps": 2379, "total_steps": 2436, "loss": 0.4241114854812622, "lr": 1.7264919108529455e-08, "epoch": 2.9298029556650245, "percentage": 97.66, "elapsed_time": "0:45:25", "remaining_time": "0:01:05"} +{"current_steps": 2380, "total_steps": 2436, "loss": 0.9062713980674744, "lr": 1.6675037611165735e-08, "epoch": 2.9310344827586206, "percentage": 97.7, "elapsed_time": "0:45:26", "remaining_time": "0:01:04"} +{"current_steps": 2381, "total_steps": 2436, "loss": 0.29996055364608765, "lr": 1.6095392290635393e-08, "epoch": 2.9322660098522166, "percentage": 97.74, "elapsed_time": "0:45:27", "remaining_time": "0:01:03"} +{"current_steps": 2382, "total_steps": 2436, "loss": 0.3901692032814026, "lr": 1.552598433757879e-08, "epoch": 2.9334975369458127, "percentage": 97.78, "elapsed_time": "0:45:28", "remaining_time": "0:01:01"} +{"current_steps": 2383, "total_steps": 2436, "loss": 0.36974531412124634, "lr": 1.4966814921608674e-08, "epoch": 2.934729064039409, "percentage": 97.82, "elapsed_time": "0:45:29", "remaining_time": "0:01:00"} +{"current_steps": 2384, "total_steps": 2436, "loss": 0.2913818359375, "lr": 1.441788519130738e-08, "epoch": 2.935960591133005, "percentage": 97.87, "elapsed_time": "0:45:30", "remaining_time": "0:00:59"} +{"current_steps": 2385, "total_steps": 2436, "loss": 2.8897290229797363, "lr": 1.3879196274224626e-08, "epoch": 2.937192118226601, "percentage": 97.91, "elapsed_time": "0:45:31", "remaining_time": "0:00:58"} +{"current_steps": 2386, "total_steps": 2436, "loss": 0.7396224141120911, "lr": 1.335074927687141e-08, "epoch": 2.938423645320197, "percentage": 97.95, "elapsed_time": "0:45:32", "remaining_time": "0:00:57"} +{"current_steps": 2387, "total_steps": 2436, "loss": 0.2923913896083832, "lr": 1.2832545284724995e-08, "epoch": 2.939655172413793, "percentage": 97.99, "elapsed_time": "0:45:33", "remaining_time": "0:00:56"} +{"current_steps": 2388, "total_steps": 2436, "loss": 0.60726398229599, "lr": 1.2324585362220032e-08, "epoch": 2.9408866995073892, "percentage": 98.03, "elapsed_time": "0:45:35", "remaining_time": "0:00:54"} +{"current_steps": 2389, "total_steps": 2436, "loss": 0.3081626892089844, "lr": 1.1826870552749669e-08, "epoch": 2.9421182266009853, "percentage": 98.07, "elapsed_time": "0:45:36", "remaining_time": "0:00:53"} +{"current_steps": 2390, "total_steps": 2436, "loss": 0.7774905562400818, "lr": 1.1339401878663337e-08, "epoch": 2.9433497536945814, "percentage": 98.11, "elapsed_time": "0:45:37", "remaining_time": "0:00:52"} +{"current_steps": 2391, "total_steps": 2436, "loss": 0.5568622350692749, "lr": 1.0862180341263962e-08, "epoch": 2.9445812807881775, "percentage": 98.15, "elapsed_time": "0:45:38", "remaining_time": "0:00:51"} +{"current_steps": 2392, "total_steps": 2436, "loss": 0.42753443121910095, "lr": 1.039520692080409e-08, "epoch": 2.945812807881773, "percentage": 98.19, "elapsed_time": "0:45:39", "remaining_time": "0:00:50"} +{"current_steps": 2393, "total_steps": 2436, "loss": 0.33313125371932983, "lr": 9.938482576487551e-09, "epoch": 2.947044334975369, "percentage": 98.23, "elapsed_time": "0:45:40", "remaining_time": "0:00:49"} +{"current_steps": 2394, "total_steps": 2436, "loss": 0.4345099925994873, "lr": 9.492008246466122e-09, "epoch": 2.9482758620689653, "percentage": 98.28, "elapsed_time": "0:45:41", "remaining_time": "0:00:48"} +{"current_steps": 2395, "total_steps": 2436, "loss": 0.6844139695167542, "lr": 9.055784847836202e-09, "epoch": 2.9495073891625614, "percentage": 98.32, "elapsed_time": "0:45:42", "remaining_time": "0:00:46"} +{"current_steps": 2396, "total_steps": 2436, "loss": 0.4944530725479126, "lr": 8.629813276637144e-09, "epoch": 2.9507389162561575, "percentage": 98.36, "elapsed_time": "0:45:43", "remaining_time": "0:00:45"} +{"current_steps": 2397, "total_steps": 2436, "loss": 0.1517336368560791, "lr": 8.214094407851814e-09, "epoch": 2.9519704433497536, "percentage": 98.4, "elapsed_time": "0:45:44", "remaining_time": "0:00:44"} +{"current_steps": 2398, "total_steps": 2436, "loss": 0.24804279208183289, "lr": 7.808629095402697e-09, "epoch": 2.9532019704433496, "percentage": 98.44, "elapsed_time": "0:45:45", "remaining_time": "0:00:43"} +{"current_steps": 2399, "total_steps": 2436, "loss": 1.2773240804672241, "lr": 7.413418172149689e-09, "epoch": 2.9544334975369457, "percentage": 98.48, "elapsed_time": "0:45:46", "remaining_time": "0:00:42"} +{"current_steps": 2400, "total_steps": 2436, "loss": 0.20905320346355438, "lr": 7.028462449889528e-09, "epoch": 2.955665024630542, "percentage": 98.52, "elapsed_time": "0:45:47", "remaining_time": "0:00:41"} +{"current_steps": 2401, "total_steps": 2436, "loss": 0.24830211699008942, "lr": 6.6537627193558055e-09, "epoch": 2.956896551724138, "percentage": 98.56, "elapsed_time": "0:45:49", "remaining_time": "0:00:40"} +{"current_steps": 2402, "total_steps": 2436, "loss": 0.30148234963417053, "lr": 6.289319750212852e-09, "epoch": 2.958128078817734, "percentage": 98.6, "elapsed_time": "0:45:50", "remaining_time": "0:00:38"} +{"current_steps": 2403, "total_steps": 2436, "loss": 0.7273882031440735, "lr": 5.93513429105741e-09, "epoch": 2.95935960591133, "percentage": 98.65, "elapsed_time": "0:45:51", "remaining_time": "0:00:37"} +{"current_steps": 2404, "total_steps": 2436, "loss": 0.4958484172821045, "lr": 5.591207069417515e-09, "epoch": 2.960591133004926, "percentage": 98.69, "elapsed_time": "0:45:52", "remaining_time": "0:00:36"} +{"current_steps": 2405, "total_steps": 2436, "loss": 0.5852301120758057, "lr": 5.257538791749173e-09, "epoch": 2.9618226600985222, "percentage": 98.73, "elapsed_time": "0:45:53", "remaining_time": "0:00:35"} +{"current_steps": 2406, "total_steps": 2436, "loss": 0.5483534336090088, "lr": 4.934130143435245e-09, "epoch": 2.9630541871921183, "percentage": 98.77, "elapsed_time": "0:45:54", "remaining_time": "0:00:34"} +{"current_steps": 2407, "total_steps": 2436, "loss": 0.49854928255081177, "lr": 4.6209817887848955e-09, "epoch": 2.9642857142857144, "percentage": 98.81, "elapsed_time": "0:45:55", "remaining_time": "0:00:33"} +{"current_steps": 2408, "total_steps": 2436, "loss": 0.9770829677581787, "lr": 4.318094371031922e-09, "epoch": 2.9655172413793105, "percentage": 98.85, "elapsed_time": "0:45:56", "remaining_time": "0:00:32"} +{"current_steps": 2409, "total_steps": 2436, "loss": 0.4265647530555725, "lr": 4.025468512333098e-09, "epoch": 2.9667487684729066, "percentage": 98.89, "elapsed_time": "0:45:57", "remaining_time": "0:00:30"} +{"current_steps": 2410, "total_steps": 2436, "loss": 0.6890873908996582, "lr": 3.743104813767051e-09, "epoch": 2.9679802955665027, "percentage": 98.93, "elapsed_time": "0:45:58", "remaining_time": "0:00:29"} +{"current_steps": 2411, "total_steps": 2436, "loss": 0.28604504466056824, "lr": 3.471003855332611e-09, "epoch": 2.9692118226600988, "percentage": 98.97, "elapsed_time": "0:45:59", "remaining_time": "0:00:28"} +{"current_steps": 2412, "total_steps": 2436, "loss": 0.3280025124549866, "lr": 3.2091661959487986e-09, "epoch": 2.970443349753695, "percentage": 99.01, "elapsed_time": "0:46:00", "remaining_time": "0:00:27"} +{"current_steps": 2413, "total_steps": 2436, "loss": 0.23375985026359558, "lr": 2.9575923734520562e-09, "epoch": 2.9716748768472905, "percentage": 99.06, "elapsed_time": "0:46:02", "remaining_time": "0:00:26"} +{"current_steps": 2414, "total_steps": 2436, "loss": 0.5062013864517212, "lr": 2.7162829045979113e-09, "epoch": 2.9729064039408866, "percentage": 99.1, "elapsed_time": "0:46:03", "remaining_time": "0:00:25"} +{"current_steps": 2415, "total_steps": 2436, "loss": 0.46517398953437805, "lr": 2.4852382850554245e-09, "epoch": 2.9741379310344827, "percentage": 99.14, "elapsed_time": "0:46:04", "remaining_time": "0:00:24"} +{"current_steps": 2416, "total_steps": 2436, "loss": 0.43281105160713196, "lr": 2.264458989410523e-09, "epoch": 2.9753694581280787, "percentage": 99.18, "elapsed_time": "0:46:05", "remaining_time": "0:00:22"} +{"current_steps": 2417, "total_steps": 2436, "loss": 0.6278485655784607, "lr": 2.0539454711626663e-09, "epoch": 2.976600985221675, "percentage": 99.22, "elapsed_time": "0:46:06", "remaining_time": "0:00:21"} +{"current_steps": 2418, "total_steps": 2436, "loss": 0.3320518136024475, "lr": 1.8536981627254036e-09, "epoch": 2.977832512315271, "percentage": 99.26, "elapsed_time": "0:46:07", "remaining_time": "0:00:20"} +{"current_steps": 2419, "total_steps": 2436, "loss": 0.4568738341331482, "lr": 1.6637174754230435e-09, "epoch": 2.979064039408867, "percentage": 99.3, "elapsed_time": "0:46:08", "remaining_time": "0:00:19"} +{"current_steps": 2420, "total_steps": 2436, "loss": 0.24025380611419678, "lr": 1.4840037994923173e-09, "epoch": 2.980295566502463, "percentage": 99.34, "elapsed_time": "0:46:09", "remaining_time": "0:00:18"} +{"current_steps": 2421, "total_steps": 2436, "loss": 0.33217573165893555, "lr": 1.3145575040801605e-09, "epoch": 2.981527093596059, "percentage": 99.38, "elapsed_time": "0:46:10", "remaining_time": "0:00:17"} +{"current_steps": 2422, "total_steps": 2436, "loss": 1.5295354127883911, "lr": 1.1553789372453771e-09, "epoch": 2.9827586206896552, "percentage": 99.43, "elapsed_time": "0:46:11", "remaining_time": "0:00:16"} +{"current_steps": 2423, "total_steps": 2436, "loss": 0.6207250952720642, "lr": 1.0064684259525337e-09, "epoch": 2.9839901477832513, "percentage": 99.47, "elapsed_time": "0:46:12", "remaining_time": "0:00:14"} +{"current_steps": 2424, "total_steps": 2436, "loss": 0.4011062681674957, "lr": 8.678262760775102e-10, "epoch": 2.9852216748768474, "percentage": 99.51, "elapsed_time": "0:46:13", "remaining_time": "0:00:13"} +{"current_steps": 2425, "total_steps": 2436, "loss": 0.8355351090431213, "lr": 7.394527724030598e-10, "epoch": 2.9864532019704435, "percentage": 99.55, "elapsed_time": "0:46:14", "remaining_time": "0:00:12"} +{"current_steps": 2426, "total_steps": 2436, "loss": 0.6552157998085022, "lr": 6.213481786199182e-10, "epoch": 2.987684729064039, "percentage": 99.59, "elapsed_time": "0:46:16", "remaining_time": "0:00:11"} +{"current_steps": 2427, "total_steps": 2436, "loss": 0.4416411519050598, "lr": 5.13512737324029e-10, "epoch": 2.9889162561576352, "percentage": 99.63, "elapsed_time": "0:46:17", "remaining_time": "0:00:10"} +{"current_steps": 2428, "total_steps": 2436, "loss": 0.3720128834247589, "lr": 4.159466700187631e-10, "epoch": 2.9901477832512313, "percentage": 99.67, "elapsed_time": "0:46:18", "remaining_time": "0:00:09"} +{"current_steps": 2429, "total_steps": 2436, "loss": 0.6710848212242126, "lr": 3.2865017711380955e-10, "epoch": 2.9913793103448274, "percentage": 99.71, "elapsed_time": "0:46:19", "remaining_time": "0:00:08"} +{"current_steps": 2430, "total_steps": 2436, "loss": 0.7640970349311829, "lr": 2.516234379235094e-10, "epoch": 2.9926108374384235, "percentage": 99.75, "elapsed_time": "0:46:20", "remaining_time": "0:00:06"} +{"current_steps": 2431, "total_steps": 2436, "loss": 0.5783921480178833, "lr": 1.848666106674113e-10, "epoch": 2.9938423645320196, "percentage": 99.79, "elapsed_time": "0:46:21", "remaining_time": "0:00:05"} +{"current_steps": 2432, "total_steps": 2436, "loss": 0.411626935005188, "lr": 1.2837983246916098e-10, "epoch": 2.9950738916256157, "percentage": 99.84, "elapsed_time": "0:46:22", "remaining_time": "0:00:04"} +{"current_steps": 2433, "total_steps": 2436, "loss": 0.529446005821228, "lr": 8.216321935816673e-11, "epoch": 2.9963054187192117, "percentage": 99.88, "elapsed_time": "0:46:23", "remaining_time": "0:00:03"} +{"current_steps": 2434, "total_steps": 2436, "loss": 0.44549500942230225, "lr": 4.6216866266823867e-11, "epoch": 2.997536945812808, "percentage": 99.92, "elapsed_time": "0:46:24", "remaining_time": "0:00:02"} +{"current_steps": 2435, "total_steps": 2436, "loss": 0.2854122519493103, "lr": 2.0540847032179955e-11, "epoch": 2.998768472906404, "percentage": 99.96, "elapsed_time": "0:46:25", "remaining_time": "0:00:01"} +{"current_steps": 2436, "total_steps": 2436, "loss": 0.4455873966217041, "lr": 5.135214394824672e-12, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:46:26", "remaining_time": "0:00:00"} +{"current_steps": 2436, "total_steps": 2436, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:47:44", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..dd89de8 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,17095 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 2436, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0012315270935960591, + "grad_norm": 36.7600685768779, + "learning_rate": 0.0, + "loss": 4.157936096191406, + "step": 1 + }, + { + "epoch": 0.0024630541871921183, + "grad_norm": 37.664654386111934, + "learning_rate": 4.098360655737705e-08, + "loss": 3.8494455814361572, + "step": 2 + }, + { + "epoch": 0.003694581280788177, + "grad_norm": 38.23654519991739, + "learning_rate": 8.19672131147541e-08, + "loss": 3.7497382164001465, + "step": 3 + }, + { + "epoch": 0.0049261083743842365, + "grad_norm": 49.1212230676838, + "learning_rate": 1.2295081967213116e-07, + "loss": 4.874395847320557, + "step": 4 + }, + { + "epoch": 0.006157635467980296, + "grad_norm": 51.23013396325368, + "learning_rate": 1.639344262295082e-07, + "loss": 5.729328155517578, + "step": 5 + }, + { + "epoch": 0.007389162561576354, + "grad_norm": 33.06662236870545, + "learning_rate": 2.0491803278688524e-07, + "loss": 3.968146324157715, + "step": 6 + }, + { + "epoch": 0.008620689655172414, + "grad_norm": 33.94224964860029, + "learning_rate": 2.459016393442623e-07, + "loss": 4.092198848724365, + "step": 7 + }, + { + "epoch": 0.009852216748768473, + "grad_norm": 28.585037517248036, + "learning_rate": 2.8688524590163937e-07, + "loss": 3.4101109504699707, + "step": 8 + }, + { + "epoch": 0.011083743842364532, + "grad_norm": 39.512646004891735, + "learning_rate": 3.278688524590164e-07, + "loss": 4.387180805206299, + "step": 9 + }, + { + "epoch": 0.012315270935960592, + "grad_norm": 29.487139965581328, + "learning_rate": 3.6885245901639347e-07, + "loss": 3.4985814094543457, + "step": 10 + }, + { + "epoch": 0.013546798029556651, + "grad_norm": 35.1254398727907, + "learning_rate": 4.0983606557377047e-07, + "loss": 5.157108306884766, + "step": 11 + }, + { + "epoch": 0.014778325123152709, + "grad_norm": 33.7037580376338, + "learning_rate": 4.508196721311476e-07, + "loss": 4.057161808013916, + "step": 12 + }, + { + "epoch": 0.01600985221674877, + "grad_norm": 35.136997816960864, + "learning_rate": 4.918032786885246e-07, + "loss": 4.237695693969727, + "step": 13 + }, + { + "epoch": 0.017241379310344827, + "grad_norm": 39.34259468640213, + "learning_rate": 5.327868852459017e-07, + "loss": 4.635364532470703, + "step": 14 + }, + { + "epoch": 0.01847290640394089, + "grad_norm": 33.5811322334086, + "learning_rate": 5.737704918032787e-07, + "loss": 3.3291709423065186, + "step": 15 + }, + { + "epoch": 0.019704433497536946, + "grad_norm": 33.93459885987163, + "learning_rate": 6.147540983606558e-07, + "loss": 3.8693442344665527, + "step": 16 + }, + { + "epoch": 0.020935960591133004, + "grad_norm": 25.605142057165235, + "learning_rate": 6.557377049180328e-07, + "loss": 3.4419002532958984, + "step": 17 + }, + { + "epoch": 0.022167487684729065, + "grad_norm": 33.566059151369195, + "learning_rate": 6.967213114754098e-07, + "loss": 3.8446784019470215, + "step": 18 + }, + { + "epoch": 0.023399014778325122, + "grad_norm": 29.72848721122937, + "learning_rate": 7.377049180327869e-07, + "loss": 3.5930001735687256, + "step": 19 + }, + { + "epoch": 0.024630541871921183, + "grad_norm": 26.393927957123275, + "learning_rate": 7.78688524590164e-07, + "loss": 3.638699531555176, + "step": 20 + }, + { + "epoch": 0.02586206896551724, + "grad_norm": 26.06446386508918, + "learning_rate": 8.196721311475409e-07, + "loss": 3.6789143085479736, + "step": 21 + }, + { + "epoch": 0.027093596059113302, + "grad_norm": 35.2733178056508, + "learning_rate": 8.606557377049181e-07, + "loss": 3.959703207015991, + "step": 22 + }, + { + "epoch": 0.02832512315270936, + "grad_norm": 33.03896583989334, + "learning_rate": 9.016393442622952e-07, + "loss": 3.8822054862976074, + "step": 23 + }, + { + "epoch": 0.029556650246305417, + "grad_norm": 33.57337166473473, + "learning_rate": 9.426229508196721e-07, + "loss": 3.8448376655578613, + "step": 24 + }, + { + "epoch": 0.03078817733990148, + "grad_norm": 20.141759958099808, + "learning_rate": 9.836065573770493e-07, + "loss": 3.372765064239502, + "step": 25 + }, + { + "epoch": 0.03201970443349754, + "grad_norm": 23.420906015149534, + "learning_rate": 1.0245901639344263e-06, + "loss": 3.4989559650421143, + "step": 26 + }, + { + "epoch": 0.0332512315270936, + "grad_norm": 33.133583346249836, + "learning_rate": 1.0655737704918034e-06, + "loss": 3.6318516731262207, + "step": 27 + }, + { + "epoch": 0.034482758620689655, + "grad_norm": 18.99907077955952, + "learning_rate": 1.1065573770491804e-06, + "loss": 3.351621627807617, + "step": 28 + }, + { + "epoch": 0.03571428571428571, + "grad_norm": 18.353082575411992, + "learning_rate": 1.1475409836065575e-06, + "loss": 3.1978442668914795, + "step": 29 + }, + { + "epoch": 0.03694581280788178, + "grad_norm": 26.628518248775677, + "learning_rate": 1.1885245901639345e-06, + "loss": 4.033670902252197, + "step": 30 + }, + { + "epoch": 0.038177339901477834, + "grad_norm": 16.452853960671934, + "learning_rate": 1.2295081967213116e-06, + "loss": 3.626315116882324, + "step": 31 + }, + { + "epoch": 0.03940886699507389, + "grad_norm": 16.372280561150735, + "learning_rate": 1.2704918032786886e-06, + "loss": 3.385767936706543, + "step": 32 + }, + { + "epoch": 0.04064039408866995, + "grad_norm": 23.073122100098054, + "learning_rate": 1.3114754098360657e-06, + "loss": 3.946913719177246, + "step": 33 + }, + { + "epoch": 0.04187192118226601, + "grad_norm": 11.580002792760054, + "learning_rate": 1.352459016393443e-06, + "loss": 3.3034565448760986, + "step": 34 + }, + { + "epoch": 0.04310344827586207, + "grad_norm": 17.961230909917667, + "learning_rate": 1.3934426229508196e-06, + "loss": 3.2368359565734863, + "step": 35 + }, + { + "epoch": 0.04433497536945813, + "grad_norm": 11.543206406321579, + "learning_rate": 1.4344262295081968e-06, + "loss": 3.728569984436035, + "step": 36 + }, + { + "epoch": 0.04556650246305419, + "grad_norm": 14.762221765187595, + "learning_rate": 1.4754098360655739e-06, + "loss": 3.3756117820739746, + "step": 37 + }, + { + "epoch": 0.046798029556650245, + "grad_norm": 13.981113216433073, + "learning_rate": 1.516393442622951e-06, + "loss": 3.399596691131592, + "step": 38 + }, + { + "epoch": 0.0480295566502463, + "grad_norm": 24.184372796013783, + "learning_rate": 1.557377049180328e-06, + "loss": 4.209182262420654, + "step": 39 + }, + { + "epoch": 0.04926108374384237, + "grad_norm": 11.628888477605962, + "learning_rate": 1.5983606557377053e-06, + "loss": 2.797691822052002, + "step": 40 + }, + { + "epoch": 0.050492610837438424, + "grad_norm": 16.948512477650098, + "learning_rate": 1.6393442622950819e-06, + "loss": 3.630617141723633, + "step": 41 + }, + { + "epoch": 0.05172413793103448, + "grad_norm": 14.186312302659116, + "learning_rate": 1.6803278688524592e-06, + "loss": 3.182535171508789, + "step": 42 + }, + { + "epoch": 0.05295566502463054, + "grad_norm": 13.666441097834594, + "learning_rate": 1.7213114754098362e-06, + "loss": 3.554767370223999, + "step": 43 + }, + { + "epoch": 0.054187192118226604, + "grad_norm": 16.91458664100256, + "learning_rate": 1.7622950819672133e-06, + "loss": 3.675961494445801, + "step": 44 + }, + { + "epoch": 0.05541871921182266, + "grad_norm": 16.161861225550066, + "learning_rate": 1.8032786885245903e-06, + "loss": 3.346269369125366, + "step": 45 + }, + { + "epoch": 0.05665024630541872, + "grad_norm": 14.040742605132769, + "learning_rate": 1.8442622950819674e-06, + "loss": 3.4892683029174805, + "step": 46 + }, + { + "epoch": 0.05788177339901478, + "grad_norm": 14.981644166015332, + "learning_rate": 1.8852459016393442e-06, + "loss": 3.3602352142333984, + "step": 47 + }, + { + "epoch": 0.059113300492610835, + "grad_norm": 9.346123052417639, + "learning_rate": 1.9262295081967215e-06, + "loss": 3.301713228225708, + "step": 48 + }, + { + "epoch": 0.0603448275862069, + "grad_norm": 22.6894652203607, + "learning_rate": 1.9672131147540985e-06, + "loss": 3.7745046615600586, + "step": 49 + }, + { + "epoch": 0.06157635467980296, + "grad_norm": 8.465817304604528, + "learning_rate": 2.0081967213114756e-06, + "loss": 3.0452070236206055, + "step": 50 + }, + { + "epoch": 0.06280788177339902, + "grad_norm": 20.560185363485036, + "learning_rate": 2.0491803278688526e-06, + "loss": 3.7955079078674316, + "step": 51 + }, + { + "epoch": 0.06403940886699508, + "grad_norm": 8.75621229547506, + "learning_rate": 2.0901639344262297e-06, + "loss": 3.1644039154052734, + "step": 52 + }, + { + "epoch": 0.06527093596059114, + "grad_norm": 13.679443353464602, + "learning_rate": 2.1311475409836067e-06, + "loss": 3.2459874153137207, + "step": 53 + }, + { + "epoch": 0.0665024630541872, + "grad_norm": 12.278683741598382, + "learning_rate": 2.1721311475409838e-06, + "loss": 3.61742901802063, + "step": 54 + }, + { + "epoch": 0.06773399014778325, + "grad_norm": 12.717536959646948, + "learning_rate": 2.213114754098361e-06, + "loss": 3.3136467933654785, + "step": 55 + }, + { + "epoch": 0.06896551724137931, + "grad_norm": 15.543240982145285, + "learning_rate": 2.254098360655738e-06, + "loss": 3.272696018218994, + "step": 56 + }, + { + "epoch": 0.07019704433497537, + "grad_norm": 13.101250342680272, + "learning_rate": 2.295081967213115e-06, + "loss": 3.041365385055542, + "step": 57 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 11.7077150462335, + "learning_rate": 2.336065573770492e-06, + "loss": 3.309293746948242, + "step": 58 + }, + { + "epoch": 0.07266009852216748, + "grad_norm": 26.32874973946408, + "learning_rate": 2.377049180327869e-06, + "loss": 3.4676990509033203, + "step": 59 + }, + { + "epoch": 0.07389162561576355, + "grad_norm": 16.588748060840203, + "learning_rate": 2.418032786885246e-06, + "loss": 2.8236446380615234, + "step": 60 + }, + { + "epoch": 0.07512315270935961, + "grad_norm": 8.18040938852151, + "learning_rate": 2.459016393442623e-06, + "loss": 2.716705083847046, + "step": 61 + }, + { + "epoch": 0.07635467980295567, + "grad_norm": 20.07190390154421, + "learning_rate": 2.5e-06, + "loss": 2.5590922832489014, + "step": 62 + }, + { + "epoch": 0.07758620689655173, + "grad_norm": 11.418876796774995, + "learning_rate": 2.5409836065573773e-06, + "loss": 2.6987993717193604, + "step": 63 + }, + { + "epoch": 0.07881773399014778, + "grad_norm": 13.315536498724418, + "learning_rate": 2.5819672131147543e-06, + "loss": 4.340274810791016, + "step": 64 + }, + { + "epoch": 0.08004926108374384, + "grad_norm": 17.075484530853824, + "learning_rate": 2.6229508196721314e-06, + "loss": 4.166017532348633, + "step": 65 + }, + { + "epoch": 0.0812807881773399, + "grad_norm": 9.586520693266204, + "learning_rate": 2.6639344262295084e-06, + "loss": 2.664743185043335, + "step": 66 + }, + { + "epoch": 0.08251231527093596, + "grad_norm": 11.154276667212649, + "learning_rate": 2.704918032786886e-06, + "loss": 3.4285409450531006, + "step": 67 + }, + { + "epoch": 0.08374384236453201, + "grad_norm": 23.203683210215114, + "learning_rate": 2.745901639344263e-06, + "loss": 2.613044023513794, + "step": 68 + }, + { + "epoch": 0.08497536945812807, + "grad_norm": 13.748249566024421, + "learning_rate": 2.786885245901639e-06, + "loss": 3.1923232078552246, + "step": 69 + }, + { + "epoch": 0.08620689655172414, + "grad_norm": 23.6456335605133, + "learning_rate": 2.8278688524590166e-06, + "loss": 3.881509780883789, + "step": 70 + }, + { + "epoch": 0.0874384236453202, + "grad_norm": 12.242314523228817, + "learning_rate": 2.8688524590163937e-06, + "loss": 3.3872318267822266, + "step": 71 + }, + { + "epoch": 0.08866995073891626, + "grad_norm": 10.174962303917177, + "learning_rate": 2.9098360655737707e-06, + "loss": 3.1114461421966553, + "step": 72 + }, + { + "epoch": 0.08990147783251232, + "grad_norm": 9.979115596445391, + "learning_rate": 2.9508196721311478e-06, + "loss": 3.182547092437744, + "step": 73 + }, + { + "epoch": 0.09113300492610837, + "grad_norm": 10.437140873327547, + "learning_rate": 2.991803278688525e-06, + "loss": 3.488222599029541, + "step": 74 + }, + { + "epoch": 0.09236453201970443, + "grad_norm": 9.422729886318432, + "learning_rate": 3.032786885245902e-06, + "loss": 3.0836119651794434, + "step": 75 + }, + { + "epoch": 0.09359605911330049, + "grad_norm": 9.576987414129725, + "learning_rate": 3.073770491803279e-06, + "loss": 2.965284824371338, + "step": 76 + }, + { + "epoch": 0.09482758620689655, + "grad_norm": 9.051063368959207, + "learning_rate": 3.114754098360656e-06, + "loss": 3.0366950035095215, + "step": 77 + }, + { + "epoch": 0.0960591133004926, + "grad_norm": 19.769081445901076, + "learning_rate": 3.155737704918033e-06, + "loss": 3.7336153984069824, + "step": 78 + }, + { + "epoch": 0.09729064039408868, + "grad_norm": 17.150697728192082, + "learning_rate": 3.1967213114754105e-06, + "loss": 3.3801069259643555, + "step": 79 + }, + { + "epoch": 0.09852216748768473, + "grad_norm": 11.029522805215215, + "learning_rate": 3.2377049180327876e-06, + "loss": 3.1140761375427246, + "step": 80 + }, + { + "epoch": 0.09975369458128079, + "grad_norm": 9.099280236883942, + "learning_rate": 3.2786885245901638e-06, + "loss": 3.1199679374694824, + "step": 81 + }, + { + "epoch": 0.10098522167487685, + "grad_norm": 10.894555994753386, + "learning_rate": 3.3196721311475413e-06, + "loss": 2.919370651245117, + "step": 82 + }, + { + "epoch": 0.1022167487684729, + "grad_norm": 10.246835888516838, + "learning_rate": 3.3606557377049183e-06, + "loss": 3.0058987140655518, + "step": 83 + }, + { + "epoch": 0.10344827586206896, + "grad_norm": 8.315907792605513, + "learning_rate": 3.4016393442622954e-06, + "loss": 3.201812744140625, + "step": 84 + }, + { + "epoch": 0.10467980295566502, + "grad_norm": 10.55746200109404, + "learning_rate": 3.4426229508196724e-06, + "loss": 2.8387913703918457, + "step": 85 + }, + { + "epoch": 0.10591133004926108, + "grad_norm": 23.69077930997652, + "learning_rate": 3.4836065573770495e-06, + "loss": 3.565217971801758, + "step": 86 + }, + { + "epoch": 0.10714285714285714, + "grad_norm": 17.752023971892026, + "learning_rate": 3.5245901639344265e-06, + "loss": 3.563566207885742, + "step": 87 + }, + { + "epoch": 0.10837438423645321, + "grad_norm": 7.328374103560201, + "learning_rate": 3.5655737704918036e-06, + "loss": 3.3282840251922607, + "step": 88 + }, + { + "epoch": 0.10960591133004927, + "grad_norm": 9.307632619059875, + "learning_rate": 3.6065573770491806e-06, + "loss": 2.693999767303467, + "step": 89 + }, + { + "epoch": 0.11083743842364532, + "grad_norm": 9.537047052971076, + "learning_rate": 3.6475409836065577e-06, + "loss": 3.0820372104644775, + "step": 90 + }, + { + "epoch": 0.11206896551724138, + "grad_norm": 11.895652602739977, + "learning_rate": 3.6885245901639347e-06, + "loss": 2.5853302478790283, + "step": 91 + }, + { + "epoch": 0.11330049261083744, + "grad_norm": 19.909007675751152, + "learning_rate": 3.729508196721312e-06, + "loss": 3.622239589691162, + "step": 92 + }, + { + "epoch": 0.1145320197044335, + "grad_norm": 9.562243449141407, + "learning_rate": 3.7704918032786884e-06, + "loss": 3.269063949584961, + "step": 93 + }, + { + "epoch": 0.11576354679802955, + "grad_norm": 10.402493100303827, + "learning_rate": 3.811475409836066e-06, + "loss": 2.932877540588379, + "step": 94 + }, + { + "epoch": 0.11699507389162561, + "grad_norm": 7.9937288583052, + "learning_rate": 3.852459016393443e-06, + "loss": 2.8118062019348145, + "step": 95 + }, + { + "epoch": 0.11822660098522167, + "grad_norm": 12.161021036700474, + "learning_rate": 3.8934426229508196e-06, + "loss": 2.977217674255371, + "step": 96 + }, + { + "epoch": 0.11945812807881774, + "grad_norm": 9.48055025878799, + "learning_rate": 3.934426229508197e-06, + "loss": 2.534318685531616, + "step": 97 + }, + { + "epoch": 0.1206896551724138, + "grad_norm": 8.971246829575332, + "learning_rate": 3.975409836065574e-06, + "loss": 2.888187885284424, + "step": 98 + }, + { + "epoch": 0.12192118226600986, + "grad_norm": 9.005963079459367, + "learning_rate": 4.016393442622951e-06, + "loss": 2.6558847427368164, + "step": 99 + }, + { + "epoch": 0.12315270935960591, + "grad_norm": 9.651575487247985, + "learning_rate": 4.057377049180329e-06, + "loss": 2.707779884338379, + "step": 100 + }, + { + "epoch": 0.12438423645320197, + "grad_norm": 8.8113086796363, + "learning_rate": 4.098360655737705e-06, + "loss": 3.2292768955230713, + "step": 101 + }, + { + "epoch": 0.12561576354679804, + "grad_norm": 13.438004585842267, + "learning_rate": 4.139344262295083e-06, + "loss": 2.9476242065429688, + "step": 102 + }, + { + "epoch": 0.1268472906403941, + "grad_norm": 9.014089316100105, + "learning_rate": 4.180327868852459e-06, + "loss": 2.9598989486694336, + "step": 103 + }, + { + "epoch": 0.12807881773399016, + "grad_norm": 8.84790292690003, + "learning_rate": 4.221311475409837e-06, + "loss": 2.593669891357422, + "step": 104 + }, + { + "epoch": 0.12931034482758622, + "grad_norm": 9.732549020932908, + "learning_rate": 4.2622950819672135e-06, + "loss": 2.884164810180664, + "step": 105 + }, + { + "epoch": 0.13054187192118227, + "grad_norm": 16.843882776588455, + "learning_rate": 4.30327868852459e-06, + "loss": 3.091454267501831, + "step": 106 + }, + { + "epoch": 0.13177339901477833, + "grad_norm": 11.588593389024608, + "learning_rate": 4.3442622950819676e-06, + "loss": 2.913923740386963, + "step": 107 + }, + { + "epoch": 0.1330049261083744, + "grad_norm": 18.29569166468431, + "learning_rate": 4.385245901639344e-06, + "loss": 2.779545307159424, + "step": 108 + }, + { + "epoch": 0.13423645320197045, + "grad_norm": 9.202902461418143, + "learning_rate": 4.426229508196722e-06, + "loss": 1.8711936473846436, + "step": 109 + }, + { + "epoch": 0.1354679802955665, + "grad_norm": 13.481452134492262, + "learning_rate": 4.467213114754098e-06, + "loss": 2.892902374267578, + "step": 110 + }, + { + "epoch": 0.13669950738916256, + "grad_norm": 12.958399723073786, + "learning_rate": 4.508196721311476e-06, + "loss": 3.0064496994018555, + "step": 111 + }, + { + "epoch": 0.13793103448275862, + "grad_norm": 13.016721832572243, + "learning_rate": 4.549180327868853e-06, + "loss": 2.8515172004699707, + "step": 112 + }, + { + "epoch": 0.13916256157635468, + "grad_norm": 8.374489861175874, + "learning_rate": 4.59016393442623e-06, + "loss": 3.2504403591156006, + "step": 113 + }, + { + "epoch": 0.14039408866995073, + "grad_norm": 7.893218569270328, + "learning_rate": 4.631147540983607e-06, + "loss": 2.67405366897583, + "step": 114 + }, + { + "epoch": 0.1416256157635468, + "grad_norm": 10.146133271952388, + "learning_rate": 4.672131147540984e-06, + "loss": 3.079516887664795, + "step": 115 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 19.354096600007853, + "learning_rate": 4.7131147540983615e-06, + "loss": 2.8897287845611572, + "step": 116 + }, + { + "epoch": 0.1440886699507389, + "grad_norm": 13.276953948761626, + "learning_rate": 4.754098360655738e-06, + "loss": 2.7275729179382324, + "step": 117 + }, + { + "epoch": 0.14532019704433496, + "grad_norm": 9.682874064462416, + "learning_rate": 4.795081967213115e-06, + "loss": 2.9996538162231445, + "step": 118 + }, + { + "epoch": 0.14655172413793102, + "grad_norm": 7.397102570298892, + "learning_rate": 4.836065573770492e-06, + "loss": 3.307245969772339, + "step": 119 + }, + { + "epoch": 0.1477832512315271, + "grad_norm": 12.665703486872426, + "learning_rate": 4.877049180327869e-06, + "loss": 3.475133180618286, + "step": 120 + }, + { + "epoch": 0.14901477832512317, + "grad_norm": 11.317195785901513, + "learning_rate": 4.918032786885246e-06, + "loss": 3.0947790145874023, + "step": 121 + }, + { + "epoch": 0.15024630541871922, + "grad_norm": 7.236267930218516, + "learning_rate": 4.959016393442623e-06, + "loss": 2.9675135612487793, + "step": 122 + }, + { + "epoch": 0.15147783251231528, + "grad_norm": 8.759893869589918, + "learning_rate": 5e-06, + "loss": 2.7873148918151855, + "step": 123 + }, + { + "epoch": 0.15270935960591134, + "grad_norm": 10.395692764487977, + "learning_rate": 5.040983606557377e-06, + "loss": 3.10044264793396, + "step": 124 + }, + { + "epoch": 0.1539408866995074, + "grad_norm": 10.40007835832301, + "learning_rate": 5.0819672131147545e-06, + "loss": 3.755798101425171, + "step": 125 + }, + { + "epoch": 0.15517241379310345, + "grad_norm": 13.715148535872732, + "learning_rate": 5.122950819672131e-06, + "loss": 3.0117135047912598, + "step": 126 + }, + { + "epoch": 0.1564039408866995, + "grad_norm": 12.668410235183005, + "learning_rate": 5.163934426229509e-06, + "loss": 2.944417953491211, + "step": 127 + }, + { + "epoch": 0.15763546798029557, + "grad_norm": 14.317219715469237, + "learning_rate": 5.204918032786885e-06, + "loss": 2.672874927520752, + "step": 128 + }, + { + "epoch": 0.15886699507389163, + "grad_norm": 16.489459603874575, + "learning_rate": 5.245901639344263e-06, + "loss": 2.7205734252929688, + "step": 129 + }, + { + "epoch": 0.16009852216748768, + "grad_norm": 16.41932178225047, + "learning_rate": 5.286885245901639e-06, + "loss": 2.883897304534912, + "step": 130 + }, + { + "epoch": 0.16133004926108374, + "grad_norm": 15.043569897203326, + "learning_rate": 5.327868852459017e-06, + "loss": 2.782104253768921, + "step": 131 + }, + { + "epoch": 0.1625615763546798, + "grad_norm": 8.98371180872493, + "learning_rate": 5.3688524590163935e-06, + "loss": 2.6445870399475098, + "step": 132 + }, + { + "epoch": 0.16379310344827586, + "grad_norm": 11.815392040561601, + "learning_rate": 5.409836065573772e-06, + "loss": 2.9319727420806885, + "step": 133 + }, + { + "epoch": 0.16502463054187191, + "grad_norm": 10.152797634103624, + "learning_rate": 5.4508196721311476e-06, + "loss": 3.169668674468994, + "step": 134 + }, + { + "epoch": 0.16625615763546797, + "grad_norm": 14.778160076043047, + "learning_rate": 5.491803278688526e-06, + "loss": 2.8588128089904785, + "step": 135 + }, + { + "epoch": 0.16748768472906403, + "grad_norm": 10.175583728158522, + "learning_rate": 5.5327868852459025e-06, + "loss": 2.9894580841064453, + "step": 136 + }, + { + "epoch": 0.1687192118226601, + "grad_norm": 9.056737222762985, + "learning_rate": 5.573770491803278e-06, + "loss": 2.5721185207366943, + "step": 137 + }, + { + "epoch": 0.16995073891625614, + "grad_norm": 13.273464461148466, + "learning_rate": 5.614754098360657e-06, + "loss": 2.927572727203369, + "step": 138 + }, + { + "epoch": 0.17118226600985223, + "grad_norm": 6.55893818610158, + "learning_rate": 5.655737704918033e-06, + "loss": 2.1956796646118164, + "step": 139 + }, + { + "epoch": 0.1724137931034483, + "grad_norm": 29.225445444647217, + "learning_rate": 5.696721311475411e-06, + "loss": 2.9739363193511963, + "step": 140 + }, + { + "epoch": 0.17364532019704434, + "grad_norm": 11.15274917433196, + "learning_rate": 5.737704918032787e-06, + "loss": 2.9413986206054688, + "step": 141 + }, + { + "epoch": 0.1748768472906404, + "grad_norm": 10.26279112360335, + "learning_rate": 5.778688524590165e-06, + "loss": 3.267493724822998, + "step": 142 + }, + { + "epoch": 0.17610837438423646, + "grad_norm": 10.574770426769376, + "learning_rate": 5.8196721311475415e-06, + "loss": 3.355569362640381, + "step": 143 + }, + { + "epoch": 0.17733990147783252, + "grad_norm": 30.57215689151005, + "learning_rate": 5.860655737704919e-06, + "loss": 1.9742871522903442, + "step": 144 + }, + { + "epoch": 0.17857142857142858, + "grad_norm": 12.842491765573998, + "learning_rate": 5.9016393442622956e-06, + "loss": 3.571032762527466, + "step": 145 + }, + { + "epoch": 0.17980295566502463, + "grad_norm": 12.726974439363154, + "learning_rate": 5.942622950819673e-06, + "loss": 3.3115599155426025, + "step": 146 + }, + { + "epoch": 0.1810344827586207, + "grad_norm": 17.55458268041124, + "learning_rate": 5.98360655737705e-06, + "loss": 2.781893730163574, + "step": 147 + }, + { + "epoch": 0.18226600985221675, + "grad_norm": 21.115989900825127, + "learning_rate": 6.024590163934426e-06, + "loss": 3.5053911209106445, + "step": 148 + }, + { + "epoch": 0.1834975369458128, + "grad_norm": 14.601719954400593, + "learning_rate": 6.065573770491804e-06, + "loss": 2.797297477722168, + "step": 149 + }, + { + "epoch": 0.18472906403940886, + "grad_norm": 11.706500964440364, + "learning_rate": 6.10655737704918e-06, + "loss": 2.995811939239502, + "step": 150 + }, + { + "epoch": 0.18596059113300492, + "grad_norm": 15.414506649569596, + "learning_rate": 6.147540983606558e-06, + "loss": 3.028142213821411, + "step": 151 + }, + { + "epoch": 0.18719211822660098, + "grad_norm": 16.893206406115734, + "learning_rate": 6.1885245901639345e-06, + "loss": 3.092806816101074, + "step": 152 + }, + { + "epoch": 0.18842364532019704, + "grad_norm": 15.790657692703299, + "learning_rate": 6.229508196721312e-06, + "loss": 3.4657726287841797, + "step": 153 + }, + { + "epoch": 0.1896551724137931, + "grad_norm": 14.336314687505745, + "learning_rate": 6.270491803278689e-06, + "loss": 2.888990879058838, + "step": 154 + }, + { + "epoch": 0.19088669950738915, + "grad_norm": 8.384597105554349, + "learning_rate": 6.311475409836066e-06, + "loss": 2.21640682220459, + "step": 155 + }, + { + "epoch": 0.1921182266009852, + "grad_norm": 15.11144998304732, + "learning_rate": 6.352459016393443e-06, + "loss": 3.1153030395507812, + "step": 156 + }, + { + "epoch": 0.1933497536945813, + "grad_norm": 10.552333909396582, + "learning_rate": 6.393442622950821e-06, + "loss": 3.5814146995544434, + "step": 157 + }, + { + "epoch": 0.19458128078817735, + "grad_norm": 16.968338748229492, + "learning_rate": 6.434426229508197e-06, + "loss": 3.3865175247192383, + "step": 158 + }, + { + "epoch": 0.1958128078817734, + "grad_norm": 18.57431273466726, + "learning_rate": 6.475409836065575e-06, + "loss": 3.2125191688537598, + "step": 159 + }, + { + "epoch": 0.19704433497536947, + "grad_norm": 6.884951933192958, + "learning_rate": 6.516393442622952e-06, + "loss": 3.137500286102295, + "step": 160 + }, + { + "epoch": 0.19827586206896552, + "grad_norm": 14.232532156130397, + "learning_rate": 6.5573770491803276e-06, + "loss": 2.63275408744812, + "step": 161 + }, + { + "epoch": 0.19950738916256158, + "grad_norm": 8.457248873163048, + "learning_rate": 6.598360655737706e-06, + "loss": 3.1714844703674316, + "step": 162 + }, + { + "epoch": 0.20073891625615764, + "grad_norm": 8.202663921028103, + "learning_rate": 6.6393442622950825e-06, + "loss": 2.2414371967315674, + "step": 163 + }, + { + "epoch": 0.2019704433497537, + "grad_norm": 21.716160496341246, + "learning_rate": 6.68032786885246e-06, + "loss": 2.4281110763549805, + "step": 164 + }, + { + "epoch": 0.20320197044334976, + "grad_norm": 14.06837422573523, + "learning_rate": 6.721311475409837e-06, + "loss": 2.6953632831573486, + "step": 165 + }, + { + "epoch": 0.2044334975369458, + "grad_norm": 12.440616463990054, + "learning_rate": 6.762295081967214e-06, + "loss": 2.7645516395568848, + "step": 166 + }, + { + "epoch": 0.20566502463054187, + "grad_norm": 9.155924284482328, + "learning_rate": 6.803278688524591e-06, + "loss": 2.676801919937134, + "step": 167 + }, + { + "epoch": 0.20689655172413793, + "grad_norm": 18.399209140322007, + "learning_rate": 6.844262295081968e-06, + "loss": 3.2417163848876953, + "step": 168 + }, + { + "epoch": 0.20812807881773399, + "grad_norm": 10.633235724872472, + "learning_rate": 6.885245901639345e-06, + "loss": 3.1967976093292236, + "step": 169 + }, + { + "epoch": 0.20935960591133004, + "grad_norm": 9.001521768789516, + "learning_rate": 6.926229508196722e-06, + "loss": 3.4212145805358887, + "step": 170 + }, + { + "epoch": 0.2105911330049261, + "grad_norm": 19.131341549460146, + "learning_rate": 6.967213114754099e-06, + "loss": 3.0731911659240723, + "step": 171 + }, + { + "epoch": 0.21182266009852216, + "grad_norm": 24.78027708091891, + "learning_rate": 7.0081967213114756e-06, + "loss": 3.8659727573394775, + "step": 172 + }, + { + "epoch": 0.21305418719211822, + "grad_norm": 7.256951095872975, + "learning_rate": 7.049180327868853e-06, + "loss": 3.036478042602539, + "step": 173 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 13.753177425595323, + "learning_rate": 7.09016393442623e-06, + "loss": 2.489211082458496, + "step": 174 + }, + { + "epoch": 0.21551724137931033, + "grad_norm": 15.568690129763258, + "learning_rate": 7.131147540983607e-06, + "loss": 3.8306775093078613, + "step": 175 + }, + { + "epoch": 0.21674876847290642, + "grad_norm": 14.053955715138319, + "learning_rate": 7.172131147540984e-06, + "loss": 3.0287742614746094, + "step": 176 + }, + { + "epoch": 0.21798029556650247, + "grad_norm": 7.402046078874498, + "learning_rate": 7.213114754098361e-06, + "loss": 2.767753839492798, + "step": 177 + }, + { + "epoch": 0.21921182266009853, + "grad_norm": 7.607064770644376, + "learning_rate": 7.254098360655738e-06, + "loss": 2.8400726318359375, + "step": 178 + }, + { + "epoch": 0.2204433497536946, + "grad_norm": 9.218463959135196, + "learning_rate": 7.295081967213115e-06, + "loss": 2.9013113975524902, + "step": 179 + }, + { + "epoch": 0.22167487684729065, + "grad_norm": 14.207394035741054, + "learning_rate": 7.336065573770492e-06, + "loss": 3.1111714839935303, + "step": 180 + }, + { + "epoch": 0.2229064039408867, + "grad_norm": 22.91981906121516, + "learning_rate": 7.3770491803278695e-06, + "loss": 2.968287229537964, + "step": 181 + }, + { + "epoch": 0.22413793103448276, + "grad_norm": 25.20920899192849, + "learning_rate": 7.418032786885246e-06, + "loss": 3.2560596466064453, + "step": 182 + }, + { + "epoch": 0.22536945812807882, + "grad_norm": 11.263908332317076, + "learning_rate": 7.459016393442624e-06, + "loss": 2.6196365356445312, + "step": 183 + }, + { + "epoch": 0.22660098522167488, + "grad_norm": 9.253114778490854, + "learning_rate": 7.500000000000001e-06, + "loss": 2.48789644241333, + "step": 184 + }, + { + "epoch": 0.22783251231527094, + "grad_norm": 10.894130133931592, + "learning_rate": 7.540983606557377e-06, + "loss": 3.492011308670044, + "step": 185 + }, + { + "epoch": 0.229064039408867, + "grad_norm": 10.265317756792616, + "learning_rate": 7.581967213114755e-06, + "loss": 2.643688917160034, + "step": 186 + }, + { + "epoch": 0.23029556650246305, + "grad_norm": 18.91537781193984, + "learning_rate": 7.622950819672132e-06, + "loss": 3.291731834411621, + "step": 187 + }, + { + "epoch": 0.2315270935960591, + "grad_norm": 8.094549723224802, + "learning_rate": 7.66393442622951e-06, + "loss": 2.9554359912872314, + "step": 188 + }, + { + "epoch": 0.23275862068965517, + "grad_norm": 8.032083532292669, + "learning_rate": 7.704918032786886e-06, + "loss": 2.634860038757324, + "step": 189 + }, + { + "epoch": 0.23399014778325122, + "grad_norm": 12.421064936443088, + "learning_rate": 7.745901639344263e-06, + "loss": 3.505284309387207, + "step": 190 + }, + { + "epoch": 0.23522167487684728, + "grad_norm": 9.73160074977933, + "learning_rate": 7.786885245901639e-06, + "loss": 2.8865461349487305, + "step": 191 + }, + { + "epoch": 0.23645320197044334, + "grad_norm": 9.154882618515046, + "learning_rate": 7.827868852459017e-06, + "loss": 2.804072618484497, + "step": 192 + }, + { + "epoch": 0.2376847290640394, + "grad_norm": 19.13061642741136, + "learning_rate": 7.868852459016394e-06, + "loss": 2.830981969833374, + "step": 193 + }, + { + "epoch": 0.23891625615763548, + "grad_norm": 15.563283146640595, + "learning_rate": 7.909836065573772e-06, + "loss": 2.2295336723327637, + "step": 194 + }, + { + "epoch": 0.24014778325123154, + "grad_norm": 12.225259694302743, + "learning_rate": 7.950819672131147e-06, + "loss": 2.338548183441162, + "step": 195 + }, + { + "epoch": 0.2413793103448276, + "grad_norm": 9.892040827483035, + "learning_rate": 7.991803278688526e-06, + "loss": 3.0856008529663086, + "step": 196 + }, + { + "epoch": 0.24261083743842365, + "grad_norm": 7.694617498251832, + "learning_rate": 8.032786885245902e-06, + "loss": 2.8032941818237305, + "step": 197 + }, + { + "epoch": 0.2438423645320197, + "grad_norm": 14.517107480578428, + "learning_rate": 8.073770491803279e-06, + "loss": 2.793623924255371, + "step": 198 + }, + { + "epoch": 0.24507389162561577, + "grad_norm": 14.257539519236145, + "learning_rate": 8.114754098360657e-06, + "loss": 3.316802740097046, + "step": 199 + }, + { + "epoch": 0.24630541871921183, + "grad_norm": 9.345732169704513, + "learning_rate": 8.155737704918034e-06, + "loss": 2.7230677604675293, + "step": 200 + }, + { + "epoch": 0.24753694581280788, + "grad_norm": 15.629904452590212, + "learning_rate": 8.19672131147541e-06, + "loss": 3.3343541622161865, + "step": 201 + }, + { + "epoch": 0.24876847290640394, + "grad_norm": 15.523761255621764, + "learning_rate": 8.237704918032787e-06, + "loss": 2.6796741485595703, + "step": 202 + }, + { + "epoch": 0.25, + "grad_norm": 19.56220339462512, + "learning_rate": 8.278688524590165e-06, + "loss": 3.5974526405334473, + "step": 203 + }, + { + "epoch": 0.2512315270935961, + "grad_norm": 13.897070581153926, + "learning_rate": 8.319672131147542e-06, + "loss": 2.2697930335998535, + "step": 204 + }, + { + "epoch": 0.2524630541871921, + "grad_norm": 58.73834156491825, + "learning_rate": 8.360655737704919e-06, + "loss": 3.692251682281494, + "step": 205 + }, + { + "epoch": 0.2536945812807882, + "grad_norm": 7.38409958845656, + "learning_rate": 8.401639344262295e-06, + "loss": 1.9303261041641235, + "step": 206 + }, + { + "epoch": 0.25492610837438423, + "grad_norm": 9.965151267955871, + "learning_rate": 8.442622950819674e-06, + "loss": 2.538956880569458, + "step": 207 + }, + { + "epoch": 0.2561576354679803, + "grad_norm": 9.12744959101674, + "learning_rate": 8.48360655737705e-06, + "loss": 2.777608633041382, + "step": 208 + }, + { + "epoch": 0.25738916256157635, + "grad_norm": 7.651759491423955, + "learning_rate": 8.524590163934427e-06, + "loss": 2.5776896476745605, + "step": 209 + }, + { + "epoch": 0.25862068965517243, + "grad_norm": 7.384463920815584, + "learning_rate": 8.565573770491804e-06, + "loss": 2.9199795722961426, + "step": 210 + }, + { + "epoch": 0.25985221674876846, + "grad_norm": 20.103355409171535, + "learning_rate": 8.60655737704918e-06, + "loss": 3.515129566192627, + "step": 211 + }, + { + "epoch": 0.26108374384236455, + "grad_norm": 11.426838299111452, + "learning_rate": 8.647540983606559e-06, + "loss": 2.5549678802490234, + "step": 212 + }, + { + "epoch": 0.2623152709359606, + "grad_norm": 9.257633699344172, + "learning_rate": 8.688524590163935e-06, + "loss": 2.769425630569458, + "step": 213 + }, + { + "epoch": 0.26354679802955666, + "grad_norm": 10.532098802898833, + "learning_rate": 8.729508196721312e-06, + "loss": 3.369231700897217, + "step": 214 + }, + { + "epoch": 0.2647783251231527, + "grad_norm": 9.351621764685488, + "learning_rate": 8.770491803278688e-06, + "loss": 2.942309856414795, + "step": 215 + }, + { + "epoch": 0.2660098522167488, + "grad_norm": 13.925057065300786, + "learning_rate": 8.811475409836067e-06, + "loss": 2.7516608238220215, + "step": 216 + }, + { + "epoch": 0.2672413793103448, + "grad_norm": 36.50661601809998, + "learning_rate": 8.852459016393443e-06, + "loss": 2.8445613384246826, + "step": 217 + }, + { + "epoch": 0.2684729064039409, + "grad_norm": 22.25960453914331, + "learning_rate": 8.893442622950822e-06, + "loss": 2.987518787384033, + "step": 218 + }, + { + "epoch": 0.2697044334975369, + "grad_norm": 16.564591915051718, + "learning_rate": 8.934426229508197e-06, + "loss": 3.2499587535858154, + "step": 219 + }, + { + "epoch": 0.270935960591133, + "grad_norm": 17.28227853231096, + "learning_rate": 8.975409836065575e-06, + "loss": 2.926447868347168, + "step": 220 + }, + { + "epoch": 0.27216748768472904, + "grad_norm": 11.211927116407436, + "learning_rate": 9.016393442622952e-06, + "loss": 2.8910017013549805, + "step": 221 + }, + { + "epoch": 0.2733990147783251, + "grad_norm": 8.72596083956733, + "learning_rate": 9.057377049180328e-06, + "loss": 3.0613536834716797, + "step": 222 + }, + { + "epoch": 0.2746305418719212, + "grad_norm": 9.803135692376356, + "learning_rate": 9.098360655737707e-06, + "loss": 2.829414129257202, + "step": 223 + }, + { + "epoch": 0.27586206896551724, + "grad_norm": 12.92734853493422, + "learning_rate": 9.139344262295083e-06, + "loss": 2.7085399627685547, + "step": 224 + }, + { + "epoch": 0.2770935960591133, + "grad_norm": 9.4118708856159, + "learning_rate": 9.18032786885246e-06, + "loss": 2.6637799739837646, + "step": 225 + }, + { + "epoch": 0.27832512315270935, + "grad_norm": 18.83957093140758, + "learning_rate": 9.221311475409836e-06, + "loss": 2.845503807067871, + "step": 226 + }, + { + "epoch": 0.27955665024630544, + "grad_norm": 13.475569415500434, + "learning_rate": 9.262295081967215e-06, + "loss": 2.954394817352295, + "step": 227 + }, + { + "epoch": 0.28078817733990147, + "grad_norm": 8.290170639522628, + "learning_rate": 9.303278688524591e-06, + "loss": 2.640540838241577, + "step": 228 + }, + { + "epoch": 0.28201970443349755, + "grad_norm": 11.224559700746246, + "learning_rate": 9.344262295081968e-06, + "loss": 2.806300163269043, + "step": 229 + }, + { + "epoch": 0.2832512315270936, + "grad_norm": 7.885675569548075, + "learning_rate": 9.385245901639345e-06, + "loss": 2.6030101776123047, + "step": 230 + }, + { + "epoch": 0.28448275862068967, + "grad_norm": 24.236973973758758, + "learning_rate": 9.426229508196723e-06, + "loss": 2.7991466522216797, + "step": 231 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 8.845347044883379, + "learning_rate": 9.4672131147541e-06, + "loss": 3.106261968612671, + "step": 232 + }, + { + "epoch": 0.2869458128078818, + "grad_norm": 51.821805980416265, + "learning_rate": 9.508196721311476e-06, + "loss": 3.2630815505981445, + "step": 233 + }, + { + "epoch": 0.2881773399014778, + "grad_norm": 16.78742746550897, + "learning_rate": 9.549180327868853e-06, + "loss": 3.1156482696533203, + "step": 234 + }, + { + "epoch": 0.2894088669950739, + "grad_norm": 13.713777073631656, + "learning_rate": 9.59016393442623e-06, + "loss": 3.1271071434020996, + "step": 235 + }, + { + "epoch": 0.29064039408866993, + "grad_norm": 13.698738323083157, + "learning_rate": 9.631147540983608e-06, + "loss": 2.536348342895508, + "step": 236 + }, + { + "epoch": 0.291871921182266, + "grad_norm": 15.926322663194057, + "learning_rate": 9.672131147540984e-06, + "loss": 2.8055825233459473, + "step": 237 + }, + { + "epoch": 0.29310344827586204, + "grad_norm": 10.519363729962654, + "learning_rate": 9.713114754098361e-06, + "loss": 2.9949395656585693, + "step": 238 + }, + { + "epoch": 0.29433497536945813, + "grad_norm": 12.579584872972768, + "learning_rate": 9.754098360655738e-06, + "loss": 2.125136137008667, + "step": 239 + }, + { + "epoch": 0.2955665024630542, + "grad_norm": 11.391036061101172, + "learning_rate": 9.795081967213116e-06, + "loss": 2.830984592437744, + "step": 240 + }, + { + "epoch": 0.29679802955665024, + "grad_norm": 14.46789942529014, + "learning_rate": 9.836065573770493e-06, + "loss": 3.2255706787109375, + "step": 241 + }, + { + "epoch": 0.29802955665024633, + "grad_norm": 8.899469108078774, + "learning_rate": 9.87704918032787e-06, + "loss": 2.686436653137207, + "step": 242 + }, + { + "epoch": 0.29926108374384236, + "grad_norm": 10.094433891654246, + "learning_rate": 9.918032786885246e-06, + "loss": 2.497978687286377, + "step": 243 + }, + { + "epoch": 0.30049261083743845, + "grad_norm": 8.691385167763809, + "learning_rate": 9.959016393442624e-06, + "loss": 3.308448076248169, + "step": 244 + }, + { + "epoch": 0.3017241379310345, + "grad_norm": 15.757524580227669, + "learning_rate": 1e-05, + "loss": 3.2378220558166504, + "step": 245 + }, + { + "epoch": 0.30295566502463056, + "grad_norm": 8.671108255060687, + "learning_rate": 9.999994864785605e-06, + "loss": 2.4129133224487305, + "step": 246 + }, + { + "epoch": 0.3041871921182266, + "grad_norm": 13.501190126023713, + "learning_rate": 9.99997945915297e-06, + "loss": 2.938180923461914, + "step": 247 + }, + { + "epoch": 0.3054187192118227, + "grad_norm": 11.217667256673044, + "learning_rate": 9.999953783133733e-06, + "loss": 2.5165305137634277, + "step": 248 + }, + { + "epoch": 0.3066502463054187, + "grad_norm": 7.520771962392289, + "learning_rate": 9.999917836780642e-06, + "loss": 3.425577163696289, + "step": 249 + }, + { + "epoch": 0.3078817733990148, + "grad_norm": 13.889092280188136, + "learning_rate": 9.999871620167532e-06, + "loss": 2.876093626022339, + "step": 250 + }, + { + "epoch": 0.3091133004926108, + "grad_norm": 7.799661481860974, + "learning_rate": 9.999815133389334e-06, + "loss": 2.9071428775787354, + "step": 251 + }, + { + "epoch": 0.3103448275862069, + "grad_norm": 18.185225557276123, + "learning_rate": 9.999748376562078e-06, + "loss": 2.998086452484131, + "step": 252 + }, + { + "epoch": 0.31157635467980294, + "grad_norm": 27.086825836566575, + "learning_rate": 9.999671349822887e-06, + "loss": 2.1193456649780273, + "step": 253 + }, + { + "epoch": 0.312807881773399, + "grad_norm": 13.320934166458603, + "learning_rate": 9.999584053329983e-06, + "loss": 2.753380298614502, + "step": 254 + }, + { + "epoch": 0.31403940886699505, + "grad_norm": 14.498031739385082, + "learning_rate": 9.999486487262677e-06, + "loss": 2.876704216003418, + "step": 255 + }, + { + "epoch": 0.31527093596059114, + "grad_norm": 13.532410059083729, + "learning_rate": 9.999378651821381e-06, + "loss": 3.0882208347320557, + "step": 256 + }, + { + "epoch": 0.31650246305418717, + "grad_norm": 13.700484400761207, + "learning_rate": 9.999260547227599e-06, + "loss": 3.155285120010376, + "step": 257 + }, + { + "epoch": 0.31773399014778325, + "grad_norm": 12.6000984521867, + "learning_rate": 9.999132173723923e-06, + "loss": 2.7646055221557617, + "step": 258 + }, + { + "epoch": 0.31896551724137934, + "grad_norm": 15.115470197004113, + "learning_rate": 9.998993531574048e-06, + "loss": 2.7237563133239746, + "step": 259 + }, + { + "epoch": 0.32019704433497537, + "grad_norm": 20.594748113733633, + "learning_rate": 9.998844621062755e-06, + "loss": 3.3845739364624023, + "step": 260 + }, + { + "epoch": 0.32142857142857145, + "grad_norm": 10.767576295669059, + "learning_rate": 9.998685442495921e-06, + "loss": 3.8065264225006104, + "step": 261 + }, + { + "epoch": 0.3226600985221675, + "grad_norm": 20.754860824013544, + "learning_rate": 9.998515996200508e-06, + "loss": 2.8899989128112793, + "step": 262 + }, + { + "epoch": 0.32389162561576357, + "grad_norm": 15.819137797930164, + "learning_rate": 9.998336282524579e-06, + "loss": 3.253079414367676, + "step": 263 + }, + { + "epoch": 0.3251231527093596, + "grad_norm": 18.790797790728803, + "learning_rate": 9.998146301837274e-06, + "loss": 3.346510648727417, + "step": 264 + }, + { + "epoch": 0.3263546798029557, + "grad_norm": 23.146345527241454, + "learning_rate": 9.997946054528837e-06, + "loss": 3.4698657989501953, + "step": 265 + }, + { + "epoch": 0.3275862068965517, + "grad_norm": 14.512612088330997, + "learning_rate": 9.99773554101059e-06, + "loss": 3.174567699432373, + "step": 266 + }, + { + "epoch": 0.3288177339901478, + "grad_norm": 12.860516080892424, + "learning_rate": 9.997514761714946e-06, + "loss": 2.5275719165802, + "step": 267 + }, + { + "epoch": 0.33004926108374383, + "grad_norm": 9.43003857415246, + "learning_rate": 9.997283717095403e-06, + "loss": 2.9102673530578613, + "step": 268 + }, + { + "epoch": 0.3312807881773399, + "grad_norm": 11.178249951549107, + "learning_rate": 9.99704240762655e-06, + "loss": 2.865558624267578, + "step": 269 + }, + { + "epoch": 0.33251231527093594, + "grad_norm": 24.802063921828417, + "learning_rate": 9.996790833804053e-06, + "loss": 2.749305248260498, + "step": 270 + }, + { + "epoch": 0.33374384236453203, + "grad_norm": 24.70724769915988, + "learning_rate": 9.996528996144668e-06, + "loss": 2.0590691566467285, + "step": 271 + }, + { + "epoch": 0.33497536945812806, + "grad_norm": 14.115920333851845, + "learning_rate": 9.996256895186234e-06, + "loss": 3.0421628952026367, + "step": 272 + }, + { + "epoch": 0.33620689655172414, + "grad_norm": 12.058059347872495, + "learning_rate": 9.995974531487668e-06, + "loss": 2.8302841186523438, + "step": 273 + }, + { + "epoch": 0.3374384236453202, + "grad_norm": 12.632643288786921, + "learning_rate": 9.995681905628968e-06, + "loss": 2.7192673683166504, + "step": 274 + }, + { + "epoch": 0.33866995073891626, + "grad_norm": 15.484122360072316, + "learning_rate": 9.995379018211215e-06, + "loss": 2.3330166339874268, + "step": 275 + }, + { + "epoch": 0.3399014778325123, + "grad_norm": 13.2967377526589, + "learning_rate": 9.995065869856566e-06, + "loss": 2.5359480381011963, + "step": 276 + }, + { + "epoch": 0.3411330049261084, + "grad_norm": 15.221286627267526, + "learning_rate": 9.994742461208251e-06, + "loss": 3.049252986907959, + "step": 277 + }, + { + "epoch": 0.34236453201970446, + "grad_norm": 15.24270242699156, + "learning_rate": 9.994408792930584e-06, + "loss": 3.3440940380096436, + "step": 278 + }, + { + "epoch": 0.3435960591133005, + "grad_norm": 14.053973379642196, + "learning_rate": 9.994064865708944e-06, + "loss": 3.038376808166504, + "step": 279 + }, + { + "epoch": 0.3448275862068966, + "grad_norm": 22.631635572415856, + "learning_rate": 9.993710680249788e-06, + "loss": 3.6074423789978027, + "step": 280 + }, + { + "epoch": 0.3460591133004926, + "grad_norm": 20.559687915989883, + "learning_rate": 9.993346237280646e-06, + "loss": 2.686741352081299, + "step": 281 + }, + { + "epoch": 0.3472906403940887, + "grad_norm": 12.521946549290966, + "learning_rate": 9.992971537550112e-06, + "loss": 2.4198198318481445, + "step": 282 + }, + { + "epoch": 0.3485221674876847, + "grad_norm": 6.138840145200369, + "learning_rate": 9.992586581827853e-06, + "loss": 2.8091788291931152, + "step": 283 + }, + { + "epoch": 0.3497536945812808, + "grad_norm": 9.177811201919399, + "learning_rate": 9.992191370904599e-06, + "loss": 3.0199592113494873, + "step": 284 + }, + { + "epoch": 0.35098522167487683, + "grad_norm": 11.072879739046153, + "learning_rate": 9.991785905592149e-06, + "loss": 2.6372945308685303, + "step": 285 + }, + { + "epoch": 0.3522167487684729, + "grad_norm": 12.835701532770578, + "learning_rate": 9.991370186723363e-06, + "loss": 2.9127607345581055, + "step": 286 + }, + { + "epoch": 0.35344827586206895, + "grad_norm": 16.621843867679726, + "learning_rate": 9.990944215152166e-06, + "loss": 2.464376926422119, + "step": 287 + }, + { + "epoch": 0.35467980295566504, + "grad_norm": 9.777456171349527, + "learning_rate": 9.990507991753535e-06, + "loss": 2.8306374549865723, + "step": 288 + }, + { + "epoch": 0.35591133004926107, + "grad_norm": 11.701262899932036, + "learning_rate": 9.990061517423513e-06, + "loss": 2.9181313514709473, + "step": 289 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 12.914380903938605, + "learning_rate": 9.989604793079198e-06, + "loss": 3.1937739849090576, + "step": 290 + }, + { + "epoch": 0.3583743842364532, + "grad_norm": 25.41280169964493, + "learning_rate": 9.989137819658738e-06, + "loss": 4.190927028656006, + "step": 291 + }, + { + "epoch": 0.35960591133004927, + "grad_norm": 12.268585179317036, + "learning_rate": 9.988660598121337e-06, + "loss": 2.8343558311462402, + "step": 292 + }, + { + "epoch": 0.3608374384236453, + "grad_norm": 14.508602864953724, + "learning_rate": 9.988173129447251e-06, + "loss": 3.741821050643921, + "step": 293 + }, + { + "epoch": 0.3620689655172414, + "grad_norm": 8.935077328629724, + "learning_rate": 9.98767541463778e-06, + "loss": 2.484419345855713, + "step": 294 + }, + { + "epoch": 0.3633004926108374, + "grad_norm": 8.195009351092525, + "learning_rate": 9.987167454715277e-06, + "loss": 2.671337127685547, + "step": 295 + }, + { + "epoch": 0.3645320197044335, + "grad_norm": 11.197259917333458, + "learning_rate": 9.986649250723129e-06, + "loss": 3.118803024291992, + "step": 296 + }, + { + "epoch": 0.3657635467980296, + "grad_norm": 15.270785643435941, + "learning_rate": 9.986120803725776e-06, + "loss": 3.10141658782959, + "step": 297 + }, + { + "epoch": 0.3669950738916256, + "grad_norm": 11.19651727126236, + "learning_rate": 9.985582114808693e-06, + "loss": 2.7978734970092773, + "step": 298 + }, + { + "epoch": 0.3682266009852217, + "grad_norm": 14.058148431334251, + "learning_rate": 9.985033185078392e-06, + "loss": 2.5770411491394043, + "step": 299 + }, + { + "epoch": 0.3694581280788177, + "grad_norm": 9.544840021071943, + "learning_rate": 9.984474015662421e-06, + "loss": 3.0273873805999756, + "step": 300 + }, + { + "epoch": 0.3706896551724138, + "grad_norm": 8.198220678999139, + "learning_rate": 9.983904607709365e-06, + "loss": 2.9202780723571777, + "step": 301 + }, + { + "epoch": 0.37192118226600984, + "grad_norm": 12.107800006970532, + "learning_rate": 9.983324962388835e-06, + "loss": 2.9816439151763916, + "step": 302 + }, + { + "epoch": 0.3731527093596059, + "grad_norm": 7.601271321831279, + "learning_rate": 9.982735080891471e-06, + "loss": 2.5605852603912354, + "step": 303 + }, + { + "epoch": 0.37438423645320196, + "grad_norm": 13.035543237033318, + "learning_rate": 9.982134964428942e-06, + "loss": 2.9378490447998047, + "step": 304 + }, + { + "epoch": 0.37561576354679804, + "grad_norm": 7.731680542963359, + "learning_rate": 9.981524614233938e-06, + "loss": 2.410521984100342, + "step": 305 + }, + { + "epoch": 0.3768472906403941, + "grad_norm": 13.52353943681927, + "learning_rate": 9.98090403156017e-06, + "loss": 2.381927013397217, + "step": 306 + }, + { + "epoch": 0.37807881773399016, + "grad_norm": 17.35628297309107, + "learning_rate": 9.98027321768237e-06, + "loss": 3.1156816482543945, + "step": 307 + }, + { + "epoch": 0.3793103448275862, + "grad_norm": 8.977028820084396, + "learning_rate": 9.97963217389628e-06, + "loss": 3.2660152912139893, + "step": 308 + }, + { + "epoch": 0.3805418719211823, + "grad_norm": 14.66965301106164, + "learning_rate": 9.978980901518663e-06, + "loss": 3.1832613945007324, + "step": 309 + }, + { + "epoch": 0.3817733990147783, + "grad_norm": 27.78972817701185, + "learning_rate": 9.978319401887287e-06, + "loss": 2.719600200653076, + "step": 310 + }, + { + "epoch": 0.3830049261083744, + "grad_norm": 10.666579101176065, + "learning_rate": 9.977647676360927e-06, + "loss": 2.652092456817627, + "step": 311 + }, + { + "epoch": 0.3842364532019704, + "grad_norm": 8.005520537074315, + "learning_rate": 9.976965726319369e-06, + "loss": 2.5932788848876953, + "step": 312 + }, + { + "epoch": 0.3854679802955665, + "grad_norm": 15.690472287679249, + "learning_rate": 9.976273553163393e-06, + "loss": 2.558863401412964, + "step": 313 + }, + { + "epoch": 0.3866995073891626, + "grad_norm": 11.958180437694066, + "learning_rate": 9.975571158314783e-06, + "loss": 3.1973023414611816, + "step": 314 + }, + { + "epoch": 0.3879310344827586, + "grad_norm": 12.749275597057334, + "learning_rate": 9.974858543216319e-06, + "loss": 3.286236524581909, + "step": 315 + }, + { + "epoch": 0.3891625615763547, + "grad_norm": 16.985399241319477, + "learning_rate": 9.974135709331774e-06, + "loss": 3.5159969329833984, + "step": 316 + }, + { + "epoch": 0.39039408866995073, + "grad_norm": 10.457440991240187, + "learning_rate": 9.973402658145908e-06, + "loss": 2.647761821746826, + "step": 317 + }, + { + "epoch": 0.3916256157635468, + "grad_norm": 9.450705495020088, + "learning_rate": 9.972659391164473e-06, + "loss": 2.8499808311462402, + "step": 318 + }, + { + "epoch": 0.39285714285714285, + "grad_norm": 10.546244474419336, + "learning_rate": 9.971905909914206e-06, + "loss": 2.332852840423584, + "step": 319 + }, + { + "epoch": 0.39408866995073893, + "grad_norm": 10.2366500934473, + "learning_rate": 9.971142215942817e-06, + "loss": 2.627098560333252, + "step": 320 + }, + { + "epoch": 0.39532019704433496, + "grad_norm": 6.472838949640434, + "learning_rate": 9.970368310819e-06, + "loss": 2.302323341369629, + "step": 321 + }, + { + "epoch": 0.39655172413793105, + "grad_norm": 6.421471401290025, + "learning_rate": 9.969584196132427e-06, + "loss": 2.6783509254455566, + "step": 322 + }, + { + "epoch": 0.3977832512315271, + "grad_norm": 12.353934861805914, + "learning_rate": 9.96878987349373e-06, + "loss": 2.9487061500549316, + "step": 323 + }, + { + "epoch": 0.39901477832512317, + "grad_norm": 13.993445702154649, + "learning_rate": 9.967985344534521e-06, + "loss": 2.5883233547210693, + "step": 324 + }, + { + "epoch": 0.4002463054187192, + "grad_norm": 20.380213804590188, + "learning_rate": 9.96717061090737e-06, + "loss": 3.125821590423584, + "step": 325 + }, + { + "epoch": 0.4014778325123153, + "grad_norm": 6.812077926758059, + "learning_rate": 9.966345674285808e-06, + "loss": 2.829881191253662, + "step": 326 + }, + { + "epoch": 0.4027093596059113, + "grad_norm": 16.808551579421827, + "learning_rate": 9.965510536364329e-06, + "loss": 2.5988128185272217, + "step": 327 + }, + { + "epoch": 0.4039408866995074, + "grad_norm": 7.777965739175337, + "learning_rate": 9.964665198858375e-06, + "loss": 2.158940315246582, + "step": 328 + }, + { + "epoch": 0.4051724137931034, + "grad_norm": 10.632017505369658, + "learning_rate": 9.96380966350434e-06, + "loss": 2.716994285583496, + "step": 329 + }, + { + "epoch": 0.4064039408866995, + "grad_norm": 12.778378390552197, + "learning_rate": 9.962943932059573e-06, + "loss": 3.1283516883850098, + "step": 330 + }, + { + "epoch": 0.40763546798029554, + "grad_norm": 12.686658918372668, + "learning_rate": 9.962068006302357e-06, + "loss": 3.0957908630371094, + "step": 331 + }, + { + "epoch": 0.4088669950738916, + "grad_norm": 24.890731349370103, + "learning_rate": 9.961181888031917e-06, + "loss": 2.3027350902557373, + "step": 332 + }, + { + "epoch": 0.4100985221674877, + "grad_norm": 10.45514873243925, + "learning_rate": 9.960285579068419e-06, + "loss": 2.956791877746582, + "step": 333 + }, + { + "epoch": 0.41133004926108374, + "grad_norm": 28.23036034704062, + "learning_rate": 9.959379081252958e-06, + "loss": 2.5689826011657715, + "step": 334 + }, + { + "epoch": 0.4125615763546798, + "grad_norm": 8.031700376672275, + "learning_rate": 9.958462396447556e-06, + "loss": 3.1086199283599854, + "step": 335 + }, + { + "epoch": 0.41379310344827586, + "grad_norm": 15.790958589129726, + "learning_rate": 9.957535526535165e-06, + "loss": 3.134901285171509, + "step": 336 + }, + { + "epoch": 0.41502463054187194, + "grad_norm": 12.433447054233632, + "learning_rate": 9.956598473419652e-06, + "loss": 2.642225742340088, + "step": 337 + }, + { + "epoch": 0.41625615763546797, + "grad_norm": 9.36121478561991, + "learning_rate": 9.95565123902581e-06, + "loss": 2.828200340270996, + "step": 338 + }, + { + "epoch": 0.41748768472906406, + "grad_norm": 14.194698913635616, + "learning_rate": 9.954693825299333e-06, + "loss": 2.751354217529297, + "step": 339 + }, + { + "epoch": 0.4187192118226601, + "grad_norm": 13.475276856352862, + "learning_rate": 9.953726234206835e-06, + "loss": 2.818434715270996, + "step": 340 + }, + { + "epoch": 0.41995073891625617, + "grad_norm": 14.017642174434487, + "learning_rate": 9.95274846773583e-06, + "loss": 2.8631365299224854, + "step": 341 + }, + { + "epoch": 0.4211822660098522, + "grad_norm": 37.92442284518435, + "learning_rate": 9.951760527894733e-06, + "loss": 2.387998580932617, + "step": 342 + }, + { + "epoch": 0.4224137931034483, + "grad_norm": 8.636388354492292, + "learning_rate": 9.950762416712862e-06, + "loss": 2.366614580154419, + "step": 343 + }, + { + "epoch": 0.4236453201970443, + "grad_norm": 10.06521281831273, + "learning_rate": 9.949754136240416e-06, + "loss": 2.4502060413360596, + "step": 344 + }, + { + "epoch": 0.4248768472906404, + "grad_norm": 12.481723752818217, + "learning_rate": 9.948735688548496e-06, + "loss": 2.47091007232666, + "step": 345 + }, + { + "epoch": 0.42610837438423643, + "grad_norm": 8.973793469902368, + "learning_rate": 9.947707075729076e-06, + "loss": 3.0400021076202393, + "step": 346 + }, + { + "epoch": 0.4273399014778325, + "grad_norm": 10.331950331735893, + "learning_rate": 9.946668299895017e-06, + "loss": 2.622288227081299, + "step": 347 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 22.195871941281137, + "learning_rate": 9.945619363180054e-06, + "loss": 3.3773419857025146, + "step": 348 + }, + { + "epoch": 0.42980295566502463, + "grad_norm": 19.575310687428036, + "learning_rate": 9.944560267738792e-06, + "loss": 3.279005527496338, + "step": 349 + }, + { + "epoch": 0.43103448275862066, + "grad_norm": 11.204766296525598, + "learning_rate": 9.943491015746704e-06, + "loss": 2.8206255435943604, + "step": 350 + }, + { + "epoch": 0.43226600985221675, + "grad_norm": 19.31443626404287, + "learning_rate": 9.942411609400127e-06, + "loss": 3.312700033187866, + "step": 351 + }, + { + "epoch": 0.43349753694581283, + "grad_norm": 12.40959825169754, + "learning_rate": 9.941322050916251e-06, + "loss": 2.580315113067627, + "step": 352 + }, + { + "epoch": 0.43472906403940886, + "grad_norm": 18.26867922192619, + "learning_rate": 9.940222342533126e-06, + "loss": 2.8339614868164062, + "step": 353 + }, + { + "epoch": 0.43596059113300495, + "grad_norm": 15.240586085653998, + "learning_rate": 9.939112486509644e-06, + "loss": 2.582752227783203, + "step": 354 + }, + { + "epoch": 0.437192118226601, + "grad_norm": 14.054810279727889, + "learning_rate": 9.937992485125547e-06, + "loss": 2.9355309009552, + "step": 355 + }, + { + "epoch": 0.43842364532019706, + "grad_norm": 7.204056413186231, + "learning_rate": 9.936862340681412e-06, + "loss": 2.796612024307251, + "step": 356 + }, + { + "epoch": 0.4396551724137931, + "grad_norm": 5.797127744814052, + "learning_rate": 9.935722055498655e-06, + "loss": 2.6307716369628906, + "step": 357 + }, + { + "epoch": 0.4408866995073892, + "grad_norm": 8.742348132173227, + "learning_rate": 9.934571631919518e-06, + "loss": 2.8603620529174805, + "step": 358 + }, + { + "epoch": 0.4421182266009852, + "grad_norm": 12.186262361276388, + "learning_rate": 9.933411072307071e-06, + "loss": 3.1397266387939453, + "step": 359 + }, + { + "epoch": 0.4433497536945813, + "grad_norm": 8.973047578523662, + "learning_rate": 9.9322403790452e-06, + "loss": 2.5362772941589355, + "step": 360 + }, + { + "epoch": 0.4445812807881773, + "grad_norm": 17.982816499460725, + "learning_rate": 9.931059554538613e-06, + "loss": 2.7547712326049805, + "step": 361 + }, + { + "epoch": 0.4458128078817734, + "grad_norm": 15.389405107024809, + "learning_rate": 9.929868601212822e-06, + "loss": 3.144801139831543, + "step": 362 + }, + { + "epoch": 0.44704433497536944, + "grad_norm": 16.343273720769005, + "learning_rate": 9.928667521514149e-06, + "loss": 2.600550889968872, + "step": 363 + }, + { + "epoch": 0.4482758620689655, + "grad_norm": 11.532249256759682, + "learning_rate": 9.927456317909711e-06, + "loss": 2.176116704940796, + "step": 364 + }, + { + "epoch": 0.44950738916256155, + "grad_norm": 25.088404612293182, + "learning_rate": 9.92623499288743e-06, + "loss": 3.1918365955352783, + "step": 365 + }, + { + "epoch": 0.45073891625615764, + "grad_norm": 12.864077493891681, + "learning_rate": 9.92500354895601e-06, + "loss": 2.6937577724456787, + "step": 366 + }, + { + "epoch": 0.45197044334975367, + "grad_norm": 29.27990733585633, + "learning_rate": 9.92376198864494e-06, + "loss": 3.6490774154663086, + "step": 367 + }, + { + "epoch": 0.45320197044334976, + "grad_norm": 7.620954232577737, + "learning_rate": 9.922510314504493e-06, + "loss": 3.0342392921447754, + "step": 368 + }, + { + "epoch": 0.4544334975369458, + "grad_norm": 14.562498240608573, + "learning_rate": 9.921248529105716e-06, + "loss": 3.175008773803711, + "step": 369 + }, + { + "epoch": 0.45566502463054187, + "grad_norm": 9.096092875139751, + "learning_rate": 9.919976635040425e-06, + "loss": 1.9000710248947144, + "step": 370 + }, + { + "epoch": 0.45689655172413796, + "grad_norm": 19.30965262540543, + "learning_rate": 9.918694634921195e-06, + "loss": 3.5248589515686035, + "step": 371 + }, + { + "epoch": 0.458128078817734, + "grad_norm": 10.529945298812061, + "learning_rate": 9.91740253138137e-06, + "loss": 2.869842529296875, + "step": 372 + }, + { + "epoch": 0.45935960591133007, + "grad_norm": 10.698638706211932, + "learning_rate": 9.916100327075038e-06, + "loss": 1.9380724430084229, + "step": 373 + }, + { + "epoch": 0.4605911330049261, + "grad_norm": 17.707591147238283, + "learning_rate": 9.914788024677039e-06, + "loss": 2.2112460136413574, + "step": 374 + }, + { + "epoch": 0.4618226600985222, + "grad_norm": 10.065846050311237, + "learning_rate": 9.913465626882954e-06, + "loss": 3.1283068656921387, + "step": 375 + }, + { + "epoch": 0.4630541871921182, + "grad_norm": 25.33369677490011, + "learning_rate": 9.912133136409103e-06, + "loss": 2.692117929458618, + "step": 376 + }, + { + "epoch": 0.4642857142857143, + "grad_norm": 57.3231139544447, + "learning_rate": 9.910790555992536e-06, + "loss": 3.047241687774658, + "step": 377 + }, + { + "epoch": 0.46551724137931033, + "grad_norm": 11.840834448379393, + "learning_rate": 9.909437888391025e-06, + "loss": 3.0103232860565186, + "step": 378 + }, + { + "epoch": 0.4667487684729064, + "grad_norm": 15.056907160003684, + "learning_rate": 9.908075136383068e-06, + "loss": 2.8296966552734375, + "step": 379 + }, + { + "epoch": 0.46798029556650245, + "grad_norm": 8.534626696858023, + "learning_rate": 9.906702302767876e-06, + "loss": 2.818819999694824, + "step": 380 + }, + { + "epoch": 0.46921182266009853, + "grad_norm": 29.849300222390532, + "learning_rate": 9.905319390365364e-06, + "loss": 3.6281867027282715, + "step": 381 + }, + { + "epoch": 0.47044334975369456, + "grad_norm": 17.161390821083423, + "learning_rate": 9.903926402016153e-06, + "loss": 2.7123236656188965, + "step": 382 + }, + { + "epoch": 0.47167487684729065, + "grad_norm": 13.097065098778378, + "learning_rate": 9.902523340581562e-06, + "loss": 2.69736909866333, + "step": 383 + }, + { + "epoch": 0.4729064039408867, + "grad_norm": 11.269340257234004, + "learning_rate": 9.901110208943599e-06, + "loss": 3.088184118270874, + "step": 384 + }, + { + "epoch": 0.47413793103448276, + "grad_norm": 6.6950707947616745, + "learning_rate": 9.899687010004956e-06, + "loss": 2.606736183166504, + "step": 385 + }, + { + "epoch": 0.4753694581280788, + "grad_norm": 10.297903581299613, + "learning_rate": 9.898253746689007e-06, + "loss": 2.684105157852173, + "step": 386 + }, + { + "epoch": 0.4766009852216749, + "grad_norm": 15.82478266058562, + "learning_rate": 9.896810421939797e-06, + "loss": 2.8739280700683594, + "step": 387 + }, + { + "epoch": 0.47783251231527096, + "grad_norm": 8.284309924074774, + "learning_rate": 9.895357038722043e-06, + "loss": 2.835542917251587, + "step": 388 + }, + { + "epoch": 0.479064039408867, + "grad_norm": 15.854123121769446, + "learning_rate": 9.893893600021112e-06, + "loss": 2.855287551879883, + "step": 389 + }, + { + "epoch": 0.4802955665024631, + "grad_norm": 7.88725535997062, + "learning_rate": 9.892420108843038e-06, + "loss": 2.8026838302612305, + "step": 390 + }, + { + "epoch": 0.4815270935960591, + "grad_norm": 11.000709518913423, + "learning_rate": 9.890936568214493e-06, + "loss": 3.1150124073028564, + "step": 391 + }, + { + "epoch": 0.4827586206896552, + "grad_norm": 13.588584372243895, + "learning_rate": 9.889442981182802e-06, + "loss": 2.578108072280884, + "step": 392 + }, + { + "epoch": 0.4839901477832512, + "grad_norm": 16.34748858179715, + "learning_rate": 9.88793935081592e-06, + "loss": 2.7470006942749023, + "step": 393 + }, + { + "epoch": 0.4852216748768473, + "grad_norm": 10.809579161505546, + "learning_rate": 9.88642568020243e-06, + "loss": 2.9015283584594727, + "step": 394 + }, + { + "epoch": 0.48645320197044334, + "grad_norm": 13.55439142286002, + "learning_rate": 9.884901972451542e-06, + "loss": 3.79250431060791, + "step": 395 + }, + { + "epoch": 0.4876847290640394, + "grad_norm": 8.909988613184693, + "learning_rate": 9.883368230693082e-06, + "loss": 3.0748767852783203, + "step": 396 + }, + { + "epoch": 0.48891625615763545, + "grad_norm": 13.412610776910293, + "learning_rate": 9.881824458077491e-06, + "loss": 2.822726011276245, + "step": 397 + }, + { + "epoch": 0.49014778325123154, + "grad_norm": 11.426335338698937, + "learning_rate": 9.880270657775806e-06, + "loss": 2.7966151237487793, + "step": 398 + }, + { + "epoch": 0.49137931034482757, + "grad_norm": 10.55324948832395, + "learning_rate": 9.878706832979668e-06, + "loss": 2.8517651557922363, + "step": 399 + }, + { + "epoch": 0.49261083743842365, + "grad_norm": 11.070058186972197, + "learning_rate": 9.877132986901306e-06, + "loss": 2.7754080295562744, + "step": 400 + }, + { + "epoch": 0.4938423645320197, + "grad_norm": 8.886322673700336, + "learning_rate": 9.875549122773536e-06, + "loss": 2.9478702545166016, + "step": 401 + }, + { + "epoch": 0.49507389162561577, + "grad_norm": 9.759021404672636, + "learning_rate": 9.87395524384975e-06, + "loss": 2.9535412788391113, + "step": 402 + }, + { + "epoch": 0.4963054187192118, + "grad_norm": 22.265516010081125, + "learning_rate": 9.872351353403912e-06, + "loss": 3.415161609649658, + "step": 403 + }, + { + "epoch": 0.4975369458128079, + "grad_norm": 10.3371436402533, + "learning_rate": 9.870737454730552e-06, + "loss": 2.573082447052002, + "step": 404 + }, + { + "epoch": 0.4987684729064039, + "grad_norm": 14.615736501967937, + "learning_rate": 9.869113551144754e-06, + "loss": 2.4743850231170654, + "step": 405 + }, + { + "epoch": 0.5, + "grad_norm": 10.275697391044838, + "learning_rate": 9.867479645982158e-06, + "loss": 2.6644279956817627, + "step": 406 + }, + { + "epoch": 0.5012315270935961, + "grad_norm": 7.731558128938727, + "learning_rate": 9.865835742598942e-06, + "loss": 2.7798032760620117, + "step": 407 + }, + { + "epoch": 0.5024630541871922, + "grad_norm": 28.59542346400597, + "learning_rate": 9.864181844371828e-06, + "loss": 3.939884662628174, + "step": 408 + }, + { + "epoch": 0.5036945812807881, + "grad_norm": 21.07739414791098, + "learning_rate": 9.86251795469806e-06, + "loss": 2.8093104362487793, + "step": 409 + }, + { + "epoch": 0.5049261083743842, + "grad_norm": 8.961555424981583, + "learning_rate": 9.860844076995416e-06, + "loss": 2.1494715213775635, + "step": 410 + }, + { + "epoch": 0.5061576354679803, + "grad_norm": 21.200756727942377, + "learning_rate": 9.85916021470218e-06, + "loss": 2.964136838912964, + "step": 411 + }, + { + "epoch": 0.5073891625615764, + "grad_norm": 11.020672835034468, + "learning_rate": 9.857466371277152e-06, + "loss": 2.641287088394165, + "step": 412 + }, + { + "epoch": 0.5086206896551724, + "grad_norm": 9.8391871787113, + "learning_rate": 9.85576255019963e-06, + "loss": 2.454512357711792, + "step": 413 + }, + { + "epoch": 0.5098522167487685, + "grad_norm": 9.302782088404763, + "learning_rate": 9.85404875496941e-06, + "loss": 2.4566071033477783, + "step": 414 + }, + { + "epoch": 0.5110837438423645, + "grad_norm": 12.209048739605382, + "learning_rate": 9.852324989106772e-06, + "loss": 2.7254204750061035, + "step": 415 + }, + { + "epoch": 0.5123152709359606, + "grad_norm": 17.193015982984093, + "learning_rate": 9.850591256152483e-06, + "loss": 2.743382215499878, + "step": 416 + }, + { + "epoch": 0.5135467980295566, + "grad_norm": 31.54989094640885, + "learning_rate": 9.848847559667774e-06, + "loss": 3.376046657562256, + "step": 417 + }, + { + "epoch": 0.5147783251231527, + "grad_norm": 11.734812553622533, + "learning_rate": 9.847093903234351e-06, + "loss": 2.73980975151062, + "step": 418 + }, + { + "epoch": 0.5160098522167488, + "grad_norm": 8.164256099521083, + "learning_rate": 9.845330290454373e-06, + "loss": 2.7565903663635254, + "step": 419 + }, + { + "epoch": 0.5172413793103449, + "grad_norm": 9.178438912949575, + "learning_rate": 9.843556724950454e-06, + "loss": 2.9061315059661865, + "step": 420 + }, + { + "epoch": 0.5184729064039408, + "grad_norm": 18.23493245534027, + "learning_rate": 9.841773210365646e-06, + "loss": 3.1584839820861816, + "step": 421 + }, + { + "epoch": 0.5197044334975369, + "grad_norm": 13.406138718704618, + "learning_rate": 9.839979750363443e-06, + "loss": 3.300762176513672, + "step": 422 + }, + { + "epoch": 0.520935960591133, + "grad_norm": 16.907140017416133, + "learning_rate": 9.838176348627768e-06, + "loss": 2.5202269554138184, + "step": 423 + }, + { + "epoch": 0.5221674876847291, + "grad_norm": 14.800436222535966, + "learning_rate": 9.83636300886296e-06, + "loss": 3.9240634441375732, + "step": 424 + }, + { + "epoch": 0.5233990147783252, + "grad_norm": 13.058319822050642, + "learning_rate": 9.834539734793774e-06, + "loss": 3.1783556938171387, + "step": 425 + }, + { + "epoch": 0.5246305418719212, + "grad_norm": 9.577210971277129, + "learning_rate": 9.832706530165372e-06, + "loss": 2.787106513977051, + "step": 426 + }, + { + "epoch": 0.5258620689655172, + "grad_norm": 17.432663310497652, + "learning_rate": 9.830863398743313e-06, + "loss": 3.270280599594116, + "step": 427 + }, + { + "epoch": 0.5270935960591133, + "grad_norm": 13.065514198679326, + "learning_rate": 9.829010344313548e-06, + "loss": 3.0135059356689453, + "step": 428 + }, + { + "epoch": 0.5283251231527094, + "grad_norm": 12.9248393025633, + "learning_rate": 9.82714737068241e-06, + "loss": 2.989795207977295, + "step": 429 + }, + { + "epoch": 0.5295566502463054, + "grad_norm": 15.64315185844485, + "learning_rate": 9.825274481676605e-06, + "loss": 2.5208187103271484, + "step": 430 + }, + { + "epoch": 0.5307881773399015, + "grad_norm": 11.452591471364267, + "learning_rate": 9.82339168114321e-06, + "loss": 3.1890928745269775, + "step": 431 + }, + { + "epoch": 0.5320197044334976, + "grad_norm": 11.650610381993676, + "learning_rate": 9.821498972949657e-06, + "loss": 3.0655789375305176, + "step": 432 + }, + { + "epoch": 0.5332512315270936, + "grad_norm": 7.7840344730355335, + "learning_rate": 9.81959636098373e-06, + "loss": 2.611284017562866, + "step": 433 + }, + { + "epoch": 0.5344827586206896, + "grad_norm": 8.93478095027874, + "learning_rate": 9.817683849153561e-06, + "loss": 2.863576889038086, + "step": 434 + }, + { + "epoch": 0.5357142857142857, + "grad_norm": 10.52062689285789, + "learning_rate": 9.815761441387609e-06, + "loss": 2.6186623573303223, + "step": 435 + }, + { + "epoch": 0.5369458128078818, + "grad_norm": 6.68274047677578, + "learning_rate": 9.813829141634666e-06, + "loss": 1.3848458528518677, + "step": 436 + }, + { + "epoch": 0.5381773399014779, + "grad_norm": 9.593848866659638, + "learning_rate": 9.811886953863841e-06, + "loss": 3.00791597366333, + "step": 437 + }, + { + "epoch": 0.5394088669950738, + "grad_norm": 7.8032629730941565, + "learning_rate": 9.809934882064555e-06, + "loss": 2.8431854248046875, + "step": 438 + }, + { + "epoch": 0.5406403940886699, + "grad_norm": 10.324361743530943, + "learning_rate": 9.807972930246531e-06, + "loss": 2.3595449924468994, + "step": 439 + }, + { + "epoch": 0.541871921182266, + "grad_norm": 15.306323140698186, + "learning_rate": 9.806001102439789e-06, + "loss": 2.55434250831604, + "step": 440 + }, + { + "epoch": 0.5431034482758621, + "grad_norm": 23.37582741202724, + "learning_rate": 9.804019402694627e-06, + "loss": 2.4509990215301514, + "step": 441 + }, + { + "epoch": 0.5443349753694581, + "grad_norm": 9.38267743442567, + "learning_rate": 9.802027835081628e-06, + "loss": 2.825401782989502, + "step": 442 + }, + { + "epoch": 0.5455665024630542, + "grad_norm": 10.449224530160473, + "learning_rate": 9.800026403691643e-06, + "loss": 2.7315573692321777, + "step": 443 + }, + { + "epoch": 0.5467980295566502, + "grad_norm": 22.900410887080454, + "learning_rate": 9.798015112635786e-06, + "loss": 3.1359333992004395, + "step": 444 + }, + { + "epoch": 0.5480295566502463, + "grad_norm": 9.839888483337905, + "learning_rate": 9.795993966045418e-06, + "loss": 3.2884740829467773, + "step": 445 + }, + { + "epoch": 0.5492610837438424, + "grad_norm": 9.35231433219537, + "learning_rate": 9.793962968072149e-06, + "loss": 2.8281359672546387, + "step": 446 + }, + { + "epoch": 0.5504926108374384, + "grad_norm": 6.698793862232108, + "learning_rate": 9.791922122887823e-06, + "loss": 2.633974313735962, + "step": 447 + }, + { + "epoch": 0.5517241379310345, + "grad_norm": 8.317360049933578, + "learning_rate": 9.78987143468451e-06, + "loss": 2.1651690006256104, + "step": 448 + }, + { + "epoch": 0.5529556650246306, + "grad_norm": 11.511312923842238, + "learning_rate": 9.7878109076745e-06, + "loss": 3.011908531188965, + "step": 449 + }, + { + "epoch": 0.5541871921182266, + "grad_norm": 15.627130212627556, + "learning_rate": 9.785740546090293e-06, + "loss": 3.121683359146118, + "step": 450 + }, + { + "epoch": 0.5554187192118226, + "grad_norm": 14.263261857694998, + "learning_rate": 9.783660354184589e-06, + "loss": 2.9901375770568848, + "step": 451 + }, + { + "epoch": 0.5566502463054187, + "grad_norm": 15.230602091833177, + "learning_rate": 9.78157033623028e-06, + "loss": 3.1121528148651123, + "step": 452 + }, + { + "epoch": 0.5578817733990148, + "grad_norm": 22.32110731618789, + "learning_rate": 9.779470496520442e-06, + "loss": 2.9811508655548096, + "step": 453 + }, + { + "epoch": 0.5591133004926109, + "grad_norm": 11.801131103021726, + "learning_rate": 9.777360839368327e-06, + "loss": 2.8219947814941406, + "step": 454 + }, + { + "epoch": 0.5603448275862069, + "grad_norm": 10.166506753796495, + "learning_rate": 9.77524136910735e-06, + "loss": 2.870987892150879, + "step": 455 + }, + { + "epoch": 0.5615763546798029, + "grad_norm": 9.413959781223877, + "learning_rate": 9.773112090091084e-06, + "loss": 3.1902365684509277, + "step": 456 + }, + { + "epoch": 0.562807881773399, + "grad_norm": 12.723571043561764, + "learning_rate": 9.770973006693256e-06, + "loss": 3.3052220344543457, + "step": 457 + }, + { + "epoch": 0.5640394088669951, + "grad_norm": 14.337077670753716, + "learning_rate": 9.76882412330772e-06, + "loss": 2.3376049995422363, + "step": 458 + }, + { + "epoch": 0.5652709359605911, + "grad_norm": 10.245935627064924, + "learning_rate": 9.766665444348472e-06, + "loss": 2.8364970684051514, + "step": 459 + }, + { + "epoch": 0.5665024630541872, + "grad_norm": 18.308636912090915, + "learning_rate": 9.76449697424962e-06, + "loss": 2.582505702972412, + "step": 460 + }, + { + "epoch": 0.5677339901477833, + "grad_norm": 8.927255205757533, + "learning_rate": 9.76231871746539e-06, + "loss": 2.485147476196289, + "step": 461 + }, + { + "epoch": 0.5689655172413793, + "grad_norm": 11.356171958036413, + "learning_rate": 9.760130678470106e-06, + "loss": 3.0910027027130127, + "step": 462 + }, + { + "epoch": 0.5701970443349754, + "grad_norm": 10.937354765360512, + "learning_rate": 9.757932861758188e-06, + "loss": 3.3621506690979004, + "step": 463 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 11.222097055926637, + "learning_rate": 9.755725271844142e-06, + "loss": 2.8310019969940186, + "step": 464 + }, + { + "epoch": 0.5726600985221675, + "grad_norm": 30.527175863167063, + "learning_rate": 9.753507913262548e-06, + "loss": 2.797703742980957, + "step": 465 + }, + { + "epoch": 0.5738916256157636, + "grad_norm": 15.045285480872131, + "learning_rate": 9.751280790568047e-06, + "loss": 2.6609878540039062, + "step": 466 + }, + { + "epoch": 0.5751231527093597, + "grad_norm": 13.871081363987201, + "learning_rate": 9.749043908335343e-06, + "loss": 2.778043508529663, + "step": 467 + }, + { + "epoch": 0.5763546798029556, + "grad_norm": 13.771545893500338, + "learning_rate": 9.74679727115918e-06, + "loss": 2.8315014839172363, + "step": 468 + }, + { + "epoch": 0.5775862068965517, + "grad_norm": 19.916341772532764, + "learning_rate": 9.744540883654348e-06, + "loss": 3.3902840614318848, + "step": 469 + }, + { + "epoch": 0.5788177339901478, + "grad_norm": 22.648986055714484, + "learning_rate": 9.742274750455659e-06, + "loss": 3.53080153465271, + "step": 470 + }, + { + "epoch": 0.5800492610837439, + "grad_norm": 23.493391135041467, + "learning_rate": 9.739998876217943e-06, + "loss": 2.270110845565796, + "step": 471 + }, + { + "epoch": 0.5812807881773399, + "grad_norm": 12.049204240060057, + "learning_rate": 9.737713265616043e-06, + "loss": 2.7059872150421143, + "step": 472 + }, + { + "epoch": 0.5825123152709359, + "grad_norm": 20.2953123538445, + "learning_rate": 9.735417923344798e-06, + "loss": 4.328514575958252, + "step": 473 + }, + { + "epoch": 0.583743842364532, + "grad_norm": 14.790979425207205, + "learning_rate": 9.73311285411904e-06, + "loss": 3.2155938148498535, + "step": 474 + }, + { + "epoch": 0.5849753694581281, + "grad_norm": 35.79655633932577, + "learning_rate": 9.730798062673575e-06, + "loss": 2.277022361755371, + "step": 475 + }, + { + "epoch": 0.5862068965517241, + "grad_norm": 10.760493401180613, + "learning_rate": 9.728473553763186e-06, + "loss": 2.794111490249634, + "step": 476 + }, + { + "epoch": 0.5874384236453202, + "grad_norm": 7.877057642797786, + "learning_rate": 9.726139332162613e-06, + "loss": 3.00388765335083, + "step": 477 + }, + { + "epoch": 0.5886699507389163, + "grad_norm": 10.974644270731439, + "learning_rate": 9.723795402666549e-06, + "loss": 2.5355563163757324, + "step": 478 + }, + { + "epoch": 0.5899014778325123, + "grad_norm": 22.285874447386394, + "learning_rate": 9.721441770089621e-06, + "loss": 3.2441415786743164, + "step": 479 + }, + { + "epoch": 0.5911330049261084, + "grad_norm": 13.333764613863938, + "learning_rate": 9.719078439266399e-06, + "loss": 2.826803207397461, + "step": 480 + }, + { + "epoch": 0.5923645320197044, + "grad_norm": 6.843940415955184, + "learning_rate": 9.716705415051362e-06, + "loss": 2.5396804809570312, + "step": 481 + }, + { + "epoch": 0.5935960591133005, + "grad_norm": 23.860174795633608, + "learning_rate": 9.714322702318908e-06, + "loss": 2.85546875, + "step": 482 + }, + { + "epoch": 0.5948275862068966, + "grad_norm": 12.255473790019064, + "learning_rate": 9.711930305963333e-06, + "loss": 3.217014789581299, + "step": 483 + }, + { + "epoch": 0.5960591133004927, + "grad_norm": 8.15967079186392, + "learning_rate": 9.70952823089882e-06, + "loss": 2.781094551086426, + "step": 484 + }, + { + "epoch": 0.5972906403940886, + "grad_norm": 11.942750739396006, + "learning_rate": 9.707116482059447e-06, + "loss": 2.617154121398926, + "step": 485 + }, + { + "epoch": 0.5985221674876847, + "grad_norm": 15.243819163950327, + "learning_rate": 9.704695064399143e-06, + "loss": 2.601886510848999, + "step": 486 + }, + { + "epoch": 0.5997536945812808, + "grad_norm": 27.321867153996244, + "learning_rate": 9.702263982891712e-06, + "loss": 2.9616146087646484, + "step": 487 + }, + { + "epoch": 0.6009852216748769, + "grad_norm": 9.511966390540264, + "learning_rate": 9.699823242530803e-06, + "loss": 2.8881943225860596, + "step": 488 + }, + { + "epoch": 0.6022167487684729, + "grad_norm": 9.673073669047454, + "learning_rate": 9.697372848329905e-06, + "loss": 2.6718311309814453, + "step": 489 + }, + { + "epoch": 0.603448275862069, + "grad_norm": 12.946431548834504, + "learning_rate": 9.69491280532234e-06, + "loss": 2.959104537963867, + "step": 490 + }, + { + "epoch": 0.604679802955665, + "grad_norm": 13.919071872066077, + "learning_rate": 9.692443118561248e-06, + "loss": 2.085991621017456, + "step": 491 + }, + { + "epoch": 0.6059113300492611, + "grad_norm": 168.7126461149896, + "learning_rate": 9.689963793119574e-06, + "loss": 4.498569488525391, + "step": 492 + }, + { + "epoch": 0.6071428571428571, + "grad_norm": 12.118400731206464, + "learning_rate": 9.68747483409007e-06, + "loss": 2.7837424278259277, + "step": 493 + }, + { + "epoch": 0.6083743842364532, + "grad_norm": 14.436749099341482, + "learning_rate": 9.684976246585264e-06, + "loss": 2.637524366378784, + "step": 494 + }, + { + "epoch": 0.6096059113300493, + "grad_norm": 12.923969042105849, + "learning_rate": 9.682468035737475e-06, + "loss": 2.765727996826172, + "step": 495 + }, + { + "epoch": 0.6108374384236454, + "grad_norm": 12.957696638033102, + "learning_rate": 9.679950206698782e-06, + "loss": 2.825129270553589, + "step": 496 + }, + { + "epoch": 0.6120689655172413, + "grad_norm": 12.328586386653942, + "learning_rate": 9.677422764641021e-06, + "loss": 2.733224630355835, + "step": 497 + }, + { + "epoch": 0.6133004926108374, + "grad_norm": 10.367355913707218, + "learning_rate": 9.674885714755773e-06, + "loss": 3.6287670135498047, + "step": 498 + }, + { + "epoch": 0.6145320197044335, + "grad_norm": 8.212604152981882, + "learning_rate": 9.672339062254359e-06, + "loss": 2.38788104057312, + "step": 499 + }, + { + "epoch": 0.6157635467980296, + "grad_norm": 13.545719741820621, + "learning_rate": 9.66978281236782e-06, + "loss": 2.942269802093506, + "step": 500 + }, + { + "epoch": 0.6169950738916257, + "grad_norm": 12.748449735511594, + "learning_rate": 9.667216970346916e-06, + "loss": 2.4100990295410156, + "step": 501 + }, + { + "epoch": 0.6182266009852216, + "grad_norm": 15.669540249604715, + "learning_rate": 9.6646415414621e-06, + "loss": 2.3959155082702637, + "step": 502 + }, + { + "epoch": 0.6194581280788177, + "grad_norm": 7.949797631449559, + "learning_rate": 9.662056531003528e-06, + "loss": 2.93027925491333, + "step": 503 + }, + { + "epoch": 0.6206896551724138, + "grad_norm": 10.116460165226645, + "learning_rate": 9.659461944281035e-06, + "loss": 3.164715528488159, + "step": 504 + }, + { + "epoch": 0.6219211822660099, + "grad_norm": 16.218136964088803, + "learning_rate": 9.656857786624119e-06, + "loss": 2.634587287902832, + "step": 505 + }, + { + "epoch": 0.6231527093596059, + "grad_norm": 10.922060482445831, + "learning_rate": 9.654244063381948e-06, + "loss": 3.5667788982391357, + "step": 506 + }, + { + "epoch": 0.624384236453202, + "grad_norm": 8.542161812174806, + "learning_rate": 9.651620779923332e-06, + "loss": 2.9383740425109863, + "step": 507 + }, + { + "epoch": 0.625615763546798, + "grad_norm": 10.347829866523263, + "learning_rate": 9.648987941636719e-06, + "loss": 2.7658987045288086, + "step": 508 + }, + { + "epoch": 0.6268472906403941, + "grad_norm": 8.548905747003822, + "learning_rate": 9.646345553930187e-06, + "loss": 3.3089890480041504, + "step": 509 + }, + { + "epoch": 0.6280788177339901, + "grad_norm": 6.487031716645425, + "learning_rate": 9.643693622231426e-06, + "loss": 2.6208066940307617, + "step": 510 + }, + { + "epoch": 0.6293103448275862, + "grad_norm": 8.110412464341984, + "learning_rate": 9.64103215198773e-06, + "loss": 2.7099995613098145, + "step": 511 + }, + { + "epoch": 0.6305418719211823, + "grad_norm": 14.245396567085763, + "learning_rate": 9.638361148665989e-06, + "loss": 2.894531488418579, + "step": 512 + }, + { + "epoch": 0.6317733990147784, + "grad_norm": 11.657856176430656, + "learning_rate": 9.63568061775267e-06, + "loss": 3.1289191246032715, + "step": 513 + }, + { + "epoch": 0.6330049261083743, + "grad_norm": 14.82098703249081, + "learning_rate": 9.632990564753817e-06, + "loss": 2.954707145690918, + "step": 514 + }, + { + "epoch": 0.6342364532019704, + "grad_norm": 6.808305322372754, + "learning_rate": 9.630290995195028e-06, + "loss": 2.93411922454834, + "step": 515 + }, + { + "epoch": 0.6354679802955665, + "grad_norm": 7.276364027378903, + "learning_rate": 9.62758191462145e-06, + "loss": 2.637021541595459, + "step": 516 + }, + { + "epoch": 0.6366995073891626, + "grad_norm": 13.898029887698447, + "learning_rate": 9.624863328597767e-06, + "loss": 3.020066261291504, + "step": 517 + }, + { + "epoch": 0.6379310344827587, + "grad_norm": 24.08793299798331, + "learning_rate": 9.622135242708188e-06, + "loss": 2.5983335971832275, + "step": 518 + }, + { + "epoch": 0.6391625615763546, + "grad_norm": 13.609628946959008, + "learning_rate": 9.619397662556434e-06, + "loss": 2.714207410812378, + "step": 519 + }, + { + "epoch": 0.6403940886699507, + "grad_norm": 8.67874834351866, + "learning_rate": 9.616650593765733e-06, + "loss": 2.8505520820617676, + "step": 520 + }, + { + "epoch": 0.6416256157635468, + "grad_norm": 8.300798802306481, + "learning_rate": 9.613894041978795e-06, + "loss": 2.8081271648406982, + "step": 521 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 10.020203888067801, + "learning_rate": 9.611128012857818e-06, + "loss": 3.106411933898926, + "step": 522 + }, + { + "epoch": 0.6440886699507389, + "grad_norm": 9.32846194404547, + "learning_rate": 9.60835251208446e-06, + "loss": 3.087594985961914, + "step": 523 + }, + { + "epoch": 0.645320197044335, + "grad_norm": 15.30312860694116, + "learning_rate": 9.60556754535984e-06, + "loss": 2.7104361057281494, + "step": 524 + }, + { + "epoch": 0.646551724137931, + "grad_norm": 14.847900307580543, + "learning_rate": 9.602773118404518e-06, + "loss": 2.8562324047088623, + "step": 525 + }, + { + "epoch": 0.6477832512315271, + "grad_norm": 8.874728218475076, + "learning_rate": 9.599969236958485e-06, + "loss": 3.282554864883423, + "step": 526 + }, + { + "epoch": 0.6490147783251231, + "grad_norm": 8.797844640723032, + "learning_rate": 9.597155906781154e-06, + "loss": 2.623101234436035, + "step": 527 + }, + { + "epoch": 0.6502463054187192, + "grad_norm": 9.863712955626877, + "learning_rate": 9.59433313365135e-06, + "loss": 2.889674663543701, + "step": 528 + }, + { + "epoch": 0.6514778325123153, + "grad_norm": 10.895399946836921, + "learning_rate": 9.591500923367287e-06, + "loss": 2.787289619445801, + "step": 529 + }, + { + "epoch": 0.6527093596059114, + "grad_norm": 10.227588231836696, + "learning_rate": 9.58865928174657e-06, + "loss": 2.879824161529541, + "step": 530 + }, + { + "epoch": 0.6539408866995073, + "grad_norm": 8.869590002729453, + "learning_rate": 9.585808214626173e-06, + "loss": 2.967193126678467, + "step": 531 + }, + { + "epoch": 0.6551724137931034, + "grad_norm": 8.822784237769133, + "learning_rate": 9.582947727862433e-06, + "loss": 3.1004772186279297, + "step": 532 + }, + { + "epoch": 0.6564039408866995, + "grad_norm": 13.346747444504954, + "learning_rate": 9.580077827331038e-06, + "loss": 2.69935941696167, + "step": 533 + }, + { + "epoch": 0.6576354679802956, + "grad_norm": 13.781647523739567, + "learning_rate": 9.577198518927005e-06, + "loss": 3.2806637287139893, + "step": 534 + }, + { + "epoch": 0.6588669950738916, + "grad_norm": 17.336818625260154, + "learning_rate": 9.574309808564682e-06, + "loss": 3.050356149673462, + "step": 535 + }, + { + "epoch": 0.6600985221674877, + "grad_norm": 9.311777076008125, + "learning_rate": 9.57141170217773e-06, + "loss": 2.8415322303771973, + "step": 536 + }, + { + "epoch": 0.6613300492610837, + "grad_norm": 12.410317292425518, + "learning_rate": 9.568504205719106e-06, + "loss": 2.5309085845947266, + "step": 537 + }, + { + "epoch": 0.6625615763546798, + "grad_norm": 15.225443304522335, + "learning_rate": 9.565587325161056e-06, + "loss": 3.5695877075195312, + "step": 538 + }, + { + "epoch": 0.6637931034482759, + "grad_norm": 9.562550097283651, + "learning_rate": 9.562661066495108e-06, + "loss": 2.7938594818115234, + "step": 539 + }, + { + "epoch": 0.6650246305418719, + "grad_norm": 8.825138850911314, + "learning_rate": 9.559725435732042e-06, + "loss": 2.8548948764801025, + "step": 540 + }, + { + "epoch": 0.666256157635468, + "grad_norm": 10.262300101456184, + "learning_rate": 9.556780438901899e-06, + "loss": 3.054051399230957, + "step": 541 + }, + { + "epoch": 0.6674876847290641, + "grad_norm": 26.545357662435233, + "learning_rate": 9.553826082053951e-06, + "loss": 3.566359281539917, + "step": 542 + }, + { + "epoch": 0.6687192118226601, + "grad_norm": 12.751257760928588, + "learning_rate": 9.550862371256705e-06, + "loss": 2.8619909286499023, + "step": 543 + }, + { + "epoch": 0.6699507389162561, + "grad_norm": 14.522375958962538, + "learning_rate": 9.547889312597877e-06, + "loss": 3.0177836418151855, + "step": 544 + }, + { + "epoch": 0.6711822660098522, + "grad_norm": 21.356139863129055, + "learning_rate": 9.544906912184383e-06, + "loss": 1.9943304061889648, + "step": 545 + }, + { + "epoch": 0.6724137931034483, + "grad_norm": 5.562548029921876, + "learning_rate": 9.541915176142326e-06, + "loss": 2.650038957595825, + "step": 546 + }, + { + "epoch": 0.6736453201970444, + "grad_norm": 12.716408540810125, + "learning_rate": 9.538914110616995e-06, + "loss": 2.826953411102295, + "step": 547 + }, + { + "epoch": 0.6748768472906403, + "grad_norm": 9.963475586190201, + "learning_rate": 9.53590372177283e-06, + "loss": 2.770202159881592, + "step": 548 + }, + { + "epoch": 0.6761083743842364, + "grad_norm": 32.875675817649174, + "learning_rate": 9.532884015793432e-06, + "loss": 2.0859670639038086, + "step": 549 + }, + { + "epoch": 0.6773399014778325, + "grad_norm": 11.983581363761447, + "learning_rate": 9.529854998881534e-06, + "loss": 2.7557499408721924, + "step": 550 + }, + { + "epoch": 0.6785714285714286, + "grad_norm": 13.15410482971192, + "learning_rate": 9.526816677258995e-06, + "loss": 2.710692882537842, + "step": 551 + }, + { + "epoch": 0.6798029556650246, + "grad_norm": 9.416519545873685, + "learning_rate": 9.523769057166791e-06, + "loss": 3.055102825164795, + "step": 552 + }, + { + "epoch": 0.6810344827586207, + "grad_norm": 11.60625904359093, + "learning_rate": 9.520712144864997e-06, + "loss": 2.606031894683838, + "step": 553 + }, + { + "epoch": 0.6822660098522167, + "grad_norm": 12.067258837088112, + "learning_rate": 9.517645946632766e-06, + "loss": 2.9099555015563965, + "step": 554 + }, + { + "epoch": 0.6834975369458128, + "grad_norm": 10.888483887311708, + "learning_rate": 9.514570468768338e-06, + "loss": 2.7148189544677734, + "step": 555 + }, + { + "epoch": 0.6847290640394089, + "grad_norm": 15.652077873544759, + "learning_rate": 9.511485717589006e-06, + "loss": 2.528857707977295, + "step": 556 + }, + { + "epoch": 0.6859605911330049, + "grad_norm": 12.750166049911234, + "learning_rate": 9.508391699431114e-06, + "loss": 2.814006805419922, + "step": 557 + }, + { + "epoch": 0.687192118226601, + "grad_norm": 12.187355034460829, + "learning_rate": 9.50528842065004e-06, + "loss": 3.3046352863311768, + "step": 558 + }, + { + "epoch": 0.6884236453201971, + "grad_norm": 12.182964964248615, + "learning_rate": 9.502175887620188e-06, + "loss": 3.1519320011138916, + "step": 559 + }, + { + "epoch": 0.6896551724137931, + "grad_norm": 26.00958255437091, + "learning_rate": 9.499054106734963e-06, + "loss": 2.2819509506225586, + "step": 560 + }, + { + "epoch": 0.6908866995073891, + "grad_norm": 10.437408285902773, + "learning_rate": 9.495923084406773e-06, + "loss": 2.7894287109375, + "step": 561 + }, + { + "epoch": 0.6921182266009852, + "grad_norm": 27.469926449959043, + "learning_rate": 9.492782827067006e-06, + "loss": 3.233968734741211, + "step": 562 + }, + { + "epoch": 0.6933497536945813, + "grad_norm": 19.246363086379436, + "learning_rate": 9.48963334116602e-06, + "loss": 2.594421863555908, + "step": 563 + }, + { + "epoch": 0.6945812807881774, + "grad_norm": 11.788384104886402, + "learning_rate": 9.486474633173129e-06, + "loss": 3.181318759918213, + "step": 564 + }, + { + "epoch": 0.6958128078817734, + "grad_norm": 10.754721829366346, + "learning_rate": 9.48330670957659e-06, + "loss": 3.2115392684936523, + "step": 565 + }, + { + "epoch": 0.6970443349753694, + "grad_norm": 12.089226690676854, + "learning_rate": 9.480129576883592e-06, + "loss": 2.408634901046753, + "step": 566 + }, + { + "epoch": 0.6982758620689655, + "grad_norm": 13.370163003636199, + "learning_rate": 9.476943241620233e-06, + "loss": 2.9304041862487793, + "step": 567 + }, + { + "epoch": 0.6995073891625616, + "grad_norm": 23.52604617683973, + "learning_rate": 9.473747710331524e-06, + "loss": 2.75127911567688, + "step": 568 + }, + { + "epoch": 0.7007389162561576, + "grad_norm": 33.407245089515435, + "learning_rate": 9.470542989581357e-06, + "loss": 3.3793530464172363, + "step": 569 + }, + { + "epoch": 0.7019704433497537, + "grad_norm": 8.494714152681327, + "learning_rate": 9.467329085952505e-06, + "loss": 3.001579999923706, + "step": 570 + }, + { + "epoch": 0.7032019704433498, + "grad_norm": 12.457476112208125, + "learning_rate": 9.464106006046602e-06, + "loss": 2.063443422317505, + "step": 571 + }, + { + "epoch": 0.7044334975369458, + "grad_norm": 11.893453239405563, + "learning_rate": 9.460873756484128e-06, + "loss": 3.079399585723877, + "step": 572 + }, + { + "epoch": 0.7056650246305419, + "grad_norm": 17.600286095390665, + "learning_rate": 9.457632343904404e-06, + "loss": 2.6499621868133545, + "step": 573 + }, + { + "epoch": 0.7068965517241379, + "grad_norm": 11.052824766544509, + "learning_rate": 9.454381774965567e-06, + "loss": 2.848517656326294, + "step": 574 + }, + { + "epoch": 0.708128078817734, + "grad_norm": 11.779141171142625, + "learning_rate": 9.451122056344564e-06, + "loss": 2.936286687850952, + "step": 575 + }, + { + "epoch": 0.7093596059113301, + "grad_norm": 12.447965784800195, + "learning_rate": 9.44785319473714e-06, + "loss": 2.315443515777588, + "step": 576 + }, + { + "epoch": 0.7105911330049262, + "grad_norm": 13.488894073216153, + "learning_rate": 9.444575196857814e-06, + "loss": 3.121138334274292, + "step": 577 + }, + { + "epoch": 0.7118226600985221, + "grad_norm": 15.155327825693226, + "learning_rate": 9.441288069439876e-06, + "loss": 3.326282501220703, + "step": 578 + }, + { + "epoch": 0.7130541871921182, + "grad_norm": 12.463167654535278, + "learning_rate": 9.437991819235366e-06, + "loss": 2.8816466331481934, + "step": 579 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 14.769356931380226, + "learning_rate": 9.434686453015067e-06, + "loss": 3.6819610595703125, + "step": 580 + }, + { + "epoch": 0.7155172413793104, + "grad_norm": 33.4724384154282, + "learning_rate": 9.431371977568483e-06, + "loss": 2.904045343399048, + "step": 581 + }, + { + "epoch": 0.7167487684729064, + "grad_norm": 8.623967512206425, + "learning_rate": 9.428048399703831e-06, + "loss": 3.5356435775756836, + "step": 582 + }, + { + "epoch": 0.7179802955665024, + "grad_norm": 11.543651581364673, + "learning_rate": 9.424715726248027e-06, + "loss": 2.4456870555877686, + "step": 583 + }, + { + "epoch": 0.7192118226600985, + "grad_norm": 6.392692599853808, + "learning_rate": 9.421373964046665e-06, + "loss": 2.5000674724578857, + "step": 584 + }, + { + "epoch": 0.7204433497536946, + "grad_norm": 14.327212598984625, + "learning_rate": 9.418023119964012e-06, + "loss": 2.856738567352295, + "step": 585 + }, + { + "epoch": 0.7216748768472906, + "grad_norm": 6.593431351524387, + "learning_rate": 9.414663200882991e-06, + "loss": 2.623438835144043, + "step": 586 + }, + { + "epoch": 0.7229064039408867, + "grad_norm": 21.188129548487396, + "learning_rate": 9.411294213705162e-06, + "loss": 2.987426996231079, + "step": 587 + }, + { + "epoch": 0.7241379310344828, + "grad_norm": 16.308054128010806, + "learning_rate": 9.407916165350713e-06, + "loss": 2.8868589401245117, + "step": 588 + }, + { + "epoch": 0.7253694581280788, + "grad_norm": 5.6345787753710965, + "learning_rate": 9.404529062758447e-06, + "loss": 2.878659725189209, + "step": 589 + }, + { + "epoch": 0.7266009852216748, + "grad_norm": 21.624096395043555, + "learning_rate": 9.401132912885764e-06, + "loss": 3.197636127471924, + "step": 590 + }, + { + "epoch": 0.7278325123152709, + "grad_norm": 28.674970274616843, + "learning_rate": 9.397727722708643e-06, + "loss": 2.8974030017852783, + "step": 591 + }, + { + "epoch": 0.729064039408867, + "grad_norm": 14.603582651571138, + "learning_rate": 9.39431349922164e-06, + "loss": 2.558945894241333, + "step": 592 + }, + { + "epoch": 0.7302955665024631, + "grad_norm": 6.004290408591086, + "learning_rate": 9.390890249437863e-06, + "loss": 1.0518803596496582, + "step": 593 + }, + { + "epoch": 0.7315270935960592, + "grad_norm": 16.62422153547852, + "learning_rate": 9.38745798038896e-06, + "loss": 3.5599231719970703, + "step": 594 + }, + { + "epoch": 0.7327586206896551, + "grad_norm": 9.731487783525235, + "learning_rate": 9.384016699125102e-06, + "loss": 3.1517539024353027, + "step": 595 + }, + { + "epoch": 0.7339901477832512, + "grad_norm": 10.319265754066222, + "learning_rate": 9.380566412714982e-06, + "loss": 2.809019088745117, + "step": 596 + }, + { + "epoch": 0.7352216748768473, + "grad_norm": 14.675772943073882, + "learning_rate": 9.377107128245782e-06, + "loss": 3.2317776679992676, + "step": 597 + }, + { + "epoch": 0.7364532019704434, + "grad_norm": 15.494293767128655, + "learning_rate": 9.373638852823166e-06, + "loss": 2.7792513370513916, + "step": 598 + }, + { + "epoch": 0.7376847290640394, + "grad_norm": 17.02704136876628, + "learning_rate": 9.370161593571274e-06, + "loss": 2.75253963470459, + "step": 599 + }, + { + "epoch": 0.7389162561576355, + "grad_norm": 14.987899586174, + "learning_rate": 9.36667535763269e-06, + "loss": 3.381519317626953, + "step": 600 + }, + { + "epoch": 0.7401477832512315, + "grad_norm": 19.24830788986111, + "learning_rate": 9.363180152168448e-06, + "loss": 2.62427020072937, + "step": 601 + }, + { + "epoch": 0.7413793103448276, + "grad_norm": 29.185871046378647, + "learning_rate": 9.359675984357992e-06, + "loss": 2.4824719429016113, + "step": 602 + }, + { + "epoch": 0.7426108374384236, + "grad_norm": 8.673285241589555, + "learning_rate": 9.356162861399188e-06, + "loss": 2.8167097568511963, + "step": 603 + }, + { + "epoch": 0.7438423645320197, + "grad_norm": 15.318689439779794, + "learning_rate": 9.352640790508291e-06, + "loss": 2.9545063972473145, + "step": 604 + }, + { + "epoch": 0.7450738916256158, + "grad_norm": 16.1719679891284, + "learning_rate": 9.349109778919938e-06, + "loss": 2.833635091781616, + "step": 605 + }, + { + "epoch": 0.7463054187192119, + "grad_norm": 9.791828516981264, + "learning_rate": 9.345569833887124e-06, + "loss": 2.775730609893799, + "step": 606 + }, + { + "epoch": 0.7475369458128078, + "grad_norm": 28.327643593931583, + "learning_rate": 9.342020962681206e-06, + "loss": 2.652602195739746, + "step": 607 + }, + { + "epoch": 0.7487684729064039, + "grad_norm": 10.194351110042778, + "learning_rate": 9.338463172591868e-06, + "loss": 2.7008144855499268, + "step": 608 + }, + { + "epoch": 0.75, + "grad_norm": 9.445868833849106, + "learning_rate": 9.334896470927115e-06, + "loss": 2.7525248527526855, + "step": 609 + }, + { + "epoch": 0.7512315270935961, + "grad_norm": 26.640278263158898, + "learning_rate": 9.331320865013257e-06, + "loss": 3.446526527404785, + "step": 610 + }, + { + "epoch": 0.7524630541871922, + "grad_norm": 14.322498892724218, + "learning_rate": 9.327736362194899e-06, + "loss": 3.0489022731781006, + "step": 611 + }, + { + "epoch": 0.7536945812807881, + "grad_norm": 9.879694468014232, + "learning_rate": 9.324142969834916e-06, + "loss": 2.840083360671997, + "step": 612 + }, + { + "epoch": 0.7549261083743842, + "grad_norm": 8.637072486896487, + "learning_rate": 9.32054069531444e-06, + "loss": 2.878903388977051, + "step": 613 + }, + { + "epoch": 0.7561576354679803, + "grad_norm": 10.815449949874669, + "learning_rate": 9.316929546032855e-06, + "loss": 2.568045139312744, + "step": 614 + }, + { + "epoch": 0.7573891625615764, + "grad_norm": 18.206411357576574, + "learning_rate": 9.313309529407773e-06, + "loss": 2.8981618881225586, + "step": 615 + }, + { + "epoch": 0.7586206896551724, + "grad_norm": 14.515670827099761, + "learning_rate": 9.309680652875015e-06, + "loss": 3.3486928939819336, + "step": 616 + }, + { + "epoch": 0.7598522167487685, + "grad_norm": 10.208627841304171, + "learning_rate": 9.306042923888607e-06, + "loss": 3.1101677417755127, + "step": 617 + }, + { + "epoch": 0.7610837438423645, + "grad_norm": 9.545526159427496, + "learning_rate": 9.302396349920756e-06, + "loss": 2.5806779861450195, + "step": 618 + }, + { + "epoch": 0.7623152709359606, + "grad_norm": 14.260459979245976, + "learning_rate": 9.298740938461835e-06, + "loss": 2.678412437438965, + "step": 619 + }, + { + "epoch": 0.7635467980295566, + "grad_norm": 10.808443055524243, + "learning_rate": 9.295076697020378e-06, + "loss": 2.62287974357605, + "step": 620 + }, + { + "epoch": 0.7647783251231527, + "grad_norm": 7.635004154714619, + "learning_rate": 9.291403633123046e-06, + "loss": 3.0267720222473145, + "step": 621 + }, + { + "epoch": 0.7660098522167488, + "grad_norm": 15.707612902426492, + "learning_rate": 9.287721754314629e-06, + "loss": 3.147644281387329, + "step": 622 + }, + { + "epoch": 0.7672413793103449, + "grad_norm": 14.526297785533162, + "learning_rate": 9.284031068158023e-06, + "loss": 3.159574031829834, + "step": 623 + }, + { + "epoch": 0.7684729064039408, + "grad_norm": 13.384426615670701, + "learning_rate": 9.280331582234212e-06, + "loss": 2.6432247161865234, + "step": 624 + }, + { + "epoch": 0.7697044334975369, + "grad_norm": 14.835270706650137, + "learning_rate": 9.27662330414226e-06, + "loss": 3.2058279514312744, + "step": 625 + }, + { + "epoch": 0.770935960591133, + "grad_norm": 10.18160016154191, + "learning_rate": 9.272906241499285e-06, + "loss": 2.787260055541992, + "step": 626 + }, + { + "epoch": 0.7721674876847291, + "grad_norm": 13.10691777443293, + "learning_rate": 9.269180401940455e-06, + "loss": 2.5751729011535645, + "step": 627 + }, + { + "epoch": 0.7733990147783252, + "grad_norm": 31.695378978025254, + "learning_rate": 9.265445793118962e-06, + "loss": 2.7433929443359375, + "step": 628 + }, + { + "epoch": 0.7746305418719212, + "grad_norm": 14.739647225699887, + "learning_rate": 9.261702422706014e-06, + "loss": 2.771510124206543, + "step": 629 + }, + { + "epoch": 0.7758620689655172, + "grad_norm": 10.064291707891675, + "learning_rate": 9.257950298390815e-06, + "loss": 2.873830795288086, + "step": 630 + }, + { + "epoch": 0.7770935960591133, + "grad_norm": 11.389694880244464, + "learning_rate": 9.254189427880548e-06, + "loss": 2.7849340438842773, + "step": 631 + }, + { + "epoch": 0.7783251231527094, + "grad_norm": 9.049096315314397, + "learning_rate": 9.250419818900366e-06, + "loss": 3.1721668243408203, + "step": 632 + }, + { + "epoch": 0.7795566502463054, + "grad_norm": 10.167539529464127, + "learning_rate": 9.24664147919337e-06, + "loss": 2.7493605613708496, + "step": 633 + }, + { + "epoch": 0.7807881773399015, + "grad_norm": 16.15312048584227, + "learning_rate": 9.242854416520591e-06, + "loss": 2.470233917236328, + "step": 634 + }, + { + "epoch": 0.7820197044334976, + "grad_norm": 11.446898989077285, + "learning_rate": 9.239058638660983e-06, + "loss": 2.7109014987945557, + "step": 635 + }, + { + "epoch": 0.7832512315270936, + "grad_norm": 15.265461277758774, + "learning_rate": 9.235254153411394e-06, + "loss": 3.0344791412353516, + "step": 636 + }, + { + "epoch": 0.7844827586206896, + "grad_norm": 12.820354961892846, + "learning_rate": 9.231440968586572e-06, + "loss": 2.381561279296875, + "step": 637 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 11.033746075983524, + "learning_rate": 9.227619092019116e-06, + "loss": 1.716524362564087, + "step": 638 + }, + { + "epoch": 0.7869458128078818, + "grad_norm": 36.36927433118522, + "learning_rate": 9.223788531559495e-06, + "loss": 2.591820240020752, + "step": 639 + }, + { + "epoch": 0.7881773399014779, + "grad_norm": 22.998289773218893, + "learning_rate": 9.219949295076006e-06, + "loss": 3.0194711685180664, + "step": 640 + }, + { + "epoch": 0.7894088669950738, + "grad_norm": 9.82623401522864, + "learning_rate": 9.216101390454771e-06, + "loss": 2.852489471435547, + "step": 641 + }, + { + "epoch": 0.7906403940886699, + "grad_norm": 16.052245879830704, + "learning_rate": 9.212244825599714e-06, + "loss": 3.1419005393981934, + "step": 642 + }, + { + "epoch": 0.791871921182266, + "grad_norm": 7.825862600095094, + "learning_rate": 9.208379608432552e-06, + "loss": 2.8307576179504395, + "step": 643 + }, + { + "epoch": 0.7931034482758621, + "grad_norm": 8.143984458879574, + "learning_rate": 9.204505746892772e-06, + "loss": 2.581083297729492, + "step": 644 + }, + { + "epoch": 0.7943349753694581, + "grad_norm": 18.48744043986469, + "learning_rate": 9.200623248937619e-06, + "loss": 2.868973731994629, + "step": 645 + }, + { + "epoch": 0.7955665024630542, + "grad_norm": 8.257209013058233, + "learning_rate": 9.196732122542073e-06, + "loss": 2.8063859939575195, + "step": 646 + }, + { + "epoch": 0.7967980295566502, + "grad_norm": 12.8457758247775, + "learning_rate": 9.192832375698845e-06, + "loss": 2.990504264831543, + "step": 647 + }, + { + "epoch": 0.7980295566502463, + "grad_norm": 15.29216631759892, + "learning_rate": 9.18892401641835e-06, + "loss": 2.390320301055908, + "step": 648 + }, + { + "epoch": 0.7992610837438424, + "grad_norm": 10.724837816433517, + "learning_rate": 9.185007052728689e-06, + "loss": 2.671368360519409, + "step": 649 + }, + { + "epoch": 0.8004926108374384, + "grad_norm": 34.65249876179552, + "learning_rate": 9.181081492675645e-06, + "loss": 3.259225845336914, + "step": 650 + }, + { + "epoch": 0.8017241379310345, + "grad_norm": 15.454469742488547, + "learning_rate": 9.177147344322651e-06, + "loss": 2.6810710430145264, + "step": 651 + }, + { + "epoch": 0.8029556650246306, + "grad_norm": 11.530365704888945, + "learning_rate": 9.173204615750792e-06, + "loss": 2.833371162414551, + "step": 652 + }, + { + "epoch": 0.8041871921182266, + "grad_norm": 16.732932575361076, + "learning_rate": 9.169253315058764e-06, + "loss": 2.3488945960998535, + "step": 653 + }, + { + "epoch": 0.8054187192118226, + "grad_norm": 9.726564803680413, + "learning_rate": 9.165293450362882e-06, + "loss": 2.609282970428467, + "step": 654 + }, + { + "epoch": 0.8066502463054187, + "grad_norm": 7.091881545178562, + "learning_rate": 9.161325029797044e-06, + "loss": 2.536142587661743, + "step": 655 + }, + { + "epoch": 0.8078817733990148, + "grad_norm": 9.986592341017682, + "learning_rate": 9.157348061512728e-06, + "loss": 2.7175073623657227, + "step": 656 + }, + { + "epoch": 0.8091133004926109, + "grad_norm": 8.682128121343633, + "learning_rate": 9.153362553678967e-06, + "loss": 2.99211049079895, + "step": 657 + }, + { + "epoch": 0.8103448275862069, + "grad_norm": 9.322932294885456, + "learning_rate": 9.149368514482337e-06, + "loss": 2.9390807151794434, + "step": 658 + }, + { + "epoch": 0.8115763546798029, + "grad_norm": 18.322306761451276, + "learning_rate": 9.145365952126937e-06, + "loss": 3.0422894954681396, + "step": 659 + }, + { + "epoch": 0.812807881773399, + "grad_norm": 13.085537087984829, + "learning_rate": 9.141354874834372e-06, + "loss": 3.0573301315307617, + "step": 660 + }, + { + "epoch": 0.8140394088669951, + "grad_norm": 11.125925990068074, + "learning_rate": 9.13733529084374e-06, + "loss": 2.5086781978607178, + "step": 661 + }, + { + "epoch": 0.8152709359605911, + "grad_norm": 12.865460326379043, + "learning_rate": 9.13330720841161e-06, + "loss": 2.858813762664795, + "step": 662 + }, + { + "epoch": 0.8165024630541872, + "grad_norm": 16.68197454357427, + "learning_rate": 9.129270635812013e-06, + "loss": 2.6715052127838135, + "step": 663 + }, + { + "epoch": 0.8177339901477833, + "grad_norm": 8.328828299636488, + "learning_rate": 9.125225581336408e-06, + "loss": 3.18508243560791, + "step": 664 + }, + { + "epoch": 0.8189655172413793, + "grad_norm": 12.129831350250795, + "learning_rate": 9.12117205329369e-06, + "loss": 3.0426509380340576, + "step": 665 + }, + { + "epoch": 0.8201970443349754, + "grad_norm": 10.31532455027376, + "learning_rate": 9.11711006001015e-06, + "loss": 2.8654000759124756, + "step": 666 + }, + { + "epoch": 0.8214285714285714, + "grad_norm": 22.312769944556898, + "learning_rate": 9.113039609829472e-06, + "loss": 3.141207695007324, + "step": 667 + }, + { + "epoch": 0.8226600985221675, + "grad_norm": 9.864189257198062, + "learning_rate": 9.108960711112709e-06, + "loss": 2.3188462257385254, + "step": 668 + }, + { + "epoch": 0.8238916256157636, + "grad_norm": 7.227847497482275, + "learning_rate": 9.104873372238269e-06, + "loss": 2.785968542098999, + "step": 669 + }, + { + "epoch": 0.8251231527093597, + "grad_norm": 11.651688072805056, + "learning_rate": 9.100777601601896e-06, + "loss": 3.0693092346191406, + "step": 670 + }, + { + "epoch": 0.8263546798029556, + "grad_norm": 14.359029220301974, + "learning_rate": 9.096673407616656e-06, + "loss": 3.038943290710449, + "step": 671 + }, + { + "epoch": 0.8275862068965517, + "grad_norm": 11.367718044029667, + "learning_rate": 9.092560798712913e-06, + "loss": 3.259847640991211, + "step": 672 + }, + { + "epoch": 0.8288177339901478, + "grad_norm": 7.44988788267686, + "learning_rate": 9.08843978333832e-06, + "loss": 2.8227295875549316, + "step": 673 + }, + { + "epoch": 0.8300492610837439, + "grad_norm": 11.316814915640423, + "learning_rate": 9.084310369957795e-06, + "loss": 3.373309850692749, + "step": 674 + }, + { + "epoch": 0.8312807881773399, + "grad_norm": 8.828902957926932, + "learning_rate": 9.08017256705351e-06, + "loss": 3.2833662033081055, + "step": 675 + }, + { + "epoch": 0.8325123152709359, + "grad_norm": 26.42438693311499, + "learning_rate": 9.076026383124863e-06, + "loss": 2.7175965309143066, + "step": 676 + }, + { + "epoch": 0.833743842364532, + "grad_norm": 15.34429558424053, + "learning_rate": 9.071871826688472e-06, + "loss": 2.594611167907715, + "step": 677 + }, + { + "epoch": 0.8349753694581281, + "grad_norm": 23.79233069504134, + "learning_rate": 9.067708906278155e-06, + "loss": 2.8605175018310547, + "step": 678 + }, + { + "epoch": 0.8362068965517241, + "grad_norm": 16.81935056764866, + "learning_rate": 9.063537630444903e-06, + "loss": 2.1438748836517334, + "step": 679 + }, + { + "epoch": 0.8374384236453202, + "grad_norm": 10.888612008792562, + "learning_rate": 9.05935800775688e-06, + "loss": 2.8170299530029297, + "step": 680 + }, + { + "epoch": 0.8386699507389163, + "grad_norm": 14.167748893628115, + "learning_rate": 9.055170046799386e-06, + "loss": 1.7328954935073853, + "step": 681 + }, + { + "epoch": 0.8399014778325123, + "grad_norm": 9.011227940975711, + "learning_rate": 9.050973756174852e-06, + "loss": 2.8324766159057617, + "step": 682 + }, + { + "epoch": 0.8411330049261084, + "grad_norm": 10.1469630150836, + "learning_rate": 9.046769144502818e-06, + "loss": 2.805690288543701, + "step": 683 + }, + { + "epoch": 0.8423645320197044, + "grad_norm": 18.955236663194235, + "learning_rate": 9.04255622041992e-06, + "loss": 2.1270194053649902, + "step": 684 + }, + { + "epoch": 0.8435960591133005, + "grad_norm": 15.32094380068091, + "learning_rate": 9.038334992579863e-06, + "loss": 2.8757829666137695, + "step": 685 + }, + { + "epoch": 0.8448275862068966, + "grad_norm": 11.38695715200097, + "learning_rate": 9.034105469653412e-06, + "loss": 2.84549617767334, + "step": 686 + }, + { + "epoch": 0.8460591133004927, + "grad_norm": 9.897557814234148, + "learning_rate": 9.029867660328369e-06, + "loss": 2.4058642387390137, + "step": 687 + }, + { + "epoch": 0.8472906403940886, + "grad_norm": 11.793589267069729, + "learning_rate": 9.025621573309559e-06, + "loss": 3.2583184242248535, + "step": 688 + }, + { + "epoch": 0.8485221674876847, + "grad_norm": 16.425935376287054, + "learning_rate": 9.021367217318808e-06, + "loss": 2.951143264770508, + "step": 689 + }, + { + "epoch": 0.8497536945812808, + "grad_norm": 23.876213749579968, + "learning_rate": 9.017104601094927e-06, + "loss": 3.0142836570739746, + "step": 690 + }, + { + "epoch": 0.8509852216748769, + "grad_norm": 6.8041557155789345, + "learning_rate": 9.012833733393697e-06, + "loss": 2.7629013061523438, + "step": 691 + }, + { + "epoch": 0.8522167487684729, + "grad_norm": 12.775266706976657, + "learning_rate": 9.008554622987845e-06, + "loss": 2.6153712272644043, + "step": 692 + }, + { + "epoch": 0.853448275862069, + "grad_norm": 10.104362674966435, + "learning_rate": 9.004267278667032e-06, + "loss": 2.7227087020874023, + "step": 693 + }, + { + "epoch": 0.854679802955665, + "grad_norm": 10.955806195385584, + "learning_rate": 8.999971709237832e-06, + "loss": 2.7320899963378906, + "step": 694 + }, + { + "epoch": 0.8559113300492611, + "grad_norm": 9.04416662510961, + "learning_rate": 8.99566792352371e-06, + "loss": 2.4416356086730957, + "step": 695 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 23.838296750423428, + "learning_rate": 8.991355930365013e-06, + "loss": 3.251642942428589, + "step": 696 + }, + { + "epoch": 0.8583743842364532, + "grad_norm": 46.67562045008053, + "learning_rate": 8.987035738618943e-06, + "loss": 2.9292666912078857, + "step": 697 + }, + { + "epoch": 0.8596059113300493, + "grad_norm": 16.120654552226135, + "learning_rate": 8.982707357159549e-06, + "loss": 2.804452896118164, + "step": 698 + }, + { + "epoch": 0.8608374384236454, + "grad_norm": 9.903594099304835, + "learning_rate": 8.978370794877691e-06, + "loss": 2.4997687339782715, + "step": 699 + }, + { + "epoch": 0.8620689655172413, + "grad_norm": 43.24532276513338, + "learning_rate": 8.974026060681044e-06, + "loss": 2.459716558456421, + "step": 700 + }, + { + "epoch": 0.8633004926108374, + "grad_norm": 6.407514764745252, + "learning_rate": 8.969673163494063e-06, + "loss": 2.57291316986084, + "step": 701 + }, + { + "epoch": 0.8645320197044335, + "grad_norm": 9.925965111489338, + "learning_rate": 8.965312112257973e-06, + "loss": 2.6452269554138184, + "step": 702 + }, + { + "epoch": 0.8657635467980296, + "grad_norm": 15.666974346483006, + "learning_rate": 8.960942915930749e-06, + "loss": 2.4361040592193604, + "step": 703 + }, + { + "epoch": 0.8669950738916257, + "grad_norm": 12.205200732214369, + "learning_rate": 8.956565583487092e-06, + "loss": 2.819046974182129, + "step": 704 + }, + { + "epoch": 0.8682266009852216, + "grad_norm": 23.813445037945687, + "learning_rate": 8.952180123918419e-06, + "loss": 3.536510944366455, + "step": 705 + }, + { + "epoch": 0.8694581280788177, + "grad_norm": 19.455220333084014, + "learning_rate": 8.94778654623284e-06, + "loss": 3.340855121612549, + "step": 706 + }, + { + "epoch": 0.8706896551724138, + "grad_norm": 15.988003472296347, + "learning_rate": 8.94338485945514e-06, + "loss": 2.7881288528442383, + "step": 707 + }, + { + "epoch": 0.8719211822660099, + "grad_norm": 18.44911045759373, + "learning_rate": 8.938975072626762e-06, + "loss": 3.119422197341919, + "step": 708 + }, + { + "epoch": 0.8731527093596059, + "grad_norm": 18.233236078041163, + "learning_rate": 8.934557194805787e-06, + "loss": 2.694553852081299, + "step": 709 + }, + { + "epoch": 0.874384236453202, + "grad_norm": 13.897466836595251, + "learning_rate": 8.930131235066914e-06, + "loss": 2.7162301540374756, + "step": 710 + }, + { + "epoch": 0.875615763546798, + "grad_norm": 9.86969530883223, + "learning_rate": 8.925697202501442e-06, + "loss": 2.4017574787139893, + "step": 711 + }, + { + "epoch": 0.8768472906403941, + "grad_norm": 22.07024366462836, + "learning_rate": 8.92125510621726e-06, + "loss": 2.491663932800293, + "step": 712 + }, + { + "epoch": 0.8780788177339901, + "grad_norm": 9.704458797982127, + "learning_rate": 8.916804955338807e-06, + "loss": 3.09323787689209, + "step": 713 + }, + { + "epoch": 0.8793103448275862, + "grad_norm": 14.245234888372442, + "learning_rate": 8.91234675900708e-06, + "loss": 3.0273964405059814, + "step": 714 + }, + { + "epoch": 0.8805418719211823, + "grad_norm": 10.033605733175728, + "learning_rate": 8.907880526379594e-06, + "loss": 2.5009701251983643, + "step": 715 + }, + { + "epoch": 0.8817733990147784, + "grad_norm": 14.04261929200788, + "learning_rate": 8.903406266630374e-06, + "loss": 2.7629752159118652, + "step": 716 + }, + { + "epoch": 0.8830049261083743, + "grad_norm": 19.00265649950274, + "learning_rate": 8.898923988949936e-06, + "loss": 2.5285563468933105, + "step": 717 + }, + { + "epoch": 0.8842364532019704, + "grad_norm": 11.293266358312355, + "learning_rate": 8.89443370254526e-06, + "loss": 2.6903738975524902, + "step": 718 + }, + { + "epoch": 0.8854679802955665, + "grad_norm": 4.918527502448237, + "learning_rate": 8.88993541663978e-06, + "loss": 2.8083925247192383, + "step": 719 + }, + { + "epoch": 0.8866995073891626, + "grad_norm": 14.900444889845339, + "learning_rate": 8.885429140473361e-06, + "loss": 3.0920486450195312, + "step": 720 + }, + { + "epoch": 0.8879310344827587, + "grad_norm": 15.55585461742265, + "learning_rate": 8.880914883302278e-06, + "loss": 2.7464776039123535, + "step": 721 + }, + { + "epoch": 0.8891625615763546, + "grad_norm": 28.218307852720514, + "learning_rate": 8.876392654399208e-06, + "loss": 2.7022242546081543, + "step": 722 + }, + { + "epoch": 0.8903940886699507, + "grad_norm": 7.9907639594026385, + "learning_rate": 8.871862463053193e-06, + "loss": 3.202090263366699, + "step": 723 + }, + { + "epoch": 0.8916256157635468, + "grad_norm": 12.370662746549176, + "learning_rate": 8.867324318569637e-06, + "loss": 2.792590856552124, + "step": 724 + }, + { + "epoch": 0.8928571428571429, + "grad_norm": 12.485149742498526, + "learning_rate": 8.862778230270276e-06, + "loss": 2.8918404579162598, + "step": 725 + }, + { + "epoch": 0.8940886699507389, + "grad_norm": 17.523163987955954, + "learning_rate": 8.858224207493165e-06, + "loss": 2.881380081176758, + "step": 726 + }, + { + "epoch": 0.895320197044335, + "grad_norm": 10.929446497515306, + "learning_rate": 8.85366225959266e-06, + "loss": 2.7197518348693848, + "step": 727 + }, + { + "epoch": 0.896551724137931, + "grad_norm": 14.58273441890301, + "learning_rate": 8.849092395939388e-06, + "loss": 2.8458380699157715, + "step": 728 + }, + { + "epoch": 0.8977832512315271, + "grad_norm": 9.240130544994555, + "learning_rate": 8.844514625920246e-06, + "loss": 2.5815629959106445, + "step": 729 + }, + { + "epoch": 0.8990147783251231, + "grad_norm": 12.536324929930204, + "learning_rate": 8.839928958938364e-06, + "loss": 2.388244867324829, + "step": 730 + }, + { + "epoch": 0.9002463054187192, + "grad_norm": 9.268565736662921, + "learning_rate": 8.835335404413096e-06, + "loss": 2.678809404373169, + "step": 731 + }, + { + "epoch": 0.9014778325123153, + "grad_norm": 13.664345931125762, + "learning_rate": 8.830733971779996e-06, + "loss": 3.4926984310150146, + "step": 732 + }, + { + "epoch": 0.9027093596059114, + "grad_norm": 8.38741339708261, + "learning_rate": 8.826124670490804e-06, + "loss": 3.143955707550049, + "step": 733 + }, + { + "epoch": 0.9039408866995073, + "grad_norm": 8.285169477267281, + "learning_rate": 8.821507510013416e-06, + "loss": 2.30763840675354, + "step": 734 + }, + { + "epoch": 0.9051724137931034, + "grad_norm": 11.658087999854533, + "learning_rate": 8.816882499831877e-06, + "loss": 3.2019965648651123, + "step": 735 + }, + { + "epoch": 0.9064039408866995, + "grad_norm": 11.03286006250671, + "learning_rate": 8.812249649446357e-06, + "loss": 2.5554118156433105, + "step": 736 + }, + { + "epoch": 0.9076354679802956, + "grad_norm": 10.468019775536181, + "learning_rate": 8.807608968373123e-06, + "loss": 2.6560721397399902, + "step": 737 + }, + { + "epoch": 0.9088669950738916, + "grad_norm": 21.753543318554573, + "learning_rate": 8.802960466144537e-06, + "loss": 3.2792091369628906, + "step": 738 + }, + { + "epoch": 0.9100985221674877, + "grad_norm": 8.801113008077715, + "learning_rate": 8.798304152309019e-06, + "loss": 2.4306914806365967, + "step": 739 + }, + { + "epoch": 0.9113300492610837, + "grad_norm": 11.427047186823343, + "learning_rate": 8.793640036431036e-06, + "loss": 2.791334867477417, + "step": 740 + }, + { + "epoch": 0.9125615763546798, + "grad_norm": 11.78168946860072, + "learning_rate": 8.788968128091084e-06, + "loss": 2.8516879081726074, + "step": 741 + }, + { + "epoch": 0.9137931034482759, + "grad_norm": 18.40294226204317, + "learning_rate": 8.784288436885663e-06, + "loss": 2.783674716949463, + "step": 742 + }, + { + "epoch": 0.9150246305418719, + "grad_norm": 9.042045966372719, + "learning_rate": 8.779600972427257e-06, + "loss": 2.538564443588257, + "step": 743 + }, + { + "epoch": 0.916256157635468, + "grad_norm": 21.11608056647587, + "learning_rate": 8.774905744344326e-06, + "loss": 2.603914260864258, + "step": 744 + }, + { + "epoch": 0.9174876847290641, + "grad_norm": 18.991966127623154, + "learning_rate": 8.770202762281267e-06, + "loss": 2.6232197284698486, + "step": 745 + }, + { + "epoch": 0.9187192118226601, + "grad_norm": 9.533961363388334, + "learning_rate": 8.765492035898406e-06, + "loss": 2.586906671524048, + "step": 746 + }, + { + "epoch": 0.9199507389162561, + "grad_norm": 11.702571386481814, + "learning_rate": 8.760773574871985e-06, + "loss": 3.019075870513916, + "step": 747 + }, + { + "epoch": 0.9211822660098522, + "grad_norm": 13.549959986762131, + "learning_rate": 8.756047388894123e-06, + "loss": 2.6554617881774902, + "step": 748 + }, + { + "epoch": 0.9224137931034483, + "grad_norm": 10.617389263376301, + "learning_rate": 8.751313487672815e-06, + "loss": 3.3622567653656006, + "step": 749 + }, + { + "epoch": 0.9236453201970444, + "grad_norm": 15.62971817318244, + "learning_rate": 8.746571880931896e-06, + "loss": 2.748253345489502, + "step": 750 + }, + { + "epoch": 0.9248768472906403, + "grad_norm": 10.680533586135248, + "learning_rate": 8.741822578411036e-06, + "loss": 3.358571767807007, + "step": 751 + }, + { + "epoch": 0.9261083743842364, + "grad_norm": 8.513871800316197, + "learning_rate": 8.737065589865709e-06, + "loss": 2.707146167755127, + "step": 752 + }, + { + "epoch": 0.9273399014778325, + "grad_norm": 15.06206429941032, + "learning_rate": 8.732300925067177e-06, + "loss": 2.782027006149292, + "step": 753 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 13.377969237833796, + "learning_rate": 8.727528593802469e-06, + "loss": 2.758582830429077, + "step": 754 + }, + { + "epoch": 0.9298029556650246, + "grad_norm": 12.5189792863405, + "learning_rate": 8.722748605874365e-06, + "loss": 2.798398971557617, + "step": 755 + }, + { + "epoch": 0.9310344827586207, + "grad_norm": 7.0237993457565056, + "learning_rate": 8.717960971101367e-06, + "loss": 2.8893141746520996, + "step": 756 + }, + { + "epoch": 0.9322660098522167, + "grad_norm": 13.108491345078546, + "learning_rate": 8.71316569931769e-06, + "loss": 2.8260703086853027, + "step": 757 + }, + { + "epoch": 0.9334975369458128, + "grad_norm": 13.669452983841648, + "learning_rate": 8.708362800373235e-06, + "loss": 2.8373727798461914, + "step": 758 + }, + { + "epoch": 0.9347290640394089, + "grad_norm": 9.979755254671996, + "learning_rate": 8.703552284133565e-06, + "loss": 2.7638840675354004, + "step": 759 + }, + { + "epoch": 0.9359605911330049, + "grad_norm": 12.948663627163679, + "learning_rate": 8.698734160479892e-06, + "loss": 3.436288833618164, + "step": 760 + }, + { + "epoch": 0.937192118226601, + "grad_norm": 11.570964225425659, + "learning_rate": 8.69390843930906e-06, + "loss": 2.9463398456573486, + "step": 761 + }, + { + "epoch": 0.9384236453201971, + "grad_norm": 7.2963116550893945, + "learning_rate": 8.68907513053351e-06, + "loss": 2.8301844596862793, + "step": 762 + }, + { + "epoch": 0.9396551724137931, + "grad_norm": 22.281531901716622, + "learning_rate": 8.684234244081274e-06, + "loss": 2.329922676086426, + "step": 763 + }, + { + "epoch": 0.9408866995073891, + "grad_norm": 7.190935942786577, + "learning_rate": 8.67938578989595e-06, + "loss": 2.2752580642700195, + "step": 764 + }, + { + "epoch": 0.9421182266009852, + "grad_norm": 15.09705330042877, + "learning_rate": 8.674529777936674e-06, + "loss": 2.549682378768921, + "step": 765 + }, + { + "epoch": 0.9433497536945813, + "grad_norm": 12.2992067648861, + "learning_rate": 8.669666218178114e-06, + "loss": 2.177875518798828, + "step": 766 + }, + { + "epoch": 0.9445812807881774, + "grad_norm": 17.93631082058447, + "learning_rate": 8.66479512061044e-06, + "loss": 3.4030704498291016, + "step": 767 + }, + { + "epoch": 0.9458128078817734, + "grad_norm": 12.986753736790972, + "learning_rate": 8.659916495239302e-06, + "loss": 2.8890881538391113, + "step": 768 + }, + { + "epoch": 0.9470443349753694, + "grad_norm": 7.80817017570662, + "learning_rate": 8.655030352085816e-06, + "loss": 2.6665287017822266, + "step": 769 + }, + { + "epoch": 0.9482758620689655, + "grad_norm": 8.892699708308717, + "learning_rate": 8.650136701186537e-06, + "loss": 2.8044798374176025, + "step": 770 + }, + { + "epoch": 0.9495073891625616, + "grad_norm": 12.053681412169821, + "learning_rate": 8.645235552593447e-06, + "loss": 2.809295654296875, + "step": 771 + }, + { + "epoch": 0.9507389162561576, + "grad_norm": 9.563242350440067, + "learning_rate": 8.640326916373923e-06, + "loss": 2.66239070892334, + "step": 772 + }, + { + "epoch": 0.9519704433497537, + "grad_norm": 11.397593157331492, + "learning_rate": 8.635410802610724e-06, + "loss": 3.0714645385742188, + "step": 773 + }, + { + "epoch": 0.9532019704433498, + "grad_norm": 11.141014900339497, + "learning_rate": 8.630487221401974e-06, + "loss": 2.5254178047180176, + "step": 774 + }, + { + "epoch": 0.9544334975369458, + "grad_norm": 61.411465635020065, + "learning_rate": 8.625556182861126e-06, + "loss": 2.4160585403442383, + "step": 775 + }, + { + "epoch": 0.9556650246305419, + "grad_norm": 15.426050261321397, + "learning_rate": 8.620617697116957e-06, + "loss": 2.972367763519287, + "step": 776 + }, + { + "epoch": 0.9568965517241379, + "grad_norm": 11.628713988566439, + "learning_rate": 8.615671774313543e-06, + "loss": 2.9206340312957764, + "step": 777 + }, + { + "epoch": 0.958128078817734, + "grad_norm": 9.967877704713992, + "learning_rate": 8.61071842461023e-06, + "loss": 3.192002296447754, + "step": 778 + }, + { + "epoch": 0.9593596059113301, + "grad_norm": 8.547648553030225, + "learning_rate": 8.605757658181626e-06, + "loss": 3.0840883255004883, + "step": 779 + }, + { + "epoch": 0.9605911330049262, + "grad_norm": 16.72939304902535, + "learning_rate": 8.60078948521757e-06, + "loss": 3.344426155090332, + "step": 780 + }, + { + "epoch": 0.9618226600985221, + "grad_norm": 14.860196885671575, + "learning_rate": 8.595813915923113e-06, + "loss": 2.887132406234741, + "step": 781 + }, + { + "epoch": 0.9630541871921182, + "grad_norm": 16.504287008501006, + "learning_rate": 8.590830960518502e-06, + "loss": 2.354299306869507, + "step": 782 + }, + { + "epoch": 0.9642857142857143, + "grad_norm": 14.601237072457945, + "learning_rate": 8.585840629239158e-06, + "loss": 2.574817657470703, + "step": 783 + }, + { + "epoch": 0.9655172413793104, + "grad_norm": 13.581762855163804, + "learning_rate": 8.580842932335644e-06, + "loss": 2.3363120555877686, + "step": 784 + }, + { + "epoch": 0.9667487684729064, + "grad_norm": 8.025263413179824, + "learning_rate": 8.575837880073663e-06, + "loss": 2.452828884124756, + "step": 785 + }, + { + "epoch": 0.9679802955665024, + "grad_norm": 13.65572211743131, + "learning_rate": 8.57082548273402e-06, + "loss": 2.8182177543640137, + "step": 786 + }, + { + "epoch": 0.9692118226600985, + "grad_norm": 22.799475456448384, + "learning_rate": 8.565805750612607e-06, + "loss": 3.2871310710906982, + "step": 787 + }, + { + "epoch": 0.9704433497536946, + "grad_norm": 18.807286124868686, + "learning_rate": 8.560778694020387e-06, + "loss": 2.959153175354004, + "step": 788 + }, + { + "epoch": 0.9716748768472906, + "grad_norm": 10.644957881123116, + "learning_rate": 8.555744323283364e-06, + "loss": 2.859107732772827, + "step": 789 + }, + { + "epoch": 0.9729064039408867, + "grad_norm": 9.606245608690044, + "learning_rate": 8.550702648742566e-06, + "loss": 2.8537421226501465, + "step": 790 + }, + { + "epoch": 0.9741379310344828, + "grad_norm": 11.364684038946328, + "learning_rate": 8.545653680754029e-06, + "loss": 2.77693772315979, + "step": 791 + }, + { + "epoch": 0.9753694581280788, + "grad_norm": 14.67534992412754, + "learning_rate": 8.540597429688761e-06, + "loss": 2.6960999965667725, + "step": 792 + }, + { + "epoch": 0.9766009852216748, + "grad_norm": 14.854511519014162, + "learning_rate": 8.535533905932739e-06, + "loss": 3.3942298889160156, + "step": 793 + }, + { + "epoch": 0.9778325123152709, + "grad_norm": 14.090660071520212, + "learning_rate": 8.530463119886871e-06, + "loss": 2.8664398193359375, + "step": 794 + }, + { + "epoch": 0.979064039408867, + "grad_norm": 15.427403822127253, + "learning_rate": 8.525385081966992e-06, + "loss": 3.023148536682129, + "step": 795 + }, + { + "epoch": 0.9802955665024631, + "grad_norm": 27.257958140053717, + "learning_rate": 8.520299802603826e-06, + "loss": 2.7858657836914062, + "step": 796 + }, + { + "epoch": 0.9815270935960592, + "grad_norm": 9.983005237782791, + "learning_rate": 8.515207292242969e-06, + "loss": 2.4665451049804688, + "step": 797 + }, + { + "epoch": 0.9827586206896551, + "grad_norm": 11.230050254551738, + "learning_rate": 8.510107561344876e-06, + "loss": 2.412269115447998, + "step": 798 + }, + { + "epoch": 0.9839901477832512, + "grad_norm": 18.314579409480903, + "learning_rate": 8.505000620384834e-06, + "loss": 3.08200740814209, + "step": 799 + }, + { + "epoch": 0.9852216748768473, + "grad_norm": 12.337382000838234, + "learning_rate": 8.499886479852935e-06, + "loss": 2.851126194000244, + "step": 800 + }, + { + "epoch": 0.9864532019704434, + "grad_norm": 16.588814488060716, + "learning_rate": 8.494765150254063e-06, + "loss": 2.7692008018493652, + "step": 801 + }, + { + "epoch": 0.9876847290640394, + "grad_norm": 10.778667289136193, + "learning_rate": 8.489636642107867e-06, + "loss": 2.045649290084839, + "step": 802 + }, + { + "epoch": 0.9889162561576355, + "grad_norm": 16.235817598925898, + "learning_rate": 8.484500965948746e-06, + "loss": 3.0901870727539062, + "step": 803 + }, + { + "epoch": 0.9901477832512315, + "grad_norm": 12.772148604340376, + "learning_rate": 8.479358132325815e-06, + "loss": 4.652253150939941, + "step": 804 + }, + { + "epoch": 0.9913793103448276, + "grad_norm": 30.743685192648066, + "learning_rate": 8.474208151802898e-06, + "loss": 3.992189884185791, + "step": 805 + }, + { + "epoch": 0.9926108374384236, + "grad_norm": 8.73281768145785, + "learning_rate": 8.469051034958496e-06, + "loss": 2.7150464057922363, + "step": 806 + }, + { + "epoch": 0.9938423645320197, + "grad_norm": 9.053303002827397, + "learning_rate": 8.46388679238577e-06, + "loss": 2.807770013809204, + "step": 807 + }, + { + "epoch": 0.9950738916256158, + "grad_norm": 10.322870900342917, + "learning_rate": 8.458715434692515e-06, + "loss": 2.386625289916992, + "step": 808 + }, + { + "epoch": 0.9963054187192119, + "grad_norm": 11.08968761753187, + "learning_rate": 8.453536972501146e-06, + "loss": 2.585855484008789, + "step": 809 + }, + { + "epoch": 0.9975369458128078, + "grad_norm": 17.867602225530977, + "learning_rate": 8.448351416448664e-06, + "loss": 1.9756630659103394, + "step": 810 + }, + { + "epoch": 0.9987684729064039, + "grad_norm": 10.119397987976452, + "learning_rate": 8.443158777186652e-06, + "loss": 2.844794511795044, + "step": 811 + }, + { + "epoch": 1.0, + "grad_norm": 7.980679156666685, + "learning_rate": 8.437959065381232e-06, + "loss": 2.8835721015930176, + "step": 812 + }, + { + "epoch": 1.001231527093596, + "grad_norm": 7.910274895398585, + "learning_rate": 8.432752291713058e-06, + "loss": 1.4173179864883423, + "step": 813 + }, + { + "epoch": 1.0024630541871922, + "grad_norm": 11.748384071481883, + "learning_rate": 8.427538466877294e-06, + "loss": 1.3743655681610107, + "step": 814 + }, + { + "epoch": 1.0036945812807883, + "grad_norm": 15.520903995356328, + "learning_rate": 8.422317601583576e-06, + "loss": 1.448968768119812, + "step": 815 + }, + { + "epoch": 1.0049261083743843, + "grad_norm": 10.900297712673185, + "learning_rate": 8.417089706556015e-06, + "loss": 1.4555410146713257, + "step": 816 + }, + { + "epoch": 1.0061576354679802, + "grad_norm": 14.944365989075473, + "learning_rate": 8.411854792533154e-06, + "loss": 1.3096075057983398, + "step": 817 + }, + { + "epoch": 1.0073891625615763, + "grad_norm": 28.47454569698464, + "learning_rate": 8.406612870267957e-06, + "loss": 1.8452348709106445, + "step": 818 + }, + { + "epoch": 1.0086206896551724, + "grad_norm": 15.756002610301957, + "learning_rate": 8.401363950527777e-06, + "loss": 1.6339285373687744, + "step": 819 + }, + { + "epoch": 1.0098522167487685, + "grad_norm": 6.289340790151406, + "learning_rate": 8.39610804409435e-06, + "loss": 1.714133381843567, + "step": 820 + }, + { + "epoch": 1.0110837438423645, + "grad_norm": 11.713574774158978, + "learning_rate": 8.390845161763756e-06, + "loss": 1.7810550928115845, + "step": 821 + }, + { + "epoch": 1.0123152709359606, + "grad_norm": 13.688437053039554, + "learning_rate": 8.385575314346408e-06, + "loss": 1.2523250579833984, + "step": 822 + }, + { + "epoch": 1.0135467980295567, + "grad_norm": 9.835238587520983, + "learning_rate": 8.380298512667023e-06, + "loss": 1.4618515968322754, + "step": 823 + }, + { + "epoch": 1.0147783251231528, + "grad_norm": 12.580368500055666, + "learning_rate": 8.375014767564606e-06, + "loss": 1.5188508033752441, + "step": 824 + }, + { + "epoch": 1.0160098522167487, + "grad_norm": 13.76649655840591, + "learning_rate": 8.369724089892423e-06, + "loss": 1.3847301006317139, + "step": 825 + }, + { + "epoch": 1.0172413793103448, + "grad_norm": 10.435853268719002, + "learning_rate": 8.364426490517978e-06, + "loss": 1.2926149368286133, + "step": 826 + }, + { + "epoch": 1.0184729064039408, + "grad_norm": 16.445003227804108, + "learning_rate": 8.359121980322992e-06, + "loss": 2.3063907623291016, + "step": 827 + }, + { + "epoch": 1.019704433497537, + "grad_norm": 11.557235656795728, + "learning_rate": 8.353810570203392e-06, + "loss": 1.8268505334854126, + "step": 828 + }, + { + "epoch": 1.020935960591133, + "grad_norm": 14.632274264873946, + "learning_rate": 8.34849227106926e-06, + "loss": 1.7018903493881226, + "step": 829 + }, + { + "epoch": 1.022167487684729, + "grad_norm": 11.600489411721503, + "learning_rate": 8.343167093844847e-06, + "loss": 1.228044867515564, + "step": 830 + }, + { + "epoch": 1.0233990147783252, + "grad_norm": 16.088239405853525, + "learning_rate": 8.337835049468517e-06, + "loss": 1.8953372240066528, + "step": 831 + }, + { + "epoch": 1.0246305418719213, + "grad_norm": 18.96191614490354, + "learning_rate": 8.332496148892748e-06, + "loss": 2.2595765590667725, + "step": 832 + }, + { + "epoch": 1.0258620689655173, + "grad_norm": 15.40920733163635, + "learning_rate": 8.327150403084105e-06, + "loss": 1.9772108793258667, + "step": 833 + }, + { + "epoch": 1.0270935960591132, + "grad_norm": 13.682030994380478, + "learning_rate": 8.321797823023201e-06, + "loss": 1.6397690773010254, + "step": 834 + }, + { + "epoch": 1.0283251231527093, + "grad_norm": 15.155038881668695, + "learning_rate": 8.3164384197047e-06, + "loss": 1.8092628717422485, + "step": 835 + }, + { + "epoch": 1.0295566502463054, + "grad_norm": 11.138568264810678, + "learning_rate": 8.311072204137272e-06, + "loss": 1.4974594116210938, + "step": 836 + }, + { + "epoch": 1.0307881773399015, + "grad_norm": 12.21109867389211, + "learning_rate": 8.305699187343586e-06, + "loss": 1.6198664903640747, + "step": 837 + }, + { + "epoch": 1.0320197044334976, + "grad_norm": 15.324750685835358, + "learning_rate": 8.300319380360278e-06, + "loss": 1.3746960163116455, + "step": 838 + }, + { + "epoch": 1.0332512315270936, + "grad_norm": 7.824249576144248, + "learning_rate": 8.294932794237936e-06, + "loss": 1.6171293258666992, + "step": 839 + }, + { + "epoch": 1.0344827586206897, + "grad_norm": 8.892333167572344, + "learning_rate": 8.289539440041066e-06, + "loss": 1.569738507270813, + "step": 840 + }, + { + "epoch": 1.0357142857142858, + "grad_norm": 11.852198048161208, + "learning_rate": 8.284139328848083e-06, + "loss": 1.2823517322540283, + "step": 841 + }, + { + "epoch": 1.0369458128078817, + "grad_norm": 8.261136034676777, + "learning_rate": 8.278732471751275e-06, + "loss": 1.646303415298462, + "step": 842 + }, + { + "epoch": 1.0381773399014778, + "grad_norm": 10.756475200770923, + "learning_rate": 8.273318879856794e-06, + "loss": 1.1557375192642212, + "step": 843 + }, + { + "epoch": 1.0394088669950738, + "grad_norm": 11.706598803766697, + "learning_rate": 8.26789856428462e-06, + "loss": 1.8793773651123047, + "step": 844 + }, + { + "epoch": 1.04064039408867, + "grad_norm": 12.96726521358098, + "learning_rate": 8.262471536168547e-06, + "loss": 1.8577170372009277, + "step": 845 + }, + { + "epoch": 1.041871921182266, + "grad_norm": 9.437922676603566, + "learning_rate": 8.257037806656156e-06, + "loss": 1.6104650497436523, + "step": 846 + }, + { + "epoch": 1.043103448275862, + "grad_norm": 9.578661144979, + "learning_rate": 8.251597386908791e-06, + "loss": 1.5425922870635986, + "step": 847 + }, + { + "epoch": 1.0443349753694582, + "grad_norm": 20.263987667471525, + "learning_rate": 8.246150288101544e-06, + "loss": 1.681383728981018, + "step": 848 + }, + { + "epoch": 1.0455665024630543, + "grad_norm": 13.601576634163374, + "learning_rate": 8.240696521423221e-06, + "loss": 1.7646219730377197, + "step": 849 + }, + { + "epoch": 1.0467980295566504, + "grad_norm": 7.679649660703675, + "learning_rate": 8.23523609807633e-06, + "loss": 1.445223331451416, + "step": 850 + }, + { + "epoch": 1.0480295566502462, + "grad_norm": 14.66829985016366, + "learning_rate": 8.229769029277044e-06, + "loss": 0.9492518901824951, + "step": 851 + }, + { + "epoch": 1.0492610837438423, + "grad_norm": 10.487758371701569, + "learning_rate": 8.224295326255194e-06, + "loss": 1.33433997631073, + "step": 852 + }, + { + "epoch": 1.0504926108374384, + "grad_norm": 10.533804685248148, + "learning_rate": 8.218815000254233e-06, + "loss": 1.712221384048462, + "step": 853 + }, + { + "epoch": 1.0517241379310345, + "grad_norm": 9.208819021387981, + "learning_rate": 8.213328062531223e-06, + "loss": 2.256254196166992, + "step": 854 + }, + { + "epoch": 1.0529556650246306, + "grad_norm": 20.4330836347585, + "learning_rate": 8.207834524356804e-06, + "loss": 1.1827871799468994, + "step": 855 + }, + { + "epoch": 1.0541871921182266, + "grad_norm": 16.459676535775454, + "learning_rate": 8.202334397015173e-06, + "loss": 1.831944465637207, + "step": 856 + }, + { + "epoch": 1.0554187192118227, + "grad_norm": 9.540607740889314, + "learning_rate": 8.196827691804066e-06, + "loss": 1.4239716529846191, + "step": 857 + }, + { + "epoch": 1.0566502463054188, + "grad_norm": 8.826612392912715, + "learning_rate": 8.191314420034728e-06, + "loss": 1.4468379020690918, + "step": 858 + }, + { + "epoch": 1.0578817733990147, + "grad_norm": 11.710928299860754, + "learning_rate": 8.185794593031889e-06, + "loss": 1.5082018375396729, + "step": 859 + }, + { + "epoch": 1.0591133004926108, + "grad_norm": 11.098469341339896, + "learning_rate": 8.180268222133748e-06, + "loss": 1.7838118076324463, + "step": 860 + }, + { + "epoch": 1.0603448275862069, + "grad_norm": 14.517325254327519, + "learning_rate": 8.174735318691946e-06, + "loss": 2.0072226524353027, + "step": 861 + }, + { + "epoch": 1.061576354679803, + "grad_norm": 15.816554295123568, + "learning_rate": 8.16919589407154e-06, + "loss": 1.521295189857483, + "step": 862 + }, + { + "epoch": 1.062807881773399, + "grad_norm": 10.07588615463877, + "learning_rate": 8.163649959650983e-06, + "loss": 1.790357232093811, + "step": 863 + }, + { + "epoch": 1.064039408866995, + "grad_norm": 12.92318973646725, + "learning_rate": 8.1580975268221e-06, + "loss": 1.602294683456421, + "step": 864 + }, + { + "epoch": 1.0652709359605912, + "grad_norm": 16.86268483373184, + "learning_rate": 8.152538606990065e-06, + "loss": 1.4220796823501587, + "step": 865 + }, + { + "epoch": 1.0665024630541873, + "grad_norm": 8.194415784575718, + "learning_rate": 8.146973211573378e-06, + "loss": 1.5728261470794678, + "step": 866 + }, + { + "epoch": 1.0677339901477834, + "grad_norm": 9.338981810977407, + "learning_rate": 8.141401352003834e-06, + "loss": 1.4759845733642578, + "step": 867 + }, + { + "epoch": 1.0689655172413792, + "grad_norm": 13.09579029321424, + "learning_rate": 8.135823039726513e-06, + "loss": 1.0524405241012573, + "step": 868 + }, + { + "epoch": 1.0701970443349753, + "grad_norm": 11.844876838448121, + "learning_rate": 8.130238286199747e-06, + "loss": 1.538460373878479, + "step": 869 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 14.772231246122598, + "learning_rate": 8.124647102895098e-06, + "loss": 1.1455146074295044, + "step": 870 + }, + { + "epoch": 1.0726600985221675, + "grad_norm": 6.428068633502984, + "learning_rate": 8.119049501297336e-06, + "loss": 1.5209722518920898, + "step": 871 + }, + { + "epoch": 1.0738916256157636, + "grad_norm": 8.28556104097166, + "learning_rate": 8.113445492904416e-06, + "loss": 1.359959602355957, + "step": 872 + }, + { + "epoch": 1.0751231527093597, + "grad_norm": 17.73488508571987, + "learning_rate": 8.107835089227446e-06, + "loss": 0.7508935928344727, + "step": 873 + }, + { + "epoch": 1.0763546798029557, + "grad_norm": 11.851747710913228, + "learning_rate": 8.102218301790686e-06, + "loss": 1.1200660467147827, + "step": 874 + }, + { + "epoch": 1.0775862068965518, + "grad_norm": 19.474238137735632, + "learning_rate": 8.096595142131491e-06, + "loss": 1.4502555131912231, + "step": 875 + }, + { + "epoch": 1.0788177339901477, + "grad_norm": 15.231876740076657, + "learning_rate": 8.090965621800317e-06, + "loss": 1.4533472061157227, + "step": 876 + }, + { + "epoch": 1.0800492610837438, + "grad_norm": 11.532100577512736, + "learning_rate": 8.085329752360683e-06, + "loss": 1.3467981815338135, + "step": 877 + }, + { + "epoch": 1.0812807881773399, + "grad_norm": 13.292362259628844, + "learning_rate": 8.079687545389144e-06, + "loss": 1.5720915794372559, + "step": 878 + }, + { + "epoch": 1.082512315270936, + "grad_norm": 9.912980730028881, + "learning_rate": 8.074039012475277e-06, + "loss": 0.9794504642486572, + "step": 879 + }, + { + "epoch": 1.083743842364532, + "grad_norm": 13.363222552608596, + "learning_rate": 8.068384165221657e-06, + "loss": 1.8581080436706543, + "step": 880 + }, + { + "epoch": 1.0849753694581281, + "grad_norm": 11.004102766432679, + "learning_rate": 8.062723015243821e-06, + "loss": 1.5307658910751343, + "step": 881 + }, + { + "epoch": 1.0862068965517242, + "grad_norm": 18.014628524050508, + "learning_rate": 8.05705557417026e-06, + "loss": 2.7890782356262207, + "step": 882 + }, + { + "epoch": 1.0874384236453203, + "grad_norm": 14.288061386453462, + "learning_rate": 8.051381853642385e-06, + "loss": 1.7938904762268066, + "step": 883 + }, + { + "epoch": 1.0886699507389164, + "grad_norm": 10.969422494881371, + "learning_rate": 8.0457018653145e-06, + "loss": 1.7228388786315918, + "step": 884 + }, + { + "epoch": 1.0899014778325122, + "grad_norm": 12.323796763628843, + "learning_rate": 8.04001562085379e-06, + "loss": 1.2761911153793335, + "step": 885 + }, + { + "epoch": 1.0911330049261083, + "grad_norm": 14.027385869484647, + "learning_rate": 8.034323131940288e-06, + "loss": 1.2001762390136719, + "step": 886 + }, + { + "epoch": 1.0923645320197044, + "grad_norm": 14.618738176876956, + "learning_rate": 8.028624410266856e-06, + "loss": 1.0602792501449585, + "step": 887 + }, + { + "epoch": 1.0935960591133005, + "grad_norm": 11.93157233511751, + "learning_rate": 8.022919467539157e-06, + "loss": 1.6093053817749023, + "step": 888 + }, + { + "epoch": 1.0948275862068966, + "grad_norm": 10.808992515441345, + "learning_rate": 8.017208315475633e-06, + "loss": 1.3845837116241455, + "step": 889 + }, + { + "epoch": 1.0960591133004927, + "grad_norm": 12.467752533525676, + "learning_rate": 8.011490965807479e-06, + "loss": 1.170523762702942, + "step": 890 + }, + { + "epoch": 1.0972906403940887, + "grad_norm": 17.336013797078692, + "learning_rate": 8.005767430278619e-06, + "loss": 2.2524640560150146, + "step": 891 + }, + { + "epoch": 1.0985221674876848, + "grad_norm": 15.86628802074285, + "learning_rate": 8.00003772064569e-06, + "loss": 1.900492787361145, + "step": 892 + }, + { + "epoch": 1.0997536945812807, + "grad_norm": 19.413325130840665, + "learning_rate": 7.994301848678006e-06, + "loss": 1.9371180534362793, + "step": 893 + }, + { + "epoch": 1.1009852216748768, + "grad_norm": 4.577148785717797, + "learning_rate": 7.98855982615754e-06, + "loss": 0.5737314224243164, + "step": 894 + }, + { + "epoch": 1.1022167487684729, + "grad_norm": 10.864604119199031, + "learning_rate": 7.982811664878897e-06, + "loss": 1.9806501865386963, + "step": 895 + }, + { + "epoch": 1.103448275862069, + "grad_norm": 8.224536911257772, + "learning_rate": 7.977057376649295e-06, + "loss": 1.0362755060195923, + "step": 896 + }, + { + "epoch": 1.104679802955665, + "grad_norm": 13.847190655637428, + "learning_rate": 7.971296973288534e-06, + "loss": 1.70633864402771, + "step": 897 + }, + { + "epoch": 1.1059113300492611, + "grad_norm": 11.90483842365472, + "learning_rate": 7.965530466628977e-06, + "loss": 1.787100911140442, + "step": 898 + }, + { + "epoch": 1.1071428571428572, + "grad_norm": 7.493522717607931, + "learning_rate": 7.959757868515526e-06, + "loss": 1.725630283355713, + "step": 899 + }, + { + "epoch": 1.1083743842364533, + "grad_norm": 12.386314393672189, + "learning_rate": 7.953979190805587e-06, + "loss": 1.216347575187683, + "step": 900 + }, + { + "epoch": 1.1096059113300494, + "grad_norm": 13.629660364524488, + "learning_rate": 7.948194445369065e-06, + "loss": 1.4683033227920532, + "step": 901 + }, + { + "epoch": 1.1108374384236452, + "grad_norm": 9.487923792239608, + "learning_rate": 7.942403644088319e-06, + "loss": 1.1516010761260986, + "step": 902 + }, + { + "epoch": 1.1120689655172413, + "grad_norm": 10.340810165841779, + "learning_rate": 7.936606798858154e-06, + "loss": 1.9040346145629883, + "step": 903 + }, + { + "epoch": 1.1133004926108374, + "grad_norm": 10.742162155829218, + "learning_rate": 7.930803921585787e-06, + "loss": 1.3092480897903442, + "step": 904 + }, + { + "epoch": 1.1145320197044335, + "grad_norm": 16.471340717748625, + "learning_rate": 7.924995024190825e-06, + "loss": 1.5384130477905273, + "step": 905 + }, + { + "epoch": 1.1157635467980296, + "grad_norm": 11.414793353837775, + "learning_rate": 7.91918011860524e-06, + "loss": 1.537634015083313, + "step": 906 + }, + { + "epoch": 1.1169950738916257, + "grad_norm": 12.176064899819426, + "learning_rate": 7.91335921677335e-06, + "loss": 1.7487473487854004, + "step": 907 + }, + { + "epoch": 1.1182266009852218, + "grad_norm": 12.781345279460623, + "learning_rate": 7.907532330651784e-06, + "loss": 2.079786539077759, + "step": 908 + }, + { + "epoch": 1.1194581280788178, + "grad_norm": 10.30058954805613, + "learning_rate": 7.901699472209467e-06, + "loss": 1.8143104314804077, + "step": 909 + }, + { + "epoch": 1.1206896551724137, + "grad_norm": 15.820572235657158, + "learning_rate": 7.89586065342759e-06, + "loss": 1.532914161682129, + "step": 910 + }, + { + "epoch": 1.1219211822660098, + "grad_norm": 26.078680608781927, + "learning_rate": 7.890015886299587e-06, + "loss": 1.2643623352050781, + "step": 911 + }, + { + "epoch": 1.1231527093596059, + "grad_norm": 15.92927259283418, + "learning_rate": 7.884165182831112e-06, + "loss": 1.9245643615722656, + "step": 912 + }, + { + "epoch": 1.124384236453202, + "grad_norm": 8.730585299979154, + "learning_rate": 7.878308555040012e-06, + "loss": 1.7177766561508179, + "step": 913 + }, + { + "epoch": 1.125615763546798, + "grad_norm": 13.722962990198047, + "learning_rate": 7.872446014956302e-06, + "loss": 1.8152745962142944, + "step": 914 + }, + { + "epoch": 1.1268472906403941, + "grad_norm": 12.040054937289696, + "learning_rate": 7.86657757462214e-06, + "loss": 1.1599400043487549, + "step": 915 + }, + { + "epoch": 1.1280788177339902, + "grad_norm": 17.03991328119548, + "learning_rate": 7.860703246091808e-06, + "loss": 2.191415786743164, + "step": 916 + }, + { + "epoch": 1.1293103448275863, + "grad_norm": 8.884816055359531, + "learning_rate": 7.85482304143168e-06, + "loss": 1.395401120185852, + "step": 917 + }, + { + "epoch": 1.1305418719211824, + "grad_norm": 10.016142876641439, + "learning_rate": 7.848936972720203e-06, + "loss": 1.3161064386367798, + "step": 918 + }, + { + "epoch": 1.1317733990147782, + "grad_norm": 10.950651931490869, + "learning_rate": 7.843045052047863e-06, + "loss": 1.1442368030548096, + "step": 919 + }, + { + "epoch": 1.1330049261083743, + "grad_norm": 11.684566217639523, + "learning_rate": 7.837147291517172e-06, + "loss": 1.7718126773834229, + "step": 920 + }, + { + "epoch": 1.1342364532019704, + "grad_norm": 38.19632435773612, + "learning_rate": 7.831243703242636e-06, + "loss": 0.8722761869430542, + "step": 921 + }, + { + "epoch": 1.1354679802955665, + "grad_norm": 13.481663274756508, + "learning_rate": 7.825334299350733e-06, + "loss": 1.5427806377410889, + "step": 922 + }, + { + "epoch": 1.1366995073891626, + "grad_norm": 12.916623808621747, + "learning_rate": 7.819419091979884e-06, + "loss": 1.1668936014175415, + "step": 923 + }, + { + "epoch": 1.1379310344827587, + "grad_norm": 33.988394562573184, + "learning_rate": 7.813498093280432e-06, + "loss": 1.1266424655914307, + "step": 924 + }, + { + "epoch": 1.1391625615763548, + "grad_norm": 12.20456485780647, + "learning_rate": 7.807571315414616e-06, + "loss": 1.493699550628662, + "step": 925 + }, + { + "epoch": 1.1403940886699506, + "grad_norm": 11.501099824006364, + "learning_rate": 7.801638770556547e-06, + "loss": 1.6297705173492432, + "step": 926 + }, + { + "epoch": 1.1416256157635467, + "grad_norm": 15.624448888450939, + "learning_rate": 7.795700470892177e-06, + "loss": 2.0215024948120117, + "step": 927 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 16.250949070025708, + "learning_rate": 7.78975642861929e-06, + "loss": 1.6887433528900146, + "step": 928 + }, + { + "epoch": 1.1440886699507389, + "grad_norm": 11.317008900299918, + "learning_rate": 7.783806655947454e-06, + "loss": 1.3021103143692017, + "step": 929 + }, + { + "epoch": 1.145320197044335, + "grad_norm": 18.00432398689311, + "learning_rate": 7.777851165098012e-06, + "loss": 1.2565847635269165, + "step": 930 + }, + { + "epoch": 1.146551724137931, + "grad_norm": 12.425268826770786, + "learning_rate": 7.771889968304054e-06, + "loss": 2.616732358932495, + "step": 931 + }, + { + "epoch": 1.1477832512315271, + "grad_norm": 8.224670550968264, + "learning_rate": 7.765923077810389e-06, + "loss": 1.4130675792694092, + "step": 932 + }, + { + "epoch": 1.1490147783251232, + "grad_norm": 10.969684493935905, + "learning_rate": 7.759950505873523e-06, + "loss": 1.4476386308670044, + "step": 933 + }, + { + "epoch": 1.1502463054187193, + "grad_norm": 11.651048950094761, + "learning_rate": 7.753972264761629e-06, + "loss": 2.25156307220459, + "step": 934 + }, + { + "epoch": 1.1514778325123154, + "grad_norm": 8.613574530576384, + "learning_rate": 7.747988366754529e-06, + "loss": 1.5051602125167847, + "step": 935 + }, + { + "epoch": 1.1527093596059113, + "grad_norm": 7.732488282674765, + "learning_rate": 7.74199882414366e-06, + "loss": 1.6275739669799805, + "step": 936 + }, + { + "epoch": 1.1539408866995073, + "grad_norm": 8.808852629450387, + "learning_rate": 7.736003649232058e-06, + "loss": 1.595947504043579, + "step": 937 + }, + { + "epoch": 1.1551724137931034, + "grad_norm": 9.458208308368622, + "learning_rate": 7.730002854334328e-06, + "loss": 1.4467124938964844, + "step": 938 + }, + { + "epoch": 1.1564039408866995, + "grad_norm": 9.214195809195965, + "learning_rate": 7.723996451776615e-06, + "loss": 1.2888911962509155, + "step": 939 + }, + { + "epoch": 1.1576354679802956, + "grad_norm": 9.788392349003187, + "learning_rate": 7.717984453896585e-06, + "loss": 1.2005081176757812, + "step": 940 + }, + { + "epoch": 1.1588669950738917, + "grad_norm": 13.47176609715776, + "learning_rate": 7.711966873043396e-06, + "loss": 1.5737872123718262, + "step": 941 + }, + { + "epoch": 1.1600985221674878, + "grad_norm": 14.995704151739991, + "learning_rate": 7.705943721577679e-06, + "loss": 1.929309368133545, + "step": 942 + }, + { + "epoch": 1.1613300492610836, + "grad_norm": 17.48600802078703, + "learning_rate": 7.699915011871502e-06, + "loss": 1.2395710945129395, + "step": 943 + }, + { + "epoch": 1.1625615763546797, + "grad_norm": 17.02963003158409, + "learning_rate": 7.693880756308349e-06, + "loss": 1.5058845281600952, + "step": 944 + }, + { + "epoch": 1.1637931034482758, + "grad_norm": 9.980347268918823, + "learning_rate": 7.687840967283102e-06, + "loss": 1.1811325550079346, + "step": 945 + }, + { + "epoch": 1.1650246305418719, + "grad_norm": 10.638678008803145, + "learning_rate": 7.681795657202004e-06, + "loss": 1.0631262063980103, + "step": 946 + }, + { + "epoch": 1.166256157635468, + "grad_norm": 13.280226823401785, + "learning_rate": 7.675744838482641e-06, + "loss": 1.8445112705230713, + "step": 947 + }, + { + "epoch": 1.167487684729064, + "grad_norm": 14.581956189852988, + "learning_rate": 7.669688523553913e-06, + "loss": 0.4735199511051178, + "step": 948 + }, + { + "epoch": 1.1687192118226601, + "grad_norm": 17.412681962110952, + "learning_rate": 7.66362672485601e-06, + "loss": 2.7862026691436768, + "step": 949 + }, + { + "epoch": 1.1699507389162562, + "grad_norm": 15.996981867868751, + "learning_rate": 7.657559454840386e-06, + "loss": 2.1690142154693604, + "step": 950 + }, + { + "epoch": 1.1711822660098523, + "grad_norm": 13.46492564795987, + "learning_rate": 7.651486725969736e-06, + "loss": 1.7143161296844482, + "step": 951 + }, + { + "epoch": 1.1724137931034484, + "grad_norm": 60.546763405202356, + "learning_rate": 7.645408550717966e-06, + "loss": 1.5288606882095337, + "step": 952 + }, + { + "epoch": 1.1736453201970443, + "grad_norm": 20.830833617022666, + "learning_rate": 7.639324941570165e-06, + "loss": 1.8929002285003662, + "step": 953 + }, + { + "epoch": 1.1748768472906403, + "grad_norm": 11.758979912185547, + "learning_rate": 7.633235911022592e-06, + "loss": 1.5853391885757446, + "step": 954 + }, + { + "epoch": 1.1761083743842364, + "grad_norm": 9.321138258104417, + "learning_rate": 7.627141471582635e-06, + "loss": 1.1136324405670166, + "step": 955 + }, + { + "epoch": 1.1773399014778325, + "grad_norm": 12.598497007373025, + "learning_rate": 7.6210416357687975e-06, + "loss": 1.868667721748352, + "step": 956 + }, + { + "epoch": 1.1785714285714286, + "grad_norm": 18.119098704002848, + "learning_rate": 7.614936416110668e-06, + "loss": 1.5594688653945923, + "step": 957 + }, + { + "epoch": 1.1798029556650247, + "grad_norm": 12.510268205050629, + "learning_rate": 7.6088258251488845e-06, + "loss": 2.3145830631256104, + "step": 958 + }, + { + "epoch": 1.1810344827586208, + "grad_norm": 21.45877658729593, + "learning_rate": 7.6027098754351306e-06, + "loss": 1.1473604440689087, + "step": 959 + }, + { + "epoch": 1.1822660098522166, + "grad_norm": 14.411977842812997, + "learning_rate": 7.596588579532087e-06, + "loss": 2.2835638523101807, + "step": 960 + }, + { + "epoch": 1.1834975369458127, + "grad_norm": 10.612962818159787, + "learning_rate": 7.590461950013424e-06, + "loss": 1.8787577152252197, + "step": 961 + }, + { + "epoch": 1.1847290640394088, + "grad_norm": 14.448843378652771, + "learning_rate": 7.584329999463763e-06, + "loss": 2.114804983139038, + "step": 962 + }, + { + "epoch": 1.185960591133005, + "grad_norm": 18.66312529631292, + "learning_rate": 7.578192740478656e-06, + "loss": 1.288927435874939, + "step": 963 + }, + { + "epoch": 1.187192118226601, + "grad_norm": 13.413800953526167, + "learning_rate": 7.572050185664558e-06, + "loss": 1.929607629776001, + "step": 964 + }, + { + "epoch": 1.188423645320197, + "grad_norm": 33.30553598268168, + "learning_rate": 7.565902347638806e-06, + "loss": 0.5397343039512634, + "step": 965 + }, + { + "epoch": 1.1896551724137931, + "grad_norm": 22.357001178408265, + "learning_rate": 7.559749239029584e-06, + "loss": 1.1908174753189087, + "step": 966 + }, + { + "epoch": 1.1908866995073892, + "grad_norm": 12.645033432851402, + "learning_rate": 7.553590872475909e-06, + "loss": 1.624518632888794, + "step": 967 + }, + { + "epoch": 1.1921182266009853, + "grad_norm": 7.88579724345472, + "learning_rate": 7.547427260627586e-06, + "loss": 1.3011376857757568, + "step": 968 + }, + { + "epoch": 1.1933497536945814, + "grad_norm": 12.668296763355277, + "learning_rate": 7.541258416145212e-06, + "loss": 1.2930490970611572, + "step": 969 + }, + { + "epoch": 1.1945812807881773, + "grad_norm": 13.656364437533624, + "learning_rate": 7.535084351700117e-06, + "loss": 1.34272038936615, + "step": 970 + }, + { + "epoch": 1.1958128078817734, + "grad_norm": 7.953764967047039, + "learning_rate": 7.528905079974358e-06, + "loss": 1.2804269790649414, + "step": 971 + }, + { + "epoch": 1.1970443349753694, + "grad_norm": 30.30009152991955, + "learning_rate": 7.522720613660691e-06, + "loss": 1.7138396501541138, + "step": 972 + }, + { + "epoch": 1.1982758620689655, + "grad_norm": 11.304720421109014, + "learning_rate": 7.5165309654625405e-06, + "loss": 1.7358574867248535, + "step": 973 + }, + { + "epoch": 1.1995073891625616, + "grad_norm": 12.764936977199811, + "learning_rate": 7.510336148093975e-06, + "loss": 1.0514552593231201, + "step": 974 + }, + { + "epoch": 1.2007389162561577, + "grad_norm": 13.712017805285841, + "learning_rate": 7.504136174279679e-06, + "loss": 1.7314313650131226, + "step": 975 + }, + { + "epoch": 1.2019704433497538, + "grad_norm": 10.549295388514395, + "learning_rate": 7.4979310567549315e-06, + "loss": 1.0069202184677124, + "step": 976 + }, + { + "epoch": 1.2032019704433496, + "grad_norm": 11.995004609846932, + "learning_rate": 7.491720808265576e-06, + "loss": 1.1851680278778076, + "step": 977 + }, + { + "epoch": 1.2044334975369457, + "grad_norm": 9.145447142909285, + "learning_rate": 7.485505441567995e-06, + "loss": 1.355776309967041, + "step": 978 + }, + { + "epoch": 1.2056650246305418, + "grad_norm": 12.426586307445273, + "learning_rate": 7.4792849694290846e-06, + "loss": 1.5034677982330322, + "step": 979 + }, + { + "epoch": 1.206896551724138, + "grad_norm": 10.349726791509415, + "learning_rate": 7.473059404626229e-06, + "loss": 1.9321900606155396, + "step": 980 + }, + { + "epoch": 1.208128078817734, + "grad_norm": 15.998756607416226, + "learning_rate": 7.466828759947271e-06, + "loss": 1.4899095296859741, + "step": 981 + }, + { + "epoch": 1.20935960591133, + "grad_norm": 9.148483453369403, + "learning_rate": 7.46059304819049e-06, + "loss": 1.9984737634658813, + "step": 982 + }, + { + "epoch": 1.2105911330049262, + "grad_norm": 14.110455851158502, + "learning_rate": 7.454352282164572e-06, + "loss": 1.7756625413894653, + "step": 983 + }, + { + "epoch": 1.2118226600985222, + "grad_norm": 14.856359846911952, + "learning_rate": 7.448106474688588e-06, + "loss": 1.47117018699646, + "step": 984 + }, + { + "epoch": 1.2130541871921183, + "grad_norm": 11.010014718420686, + "learning_rate": 7.441855638591958e-06, + "loss": 1.3485603332519531, + "step": 985 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 9.111669104291623, + "learning_rate": 7.435599786714438e-06, + "loss": 1.3982055187225342, + "step": 986 + }, + { + "epoch": 1.2155172413793103, + "grad_norm": 8.494506145789243, + "learning_rate": 7.429338931906085e-06, + "loss": 1.4942795038223267, + "step": 987 + }, + { + "epoch": 1.2167487684729064, + "grad_norm": 10.475857134873458, + "learning_rate": 7.423073087027228e-06, + "loss": 2.227587938308716, + "step": 988 + }, + { + "epoch": 1.2179802955665024, + "grad_norm": 14.131512244457296, + "learning_rate": 7.416802264948455e-06, + "loss": 1.523234486579895, + "step": 989 + }, + { + "epoch": 1.2192118226600985, + "grad_norm": 26.011485441346537, + "learning_rate": 7.410526478550568e-06, + "loss": 3.9873814582824707, + "step": 990 + }, + { + "epoch": 1.2204433497536946, + "grad_norm": 8.306933788704631, + "learning_rate": 7.404245740724573e-06, + "loss": 1.279615044593811, + "step": 991 + }, + { + "epoch": 1.2216748768472907, + "grad_norm": 9.109406755351628, + "learning_rate": 7.3979600643716435e-06, + "loss": 0.9347010850906372, + "step": 992 + }, + { + "epoch": 1.2229064039408868, + "grad_norm": 8.57513677802596, + "learning_rate": 7.391669462403096e-06, + "loss": 1.9017002582550049, + "step": 993 + }, + { + "epoch": 1.2241379310344827, + "grad_norm": 10.325069084719962, + "learning_rate": 7.385373947740369e-06, + "loss": 1.7247897386550903, + "step": 994 + }, + { + "epoch": 1.2253694581280787, + "grad_norm": 13.648497855444653, + "learning_rate": 7.379073533314988e-06, + "loss": 0.7111251950263977, + "step": 995 + }, + { + "epoch": 1.2266009852216748, + "grad_norm": 10.812707758109589, + "learning_rate": 7.372768232068544e-06, + "loss": 0.9086591601371765, + "step": 996 + }, + { + "epoch": 1.227832512315271, + "grad_norm": 11.1413160950967, + "learning_rate": 7.366458056952668e-06, + "loss": 1.6426423788070679, + "step": 997 + }, + { + "epoch": 1.229064039408867, + "grad_norm": 19.358982299314505, + "learning_rate": 7.360143020929e-06, + "loss": 1.2501566410064697, + "step": 998 + }, + { + "epoch": 1.230295566502463, + "grad_norm": 15.35154457763416, + "learning_rate": 7.353823136969167e-06, + "loss": 2.263824939727783, + "step": 999 + }, + { + "epoch": 1.2315270935960592, + "grad_norm": 15.502037939673096, + "learning_rate": 7.34749841805475e-06, + "loss": 1.3503868579864502, + "step": 1000 + }, + { + "epoch": 1.2327586206896552, + "grad_norm": 12.387685564521446, + "learning_rate": 7.341168877177267e-06, + "loss": 1.2844277620315552, + "step": 1001 + }, + { + "epoch": 1.2339901477832513, + "grad_norm": 21.028406448646585, + "learning_rate": 7.3348345273381365e-06, + "loss": 1.823725700378418, + "step": 1002 + }, + { + "epoch": 1.2352216748768472, + "grad_norm": 12.53431965462443, + "learning_rate": 7.328495381548655e-06, + "loss": 1.8349339962005615, + "step": 1003 + }, + { + "epoch": 1.2364532019704433, + "grad_norm": 11.75012181314542, + "learning_rate": 7.322151452829972e-06, + "loss": 1.431024432182312, + "step": 1004 + }, + { + "epoch": 1.2376847290640394, + "grad_norm": 7.268447687614364, + "learning_rate": 7.315802754213062e-06, + "loss": 0.8406596183776855, + "step": 1005 + }, + { + "epoch": 1.2389162561576355, + "grad_norm": 16.476664169610704, + "learning_rate": 7.309449298738696e-06, + "loss": 1.7037804126739502, + "step": 1006 + }, + { + "epoch": 1.2401477832512315, + "grad_norm": 10.719400575974607, + "learning_rate": 7.303091099457418e-06, + "loss": 1.4264461994171143, + "step": 1007 + }, + { + "epoch": 1.2413793103448276, + "grad_norm": 11.634717084876037, + "learning_rate": 7.296728169429511e-06, + "loss": 2.502678632736206, + "step": 1008 + }, + { + "epoch": 1.2426108374384237, + "grad_norm": 9.436373278027489, + "learning_rate": 7.290360521724984e-06, + "loss": 1.5582114458084106, + "step": 1009 + }, + { + "epoch": 1.2438423645320198, + "grad_norm": 10.373164591549747, + "learning_rate": 7.283988169423526e-06, + "loss": 1.494875192642212, + "step": 1010 + }, + { + "epoch": 1.2450738916256157, + "grad_norm": 13.031187040858585, + "learning_rate": 7.277611125614499e-06, + "loss": 1.886913776397705, + "step": 1011 + }, + { + "epoch": 1.2463054187192117, + "grad_norm": 19.92471933345498, + "learning_rate": 7.271229403396896e-06, + "loss": 1.8913657665252686, + "step": 1012 + }, + { + "epoch": 1.2475369458128078, + "grad_norm": 21.8856932814209, + "learning_rate": 7.264843015879321e-06, + "loss": 1.1614234447479248, + "step": 1013 + }, + { + "epoch": 1.248768472906404, + "grad_norm": 11.581317439717322, + "learning_rate": 7.258451976179967e-06, + "loss": 1.6838147640228271, + "step": 1014 + }, + { + "epoch": 1.25, + "grad_norm": 14.274704649607155, + "learning_rate": 7.25205629742657e-06, + "loss": 1.1039239168167114, + "step": 1015 + }, + { + "epoch": 1.251231527093596, + "grad_norm": 10.222730157124893, + "learning_rate": 7.245655992756406e-06, + "loss": 1.519346833229065, + "step": 1016 + }, + { + "epoch": 1.2524630541871922, + "grad_norm": 8.325249693832719, + "learning_rate": 7.2392510753162516e-06, + "loss": 1.0175197124481201, + "step": 1017 + }, + { + "epoch": 1.2536945812807883, + "grad_norm": 12.766382857494223, + "learning_rate": 7.232841558262354e-06, + "loss": 0.9778202772140503, + "step": 1018 + }, + { + "epoch": 1.2549261083743843, + "grad_norm": 17.499343558391605, + "learning_rate": 7.226427454760412e-06, + "loss": 1.8379024267196655, + "step": 1019 + }, + { + "epoch": 1.2561576354679804, + "grad_norm": 11.150234617545141, + "learning_rate": 7.2200087779855435e-06, + "loss": 1.8412721157073975, + "step": 1020 + }, + { + "epoch": 1.2573891625615763, + "grad_norm": 8.992400726896724, + "learning_rate": 7.213585541122261e-06, + "loss": 1.8508501052856445, + "step": 1021 + }, + { + "epoch": 1.2586206896551724, + "grad_norm": 12.44309006439825, + "learning_rate": 7.207157757364445e-06, + "loss": 1.3070871829986572, + "step": 1022 + }, + { + "epoch": 1.2598522167487685, + "grad_norm": 12.840031276685824, + "learning_rate": 7.200725439915314e-06, + "loss": 2.1278223991394043, + "step": 1023 + }, + { + "epoch": 1.2610837438423645, + "grad_norm": 8.633495704921142, + "learning_rate": 7.194288601987398e-06, + "loss": 1.0636892318725586, + "step": 1024 + }, + { + "epoch": 1.2623152709359606, + "grad_norm": 10.874767223460788, + "learning_rate": 7.187847256802518e-06, + "loss": 1.7365200519561768, + "step": 1025 + }, + { + "epoch": 1.2635467980295567, + "grad_norm": 12.21472476387578, + "learning_rate": 7.181401417591746e-06, + "loss": 1.792116403579712, + "step": 1026 + }, + { + "epoch": 1.2647783251231526, + "grad_norm": 8.787411821208611, + "learning_rate": 7.174951097595389e-06, + "loss": 1.3348667621612549, + "step": 1027 + }, + { + "epoch": 1.2660098522167487, + "grad_norm": 17.72872801553084, + "learning_rate": 7.168496310062959e-06, + "loss": 1.677919626235962, + "step": 1028 + }, + { + "epoch": 1.2672413793103448, + "grad_norm": 13.283913596324016, + "learning_rate": 7.162037068253141e-06, + "loss": 1.1518199443817139, + "step": 1029 + }, + { + "epoch": 1.2684729064039408, + "grad_norm": 7.98681967422814, + "learning_rate": 7.155573385433772e-06, + "loss": 2.1126716136932373, + "step": 1030 + }, + { + "epoch": 1.269704433497537, + "grad_norm": 11.20695829302969, + "learning_rate": 7.149105274881815e-06, + "loss": 1.3222094774246216, + "step": 1031 + }, + { + "epoch": 1.270935960591133, + "grad_norm": 9.408024877970139, + "learning_rate": 7.1426327498833174e-06, + "loss": 0.8843763470649719, + "step": 1032 + }, + { + "epoch": 1.272167487684729, + "grad_norm": 18.111033872908873, + "learning_rate": 7.136155823733405e-06, + "loss": 1.3091545104980469, + "step": 1033 + }, + { + "epoch": 1.2733990147783252, + "grad_norm": 11.598349915801498, + "learning_rate": 7.129674509736237e-06, + "loss": 1.4408364295959473, + "step": 1034 + }, + { + "epoch": 1.2746305418719213, + "grad_norm": 17.074081488403696, + "learning_rate": 7.12318882120499e-06, + "loss": 1.330906867980957, + "step": 1035 + }, + { + "epoch": 1.2758620689655173, + "grad_norm": 11.931439673872655, + "learning_rate": 7.116698771461825e-06, + "loss": 1.9561724662780762, + "step": 1036 + }, + { + "epoch": 1.2770935960591134, + "grad_norm": 14.506364150634404, + "learning_rate": 7.110204373837857e-06, + "loss": 2.185842275619507, + "step": 1037 + }, + { + "epoch": 1.2783251231527093, + "grad_norm": 8.783423067272876, + "learning_rate": 7.1037056416731395e-06, + "loss": 1.724360466003418, + "step": 1038 + }, + { + "epoch": 1.2795566502463054, + "grad_norm": 10.548795738669158, + "learning_rate": 7.097202588316625e-06, + "loss": 1.179841160774231, + "step": 1039 + }, + { + "epoch": 1.2807881773399015, + "grad_norm": 14.968187776502731, + "learning_rate": 7.090695227126141e-06, + "loss": 1.6783604621887207, + "step": 1040 + }, + { + "epoch": 1.2820197044334976, + "grad_norm": 10.70366989067169, + "learning_rate": 7.084183571468368e-06, + "loss": 1.761925220489502, + "step": 1041 + }, + { + "epoch": 1.2832512315270936, + "grad_norm": 12.9020971876039, + "learning_rate": 7.077667634718801e-06, + "loss": 0.9297729134559631, + "step": 1042 + }, + { + "epoch": 1.2844827586206897, + "grad_norm": 12.446847341840494, + "learning_rate": 7.071147430261738e-06, + "loss": 1.6091060638427734, + "step": 1043 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 8.238449521430923, + "learning_rate": 7.064622971490234e-06, + "loss": 1.280853509902954, + "step": 1044 + }, + { + "epoch": 1.2869458128078817, + "grad_norm": 10.190528956891907, + "learning_rate": 7.058094271806091e-06, + "loss": 2.4095635414123535, + "step": 1045 + }, + { + "epoch": 1.2881773399014778, + "grad_norm": 12.210698142217534, + "learning_rate": 7.051561344619814e-06, + "loss": 1.7969441413879395, + "step": 1046 + }, + { + "epoch": 1.2894088669950738, + "grad_norm": 8.968258930303262, + "learning_rate": 7.045024203350598e-06, + "loss": 2.4331698417663574, + "step": 1047 + }, + { + "epoch": 1.29064039408867, + "grad_norm": 9.034111830970843, + "learning_rate": 7.0384828614262905e-06, + "loss": 1.336733341217041, + "step": 1048 + }, + { + "epoch": 1.291871921182266, + "grad_norm": 9.358643506315515, + "learning_rate": 7.031937332283367e-06, + "loss": 1.2959213256835938, + "step": 1049 + }, + { + "epoch": 1.293103448275862, + "grad_norm": 15.177096960870495, + "learning_rate": 7.025387629366912e-06, + "loss": 1.0095289945602417, + "step": 1050 + }, + { + "epoch": 1.2943349753694582, + "grad_norm": 8.708668143059782, + "learning_rate": 7.018833766130571e-06, + "loss": 1.8314733505249023, + "step": 1051 + }, + { + "epoch": 1.2955665024630543, + "grad_norm": 12.10925693324793, + "learning_rate": 7.012275756036544e-06, + "loss": 1.121436595916748, + "step": 1052 + }, + { + "epoch": 1.2967980295566504, + "grad_norm": 20.569530418297486, + "learning_rate": 7.0057136125555456e-06, + "loss": 1.5652289390563965, + "step": 1053 + }, + { + "epoch": 1.2980295566502464, + "grad_norm": 14.018717429311812, + "learning_rate": 6.999147349166779e-06, + "loss": 1.1146215200424194, + "step": 1054 + }, + { + "epoch": 1.2992610837438423, + "grad_norm": 17.232932273490494, + "learning_rate": 6.9925769793579165e-06, + "loss": 2.400024175643921, + "step": 1055 + }, + { + "epoch": 1.3004926108374384, + "grad_norm": 11.12761938883381, + "learning_rate": 6.986002516625058e-06, + "loss": 1.7114648818969727, + "step": 1056 + }, + { + "epoch": 1.3017241379310345, + "grad_norm": 10.072038004871871, + "learning_rate": 6.979423974472714e-06, + "loss": 1.5338797569274902, + "step": 1057 + }, + { + "epoch": 1.3029556650246306, + "grad_norm": 8.812025010262357, + "learning_rate": 6.972841366413777e-06, + "loss": 1.078460931777954, + "step": 1058 + }, + { + "epoch": 1.3041871921182266, + "grad_norm": 11.356722343645167, + "learning_rate": 6.966254705969484e-06, + "loss": 1.5467915534973145, + "step": 1059 + }, + { + "epoch": 1.3054187192118227, + "grad_norm": 14.67705148794403, + "learning_rate": 6.959664006669404e-06, + "loss": 1.2715568542480469, + "step": 1060 + }, + { + "epoch": 1.3066502463054186, + "grad_norm": 8.890913561904203, + "learning_rate": 6.953069282051397e-06, + "loss": 1.887066125869751, + "step": 1061 + }, + { + "epoch": 1.3078817733990147, + "grad_norm": 10.182269397064065, + "learning_rate": 6.946470545661593e-06, + "loss": 1.419116497039795, + "step": 1062 + }, + { + "epoch": 1.3091133004926108, + "grad_norm": 8.361662711059678, + "learning_rate": 6.939867811054365e-06, + "loss": 1.3843079805374146, + "step": 1063 + }, + { + "epoch": 1.3103448275862069, + "grad_norm": 27.704350160970165, + "learning_rate": 6.9332610917922915e-06, + "loss": 2.5894885063171387, + "step": 1064 + }, + { + "epoch": 1.311576354679803, + "grad_norm": 16.17688431061018, + "learning_rate": 6.9266504014461425e-06, + "loss": 1.6600944995880127, + "step": 1065 + }, + { + "epoch": 1.312807881773399, + "grad_norm": 18.474330510936614, + "learning_rate": 6.920035753594845e-06, + "loss": 1.7698057889938354, + "step": 1066 + }, + { + "epoch": 1.314039408866995, + "grad_norm": 9.914676123570585, + "learning_rate": 6.913417161825449e-06, + "loss": 1.5610848665237427, + "step": 1067 + }, + { + "epoch": 1.3152709359605912, + "grad_norm": 8.489359998020161, + "learning_rate": 6.906794639733114e-06, + "loss": 1.6380643844604492, + "step": 1068 + }, + { + "epoch": 1.3165024630541873, + "grad_norm": 8.9532327938231, + "learning_rate": 6.900168200921065e-06, + "loss": 1.390014410018921, + "step": 1069 + }, + { + "epoch": 1.3177339901477834, + "grad_norm": 10.45013795003969, + "learning_rate": 6.893537859000576e-06, + "loss": 1.6589158773422241, + "step": 1070 + }, + { + "epoch": 1.3189655172413794, + "grad_norm": 12.436644147912617, + "learning_rate": 6.886903627590938e-06, + "loss": 1.5524673461914062, + "step": 1071 + }, + { + "epoch": 1.3201970443349753, + "grad_norm": 12.240484798983633, + "learning_rate": 6.880265520319434e-06, + "loss": 2.0204474925994873, + "step": 1072 + }, + { + "epoch": 1.3214285714285714, + "grad_norm": 10.928634620934101, + "learning_rate": 6.8736235508213024e-06, + "loss": 1.7947957515716553, + "step": 1073 + }, + { + "epoch": 1.3226600985221675, + "grad_norm": 12.192004015491179, + "learning_rate": 6.866977732739719e-06, + "loss": 1.6154756546020508, + "step": 1074 + }, + { + "epoch": 1.3238916256157636, + "grad_norm": 10.239608872921218, + "learning_rate": 6.860328079725764e-06, + "loss": 1.419677734375, + "step": 1075 + }, + { + "epoch": 1.3251231527093597, + "grad_norm": 11.490298083513249, + "learning_rate": 6.853674605438395e-06, + "loss": 2.2221052646636963, + "step": 1076 + }, + { + "epoch": 1.3263546798029557, + "grad_norm": 10.796599749157496, + "learning_rate": 6.84701732354442e-06, + "loss": 1.6474840641021729, + "step": 1077 + }, + { + "epoch": 1.3275862068965516, + "grad_norm": 16.05723789346112, + "learning_rate": 6.840356247718466e-06, + "loss": 2.035231828689575, + "step": 1078 + }, + { + "epoch": 1.3288177339901477, + "grad_norm": 12.127949373836048, + "learning_rate": 6.8336913916429515e-06, + "loss": 1.5675947666168213, + "step": 1079 + }, + { + "epoch": 1.3300492610837438, + "grad_norm": 12.561351822867852, + "learning_rate": 6.827022769008068e-06, + "loss": 1.2241394519805908, + "step": 1080 + }, + { + "epoch": 1.3312807881773399, + "grad_norm": 10.606640209072971, + "learning_rate": 6.820350393511732e-06, + "loss": 1.3507403135299683, + "step": 1081 + }, + { + "epoch": 1.332512315270936, + "grad_norm": 23.44696719245062, + "learning_rate": 6.81367427885958e-06, + "loss": 2.256551504135132, + "step": 1082 + }, + { + "epoch": 1.333743842364532, + "grad_norm": 17.90054749002111, + "learning_rate": 6.806994438764922e-06, + "loss": 1.6412163972854614, + "step": 1083 + }, + { + "epoch": 1.3349753694581281, + "grad_norm": 10.747816339677435, + "learning_rate": 6.8003108869487225e-06, + "loss": 1.500988483428955, + "step": 1084 + }, + { + "epoch": 1.3362068965517242, + "grad_norm": 8.86240548184895, + "learning_rate": 6.79362363713957e-06, + "loss": 1.4661070108413696, + "step": 1085 + }, + { + "epoch": 1.3374384236453203, + "grad_norm": 9.325455271074935, + "learning_rate": 6.786932703073648e-06, + "loss": 1.42755126953125, + "step": 1086 + }, + { + "epoch": 1.3386699507389164, + "grad_norm": 14.863538954404982, + "learning_rate": 6.780238098494711e-06, + "loss": 1.165806531906128, + "step": 1087 + }, + { + "epoch": 1.3399014778325122, + "grad_norm": 21.9332846077213, + "learning_rate": 6.773539837154051e-06, + "loss": 1.3795387744903564, + "step": 1088 + }, + { + "epoch": 1.3411330049261083, + "grad_norm": 15.064922882268542, + "learning_rate": 6.766837932810468e-06, + "loss": 1.3203850984573364, + "step": 1089 + }, + { + "epoch": 1.3423645320197044, + "grad_norm": 12.791071147567429, + "learning_rate": 6.7601323992302525e-06, + "loss": 1.645883321762085, + "step": 1090 + }, + { + "epoch": 1.3435960591133005, + "grad_norm": 8.072143933965927, + "learning_rate": 6.7534232501871425e-06, + "loss": 1.6904821395874023, + "step": 1091 + }, + { + "epoch": 1.3448275862068966, + "grad_norm": 8.711589751937055, + "learning_rate": 6.7467104994623066e-06, + "loss": 1.332162618637085, + "step": 1092 + }, + { + "epoch": 1.3460591133004927, + "grad_norm": 9.451447429997234, + "learning_rate": 6.7399941608443096e-06, + "loss": 1.4389145374298096, + "step": 1093 + }, + { + "epoch": 1.3472906403940887, + "grad_norm": 7.323937666452591, + "learning_rate": 6.733274248129089e-06, + "loss": 1.6597908735275269, + "step": 1094 + }, + { + "epoch": 1.3485221674876846, + "grad_norm": 17.883843051775344, + "learning_rate": 6.72655077511992e-06, + "loss": 0.9520257711410522, + "step": 1095 + }, + { + "epoch": 1.3497536945812807, + "grad_norm": 11.223594087909252, + "learning_rate": 6.719823755627393e-06, + "loss": 1.4488117694854736, + "step": 1096 + }, + { + "epoch": 1.3509852216748768, + "grad_norm": 7.977177991617555, + "learning_rate": 6.713093203469384e-06, + "loss": 1.5133984088897705, + "step": 1097 + }, + { + "epoch": 1.3522167487684729, + "grad_norm": 8.682066451366055, + "learning_rate": 6.7063591324710234e-06, + "loss": 1.846522569656372, + "step": 1098 + }, + { + "epoch": 1.353448275862069, + "grad_norm": 12.792486675857687, + "learning_rate": 6.6996215564646705e-06, + "loss": 0.9724826812744141, + "step": 1099 + }, + { + "epoch": 1.354679802955665, + "grad_norm": 11.989074062954435, + "learning_rate": 6.692880489289885e-06, + "loss": 1.24728262424469, + "step": 1100 + }, + { + "epoch": 1.3559113300492611, + "grad_norm": 22.734635359059652, + "learning_rate": 6.686135944793395e-06, + "loss": 1.5332872867584229, + "step": 1101 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 11.645074036110657, + "learning_rate": 6.679387936829076e-06, + "loss": 1.5978163480758667, + "step": 1102 + }, + { + "epoch": 1.3583743842364533, + "grad_norm": 9.223736434919791, + "learning_rate": 6.672636479257912e-06, + "loss": 2.05710506439209, + "step": 1103 + }, + { + "epoch": 1.3596059113300494, + "grad_norm": 11.48041589458668, + "learning_rate": 6.665881585947981e-06, + "loss": 1.667812466621399, + "step": 1104 + }, + { + "epoch": 1.3608374384236452, + "grad_norm": 18.141176793209265, + "learning_rate": 6.659123270774406e-06, + "loss": 1.3053381443023682, + "step": 1105 + }, + { + "epoch": 1.3620689655172413, + "grad_norm": 11.11014263526773, + "learning_rate": 6.652361547619352e-06, + "loss": 1.5228716135025024, + "step": 1106 + }, + { + "epoch": 1.3633004926108374, + "grad_norm": 11.869708221541034, + "learning_rate": 6.645596430371976e-06, + "loss": 1.3818378448486328, + "step": 1107 + }, + { + "epoch": 1.3645320197044335, + "grad_norm": 11.298030039811758, + "learning_rate": 6.6388279329284065e-06, + "loss": 1.217841386795044, + "step": 1108 + }, + { + "epoch": 1.3657635467980296, + "grad_norm": 21.11595250544298, + "learning_rate": 6.632056069191723e-06, + "loss": 1.4309210777282715, + "step": 1109 + }, + { + "epoch": 1.3669950738916257, + "grad_norm": 13.7021684816084, + "learning_rate": 6.6252808530719095e-06, + "loss": 1.3015059232711792, + "step": 1110 + }, + { + "epoch": 1.3682266009852218, + "grad_norm": 11.973457349226296, + "learning_rate": 6.618502298485844e-06, + "loss": 1.2734256982803345, + "step": 1111 + }, + { + "epoch": 1.3694581280788176, + "grad_norm": 15.830227785424638, + "learning_rate": 6.611720419357257e-06, + "loss": 1.907172441482544, + "step": 1112 + }, + { + "epoch": 1.3706896551724137, + "grad_norm": 10.756653422484252, + "learning_rate": 6.604935229616711e-06, + "loss": 1.1207606792449951, + "step": 1113 + }, + { + "epoch": 1.3719211822660098, + "grad_norm": 12.736281126843005, + "learning_rate": 6.598146743201568e-06, + "loss": 2.3231239318847656, + "step": 1114 + }, + { + "epoch": 1.3731527093596059, + "grad_norm": 11.597483205953116, + "learning_rate": 6.5913549740559606e-06, + "loss": 1.1395865678787231, + "step": 1115 + }, + { + "epoch": 1.374384236453202, + "grad_norm": 14.754486017260728, + "learning_rate": 6.584559936130763e-06, + "loss": 3.1981747150421143, + "step": 1116 + }, + { + "epoch": 1.375615763546798, + "grad_norm": 12.874438415282308, + "learning_rate": 6.57776164338357e-06, + "loss": 1.7495319843292236, + "step": 1117 + }, + { + "epoch": 1.3768472906403941, + "grad_norm": 12.611228408009778, + "learning_rate": 6.570960109778655e-06, + "loss": 1.3304778337478638, + "step": 1118 + }, + { + "epoch": 1.3780788177339902, + "grad_norm": 11.84441441686591, + "learning_rate": 6.564155349286952e-06, + "loss": 1.6510775089263916, + "step": 1119 + }, + { + "epoch": 1.3793103448275863, + "grad_norm": 13.996316648052032, + "learning_rate": 6.557347375886022e-06, + "loss": 1.3382967710494995, + "step": 1120 + }, + { + "epoch": 1.3805418719211824, + "grad_norm": 11.351524045305764, + "learning_rate": 6.550536203560028e-06, + "loss": 1.418992042541504, + "step": 1121 + }, + { + "epoch": 1.3817733990147782, + "grad_norm": 16.848897992260934, + "learning_rate": 6.543721846299701e-06, + "loss": 1.4815843105316162, + "step": 1122 + }, + { + "epoch": 1.3830049261083743, + "grad_norm": 13.42654012333122, + "learning_rate": 6.536904318102314e-06, + "loss": 0.9823303818702698, + "step": 1123 + }, + { + "epoch": 1.3842364532019704, + "grad_norm": 11.039715301984293, + "learning_rate": 6.530083632971658e-06, + "loss": 1.4959704875946045, + "step": 1124 + }, + { + "epoch": 1.3854679802955665, + "grad_norm": 13.499332863560449, + "learning_rate": 6.523259804918001e-06, + "loss": 1.3141142129898071, + "step": 1125 + }, + { + "epoch": 1.3866995073891626, + "grad_norm": 18.762617405218773, + "learning_rate": 6.516432847958074e-06, + "loss": 1.60225248336792, + "step": 1126 + }, + { + "epoch": 1.3879310344827587, + "grad_norm": 12.76800599324204, + "learning_rate": 6.509602776115029e-06, + "loss": 1.7774362564086914, + "step": 1127 + }, + { + "epoch": 1.3891625615763548, + "grad_norm": 14.80003777651342, + "learning_rate": 6.502769603418423e-06, + "loss": 1.3750693798065186, + "step": 1128 + }, + { + "epoch": 1.3903940886699506, + "grad_norm": 12.846839874270263, + "learning_rate": 6.4959333439041775e-06, + "loss": 1.0850452184677124, + "step": 1129 + }, + { + "epoch": 1.3916256157635467, + "grad_norm": 17.175837709461415, + "learning_rate": 6.489094011614553e-06, + "loss": 1.7440909147262573, + "step": 1130 + }, + { + "epoch": 1.3928571428571428, + "grad_norm": 8.34120026588026, + "learning_rate": 6.482251620598129e-06, + "loss": 1.5904752016067505, + "step": 1131 + }, + { + "epoch": 1.3940886699507389, + "grad_norm": 10.398946422121055, + "learning_rate": 6.47540618490976e-06, + "loss": 1.4864649772644043, + "step": 1132 + }, + { + "epoch": 1.395320197044335, + "grad_norm": 16.449380414530893, + "learning_rate": 6.4685577186105595e-06, + "loss": 1.3869491815567017, + "step": 1133 + }, + { + "epoch": 1.396551724137931, + "grad_norm": 11.708541771363075, + "learning_rate": 6.461706235767866e-06, + "loss": 1.1635327339172363, + "step": 1134 + }, + { + "epoch": 1.3977832512315271, + "grad_norm": 6.616557203492817, + "learning_rate": 6.45485175045521e-06, + "loss": 1.4063032865524292, + "step": 1135 + }, + { + "epoch": 1.3990147783251232, + "grad_norm": 26.794737362449215, + "learning_rate": 6.447994276752293e-06, + "loss": 2.2259998321533203, + "step": 1136 + }, + { + "epoch": 1.4002463054187193, + "grad_norm": 10.511853223185177, + "learning_rate": 6.441133828744954e-06, + "loss": 1.2302110195159912, + "step": 1137 + }, + { + "epoch": 1.4014778325123154, + "grad_norm": 10.658533095355526, + "learning_rate": 6.434270420525144e-06, + "loss": 1.2579622268676758, + "step": 1138 + }, + { + "epoch": 1.4027093596059113, + "grad_norm": 18.972607390940905, + "learning_rate": 6.427404066190889e-06, + "loss": 1.6761397123336792, + "step": 1139 + }, + { + "epoch": 1.4039408866995073, + "grad_norm": 12.172946298049014, + "learning_rate": 6.4205347798462704e-06, + "loss": 1.3933346271514893, + "step": 1140 + }, + { + "epoch": 1.4051724137931034, + "grad_norm": 13.681043588339055, + "learning_rate": 6.413662575601391e-06, + "loss": 1.9914003610610962, + "step": 1141 + }, + { + "epoch": 1.4064039408866995, + "grad_norm": 16.934291210588032, + "learning_rate": 6.406787467572348e-06, + "loss": 1.9921746253967285, + "step": 1142 + }, + { + "epoch": 1.4076354679802956, + "grad_norm": 18.5006822922468, + "learning_rate": 6.3999094698812055e-06, + "loss": 1.6050479412078857, + "step": 1143 + }, + { + "epoch": 1.4088669950738917, + "grad_norm": 12.333046745730567, + "learning_rate": 6.393028596655958e-06, + "loss": 1.7796251773834229, + "step": 1144 + }, + { + "epoch": 1.4100985221674878, + "grad_norm": 18.731485023409682, + "learning_rate": 6.386144862030508e-06, + "loss": 1.7936886548995972, + "step": 1145 + }, + { + "epoch": 1.4113300492610836, + "grad_norm": 18.37593149730845, + "learning_rate": 6.37925828014464e-06, + "loss": 1.9030745029449463, + "step": 1146 + }, + { + "epoch": 1.4125615763546797, + "grad_norm": 11.93678536094984, + "learning_rate": 6.3723688651439806e-06, + "loss": 1.4446496963500977, + "step": 1147 + }, + { + "epoch": 1.4137931034482758, + "grad_norm": 13.469356839829612, + "learning_rate": 6.365476631179982e-06, + "loss": 1.5683763027191162, + "step": 1148 + }, + { + "epoch": 1.4150246305418719, + "grad_norm": 8.488203520402504, + "learning_rate": 6.358581592409881e-06, + "loss": 1.4594917297363281, + "step": 1149 + }, + { + "epoch": 1.416256157635468, + "grad_norm": 25.588676453436552, + "learning_rate": 6.351683762996681e-06, + "loss": 2.1706323623657227, + "step": 1150 + }, + { + "epoch": 1.417487684729064, + "grad_norm": 11.810343655960159, + "learning_rate": 6.344783157109114e-06, + "loss": 1.835425853729248, + "step": 1151 + }, + { + "epoch": 1.4187192118226601, + "grad_norm": 10.711102782202751, + "learning_rate": 6.337879788921615e-06, + "loss": 1.1789867877960205, + "step": 1152 + }, + { + "epoch": 1.4199507389162562, + "grad_norm": 28.404082710690172, + "learning_rate": 6.3309736726142965e-06, + "loss": 1.9750418663024902, + "step": 1153 + }, + { + "epoch": 1.4211822660098523, + "grad_norm": 14.02852797567233, + "learning_rate": 6.324064822372913e-06, + "loss": 1.4960027933120728, + "step": 1154 + }, + { + "epoch": 1.4224137931034484, + "grad_norm": 20.199397968799044, + "learning_rate": 6.317153252388834e-06, + "loss": 1.12904691696167, + "step": 1155 + }, + { + "epoch": 1.4236453201970443, + "grad_norm": 10.534543863605384, + "learning_rate": 6.31023897685902e-06, + "loss": 1.30333411693573, + "step": 1156 + }, + { + "epoch": 1.4248768472906403, + "grad_norm": 15.66714236524435, + "learning_rate": 6.303322009985984e-06, + "loss": 2.5257434844970703, + "step": 1157 + }, + { + "epoch": 1.4261083743842364, + "grad_norm": 18.065303617570866, + "learning_rate": 6.296402365977767e-06, + "loss": 0.9684423208236694, + "step": 1158 + }, + { + "epoch": 1.4273399014778325, + "grad_norm": 12.376925974972115, + "learning_rate": 6.289480059047915e-06, + "loss": 1.457876443862915, + "step": 1159 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 9.05985921030025, + "learning_rate": 6.282555103415438e-06, + "loss": 1.5206713676452637, + "step": 1160 + }, + { + "epoch": 1.4298029556650247, + "grad_norm": 14.712390356925216, + "learning_rate": 6.27562751330479e-06, + "loss": 1.680644154548645, + "step": 1161 + }, + { + "epoch": 1.4310344827586206, + "grad_norm": 9.786932196785434, + "learning_rate": 6.268697302945835e-06, + "loss": 1.3704997301101685, + "step": 1162 + }, + { + "epoch": 1.4322660098522166, + "grad_norm": 9.786888328650228, + "learning_rate": 6.261764486573816e-06, + "loss": 1.3250343799591064, + "step": 1163 + }, + { + "epoch": 1.4334975369458127, + "grad_norm": 15.544106160026582, + "learning_rate": 6.254829078429336e-06, + "loss": 1.8659427165985107, + "step": 1164 + }, + { + "epoch": 1.4347290640394088, + "grad_norm": 21.077430430000046, + "learning_rate": 6.247891092758319e-06, + "loss": 2.043597936630249, + "step": 1165 + }, + { + "epoch": 1.435960591133005, + "grad_norm": 12.476492579798414, + "learning_rate": 6.24095054381198e-06, + "loss": 1.5634403228759766, + "step": 1166 + }, + { + "epoch": 1.437192118226601, + "grad_norm": 11.790373846414154, + "learning_rate": 6.2340074458468014e-06, + "loss": 1.1179373264312744, + "step": 1167 + }, + { + "epoch": 1.438423645320197, + "grad_norm": 13.094422813370427, + "learning_rate": 6.227061813124504e-06, + "loss": 0.8013179302215576, + "step": 1168 + }, + { + "epoch": 1.4396551724137931, + "grad_norm": 9.010286032120458, + "learning_rate": 6.220113659912012e-06, + "loss": 1.3435392379760742, + "step": 1169 + }, + { + "epoch": 1.4408866995073892, + "grad_norm": 8.308881028265468, + "learning_rate": 6.213163000481428e-06, + "loss": 1.39387845993042, + "step": 1170 + }, + { + "epoch": 1.4421182266009853, + "grad_norm": 8.499060752632088, + "learning_rate": 6.206209849110001e-06, + "loss": 1.760462760925293, + "step": 1171 + }, + { + "epoch": 1.4433497536945814, + "grad_norm": 13.348998095152654, + "learning_rate": 6.1992542200801035e-06, + "loss": 1.0812432765960693, + "step": 1172 + }, + { + "epoch": 1.4445812807881773, + "grad_norm": 9.263056193047571, + "learning_rate": 6.1922961276791925e-06, + "loss": 1.7997616529464722, + "step": 1173 + }, + { + "epoch": 1.4458128078817734, + "grad_norm": 11.646405372699148, + "learning_rate": 6.1853355861997854e-06, + "loss": 1.773369550704956, + "step": 1174 + }, + { + "epoch": 1.4470443349753694, + "grad_norm": 8.442523087287304, + "learning_rate": 6.1783726099394324e-06, + "loss": 1.9488962888717651, + "step": 1175 + }, + { + "epoch": 1.4482758620689655, + "grad_norm": 13.332895782423902, + "learning_rate": 6.171407213200683e-06, + "loss": 1.6990149021148682, + "step": 1176 + }, + { + "epoch": 1.4495073891625616, + "grad_norm": 12.609637801512664, + "learning_rate": 6.164439410291061e-06, + "loss": 1.4307571649551392, + "step": 1177 + }, + { + "epoch": 1.4507389162561577, + "grad_norm": 8.885074358137231, + "learning_rate": 6.157469215523031e-06, + "loss": 1.3966443538665771, + "step": 1178 + }, + { + "epoch": 1.4519704433497536, + "grad_norm": 16.606696238854166, + "learning_rate": 6.150496643213969e-06, + "loss": 1.2959253787994385, + "step": 1179 + }, + { + "epoch": 1.4532019704433496, + "grad_norm": 16.898895754976742, + "learning_rate": 6.143521707686137e-06, + "loss": 1.4992142915725708, + "step": 1180 + }, + { + "epoch": 1.4544334975369457, + "grad_norm": 16.69245348652636, + "learning_rate": 6.136544423266651e-06, + "loss": 1.8196167945861816, + "step": 1181 + }, + { + "epoch": 1.4556650246305418, + "grad_norm": 16.12465629803321, + "learning_rate": 6.129564804287454e-06, + "loss": 1.4129021167755127, + "step": 1182 + }, + { + "epoch": 1.456896551724138, + "grad_norm": 15.4451290282442, + "learning_rate": 6.122582865085278e-06, + "loss": 1.2009403705596924, + "step": 1183 + }, + { + "epoch": 1.458128078817734, + "grad_norm": 12.682560791700617, + "learning_rate": 6.115598620001627e-06, + "loss": 1.698556661605835, + "step": 1184 + }, + { + "epoch": 1.45935960591133, + "grad_norm": 21.414952415899087, + "learning_rate": 6.108612083382739e-06, + "loss": 1.5819299221038818, + "step": 1185 + }, + { + "epoch": 1.4605911330049262, + "grad_norm": 10.708464197323055, + "learning_rate": 6.101623269579558e-06, + "loss": 1.374379277229309, + "step": 1186 + }, + { + "epoch": 1.4618226600985222, + "grad_norm": 10.541290993965774, + "learning_rate": 6.094632192947711e-06, + "loss": 1.2765707969665527, + "step": 1187 + }, + { + "epoch": 1.4630541871921183, + "grad_norm": 14.098976562454558, + "learning_rate": 6.087638867847465e-06, + "loss": 1.2740705013275146, + "step": 1188 + }, + { + "epoch": 1.4642857142857144, + "grad_norm": 11.154362665776958, + "learning_rate": 6.08064330864371e-06, + "loss": 1.6713453531265259, + "step": 1189 + }, + { + "epoch": 1.4655172413793103, + "grad_norm": 9.205967970627526, + "learning_rate": 6.073645529705926e-06, + "loss": 1.6606531143188477, + "step": 1190 + }, + { + "epoch": 1.4667487684729064, + "grad_norm": 12.43504089477338, + "learning_rate": 6.066645545408149e-06, + "loss": 1.6029870510101318, + "step": 1191 + }, + { + "epoch": 1.4679802955665024, + "grad_norm": 9.416406443647212, + "learning_rate": 6.0596433701289506e-06, + "loss": 1.5884819030761719, + "step": 1192 + }, + { + "epoch": 1.4692118226600985, + "grad_norm": 17.434043985101933, + "learning_rate": 6.052639018251394e-06, + "loss": 1.060668706893921, + "step": 1193 + }, + { + "epoch": 1.4704433497536946, + "grad_norm": 13.053843358479307, + "learning_rate": 6.045632504163024e-06, + "loss": 1.6251329183578491, + "step": 1194 + }, + { + "epoch": 1.4716748768472907, + "grad_norm": 10.200397873502725, + "learning_rate": 6.03862384225582e-06, + "loss": 1.2369989156723022, + "step": 1195 + }, + { + "epoch": 1.4729064039408866, + "grad_norm": 28.146477262288624, + "learning_rate": 6.0316130469261705e-06, + "loss": 1.7742527723312378, + "step": 1196 + }, + { + "epoch": 1.4741379310344827, + "grad_norm": 6.380213600146285, + "learning_rate": 6.024600132574855e-06, + "loss": 2.166492223739624, + "step": 1197 + }, + { + "epoch": 1.4753694581280787, + "grad_norm": 15.296147923549848, + "learning_rate": 6.017585113606999e-06, + "loss": 1.8031083345413208, + "step": 1198 + }, + { + "epoch": 1.4766009852216748, + "grad_norm": 7.580109898357858, + "learning_rate": 6.010568004432055e-06, + "loss": 1.9966365098953247, + "step": 1199 + }, + { + "epoch": 1.477832512315271, + "grad_norm": 13.138438168026589, + "learning_rate": 6.0035488194637645e-06, + "loss": 1.0125515460968018, + "step": 1200 + }, + { + "epoch": 1.479064039408867, + "grad_norm": 16.24938270382903, + "learning_rate": 5.9965275731201364e-06, + "loss": 1.1396842002868652, + "step": 1201 + }, + { + "epoch": 1.480295566502463, + "grad_norm": 6.579201955073294, + "learning_rate": 5.9895042798234125e-06, + "loss": 1.8030388355255127, + "step": 1202 + }, + { + "epoch": 1.4815270935960592, + "grad_norm": 12.865016417179568, + "learning_rate": 5.982478954000042e-06, + "loss": 1.4132026433944702, + "step": 1203 + }, + { + "epoch": 1.4827586206896552, + "grad_norm": 11.295614659779242, + "learning_rate": 5.975451610080643e-06, + "loss": 1.3726825714111328, + "step": 1204 + }, + { + "epoch": 1.4839901477832513, + "grad_norm": 10.812781562044428, + "learning_rate": 5.968422262499983e-06, + "loss": 2.3436193466186523, + "step": 1205 + }, + { + "epoch": 1.4852216748768474, + "grad_norm": 11.93980767439267, + "learning_rate": 5.961390925696947e-06, + "loss": 1.4617420434951782, + "step": 1206 + }, + { + "epoch": 1.4864532019704433, + "grad_norm": 8.752972802049372, + "learning_rate": 5.9543576141145035e-06, + "loss": 1.8050814867019653, + "step": 1207 + }, + { + "epoch": 1.4876847290640394, + "grad_norm": 11.595272230479853, + "learning_rate": 5.947322342199674e-06, + "loss": 1.3426543474197388, + "step": 1208 + }, + { + "epoch": 1.4889162561576355, + "grad_norm": 13.910327681643947, + "learning_rate": 5.940285124403517e-06, + "loss": 1.6211771965026855, + "step": 1209 + }, + { + "epoch": 1.4901477832512315, + "grad_norm": 10.490417163522949, + "learning_rate": 5.933245975181074e-06, + "loss": 2.695863723754883, + "step": 1210 + }, + { + "epoch": 1.4913793103448276, + "grad_norm": 9.128292414129945, + "learning_rate": 5.926204908991366e-06, + "loss": 1.2743788957595825, + "step": 1211 + }, + { + "epoch": 1.4926108374384237, + "grad_norm": 11.2632445422812, + "learning_rate": 5.919161940297346e-06, + "loss": 1.652765154838562, + "step": 1212 + }, + { + "epoch": 1.4938423645320196, + "grad_norm": 7.537950882850561, + "learning_rate": 5.912117083565874e-06, + "loss": 1.3720670938491821, + "step": 1213 + }, + { + "epoch": 1.4950738916256157, + "grad_norm": 14.216763115794095, + "learning_rate": 5.905070353267692e-06, + "loss": 1.222616195678711, + "step": 1214 + }, + { + "epoch": 1.4963054187192117, + "grad_norm": 7.742622309976788, + "learning_rate": 5.898021763877388e-06, + "loss": 1.4626069068908691, + "step": 1215 + }, + { + "epoch": 1.4975369458128078, + "grad_norm": 10.044815043339705, + "learning_rate": 5.890971329873366e-06, + "loss": 1.7813634872436523, + "step": 1216 + }, + { + "epoch": 1.498768472906404, + "grad_norm": 14.537107209189347, + "learning_rate": 5.883919065737827e-06, + "loss": 0.5114675760269165, + "step": 1217 + }, + { + "epoch": 1.5, + "grad_norm": 18.934697309871, + "learning_rate": 5.876864985956722e-06, + "loss": 1.6000962257385254, + "step": 1218 + }, + { + "epoch": 1.501231527093596, + "grad_norm": 33.040397060632486, + "learning_rate": 5.869809105019738e-06, + "loss": 1.5674512386322021, + "step": 1219 + }, + { + "epoch": 1.5024630541871922, + "grad_norm": 9.76563438047523, + "learning_rate": 5.8627514374202596e-06, + "loss": 1.7963311672210693, + "step": 1220 + }, + { + "epoch": 1.5036945812807883, + "grad_norm": 10.95067481959561, + "learning_rate": 5.85569199765534e-06, + "loss": 1.1649596691131592, + "step": 1221 + }, + { + "epoch": 1.5049261083743843, + "grad_norm": 9.927773449159055, + "learning_rate": 5.848630800225678e-06, + "loss": 1.140197992324829, + "step": 1222 + }, + { + "epoch": 1.5061576354679804, + "grad_norm": 8.586607717080767, + "learning_rate": 5.841567859635572e-06, + "loss": 1.865435242652893, + "step": 1223 + }, + { + "epoch": 1.5073891625615765, + "grad_norm": 11.43552738813054, + "learning_rate": 5.834503190392912e-06, + "loss": 1.457642912864685, + "step": 1224 + }, + { + "epoch": 1.5086206896551724, + "grad_norm": 9.978595721772624, + "learning_rate": 5.827436807009133e-06, + "loss": 1.3783336877822876, + "step": 1225 + }, + { + "epoch": 1.5098522167487685, + "grad_norm": 10.75044326200818, + "learning_rate": 5.8203687239991935e-06, + "loss": 1.939549207687378, + "step": 1226 + }, + { + "epoch": 1.5110837438423645, + "grad_norm": 14.588582695069839, + "learning_rate": 5.813298955881542e-06, + "loss": 1.3607597351074219, + "step": 1227 + }, + { + "epoch": 1.5123152709359606, + "grad_norm": 9.739548479278437, + "learning_rate": 5.806227517178089e-06, + "loss": 0.81966233253479, + "step": 1228 + }, + { + "epoch": 1.5135467980295565, + "grad_norm": 7.228017183846092, + "learning_rate": 5.799154422414174e-06, + "loss": 0.9481602311134338, + "step": 1229 + }, + { + "epoch": 1.5147783251231526, + "grad_norm": 16.162733557662186, + "learning_rate": 5.79207968611854e-06, + "loss": 1.3550889492034912, + "step": 1230 + }, + { + "epoch": 1.5160098522167487, + "grad_norm": 10.696500057601996, + "learning_rate": 5.785003322823307e-06, + "loss": 2.022425889968872, + "step": 1231 + }, + { + "epoch": 1.5172413793103448, + "grad_norm": 8.501680697642309, + "learning_rate": 5.777925347063927e-06, + "loss": 1.5649950504302979, + "step": 1232 + }, + { + "epoch": 1.5184729064039408, + "grad_norm": 12.185227926920462, + "learning_rate": 5.7708457733791715e-06, + "loss": 1.9720977544784546, + "step": 1233 + }, + { + "epoch": 1.519704433497537, + "grad_norm": 12.902985615374178, + "learning_rate": 5.763764616311089e-06, + "loss": 1.0029213428497314, + "step": 1234 + }, + { + "epoch": 1.520935960591133, + "grad_norm": 13.23751211435566, + "learning_rate": 5.756681890404987e-06, + "loss": 1.8926727771759033, + "step": 1235 + }, + { + "epoch": 1.522167487684729, + "grad_norm": 8.93687413398984, + "learning_rate": 5.749597610209392e-06, + "loss": 1.462761402130127, + "step": 1236 + }, + { + "epoch": 1.5233990147783252, + "grad_norm": 10.137890971821589, + "learning_rate": 5.7425117902760195e-06, + "loss": 2.1467416286468506, + "step": 1237 + }, + { + "epoch": 1.5246305418719213, + "grad_norm": 12.30865285718221, + "learning_rate": 5.7354244451597545e-06, + "loss": 1.191473364830017, + "step": 1238 + }, + { + "epoch": 1.5258620689655173, + "grad_norm": 11.884477014639941, + "learning_rate": 5.72833558941861e-06, + "loss": 0.896723210811615, + "step": 1239 + }, + { + "epoch": 1.5270935960591134, + "grad_norm": 12.439035862181441, + "learning_rate": 5.721245237613704e-06, + "loss": 0.8741526007652283, + "step": 1240 + }, + { + "epoch": 1.5283251231527095, + "grad_norm": 11.437489612490284, + "learning_rate": 5.714153404309228e-06, + "loss": 1.6330994367599487, + "step": 1241 + }, + { + "epoch": 1.5295566502463054, + "grad_norm": 8.493940846915361, + "learning_rate": 5.707060104072415e-06, + "loss": 2.2386982440948486, + "step": 1242 + }, + { + "epoch": 1.5307881773399015, + "grad_norm": 15.002139823216499, + "learning_rate": 5.6999653514735124e-06, + "loss": 1.5266145467758179, + "step": 1243 + }, + { + "epoch": 1.5320197044334976, + "grad_norm": 10.763593391596421, + "learning_rate": 5.6928691610857515e-06, + "loss": 1.4918262958526611, + "step": 1244 + }, + { + "epoch": 1.5332512315270936, + "grad_norm": 13.978563202935332, + "learning_rate": 5.685771547485312e-06, + "loss": 1.241945743560791, + "step": 1245 + }, + { + "epoch": 1.5344827586206895, + "grad_norm": 13.403953021065679, + "learning_rate": 5.678672525251304e-06, + "loss": 1.1569273471832275, + "step": 1246 + }, + { + "epoch": 1.5357142857142856, + "grad_norm": 11.182023407334606, + "learning_rate": 5.671572108965729e-06, + "loss": 1.946014404296875, + "step": 1247 + }, + { + "epoch": 1.5369458128078817, + "grad_norm": 11.304302205859694, + "learning_rate": 5.664470313213448e-06, + "loss": 1.8601741790771484, + "step": 1248 + }, + { + "epoch": 1.5381773399014778, + "grad_norm": 16.894321658591, + "learning_rate": 5.65736715258216e-06, + "loss": 1.7164549827575684, + "step": 1249 + }, + { + "epoch": 1.5394088669950738, + "grad_norm": 10.02548837159482, + "learning_rate": 5.650262641662367e-06, + "loss": 2.0459697246551514, + "step": 1250 + }, + { + "epoch": 1.54064039408867, + "grad_norm": 9.37570660013781, + "learning_rate": 5.643156795047343e-06, + "loss": 1.4485859870910645, + "step": 1251 + }, + { + "epoch": 1.541871921182266, + "grad_norm": 7.685396722064439, + "learning_rate": 5.6360496273331055e-06, + "loss": 1.8672525882720947, + "step": 1252 + }, + { + "epoch": 1.543103448275862, + "grad_norm": 10.04870984968868, + "learning_rate": 5.628941153118388e-06, + "loss": 1.4309324026107788, + "step": 1253 + }, + { + "epoch": 1.5443349753694582, + "grad_norm": 8.68197237847592, + "learning_rate": 5.621831387004603e-06, + "loss": 1.8784745931625366, + "step": 1254 + }, + { + "epoch": 1.5455665024630543, + "grad_norm": 13.277977807429252, + "learning_rate": 5.6147203435958246e-06, + "loss": 2.109992027282715, + "step": 1255 + }, + { + "epoch": 1.5467980295566504, + "grad_norm": 12.972460738003901, + "learning_rate": 5.607608037498742e-06, + "loss": 1.5892071723937988, + "step": 1256 + }, + { + "epoch": 1.5480295566502464, + "grad_norm": 13.365650986627243, + "learning_rate": 5.600494483322643e-06, + "loss": 1.3583379983901978, + "step": 1257 + }, + { + "epoch": 1.5492610837438425, + "grad_norm": 20.27099102357665, + "learning_rate": 5.593379695679378e-06, + "loss": 2.126896381378174, + "step": 1258 + }, + { + "epoch": 1.5504926108374384, + "grad_norm": 17.176572909103676, + "learning_rate": 5.586263689183332e-06, + "loss": 1.7454299926757812, + "step": 1259 + }, + { + "epoch": 1.5517241379310345, + "grad_norm": 13.916773869762237, + "learning_rate": 5.5791464784513905e-06, + "loss": 1.1533763408660889, + "step": 1260 + }, + { + "epoch": 1.5529556650246306, + "grad_norm": 7.929553367189426, + "learning_rate": 5.572028078102917e-06, + "loss": 1.4818049669265747, + "step": 1261 + }, + { + "epoch": 1.5541871921182266, + "grad_norm": 10.401505556673449, + "learning_rate": 5.564908502759714e-06, + "loss": 1.7103283405303955, + "step": 1262 + }, + { + "epoch": 1.5554187192118225, + "grad_norm": 9.47500952850124, + "learning_rate": 5.557787767046001e-06, + "loss": 2.1653401851654053, + "step": 1263 + }, + { + "epoch": 1.5566502463054186, + "grad_norm": 11.53902942298552, + "learning_rate": 5.55066588558838e-06, + "loss": 1.3127275705337524, + "step": 1264 + }, + { + "epoch": 1.5578817733990147, + "grad_norm": 16.55540616140196, + "learning_rate": 5.543542873015806e-06, + "loss": 1.0865871906280518, + "step": 1265 + }, + { + "epoch": 1.5591133004926108, + "grad_norm": 11.513704169835737, + "learning_rate": 5.536418743959559e-06, + "loss": 1.341281533241272, + "step": 1266 + }, + { + "epoch": 1.5603448275862069, + "grad_norm": 13.363897307451165, + "learning_rate": 5.529293513053207e-06, + "loss": 1.1612720489501953, + "step": 1267 + }, + { + "epoch": 1.561576354679803, + "grad_norm": 8.231595025537441, + "learning_rate": 5.522167194932588e-06, + "loss": 1.7491642236709595, + "step": 1268 + }, + { + "epoch": 1.562807881773399, + "grad_norm": 14.714195860173573, + "learning_rate": 5.515039804235772e-06, + "loss": 1.8244414329528809, + "step": 1269 + }, + { + "epoch": 1.564039408866995, + "grad_norm": 14.369418745397832, + "learning_rate": 5.50791135560303e-06, + "loss": 1.6449997425079346, + "step": 1270 + }, + { + "epoch": 1.5652709359605912, + "grad_norm": 10.791840038500066, + "learning_rate": 5.5007818636768055e-06, + "loss": 1.258559226989746, + "step": 1271 + }, + { + "epoch": 1.5665024630541873, + "grad_norm": 12.265469895779276, + "learning_rate": 5.493651343101686e-06, + "loss": 2.075775146484375, + "step": 1272 + }, + { + "epoch": 1.5677339901477834, + "grad_norm": 33.663491606092755, + "learning_rate": 5.486519808524374e-06, + "loss": 1.8196138143539429, + "step": 1273 + }, + { + "epoch": 1.5689655172413794, + "grad_norm": 10.504622195873791, + "learning_rate": 5.479387274593653e-06, + "loss": 1.129037618637085, + "step": 1274 + }, + { + "epoch": 1.5701970443349755, + "grad_norm": 10.887519946570082, + "learning_rate": 5.472253755960358e-06, + "loss": 1.7367748022079468, + "step": 1275 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 9.127598313619417, + "learning_rate": 5.4651192672773475e-06, + "loss": 1.9274532794952393, + "step": 1276 + }, + { + "epoch": 1.5726600985221675, + "grad_norm": 17.490821839529264, + "learning_rate": 5.457983823199475e-06, + "loss": 1.4018654823303223, + "step": 1277 + }, + { + "epoch": 1.5738916256157636, + "grad_norm": 17.899672160499332, + "learning_rate": 5.450847438383555e-06, + "loss": 1.383131504058838, + "step": 1278 + }, + { + "epoch": 1.5751231527093597, + "grad_norm": 6.595048027752494, + "learning_rate": 5.443710127488331e-06, + "loss": 1.277740716934204, + "step": 1279 + }, + { + "epoch": 1.5763546798029555, + "grad_norm": 9.304406142462632, + "learning_rate": 5.4365719051744556e-06, + "loss": 1.507627010345459, + "step": 1280 + }, + { + "epoch": 1.5775862068965516, + "grad_norm": 13.383687869982538, + "learning_rate": 5.429432786104446e-06, + "loss": 1.609743595123291, + "step": 1281 + }, + { + "epoch": 1.5788177339901477, + "grad_norm": 14.966009265010456, + "learning_rate": 5.422292784942666e-06, + "loss": 3.7705276012420654, + "step": 1282 + }, + { + "epoch": 1.5800492610837438, + "grad_norm": 8.997880163576188, + "learning_rate": 5.415151916355292e-06, + "loss": 1.5003160238265991, + "step": 1283 + }, + { + "epoch": 1.5812807881773399, + "grad_norm": 9.476478190888859, + "learning_rate": 5.408010195010278e-06, + "loss": 2.2466366291046143, + "step": 1284 + }, + { + "epoch": 1.582512315270936, + "grad_norm": 7.465134227448914, + "learning_rate": 5.400867635577335e-06, + "loss": 1.0722277164459229, + "step": 1285 + }, + { + "epoch": 1.583743842364532, + "grad_norm": 13.942249242079209, + "learning_rate": 5.3937242527278885e-06, + "loss": 1.3113644123077393, + "step": 1286 + }, + { + "epoch": 1.5849753694581281, + "grad_norm": 14.224147707467683, + "learning_rate": 5.3865800611350634e-06, + "loss": 1.4688694477081299, + "step": 1287 + }, + { + "epoch": 1.5862068965517242, + "grad_norm": 9.648975936769988, + "learning_rate": 5.379435075473641e-06, + "loss": 1.3646764755249023, + "step": 1288 + }, + { + "epoch": 1.5874384236453203, + "grad_norm": 8.753285038565833, + "learning_rate": 5.372289310420032e-06, + "loss": 1.6248177289962769, + "step": 1289 + }, + { + "epoch": 1.5886699507389164, + "grad_norm": 9.773114583134893, + "learning_rate": 5.365142780652255e-06, + "loss": 1.5507471561431885, + "step": 1290 + }, + { + "epoch": 1.5899014778325125, + "grad_norm": 8.752822975110762, + "learning_rate": 5.35799550084989e-06, + "loss": 1.2866086959838867, + "step": 1291 + }, + { + "epoch": 1.5911330049261085, + "grad_norm": 10.021050170312028, + "learning_rate": 5.350847485694067e-06, + "loss": 2.336108684539795, + "step": 1292 + }, + { + "epoch": 1.5923645320197044, + "grad_norm": 11.648640054355637, + "learning_rate": 5.343698749867421e-06, + "loss": 1.6604368686676025, + "step": 1293 + }, + { + "epoch": 1.5935960591133005, + "grad_norm": 16.28378480699955, + "learning_rate": 5.336549308054066e-06, + "loss": 1.2169203758239746, + "step": 1294 + }, + { + "epoch": 1.5948275862068966, + "grad_norm": 14.069009000417143, + "learning_rate": 5.329399174939572e-06, + "loss": 1.546027421951294, + "step": 1295 + }, + { + "epoch": 1.5960591133004927, + "grad_norm": 9.646944240372145, + "learning_rate": 5.3222483652109235e-06, + "loss": 1.1372979879379272, + "step": 1296 + }, + { + "epoch": 1.5972906403940885, + "grad_norm": 10.548510904543294, + "learning_rate": 5.315096893556497e-06, + "loss": 1.3435921669006348, + "step": 1297 + }, + { + "epoch": 1.5985221674876846, + "grad_norm": 14.79008878560828, + "learning_rate": 5.307944774666029e-06, + "loss": 1.522647500038147, + "step": 1298 + }, + { + "epoch": 1.5997536945812807, + "grad_norm": 17.912683434114346, + "learning_rate": 5.300792023230587e-06, + "loss": 2.0829434394836426, + "step": 1299 + }, + { + "epoch": 1.6009852216748768, + "grad_norm": 8.420566897576393, + "learning_rate": 5.2936386539425325e-06, + "loss": 1.761828064918518, + "step": 1300 + }, + { + "epoch": 1.6022167487684729, + "grad_norm": 14.83308627903251, + "learning_rate": 5.2864846814955e-06, + "loss": 2.4108588695526123, + "step": 1301 + }, + { + "epoch": 1.603448275862069, + "grad_norm": 7.959651684795871, + "learning_rate": 5.279330120584365e-06, + "loss": 1.626701831817627, + "step": 1302 + }, + { + "epoch": 1.604679802955665, + "grad_norm": 15.705970904875606, + "learning_rate": 5.272174985905207e-06, + "loss": 1.2424887418746948, + "step": 1303 + }, + { + "epoch": 1.6059113300492611, + "grad_norm": 12.239359710615943, + "learning_rate": 5.2650192921552845e-06, + "loss": 2.149031639099121, + "step": 1304 + }, + { + "epoch": 1.6071428571428572, + "grad_norm": 10.231856507403213, + "learning_rate": 5.257863054033012e-06, + "loss": 2.6947379112243652, + "step": 1305 + }, + { + "epoch": 1.6083743842364533, + "grad_norm": 18.838018326977505, + "learning_rate": 5.25070628623791e-06, + "loss": 1.665069818496704, + "step": 1306 + }, + { + "epoch": 1.6096059113300494, + "grad_norm": 14.325294673284358, + "learning_rate": 5.243549003470599e-06, + "loss": 1.3887734413146973, + "step": 1307 + }, + { + "epoch": 1.6108374384236455, + "grad_norm": 11.840772011671689, + "learning_rate": 5.236391220432745e-06, + "loss": 1.340559720993042, + "step": 1308 + }, + { + "epoch": 1.6120689655172413, + "grad_norm": 10.400173398296557, + "learning_rate": 5.229232951827054e-06, + "loss": 1.1291146278381348, + "step": 1309 + }, + { + "epoch": 1.6133004926108374, + "grad_norm": 11.008129364503455, + "learning_rate": 5.222074212357221e-06, + "loss": 1.8375647068023682, + "step": 1310 + }, + { + "epoch": 1.6145320197044335, + "grad_norm": 26.174008264121436, + "learning_rate": 5.2149150167279106e-06, + "loss": 1.3299870491027832, + "step": 1311 + }, + { + "epoch": 1.6157635467980296, + "grad_norm": 9.874671943961642, + "learning_rate": 5.2077553796447254e-06, + "loss": 1.1574440002441406, + "step": 1312 + }, + { + "epoch": 1.6169950738916257, + "grad_norm": 9.304756709434216, + "learning_rate": 5.200595315814174e-06, + "loss": 1.8118785619735718, + "step": 1313 + }, + { + "epoch": 1.6182266009852215, + "grad_norm": 10.54430610217864, + "learning_rate": 5.19343483994364e-06, + "loss": 1.333923101425171, + "step": 1314 + }, + { + "epoch": 1.6194581280788176, + "grad_norm": 8.365290613104223, + "learning_rate": 5.18627396674136e-06, + "loss": 1.2107478380203247, + "step": 1315 + }, + { + "epoch": 1.6206896551724137, + "grad_norm": 11.934365489822259, + "learning_rate": 5.1791127109163734e-06, + "loss": 1.662817120552063, + "step": 1316 + }, + { + "epoch": 1.6219211822660098, + "grad_norm": 11.66068657995672, + "learning_rate": 5.17195108717852e-06, + "loss": 1.7790195941925049, + "step": 1317 + }, + { + "epoch": 1.6231527093596059, + "grad_norm": 15.883414066148024, + "learning_rate": 5.164789110238387e-06, + "loss": 1.5893058776855469, + "step": 1318 + }, + { + "epoch": 1.624384236453202, + "grad_norm": 9.631844787083402, + "learning_rate": 5.15762679480729e-06, + "loss": 1.256395936012268, + "step": 1319 + }, + { + "epoch": 1.625615763546798, + "grad_norm": 18.80096398191795, + "learning_rate": 5.150464155597239e-06, + "loss": 1.3061628341674805, + "step": 1320 + }, + { + "epoch": 1.6268472906403941, + "grad_norm": 8.93680164244121, + "learning_rate": 5.143301207320909e-06, + "loss": 1.4399319887161255, + "step": 1321 + }, + { + "epoch": 1.6280788177339902, + "grad_norm": 13.559338660465917, + "learning_rate": 5.136137964691609e-06, + "loss": 1.2071207761764526, + "step": 1322 + }, + { + "epoch": 1.6293103448275863, + "grad_norm": 15.329093630080337, + "learning_rate": 5.128974442423254e-06, + "loss": 2.2784008979797363, + "step": 1323 + }, + { + "epoch": 1.6305418719211824, + "grad_norm": 10.677223802578135, + "learning_rate": 5.121810655230336e-06, + "loss": 1.3703962564468384, + "step": 1324 + }, + { + "epoch": 1.6317733990147785, + "grad_norm": 7.672085033643185, + "learning_rate": 5.114646617827884e-06, + "loss": 0.6955282688140869, + "step": 1325 + }, + { + "epoch": 1.6330049261083743, + "grad_norm": 9.372418453872616, + "learning_rate": 5.107482344931448e-06, + "loss": 1.5774227380752563, + "step": 1326 + }, + { + "epoch": 1.6342364532019704, + "grad_norm": 7.569882170382433, + "learning_rate": 5.100317851257057e-06, + "loss": 1.6811349391937256, + "step": 1327 + }, + { + "epoch": 1.6354679802955665, + "grad_norm": 13.234466243138659, + "learning_rate": 5.093153151521196e-06, + "loss": 1.563596487045288, + "step": 1328 + }, + { + "epoch": 1.6366995073891626, + "grad_norm": 13.317086470459271, + "learning_rate": 5.085988260440776e-06, + "loss": 1.44309401512146, + "step": 1329 + }, + { + "epoch": 1.6379310344827587, + "grad_norm": 12.614583983426193, + "learning_rate": 5.0788231927330924e-06, + "loss": 1.5392205715179443, + "step": 1330 + }, + { + "epoch": 1.6391625615763545, + "grad_norm": 19.688183928504156, + "learning_rate": 5.0716579631158124e-06, + "loss": 0.9557719826698303, + "step": 1331 + }, + { + "epoch": 1.6403940886699506, + "grad_norm": 12.748000945416605, + "learning_rate": 5.064492586306931e-06, + "loss": 1.1032493114471436, + "step": 1332 + }, + { + "epoch": 1.6416256157635467, + "grad_norm": 14.590229259835747, + "learning_rate": 5.057327077024745e-06, + "loss": 1.4907091856002808, + "step": 1333 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 13.569513298786392, + "learning_rate": 5.050161449987828e-06, + "loss": 1.4919164180755615, + "step": 1334 + }, + { + "epoch": 1.6440886699507389, + "grad_norm": 17.53788627610522, + "learning_rate": 5.0429957199149905e-06, + "loss": 2.177396297454834, + "step": 1335 + }, + { + "epoch": 1.645320197044335, + "grad_norm": 9.011039030303097, + "learning_rate": 5.035829901525258e-06, + "loss": 1.2386332750320435, + "step": 1336 + }, + { + "epoch": 1.646551724137931, + "grad_norm": 7.326320563707851, + "learning_rate": 5.028664009537835e-06, + "loss": 1.2984986305236816, + "step": 1337 + }, + { + "epoch": 1.6477832512315271, + "grad_norm": 8.373461994458872, + "learning_rate": 5.021498058672076e-06, + "loss": 1.1399617195129395, + "step": 1338 + }, + { + "epoch": 1.6490147783251232, + "grad_norm": 7.295316739226097, + "learning_rate": 5.014332063647462e-06, + "loss": 1.9816789627075195, + "step": 1339 + }, + { + "epoch": 1.6502463054187193, + "grad_norm": 7.86464342129843, + "learning_rate": 5.007166039183561e-06, + "loss": 1.4210541248321533, + "step": 1340 + }, + { + "epoch": 1.6514778325123154, + "grad_norm": 12.713637168049194, + "learning_rate": 5e-06, + "loss": 1.5061390399932861, + "step": 1341 + }, + { + "epoch": 1.6527093596059115, + "grad_norm": 8.899156333262312, + "learning_rate": 4.99283396081644e-06, + "loss": 1.4701118469238281, + "step": 1342 + }, + { + "epoch": 1.6539408866995073, + "grad_norm": 10.54571567541005, + "learning_rate": 4.985667936352538e-06, + "loss": 1.4879779815673828, + "step": 1343 + }, + { + "epoch": 1.6551724137931034, + "grad_norm": 10.432279538827562, + "learning_rate": 4.978501941327926e-06, + "loss": 1.51373291015625, + "step": 1344 + }, + { + "epoch": 1.6564039408866995, + "grad_norm": 7.981064947021898, + "learning_rate": 4.971335990462168e-06, + "loss": 1.5439019203186035, + "step": 1345 + }, + { + "epoch": 1.6576354679802956, + "grad_norm": 14.863181962691362, + "learning_rate": 4.964170098474744e-06, + "loss": 1.7145721912384033, + "step": 1346 + }, + { + "epoch": 1.6588669950738915, + "grad_norm": 7.816226303611453, + "learning_rate": 4.95700428008501e-06, + "loss": 1.6367833614349365, + "step": 1347 + }, + { + "epoch": 1.6600985221674875, + "grad_norm": 12.087333147554537, + "learning_rate": 4.949838550012172e-06, + "loss": 1.4300103187561035, + "step": 1348 + }, + { + "epoch": 1.6613300492610836, + "grad_norm": 6.881924405292677, + "learning_rate": 4.942672922975255e-06, + "loss": 2.0569915771484375, + "step": 1349 + }, + { + "epoch": 1.6625615763546797, + "grad_norm": 15.296469591183284, + "learning_rate": 4.935507413693071e-06, + "loss": 1.1028980016708374, + "step": 1350 + }, + { + "epoch": 1.6637931034482758, + "grad_norm": 9.201861102909985, + "learning_rate": 4.928342036884189e-06, + "loss": 1.6323003768920898, + "step": 1351 + }, + { + "epoch": 1.6650246305418719, + "grad_norm": 10.996157407203105, + "learning_rate": 4.921176807266909e-06, + "loss": 1.5050472021102905, + "step": 1352 + }, + { + "epoch": 1.666256157635468, + "grad_norm": 17.127722044101333, + "learning_rate": 4.914011739559225e-06, + "loss": 1.3893849849700928, + "step": 1353 + }, + { + "epoch": 1.667487684729064, + "grad_norm": 13.548169676262727, + "learning_rate": 4.906846848478803e-06, + "loss": 1.1478514671325684, + "step": 1354 + }, + { + "epoch": 1.6687192118226601, + "grad_norm": 16.337726396970115, + "learning_rate": 4.899682148742944e-06, + "loss": 1.2397665977478027, + "step": 1355 + }, + { + "epoch": 1.6699507389162562, + "grad_norm": 8.122019629920894, + "learning_rate": 4.892517655068555e-06, + "loss": 1.1658974885940552, + "step": 1356 + }, + { + "epoch": 1.6711822660098523, + "grad_norm": 10.105771734426996, + "learning_rate": 4.8853533821721175e-06, + "loss": 1.7130283117294312, + "step": 1357 + }, + { + "epoch": 1.6724137931034484, + "grad_norm": 10.758386009234124, + "learning_rate": 4.878189344769666e-06, + "loss": 0.9516315460205078, + "step": 1358 + }, + { + "epoch": 1.6736453201970445, + "grad_norm": 11.103808898671073, + "learning_rate": 4.871025557576747e-06, + "loss": 1.143174171447754, + "step": 1359 + }, + { + "epoch": 1.6748768472906403, + "grad_norm": 11.525961008953772, + "learning_rate": 4.863862035308392e-06, + "loss": 1.7117831707000732, + "step": 1360 + }, + { + "epoch": 1.6761083743842364, + "grad_norm": 17.64687941795743, + "learning_rate": 4.8566987926790946e-06, + "loss": 2.507868528366089, + "step": 1361 + }, + { + "epoch": 1.6773399014778325, + "grad_norm": 9.376137745201675, + "learning_rate": 4.849535844402762e-06, + "loss": 1.476400375366211, + "step": 1362 + }, + { + "epoch": 1.6785714285714286, + "grad_norm": 8.721089378493017, + "learning_rate": 4.8423732051927115e-06, + "loss": 1.3162943124771118, + "step": 1363 + }, + { + "epoch": 1.6798029556650245, + "grad_norm": 10.422911150427735, + "learning_rate": 4.835210889761614e-06, + "loss": 2.2291440963745117, + "step": 1364 + }, + { + "epoch": 1.6810344827586206, + "grad_norm": 9.602624562609396, + "learning_rate": 4.82804891282148e-06, + "loss": 1.2231886386871338, + "step": 1365 + }, + { + "epoch": 1.6822660098522166, + "grad_norm": 14.076238439157445, + "learning_rate": 4.820887289083629e-06, + "loss": 1.3799304962158203, + "step": 1366 + }, + { + "epoch": 1.6834975369458127, + "grad_norm": 15.54796648321669, + "learning_rate": 4.813726033258643e-06, + "loss": 1.856811761856079, + "step": 1367 + }, + { + "epoch": 1.6847290640394088, + "grad_norm": 9.64062645814171, + "learning_rate": 4.80656516005636e-06, + "loss": 1.5948967933654785, + "step": 1368 + }, + { + "epoch": 1.685960591133005, + "grad_norm": 13.962004352631022, + "learning_rate": 4.799404684185828e-06, + "loss": 1.5035887956619263, + "step": 1369 + }, + { + "epoch": 1.687192118226601, + "grad_norm": 11.27741103317867, + "learning_rate": 4.792244620355275e-06, + "loss": 1.4715675115585327, + "step": 1370 + }, + { + "epoch": 1.688423645320197, + "grad_norm": 15.373869655729267, + "learning_rate": 4.78508498327209e-06, + "loss": 1.393894076347351, + "step": 1371 + }, + { + "epoch": 1.6896551724137931, + "grad_norm": 12.537169523242483, + "learning_rate": 4.777925787642781e-06, + "loss": 1.8458061218261719, + "step": 1372 + }, + { + "epoch": 1.6908866995073892, + "grad_norm": 12.62635000347042, + "learning_rate": 4.770767048172948e-06, + "loss": 1.0604429244995117, + "step": 1373 + }, + { + "epoch": 1.6921182266009853, + "grad_norm": 10.74648464318841, + "learning_rate": 4.7636087795672565e-06, + "loss": 1.3261964321136475, + "step": 1374 + }, + { + "epoch": 1.6933497536945814, + "grad_norm": 9.576848082824501, + "learning_rate": 4.756450996529403e-06, + "loss": 1.6243900060653687, + "step": 1375 + }, + { + "epoch": 1.6945812807881775, + "grad_norm": 13.575969601291865, + "learning_rate": 4.749293713762091e-06, + "loss": 1.8087639808654785, + "step": 1376 + }, + { + "epoch": 1.6958128078817734, + "grad_norm": 8.48685992922433, + "learning_rate": 4.742136945966991e-06, + "loss": 1.9180892705917358, + "step": 1377 + }, + { + "epoch": 1.6970443349753694, + "grad_norm": 12.706829097920151, + "learning_rate": 4.734980707844716e-06, + "loss": 1.6797364950180054, + "step": 1378 + }, + { + "epoch": 1.6982758620689655, + "grad_norm": 10.281614379219002, + "learning_rate": 4.727825014094795e-06, + "loss": 0.9649052023887634, + "step": 1379 + }, + { + "epoch": 1.6995073891625616, + "grad_norm": 7.785652444986331, + "learning_rate": 4.720669879415637e-06, + "loss": 1.4185916185379028, + "step": 1380 + }, + { + "epoch": 1.7007389162561575, + "grad_norm": 10.73836489858494, + "learning_rate": 4.713515318504501e-06, + "loss": 1.8681238889694214, + "step": 1381 + }, + { + "epoch": 1.7019704433497536, + "grad_norm": 9.950804244952993, + "learning_rate": 4.706361346057468e-06, + "loss": 1.2830915451049805, + "step": 1382 + }, + { + "epoch": 1.7032019704433496, + "grad_norm": 18.988866497939586, + "learning_rate": 4.699207976769416e-06, + "loss": 1.0888878107070923, + "step": 1383 + }, + { + "epoch": 1.7044334975369457, + "grad_norm": 12.689992799691533, + "learning_rate": 4.692055225333972e-06, + "loss": 1.4439440965652466, + "step": 1384 + }, + { + "epoch": 1.7056650246305418, + "grad_norm": 7.183191439849756, + "learning_rate": 4.684903106443504e-06, + "loss": 1.0282858610153198, + "step": 1385 + }, + { + "epoch": 1.706896551724138, + "grad_norm": 13.261845343202891, + "learning_rate": 4.677751634789078e-06, + "loss": 1.6842533349990845, + "step": 1386 + }, + { + "epoch": 1.708128078817734, + "grad_norm": 14.612290761713947, + "learning_rate": 4.670600825060429e-06, + "loss": 1.5473763942718506, + "step": 1387 + }, + { + "epoch": 1.70935960591133, + "grad_norm": 19.73106165634469, + "learning_rate": 4.663450691945936e-06, + "loss": 1.839112401008606, + "step": 1388 + }, + { + "epoch": 1.7105911330049262, + "grad_norm": 10.917539579247505, + "learning_rate": 4.656301250132581e-06, + "loss": 1.5349544286727905, + "step": 1389 + }, + { + "epoch": 1.7118226600985222, + "grad_norm": 11.132766984186494, + "learning_rate": 4.649152514305934e-06, + "loss": 1.5788905620574951, + "step": 1390 + }, + { + "epoch": 1.7130541871921183, + "grad_norm": 10.21681078103426, + "learning_rate": 4.6420044991501104e-06, + "loss": 1.4541325569152832, + "step": 1391 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 9.227689699191664, + "learning_rate": 4.634857219347746e-06, + "loss": 1.8231902122497559, + "step": 1392 + }, + { + "epoch": 1.7155172413793105, + "grad_norm": 10.500866364265818, + "learning_rate": 4.627710689579968e-06, + "loss": 1.6302368640899658, + "step": 1393 + }, + { + "epoch": 1.7167487684729064, + "grad_norm": 17.60594188273056, + "learning_rate": 4.62056492452636e-06, + "loss": 1.497374415397644, + "step": 1394 + }, + { + "epoch": 1.7179802955665024, + "grad_norm": 15.287585545597818, + "learning_rate": 4.613419938864937e-06, + "loss": 1.1390448808670044, + "step": 1395 + }, + { + "epoch": 1.7192118226600985, + "grad_norm": 10.328419466218456, + "learning_rate": 4.606275747272112e-06, + "loss": 1.4320652484893799, + "step": 1396 + }, + { + "epoch": 1.7204433497536946, + "grad_norm": 9.176084187845012, + "learning_rate": 4.599132364422666e-06, + "loss": 1.2651784420013428, + "step": 1397 + }, + { + "epoch": 1.7216748768472905, + "grad_norm": 15.836729193949362, + "learning_rate": 4.5919898049897225e-06, + "loss": 1.719766616821289, + "step": 1398 + }, + { + "epoch": 1.7229064039408866, + "grad_norm": 12.937422715545681, + "learning_rate": 4.58484808364471e-06, + "loss": 1.707594394683838, + "step": 1399 + }, + { + "epoch": 1.7241379310344827, + "grad_norm": 14.730027238842638, + "learning_rate": 4.5777072150573355e-06, + "loss": 1.4608323574066162, + "step": 1400 + }, + { + "epoch": 1.7253694581280787, + "grad_norm": 9.894706364799527, + "learning_rate": 4.570567213895555e-06, + "loss": 1.5542428493499756, + "step": 1401 + }, + { + "epoch": 1.7266009852216748, + "grad_norm": 10.251938635324704, + "learning_rate": 4.563428094825546e-06, + "loss": 1.2282288074493408, + "step": 1402 + }, + { + "epoch": 1.727832512315271, + "grad_norm": 12.91095594163412, + "learning_rate": 4.556289872511669e-06, + "loss": 1.1870850324630737, + "step": 1403 + }, + { + "epoch": 1.729064039408867, + "grad_norm": 19.656749282746095, + "learning_rate": 4.549152561616445e-06, + "loss": 1.8125461339950562, + "step": 1404 + }, + { + "epoch": 1.730295566502463, + "grad_norm": 13.055834351152246, + "learning_rate": 4.542016176800527e-06, + "loss": 1.4419995546340942, + "step": 1405 + }, + { + "epoch": 1.7315270935960592, + "grad_norm": 12.427293973832745, + "learning_rate": 4.534880732722653e-06, + "loss": 1.8834543228149414, + "step": 1406 + }, + { + "epoch": 1.7327586206896552, + "grad_norm": 9.308568400780414, + "learning_rate": 4.527746244039644e-06, + "loss": 1.120203971862793, + "step": 1407 + }, + { + "epoch": 1.7339901477832513, + "grad_norm": 10.965136861668267, + "learning_rate": 4.5206127254063495e-06, + "loss": 0.9131630659103394, + "step": 1408 + }, + { + "epoch": 1.7352216748768474, + "grad_norm": 18.40693337146411, + "learning_rate": 4.513480191475627e-06, + "loss": 1.86919367313385, + "step": 1409 + }, + { + "epoch": 1.7364532019704435, + "grad_norm": 16.72423206220796, + "learning_rate": 4.506348656898316e-06, + "loss": 1.6573272943496704, + "step": 1410 + }, + { + "epoch": 1.7376847290640394, + "grad_norm": 12.29145112798753, + "learning_rate": 4.499218136323197e-06, + "loss": 1.2864340543746948, + "step": 1411 + }, + { + "epoch": 1.7389162561576355, + "grad_norm": 9.205794418080544, + "learning_rate": 4.492088644396972e-06, + "loss": 1.5519993305206299, + "step": 1412 + }, + { + "epoch": 1.7401477832512315, + "grad_norm": 10.304423144578244, + "learning_rate": 4.4849601957642295e-06, + "loss": 1.7556722164154053, + "step": 1413 + }, + { + "epoch": 1.7413793103448276, + "grad_norm": 12.170127229505125, + "learning_rate": 4.477832805067412e-06, + "loss": 1.6349589824676514, + "step": 1414 + }, + { + "epoch": 1.7426108374384235, + "grad_norm": 18.04544459439354, + "learning_rate": 4.470706486946797e-06, + "loss": 1.3583035469055176, + "step": 1415 + }, + { + "epoch": 1.7438423645320196, + "grad_norm": 16.035788014412844, + "learning_rate": 4.463581256040445e-06, + "loss": 1.5367932319641113, + "step": 1416 + }, + { + "epoch": 1.7450738916256157, + "grad_norm": 10.971734568897116, + "learning_rate": 4.456457126984196e-06, + "loss": 1.5078128576278687, + "step": 1417 + }, + { + "epoch": 1.7463054187192117, + "grad_norm": 8.435567334501869, + "learning_rate": 4.449334114411622e-06, + "loss": 1.8653573989868164, + "step": 1418 + }, + { + "epoch": 1.7475369458128078, + "grad_norm": 11.511023238806931, + "learning_rate": 4.4422122329539996e-06, + "loss": 1.1381313800811768, + "step": 1419 + }, + { + "epoch": 1.748768472906404, + "grad_norm": 9.115530827164923, + "learning_rate": 4.435091497240287e-06, + "loss": 1.4135184288024902, + "step": 1420 + }, + { + "epoch": 1.75, + "grad_norm": 19.148242044300115, + "learning_rate": 4.427971921897086e-06, + "loss": 1.2186479568481445, + "step": 1421 + }, + { + "epoch": 1.751231527093596, + "grad_norm": 11.735225834432583, + "learning_rate": 4.420853521548611e-06, + "loss": 1.3139259815216064, + "step": 1422 + }, + { + "epoch": 1.7524630541871922, + "grad_norm": 9.908228964820347, + "learning_rate": 4.413736310816669e-06, + "loss": 2.0143887996673584, + "step": 1423 + }, + { + "epoch": 1.7536945812807883, + "grad_norm": 11.72709904223931, + "learning_rate": 4.4066203043206226e-06, + "loss": 1.5800344944000244, + "step": 1424 + }, + { + "epoch": 1.7549261083743843, + "grad_norm": 13.351525970289408, + "learning_rate": 4.399505516677358e-06, + "loss": 1.449183702468872, + "step": 1425 + }, + { + "epoch": 1.7561576354679804, + "grad_norm": 14.449460918267059, + "learning_rate": 4.3923919625012605e-06, + "loss": 0.6957097053527832, + "step": 1426 + }, + { + "epoch": 1.7573891625615765, + "grad_norm": 16.656517142384814, + "learning_rate": 4.385279656404178e-06, + "loss": 1.0665647983551025, + "step": 1427 + }, + { + "epoch": 1.7586206896551724, + "grad_norm": 8.728452405950277, + "learning_rate": 4.3781686129953975e-06, + "loss": 1.2771016359329224, + "step": 1428 + }, + { + "epoch": 1.7598522167487685, + "grad_norm": 9.380843658329356, + "learning_rate": 4.371058846881614e-06, + "loss": 1.4222235679626465, + "step": 1429 + }, + { + "epoch": 1.7610837438423645, + "grad_norm": 18.6167744042239, + "learning_rate": 4.363950372666896e-06, + "loss": 2.1237497329711914, + "step": 1430 + }, + { + "epoch": 1.7623152709359606, + "grad_norm": 15.81534835320748, + "learning_rate": 4.356843204952657e-06, + "loss": 1.3875718116760254, + "step": 1431 + }, + { + "epoch": 1.7635467980295565, + "grad_norm": 11.325736932128727, + "learning_rate": 4.349737358337635e-06, + "loss": 1.2585203647613525, + "step": 1432 + }, + { + "epoch": 1.7647783251231526, + "grad_norm": 10.890833810787267, + "learning_rate": 4.3426328474178405e-06, + "loss": 1.3183746337890625, + "step": 1433 + }, + { + "epoch": 1.7660098522167487, + "grad_norm": 11.455742000334912, + "learning_rate": 4.335529686786554e-06, + "loss": 1.7174941301345825, + "step": 1434 + }, + { + "epoch": 1.7672413793103448, + "grad_norm": 9.946830568051285, + "learning_rate": 4.328427891034273e-06, + "loss": 1.9503614902496338, + "step": 1435 + }, + { + "epoch": 1.7684729064039408, + "grad_norm": 13.787149559571247, + "learning_rate": 4.321327474748697e-06, + "loss": 1.3797223567962646, + "step": 1436 + }, + { + "epoch": 1.769704433497537, + "grad_norm": 14.935693009519694, + "learning_rate": 4.3142284525146915e-06, + "loss": 1.4113730192184448, + "step": 1437 + }, + { + "epoch": 1.770935960591133, + "grad_norm": 11.978351079391912, + "learning_rate": 4.307130838914252e-06, + "loss": 2.383976697921753, + "step": 1438 + }, + { + "epoch": 1.772167487684729, + "grad_norm": 10.033247535379967, + "learning_rate": 4.300034648526489e-06, + "loss": 1.7687448263168335, + "step": 1439 + }, + { + "epoch": 1.7733990147783252, + "grad_norm": 15.25338664216219, + "learning_rate": 4.292939895927587e-06, + "loss": 1.5130079984664917, + "step": 1440 + }, + { + "epoch": 1.7746305418719213, + "grad_norm": 16.671641040457516, + "learning_rate": 4.2858465956907726e-06, + "loss": 1.0863475799560547, + "step": 1441 + }, + { + "epoch": 1.7758620689655173, + "grad_norm": 21.777249707868723, + "learning_rate": 4.278754762386297e-06, + "loss": 1.1504137516021729, + "step": 1442 + }, + { + "epoch": 1.7770935960591134, + "grad_norm": 10.960123964926488, + "learning_rate": 4.271664410581392e-06, + "loss": 1.1227596998214722, + "step": 1443 + }, + { + "epoch": 1.7783251231527095, + "grad_norm": 10.668478758892386, + "learning_rate": 4.264575554840248e-06, + "loss": 1.4501817226409912, + "step": 1444 + }, + { + "epoch": 1.7795566502463054, + "grad_norm": 8.508770946365994, + "learning_rate": 4.257488209723981e-06, + "loss": 0.48442721366882324, + "step": 1445 + }, + { + "epoch": 1.7807881773399015, + "grad_norm": 19.774025943442037, + "learning_rate": 4.25040238979061e-06, + "loss": 1.218263864517212, + "step": 1446 + }, + { + "epoch": 1.7820197044334976, + "grad_norm": 11.107941835251008, + "learning_rate": 4.243318109595014e-06, + "loss": 1.1711516380310059, + "step": 1447 + }, + { + "epoch": 1.7832512315270936, + "grad_norm": 14.393581709964357, + "learning_rate": 4.2362353836889126e-06, + "loss": 1.3575153350830078, + "step": 1448 + }, + { + "epoch": 1.7844827586206895, + "grad_norm": 15.514668018354685, + "learning_rate": 4.229154226620832e-06, + "loss": 2.6967573165893555, + "step": 1449 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 16.398555290477788, + "learning_rate": 4.2220746529360745e-06, + "loss": 2.2812700271606445, + "step": 1450 + }, + { + "epoch": 1.7869458128078817, + "grad_norm": 7.44372678737394, + "learning_rate": 4.2149966771766945e-06, + "loss": 1.2746225595474243, + "step": 1451 + }, + { + "epoch": 1.7881773399014778, + "grad_norm": 24.76309740203676, + "learning_rate": 4.207920313881459e-06, + "loss": 1.4866999387741089, + "step": 1452 + }, + { + "epoch": 1.7894088669950738, + "grad_norm": 12.129429402231283, + "learning_rate": 4.200845577585827e-06, + "loss": 1.4830021858215332, + "step": 1453 + }, + { + "epoch": 1.79064039408867, + "grad_norm": 14.927464924948287, + "learning_rate": 4.193772482821914e-06, + "loss": 2.5529747009277344, + "step": 1454 + }, + { + "epoch": 1.791871921182266, + "grad_norm": 10.342903175989482, + "learning_rate": 4.186701044118459e-06, + "loss": 1.413874626159668, + "step": 1455 + }, + { + "epoch": 1.793103448275862, + "grad_norm": 25.730295260232445, + "learning_rate": 4.179631276000807e-06, + "loss": 2.1567163467407227, + "step": 1456 + }, + { + "epoch": 1.7943349753694582, + "grad_norm": 30.70195031797357, + "learning_rate": 4.1725631929908684e-06, + "loss": 1.851858139038086, + "step": 1457 + }, + { + "epoch": 1.7955665024630543, + "grad_norm": 15.74317099171368, + "learning_rate": 4.165496809607089e-06, + "loss": 1.2765101194381714, + "step": 1458 + }, + { + "epoch": 1.7967980295566504, + "grad_norm": 10.995413854030392, + "learning_rate": 4.158432140364431e-06, + "loss": 1.9869401454925537, + "step": 1459 + }, + { + "epoch": 1.7980295566502464, + "grad_norm": 14.263851286153963, + "learning_rate": 4.151369199774325e-06, + "loss": 1.5319430828094482, + "step": 1460 + }, + { + "epoch": 1.7992610837438425, + "grad_norm": 10.506976676212952, + "learning_rate": 4.1443080023446605e-06, + "loss": 1.487468957901001, + "step": 1461 + }, + { + "epoch": 1.8004926108374384, + "grad_norm": 23.04137362584248, + "learning_rate": 4.137248562579742e-06, + "loss": 1.6152423620224, + "step": 1462 + }, + { + "epoch": 1.8017241379310345, + "grad_norm": 8.431434363474125, + "learning_rate": 4.130190894980262e-06, + "loss": 1.5262070894241333, + "step": 1463 + }, + { + "epoch": 1.8029556650246306, + "grad_norm": 9.129193697661835, + "learning_rate": 4.123135014043279e-06, + "loss": 1.6697289943695068, + "step": 1464 + }, + { + "epoch": 1.8041871921182266, + "grad_norm": 14.310350877734502, + "learning_rate": 4.116080934262175e-06, + "loss": 1.470789909362793, + "step": 1465 + }, + { + "epoch": 1.8054187192118225, + "grad_norm": 10.462627135626132, + "learning_rate": 4.109028670126635e-06, + "loss": 1.62421715259552, + "step": 1466 + }, + { + "epoch": 1.8066502463054186, + "grad_norm": 9.463272161807932, + "learning_rate": 4.101978236122613e-06, + "loss": 2.1249561309814453, + "step": 1467 + }, + { + "epoch": 1.8078817733990147, + "grad_norm": 10.291280772031216, + "learning_rate": 4.094929646732309e-06, + "loss": 1.3368217945098877, + "step": 1468 + }, + { + "epoch": 1.8091133004926108, + "grad_norm": 13.897028873169491, + "learning_rate": 4.087882916434126e-06, + "loss": 0.8684915900230408, + "step": 1469 + }, + { + "epoch": 1.8103448275862069, + "grad_norm": 9.114980502172534, + "learning_rate": 4.080838059702656e-06, + "loss": 1.6997764110565186, + "step": 1470 + }, + { + "epoch": 1.811576354679803, + "grad_norm": 15.00723435129453, + "learning_rate": 4.0737950910086354e-06, + "loss": 0.8933043479919434, + "step": 1471 + }, + { + "epoch": 1.812807881773399, + "grad_norm": 8.849165431721978, + "learning_rate": 4.0667540248189265e-06, + "loss": 1.689558982849121, + "step": 1472 + }, + { + "epoch": 1.814039408866995, + "grad_norm": 8.28022241305891, + "learning_rate": 4.059714875596486e-06, + "loss": 1.797630786895752, + "step": 1473 + }, + { + "epoch": 1.8152709359605912, + "grad_norm": 8.44088037241126, + "learning_rate": 4.052677657800327e-06, + "loss": 2.023120164871216, + "step": 1474 + }, + { + "epoch": 1.8165024630541873, + "grad_norm": 13.31766346957086, + "learning_rate": 4.045642385885497e-06, + "loss": 1.5412349700927734, + "step": 1475 + }, + { + "epoch": 1.8177339901477834, + "grad_norm": 11.713991741569846, + "learning_rate": 4.038609074303055e-06, + "loss": 0.786411464214325, + "step": 1476 + }, + { + "epoch": 1.8189655172413794, + "grad_norm": 12.300017528117012, + "learning_rate": 4.0315777375000185e-06, + "loss": 1.3470659255981445, + "step": 1477 + }, + { + "epoch": 1.8201970443349755, + "grad_norm": 10.149728213380525, + "learning_rate": 4.02454838991936e-06, + "loss": 1.3983774185180664, + "step": 1478 + }, + { + "epoch": 1.8214285714285714, + "grad_norm": 8.907879387840488, + "learning_rate": 4.017521045999961e-06, + "loss": 1.9945271015167236, + "step": 1479 + }, + { + "epoch": 1.8226600985221675, + "grad_norm": 14.485464092551117, + "learning_rate": 4.0104957201765874e-06, + "loss": 1.6103991270065308, + "step": 1480 + }, + { + "epoch": 1.8238916256157636, + "grad_norm": 10.17521459795804, + "learning_rate": 4.003472426879866e-06, + "loss": 1.2794644832611084, + "step": 1481 + }, + { + "epoch": 1.8251231527093597, + "grad_norm": 12.76602401465421, + "learning_rate": 3.996451180536237e-06, + "loss": 1.4485671520233154, + "step": 1482 + }, + { + "epoch": 1.8263546798029555, + "grad_norm": 10.794290467835673, + "learning_rate": 3.989431995567947e-06, + "loss": 1.1264885663986206, + "step": 1483 + }, + { + "epoch": 1.8275862068965516, + "grad_norm": 9.866085409894106, + "learning_rate": 3.982414886393002e-06, + "loss": 1.7849301099777222, + "step": 1484 + }, + { + "epoch": 1.8288177339901477, + "grad_norm": 12.201702589426084, + "learning_rate": 3.975399867425146e-06, + "loss": 2.4955849647521973, + "step": 1485 + }, + { + "epoch": 1.8300492610837438, + "grad_norm": 9.102568432625791, + "learning_rate": 3.96838695307383e-06, + "loss": 1.3440265655517578, + "step": 1486 + }, + { + "epoch": 1.8312807881773399, + "grad_norm": 8.145548979456889, + "learning_rate": 3.961376157744183e-06, + "loss": 1.7565090656280518, + "step": 1487 + }, + { + "epoch": 1.832512315270936, + "grad_norm": 10.525904376218351, + "learning_rate": 3.954367495836978e-06, + "loss": 2.086646318435669, + "step": 1488 + }, + { + "epoch": 1.833743842364532, + "grad_norm": 11.110223461103494, + "learning_rate": 3.947360981748607e-06, + "loss": 2.0356874465942383, + "step": 1489 + }, + { + "epoch": 1.8349753694581281, + "grad_norm": 18.648426152647907, + "learning_rate": 3.940356629871051e-06, + "loss": 1.3129501342773438, + "step": 1490 + }, + { + "epoch": 1.8362068965517242, + "grad_norm": 9.730568476467749, + "learning_rate": 3.933354454591851e-06, + "loss": 1.468184471130371, + "step": 1491 + }, + { + "epoch": 1.8374384236453203, + "grad_norm": 11.185413004826554, + "learning_rate": 3.926354470294077e-06, + "loss": 1.4110320806503296, + "step": 1492 + }, + { + "epoch": 1.8386699507389164, + "grad_norm": 12.98897769174535, + "learning_rate": 3.9193566913562915e-06, + "loss": 1.0595703125, + "step": 1493 + }, + { + "epoch": 1.8399014778325125, + "grad_norm": 10.530840377449582, + "learning_rate": 3.912361132152537e-06, + "loss": 1.628462791442871, + "step": 1494 + }, + { + "epoch": 1.8411330049261085, + "grad_norm": 14.948049661995398, + "learning_rate": 3.9053678070522904e-06, + "loss": 1.3903121948242188, + "step": 1495 + }, + { + "epoch": 1.8423645320197044, + "grad_norm": 9.309801488918017, + "learning_rate": 3.898376730420442e-06, + "loss": 1.6935603618621826, + "step": 1496 + }, + { + "epoch": 1.8435960591133005, + "grad_norm": 12.543386647265335, + "learning_rate": 3.891387916617261e-06, + "loss": 1.2785383462905884, + "step": 1497 + }, + { + "epoch": 1.8448275862068966, + "grad_norm": 16.302631057977127, + "learning_rate": 3.884401379998375e-06, + "loss": 0.9488393068313599, + "step": 1498 + }, + { + "epoch": 1.8460591133004927, + "grad_norm": 13.324215983939714, + "learning_rate": 3.877417134914724e-06, + "loss": 1.7822269201278687, + "step": 1499 + }, + { + "epoch": 1.8472906403940885, + "grad_norm": 18.86267601616338, + "learning_rate": 3.870435195712547e-06, + "loss": 2.0112462043762207, + "step": 1500 + }, + { + "epoch": 1.8485221674876846, + "grad_norm": 9.69652966834403, + "learning_rate": 3.863455576733349e-06, + "loss": 1.3558632135391235, + "step": 1501 + }, + { + "epoch": 1.8497536945812807, + "grad_norm": 11.295411751598015, + "learning_rate": 3.856478292313864e-06, + "loss": 1.34049391746521, + "step": 1502 + }, + { + "epoch": 1.8509852216748768, + "grad_norm": 14.146066291430358, + "learning_rate": 3.849503356786034e-06, + "loss": 1.5048649311065674, + "step": 1503 + }, + { + "epoch": 1.8522167487684729, + "grad_norm": 15.401780869737596, + "learning_rate": 3.842530784476971e-06, + "loss": 1.595820426940918, + "step": 1504 + }, + { + "epoch": 1.853448275862069, + "grad_norm": 14.910425010360937, + "learning_rate": 3.83556058970894e-06, + "loss": 1.4003782272338867, + "step": 1505 + }, + { + "epoch": 1.854679802955665, + "grad_norm": 7.9611824961674476, + "learning_rate": 3.828592786799318e-06, + "loss": 1.6082279682159424, + "step": 1506 + }, + { + "epoch": 1.8559113300492611, + "grad_norm": 10.255592390028927, + "learning_rate": 3.821627390060568e-06, + "loss": 1.7311087846755981, + "step": 1507 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 12.058780526558753, + "learning_rate": 3.8146644138002154e-06, + "loss": 1.2369680404663086, + "step": 1508 + }, + { + "epoch": 1.8583743842364533, + "grad_norm": 19.050247314658538, + "learning_rate": 3.807703872320809e-06, + "loss": 0.8267203569412231, + "step": 1509 + }, + { + "epoch": 1.8596059113300494, + "grad_norm": 10.351521057178017, + "learning_rate": 3.8007457799198977e-06, + "loss": 1.310041904449463, + "step": 1510 + }, + { + "epoch": 1.8608374384236455, + "grad_norm": 10.657442856658305, + "learning_rate": 3.79379015089e-06, + "loss": 1.483811378479004, + "step": 1511 + }, + { + "epoch": 1.8620689655172413, + "grad_norm": 11.888669790205059, + "learning_rate": 3.7868369995185734e-06, + "loss": 1.7339284420013428, + "step": 1512 + }, + { + "epoch": 1.8633004926108374, + "grad_norm": 10.593168183344854, + "learning_rate": 3.7798863400879894e-06, + "loss": 0.8915985822677612, + "step": 1513 + }, + { + "epoch": 1.8645320197044335, + "grad_norm": 10.734489115549072, + "learning_rate": 3.7729381868754985e-06, + "loss": 2.3413619995117188, + "step": 1514 + }, + { + "epoch": 1.8657635467980296, + "grad_norm": 9.967376867351366, + "learning_rate": 3.7659925541532006e-06, + "loss": 1.422214388847351, + "step": 1515 + }, + { + "epoch": 1.8669950738916257, + "grad_norm": 9.453365529159266, + "learning_rate": 3.759049456188022e-06, + "loss": 1.435701847076416, + "step": 1516 + }, + { + "epoch": 1.8682266009852215, + "grad_norm": 13.939960554468646, + "learning_rate": 3.752108907241682e-06, + "loss": 1.0702649354934692, + "step": 1517 + }, + { + "epoch": 1.8694581280788176, + "grad_norm": 14.375834204057075, + "learning_rate": 3.7451709215706643e-06, + "loss": 1.3625175952911377, + "step": 1518 + }, + { + "epoch": 1.8706896551724137, + "grad_norm": 14.38912976471083, + "learning_rate": 3.738235513426184e-06, + "loss": 0.6707335710525513, + "step": 1519 + }, + { + "epoch": 1.8719211822660098, + "grad_norm": 6.68307140655082, + "learning_rate": 3.7313026970541687e-06, + "loss": 0.9573410749435425, + "step": 1520 + }, + { + "epoch": 1.8731527093596059, + "grad_norm": 8.282620378739653, + "learning_rate": 3.7243724866952114e-06, + "loss": 1.625769853591919, + "step": 1521 + }, + { + "epoch": 1.874384236453202, + "grad_norm": 12.4684771792282, + "learning_rate": 3.717444896584562e-06, + "loss": 1.2327096462249756, + "step": 1522 + }, + { + "epoch": 1.875615763546798, + "grad_norm": 13.733071586817578, + "learning_rate": 3.710519940952085e-06, + "loss": 1.9436770677566528, + "step": 1523 + }, + { + "epoch": 1.8768472906403941, + "grad_norm": 11.428790282383929, + "learning_rate": 3.703597634022232e-06, + "loss": 1.260964274406433, + "step": 1524 + }, + { + "epoch": 1.8780788177339902, + "grad_norm": 10.74418094547702, + "learning_rate": 3.6966779900140193e-06, + "loss": 0.9448941946029663, + "step": 1525 + }, + { + "epoch": 1.8793103448275863, + "grad_norm": 14.784266967626037, + "learning_rate": 3.689761023140981e-06, + "loss": 1.0470240116119385, + "step": 1526 + }, + { + "epoch": 1.8805418719211824, + "grad_norm": 12.626289871406675, + "learning_rate": 3.6828467476111664e-06, + "loss": 1.290519118309021, + "step": 1527 + }, + { + "epoch": 1.8817733990147785, + "grad_norm": 8.368189133022403, + "learning_rate": 3.675935177627088e-06, + "loss": 1.6617997884750366, + "step": 1528 + }, + { + "epoch": 1.8830049261083743, + "grad_norm": 22.331563820583295, + "learning_rate": 3.6690263273857035e-06, + "loss": 2.624133825302124, + "step": 1529 + }, + { + "epoch": 1.8842364532019704, + "grad_norm": 11.125845605261798, + "learning_rate": 3.662120211078385e-06, + "loss": 1.189339518547058, + "step": 1530 + }, + { + "epoch": 1.8854679802955665, + "grad_norm": 11.063623504952298, + "learning_rate": 3.6552168428908886e-06, + "loss": 1.2045223712921143, + "step": 1531 + }, + { + "epoch": 1.8866995073891626, + "grad_norm": 21.05973901513674, + "learning_rate": 3.648316237003321e-06, + "loss": 1.4260770082473755, + "step": 1532 + }, + { + "epoch": 1.8879310344827587, + "grad_norm": 9.70528654459795, + "learning_rate": 3.6414184075901206e-06, + "loss": 1.1973135471343994, + "step": 1533 + }, + { + "epoch": 1.8891625615763545, + "grad_norm": 18.383885319550775, + "learning_rate": 3.6345233688200195e-06, + "loss": 1.4474105834960938, + "step": 1534 + }, + { + "epoch": 1.8903940886699506, + "grad_norm": 9.565993696711384, + "learning_rate": 3.62763113485602e-06, + "loss": 1.5732392072677612, + "step": 1535 + }, + { + "epoch": 1.8916256157635467, + "grad_norm": 18.830417927799424, + "learning_rate": 3.6207417198553624e-06, + "loss": 1.992612361907959, + "step": 1536 + }, + { + "epoch": 1.8928571428571428, + "grad_norm": 8.528733872408509, + "learning_rate": 3.6138551379694936e-06, + "loss": 1.8015589714050293, + "step": 1537 + }, + { + "epoch": 1.8940886699507389, + "grad_norm": 20.045548838222032, + "learning_rate": 3.606971403344044e-06, + "loss": 1.1887943744659424, + "step": 1538 + }, + { + "epoch": 1.895320197044335, + "grad_norm": 8.574686397942823, + "learning_rate": 3.6000905301187953e-06, + "loss": 1.035568118095398, + "step": 1539 + }, + { + "epoch": 1.896551724137931, + "grad_norm": 8.862677959647126, + "learning_rate": 3.5932125324276524e-06, + "loss": 1.8441094160079956, + "step": 1540 + }, + { + "epoch": 1.8977832512315271, + "grad_norm": 21.317551937175974, + "learning_rate": 3.586337424398609e-06, + "loss": 2.7305843830108643, + "step": 1541 + }, + { + "epoch": 1.8990147783251232, + "grad_norm": 12.092619936908829, + "learning_rate": 3.579465220153733e-06, + "loss": 2.1233139038085938, + "step": 1542 + }, + { + "epoch": 1.9002463054187193, + "grad_norm": 11.705206958955536, + "learning_rate": 3.5725959338091133e-06, + "loss": 1.232177495956421, + "step": 1543 + }, + { + "epoch": 1.9014778325123154, + "grad_norm": 7.174113743881224, + "learning_rate": 3.565729579474858e-06, + "loss": 1.89857017993927, + "step": 1544 + }, + { + "epoch": 1.9027093596059115, + "grad_norm": 15.788866110425763, + "learning_rate": 3.5588661712550464e-06, + "loss": 1.1281499862670898, + "step": 1545 + }, + { + "epoch": 1.9039408866995073, + "grad_norm": 10.470956040036935, + "learning_rate": 3.5520057232477073e-06, + "loss": 1.2526335716247559, + "step": 1546 + }, + { + "epoch": 1.9051724137931034, + "grad_norm": 9.301464059536526, + "learning_rate": 3.545148249544793e-06, + "loss": 1.8187229633331299, + "step": 1547 + }, + { + "epoch": 1.9064039408866995, + "grad_norm": 9.75451095353705, + "learning_rate": 3.5382937642321356e-06, + "loss": 2.5140726566314697, + "step": 1548 + }, + { + "epoch": 1.9076354679802956, + "grad_norm": 12.829934813861579, + "learning_rate": 3.5314422813894413e-06, + "loss": 1.4403750896453857, + "step": 1549 + }, + { + "epoch": 1.9088669950738915, + "grad_norm": 16.531679337353626, + "learning_rate": 3.524593815090241e-06, + "loss": 2.1372480392456055, + "step": 1550 + }, + { + "epoch": 1.9100985221674875, + "grad_norm": 15.674375359336546, + "learning_rate": 3.517748379401872e-06, + "loss": 1.3283928632736206, + "step": 1551 + }, + { + "epoch": 1.9113300492610836, + "grad_norm": 18.1169052598084, + "learning_rate": 3.510905988385449e-06, + "loss": 0.915777325630188, + "step": 1552 + }, + { + "epoch": 1.9125615763546797, + "grad_norm": 9.21207861248202, + "learning_rate": 3.5040666560958246e-06, + "loss": 1.4235864877700806, + "step": 1553 + }, + { + "epoch": 1.9137931034482758, + "grad_norm": 10.331880853016509, + "learning_rate": 3.497230396581579e-06, + "loss": 1.0727063417434692, + "step": 1554 + }, + { + "epoch": 1.9150246305418719, + "grad_norm": 6.2183233261424675, + "learning_rate": 3.4903972238849727e-06, + "loss": 1.2492493391036987, + "step": 1555 + }, + { + "epoch": 1.916256157635468, + "grad_norm": 8.689347093090742, + "learning_rate": 3.483567152041928e-06, + "loss": 1.855743408203125, + "step": 1556 + }, + { + "epoch": 1.917487684729064, + "grad_norm": 13.400775432098582, + "learning_rate": 3.4767401950820003e-06, + "loss": 1.2882115840911865, + "step": 1557 + }, + { + "epoch": 1.9187192118226601, + "grad_norm": 17.24953530796186, + "learning_rate": 3.469916367028345e-06, + "loss": 1.0586508512496948, + "step": 1558 + }, + { + "epoch": 1.9199507389162562, + "grad_norm": 7.936641918837841, + "learning_rate": 3.4630956818976875e-06, + "loss": 1.6678158044815063, + "step": 1559 + }, + { + "epoch": 1.9211822660098523, + "grad_norm": 7.533268622313887, + "learning_rate": 3.4562781537003e-06, + "loss": 1.242276906967163, + "step": 1560 + }, + { + "epoch": 1.9224137931034484, + "grad_norm": 11.64160436044446, + "learning_rate": 3.4494637964399723e-06, + "loss": 1.1909584999084473, + "step": 1561 + }, + { + "epoch": 1.9236453201970445, + "grad_norm": 10.255728334199201, + "learning_rate": 3.4426526241139778e-06, + "loss": 1.7636524438858032, + "step": 1562 + }, + { + "epoch": 1.9248768472906403, + "grad_norm": 9.49054957516609, + "learning_rate": 3.4358446507130503e-06, + "loss": 1.709825873374939, + "step": 1563 + }, + { + "epoch": 1.9261083743842364, + "grad_norm": 10.818350574028944, + "learning_rate": 3.4290398902213473e-06, + "loss": 1.0826925039291382, + "step": 1564 + }, + { + "epoch": 1.9273399014778325, + "grad_norm": 8.939498431984473, + "learning_rate": 3.4222383566164314e-06, + "loss": 1.2868252992630005, + "step": 1565 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 8.295112275795647, + "learning_rate": 3.4154400638692376e-06, + "loss": 1.9238274097442627, + "step": 1566 + }, + { + "epoch": 1.9298029556650245, + "grad_norm": 15.317456416232107, + "learning_rate": 3.408645025944042e-06, + "loss": 1.615818977355957, + "step": 1567 + }, + { + "epoch": 1.9310344827586206, + "grad_norm": 10.763654992556582, + "learning_rate": 3.4018532567984326e-06, + "loss": 1.124712586402893, + "step": 1568 + }, + { + "epoch": 1.9322660098522166, + "grad_norm": 12.365184508586257, + "learning_rate": 3.3950647703832907e-06, + "loss": 1.0411077737808228, + "step": 1569 + }, + { + "epoch": 1.9334975369458127, + "grad_norm": 12.632249203055522, + "learning_rate": 3.3882795806427437e-06, + "loss": 1.4247188568115234, + "step": 1570 + }, + { + "epoch": 1.9347290640394088, + "grad_norm": 9.103913844192295, + "learning_rate": 3.3814977015141576e-06, + "loss": 1.9558757543563843, + "step": 1571 + }, + { + "epoch": 1.935960591133005, + "grad_norm": 13.783502778663575, + "learning_rate": 3.3747191469280917e-06, + "loss": 1.4765770435333252, + "step": 1572 + }, + { + "epoch": 1.937192118226601, + "grad_norm": 12.11586545643866, + "learning_rate": 3.3679439308082777e-06, + "loss": 1.2025914192199707, + "step": 1573 + }, + { + "epoch": 1.938423645320197, + "grad_norm": 8.389746847537833, + "learning_rate": 3.361172067071595e-06, + "loss": 1.938293695449829, + "step": 1574 + }, + { + "epoch": 1.9396551724137931, + "grad_norm": 24.18653835255333, + "learning_rate": 3.3544035696280264e-06, + "loss": 1.9626538753509521, + "step": 1575 + }, + { + "epoch": 1.9408866995073892, + "grad_norm": 16.707227251461827, + "learning_rate": 3.34763845238065e-06, + "loss": 2.4771430492401123, + "step": 1576 + }, + { + "epoch": 1.9421182266009853, + "grad_norm": 9.24643762447737, + "learning_rate": 3.340876729225595e-06, + "loss": 1.5694981813430786, + "step": 1577 + }, + { + "epoch": 1.9433497536945814, + "grad_norm": 12.976086056891674, + "learning_rate": 3.334118414052021e-06, + "loss": 1.3358147144317627, + "step": 1578 + }, + { + "epoch": 1.9445812807881775, + "grad_norm": 10.05009781073385, + "learning_rate": 3.327363520742087e-06, + "loss": 1.6929140090942383, + "step": 1579 + }, + { + "epoch": 1.9458128078817734, + "grad_norm": 14.460477433027636, + "learning_rate": 3.320612063170926e-06, + "loss": 1.1454588174819946, + "step": 1580 + }, + { + "epoch": 1.9470443349753694, + "grad_norm": 15.890241219417488, + "learning_rate": 3.313864055206607e-06, + "loss": 1.3037209510803223, + "step": 1581 + }, + { + "epoch": 1.9482758620689655, + "grad_norm": 18.657112628058126, + "learning_rate": 3.3071195107101163e-06, + "loss": 1.2016770839691162, + "step": 1582 + }, + { + "epoch": 1.9495073891625616, + "grad_norm": 8.600208828774889, + "learning_rate": 3.3003784435353304e-06, + "loss": 1.5525718927383423, + "step": 1583 + }, + { + "epoch": 1.9507389162561575, + "grad_norm": 12.025296512404239, + "learning_rate": 3.293640867528978e-06, + "loss": 1.293796420097351, + "step": 1584 + }, + { + "epoch": 1.9519704433497536, + "grad_norm": 14.973626912716192, + "learning_rate": 3.2869067965306178e-06, + "loss": 1.544161081314087, + "step": 1585 + }, + { + "epoch": 1.9532019704433496, + "grad_norm": 12.518775732631475, + "learning_rate": 3.2801762443726087e-06, + "loss": 1.584174633026123, + "step": 1586 + }, + { + "epoch": 1.9544334975369457, + "grad_norm": 9.595940744200961, + "learning_rate": 3.273449224880081e-06, + "loss": 1.4985432624816895, + "step": 1587 + }, + { + "epoch": 1.9556650246305418, + "grad_norm": 14.194278219604545, + "learning_rate": 3.2667257518709124e-06, + "loss": 1.4310071468353271, + "step": 1588 + }, + { + "epoch": 1.956896551724138, + "grad_norm": 6.232251277924355, + "learning_rate": 3.260005839155691e-06, + "loss": 1.2174272537231445, + "step": 1589 + }, + { + "epoch": 1.958128078817734, + "grad_norm": 8.206207570805137, + "learning_rate": 3.2532895005376943e-06, + "loss": 1.4618067741394043, + "step": 1590 + }, + { + "epoch": 1.95935960591133, + "grad_norm": 9.028580710101858, + "learning_rate": 3.2465767498128596e-06, + "loss": 1.2786412239074707, + "step": 1591 + }, + { + "epoch": 1.9605911330049262, + "grad_norm": 14.53956960212149, + "learning_rate": 3.2398676007697495e-06, + "loss": 1.152226209640503, + "step": 1592 + }, + { + "epoch": 1.9618226600985222, + "grad_norm": 9.573027989064228, + "learning_rate": 3.233162067189533e-06, + "loss": 1.8345131874084473, + "step": 1593 + }, + { + "epoch": 1.9630541871921183, + "grad_norm": 12.386896406400556, + "learning_rate": 3.2264601628459513e-06, + "loss": 1.310433030128479, + "step": 1594 + }, + { + "epoch": 1.9642857142857144, + "grad_norm": 18.010952199354442, + "learning_rate": 3.2197619015052893e-06, + "loss": 2.3967676162719727, + "step": 1595 + }, + { + "epoch": 1.9655172413793105, + "grad_norm": 8.956387198130372, + "learning_rate": 3.2130672969263543e-06, + "loss": 1.7937273979187012, + "step": 1596 + }, + { + "epoch": 1.9667487684729064, + "grad_norm": 8.393117465017726, + "learning_rate": 3.206376362860432e-06, + "loss": 2.0265514850616455, + "step": 1597 + }, + { + "epoch": 1.9679802955665024, + "grad_norm": 21.13089299468655, + "learning_rate": 3.1996891130512796e-06, + "loss": 1.9514051675796509, + "step": 1598 + }, + { + "epoch": 1.9692118226600985, + "grad_norm": 13.738115707885685, + "learning_rate": 3.1930055612350795e-06, + "loss": 1.4068338871002197, + "step": 1599 + }, + { + "epoch": 1.9704433497536946, + "grad_norm": 11.875525005970715, + "learning_rate": 3.18632572114042e-06, + "loss": 1.9438577890396118, + "step": 1600 + }, + { + "epoch": 1.9716748768472905, + "grad_norm": 12.6800038807384, + "learning_rate": 3.1796496064882677e-06, + "loss": 1.432902455329895, + "step": 1601 + }, + { + "epoch": 1.9729064039408866, + "grad_norm": 10.748520734517344, + "learning_rate": 3.172977230991935e-06, + "loss": 1.6505646705627441, + "step": 1602 + }, + { + "epoch": 1.9741379310344827, + "grad_norm": 9.807738223531803, + "learning_rate": 3.1663086083570493e-06, + "loss": 2.332062005996704, + "step": 1603 + }, + { + "epoch": 1.9753694581280787, + "grad_norm": 7.777919459923873, + "learning_rate": 3.159643752281536e-06, + "loss": 1.737352967262268, + "step": 1604 + }, + { + "epoch": 1.9766009852216748, + "grad_norm": 12.828820681008972, + "learning_rate": 3.152982676455581e-06, + "loss": 1.5183820724487305, + "step": 1605 + }, + { + "epoch": 1.977832512315271, + "grad_norm": 12.058545370748947, + "learning_rate": 3.1463253945616056e-06, + "loss": 1.5560420751571655, + "step": 1606 + }, + { + "epoch": 1.979064039408867, + "grad_norm": 12.080370196486308, + "learning_rate": 3.1396719202742375e-06, + "loss": 2.2159786224365234, + "step": 1607 + }, + { + "epoch": 1.980295566502463, + "grad_norm": 11.349700550180101, + "learning_rate": 3.133022267260283e-06, + "loss": 3.4431471824645996, + "step": 1608 + }, + { + "epoch": 1.9815270935960592, + "grad_norm": 15.960971258656029, + "learning_rate": 3.1263764491786984e-06, + "loss": 1.0674099922180176, + "step": 1609 + }, + { + "epoch": 1.9827586206896552, + "grad_norm": 10.915353003367029, + "learning_rate": 3.1197344796805675e-06, + "loss": 1.2427492141723633, + "step": 1610 + }, + { + "epoch": 1.9839901477832513, + "grad_norm": 13.554860694250717, + "learning_rate": 3.1130963724090626e-06, + "loss": 1.5895799398422241, + "step": 1611 + }, + { + "epoch": 1.9852216748768474, + "grad_norm": 8.558375384118374, + "learning_rate": 3.1064621409994245e-06, + "loss": 1.3781355619430542, + "step": 1612 + }, + { + "epoch": 1.9864532019704435, + "grad_norm": 17.36928034840775, + "learning_rate": 3.0998317990789378e-06, + "loss": 1.3307732343673706, + "step": 1613 + }, + { + "epoch": 1.9876847290640394, + "grad_norm": 13.9784605520041, + "learning_rate": 3.0932053602668876e-06, + "loss": 1.340241551399231, + "step": 1614 + }, + { + "epoch": 1.9889162561576355, + "grad_norm": 9.756766918680166, + "learning_rate": 3.0865828381745515e-06, + "loss": 1.5866634845733643, + "step": 1615 + }, + { + "epoch": 1.9901477832512315, + "grad_norm": 14.514845100981475, + "learning_rate": 3.0799642464051573e-06, + "loss": 1.363608717918396, + "step": 1616 + }, + { + "epoch": 1.9913793103448276, + "grad_norm": 13.803723137880525, + "learning_rate": 3.0733495985538575e-06, + "loss": 0.8918144106864929, + "step": 1617 + }, + { + "epoch": 1.9926108374384235, + "grad_norm": 18.044340986569775, + "learning_rate": 3.0667389082077114e-06, + "loss": 1.4538538455963135, + "step": 1618 + }, + { + "epoch": 1.9938423645320196, + "grad_norm": 11.435301654271841, + "learning_rate": 3.0601321889456378e-06, + "loss": 1.6913137435913086, + "step": 1619 + }, + { + "epoch": 1.9950738916256157, + "grad_norm": 9.858778951797417, + "learning_rate": 3.0535294543384074e-06, + "loss": 1.4266109466552734, + "step": 1620 + }, + { + "epoch": 1.9963054187192117, + "grad_norm": 22.051543439765215, + "learning_rate": 3.046930717948604e-06, + "loss": 1.2479441165924072, + "step": 1621 + }, + { + "epoch": 1.9975369458128078, + "grad_norm": 9.286359312990374, + "learning_rate": 3.0403359933305965e-06, + "loss": 2.138500213623047, + "step": 1622 + }, + { + "epoch": 1.998768472906404, + "grad_norm": 7.759425069440999, + "learning_rate": 3.033745294030517e-06, + "loss": 1.7762420177459717, + "step": 1623 + }, + { + "epoch": 2.0, + "grad_norm": 16.72677410836059, + "learning_rate": 3.0271586335862258e-06, + "loss": 0.858219563961029, + "step": 1624 + }, + { + "epoch": 2.001231527093596, + "grad_norm": 14.643925249137768, + "learning_rate": 3.0205760255272874e-06, + "loss": 0.5493918657302856, + "step": 1625 + }, + { + "epoch": 2.002463054187192, + "grad_norm": 6.249448248328766, + "learning_rate": 3.013997483374944e-06, + "loss": 0.25155016779899597, + "step": 1626 + }, + { + "epoch": 2.0036945812807883, + "grad_norm": 12.443278487913815, + "learning_rate": 3.007423020642084e-06, + "loss": 0.7727752923965454, + "step": 1627 + }, + { + "epoch": 2.0049261083743843, + "grad_norm": 8.331944645794822, + "learning_rate": 3.0008526508332216e-06, + "loss": 0.43595510721206665, + "step": 1628 + }, + { + "epoch": 2.0061576354679804, + "grad_norm": 12.199248861649188, + "learning_rate": 2.9942863874444565e-06, + "loss": 0.3856297433376312, + "step": 1629 + }, + { + "epoch": 2.0073891625615765, + "grad_norm": 10.194964984786639, + "learning_rate": 2.987724243963458e-06, + "loss": 0.8458558917045593, + "step": 1630 + }, + { + "epoch": 2.0086206896551726, + "grad_norm": 10.400619109316716, + "learning_rate": 2.981166233869429e-06, + "loss": 0.46873772144317627, + "step": 1631 + }, + { + "epoch": 2.0098522167487687, + "grad_norm": 7.542731982064387, + "learning_rate": 2.9746123706330886e-06, + "loss": 0.42779290676116943, + "step": 1632 + }, + { + "epoch": 2.0110837438423643, + "grad_norm": 9.375159014521008, + "learning_rate": 2.9680626677166324e-06, + "loss": 0.627717912197113, + "step": 1633 + }, + { + "epoch": 2.0123152709359604, + "grad_norm": 7.3118642493157155, + "learning_rate": 2.9615171385737107e-06, + "loss": 1.0879265069961548, + "step": 1634 + }, + { + "epoch": 2.0135467980295565, + "grad_norm": 10.467281128404773, + "learning_rate": 2.9549757966494053e-06, + "loss": 0.6282559037208557, + "step": 1635 + }, + { + "epoch": 2.0147783251231526, + "grad_norm": 11.126192184454366, + "learning_rate": 2.9484386553801875e-06, + "loss": 0.5774171352386475, + "step": 1636 + }, + { + "epoch": 2.0160098522167487, + "grad_norm": 10.360450434232337, + "learning_rate": 2.9419057281939106e-06, + "loss": 0.38788995146751404, + "step": 1637 + }, + { + "epoch": 2.0172413793103448, + "grad_norm": 13.340772113855921, + "learning_rate": 2.935377028509766e-06, + "loss": 1.1726861000061035, + "step": 1638 + }, + { + "epoch": 2.018472906403941, + "grad_norm": 9.74656398362734, + "learning_rate": 2.9288525697382623e-06, + "loss": 0.7854858636856079, + "step": 1639 + }, + { + "epoch": 2.019704433497537, + "grad_norm": 11.086797967435993, + "learning_rate": 2.922332365281201e-06, + "loss": 0.25507253408432007, + "step": 1640 + }, + { + "epoch": 2.020935960591133, + "grad_norm": 13.738902835067712, + "learning_rate": 2.9158164285316356e-06, + "loss": 0.5835862755775452, + "step": 1641 + }, + { + "epoch": 2.022167487684729, + "grad_norm": 12.908512466729006, + "learning_rate": 2.9093047728738604e-06, + "loss": 0.49123138189315796, + "step": 1642 + }, + { + "epoch": 2.023399014778325, + "grad_norm": 6.708189349635942, + "learning_rate": 2.9027974116833756e-06, + "loss": 0.20273317396640778, + "step": 1643 + }, + { + "epoch": 2.0246305418719213, + "grad_norm": 12.517783768989945, + "learning_rate": 2.896294358326862e-06, + "loss": 0.46980565786361694, + "step": 1644 + }, + { + "epoch": 2.0258620689655173, + "grad_norm": 12.98671748044912, + "learning_rate": 2.889795626162143e-06, + "loss": 0.23243547976016998, + "step": 1645 + }, + { + "epoch": 2.0270935960591134, + "grad_norm": 21.52509717224934, + "learning_rate": 2.883301228538178e-06, + "loss": 1.3259830474853516, + "step": 1646 + }, + { + "epoch": 2.0283251231527095, + "grad_norm": 10.539113199927511, + "learning_rate": 2.8768111787950105e-06, + "loss": 0.3021068274974823, + "step": 1647 + }, + { + "epoch": 2.0295566502463056, + "grad_norm": 9.17401806944997, + "learning_rate": 2.8703254902637646e-06, + "loss": 0.3854427933692932, + "step": 1648 + }, + { + "epoch": 2.0307881773399017, + "grad_norm": 14.201306893364228, + "learning_rate": 2.8638441762665957e-06, + "loss": 0.3356427848339081, + "step": 1649 + }, + { + "epoch": 2.0320197044334973, + "grad_norm": 17.83956908779597, + "learning_rate": 2.857367250116682e-06, + "loss": 0.4785861372947693, + "step": 1650 + }, + { + "epoch": 2.0332512315270934, + "grad_norm": 7.19305688493566, + "learning_rate": 2.8508947251181885e-06, + "loss": 0.1944020539522171, + "step": 1651 + }, + { + "epoch": 2.0344827586206895, + "grad_norm": 10.046970652926046, + "learning_rate": 2.8444266145662284e-06, + "loss": 0.29677248001098633, + "step": 1652 + }, + { + "epoch": 2.0357142857142856, + "grad_norm": 24.647186410998657, + "learning_rate": 2.8379629317468604e-06, + "loss": 1.517862319946289, + "step": 1653 + }, + { + "epoch": 2.0369458128078817, + "grad_norm": 13.23680169167266, + "learning_rate": 2.8315036899370442e-06, + "loss": 0.5191118717193604, + "step": 1654 + }, + { + "epoch": 2.0381773399014778, + "grad_norm": 13.059908687808356, + "learning_rate": 2.825048902404612e-06, + "loss": 0.42354950308799744, + "step": 1655 + }, + { + "epoch": 2.039408866995074, + "grad_norm": 12.282344754345834, + "learning_rate": 2.818598582408255e-06, + "loss": 0.6974557638168335, + "step": 1656 + }, + { + "epoch": 2.04064039408867, + "grad_norm": 11.678426390945974, + "learning_rate": 2.8121527431974838e-06, + "loss": 0.8337801694869995, + "step": 1657 + }, + { + "epoch": 2.041871921182266, + "grad_norm": 11.653625925472546, + "learning_rate": 2.805711398012604e-06, + "loss": 0.48300114274024963, + "step": 1658 + }, + { + "epoch": 2.043103448275862, + "grad_norm": 8.699921165351283, + "learning_rate": 2.799274560084688e-06, + "loss": 0.2231900542974472, + "step": 1659 + }, + { + "epoch": 2.044334975369458, + "grad_norm": 11.080926890704283, + "learning_rate": 2.7928422426355554e-06, + "loss": 0.7431713342666626, + "step": 1660 + }, + { + "epoch": 2.0455665024630543, + "grad_norm": 10.18242138749306, + "learning_rate": 2.7864144588777403e-06, + "loss": 0.5905585289001465, + "step": 1661 + }, + { + "epoch": 2.0467980295566504, + "grad_norm": 12.79007023215843, + "learning_rate": 2.779991222014459e-06, + "loss": 0.5379045009613037, + "step": 1662 + }, + { + "epoch": 2.0480295566502464, + "grad_norm": 10.204627357114346, + "learning_rate": 2.77357254523959e-06, + "loss": 0.4073173403739929, + "step": 1663 + }, + { + "epoch": 2.0492610837438425, + "grad_norm": 16.54029756463169, + "learning_rate": 2.767158441737646e-06, + "loss": 0.37792834639549255, + "step": 1664 + }, + { + "epoch": 2.0504926108374386, + "grad_norm": 12.199606214048373, + "learning_rate": 2.7607489246837505e-06, + "loss": 0.5250200629234314, + "step": 1665 + }, + { + "epoch": 2.0517241379310347, + "grad_norm": 15.23569807667072, + "learning_rate": 2.754344007243594e-06, + "loss": 0.7716425061225891, + "step": 1666 + }, + { + "epoch": 2.0529556650246303, + "grad_norm": 7.925817755895629, + "learning_rate": 2.74794370257343e-06, + "loss": 0.6505113244056702, + "step": 1667 + }, + { + "epoch": 2.0541871921182264, + "grad_norm": 13.232372975936459, + "learning_rate": 2.741548023820037e-06, + "loss": 1.237591028213501, + "step": 1668 + }, + { + "epoch": 2.0554187192118225, + "grad_norm": 7.821194651549222, + "learning_rate": 2.7351569841206792e-06, + "loss": 0.33151859045028687, + "step": 1669 + }, + { + "epoch": 2.0566502463054186, + "grad_norm": 9.91473906287112, + "learning_rate": 2.728770596603105e-06, + "loss": 0.42522889375686646, + "step": 1670 + }, + { + "epoch": 2.0578817733990147, + "grad_norm": 10.678926533172987, + "learning_rate": 2.722388874385503e-06, + "loss": 0.3359280824661255, + "step": 1671 + }, + { + "epoch": 2.0591133004926108, + "grad_norm": 9.193563725792906, + "learning_rate": 2.716011830576475e-06, + "loss": 0.23182198405265808, + "step": 1672 + }, + { + "epoch": 2.060344827586207, + "grad_norm": 13.12855060675622, + "learning_rate": 2.7096394782750186e-06, + "loss": 0.30262982845306396, + "step": 1673 + }, + { + "epoch": 2.061576354679803, + "grad_norm": 7.791350721856929, + "learning_rate": 2.7032718305704887e-06, + "loss": 0.23311859369277954, + "step": 1674 + }, + { + "epoch": 2.062807881773399, + "grad_norm": 12.221292312776084, + "learning_rate": 2.696908900542584e-06, + "loss": 0.6328019499778748, + "step": 1675 + }, + { + "epoch": 2.064039408866995, + "grad_norm": 10.8289045782447, + "learning_rate": 2.690550701261304e-06, + "loss": 0.30473750829696655, + "step": 1676 + }, + { + "epoch": 2.065270935960591, + "grad_norm": 8.921318423622994, + "learning_rate": 2.684197245786938e-06, + "loss": 0.2824372947216034, + "step": 1677 + }, + { + "epoch": 2.0665024630541873, + "grad_norm": 15.101179094698006, + "learning_rate": 2.677848547170029e-06, + "loss": 0.3543265163898468, + "step": 1678 + }, + { + "epoch": 2.0677339901477834, + "grad_norm": 8.79612621311314, + "learning_rate": 2.671504618451348e-06, + "loss": 0.6176484823226929, + "step": 1679 + }, + { + "epoch": 2.0689655172413794, + "grad_norm": 10.985306627235934, + "learning_rate": 2.665165472661866e-06, + "loss": 0.5290611386299133, + "step": 1680 + }, + { + "epoch": 2.0701970443349755, + "grad_norm": 8.398062035832517, + "learning_rate": 2.658831122822735e-06, + "loss": 0.5321454405784607, + "step": 1681 + }, + { + "epoch": 2.0714285714285716, + "grad_norm": 11.540193919775621, + "learning_rate": 2.6525015819452504e-06, + "loss": 0.27902156114578247, + "step": 1682 + }, + { + "epoch": 2.0726600985221673, + "grad_norm": 12.60801369495054, + "learning_rate": 2.6461768630308326e-06, + "loss": 0.46582847833633423, + "step": 1683 + }, + { + "epoch": 2.0738916256157633, + "grad_norm": 15.322116984466021, + "learning_rate": 2.6398569790710007e-06, + "loss": 0.651951014995575, + "step": 1684 + }, + { + "epoch": 2.0751231527093594, + "grad_norm": 9.74038331873093, + "learning_rate": 2.633541943047334e-06, + "loss": 0.36612239480018616, + "step": 1685 + }, + { + "epoch": 2.0763546798029555, + "grad_norm": 7.730903286765135, + "learning_rate": 2.6272317679314573e-06, + "loss": 0.22278031706809998, + "step": 1686 + }, + { + "epoch": 2.0775862068965516, + "grad_norm": 7.781634586207103, + "learning_rate": 2.620926466685013e-06, + "loss": 0.33012956380844116, + "step": 1687 + }, + { + "epoch": 2.0788177339901477, + "grad_norm": 9.397683957095191, + "learning_rate": 2.6146260522596334e-06, + "loss": 0.7396690845489502, + "step": 1688 + }, + { + "epoch": 2.0800492610837438, + "grad_norm": 11.988801603692485, + "learning_rate": 2.608330537596907e-06, + "loss": 0.8257578611373901, + "step": 1689 + }, + { + "epoch": 2.08128078817734, + "grad_norm": 8.855369489146483, + "learning_rate": 2.6020399356283586e-06, + "loss": 0.4538348317146301, + "step": 1690 + }, + { + "epoch": 2.082512315270936, + "grad_norm": 9.991399228257757, + "learning_rate": 2.595754259275428e-06, + "loss": 0.992777943611145, + "step": 1691 + }, + { + "epoch": 2.083743842364532, + "grad_norm": 11.406818947912145, + "learning_rate": 2.589473521449434e-06, + "loss": 0.346379816532135, + "step": 1692 + }, + { + "epoch": 2.084975369458128, + "grad_norm": 18.61665504561422, + "learning_rate": 2.583197735051546e-06, + "loss": 0.4523533284664154, + "step": 1693 + }, + { + "epoch": 2.086206896551724, + "grad_norm": 9.296672908995824, + "learning_rate": 2.576926912972771e-06, + "loss": 0.11842907965183258, + "step": 1694 + }, + { + "epoch": 2.0874384236453203, + "grad_norm": 8.459525770988064, + "learning_rate": 2.5706610680939186e-06, + "loss": 0.381897896528244, + "step": 1695 + }, + { + "epoch": 2.0886699507389164, + "grad_norm": 11.109371262298351, + "learning_rate": 2.564400213285564e-06, + "loss": 0.3824227452278137, + "step": 1696 + }, + { + "epoch": 2.0899014778325125, + "grad_norm": 7.622915250326246, + "learning_rate": 2.5581443614080433e-06, + "loss": 0.4153192639350891, + "step": 1697 + }, + { + "epoch": 2.0911330049261085, + "grad_norm": 12.840140963343943, + "learning_rate": 2.5518935253114153e-06, + "loss": 0.3284783959388733, + "step": 1698 + }, + { + "epoch": 2.0923645320197046, + "grad_norm": 9.586633818986163, + "learning_rate": 2.545647717835428e-06, + "loss": 0.7730638980865479, + "step": 1699 + }, + { + "epoch": 2.0935960591133007, + "grad_norm": 9.329889124511917, + "learning_rate": 2.539406951809512e-06, + "loss": 0.31647253036499023, + "step": 1700 + }, + { + "epoch": 2.0948275862068964, + "grad_norm": 12.004447197114908, + "learning_rate": 2.53317124005273e-06, + "loss": 0.5977708101272583, + "step": 1701 + }, + { + "epoch": 2.0960591133004924, + "grad_norm": 8.69992433934411, + "learning_rate": 2.5269405953737735e-06, + "loss": 0.2646758556365967, + "step": 1702 + }, + { + "epoch": 2.0972906403940885, + "grad_norm": 8.02489022856674, + "learning_rate": 2.5207150305709167e-06, + "loss": 0.5242122411727905, + "step": 1703 + }, + { + "epoch": 2.0985221674876846, + "grad_norm": 13.343080912035035, + "learning_rate": 2.5144945584320056e-06, + "loss": 0.43271976709365845, + "step": 1704 + }, + { + "epoch": 2.0997536945812807, + "grad_norm": 16.386560709178422, + "learning_rate": 2.5082791917344256e-06, + "loss": 0.902009904384613, + "step": 1705 + }, + { + "epoch": 2.100985221674877, + "grad_norm": 8.363747351262921, + "learning_rate": 2.5020689432450706e-06, + "loss": 0.5218071937561035, + "step": 1706 + }, + { + "epoch": 2.102216748768473, + "grad_norm": 13.441523308623053, + "learning_rate": 2.495863825720322e-06, + "loss": 0.7475143671035767, + "step": 1707 + }, + { + "epoch": 2.103448275862069, + "grad_norm": 9.20779623087441, + "learning_rate": 2.4896638519060257e-06, + "loss": 0.31655290722846985, + "step": 1708 + }, + { + "epoch": 2.104679802955665, + "grad_norm": 12.453919142267711, + "learning_rate": 2.4834690345374608e-06, + "loss": 0.30808842182159424, + "step": 1709 + }, + { + "epoch": 2.105911330049261, + "grad_norm": 12.241452294332287, + "learning_rate": 2.477279386339309e-06, + "loss": 0.7037611603736877, + "step": 1710 + }, + { + "epoch": 2.107142857142857, + "grad_norm": 14.091630182879387, + "learning_rate": 2.471094920025644e-06, + "loss": 0.4699273407459259, + "step": 1711 + }, + { + "epoch": 2.1083743842364533, + "grad_norm": 13.920276564221119, + "learning_rate": 2.4649156482998873e-06, + "loss": 0.5032830238342285, + "step": 1712 + }, + { + "epoch": 2.1096059113300494, + "grad_norm": 12.895772980307312, + "learning_rate": 2.45874158385479e-06, + "loss": 1.2563080787658691, + "step": 1713 + }, + { + "epoch": 2.1108374384236455, + "grad_norm": 7.446774906593091, + "learning_rate": 2.4525727393724136e-06, + "loss": 0.29728978872299194, + "step": 1714 + }, + { + "epoch": 2.1120689655172415, + "grad_norm": 9.446867560016528, + "learning_rate": 2.446409127524094e-06, + "loss": 0.2391032576560974, + "step": 1715 + }, + { + "epoch": 2.1133004926108376, + "grad_norm": 13.287475847065688, + "learning_rate": 2.4402507609704163e-06, + "loss": 0.4612117409706116, + "step": 1716 + }, + { + "epoch": 2.1145320197044333, + "grad_norm": 9.000836025460185, + "learning_rate": 2.4340976523611957e-06, + "loss": 0.36539849638938904, + "step": 1717 + }, + { + "epoch": 2.1157635467980294, + "grad_norm": 6.954876550316873, + "learning_rate": 2.427949814335443e-06, + "loss": 0.2918080687522888, + "step": 1718 + }, + { + "epoch": 2.1169950738916254, + "grad_norm": 12.290862216055704, + "learning_rate": 2.4218072595213467e-06, + "loss": 0.4508627653121948, + "step": 1719 + }, + { + "epoch": 2.1182266009852215, + "grad_norm": 10.395578945684981, + "learning_rate": 2.4156700005362384e-06, + "loss": 0.43477705121040344, + "step": 1720 + }, + { + "epoch": 2.1194581280788176, + "grad_norm": 13.97203519429258, + "learning_rate": 2.409538049986576e-06, + "loss": 0.36739200353622437, + "step": 1721 + }, + { + "epoch": 2.1206896551724137, + "grad_norm": 10.000232328294244, + "learning_rate": 2.403411420467916e-06, + "loss": 0.722801923751831, + "step": 1722 + }, + { + "epoch": 2.12192118226601, + "grad_norm": 8.047857628714285, + "learning_rate": 2.3972901245648724e-06, + "loss": 0.3729158043861389, + "step": 1723 + }, + { + "epoch": 2.123152709359606, + "grad_norm": 9.083191980371518, + "learning_rate": 2.3911741748511163e-06, + "loss": 0.741644024848938, + "step": 1724 + }, + { + "epoch": 2.124384236453202, + "grad_norm": 11.04614906019948, + "learning_rate": 2.385063583889335e-06, + "loss": 0.21925917267799377, + "step": 1725 + }, + { + "epoch": 2.125615763546798, + "grad_norm": 8.204563983460345, + "learning_rate": 2.378958364231202e-06, + "loss": 0.3161308765411377, + "step": 1726 + }, + { + "epoch": 2.126847290640394, + "grad_norm": 9.198617981495676, + "learning_rate": 2.3728585284173646e-06, + "loss": 0.2520957887172699, + "step": 1727 + }, + { + "epoch": 2.12807881773399, + "grad_norm": 17.99753939345998, + "learning_rate": 2.3667640889774096e-06, + "loss": 0.5538915991783142, + "step": 1728 + }, + { + "epoch": 2.1293103448275863, + "grad_norm": 15.205601395041407, + "learning_rate": 2.3606750584298375e-06, + "loss": 0.5438660979270935, + "step": 1729 + }, + { + "epoch": 2.1305418719211824, + "grad_norm": 11.445216371439214, + "learning_rate": 2.3545914492820366e-06, + "loss": 0.39724698662757874, + "step": 1730 + }, + { + "epoch": 2.1317733990147785, + "grad_norm": 13.240651517787109, + "learning_rate": 2.348513274030264e-06, + "loss": 0.3480866551399231, + "step": 1731 + }, + { + "epoch": 2.1330049261083746, + "grad_norm": 8.909285636059167, + "learning_rate": 2.3424405451596143e-06, + "loss": 0.9076392650604248, + "step": 1732 + }, + { + "epoch": 2.1342364532019706, + "grad_norm": 10.08773566622176, + "learning_rate": 2.3363732751439926e-06, + "loss": 0.19863876700401306, + "step": 1733 + }, + { + "epoch": 2.1354679802955667, + "grad_norm": 18.974399402946254, + "learning_rate": 2.3303114764460887e-06, + "loss": 0.5347404479980469, + "step": 1734 + }, + { + "epoch": 2.1366995073891624, + "grad_norm": 13.439122993751143, + "learning_rate": 2.32425516151736e-06, + "loss": 0.4876821041107178, + "step": 1735 + }, + { + "epoch": 2.1379310344827585, + "grad_norm": 11.45775521594229, + "learning_rate": 2.3182043427979973e-06, + "loss": 0.24914954602718353, + "step": 1736 + }, + { + "epoch": 2.1391625615763545, + "grad_norm": 8.201340069963411, + "learning_rate": 2.3121590327168987e-06, + "loss": 0.5773565769195557, + "step": 1737 + }, + { + "epoch": 2.1403940886699506, + "grad_norm": 11.57987957433396, + "learning_rate": 2.30611924369165e-06, + "loss": 0.7779598832130432, + "step": 1738 + }, + { + "epoch": 2.1416256157635467, + "grad_norm": 10.793230544693655, + "learning_rate": 2.3000849881285016e-06, + "loss": 0.27866464853286743, + "step": 1739 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 10.857850500188468, + "learning_rate": 2.2940562784223224e-06, + "loss": 0.5243108868598938, + "step": 1740 + }, + { + "epoch": 2.144088669950739, + "grad_norm": 11.19069440448601, + "learning_rate": 2.2880331269566043e-06, + "loss": 0.6560786366462708, + "step": 1741 + }, + { + "epoch": 2.145320197044335, + "grad_norm": 13.01584696243558, + "learning_rate": 2.282015546103418e-06, + "loss": 0.6339880228042603, + "step": 1742 + }, + { + "epoch": 2.146551724137931, + "grad_norm": 9.571310950804556, + "learning_rate": 2.2760035482233868e-06, + "loss": 0.2517808973789215, + "step": 1743 + }, + { + "epoch": 2.147783251231527, + "grad_norm": 20.291798315352697, + "learning_rate": 2.269997145665674e-06, + "loss": 0.40347909927368164, + "step": 1744 + }, + { + "epoch": 2.149014778325123, + "grad_norm": 9.550073631094609, + "learning_rate": 2.263996350767942e-06, + "loss": 0.4681488573551178, + "step": 1745 + }, + { + "epoch": 2.1502463054187193, + "grad_norm": 9.340283980757114, + "learning_rate": 2.2580011758563418e-06, + "loss": 0.6371068954467773, + "step": 1746 + }, + { + "epoch": 2.1514778325123154, + "grad_norm": 21.612590436052542, + "learning_rate": 2.2520116332454726e-06, + "loss": 0.4741581678390503, + "step": 1747 + }, + { + "epoch": 2.1527093596059115, + "grad_norm": 8.523455664504207, + "learning_rate": 2.2460277352383713e-06, + "loss": 0.3354438543319702, + "step": 1748 + }, + { + "epoch": 2.1539408866995076, + "grad_norm": 14.050991791769299, + "learning_rate": 2.240049494126479e-06, + "loss": 0.593233585357666, + "step": 1749 + }, + { + "epoch": 2.1551724137931036, + "grad_norm": 11.626128632656414, + "learning_rate": 2.234076922189613e-06, + "loss": 0.32123100757598877, + "step": 1750 + }, + { + "epoch": 2.1564039408866993, + "grad_norm": 17.381626157091297, + "learning_rate": 2.2281100316959476e-06, + "loss": 1.0594584941864014, + "step": 1751 + }, + { + "epoch": 2.1576354679802954, + "grad_norm": 9.794184199968742, + "learning_rate": 2.2221488349019903e-06, + "loss": 0.8586208820343018, + "step": 1752 + }, + { + "epoch": 2.1588669950738915, + "grad_norm": 10.979739823361593, + "learning_rate": 2.2161933440525474e-06, + "loss": 0.38074642419815063, + "step": 1753 + }, + { + "epoch": 2.1600985221674875, + "grad_norm": 10.732650739543086, + "learning_rate": 2.21024357138071e-06, + "loss": 0.28768736124038696, + "step": 1754 + }, + { + "epoch": 2.1613300492610836, + "grad_norm": 10.263056998284627, + "learning_rate": 2.2042995291078227e-06, + "loss": 1.1843211650848389, + "step": 1755 + }, + { + "epoch": 2.1625615763546797, + "grad_norm": 13.635797719225163, + "learning_rate": 2.1983612294434563e-06, + "loss": 0.7616925835609436, + "step": 1756 + }, + { + "epoch": 2.163793103448276, + "grad_norm": 9.78260695772624, + "learning_rate": 2.192428684585386e-06, + "loss": 0.4518227279186249, + "step": 1757 + }, + { + "epoch": 2.165024630541872, + "grad_norm": 14.669561384919394, + "learning_rate": 2.1865019067195685e-06, + "loss": 0.9173997640609741, + "step": 1758 + }, + { + "epoch": 2.166256157635468, + "grad_norm": 9.861706475635476, + "learning_rate": 2.180580908020117e-06, + "loss": 0.4044645428657532, + "step": 1759 + }, + { + "epoch": 2.167487684729064, + "grad_norm": 11.783858103052328, + "learning_rate": 2.174665700649267e-06, + "loss": 0.7771418690681458, + "step": 1760 + }, + { + "epoch": 2.16871921182266, + "grad_norm": 12.555695641041428, + "learning_rate": 2.1687562967573645e-06, + "loss": 0.39461982250213623, + "step": 1761 + }, + { + "epoch": 2.1699507389162562, + "grad_norm": 8.510682084443147, + "learning_rate": 2.1628527084828283e-06, + "loss": 0.2924491763114929, + "step": 1762 + }, + { + "epoch": 2.1711822660098523, + "grad_norm": 7.789254339344862, + "learning_rate": 2.156954947952139e-06, + "loss": 0.2507514953613281, + "step": 1763 + }, + { + "epoch": 2.1724137931034484, + "grad_norm": 9.474786369957261, + "learning_rate": 2.151063027279798e-06, + "loss": 0.44257861375808716, + "step": 1764 + }, + { + "epoch": 2.1736453201970445, + "grad_norm": 9.165088005805186, + "learning_rate": 2.1451769585683196e-06, + "loss": 0.2863251268863678, + "step": 1765 + }, + { + "epoch": 2.1748768472906406, + "grad_norm": 14.506373027900759, + "learning_rate": 2.139296753908195e-06, + "loss": 0.6882431507110596, + "step": 1766 + }, + { + "epoch": 2.1761083743842367, + "grad_norm": 10.237681928740948, + "learning_rate": 2.1334224253778628e-06, + "loss": 0.8318816423416138, + "step": 1767 + }, + { + "epoch": 2.1773399014778327, + "grad_norm": 8.92298078848023, + "learning_rate": 2.1275539850437006e-06, + "loss": 0.3899531364440918, + "step": 1768 + }, + { + "epoch": 2.1785714285714284, + "grad_norm": 10.24700092560103, + "learning_rate": 2.1216914449599905e-06, + "loss": 0.6424532532691956, + "step": 1769 + }, + { + "epoch": 2.1798029556650245, + "grad_norm": 10.006066437806421, + "learning_rate": 2.1158348171688888e-06, + "loss": 0.6676028370857239, + "step": 1770 + }, + { + "epoch": 2.1810344827586206, + "grad_norm": 11.577953051638056, + "learning_rate": 2.109984113700413e-06, + "loss": 0.4219639301300049, + "step": 1771 + }, + { + "epoch": 2.1822660098522166, + "grad_norm": 6.842671899586793, + "learning_rate": 2.1041393465724114e-06, + "loss": 0.32283568382263184, + "step": 1772 + }, + { + "epoch": 2.1834975369458127, + "grad_norm": 9.373944237506624, + "learning_rate": 2.0983005277905348e-06, + "loss": 0.26172614097595215, + "step": 1773 + }, + { + "epoch": 2.184729064039409, + "grad_norm": 8.04859888971959, + "learning_rate": 2.092467669348217e-06, + "loss": 0.585732638835907, + "step": 1774 + }, + { + "epoch": 2.185960591133005, + "grad_norm": 17.13691371915511, + "learning_rate": 2.0866407832266506e-06, + "loss": 0.42734187841415405, + "step": 1775 + }, + { + "epoch": 2.187192118226601, + "grad_norm": 9.353812644763135, + "learning_rate": 2.0808198813947606e-06, + "loss": 0.24151989817619324, + "step": 1776 + }, + { + "epoch": 2.188423645320197, + "grad_norm": 6.491521280477716, + "learning_rate": 2.0750049758091778e-06, + "loss": 0.12940426170825958, + "step": 1777 + }, + { + "epoch": 2.189655172413793, + "grad_norm": 12.137046868295176, + "learning_rate": 2.0691960784142143e-06, + "loss": 0.7501548528671265, + "step": 1778 + }, + { + "epoch": 2.1908866995073892, + "grad_norm": 8.28614035816523, + "learning_rate": 2.063393201141846e-06, + "loss": 0.43730083107948303, + "step": 1779 + }, + { + "epoch": 2.1921182266009853, + "grad_norm": 7.426728577487124, + "learning_rate": 2.0575963559116823e-06, + "loss": 0.3335978388786316, + "step": 1780 + }, + { + "epoch": 2.1933497536945814, + "grad_norm": 7.727814229698406, + "learning_rate": 2.0518055546309362e-06, + "loss": 0.3262137174606323, + "step": 1781 + }, + { + "epoch": 2.1945812807881775, + "grad_norm": 12.218163734992793, + "learning_rate": 2.0460208091944122e-06, + "loss": 0.3336663544178009, + "step": 1782 + }, + { + "epoch": 2.1958128078817736, + "grad_norm": 12.61978263562606, + "learning_rate": 2.0402421314844774e-06, + "loss": 0.6050255298614502, + "step": 1783 + }, + { + "epoch": 2.1970443349753697, + "grad_norm": 10.058297792191603, + "learning_rate": 2.0344695333710234e-06, + "loss": 0.33584898710250854, + "step": 1784 + }, + { + "epoch": 2.1982758620689653, + "grad_norm": 7.629807101727278, + "learning_rate": 2.0287030267114665e-06, + "loss": 0.4711458683013916, + "step": 1785 + }, + { + "epoch": 2.1995073891625614, + "grad_norm": 7.348268103503395, + "learning_rate": 2.0229426233507067e-06, + "loss": 0.6127311587333679, + "step": 1786 + }, + { + "epoch": 2.2007389162561575, + "grad_norm": 8.230284472347915, + "learning_rate": 2.0171883351211038e-06, + "loss": 0.7195362448692322, + "step": 1787 + }, + { + "epoch": 2.2019704433497536, + "grad_norm": 20.032548588100823, + "learning_rate": 2.0114401738424618e-06, + "loss": 1.412251591682434, + "step": 1788 + }, + { + "epoch": 2.2032019704433496, + "grad_norm": 11.361862300830705, + "learning_rate": 2.0056981513219944e-06, + "loss": 0.48954465985298157, + "step": 1789 + }, + { + "epoch": 2.2044334975369457, + "grad_norm": 10.14335903404985, + "learning_rate": 1.999962279354311e-06, + "loss": 0.32414451241493225, + "step": 1790 + }, + { + "epoch": 2.205665024630542, + "grad_norm": 11.365030809564745, + "learning_rate": 1.9942325697213817e-06, + "loss": 0.4072822034358978, + "step": 1791 + }, + { + "epoch": 2.206896551724138, + "grad_norm": 9.518825727757552, + "learning_rate": 1.988509034192522e-06, + "loss": 0.25958192348480225, + "step": 1792 + }, + { + "epoch": 2.208128078817734, + "grad_norm": 7.689606665993246, + "learning_rate": 1.9827916845243687e-06, + "loss": 0.2943662405014038, + "step": 1793 + }, + { + "epoch": 2.20935960591133, + "grad_norm": 11.749853788306439, + "learning_rate": 1.9770805324608446e-06, + "loss": 0.6713488698005676, + "step": 1794 + }, + { + "epoch": 2.210591133004926, + "grad_norm": 8.987827629233262, + "learning_rate": 1.971375589733145e-06, + "loss": 0.5103387236595154, + "step": 1795 + }, + { + "epoch": 2.2118226600985222, + "grad_norm": 14.84712925009146, + "learning_rate": 1.965676868059714e-06, + "loss": 0.4981153905391693, + "step": 1796 + }, + { + "epoch": 2.2130541871921183, + "grad_norm": 9.829434549611708, + "learning_rate": 1.9599843791462123e-06, + "loss": 0.2828434407711029, + "step": 1797 + }, + { + "epoch": 2.2142857142857144, + "grad_norm": 11.531079285990483, + "learning_rate": 1.9542981346855015e-06, + "loss": 0.36899659037590027, + "step": 1798 + }, + { + "epoch": 2.2155172413793105, + "grad_norm": 10.264635301771921, + "learning_rate": 1.9486181463576176e-06, + "loss": 0.46039581298828125, + "step": 1799 + }, + { + "epoch": 2.2167487684729066, + "grad_norm": 7.994315710714336, + "learning_rate": 1.942944425829741e-06, + "loss": 0.611553966999054, + "step": 1800 + }, + { + "epoch": 2.2179802955665027, + "grad_norm": 10.64295367375575, + "learning_rate": 1.937276984756179e-06, + "loss": 0.23928876221179962, + "step": 1801 + }, + { + "epoch": 2.2192118226600988, + "grad_norm": 11.919180580141987, + "learning_rate": 1.9316158347783436e-06, + "loss": 0.3270934820175171, + "step": 1802 + }, + { + "epoch": 2.2204433497536944, + "grad_norm": 9.438403907761801, + "learning_rate": 1.925960987524724e-06, + "loss": 0.30926424264907837, + "step": 1803 + }, + { + "epoch": 2.2216748768472905, + "grad_norm": 11.903671185207038, + "learning_rate": 1.9203124546108583e-06, + "loss": 0.6049486994743347, + "step": 1804 + }, + { + "epoch": 2.2229064039408866, + "grad_norm": 14.861992075187999, + "learning_rate": 1.91467024763932e-06, + "loss": 0.7592355012893677, + "step": 1805 + }, + { + "epoch": 2.2241379310344827, + "grad_norm": 11.790018718519686, + "learning_rate": 1.9090343781996828e-06, + "loss": 0.26057887077331543, + "step": 1806 + }, + { + "epoch": 2.2253694581280787, + "grad_norm": 17.03673279052151, + "learning_rate": 1.9034048578685099e-06, + "loss": 0.4014609754085541, + "step": 1807 + }, + { + "epoch": 2.226600985221675, + "grad_norm": 10.412774433531801, + "learning_rate": 1.897781698209315e-06, + "loss": 0.26397138833999634, + "step": 1808 + }, + { + "epoch": 2.227832512315271, + "grad_norm": 11.809020308728643, + "learning_rate": 1.8921649107725525e-06, + "loss": 0.8727256059646606, + "step": 1809 + }, + { + "epoch": 2.229064039408867, + "grad_norm": 8.838116472787092, + "learning_rate": 1.8865545070955882e-06, + "loss": 0.45729875564575195, + "step": 1810 + }, + { + "epoch": 2.230295566502463, + "grad_norm": 13.341384604613445, + "learning_rate": 1.880950498702666e-06, + "loss": 0.3261849880218506, + "step": 1811 + }, + { + "epoch": 2.231527093596059, + "grad_norm": 16.210141929264246, + "learning_rate": 1.875352897104903e-06, + "loss": 0.682532787322998, + "step": 1812 + }, + { + "epoch": 2.2327586206896552, + "grad_norm": 16.44333196476405, + "learning_rate": 1.8697617138002545e-06, + "loss": 0.4255359470844269, + "step": 1813 + }, + { + "epoch": 2.2339901477832513, + "grad_norm": 8.460123548003127, + "learning_rate": 1.8641769602734872e-06, + "loss": 0.3307432234287262, + "step": 1814 + }, + { + "epoch": 2.2352216748768474, + "grad_norm": 9.96917434972206, + "learning_rate": 1.8585986479961653e-06, + "loss": 0.26837313175201416, + "step": 1815 + }, + { + "epoch": 2.2364532019704435, + "grad_norm": 12.410587151566334, + "learning_rate": 1.8530267884266228e-06, + "loss": 0.5036531686782837, + "step": 1816 + }, + { + "epoch": 2.2376847290640396, + "grad_norm": 13.229449859916322, + "learning_rate": 1.8474613930099356e-06, + "loss": 0.4444383680820465, + "step": 1817 + }, + { + "epoch": 2.2389162561576357, + "grad_norm": 10.366174513602477, + "learning_rate": 1.8419024731779e-06, + "loss": 0.24592629075050354, + "step": 1818 + }, + { + "epoch": 2.2401477832512313, + "grad_norm": 21.212742320307363, + "learning_rate": 1.8363500403490175e-06, + "loss": 0.9310093522071838, + "step": 1819 + }, + { + "epoch": 2.2413793103448274, + "grad_norm": 10.041916938686702, + "learning_rate": 1.8308041059284621e-06, + "loss": 0.3252318799495697, + "step": 1820 + }, + { + "epoch": 2.2426108374384235, + "grad_norm": 10.169102582875109, + "learning_rate": 1.8252646813080566e-06, + "loss": 0.44218361377716064, + "step": 1821 + }, + { + "epoch": 2.2438423645320196, + "grad_norm": 13.658159402672133, + "learning_rate": 1.8197317778662533e-06, + "loss": 0.631632924079895, + "step": 1822 + }, + { + "epoch": 2.2450738916256157, + "grad_norm": 11.284192076783485, + "learning_rate": 1.814205406968112e-06, + "loss": 0.2570488154888153, + "step": 1823 + }, + { + "epoch": 2.2463054187192117, + "grad_norm": 10.661610786830831, + "learning_rate": 1.8086855799652737e-06, + "loss": 0.6113500595092773, + "step": 1824 + }, + { + "epoch": 2.247536945812808, + "grad_norm": 9.883591422459872, + "learning_rate": 1.8031723081959334e-06, + "loss": 0.5997953414916992, + "step": 1825 + }, + { + "epoch": 2.248768472906404, + "grad_norm": 12.888281661513009, + "learning_rate": 1.7976656029848271e-06, + "loss": 0.501262903213501, + "step": 1826 + }, + { + "epoch": 2.25, + "grad_norm": 9.87397702836225, + "learning_rate": 1.792165475643199e-06, + "loss": 0.9116629362106323, + "step": 1827 + }, + { + "epoch": 2.251231527093596, + "grad_norm": 8.421237466791723, + "learning_rate": 1.786671937468779e-06, + "loss": 0.3302918076515198, + "step": 1828 + }, + { + "epoch": 2.252463054187192, + "grad_norm": 9.25026361639238, + "learning_rate": 1.7811849997457681e-06, + "loss": 0.26528751850128174, + "step": 1829 + }, + { + "epoch": 2.2536945812807883, + "grad_norm": 11.490820404812338, + "learning_rate": 1.775704673744809e-06, + "loss": 0.25929901003837585, + "step": 1830 + }, + { + "epoch": 2.2549261083743843, + "grad_norm": 13.127115940994786, + "learning_rate": 1.7702309707229576e-06, + "loss": 0.4980836808681488, + "step": 1831 + }, + { + "epoch": 2.2561576354679804, + "grad_norm": 16.054819413361866, + "learning_rate": 1.764763901923673e-06, + "loss": 0.5196325182914734, + "step": 1832 + }, + { + "epoch": 2.2573891625615765, + "grad_norm": 8.101995143129717, + "learning_rate": 1.7593034785767788e-06, + "loss": 0.20513209700584412, + "step": 1833 + }, + { + "epoch": 2.2586206896551726, + "grad_norm": 11.005823004560217, + "learning_rate": 1.753849711898457e-06, + "loss": 0.3052961826324463, + "step": 1834 + }, + { + "epoch": 2.2598522167487687, + "grad_norm": 14.916636143940408, + "learning_rate": 1.7484026130912097e-06, + "loss": 0.32289302349090576, + "step": 1835 + }, + { + "epoch": 2.2610837438423648, + "grad_norm": 10.783629716557854, + "learning_rate": 1.742962193343845e-06, + "loss": 0.5892568826675415, + "step": 1836 + }, + { + "epoch": 2.2623152709359604, + "grad_norm": 8.680159409558001, + "learning_rate": 1.737528463831456e-06, + "loss": 0.24824300408363342, + "step": 1837 + }, + { + "epoch": 2.2635467980295565, + "grad_norm": 28.059213249121456, + "learning_rate": 1.7321014357153815e-06, + "loss": 0.23833397030830383, + "step": 1838 + }, + { + "epoch": 2.2647783251231526, + "grad_norm": 10.866697094389515, + "learning_rate": 1.726681120143207e-06, + "loss": 0.4855925738811493, + "step": 1839 + }, + { + "epoch": 2.2660098522167487, + "grad_norm": 11.048047137574908, + "learning_rate": 1.7212675282487269e-06, + "loss": 0.44992727041244507, + "step": 1840 + }, + { + "epoch": 2.2672413793103448, + "grad_norm": 19.236329816785574, + "learning_rate": 1.7158606711519193e-06, + "loss": 0.41251128911972046, + "step": 1841 + }, + { + "epoch": 2.268472906403941, + "grad_norm": 8.021805078822515, + "learning_rate": 1.7104605599589353e-06, + "loss": 0.4418972134590149, + "step": 1842 + }, + { + "epoch": 2.269704433497537, + "grad_norm": 14.577958176696848, + "learning_rate": 1.7050672057620666e-06, + "loss": 0.4425298571586609, + "step": 1843 + }, + { + "epoch": 2.270935960591133, + "grad_norm": 13.33684949043127, + "learning_rate": 1.6996806196397243e-06, + "loss": 0.3141231834888458, + "step": 1844 + }, + { + "epoch": 2.272167487684729, + "grad_norm": 14.191190475097011, + "learning_rate": 1.6943008126564164e-06, + "loss": 0.2843426764011383, + "step": 1845 + }, + { + "epoch": 2.273399014778325, + "grad_norm": 8.774563230877245, + "learning_rate": 1.6889277958627293e-06, + "loss": 0.36104702949523926, + "step": 1846 + }, + { + "epoch": 2.2746305418719213, + "grad_norm": 8.915062589804638, + "learning_rate": 1.6835615802953026e-06, + "loss": 0.3061131536960602, + "step": 1847 + }, + { + "epoch": 2.2758620689655173, + "grad_norm": 14.006563372468205, + "learning_rate": 1.6782021769768015e-06, + "loss": 0.26009926199913025, + "step": 1848 + }, + { + "epoch": 2.2770935960591134, + "grad_norm": 8.127500944165664, + "learning_rate": 1.6728495969158976e-06, + "loss": 0.33785128593444824, + "step": 1849 + }, + { + "epoch": 2.2783251231527095, + "grad_norm": 13.84769147602863, + "learning_rate": 1.6675038511072518e-06, + "loss": 0.675277829170227, + "step": 1850 + }, + { + "epoch": 2.2795566502463056, + "grad_norm": 10.2024379894797, + "learning_rate": 1.6621649505314853e-06, + "loss": 0.30536460876464844, + "step": 1851 + }, + { + "epoch": 2.2807881773399012, + "grad_norm": 13.905669065241, + "learning_rate": 1.6568329061551552e-06, + "loss": 0.483297735452652, + "step": 1852 + }, + { + "epoch": 2.2820197044334973, + "grad_norm": 13.831832440802502, + "learning_rate": 1.6515077289307391e-06, + "loss": 1.2728561162948608, + "step": 1853 + }, + { + "epoch": 2.2832512315270934, + "grad_norm": 12.809334971632179, + "learning_rate": 1.6461894297966113e-06, + "loss": 1.2634159326553345, + "step": 1854 + }, + { + "epoch": 2.2844827586206895, + "grad_norm": 7.191323391539922, + "learning_rate": 1.640878019677008e-06, + "loss": 0.2823532819747925, + "step": 1855 + }, + { + "epoch": 2.2857142857142856, + "grad_norm": 10.11071089918571, + "learning_rate": 1.6355735094820236e-06, + "loss": 0.34143221378326416, + "step": 1856 + }, + { + "epoch": 2.2869458128078817, + "grad_norm": 21.093284752390208, + "learning_rate": 1.6302759101075788e-06, + "loss": 1.6820435523986816, + "step": 1857 + }, + { + "epoch": 2.2881773399014778, + "grad_norm": 10.354309593440153, + "learning_rate": 1.6249852324353943e-06, + "loss": 0.5194296836853027, + "step": 1858 + }, + { + "epoch": 2.289408866995074, + "grad_norm": 17.44623842314838, + "learning_rate": 1.619701487332978e-06, + "loss": 0.5637781023979187, + "step": 1859 + }, + { + "epoch": 2.29064039408867, + "grad_norm": 25.69777716112705, + "learning_rate": 1.6144246856535933e-06, + "loss": 0.34875303506851196, + "step": 1860 + }, + { + "epoch": 2.291871921182266, + "grad_norm": 12.072258734899453, + "learning_rate": 1.609154838236246e-06, + "loss": 1.098509430885315, + "step": 1861 + }, + { + "epoch": 2.293103448275862, + "grad_norm": 9.38995256932923, + "learning_rate": 1.603891955905652e-06, + "loss": 0.28303658962249756, + "step": 1862 + }, + { + "epoch": 2.294334975369458, + "grad_norm": 8.876257541157115, + "learning_rate": 1.5986360494722237e-06, + "loss": 0.2923981547355652, + "step": 1863 + }, + { + "epoch": 2.2955665024630543, + "grad_norm": 12.816591257478263, + "learning_rate": 1.5933871297320458e-06, + "loss": 0.7381842136383057, + "step": 1864 + }, + { + "epoch": 2.2967980295566504, + "grad_norm": 11.151348038557627, + "learning_rate": 1.5881452074668474e-06, + "loss": 0.3092786371707916, + "step": 1865 + }, + { + "epoch": 2.2980295566502464, + "grad_norm": 7.288277848225151, + "learning_rate": 1.5829102934439855e-06, + "loss": 0.23155847191810608, + "step": 1866 + }, + { + "epoch": 2.2992610837438425, + "grad_norm": 6.9100983038059685, + "learning_rate": 1.577682398416424e-06, + "loss": 0.28587496280670166, + "step": 1867 + }, + { + "epoch": 2.3004926108374386, + "grad_norm": 10.179482607383743, + "learning_rate": 1.572461533122709e-06, + "loss": 0.28047090768814087, + "step": 1868 + }, + { + "epoch": 2.3017241379310347, + "grad_norm": 9.853152635402589, + "learning_rate": 1.567247708286942e-06, + "loss": 0.23015758395195007, + "step": 1869 + }, + { + "epoch": 2.302955665024631, + "grad_norm": 11.277401391934358, + "learning_rate": 1.5620409346187697e-06, + "loss": 0.4323405623435974, + "step": 1870 + }, + { + "epoch": 2.3041871921182264, + "grad_norm": 11.297467766496554, + "learning_rate": 1.5568412228133506e-06, + "loss": 0.23572880029678345, + "step": 1871 + }, + { + "epoch": 2.3054187192118225, + "grad_norm": 13.421885123492197, + "learning_rate": 1.5516485835513368e-06, + "loss": 0.3727877140045166, + "step": 1872 + }, + { + "epoch": 2.3066502463054186, + "grad_norm": 12.62430001790282, + "learning_rate": 1.5464630274988558e-06, + "loss": 0.45042985677719116, + "step": 1873 + }, + { + "epoch": 2.3078817733990147, + "grad_norm": 14.933222032568711, + "learning_rate": 1.5412845653074871e-06, + "loss": 0.2898573875427246, + "step": 1874 + }, + { + "epoch": 2.3091133004926108, + "grad_norm": 13.678732792764093, + "learning_rate": 1.5361132076142316e-06, + "loss": 0.5285981893539429, + "step": 1875 + }, + { + "epoch": 2.310344827586207, + "grad_norm": 11.195106285237618, + "learning_rate": 1.5309489650415056e-06, + "loss": 0.32582932710647583, + "step": 1876 + }, + { + "epoch": 2.311576354679803, + "grad_norm": 10.519489956392377, + "learning_rate": 1.5257918481971028e-06, + "loss": 0.2169458121061325, + "step": 1877 + }, + { + "epoch": 2.312807881773399, + "grad_norm": 13.764556882530254, + "learning_rate": 1.5206418676741868e-06, + "loss": 0.618523359298706, + "step": 1878 + }, + { + "epoch": 2.314039408866995, + "grad_norm": 11.040931356433024, + "learning_rate": 1.515499034051256e-06, + "loss": 0.7014099359512329, + "step": 1879 + }, + { + "epoch": 2.315270935960591, + "grad_norm": 13.213679491063276, + "learning_rate": 1.510363357892133e-06, + "loss": 0.44798558950424194, + "step": 1880 + }, + { + "epoch": 2.3165024630541873, + "grad_norm": 77.68330951092015, + "learning_rate": 1.50523484974594e-06, + "loss": 0.4824434220790863, + "step": 1881 + }, + { + "epoch": 2.3177339901477834, + "grad_norm": 5.871453538227446, + "learning_rate": 1.5001135201470673e-06, + "loss": 0.16904819011688232, + "step": 1882 + }, + { + "epoch": 2.3189655172413794, + "grad_norm": 10.296708154719132, + "learning_rate": 1.4949993796151675e-06, + "loss": 0.8792778253555298, + "step": 1883 + }, + { + "epoch": 2.3201970443349755, + "grad_norm": 12.549086016226653, + "learning_rate": 1.4898924386551256e-06, + "loss": 0.6592487096786499, + "step": 1884 + }, + { + "epoch": 2.3214285714285716, + "grad_norm": 20.275701743724124, + "learning_rate": 1.4847927077570324e-06, + "loss": 1.6036354303359985, + "step": 1885 + }, + { + "epoch": 2.3226600985221673, + "grad_norm": 9.24831145241808, + "learning_rate": 1.4797001973961755e-06, + "loss": 0.34490981698036194, + "step": 1886 + }, + { + "epoch": 2.3238916256157633, + "grad_norm": 8.476000589981345, + "learning_rate": 1.4746149180330082e-06, + "loss": 0.3186146914958954, + "step": 1887 + }, + { + "epoch": 2.3251231527093594, + "grad_norm": 18.44274912327115, + "learning_rate": 1.4695368801131293e-06, + "loss": 0.5050108432769775, + "step": 1888 + }, + { + "epoch": 2.3263546798029555, + "grad_norm": 12.028503330268482, + "learning_rate": 1.4644660940672628e-06, + "loss": 0.3541644215583801, + "step": 1889 + }, + { + "epoch": 2.3275862068965516, + "grad_norm": 6.910684312350736, + "learning_rate": 1.4594025703112397e-06, + "loss": 0.3495083749294281, + "step": 1890 + }, + { + "epoch": 2.3288177339901477, + "grad_norm": 11.582636749838006, + "learning_rate": 1.4543463192459728e-06, + "loss": 0.9918674826622009, + "step": 1891 + }, + { + "epoch": 2.3300492610837438, + "grad_norm": 12.929277927199294, + "learning_rate": 1.4492973512574348e-06, + "loss": 0.9601753950119019, + "step": 1892 + }, + { + "epoch": 2.33128078817734, + "grad_norm": 8.289898772410082, + "learning_rate": 1.4442556767166371e-06, + "loss": 0.48341238498687744, + "step": 1893 + }, + { + "epoch": 2.332512315270936, + "grad_norm": 11.044218498303557, + "learning_rate": 1.4392213059796133e-06, + "loss": 0.38372108340263367, + "step": 1894 + }, + { + "epoch": 2.333743842364532, + "grad_norm": 17.672025418443823, + "learning_rate": 1.4341942493873934e-06, + "loss": 0.45662760734558105, + "step": 1895 + }, + { + "epoch": 2.334975369458128, + "grad_norm": 8.57989944923008, + "learning_rate": 1.4291745172659804e-06, + "loss": 0.6601132154464722, + "step": 1896 + }, + { + "epoch": 2.336206896551724, + "grad_norm": 10.831792328536467, + "learning_rate": 1.4241621199263362e-06, + "loss": 0.7569577097892761, + "step": 1897 + }, + { + "epoch": 2.3374384236453203, + "grad_norm": 14.76295283801852, + "learning_rate": 1.4191570676643573e-06, + "loss": 0.7162508964538574, + "step": 1898 + }, + { + "epoch": 2.3386699507389164, + "grad_norm": 16.808898262444146, + "learning_rate": 1.4141593707608441e-06, + "loss": 0.6121374368667603, + "step": 1899 + }, + { + "epoch": 2.3399014778325125, + "grad_norm": 14.404980275639364, + "learning_rate": 1.4091690394814989e-06, + "loss": 0.550343930721283, + "step": 1900 + }, + { + "epoch": 2.3411330049261085, + "grad_norm": 13.189507504332187, + "learning_rate": 1.40418608407689e-06, + "loss": 0.644547700881958, + "step": 1901 + }, + { + "epoch": 2.3423645320197046, + "grad_norm": 10.144794457121083, + "learning_rate": 1.3992105147824326e-06, + "loss": 0.463761568069458, + "step": 1902 + }, + { + "epoch": 2.3435960591133007, + "grad_norm": 9.21109140090456, + "learning_rate": 1.3942423418183764e-06, + "loss": 0.5593357682228088, + "step": 1903 + }, + { + "epoch": 2.344827586206897, + "grad_norm": 12.967643967580644, + "learning_rate": 1.3892815753897708e-06, + "loss": 0.5090635418891907, + "step": 1904 + }, + { + "epoch": 2.3460591133004924, + "grad_norm": 13.46983908302652, + "learning_rate": 1.3843282256864599e-06, + "loss": 0.4595394432544708, + "step": 1905 + }, + { + "epoch": 2.3472906403940885, + "grad_norm": 11.392389994781835, + "learning_rate": 1.379382302883044e-06, + "loss": 0.8381729125976562, + "step": 1906 + }, + { + "epoch": 2.3485221674876846, + "grad_norm": 8.85214424769499, + "learning_rate": 1.3744438171388752e-06, + "loss": 0.37937110662460327, + "step": 1907 + }, + { + "epoch": 2.3497536945812807, + "grad_norm": 17.78975528440709, + "learning_rate": 1.3695127785980279e-06, + "loss": 0.4255325496196747, + "step": 1908 + }, + { + "epoch": 2.350985221674877, + "grad_norm": 11.69369455239838, + "learning_rate": 1.3645891973892772e-06, + "loss": 1.1354942321777344, + "step": 1909 + }, + { + "epoch": 2.352216748768473, + "grad_norm": 7.241901848192273, + "learning_rate": 1.359673083626079e-06, + "loss": 0.30018460750579834, + "step": 1910 + }, + { + "epoch": 2.353448275862069, + "grad_norm": 10.130306855965305, + "learning_rate": 1.3547644474065557e-06, + "loss": 0.22174029052257538, + "step": 1911 + }, + { + "epoch": 2.354679802955665, + "grad_norm": 10.818242567623516, + "learning_rate": 1.349863298813464e-06, + "loss": 0.27310076355934143, + "step": 1912 + }, + { + "epoch": 2.355911330049261, + "grad_norm": 13.041781733429923, + "learning_rate": 1.3449696479141855e-06, + "loss": 0.39454638957977295, + "step": 1913 + }, + { + "epoch": 2.357142857142857, + "grad_norm": 10.18283763523278, + "learning_rate": 1.3400835047606997e-06, + "loss": 0.39921119809150696, + "step": 1914 + }, + { + "epoch": 2.3583743842364533, + "grad_norm": 10.365856020003331, + "learning_rate": 1.3352048793895623e-06, + "loss": 0.45110660791397095, + "step": 1915 + }, + { + "epoch": 2.3596059113300494, + "grad_norm": 8.256618178243365, + "learning_rate": 1.330333781821887e-06, + "loss": 0.5453286170959473, + "step": 1916 + }, + { + "epoch": 2.3608374384236455, + "grad_norm": 7.676268533106476, + "learning_rate": 1.325470222063327e-06, + "loss": 0.21928450465202332, + "step": 1917 + }, + { + "epoch": 2.3620689655172415, + "grad_norm": 11.703145589738702, + "learning_rate": 1.3206142101040525e-06, + "loss": 0.8491370677947998, + "step": 1918 + }, + { + "epoch": 2.363300492610837, + "grad_norm": 11.375579827407606, + "learning_rate": 1.3157657559187264e-06, + "loss": 0.5052551031112671, + "step": 1919 + }, + { + "epoch": 2.3645320197044333, + "grad_norm": 14.124196950433179, + "learning_rate": 1.3109248694664917e-06, + "loss": 1.0034559965133667, + "step": 1920 + }, + { + "epoch": 2.3657635467980294, + "grad_norm": 16.92878880493155, + "learning_rate": 1.3060915606909413e-06, + "loss": 0.3685661554336548, + "step": 1921 + }, + { + "epoch": 2.3669950738916254, + "grad_norm": 9.744666272771802, + "learning_rate": 1.301265839520109e-06, + "loss": 0.33304983377456665, + "step": 1922 + }, + { + "epoch": 2.3682266009852215, + "grad_norm": 9.861413232471296, + "learning_rate": 1.2964477158664367e-06, + "loss": 1.3396000862121582, + "step": 1923 + }, + { + "epoch": 2.3694581280788176, + "grad_norm": 13.403135613317723, + "learning_rate": 1.2916371996267656e-06, + "loss": 0.3852962851524353, + "step": 1924 + }, + { + "epoch": 2.3706896551724137, + "grad_norm": 12.989833739172669, + "learning_rate": 1.2868343006823113e-06, + "loss": 0.5070800185203552, + "step": 1925 + }, + { + "epoch": 2.37192118226601, + "grad_norm": 10.592089371352348, + "learning_rate": 1.2820390288986345e-06, + "loss": 0.1917571723461151, + "step": 1926 + }, + { + "epoch": 2.373152709359606, + "grad_norm": 6.248268258840329, + "learning_rate": 1.2772513941256371e-06, + "loss": 0.19884659349918365, + "step": 1927 + }, + { + "epoch": 2.374384236453202, + "grad_norm": 13.319990126266617, + "learning_rate": 1.2724714061975335e-06, + "loss": 0.27710244059562683, + "step": 1928 + }, + { + "epoch": 2.375615763546798, + "grad_norm": 12.638294589181001, + "learning_rate": 1.2676990749328255e-06, + "loss": 0.7216998338699341, + "step": 1929 + }, + { + "epoch": 2.376847290640394, + "grad_norm": 7.68797287512978, + "learning_rate": 1.262934410134292e-06, + "loss": 0.35512983798980713, + "step": 1930 + }, + { + "epoch": 2.37807881773399, + "grad_norm": 7.682504760826181, + "learning_rate": 1.2581774215889653e-06, + "loss": 0.21548208594322205, + "step": 1931 + }, + { + "epoch": 2.3793103448275863, + "grad_norm": 10.576319148708158, + "learning_rate": 1.2534281190681059e-06, + "loss": 0.7191505432128906, + "step": 1932 + }, + { + "epoch": 2.3805418719211824, + "grad_norm": 28.03273248427961, + "learning_rate": 1.2486865123271868e-06, + "loss": 0.5658040046691895, + "step": 1933 + }, + { + "epoch": 2.3817733990147785, + "grad_norm": 7.429440108605395, + "learning_rate": 1.243952611105877e-06, + "loss": 0.42820805311203003, + "step": 1934 + }, + { + "epoch": 2.3830049261083746, + "grad_norm": 8.913271204535084, + "learning_rate": 1.2392264251280167e-06, + "loss": 0.3223640024662018, + "step": 1935 + }, + { + "epoch": 2.3842364532019706, + "grad_norm": 16.39061337542185, + "learning_rate": 1.2345079641015955e-06, + "loss": 0.5262437462806702, + "step": 1936 + }, + { + "epoch": 2.3854679802955667, + "grad_norm": 12.040132799234067, + "learning_rate": 1.2297972377187361e-06, + "loss": 0.32022416591644287, + "step": 1937 + }, + { + "epoch": 2.386699507389163, + "grad_norm": 10.197992684406291, + "learning_rate": 1.2250942556556754e-06, + "loss": 0.76932692527771, + "step": 1938 + }, + { + "epoch": 2.3879310344827585, + "grad_norm": 9.459909563147203, + "learning_rate": 1.2203990275727435e-06, + "loss": 0.23026564717292786, + "step": 1939 + }, + { + "epoch": 2.3891625615763545, + "grad_norm": 11.035875303455253, + "learning_rate": 1.2157115631143384e-06, + "loss": 0.4533492624759674, + "step": 1940 + }, + { + "epoch": 2.3903940886699506, + "grad_norm": 10.823301129205994, + "learning_rate": 1.211031871908916e-06, + "loss": 0.6235211491584778, + "step": 1941 + }, + { + "epoch": 2.3916256157635467, + "grad_norm": 9.073613663519735, + "learning_rate": 1.206359963568966e-06, + "loss": 0.2519042193889618, + "step": 1942 + }, + { + "epoch": 2.392857142857143, + "grad_norm": 9.128265200465231, + "learning_rate": 1.201695847690983e-06, + "loss": 0.3229137659072876, + "step": 1943 + }, + { + "epoch": 2.394088669950739, + "grad_norm": 11.336508477709275, + "learning_rate": 1.1970395338554642e-06, + "loss": 0.19324302673339844, + "step": 1944 + }, + { + "epoch": 2.395320197044335, + "grad_norm": 11.07861313896692, + "learning_rate": 1.1923910316268783e-06, + "loss": 0.6342459917068481, + "step": 1945 + }, + { + "epoch": 2.396551724137931, + "grad_norm": 11.018070634448504, + "learning_rate": 1.1877503505536453e-06, + "loss": 0.3010944724082947, + "step": 1946 + }, + { + "epoch": 2.397783251231527, + "grad_norm": 8.241609243061369, + "learning_rate": 1.183117500168125e-06, + "loss": 0.40499716997146606, + "step": 1947 + }, + { + "epoch": 2.399014778325123, + "grad_norm": 18.259844198245478, + "learning_rate": 1.1784924899865856e-06, + "loss": 0.9692997336387634, + "step": 1948 + }, + { + "epoch": 2.4002463054187193, + "grad_norm": 15.459619863404178, + "learning_rate": 1.1738753295091986e-06, + "loss": 0.3848229646682739, + "step": 1949 + }, + { + "epoch": 2.4014778325123154, + "grad_norm": 10.437656103417114, + "learning_rate": 1.169266028220004e-06, + "loss": 0.4472384750843048, + "step": 1950 + }, + { + "epoch": 2.4027093596059115, + "grad_norm": 8.14141154883163, + "learning_rate": 1.164664595586904e-06, + "loss": 0.21374854445457458, + "step": 1951 + }, + { + "epoch": 2.4039408866995076, + "grad_norm": 9.895182845073167, + "learning_rate": 1.1600710410616367e-06, + "loss": 0.4789981544017792, + "step": 1952 + }, + { + "epoch": 2.405172413793103, + "grad_norm": 14.330046153248214, + "learning_rate": 1.1554853740797556e-06, + "loss": 0.6235543489456177, + "step": 1953 + }, + { + "epoch": 2.4064039408866993, + "grad_norm": 11.28922905122106, + "learning_rate": 1.1509076040606127e-06, + "loss": 0.42575669288635254, + "step": 1954 + }, + { + "epoch": 2.4076354679802954, + "grad_norm": 10.213241448714898, + "learning_rate": 1.1463377404073433e-06, + "loss": 0.22154280543327332, + "step": 1955 + }, + { + "epoch": 2.4088669950738915, + "grad_norm": 9.867650979911392, + "learning_rate": 1.1417757925068362e-06, + "loss": 0.5722556114196777, + "step": 1956 + }, + { + "epoch": 2.4100985221674875, + "grad_norm": 7.554394124376038, + "learning_rate": 1.137221769729725e-06, + "loss": 0.6502832174301147, + "step": 1957 + }, + { + "epoch": 2.4113300492610836, + "grad_norm": 13.191804943156788, + "learning_rate": 1.132675681430364e-06, + "loss": 0.41717976331710815, + "step": 1958 + }, + { + "epoch": 2.4125615763546797, + "grad_norm": 12.040721504656855, + "learning_rate": 1.1281375369468078e-06, + "loss": 0.3705020248889923, + "step": 1959 + }, + { + "epoch": 2.413793103448276, + "grad_norm": 19.08924876929562, + "learning_rate": 1.1236073456007928e-06, + "loss": 0.8128242492675781, + "step": 1960 + }, + { + "epoch": 2.415024630541872, + "grad_norm": 16.296662141524465, + "learning_rate": 1.1190851166977218e-06, + "loss": 0.7350403070449829, + "step": 1961 + }, + { + "epoch": 2.416256157635468, + "grad_norm": 7.0582572680809195, + "learning_rate": 1.1145708595266418e-06, + "loss": 0.5837904214859009, + "step": 1962 + }, + { + "epoch": 2.417487684729064, + "grad_norm": 8.875645426047061, + "learning_rate": 1.1100645833602231e-06, + "loss": 0.436983585357666, + "step": 1963 + }, + { + "epoch": 2.41871921182266, + "grad_norm": 9.396076477777111, + "learning_rate": 1.105566297454742e-06, + "loss": 0.4708068370819092, + "step": 1964 + }, + { + "epoch": 2.4199507389162562, + "grad_norm": 12.540961285951255, + "learning_rate": 1.1010760110500652e-06, + "loss": 0.37972012162208557, + "step": 1965 + }, + { + "epoch": 2.4211822660098523, + "grad_norm": 9.511768233063343, + "learning_rate": 1.0965937333696264e-06, + "loss": 0.3167269229888916, + "step": 1966 + }, + { + "epoch": 2.4224137931034484, + "grad_norm": 8.997618711574894, + "learning_rate": 1.0921194736204066e-06, + "loss": 0.3407049775123596, + "step": 1967 + }, + { + "epoch": 2.4236453201970445, + "grad_norm": 26.50748327469745, + "learning_rate": 1.0876532409929208e-06, + "loss": 0.7673642635345459, + "step": 1968 + }, + { + "epoch": 2.4248768472906406, + "grad_norm": 7.428296790887836, + "learning_rate": 1.083195044661195e-06, + "loss": 0.3029213845729828, + "step": 1969 + }, + { + "epoch": 2.4261083743842367, + "grad_norm": 16.297521234369484, + "learning_rate": 1.0787448937827428e-06, + "loss": 0.5143488049507141, + "step": 1970 + }, + { + "epoch": 2.4273399014778327, + "grad_norm": 9.838022492363262, + "learning_rate": 1.0743027974985576e-06, + "loss": 0.5086369514465332, + "step": 1971 + }, + { + "epoch": 2.4285714285714284, + "grad_norm": 11.760234490761677, + "learning_rate": 1.069868764933088e-06, + "loss": 0.7999781966209412, + "step": 1972 + }, + { + "epoch": 2.4298029556650245, + "grad_norm": 8.348930224912683, + "learning_rate": 1.065442805194214e-06, + "loss": 0.2686223089694977, + "step": 1973 + }, + { + "epoch": 2.4310344827586206, + "grad_norm": 10.189321214439989, + "learning_rate": 1.0610249273732393e-06, + "loss": 0.2520446181297302, + "step": 1974 + }, + { + "epoch": 2.4322660098522166, + "grad_norm": 11.006280468973555, + "learning_rate": 1.056615140544861e-06, + "loss": 0.28887757658958435, + "step": 1975 + }, + { + "epoch": 2.4334975369458127, + "grad_norm": 17.908792965669562, + "learning_rate": 1.0522134537671625e-06, + "loss": 0.3709273338317871, + "step": 1976 + }, + { + "epoch": 2.434729064039409, + "grad_norm": 8.261377574040777, + "learning_rate": 1.0478198760815833e-06, + "loss": 0.6718100309371948, + "step": 1977 + }, + { + "epoch": 2.435960591133005, + "grad_norm": 8.787835782948932, + "learning_rate": 1.0434344165129095e-06, + "loss": 0.17143529653549194, + "step": 1978 + }, + { + "epoch": 2.437192118226601, + "grad_norm": 15.115289039167425, + "learning_rate": 1.0390570840692527e-06, + "loss": 0.7128796577453613, + "step": 1979 + }, + { + "epoch": 2.438423645320197, + "grad_norm": 13.46718512167487, + "learning_rate": 1.034687887742028e-06, + "loss": 0.24575555324554443, + "step": 1980 + }, + { + "epoch": 2.439655172413793, + "grad_norm": 15.637303471440513, + "learning_rate": 1.0303268365059383e-06, + "loss": 0.5631250739097595, + "step": 1981 + }, + { + "epoch": 2.4408866995073892, + "grad_norm": 10.921107789227744, + "learning_rate": 1.0259739393189573e-06, + "loss": 0.3094029128551483, + "step": 1982 + }, + { + "epoch": 2.4421182266009853, + "grad_norm": 9.876371637108129, + "learning_rate": 1.021629205122311e-06, + "loss": 0.4754146635532379, + "step": 1983 + }, + { + "epoch": 2.4433497536945814, + "grad_norm": 11.197843935010443, + "learning_rate": 1.0172926428404527e-06, + "loss": 0.18599992990493774, + "step": 1984 + }, + { + "epoch": 2.4445812807881775, + "grad_norm": 11.60242134696919, + "learning_rate": 1.0129642613810576e-06, + "loss": 0.3831806480884552, + "step": 1985 + }, + { + "epoch": 2.4458128078817736, + "grad_norm": 10.915359357263476, + "learning_rate": 1.008644069634989e-06, + "loss": 0.7717353105545044, + "step": 1986 + }, + { + "epoch": 2.447044334975369, + "grad_norm": 16.40151326361354, + "learning_rate": 1.0043320764762915e-06, + "loss": 0.3248934745788574, + "step": 1987 + }, + { + "epoch": 2.4482758620689653, + "grad_norm": 7.869645643343828, + "learning_rate": 1.0000282907621694e-06, + "loss": 0.27836111187934875, + "step": 1988 + }, + { + "epoch": 2.4495073891625614, + "grad_norm": 10.609052698858209, + "learning_rate": 9.957327213329687e-07, + "loss": 0.20251630246639252, + "step": 1989 + }, + { + "epoch": 2.4507389162561575, + "grad_norm": 15.802681481740834, + "learning_rate": 9.914453770121557e-07, + "loss": 0.6009274125099182, + "step": 1990 + }, + { + "epoch": 2.4519704433497536, + "grad_norm": 12.5975867275524, + "learning_rate": 9.871662666063054e-07, + "loss": 0.3312684893608093, + "step": 1991 + }, + { + "epoch": 2.4532019704433496, + "grad_norm": 11.710094793009787, + "learning_rate": 9.828953989050744e-07, + "loss": 0.38521629571914673, + "step": 1992 + }, + { + "epoch": 2.4544334975369457, + "grad_norm": 7.249324950790913, + "learning_rate": 9.786327826811942e-07, + "loss": 0.2508774995803833, + "step": 1993 + }, + { + "epoch": 2.455665024630542, + "grad_norm": 9.220463260574913, + "learning_rate": 9.743784266904422e-07, + "loss": 0.36097291111946106, + "step": 1994 + }, + { + "epoch": 2.456896551724138, + "grad_norm": 22.22398053360695, + "learning_rate": 9.701323396716312e-07, + "loss": 0.6703237295150757, + "step": 1995 + }, + { + "epoch": 2.458128078817734, + "grad_norm": 10.185390156514575, + "learning_rate": 9.6589453034659e-07, + "loss": 0.9553302526473999, + "step": 1996 + }, + { + "epoch": 2.45935960591133, + "grad_norm": 10.103225854124274, + "learning_rate": 9.616650074201383e-07, + "loss": 0.3288821578025818, + "step": 1997 + }, + { + "epoch": 2.460591133004926, + "grad_norm": 9.00369401838797, + "learning_rate": 9.574437795800806e-07, + "loss": 0.3195754885673523, + "step": 1998 + }, + { + "epoch": 2.4618226600985222, + "grad_norm": 15.805795563779297, + "learning_rate": 9.532308554971831e-07, + "loss": 0.26505401730537415, + "step": 1999 + }, + { + "epoch": 2.4630541871921183, + "grad_norm": 11.25947467258853, + "learning_rate": 9.490262438251496e-07, + "loss": 0.43558627367019653, + "step": 2000 + }, + { + "epoch": 2.4642857142857144, + "grad_norm": 10.457734518302678, + "learning_rate": 9.44829953200615e-07, + "loss": 0.3582439720630646, + "step": 2001 + }, + { + "epoch": 2.4655172413793105, + "grad_norm": 12.231152863168465, + "learning_rate": 9.406419922431214e-07, + "loss": 0.7142423987388611, + "step": 2002 + }, + { + "epoch": 2.4667487684729066, + "grad_norm": 12.479544686562418, + "learning_rate": 9.364623695550979e-07, + "loss": 0.24947094917297363, + "step": 2003 + }, + { + "epoch": 2.4679802955665027, + "grad_norm": 16.323337348543824, + "learning_rate": 9.322910937218471e-07, + "loss": 1.0376765727996826, + "step": 2004 + }, + { + "epoch": 2.4692118226600988, + "grad_norm": 12.025786233159009, + "learning_rate": 9.281281733115288e-07, + "loss": 0.39291733503341675, + "step": 2005 + }, + { + "epoch": 2.4704433497536944, + "grad_norm": 15.526509163555014, + "learning_rate": 9.239736168751395e-07, + "loss": 1.1038362979888916, + "step": 2006 + }, + { + "epoch": 2.4716748768472905, + "grad_norm": 10.027251067087649, + "learning_rate": 9.198274329464929e-07, + "loss": 0.8542830944061279, + "step": 2007 + }, + { + "epoch": 2.4729064039408866, + "grad_norm": 20.306111450694207, + "learning_rate": 9.156896300422053e-07, + "loss": 0.807994544506073, + "step": 2008 + }, + { + "epoch": 2.4741379310344827, + "grad_norm": 5.653479787843331, + "learning_rate": 9.115602166616805e-07, + "loss": 0.17016081511974335, + "step": 2009 + }, + { + "epoch": 2.4753694581280787, + "grad_norm": 11.492766886926658, + "learning_rate": 9.07439201287088e-07, + "loss": 0.7831156849861145, + "step": 2010 + }, + { + "epoch": 2.476600985221675, + "grad_norm": 9.3732349373237, + "learning_rate": 9.033265923833446e-07, + "loss": 0.5146660804748535, + "step": 2011 + }, + { + "epoch": 2.477832512315271, + "grad_norm": 13.78559435557381, + "learning_rate": 8.992223983981035e-07, + "loss": 0.5641926527023315, + "step": 2012 + }, + { + "epoch": 2.479064039408867, + "grad_norm": 7.867545716232377, + "learning_rate": 8.951266277617326e-07, + "loss": 0.2155514359474182, + "step": 2013 + }, + { + "epoch": 2.480295566502463, + "grad_norm": 11.172087233714553, + "learning_rate": 8.91039288887292e-07, + "loss": 0.28125351667404175, + "step": 2014 + }, + { + "epoch": 2.481527093596059, + "grad_norm": 10.827596711387834, + "learning_rate": 8.869603901705287e-07, + "loss": 0.5349509716033936, + "step": 2015 + }, + { + "epoch": 2.4827586206896552, + "grad_norm": 10.652684351436065, + "learning_rate": 8.82889939989851e-07, + "loss": 0.43747422099113464, + "step": 2016 + }, + { + "epoch": 2.4839901477832513, + "grad_norm": 8.656359342370678, + "learning_rate": 8.78827946706311e-07, + "loss": 0.4629102647304535, + "step": 2017 + }, + { + "epoch": 2.4852216748768474, + "grad_norm": 9.302169561481923, + "learning_rate": 8.747744186635932e-07, + "loss": 0.41271477937698364, + "step": 2018 + }, + { + "epoch": 2.4864532019704435, + "grad_norm": 7.585718354318216, + "learning_rate": 8.707293641879888e-07, + "loss": 0.27247580885887146, + "step": 2019 + }, + { + "epoch": 2.4876847290640396, + "grad_norm": 11.7662978456361, + "learning_rate": 8.666927915883905e-07, + "loss": 1.4255273342132568, + "step": 2020 + }, + { + "epoch": 2.4889162561576352, + "grad_norm": 12.62783666106837, + "learning_rate": 8.626647091562612e-07, + "loss": 0.8762021660804749, + "step": 2021 + }, + { + "epoch": 2.4901477832512313, + "grad_norm": 7.781392053224673, + "learning_rate": 8.586451251656286e-07, + "loss": 0.43475109338760376, + "step": 2022 + }, + { + "epoch": 2.4913793103448274, + "grad_norm": 8.647004326334777, + "learning_rate": 8.546340478730647e-07, + "loss": 0.16091346740722656, + "step": 2023 + }, + { + "epoch": 2.4926108374384235, + "grad_norm": 10.050856051691818, + "learning_rate": 8.506314855176651e-07, + "loss": 0.491144061088562, + "step": 2024 + }, + { + "epoch": 2.4938423645320196, + "grad_norm": 15.049291696206959, + "learning_rate": 8.466374463210348e-07, + "loss": 0.792976438999176, + "step": 2025 + }, + { + "epoch": 2.4950738916256157, + "grad_norm": 13.192276803646186, + "learning_rate": 8.426519384872733e-07, + "loss": 0.8023815155029297, + "step": 2026 + }, + { + "epoch": 2.4963054187192117, + "grad_norm": 10.183319190154988, + "learning_rate": 8.386749702029578e-07, + "loss": 0.7008549571037292, + "step": 2027 + }, + { + "epoch": 2.497536945812808, + "grad_norm": 9.306826775675583, + "learning_rate": 8.347065496371193e-07, + "loss": 0.3158326745033264, + "step": 2028 + }, + { + "epoch": 2.498768472906404, + "grad_norm": 11.439845656368037, + "learning_rate": 8.307466849412365e-07, + "loss": 0.4847475588321686, + "step": 2029 + }, + { + "epoch": 2.5, + "grad_norm": 8.392845077442193, + "learning_rate": 8.2679538424921e-07, + "loss": 0.42490729689598083, + "step": 2030 + }, + { + "epoch": 2.501231527093596, + "grad_norm": 8.86668163556195, + "learning_rate": 8.228526556773486e-07, + "loss": 0.4303053021430969, + "step": 2031 + }, + { + "epoch": 2.502463054187192, + "grad_norm": 9.647239720582808, + "learning_rate": 8.18918507324356e-07, + "loss": 0.20669305324554443, + "step": 2032 + }, + { + "epoch": 2.5036945812807883, + "grad_norm": 14.868819185388821, + "learning_rate": 8.149929472713126e-07, + "loss": 0.4146193265914917, + "step": 2033 + }, + { + "epoch": 2.5049261083743843, + "grad_norm": 8.521845217294674, + "learning_rate": 8.110759835816518e-07, + "loss": 0.2852465510368347, + "step": 2034 + }, + { + "epoch": 2.5061576354679804, + "grad_norm": 9.65764576867383, + "learning_rate": 8.071676243011556e-07, + "loss": 0.5811144113540649, + "step": 2035 + }, + { + "epoch": 2.5073891625615765, + "grad_norm": 13.619550034189677, + "learning_rate": 8.032678774579272e-07, + "loss": 0.6767745614051819, + "step": 2036 + }, + { + "epoch": 2.5086206896551726, + "grad_norm": 10.986185907881213, + "learning_rate": 7.993767510623834e-07, + "loss": 0.5063849687576294, + "step": 2037 + }, + { + "epoch": 2.5098522167487687, + "grad_norm": 11.539593137413142, + "learning_rate": 7.954942531072285e-07, + "loss": 0.534786581993103, + "step": 2038 + }, + { + "epoch": 2.5110837438423648, + "grad_norm": 12.505177711554532, + "learning_rate": 7.91620391567448e-07, + "loss": 0.45122361183166504, + "step": 2039 + }, + { + "epoch": 2.512315270935961, + "grad_norm": 8.839741542848381, + "learning_rate": 7.877551744002881e-07, + "loss": 0.2832280099391937, + "step": 2040 + }, + { + "epoch": 2.5135467980295565, + "grad_norm": 11.718433441522615, + "learning_rate": 7.838986095452311e-07, + "loss": 0.8926963806152344, + "step": 2041 + }, + { + "epoch": 2.5147783251231526, + "grad_norm": 9.73145152883671, + "learning_rate": 7.800507049239947e-07, + "loss": 0.9263632893562317, + "step": 2042 + }, + { + "epoch": 2.5160098522167487, + "grad_norm": 16.48224794173804, + "learning_rate": 7.762114684405064e-07, + "loss": 0.3994196653366089, + "step": 2043 + }, + { + "epoch": 2.5172413793103448, + "grad_norm": 10.084446546675132, + "learning_rate": 7.723809079808842e-07, + "loss": 0.3273079991340637, + "step": 2044 + }, + { + "epoch": 2.518472906403941, + "grad_norm": 19.899209678081235, + "learning_rate": 7.685590314134294e-07, + "loss": 0.4566258192062378, + "step": 2045 + }, + { + "epoch": 2.519704433497537, + "grad_norm": 16.13317422246351, + "learning_rate": 7.647458465886055e-07, + "loss": 0.4199177026748657, + "step": 2046 + }, + { + "epoch": 2.520935960591133, + "grad_norm": 7.584665550484686, + "learning_rate": 7.609413613390199e-07, + "loss": 0.2789694666862488, + "step": 2047 + }, + { + "epoch": 2.522167487684729, + "grad_norm": 12.08003380462593, + "learning_rate": 7.571455834794095e-07, + "loss": 0.39359426498413086, + "step": 2048 + }, + { + "epoch": 2.523399014778325, + "grad_norm": 16.766513036441403, + "learning_rate": 7.533585208066302e-07, + "loss": 0.38510677218437195, + "step": 2049 + }, + { + "epoch": 2.5246305418719213, + "grad_norm": 14.332573036568608, + "learning_rate": 7.495801810996334e-07, + "loss": 1.0861276388168335, + "step": 2050 + }, + { + "epoch": 2.5258620689655173, + "grad_norm": 13.180696978229305, + "learning_rate": 7.458105721194525e-07, + "loss": 0.35866010189056396, + "step": 2051 + }, + { + "epoch": 2.5270935960591134, + "grad_norm": 8.80983116890946, + "learning_rate": 7.420497016091866e-07, + "loss": 0.3436219394207001, + "step": 2052 + }, + { + "epoch": 2.5283251231527095, + "grad_norm": 12.383092324048317, + "learning_rate": 7.382975772939866e-07, + "loss": 0.3687105178833008, + "step": 2053 + }, + { + "epoch": 2.529556650246305, + "grad_norm": 8.240739854437226, + "learning_rate": 7.34554206881039e-07, + "loss": 0.32671070098876953, + "step": 2054 + }, + { + "epoch": 2.5307881773399012, + "grad_norm": 11.575392957436732, + "learning_rate": 7.308195980595462e-07, + "loss": 0.7302184104919434, + "step": 2055 + }, + { + "epoch": 2.5320197044334973, + "grad_norm": 13.7288446044892, + "learning_rate": 7.270937585007149e-07, + "loss": 0.7430564761161804, + "step": 2056 + }, + { + "epoch": 2.5332512315270934, + "grad_norm": 8.666358783874388, + "learning_rate": 7.233766958577421e-07, + "loss": 0.305151104927063, + "step": 2057 + }, + { + "epoch": 2.5344827586206895, + "grad_norm": 17.881705697560324, + "learning_rate": 7.196684177657887e-07, + "loss": 0.4311235547065735, + "step": 2058 + }, + { + "epoch": 2.5357142857142856, + "grad_norm": 13.989195036115625, + "learning_rate": 7.159689318419777e-07, + "loss": 0.29697108268737793, + "step": 2059 + }, + { + "epoch": 2.5369458128078817, + "grad_norm": 10.004375359602093, + "learning_rate": 7.122782456853722e-07, + "loss": 0.5012999176979065, + "step": 2060 + }, + { + "epoch": 2.5381773399014778, + "grad_norm": 10.441122865704237, + "learning_rate": 7.085963668769552e-07, + "loss": 0.24754227697849274, + "step": 2061 + }, + { + "epoch": 2.539408866995074, + "grad_norm": 7.415294238465162, + "learning_rate": 7.049233029796243e-07, + "loss": 0.1311894953250885, + "step": 2062 + }, + { + "epoch": 2.54064039408867, + "grad_norm": 11.745936375906483, + "learning_rate": 7.012590615381654e-07, + "loss": 0.3458009958267212, + "step": 2063 + }, + { + "epoch": 2.541871921182266, + "grad_norm": 19.579629082198277, + "learning_rate": 6.976036500792466e-07, + "loss": 0.6216360330581665, + "step": 2064 + }, + { + "epoch": 2.543103448275862, + "grad_norm": 17.511409594621433, + "learning_rate": 6.939570761113939e-07, + "loss": 0.41114604473114014, + "step": 2065 + }, + { + "epoch": 2.544334975369458, + "grad_norm": 12.769592062525021, + "learning_rate": 6.903193471249853e-07, + "loss": 0.35362619161605835, + "step": 2066 + }, + { + "epoch": 2.5455665024630543, + "grad_norm": 15.37068507816602, + "learning_rate": 6.866904705922284e-07, + "loss": 1.7280857563018799, + "step": 2067 + }, + { + "epoch": 2.5467980295566504, + "grad_norm": 12.864848425460373, + "learning_rate": 6.830704539671462e-07, + "loss": 1.3645777702331543, + "step": 2068 + }, + { + "epoch": 2.5480295566502464, + "grad_norm": 8.663375537691056, + "learning_rate": 6.794593046855613e-07, + "loss": 0.46488872170448303, + "step": 2069 + }, + { + "epoch": 2.5492610837438425, + "grad_norm": 11.746641376676559, + "learning_rate": 6.758570301650869e-07, + "loss": 0.9913250803947449, + "step": 2070 + }, + { + "epoch": 2.5504926108374386, + "grad_norm": 14.714182423444447, + "learning_rate": 6.722636378051011e-07, + "loss": 0.8180273771286011, + "step": 2071 + }, + { + "epoch": 2.5517241379310347, + "grad_norm": 7.848050259431333, + "learning_rate": 6.686791349867422e-07, + "loss": 0.5234679579734802, + "step": 2072 + }, + { + "epoch": 2.552955665024631, + "grad_norm": 6.903410737354236, + "learning_rate": 6.651035290728858e-07, + "loss": 0.08975313603878021, + "step": 2073 + }, + { + "epoch": 2.554187192118227, + "grad_norm": 11.27527783341364, + "learning_rate": 6.615368274081335e-07, + "loss": 0.35545456409454346, + "step": 2074 + }, + { + "epoch": 2.5554187192118225, + "grad_norm": 11.726857926860664, + "learning_rate": 6.579790373187944e-07, + "loss": 1.192006230354309, + "step": 2075 + }, + { + "epoch": 2.5566502463054186, + "grad_norm": 18.37387229568444, + "learning_rate": 6.54430166112876e-07, + "loss": 0.35069915652275085, + "step": 2076 + }, + { + "epoch": 2.5578817733990147, + "grad_norm": 9.620718531681447, + "learning_rate": 6.508902210800649e-07, + "loss": 0.20691820979118347, + "step": 2077 + }, + { + "epoch": 2.5591133004926108, + "grad_norm": 16.343394062782135, + "learning_rate": 6.473592094917092e-07, + "loss": 0.4561042785644531, + "step": 2078 + }, + { + "epoch": 2.560344827586207, + "grad_norm": 11.889860706895831, + "learning_rate": 6.43837138600813e-07, + "loss": 0.32198822498321533, + "step": 2079 + }, + { + "epoch": 2.561576354679803, + "grad_norm": 10.519181625251578, + "learning_rate": 6.403240156420087e-07, + "loss": 0.35681653022766113, + "step": 2080 + }, + { + "epoch": 2.562807881773399, + "grad_norm": 9.426944191114051, + "learning_rate": 6.36819847831554e-07, + "loss": 0.5826268196105957, + "step": 2081 + }, + { + "epoch": 2.564039408866995, + "grad_norm": 10.18400417142911, + "learning_rate": 6.333246423673096e-07, + "loss": 0.23084279894828796, + "step": 2082 + }, + { + "epoch": 2.565270935960591, + "grad_norm": 8.146966381833735, + "learning_rate": 6.298384064287261e-07, + "loss": 0.5527750253677368, + "step": 2083 + }, + { + "epoch": 2.5665024630541873, + "grad_norm": 7.581778739386861, + "learning_rate": 6.263611471768349e-07, + "loss": 0.4125085175037384, + "step": 2084 + }, + { + "epoch": 2.5677339901477834, + "grad_norm": 9.31385960486644, + "learning_rate": 6.228928717542205e-07, + "loss": 0.37431174516677856, + "step": 2085 + }, + { + "epoch": 2.5689655172413794, + "grad_norm": 9.72676402112677, + "learning_rate": 6.194335872850188e-07, + "loss": 0.17119471728801727, + "step": 2086 + }, + { + "epoch": 2.5701970443349755, + "grad_norm": 11.790310632986847, + "learning_rate": 6.159833008748988e-07, + "loss": 0.9465748071670532, + "step": 2087 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 25.018614409312026, + "learning_rate": 6.125420196110426e-07, + "loss": 0.48980847001075745, + "step": 2088 + }, + { + "epoch": 2.5726600985221673, + "grad_norm": 8.85280166601153, + "learning_rate": 6.091097505621374e-07, + "loss": 0.7195557951927185, + "step": 2089 + }, + { + "epoch": 2.5738916256157633, + "grad_norm": 12.112085029881426, + "learning_rate": 6.056865007783602e-07, + "loss": 1.83125638961792, + "step": 2090 + }, + { + "epoch": 2.5751231527093594, + "grad_norm": 9.94028667902401, + "learning_rate": 6.022722772913581e-07, + "loss": 0.3298517167568207, + "step": 2091 + }, + { + "epoch": 2.5763546798029555, + "grad_norm": 11.18503180129702, + "learning_rate": 5.988670871142377e-07, + "loss": 0.47125905752182007, + "step": 2092 + }, + { + "epoch": 2.5775862068965516, + "grad_norm": 9.413844300619951, + "learning_rate": 5.954709372415524e-07, + "loss": 0.288496196269989, + "step": 2093 + }, + { + "epoch": 2.5788177339901477, + "grad_norm": 7.1811144983138675, + "learning_rate": 5.920838346492874e-07, + "loss": 0.3627285957336426, + "step": 2094 + }, + { + "epoch": 2.5800492610837438, + "grad_norm": 14.830294096591077, + "learning_rate": 5.887057862948403e-07, + "loss": 0.7072806358337402, + "step": 2095 + }, + { + "epoch": 2.58128078817734, + "grad_norm": 10.644924386002677, + "learning_rate": 5.853367991170106e-07, + "loss": 0.3386034071445465, + "step": 2096 + }, + { + "epoch": 2.582512315270936, + "grad_norm": 14.094564220777247, + "learning_rate": 5.819768800359882e-07, + "loss": 0.4901737570762634, + "step": 2097 + }, + { + "epoch": 2.583743842364532, + "grad_norm": 10.630160715256755, + "learning_rate": 5.786260359533369e-07, + "loss": 1.683629035949707, + "step": 2098 + }, + { + "epoch": 2.584975369458128, + "grad_norm": 8.221455266315619, + "learning_rate": 5.752842737519743e-07, + "loss": 0.4275779128074646, + "step": 2099 + }, + { + "epoch": 2.586206896551724, + "grad_norm": 8.989808316079593, + "learning_rate": 5.7195160029617e-07, + "loss": 0.6892256736755371, + "step": 2100 + }, + { + "epoch": 2.5874384236453203, + "grad_norm": 10.390493407130242, + "learning_rate": 5.686280224315189e-07, + "loss": 0.6548988819122314, + "step": 2101 + }, + { + "epoch": 2.5886699507389164, + "grad_norm": 8.365114703591324, + "learning_rate": 5.653135469849347e-07, + "loss": 0.4431142807006836, + "step": 2102 + }, + { + "epoch": 2.5899014778325125, + "grad_norm": 20.296284889046316, + "learning_rate": 5.62008180764635e-07, + "loss": 0.5730191469192505, + "step": 2103 + }, + { + "epoch": 2.5911330049261085, + "grad_norm": 7.886033521206941, + "learning_rate": 5.587119305601263e-07, + "loss": 0.8734421730041504, + "step": 2104 + }, + { + "epoch": 2.5923645320197046, + "grad_norm": 7.851476190792639, + "learning_rate": 5.554248031421872e-07, + "loss": 0.30810514092445374, + "step": 2105 + }, + { + "epoch": 2.5935960591133007, + "grad_norm": 10.114012805058133, + "learning_rate": 5.521468052628615e-07, + "loss": 0.5941227078437805, + "step": 2106 + }, + { + "epoch": 2.594827586206897, + "grad_norm": 11.5276807645432, + "learning_rate": 5.488779436554359e-07, + "loss": 0.32648181915283203, + "step": 2107 + }, + { + "epoch": 2.596059113300493, + "grad_norm": 12.384461199116616, + "learning_rate": 5.456182250344349e-07, + "loss": 0.2934610843658447, + "step": 2108 + }, + { + "epoch": 2.5972906403940885, + "grad_norm": 9.420595645239136, + "learning_rate": 5.423676560955976e-07, + "loss": 0.20387941598892212, + "step": 2109 + }, + { + "epoch": 2.5985221674876846, + "grad_norm": 10.459297088933635, + "learning_rate": 5.391262435158722e-07, + "loss": 0.6115235090255737, + "step": 2110 + }, + { + "epoch": 2.5997536945812807, + "grad_norm": 13.891885044549888, + "learning_rate": 5.358939939534002e-07, + "loss": 0.45280611515045166, + "step": 2111 + }, + { + "epoch": 2.600985221674877, + "grad_norm": 8.172861215602202, + "learning_rate": 5.326709140474962e-07, + "loss": 0.29169538617134094, + "step": 2112 + }, + { + "epoch": 2.602216748768473, + "grad_norm": 6.844042320685791, + "learning_rate": 5.294570104186436e-07, + "loss": 0.4924798011779785, + "step": 2113 + }, + { + "epoch": 2.603448275862069, + "grad_norm": 12.392169249298135, + "learning_rate": 5.262522896684774e-07, + "loss": 0.6751348376274109, + "step": 2114 + }, + { + "epoch": 2.604679802955665, + "grad_norm": 13.993739996881734, + "learning_rate": 5.230567583797674e-07, + "loss": 0.6676002740859985, + "step": 2115 + }, + { + "epoch": 2.605911330049261, + "grad_norm": 12.746427038097593, + "learning_rate": 5.198704231164093e-07, + "loss": 0.3112475275993347, + "step": 2116 + }, + { + "epoch": 2.607142857142857, + "grad_norm": 9.88854663199865, + "learning_rate": 5.166932904234101e-07, + "loss": 0.5024739503860474, + "step": 2117 + }, + { + "epoch": 2.6083743842364533, + "grad_norm": 18.4856178419616, + "learning_rate": 5.135253668268724e-07, + "loss": 2.6769824028015137, + "step": 2118 + }, + { + "epoch": 2.6096059113300494, + "grad_norm": 12.280278924091732, + "learning_rate": 5.103666588339812e-07, + "loss": 0.4120222330093384, + "step": 2119 + }, + { + "epoch": 2.6108374384236455, + "grad_norm": 8.106704210398478, + "learning_rate": 5.072171729329944e-07, + "loss": 0.3238741457462311, + "step": 2120 + }, + { + "epoch": 2.612068965517241, + "grad_norm": 9.476233543897594, + "learning_rate": 5.040769155932285e-07, + "loss": 0.41853106021881104, + "step": 2121 + }, + { + "epoch": 2.613300492610837, + "grad_norm": 9.382868411266552, + "learning_rate": 5.00945893265039e-07, + "loss": 0.5511228442192078, + "step": 2122 + }, + { + "epoch": 2.6145320197044333, + "grad_norm": 10.011756541997418, + "learning_rate": 4.978241123798133e-07, + "loss": 0.6076939105987549, + "step": 2123 + }, + { + "epoch": 2.6157635467980294, + "grad_norm": 11.969458383094386, + "learning_rate": 4.94711579349959e-07, + "loss": 0.32137832045555115, + "step": 2124 + }, + { + "epoch": 2.6169950738916254, + "grad_norm": 9.120309940189742, + "learning_rate": 4.916083005688865e-07, + "loss": 0.2919730246067047, + "step": 2125 + }, + { + "epoch": 2.6182266009852215, + "grad_norm": 11.012298283555321, + "learning_rate": 4.885142824109946e-07, + "loss": 0.3521897792816162, + "step": 2126 + }, + { + "epoch": 2.6194581280788176, + "grad_norm": 10.719771585992975, + "learning_rate": 4.85429531231662e-07, + "loss": 0.5645777583122253, + "step": 2127 + }, + { + "epoch": 2.6206896551724137, + "grad_norm": 8.564760545887571, + "learning_rate": 4.823540533672355e-07, + "loss": 0.21364668011665344, + "step": 2128 + }, + { + "epoch": 2.62192118226601, + "grad_norm": 10.461100625681352, + "learning_rate": 4.792878551350055e-07, + "loss": 0.3472633957862854, + "step": 2129 + }, + { + "epoch": 2.623152709359606, + "grad_norm": 7.7796379590314295, + "learning_rate": 4.7623094283320905e-07, + "loss": 0.2312706857919693, + "step": 2130 + }, + { + "epoch": 2.624384236453202, + "grad_norm": 10.908716191951015, + "learning_rate": 4.7318332274100595e-07, + "loss": 0.4227292835712433, + "step": 2131 + }, + { + "epoch": 2.625615763546798, + "grad_norm": 11.077941430018797, + "learning_rate": 4.701450011184677e-07, + "loss": 0.4835679531097412, + "step": 2132 + }, + { + "epoch": 2.626847290640394, + "grad_norm": 8.011667181424437, + "learning_rate": 4.671159842065698e-07, + "loss": 0.30153489112854004, + "step": 2133 + }, + { + "epoch": 2.62807881773399, + "grad_norm": 9.961423240887521, + "learning_rate": 4.640962782271707e-07, + "loss": 0.19820570945739746, + "step": 2134 + }, + { + "epoch": 2.6293103448275863, + "grad_norm": 18.168474918209572, + "learning_rate": 4.6108588938300725e-07, + "loss": 0.5798308253288269, + "step": 2135 + }, + { + "epoch": 2.6305418719211824, + "grad_norm": 14.982461578988175, + "learning_rate": 4.5808482385767407e-07, + "loss": 0.4840395450592041, + "step": 2136 + }, + { + "epoch": 2.6317733990147785, + "grad_norm": 12.540506897781501, + "learning_rate": 4.5509308781561846e-07, + "loss": 0.33036884665489197, + "step": 2137 + }, + { + "epoch": 2.6330049261083746, + "grad_norm": 10.69964555424519, + "learning_rate": 4.521106874021242e-07, + "loss": 0.4032250642776489, + "step": 2138 + }, + { + "epoch": 2.6342364532019706, + "grad_norm": 10.190070867602095, + "learning_rate": 4.4913762874329527e-07, + "loss": 0.5196541547775269, + "step": 2139 + }, + { + "epoch": 2.6354679802955667, + "grad_norm": 15.414254295489695, + "learning_rate": 4.4617391794604946e-07, + "loss": 0.5049697160720825, + "step": 2140 + }, + { + "epoch": 2.636699507389163, + "grad_norm": 11.232489708483897, + "learning_rate": 4.4321956109810327e-07, + "loss": 0.6910302639007568, + "step": 2141 + }, + { + "epoch": 2.637931034482759, + "grad_norm": 17.874353794074672, + "learning_rate": 4.4027456426796014e-07, + "loss": 0.8860565423965454, + "step": 2142 + }, + { + "epoch": 2.6391625615763545, + "grad_norm": 8.315561152824909, + "learning_rate": 4.3733893350489386e-07, + "loss": 0.3347795307636261, + "step": 2143 + }, + { + "epoch": 2.6403940886699506, + "grad_norm": 8.406655821874109, + "learning_rate": 4.344126748389438e-07, + "loss": 0.5979218482971191, + "step": 2144 + }, + { + "epoch": 2.6416256157635467, + "grad_norm": 10.633642678256232, + "learning_rate": 4.314957942808956e-07, + "loss": 0.6724722385406494, + "step": 2145 + }, + { + "epoch": 2.642857142857143, + "grad_norm": 11.37770126439957, + "learning_rate": 4.2858829782227107e-07, + "loss": 0.23655423521995544, + "step": 2146 + }, + { + "epoch": 2.644088669950739, + "grad_norm": 13.564798867932334, + "learning_rate": 4.2569019143531845e-07, + "loss": 0.7535929679870605, + "step": 2147 + }, + { + "epoch": 2.645320197044335, + "grad_norm": 7.225057762729149, + "learning_rate": 4.228014810729963e-07, + "loss": 0.5065590143203735, + "step": 2148 + }, + { + "epoch": 2.646551724137931, + "grad_norm": 11.646047154930116, + "learning_rate": 4.199221726689634e-07, + "loss": 0.8232078552246094, + "step": 2149 + }, + { + "epoch": 2.647783251231527, + "grad_norm": 12.627075206048184, + "learning_rate": 4.170522721375669e-07, + "loss": 0.3928985595703125, + "step": 2150 + }, + { + "epoch": 2.649014778325123, + "grad_norm": 11.823044988035218, + "learning_rate": 4.1419178537382756e-07, + "loss": 0.6924771070480347, + "step": 2151 + }, + { + "epoch": 2.6502463054187193, + "grad_norm": 8.99171598727701, + "learning_rate": 4.1134071825343124e-07, + "loss": 0.3323458135128021, + "step": 2152 + }, + { + "epoch": 2.6514778325123154, + "grad_norm": 8.020309669901565, + "learning_rate": 4.0849907663271346e-07, + "loss": 0.6068896651268005, + "step": 2153 + }, + { + "epoch": 2.6527093596059115, + "grad_norm": 9.698785865473045, + "learning_rate": 4.0566686634865016e-07, + "loss": 0.2112211287021637, + "step": 2154 + }, + { + "epoch": 2.653940886699507, + "grad_norm": 8.70939943207942, + "learning_rate": 4.028440932188465e-07, + "loss": 0.3340219259262085, + "step": 2155 + }, + { + "epoch": 2.655172413793103, + "grad_norm": 16.06563756982883, + "learning_rate": 4.0003076304151624e-07, + "loss": 0.4172120690345764, + "step": 2156 + }, + { + "epoch": 2.6564039408866993, + "grad_norm": 10.448504154619048, + "learning_rate": 3.972268815954833e-07, + "loss": 0.3891775608062744, + "step": 2157 + }, + { + "epoch": 2.6576354679802954, + "grad_norm": 14.733135115767965, + "learning_rate": 3.944324546401607e-07, + "loss": 0.4906957149505615, + "step": 2158 + }, + { + "epoch": 2.6588669950738915, + "grad_norm": 9.613272858024363, + "learning_rate": 3.916474879155402e-07, + "loss": 0.8216167688369751, + "step": 2159 + }, + { + "epoch": 2.6600985221674875, + "grad_norm": 10.257611413751764, + "learning_rate": 3.8887198714218255e-07, + "loss": 0.2030409872531891, + "step": 2160 + }, + { + "epoch": 2.6613300492610836, + "grad_norm": 7.648297896745766, + "learning_rate": 3.8610595802120564e-07, + "loss": 0.24565047025680542, + "step": 2161 + }, + { + "epoch": 2.6625615763546797, + "grad_norm": 10.822762486642535, + "learning_rate": 3.833494062342691e-07, + "loss": 0.3111516833305359, + "step": 2162 + }, + { + "epoch": 2.663793103448276, + "grad_norm": 7.318326050197103, + "learning_rate": 3.8060233744356634e-07, + "loss": 0.32978883385658264, + "step": 2163 + }, + { + "epoch": 2.665024630541872, + "grad_norm": 12.599543466460439, + "learning_rate": 3.7786475729181314e-07, + "loss": 0.5468876361846924, + "step": 2164 + }, + { + "epoch": 2.666256157635468, + "grad_norm": 8.338604416987764, + "learning_rate": 3.751366714022342e-07, + "loss": 0.25511908531188965, + "step": 2165 + }, + { + "epoch": 2.667487684729064, + "grad_norm": 10.389301741607085, + "learning_rate": 3.724180853785514e-07, + "loss": 0.9938629269599915, + "step": 2166 + }, + { + "epoch": 2.66871921182266, + "grad_norm": 12.267953130443164, + "learning_rate": 3.6970900480497287e-07, + "loss": 0.4233144223690033, + "step": 2167 + }, + { + "epoch": 2.6699507389162562, + "grad_norm": 11.571711586702998, + "learning_rate": 3.6700943524618284e-07, + "loss": 0.39373546838760376, + "step": 2168 + }, + { + "epoch": 2.6711822660098523, + "grad_norm": 9.063048538209927, + "learning_rate": 3.643193822473301e-07, + "loss": 0.40346717834472656, + "step": 2169 + }, + { + "epoch": 2.6724137931034484, + "grad_norm": 14.384271085159352, + "learning_rate": 3.616388513340124e-07, + "loss": 0.35343194007873535, + "step": 2170 + }, + { + "epoch": 2.6736453201970445, + "grad_norm": 16.277411971018296, + "learning_rate": 3.5896784801227046e-07, + "loss": 0.38300061225891113, + "step": 2171 + }, + { + "epoch": 2.6748768472906406, + "grad_norm": 7.950757575573031, + "learning_rate": 3.56306377768576e-07, + "loss": 0.5319961905479431, + "step": 2172 + }, + { + "epoch": 2.6761083743842367, + "grad_norm": 19.004855778838706, + "learning_rate": 3.5365444606981434e-07, + "loss": 0.45474281907081604, + "step": 2173 + }, + { + "epoch": 2.6773399014778327, + "grad_norm": 13.211081908527799, + "learning_rate": 3.5101205836328144e-07, + "loss": 0.41422080993652344, + "step": 2174 + }, + { + "epoch": 2.678571428571429, + "grad_norm": 12.892521639907137, + "learning_rate": 3.4837922007667e-07, + "loss": 0.5486617088317871, + "step": 2175 + }, + { + "epoch": 2.6798029556650245, + "grad_norm": 10.113357639811962, + "learning_rate": 3.4575593661805296e-07, + "loss": 0.27931463718414307, + "step": 2176 + }, + { + "epoch": 2.6810344827586206, + "grad_norm": 9.357499790574233, + "learning_rate": 3.4314221337588217e-07, + "loss": 0.45936134457588196, + "step": 2177 + }, + { + "epoch": 2.6822660098522166, + "grad_norm": 12.597881278175105, + "learning_rate": 3.405380557189669e-07, + "loss": 0.5659298896789551, + "step": 2178 + }, + { + "epoch": 2.6834975369458127, + "grad_norm": 16.9103130329337, + "learning_rate": 3.379434689964728e-07, + "loss": 0.3952332139015198, + "step": 2179 + }, + { + "epoch": 2.684729064039409, + "grad_norm": 13.280154300410791, + "learning_rate": 3.3535845853790105e-07, + "loss": 0.36344432830810547, + "step": 2180 + }, + { + "epoch": 2.685960591133005, + "grad_norm": 8.267427758719474, + "learning_rate": 3.3278302965308593e-07, + "loss": 0.29526573419570923, + "step": 2181 + }, + { + "epoch": 2.687192118226601, + "grad_norm": 14.172270303989801, + "learning_rate": 3.3021718763218025e-07, + "loss": 0.35098952054977417, + "step": 2182 + }, + { + "epoch": 2.688423645320197, + "grad_norm": 15.442089142249914, + "learning_rate": 3.276609377456419e-07, + "loss": 0.9407736659049988, + "step": 2183 + }, + { + "epoch": 2.689655172413793, + "grad_norm": 10.545470371926038, + "learning_rate": 3.2511428524422793e-07, + "loss": 0.29226356744766235, + "step": 2184 + }, + { + "epoch": 2.6908866995073892, + "grad_norm": 11.590832336497728, + "learning_rate": 3.2257723535898177e-07, + "loss": 0.78415846824646, + "step": 2185 + }, + { + "epoch": 2.6921182266009853, + "grad_norm": 10.523504017171055, + "learning_rate": 3.200497933012198e-07, + "loss": 0.22600015997886658, + "step": 2186 + }, + { + "epoch": 2.6933497536945814, + "grad_norm": 16.18317423891681, + "learning_rate": 3.1753196426252573e-07, + "loss": 0.3907809853553772, + "step": 2187 + }, + { + "epoch": 2.6945812807881775, + "grad_norm": 12.272867485671698, + "learning_rate": 3.150237534147366e-07, + "loss": 0.7056915760040283, + "step": 2188 + }, + { + "epoch": 2.695812807881773, + "grad_norm": 11.590493499262351, + "learning_rate": 3.125251659099332e-07, + "loss": 0.35921359062194824, + "step": 2189 + }, + { + "epoch": 2.697044334975369, + "grad_norm": 7.139507013908415, + "learning_rate": 3.1003620688042636e-07, + "loss": 0.17715278267860413, + "step": 2190 + }, + { + "epoch": 2.6982758620689653, + "grad_norm": 6.945336769527092, + "learning_rate": 3.0755688143875253e-07, + "loss": 0.20512376725673676, + "step": 2191 + }, + { + "epoch": 2.6995073891625614, + "grad_norm": 11.666932414854655, + "learning_rate": 3.050871946776596e-07, + "loss": 0.38939356803894043, + "step": 2192 + }, + { + "epoch": 2.7007389162561575, + "grad_norm": 8.970559885182587, + "learning_rate": 3.026271516700946e-07, + "loss": 0.3292514681816101, + "step": 2193 + }, + { + "epoch": 2.7019704433497536, + "grad_norm": 8.920484564263525, + "learning_rate": 3.0017675746919883e-07, + "loss": 0.2732661962509155, + "step": 2194 + }, + { + "epoch": 2.7032019704433496, + "grad_norm": 14.273169657648177, + "learning_rate": 2.9773601710828937e-07, + "loss": 0.3058941960334778, + "step": 2195 + }, + { + "epoch": 2.7044334975369457, + "grad_norm": 16.20827847981958, + "learning_rate": 2.953049356008586e-07, + "loss": 0.7454397082328796, + "step": 2196 + }, + { + "epoch": 2.705665024630542, + "grad_norm": 17.54054653840535, + "learning_rate": 2.928835179405548e-07, + "loss": 0.3679504692554474, + "step": 2197 + }, + { + "epoch": 2.706896551724138, + "grad_norm": 9.77472352239386, + "learning_rate": 2.9047176910117824e-07, + "loss": 0.2241794466972351, + "step": 2198 + }, + { + "epoch": 2.708128078817734, + "grad_norm": 8.561542797938362, + "learning_rate": 2.8806969403666897e-07, + "loss": 0.19927407801151276, + "step": 2199 + }, + { + "epoch": 2.70935960591133, + "grad_norm": 7.0959519302312195, + "learning_rate": 2.856772976810929e-07, + "loss": 0.2808955907821655, + "step": 2200 + }, + { + "epoch": 2.710591133004926, + "grad_norm": 21.456216648925764, + "learning_rate": 2.8329458494863846e-07, + "loss": 0.7279784083366394, + "step": 2201 + }, + { + "epoch": 2.7118226600985222, + "grad_norm": 8.853404617031957, + "learning_rate": 2.809215607336024e-07, + "loss": 0.47690945863723755, + "step": 2202 + }, + { + "epoch": 2.7130541871921183, + "grad_norm": 9.19562501308832, + "learning_rate": 2.7855822991037895e-07, + "loss": 0.1997358649969101, + "step": 2203 + }, + { + "epoch": 2.7142857142857144, + "grad_norm": 12.418182947084489, + "learning_rate": 2.762045973334526e-07, + "loss": 0.3269602954387665, + "step": 2204 + }, + { + "epoch": 2.7155172413793105, + "grad_norm": 9.253477256115538, + "learning_rate": 2.738606678373873e-07, + "loss": 0.5450934767723083, + "step": 2205 + }, + { + "epoch": 2.7167487684729066, + "grad_norm": 12.029880579085864, + "learning_rate": 2.7152644623681503e-07, + "loss": 0.4732050895690918, + "step": 2206 + }, + { + "epoch": 2.7179802955665027, + "grad_norm": 13.561046323857816, + "learning_rate": 2.6920193732642594e-07, + "loss": 0.26588505506515503, + "step": 2207 + }, + { + "epoch": 2.7192118226600988, + "grad_norm": 4.326966860689474, + "learning_rate": 2.668871458809613e-07, + "loss": 0.09280772507190704, + "step": 2208 + }, + { + "epoch": 2.720443349753695, + "grad_norm": 12.851246166510439, + "learning_rate": 2.6458207665520266e-07, + "loss": 0.3763241767883301, + "step": 2209 + }, + { + "epoch": 2.7216748768472905, + "grad_norm": 11.562947215162826, + "learning_rate": 2.6228673438395804e-07, + "loss": 0.46730220317840576, + "step": 2210 + }, + { + "epoch": 2.7229064039408866, + "grad_norm": 11.5850144160988, + "learning_rate": 2.600011237820577e-07, + "loss": 0.42677825689315796, + "step": 2211 + }, + { + "epoch": 2.7241379310344827, + "grad_norm": 15.077683389725815, + "learning_rate": 2.577252495443422e-07, + "loss": 0.4460552930831909, + "step": 2212 + }, + { + "epoch": 2.7253694581280787, + "grad_norm": 8.23073307445448, + "learning_rate": 2.5545911634565266e-07, + "loss": 0.5031150579452515, + "step": 2213 + }, + { + "epoch": 2.726600985221675, + "grad_norm": 11.590947176695321, + "learning_rate": 2.5320272884081955e-07, + "loss": 0.18559831380844116, + "step": 2214 + }, + { + "epoch": 2.727832512315271, + "grad_norm": 10.364105747898172, + "learning_rate": 2.5095609166465805e-07, + "loss": 0.2087395340204239, + "step": 2215 + }, + { + "epoch": 2.729064039408867, + "grad_norm": 7.72131921454244, + "learning_rate": 2.4871920943195404e-07, + "loss": 0.21503375470638275, + "step": 2216 + }, + { + "epoch": 2.730295566502463, + "grad_norm": 13.07348837914591, + "learning_rate": 2.4649208673745317e-07, + "loss": 0.20347240567207336, + "step": 2217 + }, + { + "epoch": 2.731527093596059, + "grad_norm": 7.396681990877147, + "learning_rate": 2.442747281558572e-07, + "loss": 0.20019523799419403, + "step": 2218 + }, + { + "epoch": 2.7327586206896552, + "grad_norm": 7.384056914568049, + "learning_rate": 2.420671382418122e-07, + "loss": 0.6672437191009521, + "step": 2219 + }, + { + "epoch": 2.7339901477832513, + "grad_norm": 9.4227706186618, + "learning_rate": 2.398693215298953e-07, + "loss": 0.28304070234298706, + "step": 2220 + }, + { + "epoch": 2.7352216748768474, + "grad_norm": 13.10398470275865, + "learning_rate": 2.3768128253461253e-07, + "loss": 0.7915571331977844, + "step": 2221 + }, + { + "epoch": 2.7364532019704435, + "grad_norm": 14.271199864374358, + "learning_rate": 2.3550302575038154e-07, + "loss": 0.2920302152633667, + "step": 2222 + }, + { + "epoch": 2.737684729064039, + "grad_norm": 9.98798818011476, + "learning_rate": 2.333345556515304e-07, + "loss": 0.7924119830131531, + "step": 2223 + }, + { + "epoch": 2.7389162561576352, + "grad_norm": 16.52502448354582, + "learning_rate": 2.311758766922806e-07, + "loss": 2.4264345169067383, + "step": 2224 + }, + { + "epoch": 2.7401477832512313, + "grad_norm": 11.115670896935416, + "learning_rate": 2.290269933067457e-07, + "loss": 0.6286523342132568, + "step": 2225 + }, + { + "epoch": 2.7413793103448274, + "grad_norm": 10.041583417397344, + "learning_rate": 2.2688790990891606e-07, + "loss": 0.4733774662017822, + "step": 2226 + }, + { + "epoch": 2.7426108374384235, + "grad_norm": 9.613596914422414, + "learning_rate": 2.2475863089265193e-07, + "loss": 0.41262203454971313, + "step": 2227 + }, + { + "epoch": 2.7438423645320196, + "grad_norm": 12.211203634057204, + "learning_rate": 2.2263916063167523e-07, + "loss": 0.9069987535476685, + "step": 2228 + }, + { + "epoch": 2.7450738916256157, + "grad_norm": 8.477983222031407, + "learning_rate": 2.205295034795596e-07, + "loss": 0.33371949195861816, + "step": 2229 + }, + { + "epoch": 2.7463054187192117, + "grad_norm": 10.672673009053705, + "learning_rate": 2.1842966376972142e-07, + "loss": 0.2515576183795929, + "step": 2230 + }, + { + "epoch": 2.747536945812808, + "grad_norm": 15.919489094243, + "learning_rate": 2.1633964581541212e-07, + "loss": 0.5854448080062866, + "step": 2231 + }, + { + "epoch": 2.748768472906404, + "grad_norm": 8.34813593109363, + "learning_rate": 2.1425945390970816e-07, + "loss": 0.36172378063201904, + "step": 2232 + }, + { + "epoch": 2.75, + "grad_norm": 13.095561050747872, + "learning_rate": 2.1218909232550156e-07, + "loss": 0.8217978477478027, + "step": 2233 + }, + { + "epoch": 2.751231527093596, + "grad_norm": 10.987521536719951, + "learning_rate": 2.1012856531549163e-07, + "loss": 0.5560616850852966, + "step": 2234 + }, + { + "epoch": 2.752463054187192, + "grad_norm": 15.220877022032928, + "learning_rate": 2.0807787711217887e-07, + "loss": 0.3503821790218353, + "step": 2235 + }, + { + "epoch": 2.7536945812807883, + "grad_norm": 17.985871130679012, + "learning_rate": 2.0603703192785264e-07, + "loss": 0.6000460982322693, + "step": 2236 + }, + { + "epoch": 2.7549261083743843, + "grad_norm": 10.345272170286153, + "learning_rate": 2.0400603395458408e-07, + "loss": 0.20410886406898499, + "step": 2237 + }, + { + "epoch": 2.7561576354679804, + "grad_norm": 10.777826560400182, + "learning_rate": 2.0198488736421607e-07, + "loss": 0.2497151494026184, + "step": 2238 + }, + { + "epoch": 2.7573891625615765, + "grad_norm": 9.330808767879285, + "learning_rate": 1.999735963083571e-07, + "loss": 0.2881111800670624, + "step": 2239 + }, + { + "epoch": 2.7586206896551726, + "grad_norm": 19.301319480093145, + "learning_rate": 1.9797216491837356e-07, + "loss": 0.38934653997421265, + "step": 2240 + }, + { + "epoch": 2.7598522167487687, + "grad_norm": 13.511728912052765, + "learning_rate": 1.9598059730537465e-07, + "loss": 0.3553803563117981, + "step": 2241 + }, + { + "epoch": 2.7610837438423648, + "grad_norm": 13.74634988747894, + "learning_rate": 1.9399889756021196e-07, + "loss": 0.3653762936592102, + "step": 2242 + }, + { + "epoch": 2.762315270935961, + "grad_norm": 9.247962499458838, + "learning_rate": 1.9202706975346875e-07, + "loss": 0.2600834369659424, + "step": 2243 + }, + { + "epoch": 2.7635467980295565, + "grad_norm": 11.458094202817868, + "learning_rate": 1.9006511793544458e-07, + "loss": 0.4601256847381592, + "step": 2244 + }, + { + "epoch": 2.7647783251231526, + "grad_norm": 17.193961086363156, + "learning_rate": 1.881130461361591e-07, + "loss": 0.33677470684051514, + "step": 2245 + }, + { + "epoch": 2.7660098522167487, + "grad_norm": 8.524927066266194, + "learning_rate": 1.8617085836533544e-07, + "loss": 0.8099600672721863, + "step": 2246 + }, + { + "epoch": 2.7672413793103448, + "grad_norm": 15.804119634424612, + "learning_rate": 1.8423855861239238e-07, + "loss": 0.6992620229721069, + "step": 2247 + }, + { + "epoch": 2.768472906403941, + "grad_norm": 9.647846553411064, + "learning_rate": 1.8231615084644105e-07, + "loss": 0.3640286326408386, + "step": 2248 + }, + { + "epoch": 2.769704433497537, + "grad_norm": 8.955751617734634, + "learning_rate": 1.8040363901627001e-07, + "loss": 0.2996286451816559, + "step": 2249 + }, + { + "epoch": 2.770935960591133, + "grad_norm": 11.938038283583609, + "learning_rate": 1.7850102705034455e-07, + "loss": 0.43687328696250916, + "step": 2250 + }, + { + "epoch": 2.772167487684729, + "grad_norm": 17.093390601969645, + "learning_rate": 1.7660831885679074e-07, + "loss": 0.7942696809768677, + "step": 2251 + }, + { + "epoch": 2.773399014778325, + "grad_norm": 13.100096515382093, + "learning_rate": 1.747255183233948e-07, + "loss": 1.1030818223953247, + "step": 2252 + }, + { + "epoch": 2.7746305418719213, + "grad_norm": 8.873613224852555, + "learning_rate": 1.7285262931759084e-07, + "loss": 0.5030316114425659, + "step": 2253 + }, + { + "epoch": 2.7758620689655173, + "grad_norm": 12.14741952725113, + "learning_rate": 1.7098965568645264e-07, + "loss": 0.6707223653793335, + "step": 2254 + }, + { + "epoch": 2.7770935960591134, + "grad_norm": 11.75778232712136, + "learning_rate": 1.6913660125668806e-07, + "loss": 0.2983396351337433, + "step": 2255 + }, + { + "epoch": 2.7783251231527095, + "grad_norm": 14.41974913977501, + "learning_rate": 1.6729346983462957e-07, + "loss": 0.6233869791030884, + "step": 2256 + }, + { + "epoch": 2.779556650246305, + "grad_norm": 13.000501735636352, + "learning_rate": 1.654602652062276e-07, + "loss": 0.2838573455810547, + "step": 2257 + }, + { + "epoch": 2.7807881773399012, + "grad_norm": 8.269339223606165, + "learning_rate": 1.636369911370417e-07, + "loss": 0.516904354095459, + "step": 2258 + }, + { + "epoch": 2.7820197044334973, + "grad_norm": 12.228570926666848, + "learning_rate": 1.6182365137223266e-07, + "loss": 0.2637355625629425, + "step": 2259 + }, + { + "epoch": 2.7832512315270934, + "grad_norm": 12.77963989317756, + "learning_rate": 1.600202496365566e-07, + "loss": 0.2973381280899048, + "step": 2260 + }, + { + "epoch": 2.7844827586206895, + "grad_norm": 12.028070410415097, + "learning_rate": 1.5822678963435479e-07, + "loss": 0.731842041015625, + "step": 2261 + }, + { + "epoch": 2.7857142857142856, + "grad_norm": 16.480537506483405, + "learning_rate": 1.564432750495476e-07, + "loss": 0.9091979265213013, + "step": 2262 + }, + { + "epoch": 2.7869458128078817, + "grad_norm": 14.778758482272446, + "learning_rate": 1.5466970954562786e-07, + "loss": 0.9223085641860962, + "step": 2263 + }, + { + "epoch": 2.7881773399014778, + "grad_norm": 12.767601072668027, + "learning_rate": 1.5290609676564982e-07, + "loss": 0.35786327719688416, + "step": 2264 + }, + { + "epoch": 2.789408866995074, + "grad_norm": 10.468097971683415, + "learning_rate": 1.5115244033222732e-07, + "loss": 0.7312544584274292, + "step": 2265 + }, + { + "epoch": 2.79064039408867, + "grad_norm": 9.834986856814911, + "learning_rate": 1.4940874384751947e-07, + "loss": 0.8420913219451904, + "step": 2266 + }, + { + "epoch": 2.791871921182266, + "grad_norm": 16.21429528610728, + "learning_rate": 1.47675010893229e-07, + "loss": 0.3239392042160034, + "step": 2267 + }, + { + "epoch": 2.793103448275862, + "grad_norm": 8.629439268560123, + "learning_rate": 1.4595124503059165e-07, + "loss": 0.3498873710632324, + "step": 2268 + }, + { + "epoch": 2.794334975369458, + "grad_norm": 6.690308017489741, + "learning_rate": 1.4423744980037068e-07, + "loss": 0.22733798623085022, + "step": 2269 + }, + { + "epoch": 2.7955665024630543, + "grad_norm": 8.212515181619986, + "learning_rate": 1.425336287228496e-07, + "loss": 0.2721923291683197, + "step": 2270 + }, + { + "epoch": 2.7967980295566504, + "grad_norm": 9.080877903298425, + "learning_rate": 1.408397852978205e-07, + "loss": 0.344375342130661, + "step": 2271 + }, + { + "epoch": 2.7980295566502464, + "grad_norm": 9.45480785329488, + "learning_rate": 1.391559230045847e-07, + "loss": 0.4529953896999359, + "step": 2272 + }, + { + "epoch": 2.7992610837438425, + "grad_norm": 9.214190080042984, + "learning_rate": 1.3748204530193987e-07, + "loss": 0.1639999896287918, + "step": 2273 + }, + { + "epoch": 2.8004926108374386, + "grad_norm": 13.6280899298915, + "learning_rate": 1.3581815562817402e-07, + "loss": 0.23326484858989716, + "step": 2274 + }, + { + "epoch": 2.8017241379310347, + "grad_norm": 8.920482755226637, + "learning_rate": 1.341642574010582e-07, + "loss": 0.22694149613380432, + "step": 2275 + }, + { + "epoch": 2.802955665024631, + "grad_norm": 8.710884196173295, + "learning_rate": 1.3252035401784324e-07, + "loss": 0.3588021993637085, + "step": 2276 + }, + { + "epoch": 2.804187192118227, + "grad_norm": 11.632314435280234, + "learning_rate": 1.3088644885524637e-07, + "loss": 0.4335256516933441, + "step": 2277 + }, + { + "epoch": 2.8054187192118225, + "grad_norm": 6.272067777885255, + "learning_rate": 1.2926254526944904e-07, + "loss": 0.1874769926071167, + "step": 2278 + }, + { + "epoch": 2.8066502463054186, + "grad_norm": 8.936224496797552, + "learning_rate": 1.27648646596088e-07, + "loss": 0.3144474923610687, + "step": 2279 + }, + { + "epoch": 2.8078817733990147, + "grad_norm": 19.58883398368707, + "learning_rate": 1.2604475615025092e-07, + "loss": 0.7241795063018799, + "step": 2280 + }, + { + "epoch": 2.8091133004926108, + "grad_norm": 16.726363332544537, + "learning_rate": 1.2445087722646576e-07, + "loss": 0.5169468522071838, + "step": 2281 + }, + { + "epoch": 2.810344827586207, + "grad_norm": 30.94634458747577, + "learning_rate": 1.228670130986953e-07, + "loss": 1.6869860887527466, + "step": 2282 + }, + { + "epoch": 2.811576354679803, + "grad_norm": 10.707666993688912, + "learning_rate": 1.212931670203338e-07, + "loss": 0.47550255060195923, + "step": 2283 + }, + { + "epoch": 2.812807881773399, + "grad_norm": 9.540335234729794, + "learning_rate": 1.197293422241952e-07, + "loss": 0.2437782883644104, + "step": 2284 + }, + { + "epoch": 2.814039408866995, + "grad_norm": 6.665490888518648, + "learning_rate": 1.1817554192251002e-07, + "loss": 0.37867432832717896, + "step": 2285 + }, + { + "epoch": 2.815270935960591, + "grad_norm": 9.667222509113516, + "learning_rate": 1.1663176930691744e-07, + "loss": 0.8604614734649658, + "step": 2286 + }, + { + "epoch": 2.8165024630541873, + "grad_norm": 12.759555548828967, + "learning_rate": 1.1509802754845978e-07, + "loss": 1.1947153806686401, + "step": 2287 + }, + { + "epoch": 2.8177339901477834, + "grad_norm": 9.33176290924216, + "learning_rate": 1.1357431979757194e-07, + "loss": 0.30131372809410095, + "step": 2288 + }, + { + "epoch": 2.8189655172413794, + "grad_norm": 10.72676065785706, + "learning_rate": 1.1206064918408143e-07, + "loss": 0.47112587094306946, + "step": 2289 + }, + { + "epoch": 2.8201970443349755, + "grad_norm": 11.488110070600202, + "learning_rate": 1.1055701881719838e-07, + "loss": 0.2062550187110901, + "step": 2290 + }, + { + "epoch": 2.821428571428571, + "grad_norm": 8.859910558029405, + "learning_rate": 1.0906343178550715e-07, + "loss": 0.30918222665786743, + "step": 2291 + }, + { + "epoch": 2.8226600985221673, + "grad_norm": 7.645494812767514, + "learning_rate": 1.0757989115696421e-07, + "loss": 0.46675896644592285, + "step": 2292 + }, + { + "epoch": 2.8238916256157633, + "grad_norm": 7.696373009746994, + "learning_rate": 1.0610639997888917e-07, + "loss": 0.2514066696166992, + "step": 2293 + }, + { + "epoch": 2.8251231527093594, + "grad_norm": 20.301202253116305, + "learning_rate": 1.0464296127795926e-07, + "loss": 0.37799739837646484, + "step": 2294 + }, + { + "epoch": 2.8263546798029555, + "grad_norm": 10.51342866650685, + "learning_rate": 1.0318957806020269e-07, + "loss": 1.170919418334961, + "step": 2295 + }, + { + "epoch": 2.8275862068965516, + "grad_norm": 10.322546313834785, + "learning_rate": 1.0174625331099363e-07, + "loss": 0.34683138132095337, + "step": 2296 + }, + { + "epoch": 2.8288177339901477, + "grad_norm": 13.218925485338286, + "learning_rate": 1.0031298999504557e-07, + "loss": 0.24154211580753326, + "step": 2297 + }, + { + "epoch": 2.8300492610837438, + "grad_norm": 11.94151576403668, + "learning_rate": 9.888979105640295e-08, + "loss": 0.3270137906074524, + "step": 2298 + }, + { + "epoch": 2.83128078817734, + "grad_norm": 10.157922840931477, + "learning_rate": 9.747665941843953e-08, + "loss": 0.33205774426460266, + "step": 2299 + }, + { + "epoch": 2.832512315270936, + "grad_norm": 15.674554832536234, + "learning_rate": 9.607359798384785e-08, + "loss": 1.5672454833984375, + "step": 2300 + }, + { + "epoch": 2.833743842364532, + "grad_norm": 7.89425528282641, + "learning_rate": 9.468060963463754e-08, + "loss": 0.1868615597486496, + "step": 2301 + }, + { + "epoch": 2.834975369458128, + "grad_norm": 16.06809449939127, + "learning_rate": 9.329769723212478e-08, + "loss": 0.3485974371433258, + "step": 2302 + }, + { + "epoch": 2.836206896551724, + "grad_norm": 22.06944110945676, + "learning_rate": 9.192486361693175e-08, + "loss": 0.5702242851257324, + "step": 2303 + }, + { + "epoch": 2.8374384236453203, + "grad_norm": 13.611203107193855, + "learning_rate": 9.056211160897555e-08, + "loss": 0.7004730105400085, + "step": 2304 + }, + { + "epoch": 2.8386699507389164, + "grad_norm": 10.23772277567979, + "learning_rate": 8.920944400746589e-08, + "loss": 0.29311710596084595, + "step": 2305 + }, + { + "epoch": 2.8399014778325125, + "grad_norm": 7.167372063418741, + "learning_rate": 8.786686359089747e-08, + "loss": 0.18041157722473145, + "step": 2306 + }, + { + "epoch": 2.8411330049261085, + "grad_norm": 8.672887051600437, + "learning_rate": 8.653437311704648e-08, + "loss": 0.2873387634754181, + "step": 2307 + }, + { + "epoch": 2.8423645320197046, + "grad_norm": 9.699021546064241, + "learning_rate": 8.521197532296188e-08, + "loss": 0.23781178891658783, + "step": 2308 + }, + { + "epoch": 2.8435960591133007, + "grad_norm": 11.643059711853965, + "learning_rate": 8.38996729249636e-08, + "loss": 0.5913131833076477, + "step": 2309 + }, + { + "epoch": 2.844827586206897, + "grad_norm": 12.799008291574818, + "learning_rate": 8.259746861863094e-08, + "loss": 0.9139914512634277, + "step": 2310 + }, + { + "epoch": 2.846059113300493, + "grad_norm": 10.980579183559623, + "learning_rate": 8.130536507880538e-08, + "loss": 0.22883841395378113, + "step": 2311 + }, + { + "epoch": 2.8472906403940885, + "grad_norm": 9.488904590414009, + "learning_rate": 8.002336495957664e-08, + "loss": 0.6467199325561523, + "step": 2312 + }, + { + "epoch": 2.8485221674876846, + "grad_norm": 17.044793614561804, + "learning_rate": 7.875147089428436e-08, + "loss": 0.48100385069847107, + "step": 2313 + }, + { + "epoch": 2.8497536945812807, + "grad_norm": 6.232324566569768, + "learning_rate": 7.748968549550761e-08, + "loss": 0.22535499930381775, + "step": 2314 + }, + { + "epoch": 2.850985221674877, + "grad_norm": 16.357795976490426, + "learning_rate": 7.623801135506148e-08, + "loss": 0.7971012592315674, + "step": 2315 + }, + { + "epoch": 2.852216748768473, + "grad_norm": 10.56546293503534, + "learning_rate": 7.499645104399156e-08, + "loss": 0.6965846419334412, + "step": 2316 + }, + { + "epoch": 2.853448275862069, + "grad_norm": 10.699552582949096, + "learning_rate": 7.376500711257062e-08, + "loss": 0.2827698588371277, + "step": 2317 + }, + { + "epoch": 2.854679802955665, + "grad_norm": 11.75504997847818, + "learning_rate": 7.254368209028862e-08, + "loss": 0.4453064203262329, + "step": 2318 + }, + { + "epoch": 2.855911330049261, + "grad_norm": 10.373311779049724, + "learning_rate": 7.133247848585268e-08, + "loss": 0.5363994836807251, + "step": 2319 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 10.742091428994968, + "learning_rate": 7.013139878717934e-08, + "loss": 0.33071067929267883, + "step": 2320 + }, + { + "epoch": 2.8583743842364533, + "grad_norm": 10.02135718464731, + "learning_rate": 6.894044546138845e-08, + "loss": 0.6118582487106323, + "step": 2321 + }, + { + "epoch": 2.8596059113300494, + "grad_norm": 11.952226631897975, + "learning_rate": 6.775962095480037e-08, + "loss": 0.4941851496696472, + "step": 2322 + }, + { + "epoch": 2.8608374384236455, + "grad_norm": 12.467253293652027, + "learning_rate": 6.65889276929299e-08, + "loss": 0.9043294191360474, + "step": 2323 + }, + { + "epoch": 2.862068965517241, + "grad_norm": 9.372107033246923, + "learning_rate": 6.542836808048181e-08, + "loss": 0.5352662801742554, + "step": 2324 + }, + { + "epoch": 2.863300492610837, + "grad_norm": 13.465637997675985, + "learning_rate": 6.427794450134529e-08, + "loss": 0.622706413269043, + "step": 2325 + }, + { + "epoch": 2.8645320197044333, + "grad_norm": 10.951531479275452, + "learning_rate": 6.313765931858785e-08, + "loss": 0.32065168023109436, + "step": 2326 + }, + { + "epoch": 2.8657635467980294, + "grad_norm": 11.940905797523131, + "learning_rate": 6.200751487445367e-08, + "loss": 0.5308477878570557, + "step": 2327 + }, + { + "epoch": 2.8669950738916254, + "grad_norm": 12.032315008603385, + "learning_rate": 6.088751349035693e-08, + "loss": 0.4006965756416321, + "step": 2328 + }, + { + "epoch": 2.8682266009852215, + "grad_norm": 14.936202143915887, + "learning_rate": 5.977765746687569e-08, + "loss": 0.29346001148223877, + "step": 2329 + }, + { + "epoch": 2.8694581280788176, + "grad_norm": 12.39243720991369, + "learning_rate": 5.8677949083749686e-08, + "loss": 0.17921757698059082, + "step": 2330 + }, + { + "epoch": 2.8706896551724137, + "grad_norm": 9.58038552158238, + "learning_rate": 5.758839059987531e-08, + "loss": 0.3909390866756439, + "step": 2331 + }, + { + "epoch": 2.87192118226601, + "grad_norm": 15.9782663440221, + "learning_rate": 5.650898425329676e-08, + "loss": 0.2947097420692444, + "step": 2332 + }, + { + "epoch": 2.873152709359606, + "grad_norm": 10.207214673211949, + "learning_rate": 5.5439732261209356e-08, + "loss": 0.27580755949020386, + "step": 2333 + }, + { + "epoch": 2.874384236453202, + "grad_norm": 10.944513423861029, + "learning_rate": 5.438063681994732e-08, + "loss": 0.5352618098258972, + "step": 2334 + }, + { + "epoch": 2.875615763546798, + "grad_norm": 11.026909219005717, + "learning_rate": 5.333170010498434e-08, + "loss": 0.4425346553325653, + "step": 2335 + }, + { + "epoch": 2.876847290640394, + "grad_norm": 10.718057032304046, + "learning_rate": 5.229292427092525e-08, + "loss": 0.3107433319091797, + "step": 2336 + }, + { + "epoch": 2.87807881773399, + "grad_norm": 12.247326551233483, + "learning_rate": 5.126431145150546e-08, + "loss": 0.8459264039993286, + "step": 2337 + }, + { + "epoch": 2.8793103448275863, + "grad_norm": 9.9858024833323, + "learning_rate": 5.024586375958429e-08, + "loss": 0.6122205257415771, + "step": 2338 + }, + { + "epoch": 2.8805418719211824, + "grad_norm": 8.326107009918898, + "learning_rate": 4.9237583287139454e-08, + "loss": 0.28234463930130005, + "step": 2339 + }, + { + "epoch": 2.8817733990147785, + "grad_norm": 9.707118891697133, + "learning_rate": 4.823947210526647e-08, + "loss": 0.26258403062820435, + "step": 2340 + }, + { + "epoch": 2.8830049261083746, + "grad_norm": 11.37690573459154, + "learning_rate": 4.72515322641709e-08, + "loss": 0.16676993668079376, + "step": 2341 + }, + { + "epoch": 2.8842364532019706, + "grad_norm": 10.744107147683183, + "learning_rate": 4.627376579316667e-08, + "loss": 0.5982980132102966, + "step": 2342 + }, + { + "epoch": 2.8854679802955667, + "grad_norm": 11.814730049244856, + "learning_rate": 4.530617470066834e-08, + "loss": 0.3576871156692505, + "step": 2343 + }, + { + "epoch": 2.886699507389163, + "grad_norm": 7.558098865292991, + "learning_rate": 4.4348760974192715e-08, + "loss": 0.22213858366012573, + "step": 2344 + }, + { + "epoch": 2.887931034482759, + "grad_norm": 31.227769055767126, + "learning_rate": 4.340152658034835e-08, + "loss": 0.7075624465942383, + "step": 2345 + }, + { + "epoch": 2.8891625615763545, + "grad_norm": 13.602269942674353, + "learning_rate": 4.246447346483662e-08, + "loss": 0.35476282238960266, + "step": 2346 + }, + { + "epoch": 2.8903940886699506, + "grad_norm": 11.66167288478714, + "learning_rate": 4.153760355244507e-08, + "loss": 0.4569534659385681, + "step": 2347 + }, + { + "epoch": 2.8916256157635467, + "grad_norm": 12.232619433370953, + "learning_rate": 4.062091874704355e-08, + "loss": 0.8425757884979248, + "step": 2348 + }, + { + "epoch": 2.892857142857143, + "grad_norm": 15.584381566055246, + "learning_rate": 3.971442093158195e-08, + "loss": 0.6543349623680115, + "step": 2349 + }, + { + "epoch": 2.894088669950739, + "grad_norm": 12.232909525407603, + "learning_rate": 3.8818111968083607e-08, + "loss": 0.4949587285518646, + "step": 2350 + }, + { + "epoch": 2.895320197044335, + "grad_norm": 28.009977519758436, + "learning_rate": 3.7931993697644664e-08, + "loss": 1.0205111503601074, + "step": 2351 + }, + { + "epoch": 2.896551724137931, + "grad_norm": 8.083430035021566, + "learning_rate": 3.7056067940427484e-08, + "loss": 0.429599404335022, + "step": 2352 + }, + { + "epoch": 2.897783251231527, + "grad_norm": 11.304307823971973, + "learning_rate": 3.6190336495659504e-08, + "loss": 0.6471319198608398, + "step": 2353 + }, + { + "epoch": 2.899014778325123, + "grad_norm": 11.052274245265034, + "learning_rate": 3.533480114162713e-08, + "loss": 0.6227458715438843, + "step": 2354 + }, + { + "epoch": 2.9002463054187193, + "grad_norm": 10.145305358695179, + "learning_rate": 3.448946363567296e-08, + "loss": 0.35620149970054626, + "step": 2355 + }, + { + "epoch": 2.9014778325123154, + "grad_norm": 9.735362530555188, + "learning_rate": 3.365432571419247e-08, + "loss": 0.41157659888267517, + "step": 2356 + }, + { + "epoch": 2.9027093596059115, + "grad_norm": 16.113614254695477, + "learning_rate": 3.282938909263122e-08, + "loss": 0.39660418033599854, + "step": 2357 + }, + { + "epoch": 2.903940886699507, + "grad_norm": 12.303598539070832, + "learning_rate": 3.201465546547988e-08, + "loss": 0.37891146540641785, + "step": 2358 + }, + { + "epoch": 2.905172413793103, + "grad_norm": 11.49013243084427, + "learning_rate": 3.121012650627031e-08, + "loss": 0.4459425210952759, + "step": 2359 + }, + { + "epoch": 2.9064039408866993, + "grad_norm": 12.062068468114942, + "learning_rate": 3.041580386757448e-08, + "loss": 0.4933587610721588, + "step": 2360 + }, + { + "epoch": 2.9076354679802954, + "grad_norm": 7.691939807180967, + "learning_rate": 2.9631689180999457e-08, + "loss": 0.16229723393917084, + "step": 2361 + }, + { + "epoch": 2.9088669950738915, + "grad_norm": 11.649633348013484, + "learning_rate": 2.885778405718409e-08, + "loss": 0.4784936308860779, + "step": 2362 + }, + { + "epoch": 2.9100985221674875, + "grad_norm": 20.64984541908695, + "learning_rate": 2.8094090085795112e-08, + "loss": 0.6622560620307922, + "step": 2363 + }, + { + "epoch": 2.9113300492610836, + "grad_norm": 9.783513206502265, + "learning_rate": 2.7340608835526584e-08, + "loss": 0.3672278821468353, + "step": 2364 + }, + { + "epoch": 2.9125615763546797, + "grad_norm": 6.04349473256102, + "learning_rate": 2.6597341854092685e-08, + "loss": 0.3247770667076111, + "step": 2365 + }, + { + "epoch": 2.913793103448276, + "grad_norm": 11.650085297412613, + "learning_rate": 2.586429066822771e-08, + "loss": 0.3467229902744293, + "step": 2366 + }, + { + "epoch": 2.915024630541872, + "grad_norm": 11.842612737683362, + "learning_rate": 2.514145678368163e-08, + "loss": 0.6725019812583923, + "step": 2367 + }, + { + "epoch": 2.916256157635468, + "grad_norm": 8.454338307427385, + "learning_rate": 2.4428841685217863e-08, + "loss": 0.6760755777359009, + "step": 2368 + }, + { + "epoch": 2.917487684729064, + "grad_norm": 13.555178809367312, + "learning_rate": 2.3726446836608298e-08, + "loss": 0.5354422330856323, + "step": 2369 + }, + { + "epoch": 2.91871921182266, + "grad_norm": 11.004737348047312, + "learning_rate": 2.3034273680632157e-08, + "loss": 0.3656280040740967, + "step": 2370 + }, + { + "epoch": 2.9199507389162562, + "grad_norm": 9.99595612427158, + "learning_rate": 2.235232363907269e-08, + "loss": 0.28186920285224915, + "step": 2371 + }, + { + "epoch": 2.9211822660098523, + "grad_norm": 16.789031513751276, + "learning_rate": 2.168059811271439e-08, + "loss": 0.31556010246276855, + "step": 2372 + }, + { + "epoch": 2.9224137931034484, + "grad_norm": 7.870447962098653, + "learning_rate": 2.101909848133743e-08, + "loss": 0.33978280425071716, + "step": 2373 + }, + { + "epoch": 2.9236453201970445, + "grad_norm": 13.322556254888749, + "learning_rate": 2.0367826103720457e-08, + "loss": 0.5645813941955566, + "step": 2374 + }, + { + "epoch": 2.9248768472906406, + "grad_norm": 6.936377752521131, + "learning_rate": 1.9726782317632255e-08, + "loss": 0.21976767480373383, + "step": 2375 + }, + { + "epoch": 2.9261083743842367, + "grad_norm": 16.201679118604396, + "learning_rate": 1.9095968439830637e-08, + "loss": 0.6068276166915894, + "step": 2376 + }, + { + "epoch": 2.9273399014778327, + "grad_norm": 10.683769815067068, + "learning_rate": 1.8475385766063002e-08, + "loss": 0.2844882607460022, + "step": 2377 + }, + { + "epoch": 2.928571428571429, + "grad_norm": 22.182288301690132, + "learning_rate": 1.786503557105912e-08, + "loss": 1.1885827779769897, + "step": 2378 + }, + { + "epoch": 2.9298029556650245, + "grad_norm": 8.221573464179809, + "learning_rate": 1.7264919108529455e-08, + "loss": 0.4241114854812622, + "step": 2379 + }, + { + "epoch": 2.9310344827586206, + "grad_norm": 10.23479597630979, + "learning_rate": 1.6675037611165735e-08, + "loss": 0.9062713980674744, + "step": 2380 + }, + { + "epoch": 2.9322660098522166, + "grad_norm": 9.83143734077978, + "learning_rate": 1.6095392290635393e-08, + "loss": 0.29996055364608765, + "step": 2381 + }, + { + "epoch": 2.9334975369458127, + "grad_norm": 9.191744534619497, + "learning_rate": 1.552598433757879e-08, + "loss": 0.3901692032814026, + "step": 2382 + }, + { + "epoch": 2.934729064039409, + "grad_norm": 10.314975796862411, + "learning_rate": 1.4966814921608674e-08, + "loss": 0.36974531412124634, + "step": 2383 + }, + { + "epoch": 2.935960591133005, + "grad_norm": 10.965587726479475, + "learning_rate": 1.441788519130738e-08, + "loss": 0.2913818359375, + "step": 2384 + }, + { + "epoch": 2.937192118226601, + "grad_norm": 26.225721932440074, + "learning_rate": 1.3879196274224626e-08, + "loss": 2.8897290229797363, + "step": 2385 + }, + { + "epoch": 2.938423645320197, + "grad_norm": 16.567199226805975, + "learning_rate": 1.335074927687141e-08, + "loss": 0.7396224141120911, + "step": 2386 + }, + { + "epoch": 2.939655172413793, + "grad_norm": 10.384159480919202, + "learning_rate": 1.2832545284724995e-08, + "loss": 0.2923913896083832, + "step": 2387 + }, + { + "epoch": 2.9408866995073892, + "grad_norm": 12.315507900916186, + "learning_rate": 1.2324585362220032e-08, + "loss": 0.60726398229599, + "step": 2388 + }, + { + "epoch": 2.9421182266009853, + "grad_norm": 10.077538225946919, + "learning_rate": 1.1826870552749669e-08, + "loss": 0.3081626892089844, + "step": 2389 + }, + { + "epoch": 2.9433497536945814, + "grad_norm": 15.192636407836343, + "learning_rate": 1.1339401878663337e-08, + "loss": 0.7774905562400818, + "step": 2390 + }, + { + "epoch": 2.9445812807881775, + "grad_norm": 12.649581445218459, + "learning_rate": 1.0862180341263962e-08, + "loss": 0.5568622350692749, + "step": 2391 + }, + { + "epoch": 2.945812807881773, + "grad_norm": 11.4557765341612, + "learning_rate": 1.039520692080409e-08, + "loss": 0.42753443121910095, + "step": 2392 + }, + { + "epoch": 2.947044334975369, + "grad_norm": 12.049826060673517, + "learning_rate": 9.938482576487551e-09, + "loss": 0.33313125371932983, + "step": 2393 + }, + { + "epoch": 2.9482758620689653, + "grad_norm": 11.358169603413613, + "learning_rate": 9.492008246466122e-09, + "loss": 0.4345099925994873, + "step": 2394 + }, + { + "epoch": 2.9495073891625614, + "grad_norm": 15.061185553672066, + "learning_rate": 9.055784847836202e-09, + "loss": 0.6844139695167542, + "step": 2395 + }, + { + "epoch": 2.9507389162561575, + "grad_norm": 12.25434358933355, + "learning_rate": 8.629813276637144e-09, + "loss": 0.4944530725479126, + "step": 2396 + }, + { + "epoch": 2.9519704433497536, + "grad_norm": 7.240836775147592, + "learning_rate": 8.214094407851814e-09, + "loss": 0.1517336368560791, + "step": 2397 + }, + { + "epoch": 2.9532019704433496, + "grad_norm": 11.570980194113849, + "learning_rate": 7.808629095402697e-09, + "loss": 0.24804279208183289, + "step": 2398 + }, + { + "epoch": 2.9544334975369457, + "grad_norm": 15.785024108321435, + "learning_rate": 7.413418172149689e-09, + "loss": 1.2773240804672241, + "step": 2399 + }, + { + "epoch": 2.955665024630542, + "grad_norm": 12.516388230034497, + "learning_rate": 7.028462449889528e-09, + "loss": 0.20905320346355438, + "step": 2400 + }, + { + "epoch": 2.956896551724138, + "grad_norm": 6.362652358430743, + "learning_rate": 6.6537627193558055e-09, + "loss": 0.24830211699008942, + "step": 2401 + }, + { + "epoch": 2.958128078817734, + "grad_norm": 9.391013644944394, + "learning_rate": 6.289319750212852e-09, + "loss": 0.30148234963417053, + "step": 2402 + }, + { + "epoch": 2.95935960591133, + "grad_norm": 11.036169214095409, + "learning_rate": 5.93513429105741e-09, + "loss": 0.7273882031440735, + "step": 2403 + }, + { + "epoch": 2.960591133004926, + "grad_norm": 10.956019864515577, + "learning_rate": 5.591207069417515e-09, + "loss": 0.4958484172821045, + "step": 2404 + }, + { + "epoch": 2.9618226600985222, + "grad_norm": 13.272684139309336, + "learning_rate": 5.257538791749173e-09, + "loss": 0.5852301120758057, + "step": 2405 + }, + { + "epoch": 2.9630541871921183, + "grad_norm": 15.300683310135565, + "learning_rate": 4.934130143435245e-09, + "loss": 0.5483534336090088, + "step": 2406 + }, + { + "epoch": 2.9642857142857144, + "grad_norm": 9.624016617554009, + "learning_rate": 4.6209817887848955e-09, + "loss": 0.49854928255081177, + "step": 2407 + }, + { + "epoch": 2.9655172413793105, + "grad_norm": 8.615173379839112, + "learning_rate": 4.318094371031922e-09, + "loss": 0.9770829677581787, + "step": 2408 + }, + { + "epoch": 2.9667487684729066, + "grad_norm": 15.370084776473758, + "learning_rate": 4.025468512333098e-09, + "loss": 0.4265647530555725, + "step": 2409 + }, + { + "epoch": 2.9679802955665027, + "grad_norm": 12.632393723486729, + "learning_rate": 3.743104813767051e-09, + "loss": 0.6890873908996582, + "step": 2410 + }, + { + "epoch": 2.9692118226600988, + "grad_norm": 8.772985107195037, + "learning_rate": 3.471003855332611e-09, + "loss": 0.28604504466056824, + "step": 2411 + }, + { + "epoch": 2.970443349753695, + "grad_norm": 9.587235477416659, + "learning_rate": 3.2091661959487986e-09, + "loss": 0.3280025124549866, + "step": 2412 + }, + { + "epoch": 2.9716748768472905, + "grad_norm": 9.74052346916064, + "learning_rate": 2.9575923734520562e-09, + "loss": 0.23375985026359558, + "step": 2413 + }, + { + "epoch": 2.9729064039408866, + "grad_norm": 14.377712378651319, + "learning_rate": 2.7162829045979113e-09, + "loss": 0.5062013864517212, + "step": 2414 + }, + { + "epoch": 2.9741379310344827, + "grad_norm": 10.486023439825937, + "learning_rate": 2.4852382850554245e-09, + "loss": 0.46517398953437805, + "step": 2415 + }, + { + "epoch": 2.9753694581280787, + "grad_norm": 7.705201332847603, + "learning_rate": 2.264458989410523e-09, + "loss": 0.43281105160713196, + "step": 2416 + }, + { + "epoch": 2.976600985221675, + "grad_norm": 9.481633319521942, + "learning_rate": 2.0539454711626663e-09, + "loss": 0.6278485655784607, + "step": 2417 + }, + { + "epoch": 2.977832512315271, + "grad_norm": 12.691647261969463, + "learning_rate": 1.8536981627254036e-09, + "loss": 0.3320518136024475, + "step": 2418 + }, + { + "epoch": 2.979064039408867, + "grad_norm": 9.582038617142, + "learning_rate": 1.6637174754230435e-09, + "loss": 0.4568738341331482, + "step": 2419 + }, + { + "epoch": 2.980295566502463, + "grad_norm": 10.563009615677867, + "learning_rate": 1.4840037994923173e-09, + "loss": 0.24025380611419678, + "step": 2420 + }, + { + "epoch": 2.981527093596059, + "grad_norm": 14.650292148384931, + "learning_rate": 1.3145575040801605e-09, + "loss": 0.33217573165893555, + "step": 2421 + }, + { + "epoch": 2.9827586206896552, + "grad_norm": 23.286828169967034, + "learning_rate": 1.1553789372453771e-09, + "loss": 1.5295354127883911, + "step": 2422 + }, + { + "epoch": 2.9839901477832513, + "grad_norm": 16.800662700378666, + "learning_rate": 1.0064684259525337e-09, + "loss": 0.6207250952720642, + "step": 2423 + }, + { + "epoch": 2.9852216748768474, + "grad_norm": 20.655163645870832, + "learning_rate": 8.678262760775102e-10, + "loss": 0.4011062681674957, + "step": 2424 + }, + { + "epoch": 2.9864532019704435, + "grad_norm": 12.812116716093689, + "learning_rate": 7.394527724030598e-10, + "loss": 0.8355351090431213, + "step": 2425 + }, + { + "epoch": 2.987684729064039, + "grad_norm": 13.524667045497342, + "learning_rate": 6.213481786199182e-10, + "loss": 0.6552157998085022, + "step": 2426 + }, + { + "epoch": 2.9889162561576352, + "grad_norm": 9.071239617590464, + "learning_rate": 5.13512737324029e-10, + "loss": 0.4416411519050598, + "step": 2427 + }, + { + "epoch": 2.9901477832512313, + "grad_norm": 12.103653519709662, + "learning_rate": 4.159466700187631e-10, + "loss": 0.3720128834247589, + "step": 2428 + }, + { + "epoch": 2.9913793103448274, + "grad_norm": 7.981239501743612, + "learning_rate": 3.2865017711380955e-10, + "loss": 0.6710848212242126, + "step": 2429 + }, + { + "epoch": 2.9926108374384235, + "grad_norm": 11.769326063023964, + "learning_rate": 2.516234379235094e-10, + "loss": 0.7640970349311829, + "step": 2430 + }, + { + "epoch": 2.9938423645320196, + "grad_norm": 11.664052062324599, + "learning_rate": 1.848666106674113e-10, + "loss": 0.5783921480178833, + "step": 2431 + }, + { + "epoch": 2.9950738916256157, + "grad_norm": 11.283478806003906, + "learning_rate": 1.2837983246916098e-10, + "loss": 0.411626935005188, + "step": 2432 + }, + { + "epoch": 2.9963054187192117, + "grad_norm": 11.703360380276939, + "learning_rate": 8.216321935816673e-11, + "loss": 0.529446005821228, + "step": 2433 + }, + { + "epoch": 2.997536945812808, + "grad_norm": 9.632699414961296, + "learning_rate": 4.6216866266823867e-11, + "loss": 0.44549500942230225, + "step": 2434 + }, + { + "epoch": 2.998768472906404, + "grad_norm": 9.699682514575105, + "learning_rate": 2.0540847032179955e-11, + "loss": 0.2854122519493103, + "step": 2435 + }, + { + "epoch": 3.0, + "grad_norm": 6.925750902905979, + "learning_rate": 5.135214394824672e-12, + "loss": 0.4455873966217041, + "step": 2436 + }, + { + "epoch": 3.0, + "step": 2436, + "total_flos": 6456127242240.0, + "train_loss": 1.6602046456561104, + "train_runtime": 2865.3381, + "train_samples_per_second": 3.4, + "train_steps_per_second": 0.85 + } + ], + "logging_steps": 1, + "max_steps": 2436, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6456127242240.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..cbe8fd6 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec34fd256726eff4bd3e090d5a43f5d8678544da2d9d64d7f16e355b1739463 +size 7096 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..fe9a27c Binary files /dev/null and b/training_loss.png differ