commit 259b0c554815ff84eac3f5a3096d689cc7f5eab5 Author: ModelHub XC Date: Fri Jun 12 17:14:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: shuoxing/llama3-8b-full-pretrain-wash-c4-0-6m-bs4 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..21d95fa --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +--- +library_name: transformers +license: llama3 +base_model: shuoxing/llama3-8b-full-pretrain-junk-tweet-1m-en-reproduce-bs8 +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: llama3-8b-full-pretrain-wash-c4-0-6m-bs4 + results: [] +--- + + + +# llama3-8b-full-pretrain-wash-c4-0-6m-bs4 + +This model is a fine-tuned version of [shuoxing/llama3-8b-full-pretrain-junk-tweet-1m-en-reproduce-bs8](https://huggingface.co/shuoxing/llama3-8b-full-pretrain-junk-tweet-1m-en-reproduce-bs8) on the c4_0_6m dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 1e-05 +- train_batch_size: 1 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- total_train_batch_size: 4 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.1 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- Transformers 5.2.0 +- Pytorch 2.6.0+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..3a17bd6 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 2578979020800.0, + "train_loss": 1.6905082198137384, + "train_runtime": 1266.0882, + "train_samples_per_second": 2.945, + "train_steps_per_second": 0.737 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..39bd0c9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,5 @@ +{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|> + +'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|> + +' }}{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..a5b8bc3 --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128009, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pad_token_id": 128009, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_parameters": { + "rope_theta": 500000.0, + "rope_type": "default" + }, + "tie_word_embeddings": false, + "transformers_version": "5.2.0", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..eb23973 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128009 + ], + "max_length": 4096, + "pad_token_id": 128009, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "5.2.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..9c2e0e5 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5557a6e8a94a28a6971f3bd37a86d631d434c536e3a470029112fbdbdba2912 +size 16060556616 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..75e0e01 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,16 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "is_local": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "TokenizersBackend" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..3a17bd6 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 3.0, + "total_flos": 2578979020800.0, + "train_loss": 1.6905082198137384, + "train_runtime": 1266.0882, + "train_samples_per_second": 2.945, + "train_steps_per_second": 0.737 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..59fb710 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,934 @@ +{"current_steps": 1, "total_steps": 933, "loss": 2.6452956199645996, "lr": 0.0, "epoch": 0.003215434083601286, "percentage": 0.11, "elapsed_time": "0:00:03", "remaining_time": "0:56:09"} +{"current_steps": 2, "total_steps": 933, "loss": 4.55789852142334, "lr": 1.0638297872340426e-07, "epoch": 0.006430868167202572, "percentage": 0.21, "elapsed_time": "0:00:05", "remaining_time": "0:39:04"} +{"current_steps": 3, "total_steps": 933, "loss": 4.399077415466309, "lr": 2.1276595744680852e-07, "epoch": 0.00964630225080386, "percentage": 0.32, "elapsed_time": "0:00:06", "remaining_time": "0:31:33"} +{"current_steps": 4, "total_steps": 933, "loss": 3.8412859439849854, "lr": 3.1914893617021275e-07, "epoch": 0.012861736334405145, "percentage": 0.43, "elapsed_time": "0:00:07", "remaining_time": "0:27:49"} +{"current_steps": 5, "total_steps": 933, "loss": 4.593955993652344, "lr": 4.2553191489361704e-07, "epoch": 0.01607717041800643, "percentage": 0.54, "elapsed_time": "0:00:08", "remaining_time": "0:25:38"} +{"current_steps": 6, "total_steps": 933, "loss": 4.042428016662598, "lr": 5.319148936170213e-07, "epoch": 0.01929260450160772, "percentage": 0.64, "elapsed_time": "0:00:09", "remaining_time": "0:24:06"} +{"current_steps": 7, "total_steps": 933, "loss": 4.12535285949707, "lr": 6.382978723404255e-07, "epoch": 0.022508038585209004, "percentage": 0.75, "elapsed_time": "0:00:10", "remaining_time": "0:23:00"} +{"current_steps": 8, "total_steps": 933, "loss": 3.521498680114746, "lr": 7.446808510638298e-07, "epoch": 0.02572347266881029, "percentage": 0.86, "elapsed_time": "0:00:11", "remaining_time": "0:22:13"} +{"current_steps": 9, "total_steps": 933, "loss": 3.6315274238586426, "lr": 8.510638297872341e-07, "epoch": 0.028938906752411574, "percentage": 0.96, "elapsed_time": "0:00:12", "remaining_time": "0:21:37"} +{"current_steps": 10, "total_steps": 933, "loss": 4.302469730377197, "lr": 9.574468085106384e-07, "epoch": 0.03215434083601286, "percentage": 1.07, "elapsed_time": "0:00:13", "remaining_time": "0:21:05"} +{"current_steps": 11, "total_steps": 933, "loss": 4.56865119934082, "lr": 1.0638297872340427e-06, "epoch": 0.03536977491961415, "percentage": 1.18, "elapsed_time": "0:00:14", "remaining_time": "0:20:40"} +{"current_steps": 12, "total_steps": 933, "loss": 4.283186912536621, "lr": 1.170212765957447e-06, "epoch": 0.03858520900321544, "percentage": 1.29, "elapsed_time": "0:00:15", "remaining_time": "0:20:19"} +{"current_steps": 13, "total_steps": 933, "loss": 4.2185187339782715, "lr": 1.276595744680851e-06, "epoch": 0.04180064308681672, "percentage": 1.39, "elapsed_time": "0:00:16", "remaining_time": "0:19:59"} +{"current_steps": 14, "total_steps": 933, "loss": 3.8783042430877686, "lr": 1.3829787234042555e-06, "epoch": 0.04501607717041801, "percentage": 1.5, "elapsed_time": "0:00:18", "remaining_time": "0:19:43"} +{"current_steps": 15, "total_steps": 933, "loss": 4.050146579742432, "lr": 1.4893617021276596e-06, "epoch": 0.04823151125401929, "percentage": 1.61, "elapsed_time": "0:00:19", "remaining_time": "0:19:29"} +{"current_steps": 16, "total_steps": 933, "loss": 3.825010299682617, "lr": 1.595744680851064e-06, "epoch": 0.05144694533762058, "percentage": 1.71, "elapsed_time": "0:00:20", "remaining_time": "0:19:17"} +{"current_steps": 17, "total_steps": 933, "loss": 3.562378406524658, "lr": 1.7021276595744682e-06, "epoch": 0.05466237942122187, "percentage": 1.82, "elapsed_time": "0:00:21", "remaining_time": "0:19:05"} +{"current_steps": 18, "total_steps": 933, "loss": 3.8451032638549805, "lr": 1.8085106382978727e-06, "epoch": 0.05787781350482315, "percentage": 1.93, "elapsed_time": "0:00:22", "remaining_time": "0:18:55"} +{"current_steps": 19, "total_steps": 933, "loss": 3.538512945175171, "lr": 1.9148936170212767e-06, "epoch": 0.06109324758842444, "percentage": 2.04, "elapsed_time": "0:00:23", "remaining_time": "0:18:46"} +{"current_steps": 20, "total_steps": 933, "loss": 3.4035849571228027, "lr": 2.021276595744681e-06, "epoch": 0.06430868167202572, "percentage": 2.14, "elapsed_time": "0:00:24", "remaining_time": "0:18:38"} +{"current_steps": 21, "total_steps": 933, "loss": 4.060901165008545, "lr": 2.1276595744680853e-06, "epoch": 0.06752411575562701, "percentage": 2.25, "elapsed_time": "0:00:25", "remaining_time": "0:18:31"} +{"current_steps": 22, "total_steps": 933, "loss": 3.4492287635803223, "lr": 2.2340425531914894e-06, "epoch": 0.0707395498392283, "percentage": 2.36, "elapsed_time": "0:00:26", "remaining_time": "0:18:24"} +{"current_steps": 23, "total_steps": 933, "loss": 3.4821548461914062, "lr": 2.340425531914894e-06, "epoch": 0.07395498392282958, "percentage": 2.47, "elapsed_time": "0:00:27", "remaining_time": "0:18:18"} +{"current_steps": 24, "total_steps": 933, "loss": 2.6914403438568115, "lr": 2.446808510638298e-06, "epoch": 0.07717041800643087, "percentage": 2.57, "elapsed_time": "0:00:28", "remaining_time": "0:18:13"} +{"current_steps": 25, "total_steps": 933, "loss": 3.381208896636963, "lr": 2.553191489361702e-06, "epoch": 0.08038585209003216, "percentage": 2.68, "elapsed_time": "0:00:29", "remaining_time": "0:18:08"} +{"current_steps": 26, "total_steps": 933, "loss": 3.723344564437866, "lr": 2.6595744680851065e-06, "epoch": 0.08360128617363344, "percentage": 2.79, "elapsed_time": "0:00:31", "remaining_time": "0:18:03"} +{"current_steps": 27, "total_steps": 933, "loss": 3.4975712299346924, "lr": 2.765957446808511e-06, "epoch": 0.08681672025723473, "percentage": 2.89, "elapsed_time": "0:00:32", "remaining_time": "0:17:58"} +{"current_steps": 28, "total_steps": 933, "loss": 3.742828369140625, "lr": 2.8723404255319155e-06, "epoch": 0.09003215434083602, "percentage": 3.0, "elapsed_time": "0:00:33", "remaining_time": "0:17:54"} +{"current_steps": 29, "total_steps": 933, "loss": 3.7880096435546875, "lr": 2.978723404255319e-06, "epoch": 0.0932475884244373, "percentage": 3.11, "elapsed_time": "0:00:34", "remaining_time": "0:17:49"} +{"current_steps": 30, "total_steps": 933, "loss": 3.091320037841797, "lr": 3.0851063829787237e-06, "epoch": 0.09646302250803858, "percentage": 3.22, "elapsed_time": "0:00:35", "remaining_time": "0:17:45"} +{"current_steps": 31, "total_steps": 933, "loss": 3.397942543029785, "lr": 3.191489361702128e-06, "epoch": 0.09967845659163987, "percentage": 3.32, "elapsed_time": "0:00:36", "remaining_time": "0:17:41"} +{"current_steps": 32, "total_steps": 933, "loss": 3.3407297134399414, "lr": 3.297872340425532e-06, "epoch": 0.10289389067524116, "percentage": 3.43, "elapsed_time": "0:00:37", "remaining_time": "0:17:37"} +{"current_steps": 33, "total_steps": 933, "loss": 3.5374701023101807, "lr": 3.4042553191489363e-06, "epoch": 0.10610932475884244, "percentage": 3.54, "elapsed_time": "0:00:38", "remaining_time": "0:17:33"} +{"current_steps": 34, "total_steps": 933, "loss": 3.028085708618164, "lr": 3.510638297872341e-06, "epoch": 0.10932475884244373, "percentage": 3.64, "elapsed_time": "0:00:39", "remaining_time": "0:17:29"} +{"current_steps": 35, "total_steps": 933, "loss": 3.191539764404297, "lr": 3.6170212765957453e-06, "epoch": 0.11254019292604502, "percentage": 3.75, "elapsed_time": "0:00:40", "remaining_time": "0:17:26"} +{"current_steps": 36, "total_steps": 933, "loss": 3.315582275390625, "lr": 3.723404255319149e-06, "epoch": 0.1157556270096463, "percentage": 3.86, "elapsed_time": "0:00:41", "remaining_time": "0:17:23"} +{"current_steps": 37, "total_steps": 933, "loss": 3.1429853439331055, "lr": 3.8297872340425535e-06, "epoch": 0.1189710610932476, "percentage": 3.97, "elapsed_time": "0:00:42", "remaining_time": "0:17:19"} +{"current_steps": 38, "total_steps": 933, "loss": 3.2580060958862305, "lr": 3.936170212765958e-06, "epoch": 0.12218649517684887, "percentage": 4.07, "elapsed_time": "0:00:44", "remaining_time": "0:17:16"} +{"current_steps": 39, "total_steps": 933, "loss": 2.8858089447021484, "lr": 4.042553191489362e-06, "epoch": 0.12540192926045016, "percentage": 4.18, "elapsed_time": "0:00:45", "remaining_time": "0:17:13"} +{"current_steps": 40, "total_steps": 933, "loss": 3.385632038116455, "lr": 4.148936170212766e-06, "epoch": 0.12861736334405144, "percentage": 4.29, "elapsed_time": "0:00:46", "remaining_time": "0:17:10"} +{"current_steps": 41, "total_steps": 933, "loss": 2.8703691959381104, "lr": 4.255319148936171e-06, "epoch": 0.13183279742765272, "percentage": 4.39, "elapsed_time": "0:00:47", "remaining_time": "0:17:08"} +{"current_steps": 42, "total_steps": 933, "loss": 3.3186304569244385, "lr": 4.361702127659575e-06, "epoch": 0.13504823151125403, "percentage": 4.5, "elapsed_time": "0:00:48", "remaining_time": "0:17:05"} +{"current_steps": 43, "total_steps": 933, "loss": 3.2565927505493164, "lr": 4.468085106382979e-06, "epoch": 0.1382636655948553, "percentage": 4.61, "elapsed_time": "0:00:49", "remaining_time": "0:17:02"} +{"current_steps": 44, "total_steps": 933, "loss": 3.012521982192993, "lr": 4.574468085106383e-06, "epoch": 0.1414790996784566, "percentage": 4.72, "elapsed_time": "0:00:50", "remaining_time": "0:17:00"} +{"current_steps": 45, "total_steps": 933, "loss": 3.093921184539795, "lr": 4.680851063829788e-06, "epoch": 0.14469453376205788, "percentage": 4.82, "elapsed_time": "0:00:51", "remaining_time": "0:16:57"} +{"current_steps": 46, "total_steps": 933, "loss": 3.1773462295532227, "lr": 4.787234042553192e-06, "epoch": 0.14790996784565916, "percentage": 4.93, "elapsed_time": "0:00:52", "remaining_time": "0:16:55"} +{"current_steps": 47, "total_steps": 933, "loss": 2.971510648727417, "lr": 4.893617021276596e-06, "epoch": 0.15112540192926044, "percentage": 5.04, "elapsed_time": "0:00:53", "remaining_time": "0:16:52"} +{"current_steps": 48, "total_steps": 933, "loss": 3.365861415863037, "lr": 5e-06, "epoch": 0.15434083601286175, "percentage": 5.14, "elapsed_time": "0:00:54", "remaining_time": "0:16:50"} +{"current_steps": 49, "total_steps": 933, "loss": 3.331052780151367, "lr": 5.106382978723404e-06, "epoch": 0.15755627009646303, "percentage": 5.25, "elapsed_time": "0:00:55", "remaining_time": "0:16:48"} +{"current_steps": 50, "total_steps": 933, "loss": 3.513488292694092, "lr": 5.212765957446809e-06, "epoch": 0.1607717041800643, "percentage": 5.36, "elapsed_time": "0:00:56", "remaining_time": "0:16:45"} +{"current_steps": 51, "total_steps": 933, "loss": 3.165461540222168, "lr": 5.319148936170213e-06, "epoch": 0.1639871382636656, "percentage": 5.47, "elapsed_time": "0:00:58", "remaining_time": "0:16:43"} +{"current_steps": 52, "total_steps": 933, "loss": 3.308755397796631, "lr": 5.425531914893617e-06, "epoch": 0.16720257234726688, "percentage": 5.57, "elapsed_time": "0:00:59", "remaining_time": "0:16:41"} +{"current_steps": 53, "total_steps": 933, "loss": 2.905186653137207, "lr": 5.531914893617022e-06, "epoch": 0.17041800643086816, "percentage": 5.68, "elapsed_time": "0:01:00", "remaining_time": "0:16:39"} +{"current_steps": 54, "total_steps": 933, "loss": 3.2160592079162598, "lr": 5.638297872340426e-06, "epoch": 0.17363344051446947, "percentage": 5.79, "elapsed_time": "0:01:01", "remaining_time": "0:16:37"} +{"current_steps": 55, "total_steps": 933, "loss": 2.809837818145752, "lr": 5.744680851063831e-06, "epoch": 0.17684887459807075, "percentage": 5.89, "elapsed_time": "0:01:02", "remaining_time": "0:16:35"} +{"current_steps": 56, "total_steps": 933, "loss": 3.355532646179199, "lr": 5.851063829787235e-06, "epoch": 0.18006430868167203, "percentage": 6.0, "elapsed_time": "0:01:03", "remaining_time": "0:16:33"} +{"current_steps": 57, "total_steps": 933, "loss": 2.912825584411621, "lr": 5.957446808510638e-06, "epoch": 0.1832797427652733, "percentage": 6.11, "elapsed_time": "0:01:04", "remaining_time": "0:16:32"} +{"current_steps": 58, "total_steps": 933, "loss": 2.738528251647949, "lr": 6.063829787234044e-06, "epoch": 0.1864951768488746, "percentage": 6.22, "elapsed_time": "0:01:05", "remaining_time": "0:16:30"} +{"current_steps": 59, "total_steps": 933, "loss": 3.5236358642578125, "lr": 6.170212765957447e-06, "epoch": 0.18971061093247588, "percentage": 6.32, "elapsed_time": "0:01:06", "remaining_time": "0:16:28"} +{"current_steps": 60, "total_steps": 933, "loss": 2.438237190246582, "lr": 6.276595744680851e-06, "epoch": 0.19292604501607716, "percentage": 6.43, "elapsed_time": "0:01:07", "remaining_time": "0:16:26"} +{"current_steps": 61, "total_steps": 933, "loss": 3.002664804458618, "lr": 6.382978723404256e-06, "epoch": 0.19614147909967847, "percentage": 6.54, "elapsed_time": "0:01:08", "remaining_time": "0:16:24"} +{"current_steps": 62, "total_steps": 933, "loss": 2.6102824211120605, "lr": 6.48936170212766e-06, "epoch": 0.19935691318327975, "percentage": 6.65, "elapsed_time": "0:01:09", "remaining_time": "0:16:22"} +{"current_steps": 63, "total_steps": 933, "loss": 2.8119864463806152, "lr": 6.595744680851064e-06, "epoch": 0.20257234726688103, "percentage": 6.75, "elapsed_time": "0:01:11", "remaining_time": "0:16:20"} +{"current_steps": 64, "total_steps": 933, "loss": 3.0033774375915527, "lr": 6.702127659574469e-06, "epoch": 0.2057877813504823, "percentage": 6.86, "elapsed_time": "0:01:12", "remaining_time": "0:16:18"} +{"current_steps": 65, "total_steps": 933, "loss": 3.0424952507019043, "lr": 6.808510638297873e-06, "epoch": 0.2090032154340836, "percentage": 6.97, "elapsed_time": "0:01:13", "remaining_time": "0:16:17"} +{"current_steps": 66, "total_steps": 933, "loss": 3.125572443008423, "lr": 6.914893617021278e-06, "epoch": 0.21221864951768488, "percentage": 7.07, "elapsed_time": "0:01:14", "remaining_time": "0:16:15"} +{"current_steps": 67, "total_steps": 933, "loss": 2.6417791843414307, "lr": 7.021276595744682e-06, "epoch": 0.21543408360128619, "percentage": 7.18, "elapsed_time": "0:01:15", "remaining_time": "0:16:13"} +{"current_steps": 68, "total_steps": 933, "loss": 2.795292615890503, "lr": 7.127659574468085e-06, "epoch": 0.21864951768488747, "percentage": 7.29, "elapsed_time": "0:01:16", "remaining_time": "0:16:11"} +{"current_steps": 69, "total_steps": 933, "loss": 2.806095600128174, "lr": 7.234042553191491e-06, "epoch": 0.22186495176848875, "percentage": 7.4, "elapsed_time": "0:01:17", "remaining_time": "0:16:10"} +{"current_steps": 70, "total_steps": 933, "loss": 3.1251420974731445, "lr": 7.340425531914894e-06, "epoch": 0.22508038585209003, "percentage": 7.5, "elapsed_time": "0:01:18", "remaining_time": "0:16:08"} +{"current_steps": 71, "total_steps": 933, "loss": 3.1366963386535645, "lr": 7.446808510638298e-06, "epoch": 0.2282958199356913, "percentage": 7.61, "elapsed_time": "0:01:19", "remaining_time": "0:16:07"} +{"current_steps": 72, "total_steps": 933, "loss": 2.7942872047424316, "lr": 7.553191489361703e-06, "epoch": 0.2315112540192926, "percentage": 7.72, "elapsed_time": "0:01:20", "remaining_time": "0:16:05"} +{"current_steps": 73, "total_steps": 933, "loss": 2.7176175117492676, "lr": 7.659574468085107e-06, "epoch": 0.2347266881028939, "percentage": 7.82, "elapsed_time": "0:01:21", "remaining_time": "0:16:03"} +{"current_steps": 74, "total_steps": 933, "loss": 3.2510626316070557, "lr": 7.765957446808511e-06, "epoch": 0.2379421221864952, "percentage": 7.93, "elapsed_time": "0:01:22", "remaining_time": "0:16:02"} +{"current_steps": 75, "total_steps": 933, "loss": 3.5180716514587402, "lr": 7.872340425531916e-06, "epoch": 0.24115755627009647, "percentage": 8.04, "elapsed_time": "0:01:23", "remaining_time": "0:16:00"} +{"current_steps": 76, "total_steps": 933, "loss": 2.6583242416381836, "lr": 7.97872340425532e-06, "epoch": 0.24437299035369775, "percentage": 8.15, "elapsed_time": "0:01:25", "remaining_time": "0:15:59"} +{"current_steps": 77, "total_steps": 933, "loss": 3.522376537322998, "lr": 8.085106382978723e-06, "epoch": 0.24758842443729903, "percentage": 8.25, "elapsed_time": "0:01:26", "remaining_time": "0:15:57"} +{"current_steps": 78, "total_steps": 933, "loss": 2.666438102722168, "lr": 8.191489361702128e-06, "epoch": 0.2508038585209003, "percentage": 8.36, "elapsed_time": "0:01:27", "remaining_time": "0:15:55"} +{"current_steps": 79, "total_steps": 933, "loss": 3.0438873767852783, "lr": 8.297872340425532e-06, "epoch": 0.2540192926045016, "percentage": 8.47, "elapsed_time": "0:01:28", "remaining_time": "0:15:54"} +{"current_steps": 80, "total_steps": 933, "loss": 2.7416107654571533, "lr": 8.404255319148937e-06, "epoch": 0.2572347266881029, "percentage": 8.57, "elapsed_time": "0:01:29", "remaining_time": "0:15:53"} +{"current_steps": 81, "total_steps": 933, "loss": 3.040888547897339, "lr": 8.510638297872341e-06, "epoch": 0.2604501607717042, "percentage": 8.68, "elapsed_time": "0:01:30", "remaining_time": "0:15:51"} +{"current_steps": 82, "total_steps": 933, "loss": 2.8178224563598633, "lr": 8.617021276595746e-06, "epoch": 0.26366559485530544, "percentage": 8.79, "elapsed_time": "0:01:31", "remaining_time": "0:15:50"} +{"current_steps": 83, "total_steps": 933, "loss": 3.13818097114563, "lr": 8.72340425531915e-06, "epoch": 0.26688102893890675, "percentage": 8.9, "elapsed_time": "0:01:32", "remaining_time": "0:15:48"} +{"current_steps": 84, "total_steps": 933, "loss": 3.0007710456848145, "lr": 8.829787234042555e-06, "epoch": 0.27009646302250806, "percentage": 9.0, "elapsed_time": "0:01:33", "remaining_time": "0:15:47"} +{"current_steps": 85, "total_steps": 933, "loss": 2.1240034103393555, "lr": 8.936170212765958e-06, "epoch": 0.2733118971061093, "percentage": 9.11, "elapsed_time": "0:01:34", "remaining_time": "0:15:45"} +{"current_steps": 86, "total_steps": 933, "loss": 3.0258898735046387, "lr": 9.042553191489362e-06, "epoch": 0.2765273311897106, "percentage": 9.22, "elapsed_time": "0:01:35", "remaining_time": "0:15:44"} +{"current_steps": 87, "total_steps": 933, "loss": 3.815779685974121, "lr": 9.148936170212767e-06, "epoch": 0.2797427652733119, "percentage": 9.32, "elapsed_time": "0:01:36", "remaining_time": "0:15:42"} +{"current_steps": 88, "total_steps": 933, "loss": 3.146327018737793, "lr": 9.255319148936171e-06, "epoch": 0.2829581993569132, "percentage": 9.43, "elapsed_time": "0:01:38", "remaining_time": "0:15:41"} +{"current_steps": 89, "total_steps": 933, "loss": 3.730978488922119, "lr": 9.361702127659576e-06, "epoch": 0.2861736334405145, "percentage": 9.54, "elapsed_time": "0:01:39", "remaining_time": "0:15:39"} +{"current_steps": 90, "total_steps": 933, "loss": 2.837461471557617, "lr": 9.46808510638298e-06, "epoch": 0.28938906752411575, "percentage": 9.65, "elapsed_time": "0:01:40", "remaining_time": "0:15:38"} +{"current_steps": 91, "total_steps": 933, "loss": 2.8579788208007812, "lr": 9.574468085106385e-06, "epoch": 0.29260450160771706, "percentage": 9.75, "elapsed_time": "0:01:41", "remaining_time": "0:15:36"} +{"current_steps": 92, "total_steps": 933, "loss": 3.059731960296631, "lr": 9.680851063829787e-06, "epoch": 0.2958199356913183, "percentage": 9.86, "elapsed_time": "0:01:42", "remaining_time": "0:15:35"} +{"current_steps": 93, "total_steps": 933, "loss": 2.419557809829712, "lr": 9.787234042553192e-06, "epoch": 0.2990353697749196, "percentage": 9.97, "elapsed_time": "0:01:43", "remaining_time": "0:15:34"} +{"current_steps": 94, "total_steps": 933, "loss": 2.8333683013916016, "lr": 9.893617021276596e-06, "epoch": 0.3022508038585209, "percentage": 10.08, "elapsed_time": "0:01:44", "remaining_time": "0:15:32"} +{"current_steps": 95, "total_steps": 933, "loss": 2.74963641166687, "lr": 1e-05, "epoch": 0.3054662379421222, "percentage": 10.18, "elapsed_time": "0:01:45", "remaining_time": "0:15:31"} +{"current_steps": 96, "total_steps": 933, "loss": 2.873091697692871, "lr": 9.999964947796453e-06, "epoch": 0.3086816720257235, "percentage": 10.29, "elapsed_time": "0:01:46", "remaining_time": "0:15:29"} +{"current_steps": 97, "total_steps": 933, "loss": 2.7511343955993652, "lr": 9.999859791677274e-06, "epoch": 0.31189710610932475, "percentage": 10.4, "elapsed_time": "0:01:47", "remaining_time": "0:15:28"} +{"current_steps": 98, "total_steps": 933, "loss": 2.71083927154541, "lr": 9.999684533116843e-06, "epoch": 0.31511254019292606, "percentage": 10.5, "elapsed_time": "0:01:48", "remaining_time": "0:15:27"} +{"current_steps": 99, "total_steps": 933, "loss": 3.5184683799743652, "lr": 9.999439174572441e-06, "epoch": 0.3183279742765273, "percentage": 10.61, "elapsed_time": "0:01:49", "remaining_time": "0:15:25"} +{"current_steps": 100, "total_steps": 933, "loss": 3.0679643154144287, "lr": 9.999123719484209e-06, "epoch": 0.3215434083601286, "percentage": 10.72, "elapsed_time": "0:01:50", "remaining_time": "0:15:24"} +{"current_steps": 101, "total_steps": 933, "loss": 3.4208366870880127, "lr": 9.99873817227511e-06, "epoch": 0.3247588424437299, "percentage": 10.83, "elapsed_time": "0:01:52", "remaining_time": "0:15:23"} +{"current_steps": 102, "total_steps": 933, "loss": 2.5970406532287598, "lr": 9.998282538350849e-06, "epoch": 0.3279742765273312, "percentage": 10.93, "elapsed_time": "0:01:53", "remaining_time": "0:15:21"} +{"current_steps": 103, "total_steps": 933, "loss": 2.414546012878418, "lr": 9.997756824099822e-06, "epoch": 0.3311897106109325, "percentage": 11.04, "elapsed_time": "0:01:54", "remaining_time": "0:15:20"} +{"current_steps": 104, "total_steps": 933, "loss": 2.7526440620422363, "lr": 9.997161036893001e-06, "epoch": 0.33440514469453375, "percentage": 11.15, "elapsed_time": "0:01:55", "remaining_time": "0:15:19"} +{"current_steps": 105, "total_steps": 933, "loss": 3.5313873291015625, "lr": 9.996495185083853e-06, "epoch": 0.33762057877813506, "percentage": 11.25, "elapsed_time": "0:01:56", "remaining_time": "0:15:17"} +{"current_steps": 106, "total_steps": 933, "loss": 3.215785026550293, "lr": 9.995759278008202e-06, "epoch": 0.3408360128617363, "percentage": 11.36, "elapsed_time": "0:01:57", "remaining_time": "0:15:16"} +{"current_steps": 107, "total_steps": 933, "loss": 2.8342652320861816, "lr": 9.994953325984116e-06, "epoch": 0.3440514469453376, "percentage": 11.47, "elapsed_time": "0:01:58", "remaining_time": "0:15:15"} +{"current_steps": 108, "total_steps": 933, "loss": 3.187843084335327, "lr": 9.994077340311751e-06, "epoch": 0.34726688102893893, "percentage": 11.58, "elapsed_time": "0:01:59", "remaining_time": "0:15:13"} +{"current_steps": 109, "total_steps": 933, "loss": 3.329102039337158, "lr": 9.993131333273203e-06, "epoch": 0.3504823151125402, "percentage": 11.68, "elapsed_time": "0:02:00", "remaining_time": "0:15:12"} +{"current_steps": 110, "total_steps": 933, "loss": 2.963022232055664, "lr": 9.99211531813232e-06, "epoch": 0.3536977491961415, "percentage": 11.79, "elapsed_time": "0:02:01", "remaining_time": "0:15:10"} +{"current_steps": 111, "total_steps": 933, "loss": 3.1603951454162598, "lr": 9.991029309134533e-06, "epoch": 0.35691318327974275, "percentage": 11.9, "elapsed_time": "0:02:02", "remaining_time": "0:15:09"} +{"current_steps": 112, "total_steps": 933, "loss": 3.063810110092163, "lr": 9.989873321506643e-06, "epoch": 0.36012861736334406, "percentage": 12.0, "elapsed_time": "0:02:03", "remaining_time": "0:15:08"} +{"current_steps": 113, "total_steps": 933, "loss": 3.0116543769836426, "lr": 9.988647371456614e-06, "epoch": 0.3633440514469453, "percentage": 12.11, "elapsed_time": "0:02:04", "remaining_time": "0:15:06"} +{"current_steps": 114, "total_steps": 933, "loss": 3.010406255722046, "lr": 9.987351476173352e-06, "epoch": 0.3665594855305466, "percentage": 12.22, "elapsed_time": "0:02:06", "remaining_time": "0:15:05"} +{"current_steps": 115, "total_steps": 933, "loss": 1.9504810571670532, "lr": 9.985985653826444e-06, "epoch": 0.36977491961414793, "percentage": 12.33, "elapsed_time": "0:02:07", "remaining_time": "0:15:04"} +{"current_steps": 116, "total_steps": 933, "loss": 2.93680739402771, "lr": 9.98454992356593e-06, "epoch": 0.3729903536977492, "percentage": 12.43, "elapsed_time": "0:02:08", "remaining_time": "0:15:03"} +{"current_steps": 117, "total_steps": 933, "loss": 2.3677353858947754, "lr": 9.983044305522007e-06, "epoch": 0.3762057877813505, "percentage": 12.54, "elapsed_time": "0:02:09", "remaining_time": "0:15:01"} +{"current_steps": 118, "total_steps": 933, "loss": 2.847960948944092, "lr": 9.981468820804774e-06, "epoch": 0.37942122186495175, "percentage": 12.65, "elapsed_time": "0:02:10", "remaining_time": "0:15:00"} +{"current_steps": 119, "total_steps": 933, "loss": 3.0923283100128174, "lr": 9.979823491503909e-06, "epoch": 0.38263665594855306, "percentage": 12.75, "elapsed_time": "0:02:11", "remaining_time": "0:14:59"} +{"current_steps": 120, "total_steps": 933, "loss": 3.020812511444092, "lr": 9.978108340688383e-06, "epoch": 0.3858520900321543, "percentage": 12.86, "elapsed_time": "0:02:12", "remaining_time": "0:14:57"} +{"current_steps": 121, "total_steps": 933, "loss": 3.3838634490966797, "lr": 9.976323392406122e-06, "epoch": 0.3890675241157556, "percentage": 12.97, "elapsed_time": "0:02:13", "remaining_time": "0:14:56"} +{"current_steps": 122, "total_steps": 933, "loss": 3.5906333923339844, "lr": 9.974468671683673e-06, "epoch": 0.39228295819935693, "percentage": 13.08, "elapsed_time": "0:02:14", "remaining_time": "0:14:55"} +{"current_steps": 123, "total_steps": 933, "loss": 3.000471591949463, "lr": 9.972544204525853e-06, "epoch": 0.3954983922829582, "percentage": 13.18, "elapsed_time": "0:02:15", "remaining_time": "0:14:53"} +{"current_steps": 124, "total_steps": 933, "loss": 2.9725592136383057, "lr": 9.970550017915393e-06, "epoch": 0.3987138263665595, "percentage": 13.29, "elapsed_time": "0:02:16", "remaining_time": "0:14:52"} +{"current_steps": 125, "total_steps": 933, "loss": 3.160482406616211, "lr": 9.968486139812544e-06, "epoch": 0.40192926045016075, "percentage": 13.4, "elapsed_time": "0:02:17", "remaining_time": "0:14:51"} +{"current_steps": 126, "total_steps": 933, "loss": 3.0642364025115967, "lr": 9.966352599154697e-06, "epoch": 0.40514469453376206, "percentage": 13.5, "elapsed_time": "0:02:18", "remaining_time": "0:14:50"} +{"current_steps": 127, "total_steps": 933, "loss": 3.73250675201416, "lr": 9.964149425855971e-06, "epoch": 0.40836012861736337, "percentage": 13.61, "elapsed_time": "0:02:20", "remaining_time": "0:14:48"} +{"current_steps": 128, "total_steps": 933, "loss": 2.632124900817871, "lr": 9.961876650806799e-06, "epoch": 0.4115755627009646, "percentage": 13.72, "elapsed_time": "0:02:21", "remaining_time": "0:14:47"} +{"current_steps": 129, "total_steps": 933, "loss": 2.5810580253601074, "lr": 9.95953430587349e-06, "epoch": 0.41479099678456594, "percentage": 13.83, "elapsed_time": "0:02:22", "remaining_time": "0:14:46"} +{"current_steps": 130, "total_steps": 933, "loss": 3.170461654663086, "lr": 9.957122423897786e-06, "epoch": 0.4180064308681672, "percentage": 13.93, "elapsed_time": "0:02:23", "remaining_time": "0:14:45"} +{"current_steps": 131, "total_steps": 933, "loss": 3.044951915740967, "lr": 9.954641038696395e-06, "epoch": 0.4212218649517685, "percentage": 14.04, "elapsed_time": "0:02:24", "remaining_time": "0:14:43"} +{"current_steps": 132, "total_steps": 933, "loss": 2.9257850646972656, "lr": 9.952090185060528e-06, "epoch": 0.42443729903536975, "percentage": 14.15, "elapsed_time": "0:02:25", "remaining_time": "0:14:42"} +{"current_steps": 133, "total_steps": 933, "loss": 2.943833827972412, "lr": 9.9494698987554e-06, "epoch": 0.42765273311897106, "percentage": 14.26, "elapsed_time": "0:02:26", "remaining_time": "0:14:41"} +{"current_steps": 134, "total_steps": 933, "loss": 3.022878646850586, "lr": 9.946780216519734e-06, "epoch": 0.43086816720257237, "percentage": 14.36, "elapsed_time": "0:02:27", "remaining_time": "0:14:40"} +{"current_steps": 135, "total_steps": 933, "loss": 2.792724847793579, "lr": 9.944021176065247e-06, "epoch": 0.4340836012861736, "percentage": 14.47, "elapsed_time": "0:02:28", "remaining_time": "0:14:39"} +{"current_steps": 136, "total_steps": 933, "loss": 3.35680890083313, "lr": 9.941192816076114e-06, "epoch": 0.43729903536977494, "percentage": 14.58, "elapsed_time": "0:02:29", "remaining_time": "0:14:37"} +{"current_steps": 137, "total_steps": 933, "loss": 2.4593820571899414, "lr": 9.938295176208441e-06, "epoch": 0.4405144694533762, "percentage": 14.68, "elapsed_time": "0:02:30", "remaining_time": "0:14:36"} +{"current_steps": 138, "total_steps": 933, "loss": 2.6255781650543213, "lr": 9.93532829708969e-06, "epoch": 0.4437299035369775, "percentage": 14.79, "elapsed_time": "0:02:31", "remaining_time": "0:14:35"} +{"current_steps": 139, "total_steps": 933, "loss": 2.6132946014404297, "lr": 9.932292220318121e-06, "epoch": 0.44694533762057875, "percentage": 14.9, "elapsed_time": "0:02:33", "remaining_time": "0:14:34"} +{"current_steps": 140, "total_steps": 933, "loss": 2.9322824478149414, "lr": 9.929186988462208e-06, "epoch": 0.45016077170418006, "percentage": 15.01, "elapsed_time": "0:02:34", "remaining_time": "0:14:33"} +{"current_steps": 141, "total_steps": 933, "loss": 2.601005792617798, "lr": 9.926012645060037e-06, "epoch": 0.4533762057877814, "percentage": 15.11, "elapsed_time": "0:02:35", "remaining_time": "0:14:31"} +{"current_steps": 142, "total_steps": 933, "loss": 2.5554819107055664, "lr": 9.9227692346187e-06, "epoch": 0.4565916398713826, "percentage": 15.22, "elapsed_time": "0:02:36", "remaining_time": "0:14:30"} +{"current_steps": 143, "total_steps": 933, "loss": 2.5176918506622314, "lr": 9.919456802613672e-06, "epoch": 0.45980707395498394, "percentage": 15.33, "elapsed_time": "0:02:37", "remaining_time": "0:14:29"} +{"current_steps": 144, "total_steps": 933, "loss": 2.341370105743408, "lr": 9.916075395488167e-06, "epoch": 0.4630225080385852, "percentage": 15.43, "elapsed_time": "0:02:38", "remaining_time": "0:14:28"} +{"current_steps": 145, "total_steps": 933, "loss": 3.2781105041503906, "lr": 9.912625060652496e-06, "epoch": 0.4662379421221865, "percentage": 15.54, "elapsed_time": "0:02:39", "remaining_time": "0:14:26"} +{"current_steps": 146, "total_steps": 933, "loss": 3.615126132965088, "lr": 9.909105846483394e-06, "epoch": 0.4694533762057878, "percentage": 15.65, "elapsed_time": "0:02:40", "remaining_time": "0:14:25"} +{"current_steps": 147, "total_steps": 933, "loss": 2.8603837490081787, "lr": 9.905517802323345e-06, "epoch": 0.47266881028938906, "percentage": 15.76, "elapsed_time": "0:02:41", "remaining_time": "0:14:24"} +{"current_steps": 148, "total_steps": 933, "loss": 2.465343952178955, "lr": 9.901860978479889e-06, "epoch": 0.4758842443729904, "percentage": 15.86, "elapsed_time": "0:02:42", "remaining_time": "0:14:23"} +{"current_steps": 149, "total_steps": 933, "loss": 3.241806983947754, "lr": 9.898135426224923e-06, "epoch": 0.4790996784565916, "percentage": 15.97, "elapsed_time": "0:02:43", "remaining_time": "0:14:22"} +{"current_steps": 150, "total_steps": 933, "loss": 3.292675018310547, "lr": 9.89434119779397e-06, "epoch": 0.48231511254019294, "percentage": 16.08, "elapsed_time": "0:02:44", "remaining_time": "0:14:21"} +{"current_steps": 151, "total_steps": 933, "loss": 2.6159539222717285, "lr": 9.89047834638546e-06, "epoch": 0.4855305466237942, "percentage": 16.18, "elapsed_time": "0:02:46", "remaining_time": "0:14:20"} +{"current_steps": 152, "total_steps": 933, "loss": 2.4839179515838623, "lr": 9.886546926159972e-06, "epoch": 0.4887459807073955, "percentage": 16.29, "elapsed_time": "0:02:47", "remaining_time": "0:14:18"} +{"current_steps": 153, "total_steps": 933, "loss": 2.995469570159912, "lr": 9.882546992239483e-06, "epoch": 0.4919614147909968, "percentage": 16.4, "elapsed_time": "0:02:48", "remaining_time": "0:14:17"} +{"current_steps": 154, "total_steps": 933, "loss": 3.255875587463379, "lr": 9.878478600706595e-06, "epoch": 0.49517684887459806, "percentage": 16.51, "elapsed_time": "0:02:49", "remaining_time": "0:14:16"} +{"current_steps": 155, "total_steps": 933, "loss": 3.4806838035583496, "lr": 9.87434180860374e-06, "epoch": 0.4983922829581994, "percentage": 16.61, "elapsed_time": "0:02:50", "remaining_time": "0:14:15"} +{"current_steps": 156, "total_steps": 933, "loss": 3.202141284942627, "lr": 9.87013667393239e-06, "epoch": 0.5016077170418006, "percentage": 16.72, "elapsed_time": "0:02:51", "remaining_time": "0:14:14"} +{"current_steps": 157, "total_steps": 933, "loss": 2.8557021617889404, "lr": 9.865863255652242e-06, "epoch": 0.5048231511254019, "percentage": 16.83, "elapsed_time": "0:02:52", "remaining_time": "0:14:12"} +{"current_steps": 158, "total_steps": 933, "loss": 2.9327592849731445, "lr": 9.861521613680384e-06, "epoch": 0.5080385852090032, "percentage": 16.93, "elapsed_time": "0:02:53", "remaining_time": "0:14:11"} +{"current_steps": 159, "total_steps": 933, "loss": 3.2285704612731934, "lr": 9.857111808890465e-06, "epoch": 0.5112540192926045, "percentage": 17.04, "elapsed_time": "0:02:54", "remaining_time": "0:14:10"} +{"current_steps": 160, "total_steps": 933, "loss": 2.7799017429351807, "lr": 9.852633903111834e-06, "epoch": 0.5144694533762058, "percentage": 17.15, "elapsed_time": "0:02:55", "remaining_time": "0:14:09"} +{"current_steps": 161, "total_steps": 933, "loss": 2.5985677242279053, "lr": 9.848087959128679e-06, "epoch": 0.5176848874598071, "percentage": 17.26, "elapsed_time": "0:02:56", "remaining_time": "0:14:08"} +{"current_steps": 162, "total_steps": 933, "loss": 3.1218137741088867, "lr": 9.843474040679137e-06, "epoch": 0.5209003215434084, "percentage": 17.36, "elapsed_time": "0:02:57", "remaining_time": "0:14:06"} +{"current_steps": 163, "total_steps": 933, "loss": 2.367370367050171, "lr": 9.838792212454416e-06, "epoch": 0.5241157556270096, "percentage": 17.47, "elapsed_time": "0:02:59", "remaining_time": "0:14:05"} +{"current_steps": 164, "total_steps": 933, "loss": 2.8083603382110596, "lr": 9.834042540097875e-06, "epoch": 0.5273311897106109, "percentage": 17.58, "elapsed_time": "0:03:00", "remaining_time": "0:14:04"} +{"current_steps": 165, "total_steps": 933, "loss": 2.920241117477417, "lr": 9.829225090204102e-06, "epoch": 0.5305466237942122, "percentage": 17.68, "elapsed_time": "0:03:01", "remaining_time": "0:14:03"} +{"current_steps": 166, "total_steps": 933, "loss": 2.6172094345092773, "lr": 9.824339930317994e-06, "epoch": 0.5337620578778135, "percentage": 17.79, "elapsed_time": "0:03:02", "remaining_time": "0:14:02"} +{"current_steps": 167, "total_steps": 933, "loss": 3.1109981536865234, "lr": 9.819387128933799e-06, "epoch": 0.5369774919614148, "percentage": 17.9, "elapsed_time": "0:03:03", "remaining_time": "0:14:00"} +{"current_steps": 168, "total_steps": 933, "loss": 3.153048038482666, "lr": 9.814366755494155e-06, "epoch": 0.5401929260450161, "percentage": 18.01, "elapsed_time": "0:03:04", "remaining_time": "0:13:59"} +{"current_steps": 169, "total_steps": 933, "loss": 3.2571372985839844, "lr": 9.809278880389126e-06, "epoch": 0.5434083601286174, "percentage": 18.11, "elapsed_time": "0:03:05", "remaining_time": "0:13:58"} +{"current_steps": 170, "total_steps": 933, "loss": 3.108255624771118, "lr": 9.804123574955202e-06, "epoch": 0.5466237942122186, "percentage": 18.22, "elapsed_time": "0:03:06", "remaining_time": "0:13:57"} +{"current_steps": 171, "total_steps": 933, "loss": 3.0177969932556152, "lr": 9.798900911474315e-06, "epoch": 0.5498392282958199, "percentage": 18.33, "elapsed_time": "0:03:07", "remaining_time": "0:13:56"} +{"current_steps": 172, "total_steps": 933, "loss": 2.795715093612671, "lr": 9.793610963172802e-06, "epoch": 0.5530546623794212, "percentage": 18.44, "elapsed_time": "0:03:08", "remaining_time": "0:13:55"} +{"current_steps": 173, "total_steps": 933, "loss": 3.3412439823150635, "lr": 9.78825380422041e-06, "epoch": 0.5562700964630225, "percentage": 18.54, "elapsed_time": "0:03:09", "remaining_time": "0:13:53"} +{"current_steps": 174, "total_steps": 933, "loss": 2.7117419242858887, "lr": 9.78282950972922e-06, "epoch": 0.5594855305466238, "percentage": 18.65, "elapsed_time": "0:03:10", "remaining_time": "0:13:52"} +{"current_steps": 175, "total_steps": 933, "loss": 3.1940979957580566, "lr": 9.77733815575263e-06, "epoch": 0.5627009646302251, "percentage": 18.76, "elapsed_time": "0:03:11", "remaining_time": "0:13:51"} +{"current_steps": 176, "total_steps": 933, "loss": 2.418674945831299, "lr": 9.771779819284257e-06, "epoch": 0.5659163987138264, "percentage": 18.86, "elapsed_time": "0:03:13", "remaining_time": "0:13:50"} +{"current_steps": 177, "total_steps": 933, "loss": 2.819211959838867, "lr": 9.766154578256883e-06, "epoch": 0.5691318327974276, "percentage": 18.97, "elapsed_time": "0:03:14", "remaining_time": "0:13:49"} +{"current_steps": 178, "total_steps": 933, "loss": 3.260634183883667, "lr": 9.76046251154134e-06, "epoch": 0.572347266881029, "percentage": 19.08, "elapsed_time": "0:03:15", "remaining_time": "0:13:48"} +{"current_steps": 179, "total_steps": 933, "loss": 3.002488136291504, "lr": 9.754703698945425e-06, "epoch": 0.5755627009646302, "percentage": 19.19, "elapsed_time": "0:03:16", "remaining_time": "0:13:46"} +{"current_steps": 180, "total_steps": 933, "loss": 3.066258668899536, "lr": 9.748878221212763e-06, "epoch": 0.5787781350482315, "percentage": 19.29, "elapsed_time": "0:03:17", "remaining_time": "0:13:45"} +{"current_steps": 181, "total_steps": 933, "loss": 3.4351935386657715, "lr": 9.742986160021688e-06, "epoch": 0.5819935691318328, "percentage": 19.4, "elapsed_time": "0:03:18", "remaining_time": "0:13:44"} +{"current_steps": 182, "total_steps": 933, "loss": 3.2599828243255615, "lr": 9.73702759798409e-06, "epoch": 0.5852090032154341, "percentage": 19.51, "elapsed_time": "0:03:19", "remaining_time": "0:13:43"} +{"current_steps": 183, "total_steps": 933, "loss": 2.5718865394592285, "lr": 9.731002618644265e-06, "epoch": 0.5884244372990354, "percentage": 19.61, "elapsed_time": "0:03:20", "remaining_time": "0:13:42"} +{"current_steps": 184, "total_steps": 933, "loss": 2.765693426132202, "lr": 9.724911306477729e-06, "epoch": 0.5916398713826366, "percentage": 19.72, "elapsed_time": "0:03:21", "remaining_time": "0:13:41"} +{"current_steps": 185, "total_steps": 933, "loss": 3.426023244857788, "lr": 9.71875374689005e-06, "epoch": 0.594855305466238, "percentage": 19.83, "elapsed_time": "0:03:22", "remaining_time": "0:13:39"} +{"current_steps": 186, "total_steps": 933, "loss": 3.001716375350952, "lr": 9.71253002621564e-06, "epoch": 0.5980707395498392, "percentage": 19.94, "elapsed_time": "0:03:23", "remaining_time": "0:13:38"} +{"current_steps": 187, "total_steps": 933, "loss": 2.950758218765259, "lr": 9.706240231716549e-06, "epoch": 0.6012861736334405, "percentage": 20.04, "elapsed_time": "0:03:24", "remaining_time": "0:13:37"} +{"current_steps": 188, "total_steps": 933, "loss": 3.2297606468200684, "lr": 9.699884451581238e-06, "epoch": 0.6045016077170418, "percentage": 20.15, "elapsed_time": "0:03:26", "remaining_time": "0:13:36"} +{"current_steps": 189, "total_steps": 933, "loss": 4.231553077697754, "lr": 9.693462774923351e-06, "epoch": 0.6077170418006431, "percentage": 20.26, "elapsed_time": "0:03:27", "remaining_time": "0:13:35"} +{"current_steps": 190, "total_steps": 933, "loss": 3.427185535430908, "lr": 9.686975291780449e-06, "epoch": 0.6109324758842444, "percentage": 20.36, "elapsed_time": "0:03:28", "remaining_time": "0:13:34"} +{"current_steps": 191, "total_steps": 933, "loss": 3.104320526123047, "lr": 9.68042209311277e-06, "epoch": 0.6141479099678456, "percentage": 20.47, "elapsed_time": "0:03:29", "remaining_time": "0:13:33"} +{"current_steps": 192, "total_steps": 933, "loss": 2.5996108055114746, "lr": 9.67380327080193e-06, "epoch": 0.617363344051447, "percentage": 20.58, "elapsed_time": "0:03:30", "remaining_time": "0:13:31"} +{"current_steps": 193, "total_steps": 933, "loss": 2.6025047302246094, "lr": 9.667118917649656e-06, "epoch": 0.6205787781350482, "percentage": 20.69, "elapsed_time": "0:03:31", "remaining_time": "0:13:30"} +{"current_steps": 194, "total_steps": 933, "loss": 2.7667369842529297, "lr": 9.660369127376469e-06, "epoch": 0.6237942122186495, "percentage": 20.79, "elapsed_time": "0:03:32", "remaining_time": "0:13:29"} +{"current_steps": 195, "total_steps": 933, "loss": 2.9813175201416016, "lr": 9.653553994620378e-06, "epoch": 0.6270096463022508, "percentage": 20.9, "elapsed_time": "0:03:33", "remaining_time": "0:13:28"} +{"current_steps": 196, "total_steps": 933, "loss": 3.094072103500366, "lr": 9.64667361493555e-06, "epoch": 0.6302250803858521, "percentage": 21.01, "elapsed_time": "0:03:34", "remaining_time": "0:13:27"} +{"current_steps": 197, "total_steps": 933, "loss": 2.9591763019561768, "lr": 9.639728084790976e-06, "epoch": 0.6334405144694534, "percentage": 21.11, "elapsed_time": "0:03:35", "remaining_time": "0:13:26"} +{"current_steps": 198, "total_steps": 933, "loss": 1.9626116752624512, "lr": 9.632717501569106e-06, "epoch": 0.6366559485530546, "percentage": 21.22, "elapsed_time": "0:03:36", "remaining_time": "0:13:25"} +{"current_steps": 199, "total_steps": 933, "loss": 3.0262200832366943, "lr": 9.6256419635645e-06, "epoch": 0.639871382636656, "percentage": 21.33, "elapsed_time": "0:03:37", "remaining_time": "0:13:23"} +{"current_steps": 200, "total_steps": 933, "loss": 2.373394012451172, "lr": 9.618501569982437e-06, "epoch": 0.6430868167202572, "percentage": 21.44, "elapsed_time": "0:03:39", "remaining_time": "0:13:22"} +{"current_steps": 201, "total_steps": 933, "loss": 2.601590633392334, "lr": 9.611296420937526e-06, "epoch": 0.6463022508038585, "percentage": 21.54, "elapsed_time": "0:03:40", "remaining_time": "0:13:21"} +{"current_steps": 202, "total_steps": 933, "loss": 3.3032302856445312, "lr": 9.60402661745231e-06, "epoch": 0.6495176848874598, "percentage": 21.65, "elapsed_time": "0:03:41", "remaining_time": "0:13:20"} +{"current_steps": 203, "total_steps": 933, "loss": 2.55745267868042, "lr": 9.59669226145584e-06, "epoch": 0.6527331189710611, "percentage": 21.76, "elapsed_time": "0:03:42", "remaining_time": "0:13:19"} +{"current_steps": 204, "total_steps": 933, "loss": 3.162076950073242, "lr": 9.589293455782253e-06, "epoch": 0.6559485530546624, "percentage": 21.86, "elapsed_time": "0:03:43", "remaining_time": "0:13:18"} +{"current_steps": 205, "total_steps": 933, "loss": 3.104783535003662, "lr": 9.581830304169325e-06, "epoch": 0.6591639871382636, "percentage": 21.97, "elapsed_time": "0:03:44", "remaining_time": "0:13:16"} +{"current_steps": 206, "total_steps": 933, "loss": 1.5882987976074219, "lr": 9.574302911257021e-06, "epoch": 0.662379421221865, "percentage": 22.08, "elapsed_time": "0:03:45", "remaining_time": "0:13:15"} +{"current_steps": 207, "total_steps": 933, "loss": 3.12563157081604, "lr": 9.566711382586022e-06, "epoch": 0.6655948553054662, "percentage": 22.19, "elapsed_time": "0:03:46", "remaining_time": "0:13:14"} +{"current_steps": 208, "total_steps": 933, "loss": 2.5417189598083496, "lr": 9.559055824596252e-06, "epoch": 0.6688102893890675, "percentage": 22.29, "elapsed_time": "0:03:47", "remaining_time": "0:13:13"} +{"current_steps": 209, "total_steps": 933, "loss": 3.221043109893799, "lr": 9.551336344625387e-06, "epoch": 0.6720257234726688, "percentage": 22.4, "elapsed_time": "0:03:48", "remaining_time": "0:13:12"} +{"current_steps": 210, "total_steps": 933, "loss": 2.767820358276367, "lr": 9.543553050907332e-06, "epoch": 0.6752411575562701, "percentage": 22.51, "elapsed_time": "0:03:49", "remaining_time": "0:13:11"} +{"current_steps": 211, "total_steps": 933, "loss": 4.299283027648926, "lr": 9.53570605257073e-06, "epoch": 0.6784565916398714, "percentage": 22.62, "elapsed_time": "0:03:50", "remaining_time": "0:13:10"} +{"current_steps": 212, "total_steps": 933, "loss": 2.867112398147583, "lr": 9.527795459637413e-06, "epoch": 0.6816720257234726, "percentage": 22.72, "elapsed_time": "0:03:51", "remaining_time": "0:13:08"} +{"current_steps": 213, "total_steps": 933, "loss": 2.6538944244384766, "lr": 9.519821383020866e-06, "epoch": 0.684887459807074, "percentage": 22.83, "elapsed_time": "0:03:53", "remaining_time": "0:13:07"} +{"current_steps": 214, "total_steps": 933, "loss": 3.379420280456543, "lr": 9.511783934524674e-06, "epoch": 0.6881028938906752, "percentage": 22.94, "elapsed_time": "0:03:54", "remaining_time": "0:13:06"} +{"current_steps": 215, "total_steps": 933, "loss": 2.9065451622009277, "lr": 9.503683226840948e-06, "epoch": 0.6913183279742765, "percentage": 23.04, "elapsed_time": "0:03:55", "remaining_time": "0:13:05"} +{"current_steps": 216, "total_steps": 933, "loss": 2.9622750282287598, "lr": 9.495519373548748e-06, "epoch": 0.6945337620578779, "percentage": 23.15, "elapsed_time": "0:03:56", "remaining_time": "0:13:04"} +{"current_steps": 217, "total_steps": 933, "loss": 3.1384975910186768, "lr": 9.487292489112497e-06, "epoch": 0.6977491961414791, "percentage": 23.26, "elapsed_time": "0:03:57", "remaining_time": "0:13:03"} +{"current_steps": 218, "total_steps": 933, "loss": 3.00459623336792, "lr": 9.479002688880362e-06, "epoch": 0.7009646302250804, "percentage": 23.37, "elapsed_time": "0:03:58", "remaining_time": "0:13:01"} +{"current_steps": 219, "total_steps": 933, "loss": 3.349134922027588, "lr": 9.470650089082649e-06, "epoch": 0.7041800643086816, "percentage": 23.47, "elapsed_time": "0:03:59", "remaining_time": "0:13:00"} +{"current_steps": 220, "total_steps": 933, "loss": 2.8735666275024414, "lr": 9.462234806830172e-06, "epoch": 0.707395498392283, "percentage": 23.58, "elapsed_time": "0:04:00", "remaining_time": "0:12:59"} +{"current_steps": 221, "total_steps": 933, "loss": 2.7908334732055664, "lr": 9.453756960112605e-06, "epoch": 0.7106109324758842, "percentage": 23.69, "elapsed_time": "0:04:01", "remaining_time": "0:12:58"} +{"current_steps": 222, "total_steps": 933, "loss": 2.5783145427703857, "lr": 9.445216667796833e-06, "epoch": 0.7138263665594855, "percentage": 23.79, "elapsed_time": "0:04:02", "remaining_time": "0:12:57"} +{"current_steps": 223, "total_steps": 933, "loss": 2.7622079849243164, "lr": 9.436614049625277e-06, "epoch": 0.7170418006430869, "percentage": 23.9, "elapsed_time": "0:04:03", "remaining_time": "0:12:56"} +{"current_steps": 224, "total_steps": 933, "loss": 3.2321324348449707, "lr": 9.42794922621423e-06, "epoch": 0.7202572347266881, "percentage": 24.01, "elapsed_time": "0:04:04", "remaining_time": "0:12:55"} +{"current_steps": 225, "total_steps": 933, "loss": 2.5579419136047363, "lr": 9.419222319052154e-06, "epoch": 0.7234726688102894, "percentage": 24.12, "elapsed_time": "0:04:05", "remaining_time": "0:12:54"} +{"current_steps": 226, "total_steps": 933, "loss": 3.260056495666504, "lr": 9.410433450497977e-06, "epoch": 0.7266881028938906, "percentage": 24.22, "elapsed_time": "0:04:07", "remaining_time": "0:12:52"} +{"current_steps": 227, "total_steps": 933, "loss": 2.7521541118621826, "lr": 9.401582743779384e-06, "epoch": 0.729903536977492, "percentage": 24.33, "elapsed_time": "0:04:08", "remaining_time": "0:12:51"} +{"current_steps": 228, "total_steps": 933, "loss": 2.986008644104004, "lr": 9.392670322991085e-06, "epoch": 0.7331189710610932, "percentage": 24.44, "elapsed_time": "0:04:09", "remaining_time": "0:12:50"} +{"current_steps": 229, "total_steps": 933, "loss": 3.1167283058166504, "lr": 9.383696313093073e-06, "epoch": 0.7363344051446945, "percentage": 24.54, "elapsed_time": "0:04:10", "remaining_time": "0:12:49"} +{"current_steps": 230, "total_steps": 933, "loss": 3.0863423347473145, "lr": 9.374660839908881e-06, "epoch": 0.7395498392282959, "percentage": 24.65, "elapsed_time": "0:04:11", "remaining_time": "0:12:48"} +{"current_steps": 231, "total_steps": 933, "loss": 3.310753345489502, "lr": 9.365564030123802e-06, "epoch": 0.7427652733118971, "percentage": 24.76, "elapsed_time": "0:04:12", "remaining_time": "0:12:47"} +{"current_steps": 232, "total_steps": 933, "loss": 2.494666576385498, "lr": 9.356406011283128e-06, "epoch": 0.7459807073954984, "percentage": 24.87, "elapsed_time": "0:04:13", "remaining_time": "0:12:46"} +{"current_steps": 233, "total_steps": 933, "loss": 3.037945508956909, "lr": 9.34718691179036e-06, "epoch": 0.7491961414790996, "percentage": 24.97, "elapsed_time": "0:04:14", "remaining_time": "0:12:44"} +{"current_steps": 234, "total_steps": 933, "loss": 3.584845542907715, "lr": 9.337906860905394e-06, "epoch": 0.752411575562701, "percentage": 25.08, "elapsed_time": "0:04:15", "remaining_time": "0:12:43"} +{"current_steps": 235, "total_steps": 933, "loss": 2.984691619873047, "lr": 9.328565988742723e-06, "epoch": 0.7556270096463023, "percentage": 25.19, "elapsed_time": "0:04:16", "remaining_time": "0:12:42"} +{"current_steps": 236, "total_steps": 933, "loss": 2.795103073120117, "lr": 9.31916442626961e-06, "epoch": 0.7588424437299035, "percentage": 25.29, "elapsed_time": "0:04:17", "remaining_time": "0:12:41"} +{"current_steps": 237, "total_steps": 933, "loss": 2.5230283737182617, "lr": 9.30970230530425e-06, "epoch": 0.7620578778135049, "percentage": 25.4, "elapsed_time": "0:04:18", "remaining_time": "0:12:40"} +{"current_steps": 238, "total_steps": 933, "loss": 2.849795341491699, "lr": 9.300179758513912e-06, "epoch": 0.7652733118971061, "percentage": 25.51, "elapsed_time": "0:04:20", "remaining_time": "0:12:39"} +{"current_steps": 239, "total_steps": 933, "loss": 2.4834206104278564, "lr": 9.290596919413101e-06, "epoch": 0.7684887459807074, "percentage": 25.62, "elapsed_time": "0:04:21", "remaining_time": "0:12:38"} +{"current_steps": 240, "total_steps": 933, "loss": 2.7844467163085938, "lr": 9.280953922361667e-06, "epoch": 0.7717041800643086, "percentage": 25.72, "elapsed_time": "0:04:22", "remaining_time": "0:12:36"} +{"current_steps": 241, "total_steps": 933, "loss": 3.224181890487671, "lr": 9.271250902562925e-06, "epoch": 0.77491961414791, "percentage": 25.83, "elapsed_time": "0:04:23", "remaining_time": "0:12:35"} +{"current_steps": 242, "total_steps": 933, "loss": 2.7271580696105957, "lr": 9.26148799606177e-06, "epoch": 0.7781350482315113, "percentage": 25.94, "elapsed_time": "0:04:24", "remaining_time": "0:12:34"} +{"current_steps": 243, "total_steps": 933, "loss": 2.834172248840332, "lr": 9.251665339742751e-06, "epoch": 0.7813504823151125, "percentage": 26.05, "elapsed_time": "0:04:25", "remaining_time": "0:12:33"} +{"current_steps": 244, "total_steps": 933, "loss": 2.8642072677612305, "lr": 9.24178307132817e-06, "epoch": 0.7845659163987139, "percentage": 26.15, "elapsed_time": "0:04:26", "remaining_time": "0:12:32"} +{"current_steps": 245, "total_steps": 933, "loss": 2.961083173751831, "lr": 9.231841329376142e-06, "epoch": 0.7877813504823151, "percentage": 26.26, "elapsed_time": "0:04:27", "remaining_time": "0:12:31"} +{"current_steps": 246, "total_steps": 933, "loss": 3.1648690700531006, "lr": 9.22184025327865e-06, "epoch": 0.7909967845659164, "percentage": 26.37, "elapsed_time": "0:04:28", "remaining_time": "0:12:30"} +{"current_steps": 247, "total_steps": 933, "loss": 2.65283203125, "lr": 9.211779983259597e-06, "epoch": 0.7942122186495176, "percentage": 26.47, "elapsed_time": "0:04:29", "remaining_time": "0:12:29"} +{"current_steps": 248, "total_steps": 933, "loss": 2.8571534156799316, "lr": 9.201660660372835e-06, "epoch": 0.797427652733119, "percentage": 26.58, "elapsed_time": "0:04:30", "remaining_time": "0:12:27"} +{"current_steps": 249, "total_steps": 933, "loss": 3.076096534729004, "lr": 9.191482426500192e-06, "epoch": 0.8006430868167203, "percentage": 26.69, "elapsed_time": "0:04:31", "remaining_time": "0:12:26"} +{"current_steps": 250, "total_steps": 933, "loss": 3.0239181518554688, "lr": 9.181245424349477e-06, "epoch": 0.8038585209003215, "percentage": 26.8, "elapsed_time": "0:04:32", "remaining_time": "0:12:25"} +{"current_steps": 251, "total_steps": 933, "loss": 2.6239326000213623, "lr": 9.170949797452481e-06, "epoch": 0.8070739549839229, "percentage": 26.9, "elapsed_time": "0:04:34", "remaining_time": "0:12:24"} +{"current_steps": 252, "total_steps": 933, "loss": 2.5555763244628906, "lr": 9.160595690162974e-06, "epoch": 0.8102893890675241, "percentage": 27.01, "elapsed_time": "0:04:35", "remaining_time": "0:12:23"} +{"current_steps": 253, "total_steps": 933, "loss": 2.371147871017456, "lr": 9.15018324765466e-06, "epoch": 0.8135048231511254, "percentage": 27.12, "elapsed_time": "0:04:36", "remaining_time": "0:12:22"} +{"current_steps": 254, "total_steps": 933, "loss": 2.494548797607422, "lr": 9.139712615919163e-06, "epoch": 0.8167202572347267, "percentage": 27.22, "elapsed_time": "0:04:37", "remaining_time": "0:12:21"} +{"current_steps": 255, "total_steps": 933, "loss": 2.974158525466919, "lr": 9.129183941763971e-06, "epoch": 0.819935691318328, "percentage": 27.33, "elapsed_time": "0:04:38", "remaining_time": "0:12:20"} +{"current_steps": 256, "total_steps": 933, "loss": 2.792419672012329, "lr": 9.118597372810374e-06, "epoch": 0.8231511254019293, "percentage": 27.44, "elapsed_time": "0:04:39", "remaining_time": "0:12:18"} +{"current_steps": 257, "total_steps": 933, "loss": 2.502934694290161, "lr": 9.107953057491399e-06, "epoch": 0.8263665594855305, "percentage": 27.55, "elapsed_time": "0:04:40", "remaining_time": "0:12:17"} +{"current_steps": 258, "total_steps": 933, "loss": 3.282193899154663, "lr": 9.09725114504973e-06, "epoch": 0.8295819935691319, "percentage": 27.65, "elapsed_time": "0:04:41", "remaining_time": "0:12:16"} +{"current_steps": 259, "total_steps": 933, "loss": 2.6221415996551514, "lr": 9.086491785535613e-06, "epoch": 0.8327974276527331, "percentage": 27.76, "elapsed_time": "0:04:42", "remaining_time": "0:12:15"} +{"current_steps": 260, "total_steps": 933, "loss": 2.594520330429077, "lr": 9.07567512980475e-06, "epoch": 0.8360128617363344, "percentage": 27.87, "elapsed_time": "0:04:43", "remaining_time": "0:12:14"} +{"current_steps": 261, "total_steps": 933, "loss": 3.279817819595337, "lr": 9.064801329516192e-06, "epoch": 0.8392282958199357, "percentage": 27.97, "elapsed_time": "0:04:44", "remaining_time": "0:12:13"} +{"current_steps": 262, "total_steps": 933, "loss": 3.408698558807373, "lr": 9.053870537130198e-06, "epoch": 0.842443729903537, "percentage": 28.08, "elapsed_time": "0:04:45", "remaining_time": "0:12:12"} +{"current_steps": 263, "total_steps": 933, "loss": 2.918642997741699, "lr": 9.042882905906118e-06, "epoch": 0.8456591639871383, "percentage": 28.19, "elapsed_time": "0:04:46", "remaining_time": "0:12:11"} +{"current_steps": 264, "total_steps": 933, "loss": 2.65049409866333, "lr": 9.03183858990022e-06, "epoch": 0.8488745980707395, "percentage": 28.3, "elapsed_time": "0:04:48", "remaining_time": "0:12:09"} +{"current_steps": 265, "total_steps": 933, "loss": 3.2480692863464355, "lr": 9.020737743963555e-06, "epoch": 0.8520900321543409, "percentage": 28.4, "elapsed_time": "0:04:49", "remaining_time": "0:12:08"} +{"current_steps": 266, "total_steps": 933, "loss": 3.1771302223205566, "lr": 9.009580523739763e-06, "epoch": 0.8553054662379421, "percentage": 28.51, "elapsed_time": "0:04:50", "remaining_time": "0:12:07"} +{"current_steps": 267, "total_steps": 933, "loss": 2.5076744556427, "lr": 8.998367085662908e-06, "epoch": 0.8585209003215434, "percentage": 28.62, "elapsed_time": "0:04:51", "remaining_time": "0:12:06"} +{"current_steps": 268, "total_steps": 933, "loss": 2.7998993396759033, "lr": 8.987097586955276e-06, "epoch": 0.8617363344051447, "percentage": 28.72, "elapsed_time": "0:04:52", "remaining_time": "0:12:05"} +{"current_steps": 269, "total_steps": 933, "loss": 2.735177755355835, "lr": 8.97577218562517e-06, "epoch": 0.864951768488746, "percentage": 28.83, "elapsed_time": "0:04:53", "remaining_time": "0:12:04"} +{"current_steps": 270, "total_steps": 933, "loss": 2.8440442085266113, "lr": 8.964391040464699e-06, "epoch": 0.8681672025723473, "percentage": 28.94, "elapsed_time": "0:04:54", "remaining_time": "0:12:03"} +{"current_steps": 271, "total_steps": 933, "loss": 3.18597674369812, "lr": 8.952954311047554e-06, "epoch": 0.8713826366559485, "percentage": 29.05, "elapsed_time": "0:04:55", "remaining_time": "0:12:02"} +{"current_steps": 272, "total_steps": 933, "loss": 2.6696226596832275, "lr": 8.941462157726757e-06, "epoch": 0.8745980707395499, "percentage": 29.15, "elapsed_time": "0:04:56", "remaining_time": "0:12:01"} +{"current_steps": 273, "total_steps": 933, "loss": 3.1313624382019043, "lr": 8.92991474163243e-06, "epoch": 0.8778135048231511, "percentage": 29.26, "elapsed_time": "0:04:57", "remaining_time": "0:11:59"} +{"current_steps": 274, "total_steps": 933, "loss": 2.7278852462768555, "lr": 8.918312224669523e-06, "epoch": 0.8810289389067524, "percentage": 29.37, "elapsed_time": "0:04:58", "remaining_time": "0:11:58"} +{"current_steps": 275, "total_steps": 933, "loss": 3.123018264770508, "lr": 8.906654769515551e-06, "epoch": 0.8842443729903537, "percentage": 29.47, "elapsed_time": "0:04:59", "remaining_time": "0:11:57"} +{"current_steps": 276, "total_steps": 933, "loss": 2.5999131202697754, "lr": 8.89494253961831e-06, "epoch": 0.887459807073955, "percentage": 29.58, "elapsed_time": "0:05:01", "remaining_time": "0:11:56"} +{"current_steps": 277, "total_steps": 933, "loss": 3.3754043579101562, "lr": 8.883175699193589e-06, "epoch": 0.8906752411575563, "percentage": 29.69, "elapsed_time": "0:05:02", "remaining_time": "0:11:55"} +{"current_steps": 278, "total_steps": 933, "loss": 2.477343797683716, "lr": 8.871354413222859e-06, "epoch": 0.8938906752411575, "percentage": 29.8, "elapsed_time": "0:05:03", "remaining_time": "0:11:54"} +{"current_steps": 279, "total_steps": 933, "loss": 3.132758140563965, "lr": 8.85947884745097e-06, "epoch": 0.8971061093247589, "percentage": 29.9, "elapsed_time": "0:05:04", "remaining_time": "0:11:53"} +{"current_steps": 280, "total_steps": 933, "loss": 2.820451021194458, "lr": 8.847549168383823e-06, "epoch": 0.9003215434083601, "percentage": 30.01, "elapsed_time": "0:05:05", "remaining_time": "0:11:52"} +{"current_steps": 281, "total_steps": 933, "loss": 2.6014206409454346, "lr": 8.835565543286031e-06, "epoch": 0.9035369774919614, "percentage": 30.12, "elapsed_time": "0:05:06", "remaining_time": "0:11:50"} +{"current_steps": 282, "total_steps": 933, "loss": 2.541848659515381, "lr": 8.82352814017858e-06, "epoch": 0.9067524115755627, "percentage": 30.23, "elapsed_time": "0:05:07", "remaining_time": "0:11:49"} +{"current_steps": 283, "total_steps": 933, "loss": 2.961427688598633, "lr": 8.811437127836477e-06, "epoch": 0.909967845659164, "percentage": 30.33, "elapsed_time": "0:05:08", "remaining_time": "0:11:48"} +{"current_steps": 284, "total_steps": 933, "loss": 2.8450682163238525, "lr": 8.799292675786365e-06, "epoch": 0.9131832797427653, "percentage": 30.44, "elapsed_time": "0:05:09", "remaining_time": "0:11:47"} +{"current_steps": 285, "total_steps": 933, "loss": 3.3827338218688965, "lr": 8.787094954304172e-06, "epoch": 0.9163987138263665, "percentage": 30.55, "elapsed_time": "0:05:10", "remaining_time": "0:11:46"} +{"current_steps": 286, "total_steps": 933, "loss": 2.749037265777588, "lr": 8.7748441344127e-06, "epoch": 0.9196141479099679, "percentage": 30.65, "elapsed_time": "0:05:11", "remaining_time": "0:11:45"} +{"current_steps": 287, "total_steps": 933, "loss": 2.7875254154205322, "lr": 8.762540387879245e-06, "epoch": 0.9228295819935691, "percentage": 30.76, "elapsed_time": "0:05:12", "remaining_time": "0:11:44"} +{"current_steps": 288, "total_steps": 933, "loss": 2.7887279987335205, "lr": 8.75018388721318e-06, "epoch": 0.9260450160771704, "percentage": 30.87, "elapsed_time": "0:05:13", "remaining_time": "0:11:43"} +{"current_steps": 289, "total_steps": 933, "loss": 3.5317232608795166, "lr": 8.73777480566353e-06, "epoch": 0.9292604501607717, "percentage": 30.98, "elapsed_time": "0:05:15", "remaining_time": "0:11:42"} +{"current_steps": 290, "total_steps": 933, "loss": 2.695559024810791, "lr": 8.725313317216558e-06, "epoch": 0.932475884244373, "percentage": 31.08, "elapsed_time": "0:05:16", "remaining_time": "0:11:40"} +{"current_steps": 291, "total_steps": 933, "loss": 3.773618698120117, "lr": 8.712799596593317e-06, "epoch": 0.9356913183279743, "percentage": 31.19, "elapsed_time": "0:05:17", "remaining_time": "0:11:39"} +{"current_steps": 292, "total_steps": 933, "loss": 2.8618407249450684, "lr": 8.7002338192472e-06, "epoch": 0.9389067524115756, "percentage": 31.3, "elapsed_time": "0:05:18", "remaining_time": "0:11:38"} +{"current_steps": 293, "total_steps": 933, "loss": 2.663022518157959, "lr": 8.68761616136148e-06, "epoch": 0.9421221864951769, "percentage": 31.4, "elapsed_time": "0:05:19", "remaining_time": "0:11:37"} +{"current_steps": 294, "total_steps": 933, "loss": 2.731468677520752, "lr": 8.674946799846844e-06, "epoch": 0.9453376205787781, "percentage": 31.51, "elapsed_time": "0:05:20", "remaining_time": "0:11:36"} +{"current_steps": 295, "total_steps": 933, "loss": 3.047337293624878, "lr": 8.662225912338906e-06, "epoch": 0.9485530546623794, "percentage": 31.62, "elapsed_time": "0:05:21", "remaining_time": "0:11:35"} +{"current_steps": 296, "total_steps": 933, "loss": 2.7521162033081055, "lr": 8.64945367719572e-06, "epoch": 0.9517684887459807, "percentage": 31.73, "elapsed_time": "0:05:22", "remaining_time": "0:11:34"} +{"current_steps": 297, "total_steps": 933, "loss": 2.8862874507904053, "lr": 8.636630273495284e-06, "epoch": 0.954983922829582, "percentage": 31.83, "elapsed_time": "0:05:23", "remaining_time": "0:11:33"} +{"current_steps": 298, "total_steps": 933, "loss": 2.4406180381774902, "lr": 8.623755881033016e-06, "epoch": 0.9581993569131833, "percentage": 31.94, "elapsed_time": "0:05:24", "remaining_time": "0:11:31"} +{"current_steps": 299, "total_steps": 933, "loss": 2.9202933311462402, "lr": 8.61083068031925e-06, "epoch": 0.9614147909967846, "percentage": 32.05, "elapsed_time": "0:05:25", "remaining_time": "0:11:30"} +{"current_steps": 300, "total_steps": 933, "loss": 2.7283151149749756, "lr": 8.59785485257669e-06, "epoch": 0.9646302250803859, "percentage": 32.15, "elapsed_time": "0:05:26", "remaining_time": "0:11:29"} +{"current_steps": 301, "total_steps": 933, "loss": 3.007424831390381, "lr": 8.58482857973788e-06, "epoch": 0.9678456591639871, "percentage": 32.26, "elapsed_time": "0:05:27", "remaining_time": "0:11:28"} +{"current_steps": 302, "total_steps": 933, "loss": 3.5147528648376465, "lr": 8.571752044442645e-06, "epoch": 0.9710610932475884, "percentage": 32.37, "elapsed_time": "0:05:29", "remaining_time": "0:11:27"} +{"current_steps": 303, "total_steps": 933, "loss": 2.6513309478759766, "lr": 8.558625430035537e-06, "epoch": 0.9742765273311897, "percentage": 32.48, "elapsed_time": "0:05:30", "remaining_time": "0:11:26"} +{"current_steps": 304, "total_steps": 933, "loss": 2.8666458129882812, "lr": 8.54544892056326e-06, "epoch": 0.977491961414791, "percentage": 32.58, "elapsed_time": "0:05:31", "remaining_time": "0:11:25"} +{"current_steps": 305, "total_steps": 933, "loss": 3.155954360961914, "lr": 8.53222270077209e-06, "epoch": 0.9807073954983923, "percentage": 32.69, "elapsed_time": "0:05:32", "remaining_time": "0:11:24"} +{"current_steps": 306, "total_steps": 933, "loss": 2.705427646636963, "lr": 8.518946956105288e-06, "epoch": 0.9839228295819936, "percentage": 32.8, "elapsed_time": "0:05:33", "remaining_time": "0:11:23"} +{"current_steps": 307, "total_steps": 933, "loss": 2.7899868488311768, "lr": 8.505621872700493e-06, "epoch": 0.9871382636655949, "percentage": 32.9, "elapsed_time": "0:05:34", "remaining_time": "0:11:21"} +{"current_steps": 308, "total_steps": 933, "loss": 3.3897032737731934, "lr": 8.492247637387123e-06, "epoch": 0.9903536977491961, "percentage": 33.01, "elapsed_time": "0:05:35", "remaining_time": "0:11:20"} +{"current_steps": 309, "total_steps": 933, "loss": 2.9029016494750977, "lr": 8.478824437683742e-06, "epoch": 0.9935691318327974, "percentage": 33.12, "elapsed_time": "0:05:36", "remaining_time": "0:11:19"} +{"current_steps": 310, "total_steps": 933, "loss": 2.8247499465942383, "lr": 8.465352461795443e-06, "epoch": 0.9967845659163987, "percentage": 33.23, "elapsed_time": "0:05:37", "remaining_time": "0:11:18"} +{"current_steps": 311, "total_steps": 933, "loss": 1.8072712421417236, "lr": 8.451831898611202e-06, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:05:38", "remaining_time": "0:11:17"} +{"current_steps": 312, "total_steps": 933, "loss": 1.6938456296920776, "lr": 8.438262937701232e-06, "epoch": 1.0032154340836013, "percentage": 33.44, "elapsed_time": "0:06:47", "remaining_time": "0:13:30"} +{"current_steps": 313, "total_steps": 933, "loss": 1.7548950910568237, "lr": 8.424645769314324e-06, "epoch": 1.0064308681672025, "percentage": 33.55, "elapsed_time": "0:06:48", "remaining_time": "0:13:29"} +{"current_steps": 314, "total_steps": 933, "loss": 1.2501029968261719, "lr": 8.410980584375184e-06, "epoch": 1.0096463022508038, "percentage": 33.65, "elapsed_time": "0:06:49", "remaining_time": "0:13:27"} +{"current_steps": 315, "total_steps": 933, "loss": 1.7188260555267334, "lr": 8.397267574481746e-06, "epoch": 1.0128617363344052, "percentage": 33.76, "elapsed_time": "0:06:50", "remaining_time": "0:13:25"} +{"current_steps": 316, "total_steps": 933, "loss": 1.522129774093628, "lr": 8.3835069319025e-06, "epoch": 1.0160771704180065, "percentage": 33.87, "elapsed_time": "0:06:51", "remaining_time": "0:13:23"} +{"current_steps": 317, "total_steps": 933, "loss": 1.9183681011199951, "lr": 8.369698849573778e-06, "epoch": 1.0192926045016077, "percentage": 33.98, "elapsed_time": "0:06:52", "remaining_time": "0:13:22"} +{"current_steps": 318, "total_steps": 933, "loss": 1.1024775505065918, "lr": 8.355843521097071e-06, "epoch": 1.022508038585209, "percentage": 34.08, "elapsed_time": "0:06:53", "remaining_time": "0:13:20"} +{"current_steps": 319, "total_steps": 933, "loss": 1.8346397876739502, "lr": 8.341941140736292e-06, "epoch": 1.0257234726688103, "percentage": 34.19, "elapsed_time": "0:06:54", "remaining_time": "0:13:18"} +{"current_steps": 320, "total_steps": 933, "loss": 1.5781948566436768, "lr": 8.327991903415071e-06, "epoch": 1.0289389067524115, "percentage": 34.3, "elapsed_time": "0:06:56", "remaining_time": "0:13:16"} +{"current_steps": 321, "total_steps": 933, "loss": 2.0449671745300293, "lr": 8.313996004714007e-06, "epoch": 1.0321543408360128, "percentage": 34.41, "elapsed_time": "0:06:57", "remaining_time": "0:13:15"} +{"current_steps": 322, "total_steps": 933, "loss": 1.843071460723877, "lr": 8.29995364086794e-06, "epoch": 1.0353697749196142, "percentage": 34.51, "elapsed_time": "0:06:58", "remaining_time": "0:13:13"} +{"current_steps": 323, "total_steps": 933, "loss": 1.9475151300430298, "lr": 8.285865008763185e-06, "epoch": 1.0385852090032155, "percentage": 34.62, "elapsed_time": "0:06:59", "remaining_time": "0:13:11"} +{"current_steps": 324, "total_steps": 933, "loss": 1.6771236658096313, "lr": 8.271730305934781e-06, "epoch": 1.0418006430868167, "percentage": 34.73, "elapsed_time": "0:07:00", "remaining_time": "0:13:10"} +{"current_steps": 325, "total_steps": 933, "loss": 1.799647331237793, "lr": 8.257549730563726e-06, "epoch": 1.045016077170418, "percentage": 34.83, "elapsed_time": "0:07:01", "remaining_time": "0:13:08"} +{"current_steps": 326, "total_steps": 933, "loss": 1.5140706300735474, "lr": 8.24332348147418e-06, "epoch": 1.0482315112540193, "percentage": 34.94, "elapsed_time": "0:07:02", "remaining_time": "0:13:06"} +{"current_steps": 327, "total_steps": 933, "loss": 1.7540702819824219, "lr": 8.229051758130697e-06, "epoch": 1.0514469453376205, "percentage": 35.05, "elapsed_time": "0:07:03", "remaining_time": "0:13:05"} +{"current_steps": 328, "total_steps": 933, "loss": 1.0229500532150269, "lr": 8.214734760635418e-06, "epoch": 1.0546623794212218, "percentage": 35.16, "elapsed_time": "0:07:04", "remaining_time": "0:13:03"} +{"current_steps": 329, "total_steps": 933, "loss": 1.9475460052490234, "lr": 8.200372689725265e-06, "epoch": 1.0578778135048232, "percentage": 35.26, "elapsed_time": "0:07:05", "remaining_time": "0:13:01"} +{"current_steps": 330, "total_steps": 933, "loss": 1.4429758787155151, "lr": 8.185965746769134e-06, "epoch": 1.0610932475884245, "percentage": 35.37, "elapsed_time": "0:07:06", "remaining_time": "0:13:00"} +{"current_steps": 331, "total_steps": 933, "loss": 2.7338225841522217, "lr": 8.171514133765062e-06, "epoch": 1.0643086816720257, "percentage": 35.48, "elapsed_time": "0:07:08", "remaining_time": "0:12:58"} +{"current_steps": 332, "total_steps": 933, "loss": 2.250379800796509, "lr": 8.157018053337401e-06, "epoch": 1.067524115755627, "percentage": 35.58, "elapsed_time": "0:07:09", "remaining_time": "0:12:56"} +{"current_steps": 333, "total_steps": 933, "loss": 1.337722659111023, "lr": 8.142477708733977e-06, "epoch": 1.0707395498392283, "percentage": 35.69, "elapsed_time": "0:07:10", "remaining_time": "0:12:55"} +{"current_steps": 334, "total_steps": 933, "loss": 1.0721861124038696, "lr": 8.127893303823237e-06, "epoch": 1.0739549839228295, "percentage": 35.8, "elapsed_time": "0:07:11", "remaining_time": "0:12:53"} +{"current_steps": 335, "total_steps": 933, "loss": 1.4026358127593994, "lr": 8.113265043091393e-06, "epoch": 1.077170418006431, "percentage": 35.91, "elapsed_time": "0:07:12", "remaining_time": "0:12:51"} +{"current_steps": 336, "total_steps": 933, "loss": 1.9554322957992554, "lr": 8.098593131639555e-06, "epoch": 1.0803858520900322, "percentage": 36.01, "elapsed_time": "0:07:13", "remaining_time": "0:12:50"} +{"current_steps": 337, "total_steps": 933, "loss": 1.3829636573791504, "lr": 8.083877775180851e-06, "epoch": 1.0836012861736335, "percentage": 36.12, "elapsed_time": "0:07:14", "remaining_time": "0:12:48"} +{"current_steps": 338, "total_steps": 933, "loss": 1.4771521091461182, "lr": 8.06911918003755e-06, "epoch": 1.0868167202572347, "percentage": 36.23, "elapsed_time": "0:07:15", "remaining_time": "0:12:46"} +{"current_steps": 339, "total_steps": 933, "loss": 1.9953043460845947, "lr": 8.054317553138164e-06, "epoch": 1.090032154340836, "percentage": 36.33, "elapsed_time": "0:07:16", "remaining_time": "0:12:45"} +{"current_steps": 340, "total_steps": 933, "loss": 1.8568346500396729, "lr": 8.039473102014552e-06, "epoch": 1.0932475884244373, "percentage": 36.44, "elapsed_time": "0:07:17", "remaining_time": "0:12:43"} +{"current_steps": 341, "total_steps": 933, "loss": 1.0092703104019165, "lr": 8.024586034798998e-06, "epoch": 1.0964630225080385, "percentage": 36.55, "elapsed_time": "0:07:18", "remaining_time": "0:12:42"} +{"current_steps": 342, "total_steps": 933, "loss": 1.3574286699295044, "lr": 8.00965656022131e-06, "epoch": 1.09967845659164, "percentage": 36.66, "elapsed_time": "0:07:20", "remaining_time": "0:12:40"} +{"current_steps": 343, "total_steps": 933, "loss": 1.5812814235687256, "lr": 7.994684887605877e-06, "epoch": 1.1028938906752412, "percentage": 36.76, "elapsed_time": "0:07:21", "remaining_time": "0:12:38"} +{"current_steps": 344, "total_steps": 933, "loss": 1.5962257385253906, "lr": 7.97967122686875e-06, "epoch": 1.1061093247588425, "percentage": 36.87, "elapsed_time": "0:07:22", "remaining_time": "0:12:37"} +{"current_steps": 345, "total_steps": 933, "loss": 1.9959800243377686, "lr": 7.964615788514683e-06, "epoch": 1.1093247588424437, "percentage": 36.98, "elapsed_time": "0:07:23", "remaining_time": "0:12:35"} +{"current_steps": 346, "total_steps": 933, "loss": 1.4967670440673828, "lr": 7.949518783634191e-06, "epoch": 1.112540192926045, "percentage": 37.08, "elapsed_time": "0:07:24", "remaining_time": "0:12:33"} +{"current_steps": 347, "total_steps": 933, "loss": 1.5423383712768555, "lr": 7.934380423900591e-06, "epoch": 1.1157556270096463, "percentage": 37.19, "elapsed_time": "0:07:25", "remaining_time": "0:12:32"} +{"current_steps": 348, "total_steps": 933, "loss": 0.7710652351379395, "lr": 7.919200921567029e-06, "epoch": 1.1189710610932475, "percentage": 37.3, "elapsed_time": "0:07:26", "remaining_time": "0:12:30"} +{"current_steps": 349, "total_steps": 933, "loss": 1.6633532047271729, "lr": 7.903980489463507e-06, "epoch": 1.122186495176849, "percentage": 37.41, "elapsed_time": "0:07:27", "remaining_time": "0:12:29"} +{"current_steps": 350, "total_steps": 933, "loss": 2.7766261100769043, "lr": 7.8887193409939e-06, "epoch": 1.1254019292604502, "percentage": 37.51, "elapsed_time": "0:07:28", "remaining_time": "0:12:27"} +{"current_steps": 351, "total_steps": 933, "loss": 1.6282963752746582, "lr": 7.87341769013296e-06, "epoch": 1.1286173633440515, "percentage": 37.62, "elapsed_time": "0:07:29", "remaining_time": "0:12:25"} +{"current_steps": 352, "total_steps": 933, "loss": 1.9692919254302979, "lr": 7.858075751423319e-06, "epoch": 1.1318327974276527, "percentage": 37.73, "elapsed_time": "0:07:30", "remaining_time": "0:12:24"} +{"current_steps": 353, "total_steps": 933, "loss": 1.912774682044983, "lr": 7.84269373997248e-06, "epoch": 1.135048231511254, "percentage": 37.83, "elapsed_time": "0:07:32", "remaining_time": "0:12:22"} +{"current_steps": 354, "total_steps": 933, "loss": 1.8513641357421875, "lr": 7.827271871449803e-06, "epoch": 1.1382636655948553, "percentage": 37.94, "elapsed_time": "0:07:33", "remaining_time": "0:12:21"} +{"current_steps": 355, "total_steps": 933, "loss": 1.7648791074752808, "lr": 7.811810362083476e-06, "epoch": 1.1414790996784565, "percentage": 38.05, "elapsed_time": "0:07:34", "remaining_time": "0:12:19"} +{"current_steps": 356, "total_steps": 933, "loss": 1.8777326345443726, "lr": 7.79630942865749e-06, "epoch": 1.144694533762058, "percentage": 38.16, "elapsed_time": "0:07:35", "remaining_time": "0:12:17"} +{"current_steps": 357, "total_steps": 933, "loss": 1.4060571193695068, "lr": 7.780769288508594e-06, "epoch": 1.1479099678456592, "percentage": 38.26, "elapsed_time": "0:07:36", "remaining_time": "0:12:16"} +{"current_steps": 358, "total_steps": 933, "loss": 1.8052624464035034, "lr": 7.76519015952325e-06, "epoch": 1.1511254019292605, "percentage": 38.37, "elapsed_time": "0:07:37", "remaining_time": "0:12:14"} +{"current_steps": 359, "total_steps": 933, "loss": 1.251237392425537, "lr": 7.749572260134578e-06, "epoch": 1.1543408360128617, "percentage": 38.48, "elapsed_time": "0:07:38", "remaining_time": "0:12:13"} +{"current_steps": 360, "total_steps": 933, "loss": 1.7426929473876953, "lr": 7.733915809319295e-06, "epoch": 1.157556270096463, "percentage": 38.59, "elapsed_time": "0:07:39", "remaining_time": "0:12:11"} +{"current_steps": 361, "total_steps": 933, "loss": 1.3306620121002197, "lr": 7.718221026594638e-06, "epoch": 1.1607717041800643, "percentage": 38.69, "elapsed_time": "0:07:40", "remaining_time": "0:12:10"} +{"current_steps": 362, "total_steps": 933, "loss": 1.1201272010803223, "lr": 7.7024881320153e-06, "epoch": 1.1639871382636655, "percentage": 38.8, "elapsed_time": "0:07:41", "remaining_time": "0:12:08"} +{"current_steps": 363, "total_steps": 933, "loss": 1.2232224941253662, "lr": 7.686717346170323e-06, "epoch": 1.167202572347267, "percentage": 38.91, "elapsed_time": "0:07:42", "remaining_time": "0:12:06"} +{"current_steps": 364, "total_steps": 933, "loss": 1.5430934429168701, "lr": 7.67090889018003e-06, "epoch": 1.1704180064308682, "percentage": 39.01, "elapsed_time": "0:07:43", "remaining_time": "0:12:05"} +{"current_steps": 365, "total_steps": 933, "loss": 1.1287882328033447, "lr": 7.655062985692905e-06, "epoch": 1.1736334405144695, "percentage": 39.12, "elapsed_time": "0:07:45", "remaining_time": "0:12:03"} +{"current_steps": 366, "total_steps": 933, "loss": 1.8235803842544556, "lr": 7.639179854882499e-06, "epoch": 1.1768488745980707, "percentage": 39.23, "elapsed_time": "0:07:46", "remaining_time": "0:12:02"} +{"current_steps": 367, "total_steps": 933, "loss": 1.5709896087646484, "lr": 7.623259720444305e-06, "epoch": 1.180064308681672, "percentage": 39.34, "elapsed_time": "0:07:47", "remaining_time": "0:12:00"} +{"current_steps": 368, "total_steps": 933, "loss": 1.3701753616333008, "lr": 7.6073028055926375e-06, "epoch": 1.1832797427652733, "percentage": 39.44, "elapsed_time": "0:07:48", "remaining_time": "0:11:59"} +{"current_steps": 369, "total_steps": 933, "loss": 1.8047711849212646, "lr": 7.591309334057511e-06, "epoch": 1.1864951768488745, "percentage": 39.55, "elapsed_time": "0:07:49", "remaining_time": "0:11:57"} +{"current_steps": 370, "total_steps": 933, "loss": 1.3536028861999512, "lr": 7.5752795300814915e-06, "epoch": 1.189710610932476, "percentage": 39.66, "elapsed_time": "0:07:50", "remaining_time": "0:11:55"} +{"current_steps": 371, "total_steps": 933, "loss": 1.7526684999465942, "lr": 7.5592136184165586e-06, "epoch": 1.1929260450160772, "percentage": 39.76, "elapsed_time": "0:07:51", "remaining_time": "0:11:54"} +{"current_steps": 372, "total_steps": 933, "loss": 1.9506163597106934, "lr": 7.543111824320956e-06, "epoch": 1.1961414790996785, "percentage": 39.87, "elapsed_time": "0:07:52", "remaining_time": "0:11:52"} +{"current_steps": 373, "total_steps": 933, "loss": 1.4558610916137695, "lr": 7.526974373556031e-06, "epoch": 1.1993569131832797, "percentage": 39.98, "elapsed_time": "0:07:53", "remaining_time": "0:11:51"} +{"current_steps": 374, "total_steps": 933, "loss": 3.8982784748077393, "lr": 7.510801492383064e-06, "epoch": 1.202572347266881, "percentage": 40.09, "elapsed_time": "0:07:54", "remaining_time": "0:11:49"} +{"current_steps": 375, "total_steps": 933, "loss": 1.4054985046386719, "lr": 7.494593407560105e-06, "epoch": 1.2057877813504823, "percentage": 40.19, "elapsed_time": "0:07:56", "remaining_time": "0:11:48"} +{"current_steps": 376, "total_steps": 933, "loss": 1.4290287494659424, "lr": 7.4783503463387915e-06, "epoch": 1.2090032154340835, "percentage": 40.3, "elapsed_time": "0:07:57", "remaining_time": "0:11:46"} +{"current_steps": 377, "total_steps": 933, "loss": 2.6208114624023438, "lr": 7.462072536461158e-06, "epoch": 1.212218649517685, "percentage": 40.41, "elapsed_time": "0:07:58", "remaining_time": "0:11:45"} +{"current_steps": 378, "total_steps": 933, "loss": 1.4879982471466064, "lr": 7.445760206156443e-06, "epoch": 1.2154340836012862, "percentage": 40.51, "elapsed_time": "0:07:59", "remaining_time": "0:11:43"} +{"current_steps": 379, "total_steps": 933, "loss": 1.5636539459228516, "lr": 7.429413584137899e-06, "epoch": 1.2186495176848875, "percentage": 40.62, "elapsed_time": "0:08:00", "remaining_time": "0:11:42"} +{"current_steps": 380, "total_steps": 933, "loss": 2.5596606731414795, "lr": 7.413032899599575e-06, "epoch": 1.2218649517684887, "percentage": 40.73, "elapsed_time": "0:08:01", "remaining_time": "0:11:40"} +{"current_steps": 381, "total_steps": 933, "loss": 1.7480653524398804, "lr": 7.3966183822131055e-06, "epoch": 1.22508038585209, "percentage": 40.84, "elapsed_time": "0:08:02", "remaining_time": "0:11:39"} +{"current_steps": 382, "total_steps": 933, "loss": 1.1983137130737305, "lr": 7.380170262124491e-06, "epoch": 1.2282958199356913, "percentage": 40.94, "elapsed_time": "0:08:03", "remaining_time": "0:11:37"} +{"current_steps": 383, "total_steps": 933, "loss": 2.4147493839263916, "lr": 7.363688769950874e-06, "epoch": 1.2315112540192925, "percentage": 41.05, "elapsed_time": "0:08:04", "remaining_time": "0:11:36"} +{"current_steps": 384, "total_steps": 933, "loss": 1.5930522680282593, "lr": 7.347174136777303e-06, "epoch": 1.234726688102894, "percentage": 41.16, "elapsed_time": "0:08:05", "remaining_time": "0:11:34"} +{"current_steps": 385, "total_steps": 933, "loss": 1.545811414718628, "lr": 7.33062659415349e-06, "epoch": 1.2379421221864952, "percentage": 41.26, "elapsed_time": "0:08:06", "remaining_time": "0:11:33"} +{"current_steps": 386, "total_steps": 933, "loss": 1.4453141689300537, "lr": 7.314046374090569e-06, "epoch": 1.2411575562700965, "percentage": 41.37, "elapsed_time": "0:08:08", "remaining_time": "0:11:31"} +{"current_steps": 387, "total_steps": 933, "loss": 1.6127538681030273, "lr": 7.297433709057837e-06, "epoch": 1.2443729903536977, "percentage": 41.48, "elapsed_time": "0:08:09", "remaining_time": "0:11:30"} +{"current_steps": 388, "total_steps": 933, "loss": 1.3530693054199219, "lr": 7.280788831979504e-06, "epoch": 1.247588424437299, "percentage": 41.59, "elapsed_time": "0:08:10", "remaining_time": "0:11:28"} +{"current_steps": 389, "total_steps": 933, "loss": 2.167546510696411, "lr": 7.264111976231416e-06, "epoch": 1.2508038585209003, "percentage": 41.69, "elapsed_time": "0:08:11", "remaining_time": "0:11:27"} +{"current_steps": 390, "total_steps": 933, "loss": 1.1884214878082275, "lr": 7.247403375637789e-06, "epoch": 1.2540192926045015, "percentage": 41.8, "elapsed_time": "0:08:12", "remaining_time": "0:11:25"} +{"current_steps": 391, "total_steps": 933, "loss": 1.8475289344787598, "lr": 7.230663264467932e-06, "epoch": 1.257234726688103, "percentage": 41.91, "elapsed_time": "0:08:13", "remaining_time": "0:11:24"} +{"current_steps": 392, "total_steps": 933, "loss": 1.3596510887145996, "lr": 7.213891877432957e-06, "epoch": 1.2604501607717042, "percentage": 42.02, "elapsed_time": "0:08:14", "remaining_time": "0:11:22"} +{"current_steps": 393, "total_steps": 933, "loss": 1.1813664436340332, "lr": 7.197089449682495e-06, "epoch": 1.2636655948553055, "percentage": 42.12, "elapsed_time": "0:08:15", "remaining_time": "0:11:21"} +{"current_steps": 394, "total_steps": 933, "loss": 1.5710866451263428, "lr": 7.180256216801392e-06, "epoch": 1.2668810289389068, "percentage": 42.23, "elapsed_time": "0:08:16", "remaining_time": "0:11:19"} +{"current_steps": 395, "total_steps": 933, "loss": 1.1523092985153198, "lr": 7.163392414806409e-06, "epoch": 1.270096463022508, "percentage": 42.34, "elapsed_time": "0:08:17", "remaining_time": "0:11:18"} +{"current_steps": 396, "total_steps": 933, "loss": 1.584272861480713, "lr": 7.146498280142917e-06, "epoch": 1.2733118971061093, "percentage": 42.44, "elapsed_time": "0:08:18", "remaining_time": "0:11:16"} +{"current_steps": 397, "total_steps": 933, "loss": 1.8063056468963623, "lr": 7.1295740496815715e-06, "epoch": 1.2765273311897105, "percentage": 42.55, "elapsed_time": "0:08:19", "remaining_time": "0:11:15"} +{"current_steps": 398, "total_steps": 933, "loss": 1.792249083518982, "lr": 7.112619960715004e-06, "epoch": 1.279742765273312, "percentage": 42.66, "elapsed_time": "0:08:21", "remaining_time": "0:11:13"} +{"current_steps": 399, "total_steps": 933, "loss": 1.5670925378799438, "lr": 7.095636250954481e-06, "epoch": 1.2829581993569132, "percentage": 42.77, "elapsed_time": "0:08:22", "remaining_time": "0:11:12"} +{"current_steps": 400, "total_steps": 933, "loss": 0.8326504230499268, "lr": 7.078623158526588e-06, "epoch": 1.2861736334405145, "percentage": 42.87, "elapsed_time": "0:08:23", "remaining_time": "0:11:10"} +{"current_steps": 401, "total_steps": 933, "loss": 1.3786863088607788, "lr": 7.061580921969875e-06, "epoch": 1.2893890675241158, "percentage": 42.98, "elapsed_time": "0:08:24", "remaining_time": "0:11:09"} +{"current_steps": 402, "total_steps": 933, "loss": 1.1054686307907104, "lr": 7.044509780231517e-06, "epoch": 1.292604501607717, "percentage": 43.09, "elapsed_time": "0:08:25", "remaining_time": "0:11:07"} +{"current_steps": 403, "total_steps": 933, "loss": 1.670744776725769, "lr": 7.027409972663972e-06, "epoch": 1.2958199356913183, "percentage": 43.19, "elapsed_time": "0:08:26", "remaining_time": "0:11:06"} +{"current_steps": 404, "total_steps": 933, "loss": 1.4025758504867554, "lr": 7.010281739021612e-06, "epoch": 1.2990353697749195, "percentage": 43.3, "elapsed_time": "0:08:27", "remaining_time": "0:11:04"} +{"current_steps": 405, "total_steps": 933, "loss": 1.3583598136901855, "lr": 6.993125319457371e-06, "epoch": 1.302250803858521, "percentage": 43.41, "elapsed_time": "0:08:28", "remaining_time": "0:11:03"} +{"current_steps": 406, "total_steps": 933, "loss": 1.1004712581634521, "lr": 6.975940954519372e-06, "epoch": 1.3054662379421222, "percentage": 43.52, "elapsed_time": "0:08:29", "remaining_time": "0:11:01"} +{"current_steps": 407, "total_steps": 933, "loss": 1.2955303192138672, "lr": 6.958728885147559e-06, "epoch": 1.3086816720257235, "percentage": 43.62, "elapsed_time": "0:08:30", "remaining_time": "0:11:00"} +{"current_steps": 408, "total_steps": 933, "loss": 1.0670486688613892, "lr": 6.941489352670315e-06, "epoch": 1.3118971061093248, "percentage": 43.73, "elapsed_time": "0:08:31", "remaining_time": "0:10:58"} +{"current_steps": 409, "total_steps": 933, "loss": 1.9638640880584717, "lr": 6.92422259880108e-06, "epoch": 1.315112540192926, "percentage": 43.84, "elapsed_time": "0:08:32", "remaining_time": "0:10:57"} +{"current_steps": 410, "total_steps": 933, "loss": 1.5076138973236084, "lr": 6.9069288656349654e-06, "epoch": 1.3183279742765273, "percentage": 43.94, "elapsed_time": "0:08:34", "remaining_time": "0:10:55"} +{"current_steps": 411, "total_steps": 933, "loss": 1.0038764476776123, "lr": 6.8896083956453495e-06, "epoch": 1.3215434083601285, "percentage": 44.05, "elapsed_time": "0:08:35", "remaining_time": "0:10:54"} +{"current_steps": 412, "total_steps": 933, "loss": 1.8994210958480835, "lr": 6.87226143168049e-06, "epoch": 1.32475884244373, "percentage": 44.16, "elapsed_time": "0:08:36", "remaining_time": "0:10:52"} +{"current_steps": 413, "total_steps": 933, "loss": 1.7425453662872314, "lr": 6.8548882169601125e-06, "epoch": 1.3279742765273312, "percentage": 44.27, "elapsed_time": "0:08:37", "remaining_time": "0:10:51"} +{"current_steps": 414, "total_steps": 933, "loss": 0.9079417586326599, "lr": 6.837488995071999e-06, "epoch": 1.3311897106109325, "percentage": 44.37, "elapsed_time": "0:08:38", "remaining_time": "0:10:49"} +{"current_steps": 415, "total_steps": 933, "loss": 2.3625776767730713, "lr": 6.820064009968577e-06, "epoch": 1.3344051446945338, "percentage": 44.48, "elapsed_time": "0:08:39", "remaining_time": "0:10:48"} +{"current_steps": 416, "total_steps": 933, "loss": 1.4717687368392944, "lr": 6.802613505963496e-06, "epoch": 1.337620578778135, "percentage": 44.59, "elapsed_time": "0:08:40", "remaining_time": "0:10:46"} +{"current_steps": 417, "total_steps": 933, "loss": 1.8274226188659668, "lr": 6.7851377277282025e-06, "epoch": 1.3408360128617363, "percentage": 44.69, "elapsed_time": "0:08:41", "remaining_time": "0:10:45"} +{"current_steps": 418, "total_steps": 933, "loss": 0.9393669366836548, "lr": 6.767636920288514e-06, "epoch": 1.3440514469453375, "percentage": 44.8, "elapsed_time": "0:08:42", "remaining_time": "0:10:43"} +{"current_steps": 419, "total_steps": 933, "loss": 2.2995893955230713, "lr": 6.7501113290211715e-06, "epoch": 1.347266881028939, "percentage": 44.91, "elapsed_time": "0:08:43", "remaining_time": "0:10:42"} +{"current_steps": 420, "total_steps": 933, "loss": 0.9605876207351685, "lr": 6.732561199650417e-06, "epoch": 1.3504823151125402, "percentage": 45.02, "elapsed_time": "0:08:44", "remaining_time": "0:10:41"} +{"current_steps": 421, "total_steps": 933, "loss": 1.400538682937622, "lr": 6.71498677824453e-06, "epoch": 1.3536977491961415, "percentage": 45.12, "elapsed_time": "0:08:45", "remaining_time": "0:10:39"} +{"current_steps": 422, "total_steps": 933, "loss": 1.5933213233947754, "lr": 6.69738831121239e-06, "epoch": 1.3569131832797428, "percentage": 45.23, "elapsed_time": "0:08:46", "remaining_time": "0:10:38"} +{"current_steps": 423, "total_steps": 933, "loss": 1.3214967250823975, "lr": 6.679766045300017e-06, "epoch": 1.360128617363344, "percentage": 45.34, "elapsed_time": "0:08:48", "remaining_time": "0:10:36"} +{"current_steps": 424, "total_steps": 933, "loss": 1.8377995491027832, "lr": 6.66212022758711e-06, "epoch": 1.3633440514469453, "percentage": 45.44, "elapsed_time": "0:08:49", "remaining_time": "0:10:35"} +{"current_steps": 425, "total_steps": 933, "loss": 2.171910047531128, "lr": 6.644451105483588e-06, "epoch": 1.3665594855305465, "percentage": 45.55, "elapsed_time": "0:08:50", "remaining_time": "0:10:33"} +{"current_steps": 426, "total_steps": 933, "loss": 1.01298987865448, "lr": 6.626758926726118e-06, "epoch": 1.369774919614148, "percentage": 45.66, "elapsed_time": "0:08:51", "remaining_time": "0:10:32"} +{"current_steps": 427, "total_steps": 933, "loss": 1.3563112020492554, "lr": 6.609043939374638e-06, "epoch": 1.3729903536977492, "percentage": 45.77, "elapsed_time": "0:08:52", "remaining_time": "0:10:30"} +{"current_steps": 428, "total_steps": 933, "loss": 1.1524646282196045, "lr": 6.591306391808886e-06, "epoch": 1.3762057877813505, "percentage": 45.87, "elapsed_time": "0:08:53", "remaining_time": "0:10:29"} +{"current_steps": 429, "total_steps": 933, "loss": 1.288474202156067, "lr": 6.5735465327249125e-06, "epoch": 1.3794212218649518, "percentage": 45.98, "elapsed_time": "0:08:54", "remaining_time": "0:10:27"} +{"current_steps": 430, "total_steps": 933, "loss": 1.6986860036849976, "lr": 6.555764611131599e-06, "epoch": 1.382636655948553, "percentage": 46.09, "elapsed_time": "0:08:55", "remaining_time": "0:10:26"} +{"current_steps": 431, "total_steps": 933, "loss": 1.1839923858642578, "lr": 6.537960876347155e-06, "epoch": 1.3858520900321543, "percentage": 46.2, "elapsed_time": "0:08:56", "remaining_time": "0:10:25"} +{"current_steps": 432, "total_steps": 933, "loss": 1.0426067113876343, "lr": 6.520135577995636e-06, "epoch": 1.3890675241157555, "percentage": 46.3, "elapsed_time": "0:08:57", "remaining_time": "0:10:23"} +{"current_steps": 433, "total_steps": 933, "loss": 1.5624547004699707, "lr": 6.502288966003437e-06, "epoch": 1.392282958199357, "percentage": 46.41, "elapsed_time": "0:08:58", "remaining_time": "0:10:22"} +{"current_steps": 434, "total_steps": 933, "loss": 1.6425249576568604, "lr": 6.48442129059579e-06, "epoch": 1.3954983922829582, "percentage": 46.52, "elapsed_time": "0:08:59", "remaining_time": "0:10:20"} +{"current_steps": 435, "total_steps": 933, "loss": 1.1195060014724731, "lr": 6.4665328022932505e-06, "epoch": 1.3987138263665595, "percentage": 46.62, "elapsed_time": "0:09:01", "remaining_time": "0:10:19"} +{"current_steps": 436, "total_steps": 933, "loss": 1.9613807201385498, "lr": 6.448623751908193e-06, "epoch": 1.4019292604501608, "percentage": 46.73, "elapsed_time": "0:09:02", "remaining_time": "0:10:17"} +{"current_steps": 437, "total_steps": 933, "loss": 1.9818463325500488, "lr": 6.43069439054129e-06, "epoch": 1.405144694533762, "percentage": 46.84, "elapsed_time": "0:09:03", "remaining_time": "0:10:16"} +{"current_steps": 438, "total_steps": 933, "loss": 1.321189284324646, "lr": 6.4127449695779894e-06, "epoch": 1.4083601286173635, "percentage": 46.95, "elapsed_time": "0:09:04", "remaining_time": "0:10:15"} +{"current_steps": 439, "total_steps": 933, "loss": 1.653143048286438, "lr": 6.394775740684996e-06, "epoch": 1.4115755627009645, "percentage": 47.05, "elapsed_time": "0:09:05", "remaining_time": "0:10:13"} +{"current_steps": 440, "total_steps": 933, "loss": 1.3114051818847656, "lr": 6.376786955806735e-06, "epoch": 1.414790996784566, "percentage": 47.16, "elapsed_time": "0:09:06", "remaining_time": "0:10:12"} +{"current_steps": 441, "total_steps": 933, "loss": 1.6939886808395386, "lr": 6.358778867161829e-06, "epoch": 1.4180064308681672, "percentage": 47.27, "elapsed_time": "0:09:07", "remaining_time": "0:10:10"} +{"current_steps": 442, "total_steps": 933, "loss": 1.195266842842102, "lr": 6.340751727239551e-06, "epoch": 1.4212218649517685, "percentage": 47.37, "elapsed_time": "0:09:08", "remaining_time": "0:10:09"} +{"current_steps": 443, "total_steps": 933, "loss": 1.6877739429473877, "lr": 6.322705788796293e-06, "epoch": 1.4244372990353698, "percentage": 47.48, "elapsed_time": "0:09:09", "remaining_time": "0:10:08"} +{"current_steps": 444, "total_steps": 933, "loss": 1.8234769105911255, "lr": 6.304641304852017e-06, "epoch": 1.427652733118971, "percentage": 47.59, "elapsed_time": "0:09:10", "remaining_time": "0:10:06"} +{"current_steps": 445, "total_steps": 933, "loss": 1.707655429840088, "lr": 6.286558528686713e-06, "epoch": 1.4308681672025725, "percentage": 47.7, "elapsed_time": "0:09:11", "remaining_time": "0:10:05"} +{"current_steps": 446, "total_steps": 933, "loss": 1.685136318206787, "lr": 6.268457713836839e-06, "epoch": 1.4340836012861735, "percentage": 47.8, "elapsed_time": "0:09:12", "remaining_time": "0:10:03"} +{"current_steps": 447, "total_steps": 933, "loss": 1.8174901008605957, "lr": 6.250339114091775e-06, "epoch": 1.437299035369775, "percentage": 47.91, "elapsed_time": "0:09:14", "remaining_time": "0:10:02"} +{"current_steps": 448, "total_steps": 933, "loss": 1.2479822635650635, "lr": 6.2322029834902565e-06, "epoch": 1.4405144694533762, "percentage": 48.02, "elapsed_time": "0:09:15", "remaining_time": "0:10:01"} +{"current_steps": 449, "total_steps": 933, "loss": 2.1364529132843018, "lr": 6.214049576316824e-06, "epoch": 1.4437299035369775, "percentage": 48.12, "elapsed_time": "0:09:16", "remaining_time": "0:09:59"} +{"current_steps": 450, "total_steps": 933, "loss": 1.1708037853240967, "lr": 6.195879147098246e-06, "epoch": 1.4469453376205788, "percentage": 48.23, "elapsed_time": "0:09:17", "remaining_time": "0:09:58"} +{"current_steps": 451, "total_steps": 933, "loss": 1.7463445663452148, "lr": 6.177691950599953e-06, "epoch": 1.45016077170418, "percentage": 48.34, "elapsed_time": "0:09:18", "remaining_time": "0:09:56"} +{"current_steps": 452, "total_steps": 933, "loss": 1.4239546060562134, "lr": 6.159488241822473e-06, "epoch": 1.4533762057877815, "percentage": 48.45, "elapsed_time": "0:09:19", "remaining_time": "0:09:55"} +{"current_steps": 453, "total_steps": 933, "loss": 1.0984452962875366, "lr": 6.141268275997848e-06, "epoch": 1.4565916398713825, "percentage": 48.55, "elapsed_time": "0:09:20", "remaining_time": "0:09:53"} +{"current_steps": 454, "total_steps": 933, "loss": 1.6430256366729736, "lr": 6.123032308586059e-06, "epoch": 1.459807073954984, "percentage": 48.66, "elapsed_time": "0:09:21", "remaining_time": "0:09:52"} +{"current_steps": 455, "total_steps": 933, "loss": 1.4744967222213745, "lr": 6.10478059527144e-06, "epoch": 1.4630225080385852, "percentage": 48.77, "elapsed_time": "0:09:22", "remaining_time": "0:09:51"} +{"current_steps": 456, "total_steps": 933, "loss": 1.6440513134002686, "lr": 6.086513391959101e-06, "epoch": 1.4662379421221865, "percentage": 48.87, "elapsed_time": "0:09:23", "remaining_time": "0:09:49"} +{"current_steps": 457, "total_steps": 933, "loss": 1.4198493957519531, "lr": 6.068230954771334e-06, "epoch": 1.4694533762057878, "percentage": 48.98, "elapsed_time": "0:09:24", "remaining_time": "0:09:48"} +{"current_steps": 458, "total_steps": 933, "loss": 1.5990746021270752, "lr": 6.0499335400440216e-06, "epoch": 1.472668810289389, "percentage": 49.09, "elapsed_time": "0:09:25", "remaining_time": "0:09:46"} +{"current_steps": 459, "total_steps": 933, "loss": 1.1847198009490967, "lr": 6.031621404323046e-06, "epoch": 1.4758842443729905, "percentage": 49.2, "elapsed_time": "0:09:27", "remaining_time": "0:09:45"} +{"current_steps": 460, "total_steps": 933, "loss": 0.9134633541107178, "lr": 6.013294804360689e-06, "epoch": 1.4790996784565915, "percentage": 49.3, "elapsed_time": "0:09:28", "remaining_time": "0:09:44"} +{"current_steps": 461, "total_steps": 933, "loss": 2.0340170860290527, "lr": 5.9949539971120405e-06, "epoch": 1.482315112540193, "percentage": 49.41, "elapsed_time": "0:09:29", "remaining_time": "0:09:42"} +{"current_steps": 462, "total_steps": 933, "loss": 1.8881299495697021, "lr": 5.976599239731381e-06, "epoch": 1.4855305466237942, "percentage": 49.52, "elapsed_time": "0:09:30", "remaining_time": "0:09:41"} +{"current_steps": 463, "total_steps": 933, "loss": 1.7137913703918457, "lr": 5.9582307895685876e-06, "epoch": 1.4887459807073955, "percentage": 49.62, "elapsed_time": "0:09:31", "remaining_time": "0:09:39"} +{"current_steps": 464, "total_steps": 933, "loss": 1.2723381519317627, "lr": 5.939848904165519e-06, "epoch": 1.4919614147909968, "percentage": 49.73, "elapsed_time": "0:09:32", "remaining_time": "0:09:38"} +{"current_steps": 465, "total_steps": 933, "loss": 1.1736359596252441, "lr": 5.9214538412524155e-06, "epoch": 1.495176848874598, "percentage": 49.84, "elapsed_time": "0:09:33", "remaining_time": "0:09:37"} +{"current_steps": 466, "total_steps": 933, "loss": 1.3090627193450928, "lr": 5.903045858744271e-06, "epoch": 1.4983922829581995, "percentage": 49.95, "elapsed_time": "0:09:34", "remaining_time": "0:09:35"} +{"current_steps": 467, "total_steps": 933, "loss": 1.4756783246994019, "lr": 5.884625214737224e-06, "epoch": 1.5016077170418005, "percentage": 50.05, "elapsed_time": "0:09:35", "remaining_time": "0:09:34"} +{"current_steps": 468, "total_steps": 933, "loss": 1.6622803211212158, "lr": 5.866192167504941e-06, "epoch": 1.504823151125402, "percentage": 50.16, "elapsed_time": "0:09:36", "remaining_time": "0:09:33"} +{"current_steps": 469, "total_steps": 933, "loss": 1.6512175798416138, "lr": 5.84774697549499e-06, "epoch": 1.5080385852090032, "percentage": 50.27, "elapsed_time": "0:09:37", "remaining_time": "0:09:31"} +{"current_steps": 470, "total_steps": 933, "loss": 1.4214634895324707, "lr": 5.8292898973252246e-06, "epoch": 1.5112540192926045, "percentage": 50.38, "elapsed_time": "0:09:38", "remaining_time": "0:09:30"} +{"current_steps": 471, "total_steps": 933, "loss": 1.1714420318603516, "lr": 5.810821191780146e-06, "epoch": 1.5144694533762058, "percentage": 50.48, "elapsed_time": "0:09:39", "remaining_time": "0:09:28"} +{"current_steps": 472, "total_steps": 933, "loss": 1.4205889701843262, "lr": 5.792341117807284e-06, "epoch": 1.517684887459807, "percentage": 50.59, "elapsed_time": "0:09:40", "remaining_time": "0:09:27"} +{"current_steps": 473, "total_steps": 933, "loss": 1.419925570487976, "lr": 5.773849934513568e-06, "epoch": 1.5209003215434085, "percentage": 50.7, "elapsed_time": "0:09:42", "remaining_time": "0:09:26"} +{"current_steps": 474, "total_steps": 933, "loss": 1.8584307432174683, "lr": 5.755347901161683e-06, "epoch": 1.5241157556270095, "percentage": 50.8, "elapsed_time": "0:09:43", "remaining_time": "0:09:24"} +{"current_steps": 475, "total_steps": 933, "loss": 2.2646093368530273, "lr": 5.736835277166446e-06, "epoch": 1.527331189710611, "percentage": 50.91, "elapsed_time": "0:09:44", "remaining_time": "0:09:23"} +{"current_steps": 476, "total_steps": 933, "loss": 0.9686712622642517, "lr": 5.7183123220911615e-06, "epoch": 1.5305466237942122, "percentage": 51.02, "elapsed_time": "0:09:45", "remaining_time": "0:09:21"} +{"current_steps": 477, "total_steps": 933, "loss": 0.964187741279602, "lr": 5.699779295643988e-06, "epoch": 1.5337620578778135, "percentage": 51.13, "elapsed_time": "0:09:46", "remaining_time": "0:09:20"} +{"current_steps": 478, "total_steps": 933, "loss": 1.0403389930725098, "lr": 5.68123645767429e-06, "epoch": 1.5369774919614148, "percentage": 51.23, "elapsed_time": "0:09:47", "remaining_time": "0:09:19"} +{"current_steps": 479, "total_steps": 933, "loss": 1.6527446508407593, "lr": 5.662684068169002e-06, "epoch": 1.540192926045016, "percentage": 51.34, "elapsed_time": "0:09:48", "remaining_time": "0:09:17"} +{"current_steps": 480, "total_steps": 933, "loss": 1.4375754594802856, "lr": 5.644122387248975e-06, "epoch": 1.5434083601286175, "percentage": 51.45, "elapsed_time": "0:09:49", "remaining_time": "0:09:16"} +{"current_steps": 481, "total_steps": 933, "loss": 1.3001888990402222, "lr": 5.6255516751653376e-06, "epoch": 1.5466237942122185, "percentage": 51.55, "elapsed_time": "0:09:50", "remaining_time": "0:09:15"} +{"current_steps": 482, "total_steps": 933, "loss": 1.9102635383605957, "lr": 5.606972192295841e-06, "epoch": 1.54983922829582, "percentage": 51.66, "elapsed_time": "0:09:51", "remaining_time": "0:09:13"} +{"current_steps": 483, "total_steps": 933, "loss": 1.1081337928771973, "lr": 5.588384199141211e-06, "epoch": 1.5530546623794212, "percentage": 51.77, "elapsed_time": "0:09:52", "remaining_time": "0:09:12"} +{"current_steps": 484, "total_steps": 933, "loss": 1.2650421857833862, "lr": 5.569787956321496e-06, "epoch": 1.5562700964630225, "percentage": 51.88, "elapsed_time": "0:09:53", "remaining_time": "0:09:10"} +{"current_steps": 485, "total_steps": 933, "loss": 1.2185710668563843, "lr": 5.551183724572411e-06, "epoch": 1.5594855305466238, "percentage": 51.98, "elapsed_time": "0:09:54", "remaining_time": "0:09:09"} +{"current_steps": 486, "total_steps": 933, "loss": 1.2483716011047363, "lr": 5.532571764741686e-06, "epoch": 1.562700964630225, "percentage": 52.09, "elapsed_time": "0:09:56", "remaining_time": "0:09:08"} +{"current_steps": 487, "total_steps": 933, "loss": 1.7064783573150635, "lr": 5.513952337785398e-06, "epoch": 1.5659163987138265, "percentage": 52.2, "elapsed_time": "0:09:57", "remaining_time": "0:09:06"} +{"current_steps": 488, "total_steps": 933, "loss": 1.9306564331054688, "lr": 5.4953257047643284e-06, "epoch": 1.5691318327974275, "percentage": 52.3, "elapsed_time": "0:09:58", "remaining_time": "0:09:05"} +{"current_steps": 489, "total_steps": 933, "loss": 1.796401858329773, "lr": 5.476692126840287e-06, "epoch": 1.572347266881029, "percentage": 52.41, "elapsed_time": "0:09:59", "remaining_time": "0:09:04"} +{"current_steps": 490, "total_steps": 933, "loss": 1.9565566778182983, "lr": 5.458051865272462e-06, "epoch": 1.5755627009646302, "percentage": 52.52, "elapsed_time": "0:10:00", "remaining_time": "0:09:02"} +{"current_steps": 491, "total_steps": 933, "loss": 1.7262601852416992, "lr": 5.439405181413752e-06, "epoch": 1.5787781350482315, "percentage": 52.63, "elapsed_time": "0:10:01", "remaining_time": "0:09:01"} +{"current_steps": 492, "total_steps": 933, "loss": 1.2656997442245483, "lr": 5.420752336707098e-06, "epoch": 1.5819935691318328, "percentage": 52.73, "elapsed_time": "0:10:02", "remaining_time": "0:09:00"} +{"current_steps": 493, "total_steps": 933, "loss": 1.3866935968399048, "lr": 5.402093592681823e-06, "epoch": 1.585209003215434, "percentage": 52.84, "elapsed_time": "0:10:03", "remaining_time": "0:08:58"} +{"current_steps": 494, "total_steps": 933, "loss": 0.8932974934577942, "lr": 5.383429210949967e-06, "epoch": 1.5884244372990355, "percentage": 52.95, "elapsed_time": "0:10:04", "remaining_time": "0:08:57"} +{"current_steps": 495, "total_steps": 933, "loss": 1.5216851234436035, "lr": 5.36475945320261e-06, "epoch": 1.5916398713826365, "percentage": 53.05, "elapsed_time": "0:10:05", "remaining_time": "0:08:55"} +{"current_steps": 496, "total_steps": 933, "loss": 1.0728681087493896, "lr": 5.346084581206215e-06, "epoch": 1.594855305466238, "percentage": 53.16, "elapsed_time": "0:10:06", "remaining_time": "0:08:54"} +{"current_steps": 497, "total_steps": 933, "loss": 1.913758635520935, "lr": 5.327404856798944e-06, "epoch": 1.5980707395498392, "percentage": 53.27, "elapsed_time": "0:10:07", "remaining_time": "0:08:53"} +{"current_steps": 498, "total_steps": 933, "loss": 1.6854526996612549, "lr": 5.3087205418870014e-06, "epoch": 1.6012861736334405, "percentage": 53.38, "elapsed_time": "0:10:08", "remaining_time": "0:08:51"} +{"current_steps": 499, "total_steps": 933, "loss": 1.147437334060669, "lr": 5.29003189844095e-06, "epoch": 1.6045016077170418, "percentage": 53.48, "elapsed_time": "0:10:10", "remaining_time": "0:08:50"} +{"current_steps": 500, "total_steps": 933, "loss": 2.265592098236084, "lr": 5.2713391884920415e-06, "epoch": 1.607717041800643, "percentage": 53.59, "elapsed_time": "0:10:11", "remaining_time": "0:08:49"} +{"current_steps": 501, "total_steps": 933, "loss": 1.6772857904434204, "lr": 5.2526426741285465e-06, "epoch": 1.6109324758842445, "percentage": 53.7, "elapsed_time": "0:10:12", "remaining_time": "0:08:47"} +{"current_steps": 502, "total_steps": 933, "loss": 1.633618712425232, "lr": 5.233942617492077e-06, "epoch": 1.6141479099678455, "percentage": 53.8, "elapsed_time": "0:10:13", "remaining_time": "0:08:46"} +{"current_steps": 503, "total_steps": 933, "loss": 1.5846664905548096, "lr": 5.215239280773908e-06, "epoch": 1.617363344051447, "percentage": 53.91, "elapsed_time": "0:10:14", "remaining_time": "0:08:45"} +{"current_steps": 504, "total_steps": 933, "loss": 1.4743802547454834, "lr": 5.196532926211307e-06, "epoch": 1.6205787781350482, "percentage": 54.02, "elapsed_time": "0:10:15", "remaining_time": "0:08:43"} +{"current_steps": 505, "total_steps": 933, "loss": 1.7887756824493408, "lr": 5.177823816083853e-06, "epoch": 1.6237942122186495, "percentage": 54.13, "elapsed_time": "0:10:16", "remaining_time": "0:08:42"} +{"current_steps": 506, "total_steps": 933, "loss": 1.8924931287765503, "lr": 5.15911221270976e-06, "epoch": 1.6270096463022508, "percentage": 54.23, "elapsed_time": "0:10:17", "remaining_time": "0:08:41"} +{"current_steps": 507, "total_steps": 933, "loss": 1.5837712287902832, "lr": 5.140398378442201e-06, "epoch": 1.630225080385852, "percentage": 54.34, "elapsed_time": "0:10:18", "remaining_time": "0:08:39"} +{"current_steps": 508, "total_steps": 933, "loss": 1.3969013690948486, "lr": 5.121682575665625e-06, "epoch": 1.6334405144694535, "percentage": 54.45, "elapsed_time": "0:10:19", "remaining_time": "0:08:38"} +{"current_steps": 509, "total_steps": 933, "loss": 1.3699804544448853, "lr": 5.102965066792085e-06, "epoch": 1.6366559485530545, "percentage": 54.56, "elapsed_time": "0:10:20", "remaining_time": "0:08:37"} +{"current_steps": 510, "total_steps": 933, "loss": 1.3550987243652344, "lr": 5.084246114257554e-06, "epoch": 1.639871382636656, "percentage": 54.66, "elapsed_time": "0:10:21", "remaining_time": "0:08:35"} +{"current_steps": 511, "total_steps": 933, "loss": 1.2851953506469727, "lr": 5.065525980518244e-06, "epoch": 1.6430868167202572, "percentage": 54.77, "elapsed_time": "0:10:23", "remaining_time": "0:08:34"} +{"current_steps": 512, "total_steps": 933, "loss": 1.3243783712387085, "lr": 5.046804928046933e-06, "epoch": 1.6463022508038585, "percentage": 54.88, "elapsed_time": "0:10:24", "remaining_time": "0:08:33"} +{"current_steps": 513, "total_steps": 933, "loss": 1.6754820346832275, "lr": 5.028083219329274e-06, "epoch": 1.6495176848874598, "percentage": 54.98, "elapsed_time": "0:10:25", "remaining_time": "0:08:31"} +{"current_steps": 514, "total_steps": 933, "loss": 2.1871590614318848, "lr": 5.009361116860129e-06, "epoch": 1.652733118971061, "percentage": 55.09, "elapsed_time": "0:10:26", "remaining_time": "0:08:30"} +{"current_steps": 515, "total_steps": 933, "loss": 0.9794585108757019, "lr": 4.990638883139872e-06, "epoch": 1.6559485530546625, "percentage": 55.2, "elapsed_time": "0:10:27", "remaining_time": "0:08:29"} +{"current_steps": 516, "total_steps": 933, "loss": 1.329102635383606, "lr": 4.9719167806707265e-06, "epoch": 1.6591639871382635, "percentage": 55.31, "elapsed_time": "0:10:28", "remaining_time": "0:08:27"} +{"current_steps": 517, "total_steps": 933, "loss": 1.4713919162750244, "lr": 4.953195071953069e-06, "epoch": 1.662379421221865, "percentage": 55.41, "elapsed_time": "0:10:29", "remaining_time": "0:08:26"} +{"current_steps": 518, "total_steps": 933, "loss": 1.6331861019134521, "lr": 4.934474019481755e-06, "epoch": 1.6655948553054662, "percentage": 55.52, "elapsed_time": "0:10:30", "remaining_time": "0:08:25"} +{"current_steps": 519, "total_steps": 933, "loss": 0.8953830003738403, "lr": 4.915753885742446e-06, "epoch": 1.6688102893890675, "percentage": 55.63, "elapsed_time": "0:10:31", "remaining_time": "0:08:23"} +{"current_steps": 520, "total_steps": 933, "loss": 2.0606470108032227, "lr": 4.8970349332079155e-06, "epoch": 1.6720257234726688, "percentage": 55.73, "elapsed_time": "0:10:32", "remaining_time": "0:08:22"} +{"current_steps": 521, "total_steps": 933, "loss": 1.9117261171340942, "lr": 4.8783174243343765e-06, "epoch": 1.67524115755627, "percentage": 55.84, "elapsed_time": "0:10:33", "remaining_time": "0:08:21"} +{"current_steps": 522, "total_steps": 933, "loss": 1.107745885848999, "lr": 4.8596016215578e-06, "epoch": 1.6784565916398715, "percentage": 55.95, "elapsed_time": "0:10:34", "remaining_time": "0:08:19"} +{"current_steps": 523, "total_steps": 933, "loss": 2.056065559387207, "lr": 4.8408877872902404e-06, "epoch": 1.6816720257234725, "percentage": 56.06, "elapsed_time": "0:10:36", "remaining_time": "0:08:18"} +{"current_steps": 524, "total_steps": 933, "loss": 1.2189289331436157, "lr": 4.822176183916147e-06, "epoch": 1.684887459807074, "percentage": 56.16, "elapsed_time": "0:10:37", "remaining_time": "0:08:17"} +{"current_steps": 525, "total_steps": 933, "loss": 1.1678478717803955, "lr": 4.803467073788694e-06, "epoch": 1.6881028938906752, "percentage": 56.27, "elapsed_time": "0:10:38", "remaining_time": "0:08:15"} +{"current_steps": 526, "total_steps": 933, "loss": 1.3617630004882812, "lr": 4.7847607192260916e-06, "epoch": 1.6913183279742765, "percentage": 56.38, "elapsed_time": "0:10:39", "remaining_time": "0:08:14"} +{"current_steps": 527, "total_steps": 933, "loss": 1.8124133348464966, "lr": 4.766057382507924e-06, "epoch": 1.694533762057878, "percentage": 56.48, "elapsed_time": "0:10:40", "remaining_time": "0:08:13"} +{"current_steps": 528, "total_steps": 933, "loss": 1.7624785900115967, "lr": 4.747357325871454e-06, "epoch": 1.697749196141479, "percentage": 56.59, "elapsed_time": "0:10:41", "remaining_time": "0:08:11"} +{"current_steps": 529, "total_steps": 933, "loss": 1.804181456565857, "lr": 4.72866081150796e-06, "epoch": 1.7009646302250805, "percentage": 56.7, "elapsed_time": "0:10:42", "remaining_time": "0:08:10"} +{"current_steps": 530, "total_steps": 933, "loss": 1.8973114490509033, "lr": 4.709968101559051e-06, "epoch": 1.7041800643086815, "percentage": 56.81, "elapsed_time": "0:10:43", "remaining_time": "0:08:09"} +{"current_steps": 531, "total_steps": 933, "loss": 2.0870800018310547, "lr": 4.6912794581129985e-06, "epoch": 1.707395498392283, "percentage": 56.91, "elapsed_time": "0:10:44", "remaining_time": "0:08:08"} +{"current_steps": 532, "total_steps": 933, "loss": 1.4478678703308105, "lr": 4.672595143201056e-06, "epoch": 1.7106109324758842, "percentage": 57.02, "elapsed_time": "0:10:45", "remaining_time": "0:08:06"} +{"current_steps": 533, "total_steps": 933, "loss": 1.0462535619735718, "lr": 4.653915418793786e-06, "epoch": 1.7138263665594855, "percentage": 57.13, "elapsed_time": "0:10:46", "remaining_time": "0:08:05"} +{"current_steps": 534, "total_steps": 933, "loss": 1.0799921751022339, "lr": 4.63524054679739e-06, "epoch": 1.717041800643087, "percentage": 57.23, "elapsed_time": "0:10:47", "remaining_time": "0:08:04"} +{"current_steps": 535, "total_steps": 933, "loss": 0.9600023031234741, "lr": 4.616570789050034e-06, "epoch": 1.720257234726688, "percentage": 57.34, "elapsed_time": "0:10:48", "remaining_time": "0:08:02"} +{"current_steps": 536, "total_steps": 933, "loss": 1.4467713832855225, "lr": 4.5979064073181775e-06, "epoch": 1.7234726688102895, "percentage": 57.45, "elapsed_time": "0:10:50", "remaining_time": "0:08:01"} +{"current_steps": 537, "total_steps": 933, "loss": 1.1485623121261597, "lr": 4.579247663292903e-06, "epoch": 1.7266881028938905, "percentage": 57.56, "elapsed_time": "0:10:51", "remaining_time": "0:08:00"} +{"current_steps": 538, "total_steps": 933, "loss": 1.881239891052246, "lr": 4.560594818586248e-06, "epoch": 1.729903536977492, "percentage": 57.66, "elapsed_time": "0:10:52", "remaining_time": "0:07:58"} +{"current_steps": 539, "total_steps": 933, "loss": 1.4121603965759277, "lr": 4.541948134727538e-06, "epoch": 1.7331189710610932, "percentage": 57.77, "elapsed_time": "0:10:53", "remaining_time": "0:07:57"} +{"current_steps": 540, "total_steps": 933, "loss": 1.600027084350586, "lr": 4.523307873159714e-06, "epoch": 1.7363344051446945, "percentage": 57.88, "elapsed_time": "0:10:54", "remaining_time": "0:07:56"} +{"current_steps": 541, "total_steps": 933, "loss": 1.9366849660873413, "lr": 4.504674295235673e-06, "epoch": 1.739549839228296, "percentage": 57.98, "elapsed_time": "0:10:55", "remaining_time": "0:07:54"} +{"current_steps": 542, "total_steps": 933, "loss": 1.147647500038147, "lr": 4.486047662214602e-06, "epoch": 1.742765273311897, "percentage": 58.09, "elapsed_time": "0:10:56", "remaining_time": "0:07:53"} +{"current_steps": 543, "total_steps": 933, "loss": 2.267954111099243, "lr": 4.467428235258315e-06, "epoch": 1.7459807073954985, "percentage": 58.2, "elapsed_time": "0:10:57", "remaining_time": "0:07:52"} +{"current_steps": 544, "total_steps": 933, "loss": 1.3431754112243652, "lr": 4.448816275427589e-06, "epoch": 1.7491961414790995, "percentage": 58.31, "elapsed_time": "0:10:58", "remaining_time": "0:07:51"} +{"current_steps": 545, "total_steps": 933, "loss": 1.371949553489685, "lr": 4.430212043678506e-06, "epoch": 1.752411575562701, "percentage": 58.41, "elapsed_time": "0:10:59", "remaining_time": "0:07:49"} +{"current_steps": 546, "total_steps": 933, "loss": 1.2015950679779053, "lr": 4.41161580085879e-06, "epoch": 1.7556270096463023, "percentage": 58.52, "elapsed_time": "0:11:00", "remaining_time": "0:07:48"} +{"current_steps": 547, "total_steps": 933, "loss": 1.2269978523254395, "lr": 4.39302780770416e-06, "epoch": 1.7588424437299035, "percentage": 58.63, "elapsed_time": "0:11:01", "remaining_time": "0:07:47"} +{"current_steps": 548, "total_steps": 933, "loss": 1.1524686813354492, "lr": 4.374448324834664e-06, "epoch": 1.762057877813505, "percentage": 58.74, "elapsed_time": "0:11:03", "remaining_time": "0:07:45"} +{"current_steps": 549, "total_steps": 933, "loss": 0.8838063478469849, "lr": 4.355877612751027e-06, "epoch": 1.765273311897106, "percentage": 58.84, "elapsed_time": "0:11:04", "remaining_time": "0:07:44"} +{"current_steps": 550, "total_steps": 933, "loss": 1.0831339359283447, "lr": 4.337315931830999e-06, "epoch": 1.7684887459807075, "percentage": 58.95, "elapsed_time": "0:11:05", "remaining_time": "0:07:43"} +{"current_steps": 551, "total_steps": 933, "loss": 1.1000258922576904, "lr": 4.318763542325711e-06, "epoch": 1.7717041800643085, "percentage": 59.06, "elapsed_time": "0:11:06", "remaining_time": "0:07:41"} +{"current_steps": 552, "total_steps": 933, "loss": 1.230742335319519, "lr": 4.3002207043560135e-06, "epoch": 1.77491961414791, "percentage": 59.16, "elapsed_time": "0:11:07", "remaining_time": "0:07:40"} +{"current_steps": 553, "total_steps": 933, "loss": 1.103325605392456, "lr": 4.28168767790884e-06, "epoch": 1.7781350482315113, "percentage": 59.27, "elapsed_time": "0:11:08", "remaining_time": "0:07:39"} +{"current_steps": 554, "total_steps": 933, "loss": 1.3692865371704102, "lr": 4.263164722833556e-06, "epoch": 1.7813504823151125, "percentage": 59.38, "elapsed_time": "0:11:09", "remaining_time": "0:07:38"} +{"current_steps": 555, "total_steps": 933, "loss": 0.6936740875244141, "lr": 4.2446520988383185e-06, "epoch": 1.784565916398714, "percentage": 59.49, "elapsed_time": "0:11:10", "remaining_time": "0:07:36"} +{"current_steps": 556, "total_steps": 933, "loss": 0.7901575565338135, "lr": 4.226150065486434e-06, "epoch": 1.787781350482315, "percentage": 59.59, "elapsed_time": "0:11:11", "remaining_time": "0:07:35"} +{"current_steps": 557, "total_steps": 933, "loss": 1.3338205814361572, "lr": 4.207658882192717e-06, "epoch": 1.7909967845659165, "percentage": 59.7, "elapsed_time": "0:11:12", "remaining_time": "0:07:34"} +{"current_steps": 558, "total_steps": 933, "loss": 1.3363522291183472, "lr": 4.189178808219856e-06, "epoch": 1.7942122186495175, "percentage": 59.81, "elapsed_time": "0:11:13", "remaining_time": "0:07:32"} +{"current_steps": 559, "total_steps": 933, "loss": 1.8522659540176392, "lr": 4.170710102674777e-06, "epoch": 1.797427652733119, "percentage": 59.91, "elapsed_time": "0:11:14", "remaining_time": "0:07:31"} +{"current_steps": 560, "total_steps": 933, "loss": 1.9775140285491943, "lr": 4.152253024505011e-06, "epoch": 1.8006430868167203, "percentage": 60.02, "elapsed_time": "0:11:15", "remaining_time": "0:07:30"} +{"current_steps": 561, "total_steps": 933, "loss": 1.0015840530395508, "lr": 4.133807832495062e-06, "epoch": 1.8038585209003215, "percentage": 60.13, "elapsed_time": "0:11:17", "remaining_time": "0:07:28"} +{"current_steps": 562, "total_steps": 933, "loss": 1.753740906715393, "lr": 4.1153747852627775e-06, "epoch": 1.807073954983923, "percentage": 60.24, "elapsed_time": "0:11:18", "remaining_time": "0:07:27"} +{"current_steps": 563, "total_steps": 933, "loss": 1.5827150344848633, "lr": 4.096954141255731e-06, "epoch": 1.810289389067524, "percentage": 60.34, "elapsed_time": "0:11:19", "remaining_time": "0:07:26"} +{"current_steps": 564, "total_steps": 933, "loss": 0.916832685470581, "lr": 4.078546158747586e-06, "epoch": 1.8135048231511255, "percentage": 60.45, "elapsed_time": "0:11:20", "remaining_time": "0:07:25"} +{"current_steps": 565, "total_steps": 933, "loss": 1.4536582231521606, "lr": 4.060151095834482e-06, "epoch": 1.8167202572347267, "percentage": 60.56, "elapsed_time": "0:11:21", "remaining_time": "0:07:23"} +{"current_steps": 566, "total_steps": 933, "loss": 1.4267023801803589, "lr": 4.041769210431414e-06, "epoch": 1.819935691318328, "percentage": 60.66, "elapsed_time": "0:11:22", "remaining_time": "0:07:22"} +{"current_steps": 567, "total_steps": 933, "loss": 1.1329270601272583, "lr": 4.02340076026862e-06, "epoch": 1.8231511254019293, "percentage": 60.77, "elapsed_time": "0:11:23", "remaining_time": "0:07:21"} +{"current_steps": 568, "total_steps": 933, "loss": 1.7833589315414429, "lr": 4.00504600288796e-06, "epoch": 1.8263665594855305, "percentage": 60.88, "elapsed_time": "0:11:24", "remaining_time": "0:07:19"} +{"current_steps": 569, "total_steps": 933, "loss": 1.2976348400115967, "lr": 3.9867051956393114e-06, "epoch": 1.829581993569132, "percentage": 60.99, "elapsed_time": "0:11:25", "remaining_time": "0:07:18"} +{"current_steps": 570, "total_steps": 933, "loss": 1.4629448652267456, "lr": 3.968378595676956e-06, "epoch": 1.832797427652733, "percentage": 61.09, "elapsed_time": "0:11:26", "remaining_time": "0:07:17"} +{"current_steps": 571, "total_steps": 933, "loss": 2.5722076892852783, "lr": 3.95006645995598e-06, "epoch": 1.8360128617363345, "percentage": 61.2, "elapsed_time": "0:11:27", "remaining_time": "0:07:16"} +{"current_steps": 572, "total_steps": 933, "loss": 2.2091426849365234, "lr": 3.931769045228668e-06, "epoch": 1.8392282958199357, "percentage": 61.31, "elapsed_time": "0:11:28", "remaining_time": "0:07:14"} +{"current_steps": 573, "total_steps": 933, "loss": 1.391589641571045, "lr": 3.9134866080409e-06, "epoch": 1.842443729903537, "percentage": 61.41, "elapsed_time": "0:11:30", "remaining_time": "0:07:13"} +{"current_steps": 574, "total_steps": 933, "loss": 1.1414397954940796, "lr": 3.895219404728561e-06, "epoch": 1.8456591639871383, "percentage": 61.52, "elapsed_time": "0:11:31", "remaining_time": "0:07:12"} +{"current_steps": 575, "total_steps": 933, "loss": 1.059516191482544, "lr": 3.8769676914139426e-06, "epoch": 1.8488745980707395, "percentage": 61.63, "elapsed_time": "0:11:32", "remaining_time": "0:07:10"} +{"current_steps": 576, "total_steps": 933, "loss": 2.1879444122314453, "lr": 3.858731724002153e-06, "epoch": 1.852090032154341, "percentage": 61.74, "elapsed_time": "0:11:33", "remaining_time": "0:07:09"} +{"current_steps": 577, "total_steps": 933, "loss": 1.473386526107788, "lr": 3.840511758177528e-06, "epoch": 1.855305466237942, "percentage": 61.84, "elapsed_time": "0:11:34", "remaining_time": "0:07:08"} +{"current_steps": 578, "total_steps": 933, "loss": 1.4653959274291992, "lr": 3.822308049400047e-06, "epoch": 1.8585209003215435, "percentage": 61.95, "elapsed_time": "0:11:35", "remaining_time": "0:07:07"} +{"current_steps": 579, "total_steps": 933, "loss": 1.6413612365722656, "lr": 3.804120852901756e-06, "epoch": 1.8617363344051447, "percentage": 62.06, "elapsed_time": "0:11:36", "remaining_time": "0:07:05"} +{"current_steps": 580, "total_steps": 933, "loss": 1.316032886505127, "lr": 3.7859504236831766e-06, "epoch": 1.864951768488746, "percentage": 62.17, "elapsed_time": "0:11:37", "remaining_time": "0:07:04"} +{"current_steps": 581, "total_steps": 933, "loss": 1.7361235618591309, "lr": 3.7677970165097444e-06, "epoch": 1.8681672025723473, "percentage": 62.27, "elapsed_time": "0:11:38", "remaining_time": "0:07:03"} +{"current_steps": 582, "total_steps": 933, "loss": 1.667905569076538, "lr": 3.749660885908226e-06, "epoch": 1.8713826366559485, "percentage": 62.38, "elapsed_time": "0:11:39", "remaining_time": "0:07:02"} +{"current_steps": 583, "total_steps": 933, "loss": 1.065222978591919, "lr": 3.7315422861631623e-06, "epoch": 1.87459807073955, "percentage": 62.49, "elapsed_time": "0:11:40", "remaining_time": "0:07:00"} +{"current_steps": 584, "total_steps": 933, "loss": 1.115635633468628, "lr": 3.7134414713132883e-06, "epoch": 1.877813504823151, "percentage": 62.59, "elapsed_time": "0:11:41", "remaining_time": "0:06:59"} +{"current_steps": 585, "total_steps": 933, "loss": 2.2017221450805664, "lr": 3.6953586951479834e-06, "epoch": 1.8810289389067525, "percentage": 62.7, "elapsed_time": "0:11:42", "remaining_time": "0:06:58"} +{"current_steps": 586, "total_steps": 933, "loss": 2.484564781188965, "lr": 3.677294211203708e-06, "epoch": 1.8842443729903537, "percentage": 62.81, "elapsed_time": "0:11:44", "remaining_time": "0:06:56"} +{"current_steps": 587, "total_steps": 933, "loss": 1.3357205390930176, "lr": 3.6592482727604508e-06, "epoch": 1.887459807073955, "percentage": 62.92, "elapsed_time": "0:11:45", "remaining_time": "0:06:55"} +{"current_steps": 588, "total_steps": 933, "loss": 1.3544979095458984, "lr": 3.641221132838173e-06, "epoch": 1.8906752411575563, "percentage": 63.02, "elapsed_time": "0:11:46", "remaining_time": "0:06:54"} +{"current_steps": 589, "total_steps": 933, "loss": 0.9614198207855225, "lr": 3.623213044193266e-06, "epoch": 1.8938906752411575, "percentage": 63.13, "elapsed_time": "0:11:47", "remaining_time": "0:06:53"} +{"current_steps": 590, "total_steps": 933, "loss": 1.2620229721069336, "lr": 3.605224259315005e-06, "epoch": 1.897106109324759, "percentage": 63.24, "elapsed_time": "0:11:48", "remaining_time": "0:06:51"} +{"current_steps": 591, "total_steps": 933, "loss": 1.886639952659607, "lr": 3.587255030422011e-06, "epoch": 1.90032154340836, "percentage": 63.34, "elapsed_time": "0:11:49", "remaining_time": "0:06:50"} +{"current_steps": 592, "total_steps": 933, "loss": 1.2755482196807861, "lr": 3.569305609458712e-06, "epoch": 1.9035369774919615, "percentage": 63.45, "elapsed_time": "0:11:50", "remaining_time": "0:06:49"} +{"current_steps": 593, "total_steps": 933, "loss": 1.4138600826263428, "lr": 3.5513762480918084e-06, "epoch": 1.9067524115755627, "percentage": 63.56, "elapsed_time": "0:11:51", "remaining_time": "0:06:48"} +{"current_steps": 594, "total_steps": 933, "loss": 1.2061731815338135, "lr": 3.5334671977067504e-06, "epoch": 1.909967845659164, "percentage": 63.67, "elapsed_time": "0:11:52", "remaining_time": "0:06:46"} +{"current_steps": 595, "total_steps": 933, "loss": 1.641986608505249, "lr": 3.5155787094042113e-06, "epoch": 1.9131832797427653, "percentage": 63.77, "elapsed_time": "0:11:53", "remaining_time": "0:06:45"} +{"current_steps": 596, "total_steps": 933, "loss": 1.2158643007278442, "lr": 3.497711033996564e-06, "epoch": 1.9163987138263665, "percentage": 63.88, "elapsed_time": "0:11:54", "remaining_time": "0:06:44"} +{"current_steps": 597, "total_steps": 933, "loss": 1.8701896667480469, "lr": 3.4798644220043663e-06, "epoch": 1.919614147909968, "percentage": 63.99, "elapsed_time": "0:11:55", "remaining_time": "0:06:42"} +{"current_steps": 598, "total_steps": 933, "loss": 1.195351243019104, "lr": 3.462039123652847e-06, "epoch": 1.922829581993569, "percentage": 64.09, "elapsed_time": "0:11:57", "remaining_time": "0:06:41"} +{"current_steps": 599, "total_steps": 933, "loss": 1.2579845190048218, "lr": 3.444235388868403e-06, "epoch": 1.9260450160771705, "percentage": 64.2, "elapsed_time": "0:11:58", "remaining_time": "0:06:40"} +{"current_steps": 600, "total_steps": 933, "loss": 1.524860143661499, "lr": 3.4264534672750884e-06, "epoch": 1.9292604501607717, "percentage": 64.31, "elapsed_time": "0:11:59", "remaining_time": "0:06:39"} +{"current_steps": 601, "total_steps": 933, "loss": 1.286231517791748, "lr": 3.408693608191116e-06, "epoch": 1.932475884244373, "percentage": 64.42, "elapsed_time": "0:12:00", "remaining_time": "0:06:37"} +{"current_steps": 602, "total_steps": 933, "loss": 1.322242021560669, "lr": 3.3909560606253632e-06, "epoch": 1.9356913183279743, "percentage": 64.52, "elapsed_time": "0:12:01", "remaining_time": "0:06:36"} +{"current_steps": 603, "total_steps": 933, "loss": 1.4537055492401123, "lr": 3.3732410732738843e-06, "epoch": 1.9389067524115755, "percentage": 64.63, "elapsed_time": "0:12:02", "remaining_time": "0:06:35"} +{"current_steps": 604, "total_steps": 933, "loss": 1.5410267114639282, "lr": 3.3555488945164127e-06, "epoch": 1.942122186495177, "percentage": 64.74, "elapsed_time": "0:12:03", "remaining_time": "0:06:34"} +{"current_steps": 605, "total_steps": 933, "loss": 1.754872441291809, "lr": 3.337879772412892e-06, "epoch": 1.945337620578778, "percentage": 64.84, "elapsed_time": "0:12:04", "remaining_time": "0:06:32"} +{"current_steps": 606, "total_steps": 933, "loss": 1.4205820560455322, "lr": 3.320233954699985e-06, "epoch": 1.9485530546623795, "percentage": 64.95, "elapsed_time": "0:12:05", "remaining_time": "0:06:31"} +{"current_steps": 607, "total_steps": 933, "loss": 1.73175048828125, "lr": 3.302611688787612e-06, "epoch": 1.9517684887459807, "percentage": 65.06, "elapsed_time": "0:12:06", "remaining_time": "0:06:30"} +{"current_steps": 608, "total_steps": 933, "loss": 1.4618065357208252, "lr": 3.285013221755472e-06, "epoch": 1.954983922829582, "percentage": 65.17, "elapsed_time": "0:12:07", "remaining_time": "0:06:29"} +{"current_steps": 609, "total_steps": 933, "loss": 1.0909953117370605, "lr": 3.267438800349586e-06, "epoch": 1.9581993569131833, "percentage": 65.27, "elapsed_time": "0:12:08", "remaining_time": "0:06:27"} +{"current_steps": 610, "total_steps": 933, "loss": 1.2120417356491089, "lr": 3.2498886709788298e-06, "epoch": 1.9614147909967845, "percentage": 65.38, "elapsed_time": "0:12:09", "remaining_time": "0:06:26"} +{"current_steps": 611, "total_steps": 933, "loss": 1.560947060585022, "lr": 3.2323630797114892e-06, "epoch": 1.964630225080386, "percentage": 65.49, "elapsed_time": "0:12:11", "remaining_time": "0:06:25"} +{"current_steps": 612, "total_steps": 933, "loss": 1.1600837707519531, "lr": 3.214862272271799e-06, "epoch": 1.967845659163987, "percentage": 65.59, "elapsed_time": "0:12:12", "remaining_time": "0:06:23"} +{"current_steps": 613, "total_steps": 933, "loss": 1.0669816732406616, "lr": 3.1973864940365076e-06, "epoch": 1.9710610932475885, "percentage": 65.7, "elapsed_time": "0:12:13", "remaining_time": "0:06:22"} +{"current_steps": 614, "total_steps": 933, "loss": 1.236401081085205, "lr": 3.179935990031425e-06, "epoch": 1.9742765273311897, "percentage": 65.81, "elapsed_time": "0:12:14", "remaining_time": "0:06:21"} +{"current_steps": 615, "total_steps": 933, "loss": 0.8553851842880249, "lr": 3.162511004928003e-06, "epoch": 1.977491961414791, "percentage": 65.92, "elapsed_time": "0:12:15", "remaining_time": "0:06:20"} +{"current_steps": 616, "total_steps": 933, "loss": 1.6353504657745361, "lr": 3.1451117830398896e-06, "epoch": 1.9807073954983923, "percentage": 66.02, "elapsed_time": "0:12:16", "remaining_time": "0:06:18"} +{"current_steps": 617, "total_steps": 933, "loss": 1.8089317083358765, "lr": 3.1277385683195117e-06, "epoch": 1.9839228295819935, "percentage": 66.13, "elapsed_time": "0:12:17", "remaining_time": "0:06:17"} +{"current_steps": 618, "total_steps": 933, "loss": 1.1577980518341064, "lr": 3.110391604354652e-06, "epoch": 1.987138263665595, "percentage": 66.24, "elapsed_time": "0:12:18", "remaining_time": "0:06:16"} +{"current_steps": 619, "total_steps": 933, "loss": 2.4027674198150635, "lr": 3.093071134365037e-06, "epoch": 1.990353697749196, "percentage": 66.35, "elapsed_time": "0:12:19", "remaining_time": "0:06:15"} +{"current_steps": 620, "total_steps": 933, "loss": 1.3853836059570312, "lr": 3.075777401198922e-06, "epoch": 1.9935691318327975, "percentage": 66.45, "elapsed_time": "0:12:20", "remaining_time": "0:06:13"} +{"current_steps": 621, "total_steps": 933, "loss": 2.743666410446167, "lr": 3.058510647329688e-06, "epoch": 1.9967845659163987, "percentage": 66.56, "elapsed_time": "0:12:21", "remaining_time": "0:06:12"} +{"current_steps": 622, "total_steps": 933, "loss": 1.1549768447875977, "lr": 3.041271114852443e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:12:22", "remaining_time": "0:06:11"} +{"current_steps": 623, "total_steps": 933, "loss": 1.0811271667480469, "lr": 3.02405904548063e-06, "epoch": 2.0032154340836015, "percentage": 66.77, "elapsed_time": "0:14:05", "remaining_time": "0:07:00"} +{"current_steps": 624, "total_steps": 933, "loss": 0.35157322883605957, "lr": 3.0068746805426318e-06, "epoch": 2.0064308681672025, "percentage": 66.88, "elapsed_time": "0:14:06", "remaining_time": "0:06:59"} +{"current_steps": 625, "total_steps": 933, "loss": 0.40260130167007446, "lr": 2.9897182609783905e-06, "epoch": 2.009646302250804, "percentage": 66.99, "elapsed_time": "0:14:07", "remaining_time": "0:06:57"} +{"current_steps": 626, "total_steps": 933, "loss": 0.870284378528595, "lr": 2.97259002733603e-06, "epoch": 2.012861736334405, "percentage": 67.1, "elapsed_time": "0:14:08", "remaining_time": "0:06:56"} +{"current_steps": 627, "total_steps": 933, "loss": 0.4470030963420868, "lr": 2.9554902197684843e-06, "epoch": 2.0160771704180065, "percentage": 67.2, "elapsed_time": "0:14:09", "remaining_time": "0:06:54"} +{"current_steps": 628, "total_steps": 933, "loss": 0.6769909858703613, "lr": 2.938419078030128e-06, "epoch": 2.0192926045016075, "percentage": 67.31, "elapsed_time": "0:14:11", "remaining_time": "0:06:53"} +{"current_steps": 629, "total_steps": 933, "loss": 0.5105010271072388, "lr": 2.9213768414734146e-06, "epoch": 2.022508038585209, "percentage": 67.42, "elapsed_time": "0:14:12", "remaining_time": "0:06:51"} +{"current_steps": 630, "total_steps": 933, "loss": 0.5568506121635437, "lr": 2.90436374904552e-06, "epoch": 2.0257234726688105, "percentage": 67.52, "elapsed_time": "0:14:13", "remaining_time": "0:06:50"} +{"current_steps": 631, "total_steps": 933, "loss": 0.21812975406646729, "lr": 2.887380039284999e-06, "epoch": 2.0289389067524115, "percentage": 67.63, "elapsed_time": "0:14:14", "remaining_time": "0:06:48"} +{"current_steps": 632, "total_steps": 933, "loss": 0.5240740776062012, "lr": 2.8704259503184306e-06, "epoch": 2.032154340836013, "percentage": 67.74, "elapsed_time": "0:14:15", "remaining_time": "0:06:47"} +{"current_steps": 633, "total_steps": 933, "loss": 0.3466935455799103, "lr": 2.853501719857086e-06, "epoch": 2.035369774919614, "percentage": 67.85, "elapsed_time": "0:14:16", "remaining_time": "0:06:45"} +{"current_steps": 634, "total_steps": 933, "loss": 0.6415536403656006, "lr": 2.8366075851935927e-06, "epoch": 2.0385852090032155, "percentage": 67.95, "elapsed_time": "0:14:17", "remaining_time": "0:06:44"} +{"current_steps": 635, "total_steps": 933, "loss": 0.4148802161216736, "lr": 2.8197437831986085e-06, "epoch": 2.0418006430868165, "percentage": 68.06, "elapsed_time": "0:14:18", "remaining_time": "0:06:42"} +{"current_steps": 636, "total_steps": 933, "loss": 0.7162201404571533, "lr": 2.802910550317506e-06, "epoch": 2.045016077170418, "percentage": 68.17, "elapsed_time": "0:14:19", "remaining_time": "0:06:41"} +{"current_steps": 637, "total_steps": 933, "loss": 0.5609467029571533, "lr": 2.786108122567044e-06, "epoch": 2.0482315112540195, "percentage": 68.27, "elapsed_time": "0:14:20", "remaining_time": "0:06:40"} +{"current_steps": 638, "total_steps": 933, "loss": 0.4859113395214081, "lr": 2.769336735532068e-06, "epoch": 2.0514469453376205, "percentage": 68.38, "elapsed_time": "0:14:21", "remaining_time": "0:06:38"} +{"current_steps": 639, "total_steps": 933, "loss": 0.35435792803764343, "lr": 2.7525966243622105e-06, "epoch": 2.054662379421222, "percentage": 68.49, "elapsed_time": "0:14:23", "remaining_time": "0:06:37"} +{"current_steps": 640, "total_steps": 933, "loss": 0.2768567204475403, "lr": 2.7358880237685844e-06, "epoch": 2.057877813504823, "percentage": 68.6, "elapsed_time": "0:14:24", "remaining_time": "0:06:35"} +{"current_steps": 641, "total_steps": 933, "loss": 0.24898266792297363, "lr": 2.7192111680204957e-06, "epoch": 2.0610932475884245, "percentage": 68.7, "elapsed_time": "0:14:25", "remaining_time": "0:06:34"} +{"current_steps": 642, "total_steps": 933, "loss": 0.7269777059555054, "lr": 2.7025662909421625e-06, "epoch": 2.0643086816720255, "percentage": 68.81, "elapsed_time": "0:14:26", "remaining_time": "0:06:32"} +{"current_steps": 643, "total_steps": 933, "loss": 0.6724145412445068, "lr": 2.685953625909432e-06, "epoch": 2.067524115755627, "percentage": 68.92, "elapsed_time": "0:14:27", "remaining_time": "0:06:31"} +{"current_steps": 644, "total_steps": 933, "loss": 0.5692760348320007, "lr": 2.6693734058465105e-06, "epoch": 2.0707395498392285, "percentage": 69.02, "elapsed_time": "0:14:28", "remaining_time": "0:06:29"} +{"current_steps": 645, "total_steps": 933, "loss": 0.5785061717033386, "lr": 2.652825863222698e-06, "epoch": 2.0739549839228295, "percentage": 69.13, "elapsed_time": "0:14:29", "remaining_time": "0:06:28"} +{"current_steps": 646, "total_steps": 933, "loss": 0.3663535416126251, "lr": 2.636311230049125e-06, "epoch": 2.077170418006431, "percentage": 69.24, "elapsed_time": "0:14:30", "remaining_time": "0:06:26"} +{"current_steps": 647, "total_steps": 933, "loss": 0.4806700050830841, "lr": 2.619829737875509e-06, "epoch": 2.080385852090032, "percentage": 69.35, "elapsed_time": "0:14:31", "remaining_time": "0:06:25"} +{"current_steps": 648, "total_steps": 933, "loss": 0.5164961814880371, "lr": 2.6033816177868954e-06, "epoch": 2.0836012861736335, "percentage": 69.45, "elapsed_time": "0:14:32", "remaining_time": "0:06:23"} +{"current_steps": 649, "total_steps": 933, "loss": 0.6894420385360718, "lr": 2.5869671004004256e-06, "epoch": 2.0868167202572345, "percentage": 69.56, "elapsed_time": "0:14:33", "remaining_time": "0:06:22"} +{"current_steps": 650, "total_steps": 933, "loss": 0.4011325538158417, "lr": 2.5705864158621008e-06, "epoch": 2.090032154340836, "percentage": 69.67, "elapsed_time": "0:14:35", "remaining_time": "0:06:20"} +{"current_steps": 651, "total_steps": 933, "loss": 0.745391845703125, "lr": 2.5542397938435574e-06, "epoch": 2.0932475884244375, "percentage": 69.77, "elapsed_time": "0:14:36", "remaining_time": "0:06:19"} +{"current_steps": 652, "total_steps": 933, "loss": 0.3058035969734192, "lr": 2.537927463538844e-06, "epoch": 2.0964630225080385, "percentage": 69.88, "elapsed_time": "0:14:37", "remaining_time": "0:06:18"} +{"current_steps": 653, "total_steps": 933, "loss": 0.33844730257987976, "lr": 2.521649653661209e-06, "epoch": 2.09967845659164, "percentage": 69.99, "elapsed_time": "0:14:38", "remaining_time": "0:06:16"} +{"current_steps": 654, "total_steps": 933, "loss": 0.3529083728790283, "lr": 2.5054065924398934e-06, "epoch": 2.102893890675241, "percentage": 70.1, "elapsed_time": "0:14:39", "remaining_time": "0:06:15"} +{"current_steps": 655, "total_steps": 933, "loss": 0.3711613714694977, "lr": 2.4891985076169356e-06, "epoch": 2.1061093247588425, "percentage": 70.2, "elapsed_time": "0:14:40", "remaining_time": "0:06:13"} +{"current_steps": 656, "total_steps": 933, "loss": 0.7479931712150574, "lr": 2.473025626443969e-06, "epoch": 2.1093247588424435, "percentage": 70.31, "elapsed_time": "0:14:41", "remaining_time": "0:06:12"} +{"current_steps": 657, "total_steps": 933, "loss": 1.7793715000152588, "lr": 2.4568881756790436e-06, "epoch": 2.112540192926045, "percentage": 70.42, "elapsed_time": "0:14:42", "remaining_time": "0:06:10"} +{"current_steps": 658, "total_steps": 933, "loss": 1.3851677179336548, "lr": 2.4407863815834414e-06, "epoch": 2.1157556270096465, "percentage": 70.53, "elapsed_time": "0:14:43", "remaining_time": "0:06:09"} +{"current_steps": 659, "total_steps": 933, "loss": 1.4731594324111938, "lr": 2.42472046991851e-06, "epoch": 2.1189710610932475, "percentage": 70.63, "elapsed_time": "0:14:44", "remaining_time": "0:06:07"} +{"current_steps": 660, "total_steps": 933, "loss": 0.5832240581512451, "lr": 2.4086906659424904e-06, "epoch": 2.122186495176849, "percentage": 70.74, "elapsed_time": "0:14:46", "remaining_time": "0:06:06"} +{"current_steps": 661, "total_steps": 933, "loss": 0.18587762117385864, "lr": 2.392697194407363e-06, "epoch": 2.12540192926045, "percentage": 70.85, "elapsed_time": "0:14:47", "remaining_time": "0:06:05"} +{"current_steps": 662, "total_steps": 933, "loss": 1.4967979192733765, "lr": 2.3767402795556953e-06, "epoch": 2.1286173633440515, "percentage": 70.95, "elapsed_time": "0:14:48", "remaining_time": "0:06:03"} +{"current_steps": 663, "total_steps": 933, "loss": 0.6785554885864258, "lr": 2.3608201451175004e-06, "epoch": 2.1318327974276525, "percentage": 71.06, "elapsed_time": "0:14:49", "remaining_time": "0:06:02"} +{"current_steps": 664, "total_steps": 933, "loss": 0.3392030596733093, "lr": 2.3449370143070948e-06, "epoch": 2.135048231511254, "percentage": 71.17, "elapsed_time": "0:14:50", "remaining_time": "0:06:00"} +{"current_steps": 665, "total_steps": 933, "loss": 1.3503499031066895, "lr": 2.329091109819972e-06, "epoch": 2.1382636655948555, "percentage": 71.28, "elapsed_time": "0:14:51", "remaining_time": "0:05:59"} +{"current_steps": 666, "total_steps": 933, "loss": 0.4895798861980438, "lr": 2.313282653829679e-06, "epoch": 2.1414790996784565, "percentage": 71.38, "elapsed_time": "0:14:52", "remaining_time": "0:05:57"} +{"current_steps": 667, "total_steps": 933, "loss": 0.5282753109931946, "lr": 2.297511867984703e-06, "epoch": 2.144694533762058, "percentage": 71.49, "elapsed_time": "0:14:53", "remaining_time": "0:05:56"} +{"current_steps": 668, "total_steps": 933, "loss": 0.44572100043296814, "lr": 2.2817789734053626e-06, "epoch": 2.147909967845659, "percentage": 71.6, "elapsed_time": "0:14:54", "remaining_time": "0:05:54"} +{"current_steps": 669, "total_steps": 933, "loss": 0.3825170397758484, "lr": 2.266084190680707e-06, "epoch": 2.1511254019292605, "percentage": 71.7, "elapsed_time": "0:14:55", "remaining_time": "0:05:53"} +{"current_steps": 670, "total_steps": 933, "loss": 0.8217453360557556, "lr": 2.250427739865421e-06, "epoch": 2.154340836012862, "percentage": 71.81, "elapsed_time": "0:14:56", "remaining_time": "0:05:52"} +{"current_steps": 671, "total_steps": 933, "loss": 0.32945936918258667, "lr": 2.23480984047675e-06, "epoch": 2.157556270096463, "percentage": 71.92, "elapsed_time": "0:14:58", "remaining_time": "0:05:50"} +{"current_steps": 672, "total_steps": 933, "loss": 0.3554953634738922, "lr": 2.219230711491406e-06, "epoch": 2.1607717041800645, "percentage": 72.03, "elapsed_time": "0:14:59", "remaining_time": "0:05:49"} +{"current_steps": 673, "total_steps": 933, "loss": 0.7275658845901489, "lr": 2.2036905713425104e-06, "epoch": 2.1639871382636655, "percentage": 72.13, "elapsed_time": "0:15:00", "remaining_time": "0:05:47"} +{"current_steps": 674, "total_steps": 933, "loss": 0.5208015441894531, "lr": 2.1881896379165253e-06, "epoch": 2.167202572347267, "percentage": 72.24, "elapsed_time": "0:15:01", "remaining_time": "0:05:46"} +{"current_steps": 675, "total_steps": 933, "loss": 0.8410961627960205, "lr": 2.172728128550199e-06, "epoch": 2.170418006430868, "percentage": 72.35, "elapsed_time": "0:15:02", "remaining_time": "0:05:44"} +{"current_steps": 676, "total_steps": 933, "loss": 0.9196676015853882, "lr": 2.1573062600275217e-06, "epoch": 2.1736334405144695, "percentage": 72.45, "elapsed_time": "0:15:03", "remaining_time": "0:05:43"} +{"current_steps": 677, "total_steps": 933, "loss": 0.4561595022678375, "lr": 2.1419242485766834e-06, "epoch": 2.176848874598071, "percentage": 72.56, "elapsed_time": "0:15:04", "remaining_time": "0:05:42"} +{"current_steps": 678, "total_steps": 933, "loss": 0.2470824271440506, "lr": 2.126582309867041e-06, "epoch": 2.180064308681672, "percentage": 72.67, "elapsed_time": "0:15:05", "remaining_time": "0:05:40"} +{"current_steps": 679, "total_steps": 933, "loss": 0.366765558719635, "lr": 2.1112806590061006e-06, "epoch": 2.1832797427652735, "percentage": 72.78, "elapsed_time": "0:15:06", "remaining_time": "0:05:39"} +{"current_steps": 680, "total_steps": 933, "loss": 0.11232413351535797, "lr": 2.0960195105364935e-06, "epoch": 2.1864951768488745, "percentage": 72.88, "elapsed_time": "0:15:07", "remaining_time": "0:05:37"} +{"current_steps": 681, "total_steps": 933, "loss": 0.5797847509384155, "lr": 2.080799078432972e-06, "epoch": 2.189710610932476, "percentage": 72.99, "elapsed_time": "0:15:08", "remaining_time": "0:05:36"} +{"current_steps": 682, "total_steps": 933, "loss": 0.3955646753311157, "lr": 2.0656195760994104e-06, "epoch": 2.192926045016077, "percentage": 73.1, "elapsed_time": "0:15:09", "remaining_time": "0:05:34"} +{"current_steps": 683, "total_steps": 933, "loss": 0.27037519216537476, "lr": 2.0504812163658104e-06, "epoch": 2.1961414790996785, "percentage": 73.2, "elapsed_time": "0:15:11", "remaining_time": "0:05:33"} +{"current_steps": 684, "total_steps": 933, "loss": 1.0738942623138428, "lr": 2.0353842114853194e-06, "epoch": 2.19935691318328, "percentage": 73.31, "elapsed_time": "0:15:12", "remaining_time": "0:05:32"} +{"current_steps": 685, "total_steps": 933, "loss": 0.48105546832084656, "lr": 2.020328773131252e-06, "epoch": 2.202572347266881, "percentage": 73.42, "elapsed_time": "0:15:13", "remaining_time": "0:05:30"} +{"current_steps": 686, "total_steps": 933, "loss": 0.37764203548431396, "lr": 2.005315112394122e-06, "epoch": 2.2057877813504825, "percentage": 73.53, "elapsed_time": "0:15:14", "remaining_time": "0:05:29"} +{"current_steps": 687, "total_steps": 933, "loss": 1.3033102750778198, "lr": 1.990343439778691e-06, "epoch": 2.2090032154340835, "percentage": 73.63, "elapsed_time": "0:15:15", "remaining_time": "0:05:27"} +{"current_steps": 688, "total_steps": 933, "loss": 0.3128071427345276, "lr": 1.9754139652010025e-06, "epoch": 2.212218649517685, "percentage": 73.74, "elapsed_time": "0:15:16", "remaining_time": "0:05:26"} +{"current_steps": 689, "total_steps": 933, "loss": 0.25044938921928406, "lr": 1.9605268979854493e-06, "epoch": 2.215434083601286, "percentage": 73.85, "elapsed_time": "0:15:17", "remaining_time": "0:05:24"} +{"current_steps": 690, "total_steps": 933, "loss": 0.7681007981300354, "lr": 1.9456824468618365e-06, "epoch": 2.2186495176848875, "percentage": 73.95, "elapsed_time": "0:15:18", "remaining_time": "0:05:23"} +{"current_steps": 691, "total_steps": 933, "loss": 0.8675619959831238, "lr": 1.9308808199624518e-06, "epoch": 2.221864951768489, "percentage": 74.06, "elapsed_time": "0:15:19", "remaining_time": "0:05:22"} +{"current_steps": 692, "total_steps": 933, "loss": 0.3523237109184265, "lr": 1.9161222248191515e-06, "epoch": 2.22508038585209, "percentage": 74.17, "elapsed_time": "0:15:20", "remaining_time": "0:05:20"} +{"current_steps": 693, "total_steps": 933, "loss": 0.5673654079437256, "lr": 1.9014068683604475e-06, "epoch": 2.2282958199356915, "percentage": 74.28, "elapsed_time": "0:15:22", "remaining_time": "0:05:19"} +{"current_steps": 694, "total_steps": 933, "loss": 0.4956747889518738, "lr": 1.8867349569086064e-06, "epoch": 2.2315112540192925, "percentage": 74.38, "elapsed_time": "0:15:23", "remaining_time": "0:05:17"} +{"current_steps": 695, "total_steps": 933, "loss": 1.0982662439346313, "lr": 1.8721066961767626e-06, "epoch": 2.234726688102894, "percentage": 74.49, "elapsed_time": "0:15:24", "remaining_time": "0:05:16"} +{"current_steps": 696, "total_steps": 933, "loss": 0.7824825644493103, "lr": 1.8575222912660224e-06, "epoch": 2.237942122186495, "percentage": 74.6, "elapsed_time": "0:15:25", "remaining_time": "0:05:15"} +{"current_steps": 697, "total_steps": 933, "loss": 0.9699738025665283, "lr": 1.8429819466625993e-06, "epoch": 2.2411575562700965, "percentage": 74.71, "elapsed_time": "0:15:26", "remaining_time": "0:05:13"} +{"current_steps": 698, "total_steps": 933, "loss": 0.2911728620529175, "lr": 1.8284858662349391e-06, "epoch": 2.244372990353698, "percentage": 74.81, "elapsed_time": "0:15:27", "remaining_time": "0:05:12"} +{"current_steps": 699, "total_steps": 933, "loss": 0.21695515513420105, "lr": 1.8140342532308675e-06, "epoch": 2.247588424437299, "percentage": 74.92, "elapsed_time": "0:15:28", "remaining_time": "0:05:10"} +{"current_steps": 700, "total_steps": 933, "loss": 0.5445791482925415, "lr": 1.7996273102747363e-06, "epoch": 2.2508038585209005, "percentage": 75.03, "elapsed_time": "0:15:29", "remaining_time": "0:05:09"} +{"current_steps": 701, "total_steps": 933, "loss": 0.5194580554962158, "lr": 1.7852652393645842e-06, "epoch": 2.2540192926045015, "percentage": 75.13, "elapsed_time": "0:15:30", "remaining_time": "0:05:08"} +{"current_steps": 702, "total_steps": 933, "loss": 0.23896172642707825, "lr": 1.7709482418693036e-06, "epoch": 2.257234726688103, "percentage": 75.24, "elapsed_time": "0:15:31", "remaining_time": "0:05:06"} +{"current_steps": 703, "total_steps": 933, "loss": 0.35497206449508667, "lr": 1.7566765185258205e-06, "epoch": 2.260450160771704, "percentage": 75.35, "elapsed_time": "0:15:32", "remaining_time": "0:05:05"} +{"current_steps": 704, "total_steps": 933, "loss": 0.5100143551826477, "lr": 1.7424502694362755e-06, "epoch": 2.2636655948553055, "percentage": 75.46, "elapsed_time": "0:15:33", "remaining_time": "0:05:03"} +{"current_steps": 705, "total_steps": 933, "loss": 0.3519476652145386, "lr": 1.7282696940652188e-06, "epoch": 2.266881028938907, "percentage": 75.56, "elapsed_time": "0:15:35", "remaining_time": "0:05:02"} +{"current_steps": 706, "total_steps": 933, "loss": 0.317427396774292, "lr": 1.714134991236817e-06, "epoch": 2.270096463022508, "percentage": 75.67, "elapsed_time": "0:15:36", "remaining_time": "0:05:01"} +{"current_steps": 707, "total_steps": 933, "loss": 0.2504899799823761, "lr": 1.7000463591320621e-06, "epoch": 2.2733118971061095, "percentage": 75.78, "elapsed_time": "0:15:37", "remaining_time": "0:04:59"} +{"current_steps": 708, "total_steps": 933, "loss": 0.6729331612586975, "lr": 1.6860039952859941e-06, "epoch": 2.2765273311897105, "percentage": 75.88, "elapsed_time": "0:15:38", "remaining_time": "0:04:58"} +{"current_steps": 709, "total_steps": 933, "loss": 0.2858375310897827, "lr": 1.672008096584931e-06, "epoch": 2.279742765273312, "percentage": 75.99, "elapsed_time": "0:15:39", "remaining_time": "0:04:56"} +{"current_steps": 710, "total_steps": 933, "loss": 0.2584810256958008, "lr": 1.658058859263708e-06, "epoch": 2.282958199356913, "percentage": 76.1, "elapsed_time": "0:15:40", "remaining_time": "0:04:55"} +{"current_steps": 711, "total_steps": 933, "loss": 0.6542217135429382, "lr": 1.64415647890293e-06, "epoch": 2.2861736334405145, "percentage": 76.21, "elapsed_time": "0:15:41", "remaining_time": "0:04:53"} +{"current_steps": 712, "total_steps": 933, "loss": 0.7301985025405884, "lr": 1.6303011504262223e-06, "epoch": 2.289389067524116, "percentage": 76.31, "elapsed_time": "0:15:42", "remaining_time": "0:04:52"} +{"current_steps": 713, "total_steps": 933, "loss": 0.5628085136413574, "lr": 1.6164930680975021e-06, "epoch": 2.292604501607717, "percentage": 76.42, "elapsed_time": "0:15:43", "remaining_time": "0:04:51"} +{"current_steps": 714, "total_steps": 933, "loss": 0.6142044067382812, "lr": 1.6027324255182547e-06, "epoch": 2.2958199356913185, "percentage": 76.53, "elapsed_time": "0:15:44", "remaining_time": "0:04:49"} +{"current_steps": 715, "total_steps": 933, "loss": 0.1730177402496338, "lr": 1.5890194156248178e-06, "epoch": 2.2990353697749195, "percentage": 76.63, "elapsed_time": "0:15:45", "remaining_time": "0:04:48"} +{"current_steps": 716, "total_steps": 933, "loss": 0.44994986057281494, "lr": 1.5753542306856774e-06, "epoch": 2.302250803858521, "percentage": 76.74, "elapsed_time": "0:15:46", "remaining_time": "0:04:46"} +{"current_steps": 717, "total_steps": 933, "loss": 0.4153675436973572, "lr": 1.5617370622987703e-06, "epoch": 2.305466237942122, "percentage": 76.85, "elapsed_time": "0:15:48", "remaining_time": "0:04:45"} +{"current_steps": 718, "total_steps": 933, "loss": 0.7933076620101929, "lr": 1.548168101388799e-06, "epoch": 2.3086816720257235, "percentage": 76.96, "elapsed_time": "0:15:49", "remaining_time": "0:04:44"} +{"current_steps": 719, "total_steps": 933, "loss": 0.2861023545265198, "lr": 1.5346475382045578e-06, "epoch": 2.311897106109325, "percentage": 77.06, "elapsed_time": "0:15:50", "remaining_time": "0:04:42"} +{"current_steps": 720, "total_steps": 933, "loss": 0.21383363008499146, "lr": 1.5211755623162588e-06, "epoch": 2.315112540192926, "percentage": 77.17, "elapsed_time": "0:15:51", "remaining_time": "0:04:41"} +{"current_steps": 721, "total_steps": 933, "loss": 0.34175100922584534, "lr": 1.507752362612878e-06, "epoch": 2.3183279742765275, "percentage": 77.28, "elapsed_time": "0:15:52", "remaining_time": "0:04:40"} +{"current_steps": 722, "total_steps": 933, "loss": 0.6520302295684814, "lr": 1.4943781272995073e-06, "epoch": 2.3215434083601285, "percentage": 77.38, "elapsed_time": "0:15:53", "remaining_time": "0:04:38"} +{"current_steps": 723, "total_steps": 933, "loss": 0.47313952445983887, "lr": 1.481053043894713e-06, "epoch": 2.32475884244373, "percentage": 77.49, "elapsed_time": "0:15:54", "remaining_time": "0:04:37"} +{"current_steps": 724, "total_steps": 933, "loss": 0.43662163615226746, "lr": 1.467777299227911e-06, "epoch": 2.327974276527331, "percentage": 77.6, "elapsed_time": "0:15:55", "remaining_time": "0:04:35"} +{"current_steps": 725, "total_steps": 933, "loss": 0.8428820371627808, "lr": 1.4545510794367413e-06, "epoch": 2.3311897106109325, "percentage": 77.71, "elapsed_time": "0:15:56", "remaining_time": "0:04:34"} +{"current_steps": 726, "total_steps": 933, "loss": 0.3453049063682556, "lr": 1.4413745699644633e-06, "epoch": 2.334405144694534, "percentage": 77.81, "elapsed_time": "0:15:57", "remaining_time": "0:04:33"} +{"current_steps": 727, "total_steps": 933, "loss": 0.2745356261730194, "lr": 1.4282479555573559e-06, "epoch": 2.337620578778135, "percentage": 77.92, "elapsed_time": "0:15:58", "remaining_time": "0:04:31"} +{"current_steps": 728, "total_steps": 933, "loss": 0.5829728245735168, "lr": 1.4151714202621214e-06, "epoch": 2.3408360128617365, "percentage": 78.03, "elapsed_time": "0:15:59", "remaining_time": "0:04:30"} +{"current_steps": 729, "total_steps": 933, "loss": 0.599088191986084, "lr": 1.4021451474233111e-06, "epoch": 2.3440514469453375, "percentage": 78.14, "elapsed_time": "0:16:00", "remaining_time": "0:04:28"} +{"current_steps": 730, "total_steps": 933, "loss": 0.6071078777313232, "lr": 1.389169319680752e-06, "epoch": 2.347266881028939, "percentage": 78.24, "elapsed_time": "0:16:02", "remaining_time": "0:04:27"} +{"current_steps": 731, "total_steps": 933, "loss": 0.7940813302993774, "lr": 1.3762441189669855e-06, "epoch": 2.35048231511254, "percentage": 78.35, "elapsed_time": "0:16:03", "remaining_time": "0:04:26"} +{"current_steps": 732, "total_steps": 933, "loss": 0.6212410926818848, "lr": 1.363369726504719e-06, "epoch": 2.3536977491961415, "percentage": 78.46, "elapsed_time": "0:16:04", "remaining_time": "0:04:24"} +{"current_steps": 733, "total_steps": 933, "loss": 0.7771339416503906, "lr": 1.3505463228042814e-06, "epoch": 2.356913183279743, "percentage": 78.56, "elapsed_time": "0:16:05", "remaining_time": "0:04:23"} +{"current_steps": 734, "total_steps": 933, "loss": 0.3336787819862366, "lr": 1.337774087661095e-06, "epoch": 2.360128617363344, "percentage": 78.67, "elapsed_time": "0:16:06", "remaining_time": "0:04:22"} +{"current_steps": 735, "total_steps": 933, "loss": 0.3283434510231018, "lr": 1.3250532001531568e-06, "epoch": 2.3633440514469455, "percentage": 78.78, "elapsed_time": "0:16:07", "remaining_time": "0:04:20"} +{"current_steps": 736, "total_steps": 933, "loss": 0.747290849685669, "lr": 1.31238383863852e-06, "epoch": 2.3665594855305465, "percentage": 78.89, "elapsed_time": "0:16:08", "remaining_time": "0:04:19"} +{"current_steps": 737, "total_steps": 933, "loss": 0.4145023226737976, "lr": 1.2997661807528011e-06, "epoch": 2.369774919614148, "percentage": 78.99, "elapsed_time": "0:16:09", "remaining_time": "0:04:17"} +{"current_steps": 738, "total_steps": 933, "loss": 0.6085692644119263, "lr": 1.2872004034066843e-06, "epoch": 2.372990353697749, "percentage": 79.1, "elapsed_time": "0:16:10", "remaining_time": "0:04:16"} +{"current_steps": 739, "total_steps": 933, "loss": 0.35022085905075073, "lr": 1.2746866827834443e-06, "epoch": 2.3762057877813505, "percentage": 79.21, "elapsed_time": "0:16:11", "remaining_time": "0:04:15"} +{"current_steps": 740, "total_steps": 933, "loss": 0.3418879210948944, "lr": 1.2622251943364733e-06, "epoch": 2.379421221864952, "percentage": 79.31, "elapsed_time": "0:16:12", "remaining_time": "0:04:13"} +{"current_steps": 741, "total_steps": 933, "loss": 0.48997265100479126, "lr": 1.2498161127868236e-06, "epoch": 2.382636655948553, "percentage": 79.42, "elapsed_time": "0:16:13", "remaining_time": "0:04:12"} +{"current_steps": 742, "total_steps": 933, "loss": 0.5536065697669983, "lr": 1.237459612120755e-06, "epoch": 2.3858520900321545, "percentage": 79.53, "elapsed_time": "0:16:14", "remaining_time": "0:04:10"} +{"current_steps": 743, "total_steps": 933, "loss": 0.4042523205280304, "lr": 1.2251558655873003e-06, "epoch": 2.3890675241157555, "percentage": 79.64, "elapsed_time": "0:16:16", "remaining_time": "0:04:09"} +{"current_steps": 744, "total_steps": 933, "loss": 0.3217351734638214, "lr": 1.2129050456958296e-06, "epoch": 2.392282958199357, "percentage": 79.74, "elapsed_time": "0:16:17", "remaining_time": "0:04:08"} +{"current_steps": 745, "total_steps": 933, "loss": 0.7380541563034058, "lr": 1.2007073242136358e-06, "epoch": 2.395498392282958, "percentage": 79.85, "elapsed_time": "0:16:18", "remaining_time": "0:04:06"} +{"current_steps": 746, "total_steps": 933, "loss": 1.3607124090194702, "lr": 1.1885628721635256e-06, "epoch": 2.3987138263665595, "percentage": 79.96, "elapsed_time": "0:16:19", "remaining_time": "0:04:05"} +{"current_steps": 747, "total_steps": 933, "loss": 0.5945987701416016, "lr": 1.176471859821421e-06, "epoch": 2.401929260450161, "percentage": 80.06, "elapsed_time": "0:16:20", "remaining_time": "0:04:04"} +{"current_steps": 748, "total_steps": 933, "loss": 0.44915276765823364, "lr": 1.1644344567139716e-06, "epoch": 2.405144694533762, "percentage": 80.17, "elapsed_time": "0:16:21", "remaining_time": "0:04:02"} +{"current_steps": 749, "total_steps": 933, "loss": 0.38859468698501587, "lr": 1.1524508316161799e-06, "epoch": 2.4083601286173635, "percentage": 80.28, "elapsed_time": "0:16:22", "remaining_time": "0:04:01"} +{"current_steps": 750, "total_steps": 933, "loss": 0.20568466186523438, "lr": 1.1405211525490307e-06, "epoch": 2.4115755627009645, "percentage": 80.39, "elapsed_time": "0:16:23", "remaining_time": "0:04:00"} +{"current_steps": 751, "total_steps": 933, "loss": 0.6367174983024597, "lr": 1.1286455867771422e-06, "epoch": 2.414790996784566, "percentage": 80.49, "elapsed_time": "0:16:24", "remaining_time": "0:03:58"} +{"current_steps": 752, "total_steps": 933, "loss": 0.30549776554107666, "lr": 1.1168243008064123e-06, "epoch": 2.418006430868167, "percentage": 80.6, "elapsed_time": "0:16:25", "remaining_time": "0:03:57"} +{"current_steps": 753, "total_steps": 933, "loss": 0.37557756900787354, "lr": 1.1050574603816905e-06, "epoch": 2.4212218649517685, "percentage": 80.71, "elapsed_time": "0:16:26", "remaining_time": "0:03:55"} +{"current_steps": 754, "total_steps": 933, "loss": 0.5297196507453918, "lr": 1.0933452304844505e-06, "epoch": 2.42443729903537, "percentage": 80.81, "elapsed_time": "0:16:27", "remaining_time": "0:03:54"} +{"current_steps": 755, "total_steps": 933, "loss": 0.274686723947525, "lr": 1.0816877753304777e-06, "epoch": 2.427652733118971, "percentage": 80.92, "elapsed_time": "0:16:29", "remaining_time": "0:03:53"} +{"current_steps": 756, "total_steps": 933, "loss": 0.33781081438064575, "lr": 1.0700852583675708e-06, "epoch": 2.4308681672025725, "percentage": 81.03, "elapsed_time": "0:16:30", "remaining_time": "0:03:51"} +{"current_steps": 757, "total_steps": 933, "loss": 0.3239028751850128, "lr": 1.0585378422732435e-06, "epoch": 2.4340836012861735, "percentage": 81.14, "elapsed_time": "0:16:31", "remaining_time": "0:03:50"} +{"current_steps": 758, "total_steps": 933, "loss": 0.34594273567199707, "lr": 1.0470456889524473e-06, "epoch": 2.437299035369775, "percentage": 81.24, "elapsed_time": "0:16:32", "remaining_time": "0:03:49"} +{"current_steps": 759, "total_steps": 933, "loss": 0.21343210339546204, "lr": 1.0356089595353008e-06, "epoch": 2.440514469453376, "percentage": 81.35, "elapsed_time": "0:16:33", "remaining_time": "0:03:47"} +{"current_steps": 760, "total_steps": 933, "loss": 0.49799197912216187, "lr": 1.0242278143748307e-06, "epoch": 2.4437299035369775, "percentage": 81.46, "elapsed_time": "0:16:34", "remaining_time": "0:03:46"} +{"current_steps": 761, "total_steps": 933, "loss": 0.3868298828601837, "lr": 1.012902413044725e-06, "epoch": 2.446945337620579, "percentage": 81.56, "elapsed_time": "0:16:35", "remaining_time": "0:03:45"} +{"current_steps": 762, "total_steps": 933, "loss": 0.264824241399765, "lr": 1.0016329143370929e-06, "epoch": 2.45016077170418, "percentage": 81.67, "elapsed_time": "0:16:36", "remaining_time": "0:03:43"} +{"current_steps": 763, "total_steps": 933, "loss": 0.3985700011253357, "lr": 9.904194762602382e-07, "epoch": 2.4533762057877815, "percentage": 81.78, "elapsed_time": "0:16:37", "remaining_time": "0:03:42"} +{"current_steps": 764, "total_steps": 933, "loss": 0.17972898483276367, "lr": 9.792622560364467e-07, "epoch": 2.4565916398713825, "percentage": 81.89, "elapsed_time": "0:16:38", "remaining_time": "0:03:40"} +{"current_steps": 765, "total_steps": 933, "loss": 0.3820546865463257, "lr": 9.681614100997806e-07, "epoch": 2.459807073954984, "percentage": 81.99, "elapsed_time": "0:16:39", "remaining_time": "0:03:39"} +{"current_steps": 766, "total_steps": 933, "loss": 0.13588829338550568, "lr": 9.57117094093884e-07, "epoch": 2.463022508038585, "percentage": 82.1, "elapsed_time": "0:16:40", "remaining_time": "0:03:38"} +{"current_steps": 767, "total_steps": 933, "loss": 0.2940727472305298, "lr": 9.46129462869802e-07, "epoch": 2.4662379421221865, "percentage": 82.21, "elapsed_time": "0:16:41", "remaining_time": "0:03:36"} +{"current_steps": 768, "total_steps": 933, "loss": 0.6208051443099976, "lr": 9.351986704838084e-07, "epoch": 2.469453376205788, "percentage": 82.32, "elapsed_time": "0:16:43", "remaining_time": "0:03:35"} +{"current_steps": 769, "total_steps": 933, "loss": 0.9009281396865845, "lr": 9.243248701952489e-07, "epoch": 2.472668810289389, "percentage": 82.42, "elapsed_time": "0:16:44", "remaining_time": "0:03:34"} +{"current_steps": 770, "total_steps": 933, "loss": 0.3825221061706543, "lr": 9.135082144643869e-07, "epoch": 2.4758842443729905, "percentage": 82.53, "elapsed_time": "0:16:45", "remaining_time": "0:03:32"} +{"current_steps": 771, "total_steps": 933, "loss": 0.4601898789405823, "lr": 9.0274885495027e-07, "epoch": 2.4790996784565915, "percentage": 82.64, "elapsed_time": "0:16:46", "remaining_time": "0:03:31"} +{"current_steps": 772, "total_steps": 933, "loss": 0.31752362847328186, "lr": 8.92046942508602e-07, "epoch": 2.482315112540193, "percentage": 82.74, "elapsed_time": "0:16:47", "remaining_time": "0:03:30"} +{"current_steps": 773, "total_steps": 933, "loss": 0.9294736981391907, "lr": 8.814026271896275e-07, "epoch": 2.485530546623794, "percentage": 82.85, "elapsed_time": "0:16:48", "remaining_time": "0:03:28"} +{"current_steps": 774, "total_steps": 933, "loss": 0.324219286441803, "lr": 8.708160582360303e-07, "epoch": 2.4887459807073955, "percentage": 82.96, "elapsed_time": "0:16:49", "remaining_time": "0:03:27"} +{"current_steps": 775, "total_steps": 933, "loss": 0.9777847528457642, "lr": 8.602873840808379e-07, "epoch": 2.491961414790997, "percentage": 83.07, "elapsed_time": "0:16:50", "remaining_time": "0:03:26"} +{"current_steps": 776, "total_steps": 933, "loss": 0.5360197424888611, "lr": 8.498167523453404e-07, "epoch": 2.495176848874598, "percentage": 83.17, "elapsed_time": "0:16:51", "remaining_time": "0:03:24"} +{"current_steps": 777, "total_steps": 933, "loss": 0.31830108165740967, "lr": 8.394043098370275e-07, "epoch": 2.4983922829581995, "percentage": 83.28, "elapsed_time": "0:16:52", "remaining_time": "0:03:23"} +{"current_steps": 778, "total_steps": 933, "loss": 0.9196930527687073, "lr": 8.290502025475183e-07, "epoch": 2.5016077170418005, "percentage": 83.39, "elapsed_time": "0:16:53", "remaining_time": "0:03:21"} +{"current_steps": 779, "total_steps": 933, "loss": 0.5551115870475769, "lr": 8.187545756505244e-07, "epoch": 2.504823151125402, "percentage": 83.49, "elapsed_time": "0:16:54", "remaining_time": "0:03:20"} +{"current_steps": 780, "total_steps": 933, "loss": 0.6043642163276672, "lr": 8.085175734998091e-07, "epoch": 2.508038585209003, "percentage": 83.6, "elapsed_time": "0:16:56", "remaining_time": "0:03:19"} +{"current_steps": 781, "total_steps": 933, "loss": 0.22657924890518188, "lr": 7.98339339627166e-07, "epoch": 2.5112540192926045, "percentage": 83.71, "elapsed_time": "0:16:57", "remaining_time": "0:03:17"} +{"current_steps": 782, "total_steps": 933, "loss": 0.259809672832489, "lr": 7.882200167404047e-07, "epoch": 2.514469453376206, "percentage": 83.82, "elapsed_time": "0:16:58", "remaining_time": "0:03:16"} +{"current_steps": 783, "total_steps": 933, "loss": 0.4243828058242798, "lr": 7.781597467213514e-07, "epoch": 2.517684887459807, "percentage": 83.92, "elapsed_time": "0:16:59", "remaining_time": "0:03:15"} +{"current_steps": 784, "total_steps": 933, "loss": 0.5216907858848572, "lr": 7.681586706238586e-07, "epoch": 2.5209003215434085, "percentage": 84.03, "elapsed_time": "0:17:00", "remaining_time": "0:03:13"} +{"current_steps": 785, "total_steps": 933, "loss": 0.260081946849823, "lr": 7.582169286718305e-07, "epoch": 2.5241157556270095, "percentage": 84.14, "elapsed_time": "0:17:01", "remaining_time": "0:03:12"} +{"current_steps": 786, "total_steps": 933, "loss": 0.3229532837867737, "lr": 7.483346602572505e-07, "epoch": 2.527331189710611, "percentage": 84.24, "elapsed_time": "0:17:02", "remaining_time": "0:03:11"} +{"current_steps": 787, "total_steps": 933, "loss": 1.6990151405334473, "lr": 7.385120039382326e-07, "epoch": 2.530546623794212, "percentage": 84.35, "elapsed_time": "0:17:03", "remaining_time": "0:03:09"} +{"current_steps": 788, "total_steps": 933, "loss": 0.46230068802833557, "lr": 7.287490974370759e-07, "epoch": 2.5337620578778135, "percentage": 84.46, "elapsed_time": "0:17:04", "remaining_time": "0:03:08"} +{"current_steps": 789, "total_steps": 933, "loss": 0.1410602331161499, "lr": 7.190460776383351e-07, "epoch": 2.536977491961415, "percentage": 84.57, "elapsed_time": "0:17:05", "remaining_time": "0:03:07"} +{"current_steps": 790, "total_steps": 933, "loss": 0.1462668925523758, "lr": 7.094030805869001e-07, "epoch": 2.540192926045016, "percentage": 84.67, "elapsed_time": "0:17:06", "remaining_time": "0:03:05"} +{"current_steps": 791, "total_steps": 933, "loss": 0.4931030869483948, "lr": 6.998202414860894e-07, "epoch": 2.5434083601286175, "percentage": 84.78, "elapsed_time": "0:17:07", "remaining_time": "0:03:04"} +{"current_steps": 792, "total_steps": 933, "loss": 0.4790411591529846, "lr": 6.902976946957518e-07, "epoch": 2.5466237942122185, "percentage": 84.89, "elapsed_time": "0:17:09", "remaining_time": "0:03:03"} +{"current_steps": 793, "total_steps": 933, "loss": 0.25933071970939636, "lr": 6.808355737303895e-07, "epoch": 2.54983922829582, "percentage": 84.99, "elapsed_time": "0:17:10", "remaining_time": "0:03:01"} +{"current_steps": 794, "total_steps": 933, "loss": 0.7769885063171387, "lr": 6.71434011257277e-07, "epoch": 2.553054662379421, "percentage": 85.1, "elapsed_time": "0:17:11", "remaining_time": "0:03:00"} +{"current_steps": 795, "total_steps": 933, "loss": 0.9100818634033203, "lr": 6.620931390946078e-07, "epoch": 2.5562700964630225, "percentage": 85.21, "elapsed_time": "0:17:12", "remaining_time": "0:02:59"} +{"current_steps": 796, "total_steps": 933, "loss": 0.31192898750305176, "lr": 6.528130882096418e-07, "epoch": 2.559485530546624, "percentage": 85.32, "elapsed_time": "0:17:13", "remaining_time": "0:02:57"} +{"current_steps": 797, "total_steps": 933, "loss": 0.29440587759017944, "lr": 6.435939887168718e-07, "epoch": 2.562700964630225, "percentage": 85.42, "elapsed_time": "0:17:14", "remaining_time": "0:02:56"} +{"current_steps": 798, "total_steps": 933, "loss": 0.3736717402935028, "lr": 6.344359698761998e-07, "epoch": 2.5659163987138265, "percentage": 85.53, "elapsed_time": "0:17:15", "remaining_time": "0:02:55"} +{"current_steps": 799, "total_steps": 933, "loss": 0.6370671391487122, "lr": 6.253391600911213e-07, "epoch": 2.5691318327974275, "percentage": 85.64, "elapsed_time": "0:17:16", "remaining_time": "0:02:53"} +{"current_steps": 800, "total_steps": 933, "loss": 1.8662174940109253, "lr": 6.163036869069267e-07, "epoch": 2.572347266881029, "percentage": 85.74, "elapsed_time": "0:17:17", "remaining_time": "0:02:52"} +{"current_steps": 801, "total_steps": 933, "loss": 0.4110758900642395, "lr": 6.073296770089159e-07, "epoch": 2.57556270096463, "percentage": 85.85, "elapsed_time": "0:17:18", "remaining_time": "0:02:51"} +{"current_steps": 802, "total_steps": 933, "loss": 0.26106947660446167, "lr": 5.984172562206164e-07, "epoch": 2.5787781350482315, "percentage": 85.96, "elapsed_time": "0:17:19", "remaining_time": "0:02:49"} +{"current_steps": 803, "total_steps": 933, "loss": 0.25730016827583313, "lr": 5.895665495020242e-07, "epoch": 2.581993569131833, "percentage": 86.07, "elapsed_time": "0:17:20", "remaining_time": "0:02:48"} +{"current_steps": 804, "total_steps": 933, "loss": 0.3436719477176666, "lr": 5.807776809478472e-07, "epoch": 2.585209003215434, "percentage": 86.17, "elapsed_time": "0:17:21", "remaining_time": "0:02:47"} +{"current_steps": 805, "total_steps": 933, "loss": 1.1672216653823853, "lr": 5.720507737857706e-07, "epoch": 2.5884244372990355, "percentage": 86.28, "elapsed_time": "0:17:23", "remaining_time": "0:02:45"} +{"current_steps": 806, "total_steps": 933, "loss": 0.5256634950637817, "lr": 5.633859503747241e-07, "epoch": 2.5916398713826365, "percentage": 86.39, "elapsed_time": "0:17:24", "remaining_time": "0:02:44"} +{"current_steps": 807, "total_steps": 933, "loss": 0.366617888212204, "lr": 5.547833322031693e-07, "epoch": 2.594855305466238, "percentage": 86.5, "elapsed_time": "0:17:25", "remaining_time": "0:02:43"} +{"current_steps": 808, "total_steps": 933, "loss": 0.27508530020713806, "lr": 5.462430398873947e-07, "epoch": 2.598070739549839, "percentage": 86.6, "elapsed_time": "0:17:26", "remaining_time": "0:02:41"} +{"current_steps": 809, "total_steps": 933, "loss": 0.36251911520957947, "lr": 5.377651931698275e-07, "epoch": 2.6012861736334405, "percentage": 86.71, "elapsed_time": "0:17:27", "remaining_time": "0:02:40"} +{"current_steps": 810, "total_steps": 933, "loss": 0.33935314416885376, "lr": 5.293499109173517e-07, "epoch": 2.604501607717042, "percentage": 86.82, "elapsed_time": "0:17:28", "remaining_time": "0:02:39"} +{"current_steps": 811, "total_steps": 933, "loss": 0.4906863570213318, "lr": 5.209973111196404e-07, "epoch": 2.607717041800643, "percentage": 86.92, "elapsed_time": "0:17:29", "remaining_time": "0:02:37"} +{"current_steps": 812, "total_steps": 933, "loss": 1.5506454706192017, "lr": 5.127075108875051e-07, "epoch": 2.6109324758842445, "percentage": 87.03, "elapsed_time": "0:17:30", "remaining_time": "0:02:36"} +{"current_steps": 813, "total_steps": 933, "loss": 0.2666461765766144, "lr": 5.044806264512525e-07, "epoch": 2.6141479099678455, "percentage": 87.14, "elapsed_time": "0:17:31", "remaining_time": "0:02:35"} +{"current_steps": 814, "total_steps": 933, "loss": 0.6199164986610413, "lr": 4.963167731590535e-07, "epoch": 2.617363344051447, "percentage": 87.25, "elapsed_time": "0:17:32", "remaining_time": "0:02:33"} +{"current_steps": 815, "total_steps": 933, "loss": 0.2723952531814575, "lr": 4.88216065475327e-07, "epoch": 2.620578778135048, "percentage": 87.35, "elapsed_time": "0:17:33", "remaining_time": "0:02:32"} +{"current_steps": 816, "total_steps": 933, "loss": 0.46878230571746826, "lr": 4.801786169791339e-07, "epoch": 2.6237942122186495, "percentage": 87.46, "elapsed_time": "0:17:34", "remaining_time": "0:02:31"} +{"current_steps": 817, "total_steps": 933, "loss": 0.36121273040771484, "lr": 4.7220454036258803e-07, "epoch": 2.627009646302251, "percentage": 87.57, "elapsed_time": "0:17:36", "remaining_time": "0:02:29"} +{"current_steps": 818, "total_steps": 933, "loss": 0.38482001423835754, "lr": 4.642939474292713e-07, "epoch": 2.630225080385852, "percentage": 87.67, "elapsed_time": "0:17:37", "remaining_time": "0:02:28"} +{"current_steps": 819, "total_steps": 933, "loss": 0.6713676452636719, "lr": 4.5644694909266984e-07, "epoch": 2.6334405144694535, "percentage": 87.78, "elapsed_time": "0:17:38", "remaining_time": "0:02:27"} +{"current_steps": 820, "total_steps": 933, "loss": 0.4145408868789673, "lr": 4.4866365537461543e-07, "epoch": 2.6366559485530545, "percentage": 87.89, "elapsed_time": "0:17:39", "remaining_time": "0:02:25"} +{"current_steps": 821, "total_steps": 933, "loss": 0.18469397723674774, "lr": 4.4094417540374745e-07, "epoch": 2.639871382636656, "percentage": 88.0, "elapsed_time": "0:17:40", "remaining_time": "0:02:24"} +{"current_steps": 822, "total_steps": 933, "loss": 0.9125012159347534, "lr": 4.332886174139794e-07, "epoch": 2.643086816720257, "percentage": 88.1, "elapsed_time": "0:17:41", "remaining_time": "0:02:23"} +{"current_steps": 823, "total_steps": 933, "loss": 0.7010893225669861, "lr": 4.2569708874298123e-07, "epoch": 2.6463022508038585, "percentage": 88.21, "elapsed_time": "0:17:42", "remaining_time": "0:02:22"} +{"current_steps": 824, "total_steps": 933, "loss": 0.22042930126190186, "lr": 4.1816969583067526e-07, "epoch": 2.64951768488746, "percentage": 88.32, "elapsed_time": "0:17:43", "remaining_time": "0:02:20"} +{"current_steps": 825, "total_steps": 933, "loss": 0.8344212770462036, "lr": 4.1070654421774767e-07, "epoch": 2.652733118971061, "percentage": 88.42, "elapsed_time": "0:17:44", "remaining_time": "0:02:19"} +{"current_steps": 826, "total_steps": 933, "loss": 0.3559122681617737, "lr": 4.0330773854416025e-07, "epoch": 2.6559485530546625, "percentage": 88.53, "elapsed_time": "0:17:45", "remaining_time": "0:02:18"} +{"current_steps": 827, "total_steps": 933, "loss": 0.30302202701568604, "lr": 3.959733825476908e-07, "epoch": 2.6591639871382635, "percentage": 88.64, "elapsed_time": "0:17:46", "remaining_time": "0:02:16"} +{"current_steps": 828, "total_steps": 933, "loss": 0.37081068754196167, "lr": 3.8870357906247434e-07, "epoch": 2.662379421221865, "percentage": 88.75, "elapsed_time": "0:17:47", "remaining_time": "0:02:15"} +{"current_steps": 829, "total_steps": 933, "loss": 0.2318996787071228, "lr": 3.814984300175645e-07, "epoch": 2.665594855305466, "percentage": 88.85, "elapsed_time": "0:17:48", "remaining_time": "0:02:14"} +{"current_steps": 830, "total_steps": 933, "loss": 0.27689433097839355, "lr": 3.743580364355004e-07, "epoch": 2.6688102893890675, "percentage": 88.96, "elapsed_time": "0:17:49", "remaining_time": "0:02:12"} +{"current_steps": 831, "total_steps": 933, "loss": 0.3493078351020813, "lr": 3.672824984308948e-07, "epoch": 2.672025723472669, "percentage": 89.07, "elapsed_time": "0:17:51", "remaining_time": "0:02:11"} +{"current_steps": 832, "total_steps": 933, "loss": 0.7462046146392822, "lr": 3.602719152090256e-07, "epoch": 2.67524115755627, "percentage": 89.17, "elapsed_time": "0:17:52", "remaining_time": "0:02:10"} +{"current_steps": 833, "total_steps": 933, "loss": 0.6614691615104675, "lr": 3.533263850644508e-07, "epoch": 2.6784565916398715, "percentage": 89.28, "elapsed_time": "0:17:53", "remaining_time": "0:02:08"} +{"current_steps": 834, "total_steps": 933, "loss": 0.29138296842575073, "lr": 3.464460053796237e-07, "epoch": 2.6816720257234725, "percentage": 89.39, "elapsed_time": "0:17:54", "remaining_time": "0:02:07"} +{"current_steps": 835, "total_steps": 933, "loss": 0.5526795387268066, "lr": 3.396308726235326e-07, "epoch": 2.684887459807074, "percentage": 89.5, "elapsed_time": "0:17:55", "remaining_time": "0:02:06"} +{"current_steps": 836, "total_steps": 933, "loss": 0.5932884812355042, "lr": 3.328810823503448e-07, "epoch": 2.688102893890675, "percentage": 89.6, "elapsed_time": "0:17:56", "remaining_time": "0:02:04"} +{"current_steps": 837, "total_steps": 933, "loss": 0.6854689121246338, "lr": 3.2619672919807054e-07, "epoch": 2.6913183279742765, "percentage": 89.71, "elapsed_time": "0:17:57", "remaining_time": "0:02:03"} +{"current_steps": 838, "total_steps": 933, "loss": 0.2756684422492981, "lr": 3.195779068872318e-07, "epoch": 2.694533762057878, "percentage": 89.82, "elapsed_time": "0:17:58", "remaining_time": "0:02:02"} +{"current_steps": 839, "total_steps": 933, "loss": 0.30567488074302673, "lr": 3.1302470821955143e-07, "epoch": 2.697749196141479, "percentage": 89.92, "elapsed_time": "0:17:59", "remaining_time": "0:02:00"} +{"current_steps": 840, "total_steps": 933, "loss": 0.8650332093238831, "lr": 3.0653722507665016e-07, "epoch": 2.7009646302250805, "percentage": 90.03, "elapsed_time": "0:18:00", "remaining_time": "0:01:59"} +{"current_steps": 841, "total_steps": 933, "loss": 0.7738863229751587, "lr": 3.0011554841876236e-07, "epoch": 2.7041800643086815, "percentage": 90.14, "elapsed_time": "0:18:01", "remaining_time": "0:01:58"} +{"current_steps": 842, "total_steps": 933, "loss": 0.4506850838661194, "lr": 2.9375976828345254e-07, "epoch": 2.707395498392283, "percentage": 90.25, "elapsed_time": "0:18:02", "remaining_time": "0:01:57"} +{"current_steps": 843, "total_steps": 933, "loss": 0.9781379699707031, "lr": 2.8746997378436117e-07, "epoch": 2.710610932475884, "percentage": 90.35, "elapsed_time": "0:18:04", "remaining_time": "0:01:55"} +{"current_steps": 844, "total_steps": 933, "loss": 0.36554017663002014, "lr": 2.8124625310995136e-07, "epoch": 2.7138263665594855, "percentage": 90.46, "elapsed_time": "0:18:05", "remaining_time": "0:01:54"} +{"current_steps": 845, "total_steps": 933, "loss": 1.2861707210540771, "lr": 2.750886935222724e-07, "epoch": 2.717041800643087, "percentage": 90.57, "elapsed_time": "0:18:06", "remaining_time": "0:01:53"} +{"current_steps": 846, "total_steps": 933, "loss": 0.30182671546936035, "lr": 2.689973813557367e-07, "epoch": 2.720257234726688, "percentage": 90.68, "elapsed_time": "0:18:07", "remaining_time": "0:01:51"} +{"current_steps": 847, "total_steps": 933, "loss": 0.3068884015083313, "lr": 2.6297240201591025e-07, "epoch": 2.7234726688102895, "percentage": 90.78, "elapsed_time": "0:18:08", "remaining_time": "0:01:50"} +{"current_steps": 848, "total_steps": 933, "loss": 0.3039591610431671, "lr": 2.5701383997831284e-07, "epoch": 2.7266881028938905, "percentage": 90.89, "elapsed_time": "0:18:09", "remaining_time": "0:01:49"} +{"current_steps": 849, "total_steps": 933, "loss": 0.4938279986381531, "lr": 2.5112177878723833e-07, "epoch": 2.729903536977492, "percentage": 91.0, "elapsed_time": "0:18:10", "remaining_time": "0:01:47"} +{"current_steps": 850, "total_steps": 933, "loss": 0.3451383709907532, "lr": 2.452963010545767e-07, "epoch": 2.733118971061093, "percentage": 91.1, "elapsed_time": "0:18:11", "remaining_time": "0:01:46"} +{"current_steps": 851, "total_steps": 933, "loss": 0.29905378818511963, "lr": 2.3953748845866096e-07, "epoch": 2.7363344051446945, "percentage": 91.21, "elapsed_time": "0:18:12", "remaining_time": "0:01:45"} +{"current_steps": 852, "total_steps": 933, "loss": 0.3758173882961273, "lr": 2.3384542174311908e-07, "epoch": 2.739549839228296, "percentage": 91.32, "elapsed_time": "0:18:13", "remaining_time": "0:01:43"} +{"current_steps": 853, "total_steps": 933, "loss": 0.7077566981315613, "lr": 2.282201807157436e-07, "epoch": 2.742765273311897, "percentage": 91.43, "elapsed_time": "0:18:14", "remaining_time": "0:01:42"} +{"current_steps": 854, "total_steps": 933, "loss": 0.5676212310791016, "lr": 2.2266184424737214e-07, "epoch": 2.7459807073954985, "percentage": 91.53, "elapsed_time": "0:18:15", "remaining_time": "0:01:41"} +{"current_steps": 855, "total_steps": 933, "loss": 0.3098456859588623, "lr": 2.1717049027078106e-07, "epoch": 2.7491961414790995, "percentage": 91.64, "elapsed_time": "0:18:16", "remaining_time": "0:01:40"} +{"current_steps": 856, "total_steps": 933, "loss": 0.5395633578300476, "lr": 2.1174619577959355e-07, "epoch": 2.752411575562701, "percentage": 91.75, "elapsed_time": "0:18:18", "remaining_time": "0:01:38"} +{"current_steps": 857, "total_steps": 933, "loss": 0.67840975522995, "lr": 2.0638903682719814e-07, "epoch": 2.755627009646302, "percentage": 91.85, "elapsed_time": "0:18:19", "remaining_time": "0:01:37"} +{"current_steps": 858, "total_steps": 933, "loss": 0.22875866293907166, "lr": 2.010990885256875e-07, "epoch": 2.7588424437299035, "percentage": 91.96, "elapsed_time": "0:18:20", "remaining_time": "0:01:36"} +{"current_steps": 859, "total_steps": 933, "loss": 0.28753116726875305, "lr": 1.958764250447981e-07, "epoch": 2.762057877813505, "percentage": 92.07, "elapsed_time": "0:18:21", "remaining_time": "0:01:34"} +{"current_steps": 860, "total_steps": 933, "loss": 0.49607959389686584, "lr": 1.9072111961087546e-07, "epoch": 2.765273311897106, "percentage": 92.18, "elapsed_time": "0:18:22", "remaining_time": "0:01:33"} +{"current_steps": 861, "total_steps": 933, "loss": 0.503407895565033, "lr": 1.856332445058462e-07, "epoch": 2.7684887459807075, "percentage": 92.28, "elapsed_time": "0:18:23", "remaining_time": "0:01:32"} +{"current_steps": 862, "total_steps": 933, "loss": 0.37309232354164124, "lr": 1.8061287106620308e-07, "epoch": 2.7717041800643085, "percentage": 92.39, "elapsed_time": "0:18:24", "remaining_time": "0:01:30"} +{"current_steps": 863, "total_steps": 933, "loss": 0.15657085180282593, "lr": 1.7566006968200712e-07, "epoch": 2.77491961414791, "percentage": 92.5, "elapsed_time": "0:18:25", "remaining_time": "0:01:29"} +{"current_steps": 864, "total_steps": 933, "loss": 0.9316859245300293, "lr": 1.7077490979589996e-07, "epoch": 2.778135048231511, "percentage": 92.6, "elapsed_time": "0:18:26", "remaining_time": "0:01:28"} +{"current_steps": 865, "total_steps": 933, "loss": 0.5050212144851685, "lr": 1.6595745990212686e-07, "epoch": 2.7813504823151125, "percentage": 92.71, "elapsed_time": "0:18:27", "remaining_time": "0:01:27"} +{"current_steps": 866, "total_steps": 933, "loss": 0.2638796865940094, "lr": 1.6120778754558418e-07, "epoch": 2.784565916398714, "percentage": 92.82, "elapsed_time": "0:18:28", "remaining_time": "0:01:25"} +{"current_steps": 867, "total_steps": 933, "loss": 0.3945295214653015, "lr": 1.5652595932086346e-07, "epoch": 2.787781350482315, "percentage": 92.93, "elapsed_time": "0:18:29", "remaining_time": "0:01:24"} +{"current_steps": 868, "total_steps": 933, "loss": 0.7213743925094604, "lr": 1.519120408713237e-07, "epoch": 2.7909967845659165, "percentage": 93.03, "elapsed_time": "0:18:30", "remaining_time": "0:01:23"} +{"current_steps": 869, "total_steps": 933, "loss": 0.3248399794101715, "lr": 1.4736609688816738e-07, "epoch": 2.7942122186495175, "percentage": 93.14, "elapsed_time": "0:18:32", "remaining_time": "0:01:21"} +{"current_steps": 870, "total_steps": 933, "loss": 0.22939413785934448, "lr": 1.42888191109537e-07, "epoch": 2.797427652733119, "percentage": 93.25, "elapsed_time": "0:18:33", "remaining_time": "0:01:20"} +{"current_steps": 871, "total_steps": 933, "loss": 0.32842016220092773, "lr": 1.3847838631961764e-07, "epoch": 2.80064308681672, "percentage": 93.35, "elapsed_time": "0:18:34", "remaining_time": "0:01:19"} +{"current_steps": 872, "total_steps": 933, "loss": 0.35260647535324097, "lr": 1.341367443477598e-07, "epoch": 2.8038585209003215, "percentage": 93.46, "elapsed_time": "0:18:35", "remaining_time": "0:01:18"} +{"current_steps": 873, "total_steps": 933, "loss": 0.14097937941551208, "lr": 1.2986332606761077e-07, "epoch": 2.807073954983923, "percentage": 93.57, "elapsed_time": "0:18:36", "remaining_time": "0:01:16"} +{"current_steps": 874, "total_steps": 933, "loss": 0.3869823217391968, "lr": 1.2565819139626123e-07, "epoch": 2.810289389067524, "percentage": 93.68, "elapsed_time": "0:18:37", "remaining_time": "0:01:15"} +{"current_steps": 875, "total_steps": 933, "loss": 0.37241262197494507, "lr": 1.215213992934061e-07, "epoch": 2.8135048231511255, "percentage": 93.78, "elapsed_time": "0:18:38", "remaining_time": "0:01:14"} +{"current_steps": 876, "total_steps": 933, "loss": 0.4987008273601532, "lr": 1.1745300776051683e-07, "epoch": 2.816720257234727, "percentage": 93.89, "elapsed_time": "0:18:39", "remaining_time": "0:01:12"} +{"current_steps": 877, "total_steps": 933, "loss": 0.7997194528579712, "lr": 1.1345307384002857e-07, "epoch": 2.819935691318328, "percentage": 94.0, "elapsed_time": "0:18:40", "remaining_time": "0:01:11"} +{"current_steps": 878, "total_steps": 933, "loss": 0.3447033166885376, "lr": 1.0952165361454103e-07, "epoch": 2.823151125401929, "percentage": 94.11, "elapsed_time": "0:18:41", "remaining_time": "0:01:10"} +{"current_steps": 879, "total_steps": 933, "loss": 0.3858156204223633, "lr": 1.0565880220603009e-07, "epoch": 2.8263665594855305, "percentage": 94.21, "elapsed_time": "0:18:42", "remaining_time": "0:01:08"} +{"current_steps": 880, "total_steps": 933, "loss": 0.34079843759536743, "lr": 1.0186457377507786e-07, "epoch": 2.829581993569132, "percentage": 94.32, "elapsed_time": "0:18:43", "remaining_time": "0:01:07"} +{"current_steps": 881, "total_steps": 933, "loss": 0.23632663488388062, "lr": 9.813902152011112e-08, "epoch": 2.832797427652733, "percentage": 94.43, "elapsed_time": "0:18:44", "remaining_time": "0:01:06"} +{"current_steps": 882, "total_steps": 933, "loss": 0.27433305978775024, "lr": 9.448219767665579e-08, "epoch": 2.8360128617363345, "percentage": 94.53, "elapsed_time": "0:18:45", "remaining_time": "0:01:05"} +{"current_steps": 883, "total_steps": 933, "loss": 0.23926009237766266, "lr": 9.089415351660635e-08, "epoch": 2.839228295819936, "percentage": 94.64, "elapsed_time": "0:18:47", "remaining_time": "0:01:03"} +{"current_steps": 884, "total_steps": 933, "loss": 0.2923390567302704, "lr": 8.737493934750374e-08, "epoch": 2.842443729903537, "percentage": 94.75, "elapsed_time": "0:18:48", "remaining_time": "0:01:02"} +{"current_steps": 885, "total_steps": 933, "loss": 0.41260838508605957, "lr": 8.392460451183304e-08, "epoch": 2.845659163987138, "percentage": 94.86, "elapsed_time": "0:18:49", "remaining_time": "0:01:01"} +{"current_steps": 886, "total_steps": 933, "loss": 1.014516830444336, "lr": 8.05431973863291e-08, "epoch": 2.8488745980707395, "percentage": 94.96, "elapsed_time": "0:18:50", "remaining_time": "0:00:59"} +{"current_steps": 887, "total_steps": 933, "loss": 0.7642203569412231, "lr": 7.723076538130093e-08, "epoch": 2.852090032154341, "percentage": 95.07, "elapsed_time": "0:18:51", "remaining_time": "0:00:58"} +{"current_steps": 888, "total_steps": 933, "loss": 0.5412633419036865, "lr": 7.398735493996445e-08, "epoch": 2.855305466237942, "percentage": 95.18, "elapsed_time": "0:18:52", "remaining_time": "0:00:57"} +{"current_steps": 889, "total_steps": 933, "loss": 0.2149556279182434, "lr": 7.081301153779308e-08, "epoch": 2.8585209003215435, "percentage": 95.28, "elapsed_time": "0:18:53", "remaining_time": "0:00:56"} +{"current_steps": 890, "total_steps": 933, "loss": 0.2750585675239563, "lr": 6.77077796818787e-08, "epoch": 2.861736334405145, "percentage": 95.39, "elapsed_time": "0:18:54", "remaining_time": "0:00:54"} +{"current_steps": 891, "total_steps": 933, "loss": 0.870136022567749, "lr": 6.467170291030999e-08, "epoch": 2.864951768488746, "percentage": 95.5, "elapsed_time": "0:18:55", "remaining_time": "0:00:53"} +{"current_steps": 892, "total_steps": 933, "loss": 0.6092904806137085, "lr": 6.170482379155907e-08, "epoch": 2.868167202572347, "percentage": 95.61, "elapsed_time": "0:18:56", "remaining_time": "0:00:52"} +{"current_steps": 893, "total_steps": 933, "loss": 0.3216220438480377, "lr": 5.880718392388518e-08, "epoch": 2.8713826366559485, "percentage": 95.71, "elapsed_time": "0:18:57", "remaining_time": "0:00:50"} +{"current_steps": 894, "total_steps": 933, "loss": 0.9828184247016907, "lr": 5.597882393475473e-08, "epoch": 2.87459807073955, "percentage": 95.82, "elapsed_time": "0:18:58", "remaining_time": "0:00:49"} +{"current_steps": 895, "total_steps": 933, "loss": 0.3474922776222229, "lr": 5.3219783480266685e-08, "epoch": 2.877813504823151, "percentage": 95.93, "elapsed_time": "0:19:00", "remaining_time": "0:00:48"} +{"current_steps": 896, "total_steps": 933, "loss": 0.6136802434921265, "lr": 5.053010124460078e-08, "epoch": 2.8810289389067525, "percentage": 96.03, "elapsed_time": "0:19:01", "remaining_time": "0:00:47"} +{"current_steps": 897, "total_steps": 933, "loss": 0.36145269870758057, "lr": 4.790981493947244e-08, "epoch": 2.884244372990354, "percentage": 96.14, "elapsed_time": "0:19:02", "remaining_time": "0:00:45"} +{"current_steps": 898, "total_steps": 933, "loss": 0.582977831363678, "lr": 4.5358961303604845e-08, "epoch": 2.887459807073955, "percentage": 96.25, "elapsed_time": "0:19:03", "remaining_time": "0:00:44"} +{"current_steps": 899, "total_steps": 933, "loss": 0.26535022258758545, "lr": 4.287757610221488e-08, "epoch": 2.890675241157556, "percentage": 96.36, "elapsed_time": "0:19:04", "remaining_time": "0:00:43"} +{"current_steps": 900, "total_steps": 933, "loss": 0.36218592524528503, "lr": 4.046569412651025e-08, "epoch": 2.8938906752411575, "percentage": 96.46, "elapsed_time": "0:19:05", "remaining_time": "0:00:42"} +{"current_steps": 901, "total_steps": 933, "loss": 0.6273083686828613, "lr": 3.8123349193201484e-08, "epoch": 2.897106109324759, "percentage": 96.57, "elapsed_time": "0:19:06", "remaining_time": "0:00:40"} +{"current_steps": 902, "total_steps": 933, "loss": 0.51048743724823, "lr": 3.585057414402959e-08, "epoch": 2.90032154340836, "percentage": 96.68, "elapsed_time": "0:19:07", "remaining_time": "0:00:39"} +{"current_steps": 903, "total_steps": 933, "loss": 0.3014843165874481, "lr": 3.364740084530416e-08, "epoch": 2.9035369774919615, "percentage": 96.78, "elapsed_time": "0:19:08", "remaining_time": "0:00:38"} +{"current_steps": 904, "total_steps": 933, "loss": 0.2004130780696869, "lr": 3.1513860187457055e-08, "epoch": 2.906752411575563, "percentage": 96.89, "elapsed_time": "0:19:09", "remaining_time": "0:00:36"} +{"current_steps": 905, "total_steps": 933, "loss": 0.39281973242759705, "lr": 2.9449982084607808e-08, "epoch": 2.909967845659164, "percentage": 97.0, "elapsed_time": "0:19:10", "remaining_time": "0:00:35"} +{"current_steps": 906, "total_steps": 933, "loss": 0.23955759406089783, "lr": 2.7455795474147228e-08, "epoch": 2.913183279742765, "percentage": 97.11, "elapsed_time": "0:19:11", "remaining_time": "0:00:34"} +{"current_steps": 907, "total_steps": 933, "loss": 0.20128212869167328, "lr": 2.5531328316328875e-08, "epoch": 2.9163987138263665, "percentage": 97.21, "elapsed_time": "0:19:12", "remaining_time": "0:00:33"} +{"current_steps": 908, "total_steps": 933, "loss": 0.7717773914337158, "lr": 2.367660759387935e-08, "epoch": 2.919614147909968, "percentage": 97.32, "elapsed_time": "0:19:14", "remaining_time": "0:00:31"} +{"current_steps": 909, "total_steps": 933, "loss": 0.36580389738082886, "lr": 2.189165931161752e-08, "epoch": 2.922829581993569, "percentage": 97.43, "elapsed_time": "0:19:15", "remaining_time": "0:00:30"} +{"current_steps": 910, "total_steps": 933, "loss": 0.20489028096199036, "lr": 2.017650849609143e-08, "epoch": 2.9260450160771705, "percentage": 97.53, "elapsed_time": "0:19:16", "remaining_time": "0:00:29"} +{"current_steps": 911, "total_steps": 933, "loss": 0.4565661549568176, "lr": 1.8531179195227512e-08, "epoch": 2.929260450160772, "percentage": 97.64, "elapsed_time": "0:19:17", "remaining_time": "0:00:27"} +{"current_steps": 912, "total_steps": 933, "loss": 0.1987374871969223, "lr": 1.6955694477993055e-08, "epoch": 2.932475884244373, "percentage": 97.75, "elapsed_time": "0:19:18", "remaining_time": "0:00:26"} +{"current_steps": 913, "total_steps": 933, "loss": 0.6139571070671082, "lr": 1.545007643407148e-08, "epoch": 2.935691318327974, "percentage": 97.86, "elapsed_time": "0:19:19", "remaining_time": "0:00:25"} +{"current_steps": 914, "total_steps": 933, "loss": 0.3137204647064209, "lr": 1.4014346173555904e-08, "epoch": 2.9389067524115755, "percentage": 97.96, "elapsed_time": "0:19:20", "remaining_time": "0:00:24"} +{"current_steps": 915, "total_steps": 933, "loss": 0.2887860834598541, "lr": 1.2648523826649384e-08, "epoch": 2.942122186495177, "percentage": 98.07, "elapsed_time": "0:19:21", "remaining_time": "0:00:22"} +{"current_steps": 916, "total_steps": 933, "loss": 0.20143553614616394, "lr": 1.1352628543385702e-08, "epoch": 2.945337620578778, "percentage": 98.18, "elapsed_time": "0:19:22", "remaining_time": "0:00:21"} +{"current_steps": 917, "total_steps": 933, "loss": 0.4039468765258789, "lr": 1.0126678493358466e-08, "epoch": 2.9485530546623795, "percentage": 98.29, "elapsed_time": "0:19:23", "remaining_time": "0:00:20"} +{"current_steps": 918, "total_steps": 933, "loss": 0.22868111729621887, "lr": 8.97069086546798e-09, "epoch": 2.951768488745981, "percentage": 98.39, "elapsed_time": "0:19:24", "remaining_time": "0:00:19"} +{"current_steps": 919, "total_steps": 933, "loss": 0.7044589519500732, "lr": 7.884681867679766e-09, "epoch": 2.954983922829582, "percentage": 98.5, "elapsed_time": "0:19:25", "remaining_time": "0:00:17"} +{"current_steps": 920, "total_steps": 933, "loss": 0.17346805334091187, "lr": 6.86866672679698e-09, "epoch": 2.958199356913183, "percentage": 98.61, "elapsed_time": "0:19:26", "remaining_time": "0:00:16"} +{"current_steps": 921, "total_steps": 933, "loss": 0.6509414911270142, "lr": 5.9226596882483445e-09, "epoch": 2.9614147909967845, "percentage": 98.71, "elapsed_time": "0:19:28", "remaining_time": "0:00:15"} +{"current_steps": 922, "total_steps": 933, "loss": 0.25824788212776184, "lr": 5.0466740158849895e-09, "epoch": 2.964630225080386, "percentage": 98.82, "elapsed_time": "0:19:29", "remaining_time": "0:00:13"} +{"current_steps": 923, "total_steps": 933, "loss": 0.40939438343048096, "lr": 4.240721991799479e-09, "epoch": 2.967845659163987, "percentage": 98.93, "elapsed_time": "0:19:30", "remaining_time": "0:00:12"} +{"current_steps": 924, "total_steps": 933, "loss": 0.2906154692173004, "lr": 3.5048149161487356e-09, "epoch": 2.9710610932475885, "percentage": 99.04, "elapsed_time": "0:19:31", "remaining_time": "0:00:11"} +{"current_steps": 925, "total_steps": 933, "loss": 0.7982381582260132, "lr": 2.8389631069986044e-09, "epoch": 2.97427652733119, "percentage": 99.14, "elapsed_time": "0:19:32", "remaining_time": "0:00:10"} +{"current_steps": 926, "total_steps": 933, "loss": 0.39818036556243896, "lr": 2.2431759001789734e-09, "epoch": 2.977491961414791, "percentage": 99.25, "elapsed_time": "0:19:33", "remaining_time": "0:00:08"} +{"current_steps": 927, "total_steps": 933, "loss": 0.311463862657547, "lr": 1.7174616491510975e-09, "epoch": 2.980707395498392, "percentage": 99.36, "elapsed_time": "0:19:34", "remaining_time": "0:00:07"} +{"current_steps": 928, "total_steps": 933, "loss": 0.8928610682487488, "lr": 1.2618277248921397e-09, "epoch": 2.9839228295819935, "percentage": 99.46, "elapsed_time": "0:19:35", "remaining_time": "0:00:06"} +{"current_steps": 929, "total_steps": 933, "loss": 0.5860022306442261, "lr": 8.762805157913612e-10, "epoch": 2.987138263665595, "percentage": 99.57, "elapsed_time": "0:19:36", "remaining_time": "0:00:05"} +{"current_steps": 930, "total_steps": 933, "loss": 0.244879812002182, "lr": 5.608254275607516e-10, "epoch": 2.990353697749196, "percentage": 99.68, "elapsed_time": "0:19:37", "remaining_time": "0:00:03"} +{"current_steps": 931, "total_steps": 933, "loss": 0.37080761790275574, "lr": 3.1546688315842177e-10, "epoch": 2.9935691318327975, "percentage": 99.79, "elapsed_time": "0:19:38", "remaining_time": "0:00:02"} +{"current_steps": 932, "total_steps": 933, "loss": 0.3129621148109436, "lr": 1.4020832272754193e-10, "epoch": 2.996784565916399, "percentage": 99.89, "elapsed_time": "0:19:39", "remaining_time": "0:00:01"} +{"current_steps": 933, "total_steps": 933, "loss": 0.548541247844696, "lr": 3.505220354749206e-11, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:19:41", "remaining_time": "0:00:00"} +{"current_steps": 933, "total_steps": 933, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:21:04", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..a96534a --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,6574 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 933, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003215434083601286, + "grad_norm": 28.06913370740238, + "learning_rate": 0.0, + "loss": 2.6452956199645996, + "step": 1 + }, + { + "epoch": 0.006430868167202572, + "grad_norm": 48.39869965639885, + "learning_rate": 1.0638297872340426e-07, + "loss": 4.55789852142334, + "step": 2 + }, + { + "epoch": 0.00964630225080386, + "grad_norm": 49.69301124062599, + "learning_rate": 2.1276595744680852e-07, + "loss": 4.399077415466309, + "step": 3 + }, + { + "epoch": 0.012861736334405145, + "grad_norm": 41.69483295374549, + "learning_rate": 3.1914893617021275e-07, + "loss": 3.8412859439849854, + "step": 4 + }, + { + "epoch": 0.01607717041800643, + "grad_norm": 38.19554788922322, + "learning_rate": 4.2553191489361704e-07, + "loss": 4.593955993652344, + "step": 5 + }, + { + "epoch": 0.01929260450160772, + "grad_norm": 35.40565048454064, + "learning_rate": 5.319148936170213e-07, + "loss": 4.042428016662598, + "step": 6 + }, + { + "epoch": 0.022508038585209004, + "grad_norm": 33.35178240422259, + "learning_rate": 6.382978723404255e-07, + "loss": 4.12535285949707, + "step": 7 + }, + { + "epoch": 0.02572347266881029, + "grad_norm": 30.900117272078198, + "learning_rate": 7.446808510638298e-07, + "loss": 3.521498680114746, + "step": 8 + }, + { + "epoch": 0.028938906752411574, + "grad_norm": 30.636501853074883, + "learning_rate": 8.510638297872341e-07, + "loss": 3.6315274238586426, + "step": 9 + }, + { + "epoch": 0.03215434083601286, + "grad_norm": 38.64870494238772, + "learning_rate": 9.574468085106384e-07, + "loss": 4.302469730377197, + "step": 10 + }, + { + "epoch": 0.03536977491961415, + "grad_norm": 32.91754594782006, + "learning_rate": 1.0638297872340427e-06, + "loss": 4.56865119934082, + "step": 11 + }, + { + "epoch": 0.03858520900321544, + "grad_norm": 34.58013362254361, + "learning_rate": 1.170212765957447e-06, + "loss": 4.283186912536621, + "step": 12 + }, + { + "epoch": 0.04180064308681672, + "grad_norm": 40.581705970195586, + "learning_rate": 1.276595744680851e-06, + "loss": 4.2185187339782715, + "step": 13 + }, + { + "epoch": 0.04501607717041801, + "grad_norm": 29.29283904903904, + "learning_rate": 1.3829787234042555e-06, + "loss": 3.8783042430877686, + "step": 14 + }, + { + "epoch": 0.04823151125401929, + "grad_norm": 27.27991833133174, + "learning_rate": 1.4893617021276596e-06, + "loss": 4.050146579742432, + "step": 15 + }, + { + "epoch": 0.05144694533762058, + "grad_norm": 27.999481136276096, + "learning_rate": 1.595744680851064e-06, + "loss": 3.825010299682617, + "step": 16 + }, + { + "epoch": 0.05466237942122187, + "grad_norm": 22.536982883463256, + "learning_rate": 1.7021276595744682e-06, + "loss": 3.562378406524658, + "step": 17 + }, + { + "epoch": 0.05787781350482315, + "grad_norm": 34.480722847374665, + "learning_rate": 1.8085106382978727e-06, + "loss": 3.8451032638549805, + "step": 18 + }, + { + "epoch": 0.06109324758842444, + "grad_norm": 18.624534731891554, + "learning_rate": 1.9148936170212767e-06, + "loss": 3.538512945175171, + "step": 19 + }, + { + "epoch": 0.06430868167202572, + "grad_norm": 12.727844125034615, + "learning_rate": 2.021276595744681e-06, + "loss": 3.4035849571228027, + "step": 20 + }, + { + "epoch": 0.06752411575562701, + "grad_norm": 16.628683791330275, + "learning_rate": 2.1276595744680853e-06, + "loss": 4.060901165008545, + "step": 21 + }, + { + "epoch": 0.0707395498392283, + "grad_norm": 15.851111458013285, + "learning_rate": 2.2340425531914894e-06, + "loss": 3.4492287635803223, + "step": 22 + }, + { + "epoch": 0.07395498392282958, + "grad_norm": 14.753226693275314, + "learning_rate": 2.340425531914894e-06, + "loss": 3.4821548461914062, + "step": 23 + }, + { + "epoch": 0.07717041800643087, + "grad_norm": 13.046614909449563, + "learning_rate": 2.446808510638298e-06, + "loss": 2.6914403438568115, + "step": 24 + }, + { + "epoch": 0.08038585209003216, + "grad_norm": 10.833167684511187, + "learning_rate": 2.553191489361702e-06, + "loss": 3.381208896636963, + "step": 25 + }, + { + "epoch": 0.08360128617363344, + "grad_norm": 14.663957003362531, + "learning_rate": 2.6595744680851065e-06, + "loss": 3.723344564437866, + "step": 26 + }, + { + "epoch": 0.08681672025723473, + "grad_norm": 12.38492722838179, + "learning_rate": 2.765957446808511e-06, + "loss": 3.4975712299346924, + "step": 27 + }, + { + "epoch": 0.09003215434083602, + "grad_norm": 11.401609720183584, + "learning_rate": 2.8723404255319155e-06, + "loss": 3.742828369140625, + "step": 28 + }, + { + "epoch": 0.0932475884244373, + "grad_norm": 12.794559143366632, + "learning_rate": 2.978723404255319e-06, + "loss": 3.7880096435546875, + "step": 29 + }, + { + "epoch": 0.09646302250803858, + "grad_norm": 26.7679703474409, + "learning_rate": 3.0851063829787237e-06, + "loss": 3.091320037841797, + "step": 30 + }, + { + "epoch": 0.09967845659163987, + "grad_norm": 11.726188149872904, + "learning_rate": 3.191489361702128e-06, + "loss": 3.397942543029785, + "step": 31 + }, + { + "epoch": 0.10289389067524116, + "grad_norm": 11.535658534446908, + "learning_rate": 3.297872340425532e-06, + "loss": 3.3407297134399414, + "step": 32 + }, + { + "epoch": 0.10610932475884244, + "grad_norm": 12.230466594907684, + "learning_rate": 3.4042553191489363e-06, + "loss": 3.5374701023101807, + "step": 33 + }, + { + "epoch": 0.10932475884244373, + "grad_norm": 17.243624644167863, + "learning_rate": 3.510638297872341e-06, + "loss": 3.028085708618164, + "step": 34 + }, + { + "epoch": 0.11254019292604502, + "grad_norm": 17.334038622045792, + "learning_rate": 3.6170212765957453e-06, + "loss": 3.191539764404297, + "step": 35 + }, + { + "epoch": 0.1157556270096463, + "grad_norm": 14.86079639907893, + "learning_rate": 3.723404255319149e-06, + "loss": 3.315582275390625, + "step": 36 + }, + { + "epoch": 0.1189710610932476, + "grad_norm": 13.609433504610683, + "learning_rate": 3.8297872340425535e-06, + "loss": 3.1429853439331055, + "step": 37 + }, + { + "epoch": 0.12218649517684887, + "grad_norm": 12.204605042968506, + "learning_rate": 3.936170212765958e-06, + "loss": 3.2580060958862305, + "step": 38 + }, + { + "epoch": 0.12540192926045016, + "grad_norm": 9.319212608299305, + "learning_rate": 4.042553191489362e-06, + "loss": 2.8858089447021484, + "step": 39 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 12.524532210698558, + "learning_rate": 4.148936170212766e-06, + "loss": 3.385632038116455, + "step": 40 + }, + { + "epoch": 0.13183279742765272, + "grad_norm": 8.648905095426267, + "learning_rate": 4.255319148936171e-06, + "loss": 2.8703691959381104, + "step": 41 + }, + { + "epoch": 0.13504823151125403, + "grad_norm": 13.669866742395188, + "learning_rate": 4.361702127659575e-06, + "loss": 3.3186304569244385, + "step": 42 + }, + { + "epoch": 0.1382636655948553, + "grad_norm": 13.513984643274535, + "learning_rate": 4.468085106382979e-06, + "loss": 3.2565927505493164, + "step": 43 + }, + { + "epoch": 0.1414790996784566, + "grad_norm": 11.467838472357526, + "learning_rate": 4.574468085106383e-06, + "loss": 3.012521982192993, + "step": 44 + }, + { + "epoch": 0.14469453376205788, + "grad_norm": 14.414224190053467, + "learning_rate": 4.680851063829788e-06, + "loss": 3.093921184539795, + "step": 45 + }, + { + "epoch": 0.14790996784565916, + "grad_norm": 13.083779631573304, + "learning_rate": 4.787234042553192e-06, + "loss": 3.1773462295532227, + "step": 46 + }, + { + "epoch": 0.15112540192926044, + "grad_norm": 10.99722367362911, + "learning_rate": 4.893617021276596e-06, + "loss": 2.971510648727417, + "step": 47 + }, + { + "epoch": 0.15434083601286175, + "grad_norm": 11.68667724946745, + "learning_rate": 5e-06, + "loss": 3.365861415863037, + "step": 48 + }, + { + "epoch": 0.15755627009646303, + "grad_norm": 12.098124814579858, + "learning_rate": 5.106382978723404e-06, + "loss": 3.331052780151367, + "step": 49 + }, + { + "epoch": 0.1607717041800643, + "grad_norm": 15.052059643954104, + "learning_rate": 5.212765957446809e-06, + "loss": 3.513488292694092, + "step": 50 + }, + { + "epoch": 0.1639871382636656, + "grad_norm": 11.566325521455326, + "learning_rate": 5.319148936170213e-06, + "loss": 3.165461540222168, + "step": 51 + }, + { + "epoch": 0.16720257234726688, + "grad_norm": 9.34326584922921, + "learning_rate": 5.425531914893617e-06, + "loss": 3.308755397796631, + "step": 52 + }, + { + "epoch": 0.17041800643086816, + "grad_norm": 8.081169045448624, + "learning_rate": 5.531914893617022e-06, + "loss": 2.905186653137207, + "step": 53 + }, + { + "epoch": 0.17363344051446947, + "grad_norm": 7.992815413183043, + "learning_rate": 5.638297872340426e-06, + "loss": 3.2160592079162598, + "step": 54 + }, + { + "epoch": 0.17684887459807075, + "grad_norm": 13.368580785667488, + "learning_rate": 5.744680851063831e-06, + "loss": 2.809837818145752, + "step": 55 + }, + { + "epoch": 0.18006430868167203, + "grad_norm": 12.22993319530342, + "learning_rate": 5.851063829787235e-06, + "loss": 3.355532646179199, + "step": 56 + }, + { + "epoch": 0.1832797427652733, + "grad_norm": 7.221255459808142, + "learning_rate": 5.957446808510638e-06, + "loss": 2.912825584411621, + "step": 57 + }, + { + "epoch": 0.1864951768488746, + "grad_norm": 8.99412347483827, + "learning_rate": 6.063829787234044e-06, + "loss": 2.738528251647949, + "step": 58 + }, + { + "epoch": 0.18971061093247588, + "grad_norm": 9.344015814293519, + "learning_rate": 6.170212765957447e-06, + "loss": 3.5236358642578125, + "step": 59 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 9.156439246600511, + "learning_rate": 6.276595744680851e-06, + "loss": 2.438237190246582, + "step": 60 + }, + { + "epoch": 0.19614147909967847, + "grad_norm": 13.840159967058424, + "learning_rate": 6.382978723404256e-06, + "loss": 3.002664804458618, + "step": 61 + }, + { + "epoch": 0.19935691318327975, + "grad_norm": 19.877275319550726, + "learning_rate": 6.48936170212766e-06, + "loss": 2.6102824211120605, + "step": 62 + }, + { + "epoch": 0.20257234726688103, + "grad_norm": 12.1602713562138, + "learning_rate": 6.595744680851064e-06, + "loss": 2.8119864463806152, + "step": 63 + }, + { + "epoch": 0.2057877813504823, + "grad_norm": 16.720203965795687, + "learning_rate": 6.702127659574469e-06, + "loss": 3.0033774375915527, + "step": 64 + }, + { + "epoch": 0.2090032154340836, + "grad_norm": 11.940414659790333, + "learning_rate": 6.808510638297873e-06, + "loss": 3.0424952507019043, + "step": 65 + }, + { + "epoch": 0.21221864951768488, + "grad_norm": 13.529947357143289, + "learning_rate": 6.914893617021278e-06, + "loss": 3.125572443008423, + "step": 66 + }, + { + "epoch": 0.21543408360128619, + "grad_norm": 14.39004817757797, + "learning_rate": 7.021276595744682e-06, + "loss": 2.6417791843414307, + "step": 67 + }, + { + "epoch": 0.21864951768488747, + "grad_norm": 10.746950415723779, + "learning_rate": 7.127659574468085e-06, + "loss": 2.795292615890503, + "step": 68 + }, + { + "epoch": 0.22186495176848875, + "grad_norm": 8.800102366100415, + "learning_rate": 7.234042553191491e-06, + "loss": 2.806095600128174, + "step": 69 + }, + { + "epoch": 0.22508038585209003, + "grad_norm": 9.675471278291617, + "learning_rate": 7.340425531914894e-06, + "loss": 3.1251420974731445, + "step": 70 + }, + { + "epoch": 0.2282958199356913, + "grad_norm": 10.053887771078337, + "learning_rate": 7.446808510638298e-06, + "loss": 3.1366963386535645, + "step": 71 + }, + { + "epoch": 0.2315112540192926, + "grad_norm": 9.96554198597255, + "learning_rate": 7.553191489361703e-06, + "loss": 2.7942872047424316, + "step": 72 + }, + { + "epoch": 0.2347266881028939, + "grad_norm": 9.128869915479399, + "learning_rate": 7.659574468085107e-06, + "loss": 2.7176175117492676, + "step": 73 + }, + { + "epoch": 0.2379421221864952, + "grad_norm": 28.173273740714603, + "learning_rate": 7.765957446808511e-06, + "loss": 3.2510626316070557, + "step": 74 + }, + { + "epoch": 0.24115755627009647, + "grad_norm": 15.875524782572642, + "learning_rate": 7.872340425531916e-06, + "loss": 3.5180716514587402, + "step": 75 + }, + { + "epoch": 0.24437299035369775, + "grad_norm": 8.411231774581736, + "learning_rate": 7.97872340425532e-06, + "loss": 2.6583242416381836, + "step": 76 + }, + { + "epoch": 0.24758842443729903, + "grad_norm": 10.255213693759098, + "learning_rate": 8.085106382978723e-06, + "loss": 3.522376537322998, + "step": 77 + }, + { + "epoch": 0.2508038585209003, + "grad_norm": 14.129242001151896, + "learning_rate": 8.191489361702128e-06, + "loss": 2.666438102722168, + "step": 78 + }, + { + "epoch": 0.2540192926045016, + "grad_norm": 13.280551504579265, + "learning_rate": 8.297872340425532e-06, + "loss": 3.0438873767852783, + "step": 79 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 9.282380199453716, + "learning_rate": 8.404255319148937e-06, + "loss": 2.7416107654571533, + "step": 80 + }, + { + "epoch": 0.2604501607717042, + "grad_norm": 13.819155087109117, + "learning_rate": 8.510638297872341e-06, + "loss": 3.040888547897339, + "step": 81 + }, + { + "epoch": 0.26366559485530544, + "grad_norm": 10.236048490196817, + "learning_rate": 8.617021276595746e-06, + "loss": 2.8178224563598633, + "step": 82 + }, + { + "epoch": 0.26688102893890675, + "grad_norm": 11.247565366748827, + "learning_rate": 8.72340425531915e-06, + "loss": 3.13818097114563, + "step": 83 + }, + { + "epoch": 0.27009646302250806, + "grad_norm": 11.38311149290456, + "learning_rate": 8.829787234042555e-06, + "loss": 3.0007710456848145, + "step": 84 + }, + { + "epoch": 0.2733118971061093, + "grad_norm": 15.216874845868524, + "learning_rate": 8.936170212765958e-06, + "loss": 2.1240034103393555, + "step": 85 + }, + { + "epoch": 0.2765273311897106, + "grad_norm": 14.569359980649596, + "learning_rate": 9.042553191489362e-06, + "loss": 3.0258898735046387, + "step": 86 + }, + { + "epoch": 0.2797427652733119, + "grad_norm": 10.551293545168425, + "learning_rate": 9.148936170212767e-06, + "loss": 3.815779685974121, + "step": 87 + }, + { + "epoch": 0.2829581993569132, + "grad_norm": 13.73383748688779, + "learning_rate": 9.255319148936171e-06, + "loss": 3.146327018737793, + "step": 88 + }, + { + "epoch": 0.2861736334405145, + "grad_norm": 16.606288521806196, + "learning_rate": 9.361702127659576e-06, + "loss": 3.730978488922119, + "step": 89 + }, + { + "epoch": 0.28938906752411575, + "grad_norm": 15.589892740807427, + "learning_rate": 9.46808510638298e-06, + "loss": 2.837461471557617, + "step": 90 + }, + { + "epoch": 0.29260450160771706, + "grad_norm": 8.969993651783593, + "learning_rate": 9.574468085106385e-06, + "loss": 2.8579788208007812, + "step": 91 + }, + { + "epoch": 0.2958199356913183, + "grad_norm": 7.662931624851478, + "learning_rate": 9.680851063829787e-06, + "loss": 3.059731960296631, + "step": 92 + }, + { + "epoch": 0.2990353697749196, + "grad_norm": 12.506461029325436, + "learning_rate": 9.787234042553192e-06, + "loss": 2.419557809829712, + "step": 93 + }, + { + "epoch": 0.3022508038585209, + "grad_norm": 11.680821462164978, + "learning_rate": 9.893617021276596e-06, + "loss": 2.8333683013916016, + "step": 94 + }, + { + "epoch": 0.3054662379421222, + "grad_norm": 12.174316986589778, + "learning_rate": 1e-05, + "loss": 2.74963641166687, + "step": 95 + }, + { + "epoch": 0.3086816720257235, + "grad_norm": 11.199762414211195, + "learning_rate": 9.999964947796453e-06, + "loss": 2.873091697692871, + "step": 96 + }, + { + "epoch": 0.31189710610932475, + "grad_norm": 13.974152571943035, + "learning_rate": 9.999859791677274e-06, + "loss": 2.7511343955993652, + "step": 97 + }, + { + "epoch": 0.31511254019292606, + "grad_norm": 7.263607356101665, + "learning_rate": 9.999684533116843e-06, + "loss": 2.71083927154541, + "step": 98 + }, + { + "epoch": 0.3183279742765273, + "grad_norm": 25.796622294537844, + "learning_rate": 9.999439174572441e-06, + "loss": 3.5184683799743652, + "step": 99 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 17.664679448204584, + "learning_rate": 9.999123719484209e-06, + "loss": 3.0679643154144287, + "step": 100 + }, + { + "epoch": 0.3247588424437299, + "grad_norm": 8.788378126441323, + "learning_rate": 9.99873817227511e-06, + "loss": 3.4208366870880127, + "step": 101 + }, + { + "epoch": 0.3279742765273312, + "grad_norm": 10.180060028116147, + "learning_rate": 9.998282538350849e-06, + "loss": 2.5970406532287598, + "step": 102 + }, + { + "epoch": 0.3311897106109325, + "grad_norm": 8.806422861108551, + "learning_rate": 9.997756824099822e-06, + "loss": 2.414546012878418, + "step": 103 + }, + { + "epoch": 0.33440514469453375, + "grad_norm": 7.16593228968872, + "learning_rate": 9.997161036893001e-06, + "loss": 2.7526440620422363, + "step": 104 + }, + { + "epoch": 0.33762057877813506, + "grad_norm": 17.516673563240467, + "learning_rate": 9.996495185083853e-06, + "loss": 3.5313873291015625, + "step": 105 + }, + { + "epoch": 0.3408360128617363, + "grad_norm": 11.176485057675038, + "learning_rate": 9.995759278008202e-06, + "loss": 3.215785026550293, + "step": 106 + }, + { + "epoch": 0.3440514469453376, + "grad_norm": 10.013772937565832, + "learning_rate": 9.994953325984116e-06, + "loss": 2.8342652320861816, + "step": 107 + }, + { + "epoch": 0.34726688102893893, + "grad_norm": 16.13725758624622, + "learning_rate": 9.994077340311751e-06, + "loss": 3.187843084335327, + "step": 108 + }, + { + "epoch": 0.3504823151125402, + "grad_norm": 13.142839661126178, + "learning_rate": 9.993131333273203e-06, + "loss": 3.329102039337158, + "step": 109 + }, + { + "epoch": 0.3536977491961415, + "grad_norm": 13.15304975587744, + "learning_rate": 9.99211531813232e-06, + "loss": 2.963022232055664, + "step": 110 + }, + { + "epoch": 0.35691318327974275, + "grad_norm": 20.764351097324777, + "learning_rate": 9.991029309134533e-06, + "loss": 3.1603951454162598, + "step": 111 + }, + { + "epoch": 0.36012861736334406, + "grad_norm": 20.990534185345496, + "learning_rate": 9.989873321506643e-06, + "loss": 3.063810110092163, + "step": 112 + }, + { + "epoch": 0.3633440514469453, + "grad_norm": 11.652596742212573, + "learning_rate": 9.988647371456614e-06, + "loss": 3.0116543769836426, + "step": 113 + }, + { + "epoch": 0.3665594855305466, + "grad_norm": 19.438695012900585, + "learning_rate": 9.987351476173352e-06, + "loss": 3.010406255722046, + "step": 114 + }, + { + "epoch": 0.36977491961414793, + "grad_norm": 6.079653142271714, + "learning_rate": 9.985985653826444e-06, + "loss": 1.9504810571670532, + "step": 115 + }, + { + "epoch": 0.3729903536977492, + "grad_norm": 13.795243277642982, + "learning_rate": 9.98454992356593e-06, + "loss": 2.93680739402771, + "step": 116 + }, + { + "epoch": 0.3762057877813505, + "grad_norm": 13.003967588877996, + "learning_rate": 9.983044305522007e-06, + "loss": 2.3677353858947754, + "step": 117 + }, + { + "epoch": 0.37942122186495175, + "grad_norm": 18.380499765458158, + "learning_rate": 9.981468820804774e-06, + "loss": 2.847960948944092, + "step": 118 + }, + { + "epoch": 0.38263665594855306, + "grad_norm": 9.899382867106105, + "learning_rate": 9.979823491503909e-06, + "loss": 3.0923283100128174, + "step": 119 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 28.34535742986171, + "learning_rate": 9.978108340688383e-06, + "loss": 3.020812511444092, + "step": 120 + }, + { + "epoch": 0.3890675241157556, + "grad_norm": 9.019875841237615, + "learning_rate": 9.976323392406122e-06, + "loss": 3.3838634490966797, + "step": 121 + }, + { + "epoch": 0.39228295819935693, + "grad_norm": 23.495664363464023, + "learning_rate": 9.974468671683673e-06, + "loss": 3.5906333923339844, + "step": 122 + }, + { + "epoch": 0.3954983922829582, + "grad_norm": 10.641890312294386, + "learning_rate": 9.972544204525853e-06, + "loss": 3.000471591949463, + "step": 123 + }, + { + "epoch": 0.3987138263665595, + "grad_norm": 9.496581002852425, + "learning_rate": 9.970550017915393e-06, + "loss": 2.9725592136383057, + "step": 124 + }, + { + "epoch": 0.40192926045016075, + "grad_norm": 20.913334058741135, + "learning_rate": 9.968486139812544e-06, + "loss": 3.160482406616211, + "step": 125 + }, + { + "epoch": 0.40514469453376206, + "grad_norm": 11.751298309185795, + "learning_rate": 9.966352599154697e-06, + "loss": 3.0642364025115967, + "step": 126 + }, + { + "epoch": 0.40836012861736337, + "grad_norm": 20.856740980507603, + "learning_rate": 9.964149425855971e-06, + "loss": 3.73250675201416, + "step": 127 + }, + { + "epoch": 0.4115755627009646, + "grad_norm": 9.151325730059728, + "learning_rate": 9.961876650806799e-06, + "loss": 2.632124900817871, + "step": 128 + }, + { + "epoch": 0.41479099678456594, + "grad_norm": 11.4247472974135, + "learning_rate": 9.95953430587349e-06, + "loss": 2.5810580253601074, + "step": 129 + }, + { + "epoch": 0.4180064308681672, + "grad_norm": 16.431822779962932, + "learning_rate": 9.957122423897786e-06, + "loss": 3.170461654663086, + "step": 130 + }, + { + "epoch": 0.4212218649517685, + "grad_norm": 21.943673264965508, + "learning_rate": 9.954641038696395e-06, + "loss": 3.044951915740967, + "step": 131 + }, + { + "epoch": 0.42443729903536975, + "grad_norm": 6.71332684009912, + "learning_rate": 9.952090185060528e-06, + "loss": 2.9257850646972656, + "step": 132 + }, + { + "epoch": 0.42765273311897106, + "grad_norm": 9.885222299724687, + "learning_rate": 9.9494698987554e-06, + "loss": 2.943833827972412, + "step": 133 + }, + { + "epoch": 0.43086816720257237, + "grad_norm": 11.16095425096583, + "learning_rate": 9.946780216519734e-06, + "loss": 3.022878646850586, + "step": 134 + }, + { + "epoch": 0.4340836012861736, + "grad_norm": 9.033329511785515, + "learning_rate": 9.944021176065247e-06, + "loss": 2.792724847793579, + "step": 135 + }, + { + "epoch": 0.43729903536977494, + "grad_norm": 13.435034889452208, + "learning_rate": 9.941192816076114e-06, + "loss": 3.35680890083313, + "step": 136 + }, + { + "epoch": 0.4405144694533762, + "grad_norm": 6.768294617543249, + "learning_rate": 9.938295176208441e-06, + "loss": 2.4593820571899414, + "step": 137 + }, + { + "epoch": 0.4437299035369775, + "grad_norm": 14.633684351350666, + "learning_rate": 9.93532829708969e-06, + "loss": 2.6255781650543213, + "step": 138 + }, + { + "epoch": 0.44694533762057875, + "grad_norm": 16.68694895086861, + "learning_rate": 9.932292220318121e-06, + "loss": 2.6132946014404297, + "step": 139 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 15.726897720613817, + "learning_rate": 9.929186988462208e-06, + "loss": 2.9322824478149414, + "step": 140 + }, + { + "epoch": 0.4533762057877814, + "grad_norm": 15.761243450650786, + "learning_rate": 9.926012645060037e-06, + "loss": 2.601005792617798, + "step": 141 + }, + { + "epoch": 0.4565916398713826, + "grad_norm": 36.62478666800511, + "learning_rate": 9.9227692346187e-06, + "loss": 2.5554819107055664, + "step": 142 + }, + { + "epoch": 0.45980707395498394, + "grad_norm": 12.187964367204458, + "learning_rate": 9.919456802613672e-06, + "loss": 2.5176918506622314, + "step": 143 + }, + { + "epoch": 0.4630225080385852, + "grad_norm": 17.390325809468337, + "learning_rate": 9.916075395488167e-06, + "loss": 2.341370105743408, + "step": 144 + }, + { + "epoch": 0.4662379421221865, + "grad_norm": 12.718168513574046, + "learning_rate": 9.912625060652496e-06, + "loss": 3.2781105041503906, + "step": 145 + }, + { + "epoch": 0.4694533762057878, + "grad_norm": 13.449199904281418, + "learning_rate": 9.909105846483394e-06, + "loss": 3.615126132965088, + "step": 146 + }, + { + "epoch": 0.47266881028938906, + "grad_norm": 8.050446777669299, + "learning_rate": 9.905517802323345e-06, + "loss": 2.8603837490081787, + "step": 147 + }, + { + "epoch": 0.4758842443729904, + "grad_norm": 12.943386027137832, + "learning_rate": 9.901860978479889e-06, + "loss": 2.465343952178955, + "step": 148 + }, + { + "epoch": 0.4790996784565916, + "grad_norm": 49.15991413291505, + "learning_rate": 9.898135426224923e-06, + "loss": 3.241806983947754, + "step": 149 + }, + { + "epoch": 0.48231511254019294, + "grad_norm": 17.678071991265508, + "learning_rate": 9.89434119779397e-06, + "loss": 3.292675018310547, + "step": 150 + }, + { + "epoch": 0.4855305466237942, + "grad_norm": 15.424220522820049, + "learning_rate": 9.89047834638546e-06, + "loss": 2.6159539222717285, + "step": 151 + }, + { + "epoch": 0.4887459807073955, + "grad_norm": 6.510280246958492, + "learning_rate": 9.886546926159972e-06, + "loss": 2.4839179515838623, + "step": 152 + }, + { + "epoch": 0.4919614147909968, + "grad_norm": 11.741972927177837, + "learning_rate": 9.882546992239483e-06, + "loss": 2.995469570159912, + "step": 153 + }, + { + "epoch": 0.49517684887459806, + "grad_norm": 13.630320770906891, + "learning_rate": 9.878478600706595e-06, + "loss": 3.255875587463379, + "step": 154 + }, + { + "epoch": 0.4983922829581994, + "grad_norm": 36.221839717193376, + "learning_rate": 9.87434180860374e-06, + "loss": 3.4806838035583496, + "step": 155 + }, + { + "epoch": 0.5016077170418006, + "grad_norm": 15.136927688596392, + "learning_rate": 9.87013667393239e-06, + "loss": 3.202141284942627, + "step": 156 + }, + { + "epoch": 0.5048231511254019, + "grad_norm": 21.095472411241253, + "learning_rate": 9.865863255652242e-06, + "loss": 2.8557021617889404, + "step": 157 + }, + { + "epoch": 0.5080385852090032, + "grad_norm": 9.121385453492111, + "learning_rate": 9.861521613680384e-06, + "loss": 2.9327592849731445, + "step": 158 + }, + { + "epoch": 0.5112540192926045, + "grad_norm": 8.35474008993999, + "learning_rate": 9.857111808890465e-06, + "loss": 3.2285704612731934, + "step": 159 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 14.002125724500594, + "learning_rate": 9.852633903111834e-06, + "loss": 2.7799017429351807, + "step": 160 + }, + { + "epoch": 0.5176848874598071, + "grad_norm": 12.65627846068601, + "learning_rate": 9.848087959128679e-06, + "loss": 2.5985677242279053, + "step": 161 + }, + { + "epoch": 0.5209003215434084, + "grad_norm": 14.086117584953953, + "learning_rate": 9.843474040679137e-06, + "loss": 3.1218137741088867, + "step": 162 + }, + { + "epoch": 0.5241157556270096, + "grad_norm": 8.527738044773436, + "learning_rate": 9.838792212454416e-06, + "loss": 2.367370367050171, + "step": 163 + }, + { + "epoch": 0.5273311897106109, + "grad_norm": 9.981924505220025, + "learning_rate": 9.834042540097875e-06, + "loss": 2.8083603382110596, + "step": 164 + }, + { + "epoch": 0.5305466237942122, + "grad_norm": 11.46020527117729, + "learning_rate": 9.829225090204102e-06, + "loss": 2.920241117477417, + "step": 165 + }, + { + "epoch": 0.5337620578778135, + "grad_norm": 12.469087819796588, + "learning_rate": 9.824339930317994e-06, + "loss": 2.6172094345092773, + "step": 166 + }, + { + "epoch": 0.5369774919614148, + "grad_norm": 20.922191531715136, + "learning_rate": 9.819387128933799e-06, + "loss": 3.1109981536865234, + "step": 167 + }, + { + "epoch": 0.5401929260450161, + "grad_norm": 15.632738580412314, + "learning_rate": 9.814366755494155e-06, + "loss": 3.153048038482666, + "step": 168 + }, + { + "epoch": 0.5434083601286174, + "grad_norm": 35.82397708735359, + "learning_rate": 9.809278880389126e-06, + "loss": 3.2571372985839844, + "step": 169 + }, + { + "epoch": 0.5466237942122186, + "grad_norm": 8.554787244670667, + "learning_rate": 9.804123574955202e-06, + "loss": 3.108255624771118, + "step": 170 + }, + { + "epoch": 0.5498392282958199, + "grad_norm": 7.383101110488588, + "learning_rate": 9.798900911474315e-06, + "loss": 3.0177969932556152, + "step": 171 + }, + { + "epoch": 0.5530546623794212, + "grad_norm": 11.293545889392764, + "learning_rate": 9.793610963172802e-06, + "loss": 2.795715093612671, + "step": 172 + }, + { + "epoch": 0.5562700964630225, + "grad_norm": 21.241938441534675, + "learning_rate": 9.78825380422041e-06, + "loss": 3.3412439823150635, + "step": 173 + }, + { + "epoch": 0.5594855305466238, + "grad_norm": 9.87643076192503, + "learning_rate": 9.78282950972922e-06, + "loss": 2.7117419242858887, + "step": 174 + }, + { + "epoch": 0.5627009646302251, + "grad_norm": 10.242385683099888, + "learning_rate": 9.77733815575263e-06, + "loss": 3.1940979957580566, + "step": 175 + }, + { + "epoch": 0.5659163987138264, + "grad_norm": 13.773999328309266, + "learning_rate": 9.771779819284257e-06, + "loss": 2.418674945831299, + "step": 176 + }, + { + "epoch": 0.5691318327974276, + "grad_norm": 18.81855085740899, + "learning_rate": 9.766154578256883e-06, + "loss": 2.819211959838867, + "step": 177 + }, + { + "epoch": 0.572347266881029, + "grad_norm": 18.832073296958836, + "learning_rate": 9.76046251154134e-06, + "loss": 3.260634183883667, + "step": 178 + }, + { + "epoch": 0.5755627009646302, + "grad_norm": 14.459091987874368, + "learning_rate": 9.754703698945425e-06, + "loss": 3.002488136291504, + "step": 179 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 9.482812229157702, + "learning_rate": 9.748878221212763e-06, + "loss": 3.066258668899536, + "step": 180 + }, + { + "epoch": 0.5819935691318328, + "grad_norm": 12.867585721735884, + "learning_rate": 9.742986160021688e-06, + "loss": 3.4351935386657715, + "step": 181 + }, + { + "epoch": 0.5852090032154341, + "grad_norm": 10.802517130903386, + "learning_rate": 9.73702759798409e-06, + "loss": 3.2599828243255615, + "step": 182 + }, + { + "epoch": 0.5884244372990354, + "grad_norm": 24.934825581625855, + "learning_rate": 9.731002618644265e-06, + "loss": 2.5718865394592285, + "step": 183 + }, + { + "epoch": 0.5916398713826366, + "grad_norm": 20.787769842276045, + "learning_rate": 9.724911306477729e-06, + "loss": 2.765693426132202, + "step": 184 + }, + { + "epoch": 0.594855305466238, + "grad_norm": 9.109874314786204, + "learning_rate": 9.71875374689005e-06, + "loss": 3.426023244857788, + "step": 185 + }, + { + "epoch": 0.5980707395498392, + "grad_norm": 21.279795009176986, + "learning_rate": 9.71253002621564e-06, + "loss": 3.001716375350952, + "step": 186 + }, + { + "epoch": 0.6012861736334405, + "grad_norm": 10.787659736048406, + "learning_rate": 9.706240231716549e-06, + "loss": 2.950758218765259, + "step": 187 + }, + { + "epoch": 0.6045016077170418, + "grad_norm": 12.733718582846585, + "learning_rate": 9.699884451581238e-06, + "loss": 3.2297606468200684, + "step": 188 + }, + { + "epoch": 0.6077170418006431, + "grad_norm": 12.774631438040222, + "learning_rate": 9.693462774923351e-06, + "loss": 4.231553077697754, + "step": 189 + }, + { + "epoch": 0.6109324758842444, + "grad_norm": 7.991710258799094, + "learning_rate": 9.686975291780449e-06, + "loss": 3.427185535430908, + "step": 190 + }, + { + "epoch": 0.6141479099678456, + "grad_norm": 8.091734891391763, + "learning_rate": 9.68042209311277e-06, + "loss": 3.104320526123047, + "step": 191 + }, + { + "epoch": 0.617363344051447, + "grad_norm": 9.939232309092867, + "learning_rate": 9.67380327080193e-06, + "loss": 2.5996108055114746, + "step": 192 + }, + { + "epoch": 0.6205787781350482, + "grad_norm": 15.107188854133135, + "learning_rate": 9.667118917649656e-06, + "loss": 2.6025047302246094, + "step": 193 + }, + { + "epoch": 0.6237942122186495, + "grad_norm": 13.565146098612988, + "learning_rate": 9.660369127376469e-06, + "loss": 2.7667369842529297, + "step": 194 + }, + { + "epoch": 0.6270096463022508, + "grad_norm": 12.27977824054795, + "learning_rate": 9.653553994620378e-06, + "loss": 2.9813175201416016, + "step": 195 + }, + { + "epoch": 0.6302250803858521, + "grad_norm": 16.18068581074389, + "learning_rate": 9.64667361493555e-06, + "loss": 3.094072103500366, + "step": 196 + }, + { + "epoch": 0.6334405144694534, + "grad_norm": 7.584418927697848, + "learning_rate": 9.639728084790976e-06, + "loss": 2.9591763019561768, + "step": 197 + }, + { + "epoch": 0.6366559485530546, + "grad_norm": 6.415026684299775, + "learning_rate": 9.632717501569106e-06, + "loss": 1.9626116752624512, + "step": 198 + }, + { + "epoch": 0.639871382636656, + "grad_norm": 21.919505536575453, + "learning_rate": 9.6256419635645e-06, + "loss": 3.0262200832366943, + "step": 199 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 9.532554532436253, + "learning_rate": 9.618501569982437e-06, + "loss": 2.373394012451172, + "step": 200 + }, + { + "epoch": 0.6463022508038585, + "grad_norm": 9.33134950216214, + "learning_rate": 9.611296420937526e-06, + "loss": 2.601590633392334, + "step": 201 + }, + { + "epoch": 0.6495176848874598, + "grad_norm": 13.972297231734744, + "learning_rate": 9.60402661745231e-06, + "loss": 3.3032302856445312, + "step": 202 + }, + { + "epoch": 0.6527331189710611, + "grad_norm": 12.123983986715679, + "learning_rate": 9.59669226145584e-06, + "loss": 2.55745267868042, + "step": 203 + }, + { + "epoch": 0.6559485530546624, + "grad_norm": 13.044678435205267, + "learning_rate": 9.589293455782253e-06, + "loss": 3.162076950073242, + "step": 204 + }, + { + "epoch": 0.6591639871382636, + "grad_norm": 12.717015520085477, + "learning_rate": 9.581830304169325e-06, + "loss": 3.104783535003662, + "step": 205 + }, + { + "epoch": 0.662379421221865, + "grad_norm": 10.909068293638112, + "learning_rate": 9.574302911257021e-06, + "loss": 1.5882987976074219, + "step": 206 + }, + { + "epoch": 0.6655948553054662, + "grad_norm": 16.856966099264525, + "learning_rate": 9.566711382586022e-06, + "loss": 3.12563157081604, + "step": 207 + }, + { + "epoch": 0.6688102893890675, + "grad_norm": 8.178490711172556, + "learning_rate": 9.559055824596252e-06, + "loss": 2.5417189598083496, + "step": 208 + }, + { + "epoch": 0.6720257234726688, + "grad_norm": 24.246438880890675, + "learning_rate": 9.551336344625387e-06, + "loss": 3.221043109893799, + "step": 209 + }, + { + "epoch": 0.6752411575562701, + "grad_norm": 8.34315714768651, + "learning_rate": 9.543553050907332e-06, + "loss": 2.767820358276367, + "step": 210 + }, + { + "epoch": 0.6784565916398714, + "grad_norm": 28.687273320967922, + "learning_rate": 9.53570605257073e-06, + "loss": 4.299283027648926, + "step": 211 + }, + { + "epoch": 0.6816720257234726, + "grad_norm": 9.232214067792807, + "learning_rate": 9.527795459637413e-06, + "loss": 2.867112398147583, + "step": 212 + }, + { + "epoch": 0.684887459807074, + "grad_norm": 14.46539808412858, + "learning_rate": 9.519821383020866e-06, + "loss": 2.6538944244384766, + "step": 213 + }, + { + "epoch": 0.6881028938906752, + "grad_norm": 16.762492706972427, + "learning_rate": 9.511783934524674e-06, + "loss": 3.379420280456543, + "step": 214 + }, + { + "epoch": 0.6913183279742765, + "grad_norm": 11.417167539665027, + "learning_rate": 9.503683226840948e-06, + "loss": 2.9065451622009277, + "step": 215 + }, + { + "epoch": 0.6945337620578779, + "grad_norm": 8.688764296327069, + "learning_rate": 9.495519373548748e-06, + "loss": 2.9622750282287598, + "step": 216 + }, + { + "epoch": 0.6977491961414791, + "grad_norm": 17.306753656078968, + "learning_rate": 9.487292489112497e-06, + "loss": 3.1384975910186768, + "step": 217 + }, + { + "epoch": 0.7009646302250804, + "grad_norm": 9.278446527178623, + "learning_rate": 9.479002688880362e-06, + "loss": 3.00459623336792, + "step": 218 + }, + { + "epoch": 0.7041800643086816, + "grad_norm": 8.829166062791868, + "learning_rate": 9.470650089082649e-06, + "loss": 3.349134922027588, + "step": 219 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 8.252383756187383, + "learning_rate": 9.462234806830172e-06, + "loss": 2.8735666275024414, + "step": 220 + }, + { + "epoch": 0.7106109324758842, + "grad_norm": 10.10461049277595, + "learning_rate": 9.453756960112605e-06, + "loss": 2.7908334732055664, + "step": 221 + }, + { + "epoch": 0.7138263665594855, + "grad_norm": 11.263066058506535, + "learning_rate": 9.445216667796833e-06, + "loss": 2.5783145427703857, + "step": 222 + }, + { + "epoch": 0.7170418006430869, + "grad_norm": 19.60227612362862, + "learning_rate": 9.436614049625277e-06, + "loss": 2.7622079849243164, + "step": 223 + }, + { + "epoch": 0.7202572347266881, + "grad_norm": 20.895857573617988, + "learning_rate": 9.42794922621423e-06, + "loss": 3.2321324348449707, + "step": 224 + }, + { + "epoch": 0.7234726688102894, + "grad_norm": 23.420564933331203, + "learning_rate": 9.419222319052154e-06, + "loss": 2.5579419136047363, + "step": 225 + }, + { + "epoch": 0.7266881028938906, + "grad_norm": 12.853985003804183, + "learning_rate": 9.410433450497977e-06, + "loss": 3.260056495666504, + "step": 226 + }, + { + "epoch": 0.729903536977492, + "grad_norm": 11.182131927651461, + "learning_rate": 9.401582743779384e-06, + "loss": 2.7521541118621826, + "step": 227 + }, + { + "epoch": 0.7331189710610932, + "grad_norm": 19.802196143776204, + "learning_rate": 9.392670322991085e-06, + "loss": 2.986008644104004, + "step": 228 + }, + { + "epoch": 0.7363344051446945, + "grad_norm": 10.66119164095427, + "learning_rate": 9.383696313093073e-06, + "loss": 3.1167283058166504, + "step": 229 + }, + { + "epoch": 0.7395498392282959, + "grad_norm": 8.87831230718112, + "learning_rate": 9.374660839908881e-06, + "loss": 3.0863423347473145, + "step": 230 + }, + { + "epoch": 0.7427652733118971, + "grad_norm": 16.804824808938967, + "learning_rate": 9.365564030123802e-06, + "loss": 3.310753345489502, + "step": 231 + }, + { + "epoch": 0.7459807073954984, + "grad_norm": 14.686714392325005, + "learning_rate": 9.356406011283128e-06, + "loss": 2.494666576385498, + "step": 232 + }, + { + "epoch": 0.7491961414790996, + "grad_norm": 12.638367443704174, + "learning_rate": 9.34718691179036e-06, + "loss": 3.037945508956909, + "step": 233 + }, + { + "epoch": 0.752411575562701, + "grad_norm": 26.920374247029958, + "learning_rate": 9.337906860905394e-06, + "loss": 3.584845542907715, + "step": 234 + }, + { + "epoch": 0.7556270096463023, + "grad_norm": 6.708394075133507, + "learning_rate": 9.328565988742723e-06, + "loss": 2.984691619873047, + "step": 235 + }, + { + "epoch": 0.7588424437299035, + "grad_norm": 11.729278792048037, + "learning_rate": 9.31916442626961e-06, + "loss": 2.795103073120117, + "step": 236 + }, + { + "epoch": 0.7620578778135049, + "grad_norm": 7.697881178490154, + "learning_rate": 9.30970230530425e-06, + "loss": 2.5230283737182617, + "step": 237 + }, + { + "epoch": 0.7652733118971061, + "grad_norm": 8.848418169366562, + "learning_rate": 9.300179758513912e-06, + "loss": 2.849795341491699, + "step": 238 + }, + { + "epoch": 0.7684887459807074, + "grad_norm": 9.64788428575172, + "learning_rate": 9.290596919413101e-06, + "loss": 2.4834206104278564, + "step": 239 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 11.151899506614933, + "learning_rate": 9.280953922361667e-06, + "loss": 2.7844467163085938, + "step": 240 + }, + { + "epoch": 0.77491961414791, + "grad_norm": 13.100289974037779, + "learning_rate": 9.271250902562925e-06, + "loss": 3.224181890487671, + "step": 241 + }, + { + "epoch": 0.7781350482315113, + "grad_norm": 11.686317061932591, + "learning_rate": 9.26148799606177e-06, + "loss": 2.7271580696105957, + "step": 242 + }, + { + "epoch": 0.7813504823151125, + "grad_norm": 17.10133837407015, + "learning_rate": 9.251665339742751e-06, + "loss": 2.834172248840332, + "step": 243 + }, + { + "epoch": 0.7845659163987139, + "grad_norm": 9.175423032974624, + "learning_rate": 9.24178307132817e-06, + "loss": 2.8642072677612305, + "step": 244 + }, + { + "epoch": 0.7877813504823151, + "grad_norm": 10.563682693957642, + "learning_rate": 9.231841329376142e-06, + "loss": 2.961083173751831, + "step": 245 + }, + { + "epoch": 0.7909967845659164, + "grad_norm": 8.694998203842605, + "learning_rate": 9.22184025327865e-06, + "loss": 3.1648690700531006, + "step": 246 + }, + { + "epoch": 0.7942122186495176, + "grad_norm": 13.321968130939467, + "learning_rate": 9.211779983259597e-06, + "loss": 2.65283203125, + "step": 247 + }, + { + "epoch": 0.797427652733119, + "grad_norm": 8.370015100382718, + "learning_rate": 9.201660660372835e-06, + "loss": 2.8571534156799316, + "step": 248 + }, + { + "epoch": 0.8006430868167203, + "grad_norm": 13.1283440171712, + "learning_rate": 9.191482426500192e-06, + "loss": 3.076096534729004, + "step": 249 + }, + { + "epoch": 0.8038585209003215, + "grad_norm": 12.58763191241908, + "learning_rate": 9.181245424349477e-06, + "loss": 3.0239181518554688, + "step": 250 + }, + { + "epoch": 0.8070739549839229, + "grad_norm": 10.995319596435253, + "learning_rate": 9.170949797452481e-06, + "loss": 2.6239326000213623, + "step": 251 + }, + { + "epoch": 0.8102893890675241, + "grad_norm": 9.225322055367544, + "learning_rate": 9.160595690162974e-06, + "loss": 2.5555763244628906, + "step": 252 + }, + { + "epoch": 0.8135048231511254, + "grad_norm": 11.507809934321713, + "learning_rate": 9.15018324765466e-06, + "loss": 2.371147871017456, + "step": 253 + }, + { + "epoch": 0.8167202572347267, + "grad_norm": 12.469884255360506, + "learning_rate": 9.139712615919163e-06, + "loss": 2.494548797607422, + "step": 254 + }, + { + "epoch": 0.819935691318328, + "grad_norm": 10.895498922897257, + "learning_rate": 9.129183941763971e-06, + "loss": 2.974158525466919, + "step": 255 + }, + { + "epoch": 0.8231511254019293, + "grad_norm": 9.163142573864613, + "learning_rate": 9.118597372810374e-06, + "loss": 2.792419672012329, + "step": 256 + }, + { + "epoch": 0.8263665594855305, + "grad_norm": 7.256984326103208, + "learning_rate": 9.107953057491399e-06, + "loss": 2.502934694290161, + "step": 257 + }, + { + "epoch": 0.8295819935691319, + "grad_norm": 9.28895883091526, + "learning_rate": 9.09725114504973e-06, + "loss": 3.282193899154663, + "step": 258 + }, + { + "epoch": 0.8327974276527331, + "grad_norm": 12.653451743754152, + "learning_rate": 9.086491785535613e-06, + "loss": 2.6221415996551514, + "step": 259 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 12.157053154731505, + "learning_rate": 9.07567512980475e-06, + "loss": 2.594520330429077, + "step": 260 + }, + { + "epoch": 0.8392282958199357, + "grad_norm": 16.45711676935307, + "learning_rate": 9.064801329516192e-06, + "loss": 3.279817819595337, + "step": 261 + }, + { + "epoch": 0.842443729903537, + "grad_norm": 13.973378629771124, + "learning_rate": 9.053870537130198e-06, + "loss": 3.408698558807373, + "step": 262 + }, + { + "epoch": 0.8456591639871383, + "grad_norm": 16.62805609651466, + "learning_rate": 9.042882905906118e-06, + "loss": 2.918642997741699, + "step": 263 + }, + { + "epoch": 0.8488745980707395, + "grad_norm": 9.44966384136106, + "learning_rate": 9.03183858990022e-06, + "loss": 2.65049409866333, + "step": 264 + }, + { + "epoch": 0.8520900321543409, + "grad_norm": 13.363066970747031, + "learning_rate": 9.020737743963555e-06, + "loss": 3.2480692863464355, + "step": 265 + }, + { + "epoch": 0.8553054662379421, + "grad_norm": 12.666421815128665, + "learning_rate": 9.009580523739763e-06, + "loss": 3.1771302223205566, + "step": 266 + }, + { + "epoch": 0.8585209003215434, + "grad_norm": 9.17254667692124, + "learning_rate": 8.998367085662908e-06, + "loss": 2.5076744556427, + "step": 267 + }, + { + "epoch": 0.8617363344051447, + "grad_norm": 10.593922036651545, + "learning_rate": 8.987097586955276e-06, + "loss": 2.7998993396759033, + "step": 268 + }, + { + "epoch": 0.864951768488746, + "grad_norm": 14.910537978617421, + "learning_rate": 8.97577218562517e-06, + "loss": 2.735177755355835, + "step": 269 + }, + { + "epoch": 0.8681672025723473, + "grad_norm": 7.437460777250785, + "learning_rate": 8.964391040464699e-06, + "loss": 2.8440442085266113, + "step": 270 + }, + { + "epoch": 0.8713826366559485, + "grad_norm": 9.856724829654736, + "learning_rate": 8.952954311047554e-06, + "loss": 3.18597674369812, + "step": 271 + }, + { + "epoch": 0.8745980707395499, + "grad_norm": 12.798619966866845, + "learning_rate": 8.941462157726757e-06, + "loss": 2.6696226596832275, + "step": 272 + }, + { + "epoch": 0.8778135048231511, + "grad_norm": 10.558246728168221, + "learning_rate": 8.92991474163243e-06, + "loss": 3.1313624382019043, + "step": 273 + }, + { + "epoch": 0.8810289389067524, + "grad_norm": 73.1769401363515, + "learning_rate": 8.918312224669523e-06, + "loss": 2.7278852462768555, + "step": 274 + }, + { + "epoch": 0.8842443729903537, + "grad_norm": 14.250721035805938, + "learning_rate": 8.906654769515551e-06, + "loss": 3.123018264770508, + "step": 275 + }, + { + "epoch": 0.887459807073955, + "grad_norm": 13.624305251997455, + "learning_rate": 8.89494253961831e-06, + "loss": 2.5999131202697754, + "step": 276 + }, + { + "epoch": 0.8906752411575563, + "grad_norm": 7.8025103094357, + "learning_rate": 8.883175699193589e-06, + "loss": 3.3754043579101562, + "step": 277 + }, + { + "epoch": 0.8938906752411575, + "grad_norm": 14.813200674717987, + "learning_rate": 8.871354413222859e-06, + "loss": 2.477343797683716, + "step": 278 + }, + { + "epoch": 0.8971061093247589, + "grad_norm": 16.927933274503733, + "learning_rate": 8.85947884745097e-06, + "loss": 3.132758140563965, + "step": 279 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 7.191780346342989, + "learning_rate": 8.847549168383823e-06, + "loss": 2.820451021194458, + "step": 280 + }, + { + "epoch": 0.9035369774919614, + "grad_norm": 8.389707727309089, + "learning_rate": 8.835565543286031e-06, + "loss": 2.6014206409454346, + "step": 281 + }, + { + "epoch": 0.9067524115755627, + "grad_norm": 11.378077478241833, + "learning_rate": 8.82352814017858e-06, + "loss": 2.541848659515381, + "step": 282 + }, + { + "epoch": 0.909967845659164, + "grad_norm": 8.237217625528718, + "learning_rate": 8.811437127836477e-06, + "loss": 2.961427688598633, + "step": 283 + }, + { + "epoch": 0.9131832797427653, + "grad_norm": 14.816053043362626, + "learning_rate": 8.799292675786365e-06, + "loss": 2.8450682163238525, + "step": 284 + }, + { + "epoch": 0.9163987138263665, + "grad_norm": 16.057064979343828, + "learning_rate": 8.787094954304172e-06, + "loss": 3.3827338218688965, + "step": 285 + }, + { + "epoch": 0.9196141479099679, + "grad_norm": 10.580135043478649, + "learning_rate": 8.7748441344127e-06, + "loss": 2.749037265777588, + "step": 286 + }, + { + "epoch": 0.9228295819935691, + "grad_norm": 13.251352277326397, + "learning_rate": 8.762540387879245e-06, + "loss": 2.7875254154205322, + "step": 287 + }, + { + "epoch": 0.9260450160771704, + "grad_norm": 12.609710642983892, + "learning_rate": 8.75018388721318e-06, + "loss": 2.7887279987335205, + "step": 288 + }, + { + "epoch": 0.9292604501607717, + "grad_norm": 14.143996593057057, + "learning_rate": 8.73777480566353e-06, + "loss": 3.5317232608795166, + "step": 289 + }, + { + "epoch": 0.932475884244373, + "grad_norm": 10.953951580514532, + "learning_rate": 8.725313317216558e-06, + "loss": 2.695559024810791, + "step": 290 + }, + { + "epoch": 0.9356913183279743, + "grad_norm": 19.652789221795818, + "learning_rate": 8.712799596593317e-06, + "loss": 3.773618698120117, + "step": 291 + }, + { + "epoch": 0.9389067524115756, + "grad_norm": 28.646917195900233, + "learning_rate": 8.7002338192472e-06, + "loss": 2.8618407249450684, + "step": 292 + }, + { + "epoch": 0.9421221864951769, + "grad_norm": 14.650325645648199, + "learning_rate": 8.68761616136148e-06, + "loss": 2.663022518157959, + "step": 293 + }, + { + "epoch": 0.9453376205787781, + "grad_norm": 14.205451486366611, + "learning_rate": 8.674946799846844e-06, + "loss": 2.731468677520752, + "step": 294 + }, + { + "epoch": 0.9485530546623794, + "grad_norm": 12.68853285402269, + "learning_rate": 8.662225912338906e-06, + "loss": 3.047337293624878, + "step": 295 + }, + { + "epoch": 0.9517684887459807, + "grad_norm": 9.93366549634143, + "learning_rate": 8.64945367719572e-06, + "loss": 2.7521162033081055, + "step": 296 + }, + { + "epoch": 0.954983922829582, + "grad_norm": 9.799333221523396, + "learning_rate": 8.636630273495284e-06, + "loss": 2.8862874507904053, + "step": 297 + }, + { + "epoch": 0.9581993569131833, + "grad_norm": 9.853206555821423, + "learning_rate": 8.623755881033016e-06, + "loss": 2.4406180381774902, + "step": 298 + }, + { + "epoch": 0.9614147909967846, + "grad_norm": 15.424788267085962, + "learning_rate": 8.61083068031925e-06, + "loss": 2.9202933311462402, + "step": 299 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 8.470590376610176, + "learning_rate": 8.59785485257669e-06, + "loss": 2.7283151149749756, + "step": 300 + }, + { + "epoch": 0.9678456591639871, + "grad_norm": 11.638302591399707, + "learning_rate": 8.58482857973788e-06, + "loss": 3.007424831390381, + "step": 301 + }, + { + "epoch": 0.9710610932475884, + "grad_norm": 20.741792977624527, + "learning_rate": 8.571752044442645e-06, + "loss": 3.5147528648376465, + "step": 302 + }, + { + "epoch": 0.9742765273311897, + "grad_norm": 13.142922472996343, + "learning_rate": 8.558625430035537e-06, + "loss": 2.6513309478759766, + "step": 303 + }, + { + "epoch": 0.977491961414791, + "grad_norm": 16.4573242676752, + "learning_rate": 8.54544892056326e-06, + "loss": 2.8666458129882812, + "step": 304 + }, + { + "epoch": 0.9807073954983923, + "grad_norm": 11.252470074853187, + "learning_rate": 8.53222270077209e-06, + "loss": 3.155954360961914, + "step": 305 + }, + { + "epoch": 0.9839228295819936, + "grad_norm": 9.53791139505571, + "learning_rate": 8.518946956105288e-06, + "loss": 2.705427646636963, + "step": 306 + }, + { + "epoch": 0.9871382636655949, + "grad_norm": 8.995699850871455, + "learning_rate": 8.505621872700493e-06, + "loss": 2.7899868488311768, + "step": 307 + }, + { + "epoch": 0.9903536977491961, + "grad_norm": 20.466444028142707, + "learning_rate": 8.492247637387123e-06, + "loss": 3.3897032737731934, + "step": 308 + }, + { + "epoch": 0.9935691318327974, + "grad_norm": 24.429378092820443, + "learning_rate": 8.478824437683742e-06, + "loss": 2.9029016494750977, + "step": 309 + }, + { + "epoch": 0.9967845659163987, + "grad_norm": 14.9356195741505, + "learning_rate": 8.465352461795443e-06, + "loss": 2.8247499465942383, + "step": 310 + }, + { + "epoch": 1.0, + "grad_norm": 6.9393031111913785, + "learning_rate": 8.451831898611202e-06, + "loss": 1.8072712421417236, + "step": 311 + }, + { + "epoch": 1.0032154340836013, + "grad_norm": 15.619600920043508, + "learning_rate": 8.438262937701232e-06, + "loss": 1.6938456296920776, + "step": 312 + }, + { + "epoch": 1.0064308681672025, + "grad_norm": 11.204028481476685, + "learning_rate": 8.424645769314324e-06, + "loss": 1.7548950910568237, + "step": 313 + }, + { + "epoch": 1.0096463022508038, + "grad_norm": 11.12284902940604, + "learning_rate": 8.410980584375184e-06, + "loss": 1.2501029968261719, + "step": 314 + }, + { + "epoch": 1.0128617363344052, + "grad_norm": 15.282718019978093, + "learning_rate": 8.397267574481746e-06, + "loss": 1.7188260555267334, + "step": 315 + }, + { + "epoch": 1.0160771704180065, + "grad_norm": 21.75639670646837, + "learning_rate": 8.3835069319025e-06, + "loss": 1.522129774093628, + "step": 316 + }, + { + "epoch": 1.0192926045016077, + "grad_norm": 24.92647139454926, + "learning_rate": 8.369698849573778e-06, + "loss": 1.9183681011199951, + "step": 317 + }, + { + "epoch": 1.022508038585209, + "grad_norm": 12.732330001189787, + "learning_rate": 8.355843521097071e-06, + "loss": 1.1024775505065918, + "step": 318 + }, + { + "epoch": 1.0257234726688103, + "grad_norm": 10.813488074904523, + "learning_rate": 8.341941140736292e-06, + "loss": 1.8346397876739502, + "step": 319 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 13.745993996373599, + "learning_rate": 8.327991903415071e-06, + "loss": 1.5781948566436768, + "step": 320 + }, + { + "epoch": 1.0321543408360128, + "grad_norm": 10.618379447829842, + "learning_rate": 8.313996004714007e-06, + "loss": 2.0449671745300293, + "step": 321 + }, + { + "epoch": 1.0353697749196142, + "grad_norm": 30.15610599886493, + "learning_rate": 8.29995364086794e-06, + "loss": 1.843071460723877, + "step": 322 + }, + { + "epoch": 1.0385852090032155, + "grad_norm": 11.641323891727778, + "learning_rate": 8.285865008763185e-06, + "loss": 1.9475151300430298, + "step": 323 + }, + { + "epoch": 1.0418006430868167, + "grad_norm": 10.055339873855122, + "learning_rate": 8.271730305934781e-06, + "loss": 1.6771236658096313, + "step": 324 + }, + { + "epoch": 1.045016077170418, + "grad_norm": 19.242811918496102, + "learning_rate": 8.257549730563726e-06, + "loss": 1.799647331237793, + "step": 325 + }, + { + "epoch": 1.0482315112540193, + "grad_norm": 9.640296031149678, + "learning_rate": 8.24332348147418e-06, + "loss": 1.5140706300735474, + "step": 326 + }, + { + "epoch": 1.0514469453376205, + "grad_norm": 17.52024512085222, + "learning_rate": 8.229051758130697e-06, + "loss": 1.7540702819824219, + "step": 327 + }, + { + "epoch": 1.0546623794212218, + "grad_norm": 12.09956162787055, + "learning_rate": 8.214734760635418e-06, + "loss": 1.0229500532150269, + "step": 328 + }, + { + "epoch": 1.0578778135048232, + "grad_norm": 9.27508841346202, + "learning_rate": 8.200372689725265e-06, + "loss": 1.9475460052490234, + "step": 329 + }, + { + "epoch": 1.0610932475884245, + "grad_norm": 19.21666339915877, + "learning_rate": 8.185965746769134e-06, + "loss": 1.4429758787155151, + "step": 330 + }, + { + "epoch": 1.0643086816720257, + "grad_norm": 13.399563404218933, + "learning_rate": 8.171514133765062e-06, + "loss": 2.7338225841522217, + "step": 331 + }, + { + "epoch": 1.067524115755627, + "grad_norm": 13.76033185328455, + "learning_rate": 8.157018053337401e-06, + "loss": 2.250379800796509, + "step": 332 + }, + { + "epoch": 1.0707395498392283, + "grad_norm": 7.631778726340121, + "learning_rate": 8.142477708733977e-06, + "loss": 1.337722659111023, + "step": 333 + }, + { + "epoch": 1.0739549839228295, + "grad_norm": 13.880528130342933, + "learning_rate": 8.127893303823237e-06, + "loss": 1.0721861124038696, + "step": 334 + }, + { + "epoch": 1.077170418006431, + "grad_norm": 12.862974801515321, + "learning_rate": 8.113265043091393e-06, + "loss": 1.4026358127593994, + "step": 335 + }, + { + "epoch": 1.0803858520900322, + "grad_norm": 9.49382053942382, + "learning_rate": 8.098593131639555e-06, + "loss": 1.9554322957992554, + "step": 336 + }, + { + "epoch": 1.0836012861736335, + "grad_norm": 9.28245720635116, + "learning_rate": 8.083877775180851e-06, + "loss": 1.3829636573791504, + "step": 337 + }, + { + "epoch": 1.0868167202572347, + "grad_norm": 9.50422506559844, + "learning_rate": 8.06911918003755e-06, + "loss": 1.4771521091461182, + "step": 338 + }, + { + "epoch": 1.090032154340836, + "grad_norm": 14.885902266047808, + "learning_rate": 8.054317553138164e-06, + "loss": 1.9953043460845947, + "step": 339 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 21.84891512532416, + "learning_rate": 8.039473102014552e-06, + "loss": 1.8568346500396729, + "step": 340 + }, + { + "epoch": 1.0964630225080385, + "grad_norm": 10.983885337102643, + "learning_rate": 8.024586034798998e-06, + "loss": 1.0092703104019165, + "step": 341 + }, + { + "epoch": 1.09967845659164, + "grad_norm": 9.853494378778448, + "learning_rate": 8.00965656022131e-06, + "loss": 1.3574286699295044, + "step": 342 + }, + { + "epoch": 1.1028938906752412, + "grad_norm": 9.677987937855264, + "learning_rate": 7.994684887605877e-06, + "loss": 1.5812814235687256, + "step": 343 + }, + { + "epoch": 1.1061093247588425, + "grad_norm": 12.943696081924468, + "learning_rate": 7.97967122686875e-06, + "loss": 1.5962257385253906, + "step": 344 + }, + { + "epoch": 1.1093247588424437, + "grad_norm": 11.101835108804991, + "learning_rate": 7.964615788514683e-06, + "loss": 1.9959800243377686, + "step": 345 + }, + { + "epoch": 1.112540192926045, + "grad_norm": 9.080973101963357, + "learning_rate": 7.949518783634191e-06, + "loss": 1.4967670440673828, + "step": 346 + }, + { + "epoch": 1.1157556270096463, + "grad_norm": 13.434521540385626, + "learning_rate": 7.934380423900591e-06, + "loss": 1.5423383712768555, + "step": 347 + }, + { + "epoch": 1.1189710610932475, + "grad_norm": 10.76431702775392, + "learning_rate": 7.919200921567029e-06, + "loss": 0.7710652351379395, + "step": 348 + }, + { + "epoch": 1.122186495176849, + "grad_norm": 9.814715538896447, + "learning_rate": 7.903980489463507e-06, + "loss": 1.6633532047271729, + "step": 349 + }, + { + "epoch": 1.1254019292604502, + "grad_norm": 19.545538078520554, + "learning_rate": 7.8887193409939e-06, + "loss": 2.7766261100769043, + "step": 350 + }, + { + "epoch": 1.1286173633440515, + "grad_norm": 9.41961139939792, + "learning_rate": 7.87341769013296e-06, + "loss": 1.6282963752746582, + "step": 351 + }, + { + "epoch": 1.1318327974276527, + "grad_norm": 21.731938311350465, + "learning_rate": 7.858075751423319e-06, + "loss": 1.9692919254302979, + "step": 352 + }, + { + "epoch": 1.135048231511254, + "grad_norm": 16.44935380857812, + "learning_rate": 7.84269373997248e-06, + "loss": 1.912774682044983, + "step": 353 + }, + { + "epoch": 1.1382636655948553, + "grad_norm": 13.72564993302605, + "learning_rate": 7.827271871449803e-06, + "loss": 1.8513641357421875, + "step": 354 + }, + { + "epoch": 1.1414790996784565, + "grad_norm": 14.127395536555673, + "learning_rate": 7.811810362083476e-06, + "loss": 1.7648791074752808, + "step": 355 + }, + { + "epoch": 1.144694533762058, + "grad_norm": 11.466500120151354, + "learning_rate": 7.79630942865749e-06, + "loss": 1.8777326345443726, + "step": 356 + }, + { + "epoch": 1.1479099678456592, + "grad_norm": 16.23925105600561, + "learning_rate": 7.780769288508594e-06, + "loss": 1.4060571193695068, + "step": 357 + }, + { + "epoch": 1.1511254019292605, + "grad_norm": 13.429215394228326, + "learning_rate": 7.76519015952325e-06, + "loss": 1.8052624464035034, + "step": 358 + }, + { + "epoch": 1.1543408360128617, + "grad_norm": 11.074667268910995, + "learning_rate": 7.749572260134578e-06, + "loss": 1.251237392425537, + "step": 359 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 9.520190196974967, + "learning_rate": 7.733915809319295e-06, + "loss": 1.7426929473876953, + "step": 360 + }, + { + "epoch": 1.1607717041800643, + "grad_norm": 13.259610020931154, + "learning_rate": 7.718221026594638e-06, + "loss": 1.3306620121002197, + "step": 361 + }, + { + "epoch": 1.1639871382636655, + "grad_norm": 11.55865396171068, + "learning_rate": 7.7024881320153e-06, + "loss": 1.1201272010803223, + "step": 362 + }, + { + "epoch": 1.167202572347267, + "grad_norm": 18.047432249398582, + "learning_rate": 7.686717346170323e-06, + "loss": 1.2232224941253662, + "step": 363 + }, + { + "epoch": 1.1704180064308682, + "grad_norm": 19.950356361520544, + "learning_rate": 7.67090889018003e-06, + "loss": 1.5430934429168701, + "step": 364 + }, + { + "epoch": 1.1736334405144695, + "grad_norm": 11.821218809655774, + "learning_rate": 7.655062985692905e-06, + "loss": 1.1287882328033447, + "step": 365 + }, + { + "epoch": 1.1768488745980707, + "grad_norm": 16.3870537176483, + "learning_rate": 7.639179854882499e-06, + "loss": 1.8235803842544556, + "step": 366 + }, + { + "epoch": 1.180064308681672, + "grad_norm": 12.215851800288341, + "learning_rate": 7.623259720444305e-06, + "loss": 1.5709896087646484, + "step": 367 + }, + { + "epoch": 1.1832797427652733, + "grad_norm": 10.179196448113856, + "learning_rate": 7.6073028055926375e-06, + "loss": 1.3701753616333008, + "step": 368 + }, + { + "epoch": 1.1864951768488745, + "grad_norm": 13.266482945914282, + "learning_rate": 7.591309334057511e-06, + "loss": 1.8047711849212646, + "step": 369 + }, + { + "epoch": 1.189710610932476, + "grad_norm": 12.558221362384941, + "learning_rate": 7.5752795300814915e-06, + "loss": 1.3536028861999512, + "step": 370 + }, + { + "epoch": 1.1929260450160772, + "grad_norm": 8.288395487014714, + "learning_rate": 7.5592136184165586e-06, + "loss": 1.7526684999465942, + "step": 371 + }, + { + "epoch": 1.1961414790996785, + "grad_norm": 32.312646691375775, + "learning_rate": 7.543111824320956e-06, + "loss": 1.9506163597106934, + "step": 372 + }, + { + "epoch": 1.1993569131832797, + "grad_norm": 18.190358055738578, + "learning_rate": 7.526974373556031e-06, + "loss": 1.4558610916137695, + "step": 373 + }, + { + "epoch": 1.202572347266881, + "grad_norm": 22.456941575166663, + "learning_rate": 7.510801492383064e-06, + "loss": 3.8982784748077393, + "step": 374 + }, + { + "epoch": 1.2057877813504823, + "grad_norm": 9.286565530142028, + "learning_rate": 7.494593407560105e-06, + "loss": 1.4054985046386719, + "step": 375 + }, + { + "epoch": 1.2090032154340835, + "grad_norm": 8.57272869781517, + "learning_rate": 7.4783503463387915e-06, + "loss": 1.4290287494659424, + "step": 376 + }, + { + "epoch": 1.212218649517685, + "grad_norm": 25.62750132146527, + "learning_rate": 7.462072536461158e-06, + "loss": 2.6208114624023438, + "step": 377 + }, + { + "epoch": 1.2154340836012862, + "grad_norm": 29.086655268807295, + "learning_rate": 7.445760206156443e-06, + "loss": 1.4879982471466064, + "step": 378 + }, + { + "epoch": 1.2186495176848875, + "grad_norm": 10.92323075616016, + "learning_rate": 7.429413584137899e-06, + "loss": 1.5636539459228516, + "step": 379 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 131.58390317452395, + "learning_rate": 7.413032899599575e-06, + "loss": 2.5596606731414795, + "step": 380 + }, + { + "epoch": 1.22508038585209, + "grad_norm": 10.911077112816377, + "learning_rate": 7.3966183822131055e-06, + "loss": 1.7480653524398804, + "step": 381 + }, + { + "epoch": 1.2282958199356913, + "grad_norm": 14.750459586463144, + "learning_rate": 7.380170262124491e-06, + "loss": 1.1983137130737305, + "step": 382 + }, + { + "epoch": 1.2315112540192925, + "grad_norm": 10.193526385520498, + "learning_rate": 7.363688769950874e-06, + "loss": 2.4147493839263916, + "step": 383 + }, + { + "epoch": 1.234726688102894, + "grad_norm": 8.110765389360624, + "learning_rate": 7.347174136777303e-06, + "loss": 1.5930522680282593, + "step": 384 + }, + { + "epoch": 1.2379421221864952, + "grad_norm": 13.379838012098837, + "learning_rate": 7.33062659415349e-06, + "loss": 1.545811414718628, + "step": 385 + }, + { + "epoch": 1.2411575562700965, + "grad_norm": 12.491186334322332, + "learning_rate": 7.314046374090569e-06, + "loss": 1.4453141689300537, + "step": 386 + }, + { + "epoch": 1.2443729903536977, + "grad_norm": 11.673353502682332, + "learning_rate": 7.297433709057837e-06, + "loss": 1.6127538681030273, + "step": 387 + }, + { + "epoch": 1.247588424437299, + "grad_norm": 14.231888382187387, + "learning_rate": 7.280788831979504e-06, + "loss": 1.3530693054199219, + "step": 388 + }, + { + "epoch": 1.2508038585209003, + "grad_norm": 8.015144095452492, + "learning_rate": 7.264111976231416e-06, + "loss": 2.167546510696411, + "step": 389 + }, + { + "epoch": 1.2540192926045015, + "grad_norm": 6.8301669716274125, + "learning_rate": 7.247403375637789e-06, + "loss": 1.1884214878082275, + "step": 390 + }, + { + "epoch": 1.257234726688103, + "grad_norm": 9.740309887687731, + "learning_rate": 7.230663264467932e-06, + "loss": 1.8475289344787598, + "step": 391 + }, + { + "epoch": 1.2604501607717042, + "grad_norm": 15.839480265530513, + "learning_rate": 7.213891877432957e-06, + "loss": 1.3596510887145996, + "step": 392 + }, + { + "epoch": 1.2636655948553055, + "grad_norm": 8.14484618288492, + "learning_rate": 7.197089449682495e-06, + "loss": 1.1813664436340332, + "step": 393 + }, + { + "epoch": 1.2668810289389068, + "grad_norm": 14.070099542557305, + "learning_rate": 7.180256216801392e-06, + "loss": 1.5710866451263428, + "step": 394 + }, + { + "epoch": 1.270096463022508, + "grad_norm": 11.220505541098138, + "learning_rate": 7.163392414806409e-06, + "loss": 1.1523092985153198, + "step": 395 + }, + { + "epoch": 1.2733118971061093, + "grad_norm": 9.667370363546029, + "learning_rate": 7.146498280142917e-06, + "loss": 1.584272861480713, + "step": 396 + }, + { + "epoch": 1.2765273311897105, + "grad_norm": 7.714526098855273, + "learning_rate": 7.1295740496815715e-06, + "loss": 1.8063056468963623, + "step": 397 + }, + { + "epoch": 1.279742765273312, + "grad_norm": 10.787378083877842, + "learning_rate": 7.112619960715004e-06, + "loss": 1.792249083518982, + "step": 398 + }, + { + "epoch": 1.2829581993569132, + "grad_norm": 13.233918817558243, + "learning_rate": 7.095636250954481e-06, + "loss": 1.5670925378799438, + "step": 399 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 11.79576476009515, + "learning_rate": 7.078623158526588e-06, + "loss": 0.8326504230499268, + "step": 400 + }, + { + "epoch": 1.2893890675241158, + "grad_norm": 10.491299911652822, + "learning_rate": 7.061580921969875e-06, + "loss": 1.3786863088607788, + "step": 401 + }, + { + "epoch": 1.292604501607717, + "grad_norm": 11.94691709161536, + "learning_rate": 7.044509780231517e-06, + "loss": 1.1054686307907104, + "step": 402 + }, + { + "epoch": 1.2958199356913183, + "grad_norm": 12.08443122924739, + "learning_rate": 7.027409972663972e-06, + "loss": 1.670744776725769, + "step": 403 + }, + { + "epoch": 1.2990353697749195, + "grad_norm": 12.818583379341405, + "learning_rate": 7.010281739021612e-06, + "loss": 1.4025758504867554, + "step": 404 + }, + { + "epoch": 1.302250803858521, + "grad_norm": 17.63344093685557, + "learning_rate": 6.993125319457371e-06, + "loss": 1.3583598136901855, + "step": 405 + }, + { + "epoch": 1.3054662379421222, + "grad_norm": 11.894949620821711, + "learning_rate": 6.975940954519372e-06, + "loss": 1.1004712581634521, + "step": 406 + }, + { + "epoch": 1.3086816720257235, + "grad_norm": 12.892080489602101, + "learning_rate": 6.958728885147559e-06, + "loss": 1.2955303192138672, + "step": 407 + }, + { + "epoch": 1.3118971061093248, + "grad_norm": 19.36393889877359, + "learning_rate": 6.941489352670315e-06, + "loss": 1.0670486688613892, + "step": 408 + }, + { + "epoch": 1.315112540192926, + "grad_norm": 8.211056728192768, + "learning_rate": 6.92422259880108e-06, + "loss": 1.9638640880584717, + "step": 409 + }, + { + "epoch": 1.3183279742765273, + "grad_norm": 10.833159131088287, + "learning_rate": 6.9069288656349654e-06, + "loss": 1.5076138973236084, + "step": 410 + }, + { + "epoch": 1.3215434083601285, + "grad_norm": 8.58513007343669, + "learning_rate": 6.8896083956453495e-06, + "loss": 1.0038764476776123, + "step": 411 + }, + { + "epoch": 1.32475884244373, + "grad_norm": 10.831768155951462, + "learning_rate": 6.87226143168049e-06, + "loss": 1.8994210958480835, + "step": 412 + }, + { + "epoch": 1.3279742765273312, + "grad_norm": 9.967816587412674, + "learning_rate": 6.8548882169601125e-06, + "loss": 1.7425453662872314, + "step": 413 + }, + { + "epoch": 1.3311897106109325, + "grad_norm": 11.573652006078895, + "learning_rate": 6.837488995071999e-06, + "loss": 0.9079417586326599, + "step": 414 + }, + { + "epoch": 1.3344051446945338, + "grad_norm": 11.435087576974311, + "learning_rate": 6.820064009968577e-06, + "loss": 2.3625776767730713, + "step": 415 + }, + { + "epoch": 1.337620578778135, + "grad_norm": 11.687306169218841, + "learning_rate": 6.802613505963496e-06, + "loss": 1.4717687368392944, + "step": 416 + }, + { + "epoch": 1.3408360128617363, + "grad_norm": 7.907999407954611, + "learning_rate": 6.7851377277282025e-06, + "loss": 1.8274226188659668, + "step": 417 + }, + { + "epoch": 1.3440514469453375, + "grad_norm": 13.33807600903979, + "learning_rate": 6.767636920288514e-06, + "loss": 0.9393669366836548, + "step": 418 + }, + { + "epoch": 1.347266881028939, + "grad_norm": 18.07910651280989, + "learning_rate": 6.7501113290211715e-06, + "loss": 2.2995893955230713, + "step": 419 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 13.219145457947517, + "learning_rate": 6.732561199650417e-06, + "loss": 0.9605876207351685, + "step": 420 + }, + { + "epoch": 1.3536977491961415, + "grad_norm": 9.84083040492843, + "learning_rate": 6.71498677824453e-06, + "loss": 1.400538682937622, + "step": 421 + }, + { + "epoch": 1.3569131832797428, + "grad_norm": 8.770195722914984, + "learning_rate": 6.69738831121239e-06, + "loss": 1.5933213233947754, + "step": 422 + }, + { + "epoch": 1.360128617363344, + "grad_norm": 13.726188714507023, + "learning_rate": 6.679766045300017e-06, + "loss": 1.3214967250823975, + "step": 423 + }, + { + "epoch": 1.3633440514469453, + "grad_norm": 11.733403335264603, + "learning_rate": 6.66212022758711e-06, + "loss": 1.8377995491027832, + "step": 424 + }, + { + "epoch": 1.3665594855305465, + "grad_norm": 10.482810176903879, + "learning_rate": 6.644451105483588e-06, + "loss": 2.171910047531128, + "step": 425 + }, + { + "epoch": 1.369774919614148, + "grad_norm": 10.272158746941752, + "learning_rate": 6.626758926726118e-06, + "loss": 1.01298987865448, + "step": 426 + }, + { + "epoch": 1.3729903536977492, + "grad_norm": 10.850492383150184, + "learning_rate": 6.609043939374638e-06, + "loss": 1.3563112020492554, + "step": 427 + }, + { + "epoch": 1.3762057877813505, + "grad_norm": 9.52772114565326, + "learning_rate": 6.591306391808886e-06, + "loss": 1.1524646282196045, + "step": 428 + }, + { + "epoch": 1.3794212218649518, + "grad_norm": 9.634527311630647, + "learning_rate": 6.5735465327249125e-06, + "loss": 1.288474202156067, + "step": 429 + }, + { + "epoch": 1.382636655948553, + "grad_norm": 15.933483184745002, + "learning_rate": 6.555764611131599e-06, + "loss": 1.6986860036849976, + "step": 430 + }, + { + "epoch": 1.3858520900321543, + "grad_norm": 12.386422766725826, + "learning_rate": 6.537960876347155e-06, + "loss": 1.1839923858642578, + "step": 431 + }, + { + "epoch": 1.3890675241157555, + "grad_norm": 10.253938139276935, + "learning_rate": 6.520135577995636e-06, + "loss": 1.0426067113876343, + "step": 432 + }, + { + "epoch": 1.392282958199357, + "grad_norm": 11.04901140382035, + "learning_rate": 6.502288966003437e-06, + "loss": 1.5624547004699707, + "step": 433 + }, + { + "epoch": 1.3954983922829582, + "grad_norm": 11.25738538359911, + "learning_rate": 6.48442129059579e-06, + "loss": 1.6425249576568604, + "step": 434 + }, + { + "epoch": 1.3987138263665595, + "grad_norm": 7.369100897838746, + "learning_rate": 6.4665328022932505e-06, + "loss": 1.1195060014724731, + "step": 435 + }, + { + "epoch": 1.4019292604501608, + "grad_norm": 8.920734482221718, + "learning_rate": 6.448623751908193e-06, + "loss": 1.9613807201385498, + "step": 436 + }, + { + "epoch": 1.405144694533762, + "grad_norm": 17.2717195962134, + "learning_rate": 6.43069439054129e-06, + "loss": 1.9818463325500488, + "step": 437 + }, + { + "epoch": 1.4083601286173635, + "grad_norm": 10.291894233810238, + "learning_rate": 6.4127449695779894e-06, + "loss": 1.321189284324646, + "step": 438 + }, + { + "epoch": 1.4115755627009645, + "grad_norm": 14.652832324526607, + "learning_rate": 6.394775740684996e-06, + "loss": 1.653143048286438, + "step": 439 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 7.091089961179019, + "learning_rate": 6.376786955806735e-06, + "loss": 1.3114051818847656, + "step": 440 + }, + { + "epoch": 1.4180064308681672, + "grad_norm": 8.725539866872378, + "learning_rate": 6.358778867161829e-06, + "loss": 1.6939886808395386, + "step": 441 + }, + { + "epoch": 1.4212218649517685, + "grad_norm": 13.545006306769176, + "learning_rate": 6.340751727239551e-06, + "loss": 1.195266842842102, + "step": 442 + }, + { + "epoch": 1.4244372990353698, + "grad_norm": 8.297440104157152, + "learning_rate": 6.322705788796293e-06, + "loss": 1.6877739429473877, + "step": 443 + }, + { + "epoch": 1.427652733118971, + "grad_norm": 9.61755742164904, + "learning_rate": 6.304641304852017e-06, + "loss": 1.8234769105911255, + "step": 444 + }, + { + "epoch": 1.4308681672025725, + "grad_norm": 8.53099814778349, + "learning_rate": 6.286558528686713e-06, + "loss": 1.707655429840088, + "step": 445 + }, + { + "epoch": 1.4340836012861735, + "grad_norm": 16.09095041323655, + "learning_rate": 6.268457713836839e-06, + "loss": 1.685136318206787, + "step": 446 + }, + { + "epoch": 1.437299035369775, + "grad_norm": 9.220851793207403, + "learning_rate": 6.250339114091775e-06, + "loss": 1.8174901008605957, + "step": 447 + }, + { + "epoch": 1.4405144694533762, + "grad_norm": 16.91990210354561, + "learning_rate": 6.2322029834902565e-06, + "loss": 1.2479822635650635, + "step": 448 + }, + { + "epoch": 1.4437299035369775, + "grad_norm": 18.222539873072662, + "learning_rate": 6.214049576316824e-06, + "loss": 2.1364529132843018, + "step": 449 + }, + { + "epoch": 1.4469453376205788, + "grad_norm": 8.829662303717768, + "learning_rate": 6.195879147098246e-06, + "loss": 1.1708037853240967, + "step": 450 + }, + { + "epoch": 1.45016077170418, + "grad_norm": 9.811681806402762, + "learning_rate": 6.177691950599953e-06, + "loss": 1.7463445663452148, + "step": 451 + }, + { + "epoch": 1.4533762057877815, + "grad_norm": 15.76003868979169, + "learning_rate": 6.159488241822473e-06, + "loss": 1.4239546060562134, + "step": 452 + }, + { + "epoch": 1.4565916398713825, + "grad_norm": 13.819031689596322, + "learning_rate": 6.141268275997848e-06, + "loss": 1.0984452962875366, + "step": 453 + }, + { + "epoch": 1.459807073954984, + "grad_norm": 9.216205799504845, + "learning_rate": 6.123032308586059e-06, + "loss": 1.6430256366729736, + "step": 454 + }, + { + "epoch": 1.4630225080385852, + "grad_norm": 12.706806062500931, + "learning_rate": 6.10478059527144e-06, + "loss": 1.4744967222213745, + "step": 455 + }, + { + "epoch": 1.4662379421221865, + "grad_norm": 9.724752942795591, + "learning_rate": 6.086513391959101e-06, + "loss": 1.6440513134002686, + "step": 456 + }, + { + "epoch": 1.4694533762057878, + "grad_norm": 12.12251316195964, + "learning_rate": 6.068230954771334e-06, + "loss": 1.4198493957519531, + "step": 457 + }, + { + "epoch": 1.472668810289389, + "grad_norm": 10.329929581969795, + "learning_rate": 6.0499335400440216e-06, + "loss": 1.5990746021270752, + "step": 458 + }, + { + "epoch": 1.4758842443729905, + "grad_norm": 18.5583606495554, + "learning_rate": 6.031621404323046e-06, + "loss": 1.1847198009490967, + "step": 459 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 15.993991523648914, + "learning_rate": 6.013294804360689e-06, + "loss": 0.9134633541107178, + "step": 460 + }, + { + "epoch": 1.482315112540193, + "grad_norm": 9.490813889864704, + "learning_rate": 5.9949539971120405e-06, + "loss": 2.0340170860290527, + "step": 461 + }, + { + "epoch": 1.4855305466237942, + "grad_norm": 11.37889692680751, + "learning_rate": 5.976599239731381e-06, + "loss": 1.8881299495697021, + "step": 462 + }, + { + "epoch": 1.4887459807073955, + "grad_norm": 9.569629046002419, + "learning_rate": 5.9582307895685876e-06, + "loss": 1.7137913703918457, + "step": 463 + }, + { + "epoch": 1.4919614147909968, + "grad_norm": 12.420204461040186, + "learning_rate": 5.939848904165519e-06, + "loss": 1.2723381519317627, + "step": 464 + }, + { + "epoch": 1.495176848874598, + "grad_norm": 11.926795629917983, + "learning_rate": 5.9214538412524155e-06, + "loss": 1.1736359596252441, + "step": 465 + }, + { + "epoch": 1.4983922829581995, + "grad_norm": 12.405298290422278, + "learning_rate": 5.903045858744271e-06, + "loss": 1.3090627193450928, + "step": 466 + }, + { + "epoch": 1.5016077170418005, + "grad_norm": 15.523116313387339, + "learning_rate": 5.884625214737224e-06, + "loss": 1.4756783246994019, + "step": 467 + }, + { + "epoch": 1.504823151125402, + "grad_norm": 10.93229883191674, + "learning_rate": 5.866192167504941e-06, + "loss": 1.6622803211212158, + "step": 468 + }, + { + "epoch": 1.5080385852090032, + "grad_norm": 17.915289620044277, + "learning_rate": 5.84774697549499e-06, + "loss": 1.6512175798416138, + "step": 469 + }, + { + "epoch": 1.5112540192926045, + "grad_norm": 30.283483282050263, + "learning_rate": 5.8292898973252246e-06, + "loss": 1.4214634895324707, + "step": 470 + }, + { + "epoch": 1.5144694533762058, + "grad_norm": 15.810670228159147, + "learning_rate": 5.810821191780146e-06, + "loss": 1.1714420318603516, + "step": 471 + }, + { + "epoch": 1.517684887459807, + "grad_norm": 13.058805569793217, + "learning_rate": 5.792341117807284e-06, + "loss": 1.4205889701843262, + "step": 472 + }, + { + "epoch": 1.5209003215434085, + "grad_norm": 10.022823791961894, + "learning_rate": 5.773849934513568e-06, + "loss": 1.419925570487976, + "step": 473 + }, + { + "epoch": 1.5241157556270095, + "grad_norm": 12.48707482451956, + "learning_rate": 5.755347901161683e-06, + "loss": 1.8584307432174683, + "step": 474 + }, + { + "epoch": 1.527331189710611, + "grad_norm": 12.857583603145283, + "learning_rate": 5.736835277166446e-06, + "loss": 2.2646093368530273, + "step": 475 + }, + { + "epoch": 1.5305466237942122, + "grad_norm": 10.274067777361985, + "learning_rate": 5.7183123220911615e-06, + "loss": 0.9686712622642517, + "step": 476 + }, + { + "epoch": 1.5337620578778135, + "grad_norm": 29.493341811852122, + "learning_rate": 5.699779295643988e-06, + "loss": 0.964187741279602, + "step": 477 + }, + { + "epoch": 1.5369774919614148, + "grad_norm": 13.55003021738102, + "learning_rate": 5.68123645767429e-06, + "loss": 1.0403389930725098, + "step": 478 + }, + { + "epoch": 1.540192926045016, + "grad_norm": 12.54908295885377, + "learning_rate": 5.662684068169002e-06, + "loss": 1.6527446508407593, + "step": 479 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 6.610317425915739, + "learning_rate": 5.644122387248975e-06, + "loss": 1.4375754594802856, + "step": 480 + }, + { + "epoch": 1.5466237942122185, + "grad_norm": 11.06854659241844, + "learning_rate": 5.6255516751653376e-06, + "loss": 1.3001888990402222, + "step": 481 + }, + { + "epoch": 1.54983922829582, + "grad_norm": 21.599505542000774, + "learning_rate": 5.606972192295841e-06, + "loss": 1.9102635383605957, + "step": 482 + }, + { + "epoch": 1.5530546623794212, + "grad_norm": 11.405842693936945, + "learning_rate": 5.588384199141211e-06, + "loss": 1.1081337928771973, + "step": 483 + }, + { + "epoch": 1.5562700964630225, + "grad_norm": 10.182874123460897, + "learning_rate": 5.569787956321496e-06, + "loss": 1.2650421857833862, + "step": 484 + }, + { + "epoch": 1.5594855305466238, + "grad_norm": 13.134492774505976, + "learning_rate": 5.551183724572411e-06, + "loss": 1.2185710668563843, + "step": 485 + }, + { + "epoch": 1.562700964630225, + "grad_norm": 15.44354876414701, + "learning_rate": 5.532571764741686e-06, + "loss": 1.2483716011047363, + "step": 486 + }, + { + "epoch": 1.5659163987138265, + "grad_norm": 13.467838369490899, + "learning_rate": 5.513952337785398e-06, + "loss": 1.7064783573150635, + "step": 487 + }, + { + "epoch": 1.5691318327974275, + "grad_norm": 11.973440553832287, + "learning_rate": 5.4953257047643284e-06, + "loss": 1.9306564331054688, + "step": 488 + }, + { + "epoch": 1.572347266881029, + "grad_norm": 12.222087950483303, + "learning_rate": 5.476692126840287e-06, + "loss": 1.796401858329773, + "step": 489 + }, + { + "epoch": 1.5755627009646302, + "grad_norm": 14.251036373638994, + "learning_rate": 5.458051865272462e-06, + "loss": 1.9565566778182983, + "step": 490 + }, + { + "epoch": 1.5787781350482315, + "grad_norm": 12.909098298317854, + "learning_rate": 5.439405181413752e-06, + "loss": 1.7262601852416992, + "step": 491 + }, + { + "epoch": 1.5819935691318328, + "grad_norm": 9.663223095722376, + "learning_rate": 5.420752336707098e-06, + "loss": 1.2656997442245483, + "step": 492 + }, + { + "epoch": 1.585209003215434, + "grad_norm": 15.520726456430584, + "learning_rate": 5.402093592681823e-06, + "loss": 1.3866935968399048, + "step": 493 + }, + { + "epoch": 1.5884244372990355, + "grad_norm": 10.917894975556058, + "learning_rate": 5.383429210949967e-06, + "loss": 0.8932974934577942, + "step": 494 + }, + { + "epoch": 1.5916398713826365, + "grad_norm": 13.277750253841756, + "learning_rate": 5.36475945320261e-06, + "loss": 1.5216851234436035, + "step": 495 + }, + { + "epoch": 1.594855305466238, + "grad_norm": 12.30741363554072, + "learning_rate": 5.346084581206215e-06, + "loss": 1.0728681087493896, + "step": 496 + }, + { + "epoch": 1.5980707395498392, + "grad_norm": 29.029933055790377, + "learning_rate": 5.327404856798944e-06, + "loss": 1.913758635520935, + "step": 497 + }, + { + "epoch": 1.6012861736334405, + "grad_norm": 12.42781087693241, + "learning_rate": 5.3087205418870014e-06, + "loss": 1.6854526996612549, + "step": 498 + }, + { + "epoch": 1.6045016077170418, + "grad_norm": 9.018921661705617, + "learning_rate": 5.29003189844095e-06, + "loss": 1.147437334060669, + "step": 499 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 13.581361587228178, + "learning_rate": 5.2713391884920415e-06, + "loss": 2.265592098236084, + "step": 500 + }, + { + "epoch": 1.6109324758842445, + "grad_norm": 9.087003042899404, + "learning_rate": 5.2526426741285465e-06, + "loss": 1.6772857904434204, + "step": 501 + }, + { + "epoch": 1.6141479099678455, + "grad_norm": 12.529074715254756, + "learning_rate": 5.233942617492077e-06, + "loss": 1.633618712425232, + "step": 502 + }, + { + "epoch": 1.617363344051447, + "grad_norm": 12.64649957772085, + "learning_rate": 5.215239280773908e-06, + "loss": 1.5846664905548096, + "step": 503 + }, + { + "epoch": 1.6205787781350482, + "grad_norm": 8.149292574594154, + "learning_rate": 5.196532926211307e-06, + "loss": 1.4743802547454834, + "step": 504 + }, + { + "epoch": 1.6237942122186495, + "grad_norm": 30.942623906686798, + "learning_rate": 5.177823816083853e-06, + "loss": 1.7887756824493408, + "step": 505 + }, + { + "epoch": 1.6270096463022508, + "grad_norm": 10.656347918532477, + "learning_rate": 5.15911221270976e-06, + "loss": 1.8924931287765503, + "step": 506 + }, + { + "epoch": 1.630225080385852, + "grad_norm": 11.982264969959546, + "learning_rate": 5.140398378442201e-06, + "loss": 1.5837712287902832, + "step": 507 + }, + { + "epoch": 1.6334405144694535, + "grad_norm": 17.74630841676715, + "learning_rate": 5.121682575665625e-06, + "loss": 1.3969013690948486, + "step": 508 + }, + { + "epoch": 1.6366559485530545, + "grad_norm": 13.757379611481515, + "learning_rate": 5.102965066792085e-06, + "loss": 1.3699804544448853, + "step": 509 + }, + { + "epoch": 1.639871382636656, + "grad_norm": 9.471724169843402, + "learning_rate": 5.084246114257554e-06, + "loss": 1.3550987243652344, + "step": 510 + }, + { + "epoch": 1.6430868167202572, + "grad_norm": 15.596683071578276, + "learning_rate": 5.065525980518244e-06, + "loss": 1.2851953506469727, + "step": 511 + }, + { + "epoch": 1.6463022508038585, + "grad_norm": 10.623931822433907, + "learning_rate": 5.046804928046933e-06, + "loss": 1.3243783712387085, + "step": 512 + }, + { + "epoch": 1.6495176848874598, + "grad_norm": 22.19363105957551, + "learning_rate": 5.028083219329274e-06, + "loss": 1.6754820346832275, + "step": 513 + }, + { + "epoch": 1.652733118971061, + "grad_norm": 10.186022632613163, + "learning_rate": 5.009361116860129e-06, + "loss": 2.1871590614318848, + "step": 514 + }, + { + "epoch": 1.6559485530546625, + "grad_norm": 14.398718167356659, + "learning_rate": 4.990638883139872e-06, + "loss": 0.9794585108757019, + "step": 515 + }, + { + "epoch": 1.6591639871382635, + "grad_norm": 10.294011417163757, + "learning_rate": 4.9719167806707265e-06, + "loss": 1.329102635383606, + "step": 516 + }, + { + "epoch": 1.662379421221865, + "grad_norm": 10.782193417515709, + "learning_rate": 4.953195071953069e-06, + "loss": 1.4713919162750244, + "step": 517 + }, + { + "epoch": 1.6655948553054662, + "grad_norm": 20.256749627414443, + "learning_rate": 4.934474019481755e-06, + "loss": 1.6331861019134521, + "step": 518 + }, + { + "epoch": 1.6688102893890675, + "grad_norm": 10.326694785789037, + "learning_rate": 4.915753885742446e-06, + "loss": 0.8953830003738403, + "step": 519 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 34.510915012884716, + "learning_rate": 4.8970349332079155e-06, + "loss": 2.0606470108032227, + "step": 520 + }, + { + "epoch": 1.67524115755627, + "grad_norm": 22.26005680178925, + "learning_rate": 4.8783174243343765e-06, + "loss": 1.9117261171340942, + "step": 521 + }, + { + "epoch": 1.6784565916398715, + "grad_norm": 11.981892440704723, + "learning_rate": 4.8596016215578e-06, + "loss": 1.107745885848999, + "step": 522 + }, + { + "epoch": 1.6816720257234725, + "grad_norm": 7.902869741318056, + "learning_rate": 4.8408877872902404e-06, + "loss": 2.056065559387207, + "step": 523 + }, + { + "epoch": 1.684887459807074, + "grad_norm": 14.657507529582745, + "learning_rate": 4.822176183916147e-06, + "loss": 1.2189289331436157, + "step": 524 + }, + { + "epoch": 1.6881028938906752, + "grad_norm": 15.323149691380317, + "learning_rate": 4.803467073788694e-06, + "loss": 1.1678478717803955, + "step": 525 + }, + { + "epoch": 1.6913183279742765, + "grad_norm": 10.369820938531406, + "learning_rate": 4.7847607192260916e-06, + "loss": 1.3617630004882812, + "step": 526 + }, + { + "epoch": 1.694533762057878, + "grad_norm": 16.278666347716534, + "learning_rate": 4.766057382507924e-06, + "loss": 1.8124133348464966, + "step": 527 + }, + { + "epoch": 1.697749196141479, + "grad_norm": 11.617355712233303, + "learning_rate": 4.747357325871454e-06, + "loss": 1.7624785900115967, + "step": 528 + }, + { + "epoch": 1.7009646302250805, + "grad_norm": 6.976469169369697, + "learning_rate": 4.72866081150796e-06, + "loss": 1.804181456565857, + "step": 529 + }, + { + "epoch": 1.7041800643086815, + "grad_norm": 8.884998817103593, + "learning_rate": 4.709968101559051e-06, + "loss": 1.8973114490509033, + "step": 530 + }, + { + "epoch": 1.707395498392283, + "grad_norm": 10.671057224558174, + "learning_rate": 4.6912794581129985e-06, + "loss": 2.0870800018310547, + "step": 531 + }, + { + "epoch": 1.7106109324758842, + "grad_norm": 10.490926549117916, + "learning_rate": 4.672595143201056e-06, + "loss": 1.4478678703308105, + "step": 532 + }, + { + "epoch": 1.7138263665594855, + "grad_norm": 10.312387100131009, + "learning_rate": 4.653915418793786e-06, + "loss": 1.0462535619735718, + "step": 533 + }, + { + "epoch": 1.717041800643087, + "grad_norm": 12.221086012098088, + "learning_rate": 4.63524054679739e-06, + "loss": 1.0799921751022339, + "step": 534 + }, + { + "epoch": 1.720257234726688, + "grad_norm": 6.827983273847641, + "learning_rate": 4.616570789050034e-06, + "loss": 0.9600023031234741, + "step": 535 + }, + { + "epoch": 1.7234726688102895, + "grad_norm": 8.94999117796044, + "learning_rate": 4.5979064073181775e-06, + "loss": 1.4467713832855225, + "step": 536 + }, + { + "epoch": 1.7266881028938905, + "grad_norm": 11.947768679414317, + "learning_rate": 4.579247663292903e-06, + "loss": 1.1485623121261597, + "step": 537 + }, + { + "epoch": 1.729903536977492, + "grad_norm": 7.738903631330294, + "learning_rate": 4.560594818586248e-06, + "loss": 1.881239891052246, + "step": 538 + }, + { + "epoch": 1.7331189710610932, + "grad_norm": 14.43039125664941, + "learning_rate": 4.541948134727538e-06, + "loss": 1.4121603965759277, + "step": 539 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 13.559187134442164, + "learning_rate": 4.523307873159714e-06, + "loss": 1.600027084350586, + "step": 540 + }, + { + "epoch": 1.739549839228296, + "grad_norm": 12.924451068775486, + "learning_rate": 4.504674295235673e-06, + "loss": 1.9366849660873413, + "step": 541 + }, + { + "epoch": 1.742765273311897, + "grad_norm": 10.348503628222995, + "learning_rate": 4.486047662214602e-06, + "loss": 1.147647500038147, + "step": 542 + }, + { + "epoch": 1.7459807073954985, + "grad_norm": 9.22563519797577, + "learning_rate": 4.467428235258315e-06, + "loss": 2.267954111099243, + "step": 543 + }, + { + "epoch": 1.7491961414790995, + "grad_norm": 11.921588194327418, + "learning_rate": 4.448816275427589e-06, + "loss": 1.3431754112243652, + "step": 544 + }, + { + "epoch": 1.752411575562701, + "grad_norm": 13.456836651748802, + "learning_rate": 4.430212043678506e-06, + "loss": 1.371949553489685, + "step": 545 + }, + { + "epoch": 1.7556270096463023, + "grad_norm": 11.31057602108461, + "learning_rate": 4.41161580085879e-06, + "loss": 1.2015950679779053, + "step": 546 + }, + { + "epoch": 1.7588424437299035, + "grad_norm": 11.775227684435517, + "learning_rate": 4.39302780770416e-06, + "loss": 1.2269978523254395, + "step": 547 + }, + { + "epoch": 1.762057877813505, + "grad_norm": 9.277512918356958, + "learning_rate": 4.374448324834664e-06, + "loss": 1.1524686813354492, + "step": 548 + }, + { + "epoch": 1.765273311897106, + "grad_norm": 13.986012721095971, + "learning_rate": 4.355877612751027e-06, + "loss": 0.8838063478469849, + "step": 549 + }, + { + "epoch": 1.7684887459807075, + "grad_norm": 9.61624989824511, + "learning_rate": 4.337315931830999e-06, + "loss": 1.0831339359283447, + "step": 550 + }, + { + "epoch": 1.7717041800643085, + "grad_norm": 10.157468448463078, + "learning_rate": 4.318763542325711e-06, + "loss": 1.1000258922576904, + "step": 551 + }, + { + "epoch": 1.77491961414791, + "grad_norm": 9.188155688035968, + "learning_rate": 4.3002207043560135e-06, + "loss": 1.230742335319519, + "step": 552 + }, + { + "epoch": 1.7781350482315113, + "grad_norm": 9.615692013478174, + "learning_rate": 4.28168767790884e-06, + "loss": 1.103325605392456, + "step": 553 + }, + { + "epoch": 1.7813504823151125, + "grad_norm": 8.302705452574013, + "learning_rate": 4.263164722833556e-06, + "loss": 1.3692865371704102, + "step": 554 + }, + { + "epoch": 1.784565916398714, + "grad_norm": 14.643567177677879, + "learning_rate": 4.2446520988383185e-06, + "loss": 0.6936740875244141, + "step": 555 + }, + { + "epoch": 1.787781350482315, + "grad_norm": 9.360894269774661, + "learning_rate": 4.226150065486434e-06, + "loss": 0.7901575565338135, + "step": 556 + }, + { + "epoch": 1.7909967845659165, + "grad_norm": 12.472712807562056, + "learning_rate": 4.207658882192717e-06, + "loss": 1.3338205814361572, + "step": 557 + }, + { + "epoch": 1.7942122186495175, + "grad_norm": 11.717589080001146, + "learning_rate": 4.189178808219856e-06, + "loss": 1.3363522291183472, + "step": 558 + }, + { + "epoch": 1.797427652733119, + "grad_norm": 10.30067614909667, + "learning_rate": 4.170710102674777e-06, + "loss": 1.8522659540176392, + "step": 559 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 9.820476935188125, + "learning_rate": 4.152253024505011e-06, + "loss": 1.9775140285491943, + "step": 560 + }, + { + "epoch": 1.8038585209003215, + "grad_norm": 10.166187506670667, + "learning_rate": 4.133807832495062e-06, + "loss": 1.0015840530395508, + "step": 561 + }, + { + "epoch": 1.807073954983923, + "grad_norm": 15.340234839194505, + "learning_rate": 4.1153747852627775e-06, + "loss": 1.753740906715393, + "step": 562 + }, + { + "epoch": 1.810289389067524, + "grad_norm": 8.962676705869463, + "learning_rate": 4.096954141255731e-06, + "loss": 1.5827150344848633, + "step": 563 + }, + { + "epoch": 1.8135048231511255, + "grad_norm": 15.147384411595603, + "learning_rate": 4.078546158747586e-06, + "loss": 0.916832685470581, + "step": 564 + }, + { + "epoch": 1.8167202572347267, + "grad_norm": 10.569002234689366, + "learning_rate": 4.060151095834482e-06, + "loss": 1.4536582231521606, + "step": 565 + }, + { + "epoch": 1.819935691318328, + "grad_norm": 9.415759397401844, + "learning_rate": 4.041769210431414e-06, + "loss": 1.4267023801803589, + "step": 566 + }, + { + "epoch": 1.8231511254019293, + "grad_norm": 7.078102010311895, + "learning_rate": 4.02340076026862e-06, + "loss": 1.1329270601272583, + "step": 567 + }, + { + "epoch": 1.8263665594855305, + "grad_norm": 10.876729484364853, + "learning_rate": 4.00504600288796e-06, + "loss": 1.7833589315414429, + "step": 568 + }, + { + "epoch": 1.829581993569132, + "grad_norm": 13.635055310739693, + "learning_rate": 3.9867051956393114e-06, + "loss": 1.2976348400115967, + "step": 569 + }, + { + "epoch": 1.832797427652733, + "grad_norm": 10.771584206810726, + "learning_rate": 3.968378595676956e-06, + "loss": 1.4629448652267456, + "step": 570 + }, + { + "epoch": 1.8360128617363345, + "grad_norm": 11.703783804229115, + "learning_rate": 3.95006645995598e-06, + "loss": 2.5722076892852783, + "step": 571 + }, + { + "epoch": 1.8392282958199357, + "grad_norm": 12.349504033741153, + "learning_rate": 3.931769045228668e-06, + "loss": 2.2091426849365234, + "step": 572 + }, + { + "epoch": 1.842443729903537, + "grad_norm": 11.542335230955135, + "learning_rate": 3.9134866080409e-06, + "loss": 1.391589641571045, + "step": 573 + }, + { + "epoch": 1.8456591639871383, + "grad_norm": 12.806637836059952, + "learning_rate": 3.895219404728561e-06, + "loss": 1.1414397954940796, + "step": 574 + }, + { + "epoch": 1.8488745980707395, + "grad_norm": 14.264578459530476, + "learning_rate": 3.8769676914139426e-06, + "loss": 1.059516191482544, + "step": 575 + }, + { + "epoch": 1.852090032154341, + "grad_norm": 19.271915274351503, + "learning_rate": 3.858731724002153e-06, + "loss": 2.1879444122314453, + "step": 576 + }, + { + "epoch": 1.855305466237942, + "grad_norm": 12.309421348778711, + "learning_rate": 3.840511758177528e-06, + "loss": 1.473386526107788, + "step": 577 + }, + { + "epoch": 1.8585209003215435, + "grad_norm": 10.878678959452348, + "learning_rate": 3.822308049400047e-06, + "loss": 1.4653959274291992, + "step": 578 + }, + { + "epoch": 1.8617363344051447, + "grad_norm": 8.040534262037765, + "learning_rate": 3.804120852901756e-06, + "loss": 1.6413612365722656, + "step": 579 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 9.044916275419325, + "learning_rate": 3.7859504236831766e-06, + "loss": 1.316032886505127, + "step": 580 + }, + { + "epoch": 1.8681672025723473, + "grad_norm": 14.730705685102624, + "learning_rate": 3.7677970165097444e-06, + "loss": 1.7361235618591309, + "step": 581 + }, + { + "epoch": 1.8713826366559485, + "grad_norm": 7.6900213009486675, + "learning_rate": 3.749660885908226e-06, + "loss": 1.667905569076538, + "step": 582 + }, + { + "epoch": 1.87459807073955, + "grad_norm": 11.07535258062379, + "learning_rate": 3.7315422861631623e-06, + "loss": 1.065222978591919, + "step": 583 + }, + { + "epoch": 1.877813504823151, + "grad_norm": 8.326344701604272, + "learning_rate": 3.7134414713132883e-06, + "loss": 1.115635633468628, + "step": 584 + }, + { + "epoch": 1.8810289389067525, + "grad_norm": 18.801864494350724, + "learning_rate": 3.6953586951479834e-06, + "loss": 2.2017221450805664, + "step": 585 + }, + { + "epoch": 1.8842443729903537, + "grad_norm": 25.418633684284842, + "learning_rate": 3.677294211203708e-06, + "loss": 2.484564781188965, + "step": 586 + }, + { + "epoch": 1.887459807073955, + "grad_norm": 12.423647444445745, + "learning_rate": 3.6592482727604508e-06, + "loss": 1.3357205390930176, + "step": 587 + }, + { + "epoch": 1.8906752411575563, + "grad_norm": 12.144643827381929, + "learning_rate": 3.641221132838173e-06, + "loss": 1.3544979095458984, + "step": 588 + }, + { + "epoch": 1.8938906752411575, + "grad_norm": 12.46311005742411, + "learning_rate": 3.623213044193266e-06, + "loss": 0.9614198207855225, + "step": 589 + }, + { + "epoch": 1.897106109324759, + "grad_norm": 10.364954003308453, + "learning_rate": 3.605224259315005e-06, + "loss": 1.2620229721069336, + "step": 590 + }, + { + "epoch": 1.90032154340836, + "grad_norm": 9.237871106266182, + "learning_rate": 3.587255030422011e-06, + "loss": 1.886639952659607, + "step": 591 + }, + { + "epoch": 1.9035369774919615, + "grad_norm": 13.030874547511374, + "learning_rate": 3.569305609458712e-06, + "loss": 1.2755482196807861, + "step": 592 + }, + { + "epoch": 1.9067524115755627, + "grad_norm": 11.110774284864801, + "learning_rate": 3.5513762480918084e-06, + "loss": 1.4138600826263428, + "step": 593 + }, + { + "epoch": 1.909967845659164, + "grad_norm": 13.866814610623521, + "learning_rate": 3.5334671977067504e-06, + "loss": 1.2061731815338135, + "step": 594 + }, + { + "epoch": 1.9131832797427653, + "grad_norm": 11.47086636607418, + "learning_rate": 3.5155787094042113e-06, + "loss": 1.641986608505249, + "step": 595 + }, + { + "epoch": 1.9163987138263665, + "grad_norm": 12.881675239858552, + "learning_rate": 3.497711033996564e-06, + "loss": 1.2158643007278442, + "step": 596 + }, + { + "epoch": 1.919614147909968, + "grad_norm": 10.830930409963317, + "learning_rate": 3.4798644220043663e-06, + "loss": 1.8701896667480469, + "step": 597 + }, + { + "epoch": 1.922829581993569, + "grad_norm": 18.238087057780916, + "learning_rate": 3.462039123652847e-06, + "loss": 1.195351243019104, + "step": 598 + }, + { + "epoch": 1.9260450160771705, + "grad_norm": 18.418356217668133, + "learning_rate": 3.444235388868403e-06, + "loss": 1.2579845190048218, + "step": 599 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 12.58424302992326, + "learning_rate": 3.4264534672750884e-06, + "loss": 1.524860143661499, + "step": 600 + }, + { + "epoch": 1.932475884244373, + "grad_norm": 8.098300750497785, + "learning_rate": 3.408693608191116e-06, + "loss": 1.286231517791748, + "step": 601 + }, + { + "epoch": 1.9356913183279743, + "grad_norm": 9.863741692326508, + "learning_rate": 3.3909560606253632e-06, + "loss": 1.322242021560669, + "step": 602 + }, + { + "epoch": 1.9389067524115755, + "grad_norm": 12.987443308520445, + "learning_rate": 3.3732410732738843e-06, + "loss": 1.4537055492401123, + "step": 603 + }, + { + "epoch": 1.942122186495177, + "grad_norm": 10.506554610719009, + "learning_rate": 3.3555488945164127e-06, + "loss": 1.5410267114639282, + "step": 604 + }, + { + "epoch": 1.945337620578778, + "grad_norm": 10.174939767346636, + "learning_rate": 3.337879772412892e-06, + "loss": 1.754872441291809, + "step": 605 + }, + { + "epoch": 1.9485530546623795, + "grad_norm": 10.59532168962947, + "learning_rate": 3.320233954699985e-06, + "loss": 1.4205820560455322, + "step": 606 + }, + { + "epoch": 1.9517684887459807, + "grad_norm": 25.622835260786083, + "learning_rate": 3.302611688787612e-06, + "loss": 1.73175048828125, + "step": 607 + }, + { + "epoch": 1.954983922829582, + "grad_norm": 9.901036226861155, + "learning_rate": 3.285013221755472e-06, + "loss": 1.4618065357208252, + "step": 608 + }, + { + "epoch": 1.9581993569131833, + "grad_norm": 13.405612432143831, + "learning_rate": 3.267438800349586e-06, + "loss": 1.0909953117370605, + "step": 609 + }, + { + "epoch": 1.9614147909967845, + "grad_norm": 14.915858656289252, + "learning_rate": 3.2498886709788298e-06, + "loss": 1.2120417356491089, + "step": 610 + }, + { + "epoch": 1.964630225080386, + "grad_norm": 11.503072772803362, + "learning_rate": 3.2323630797114892e-06, + "loss": 1.560947060585022, + "step": 611 + }, + { + "epoch": 1.967845659163987, + "grad_norm": 13.493518918254455, + "learning_rate": 3.214862272271799e-06, + "loss": 1.1600837707519531, + "step": 612 + }, + { + "epoch": 1.9710610932475885, + "grad_norm": 15.01023608928888, + "learning_rate": 3.1973864940365076e-06, + "loss": 1.0669816732406616, + "step": 613 + }, + { + "epoch": 1.9742765273311897, + "grad_norm": 12.264629967700163, + "learning_rate": 3.179935990031425e-06, + "loss": 1.236401081085205, + "step": 614 + }, + { + "epoch": 1.977491961414791, + "grad_norm": 12.416946892637084, + "learning_rate": 3.162511004928003e-06, + "loss": 0.8553851842880249, + "step": 615 + }, + { + "epoch": 1.9807073954983923, + "grad_norm": 10.966787271475475, + "learning_rate": 3.1451117830398896e-06, + "loss": 1.6353504657745361, + "step": 616 + }, + { + "epoch": 1.9839228295819935, + "grad_norm": 11.04503339220036, + "learning_rate": 3.1277385683195117e-06, + "loss": 1.8089317083358765, + "step": 617 + }, + { + "epoch": 1.987138263665595, + "grad_norm": 16.458323320048635, + "learning_rate": 3.110391604354652e-06, + "loss": 1.1577980518341064, + "step": 618 + }, + { + "epoch": 1.990353697749196, + "grad_norm": 16.20975999278611, + "learning_rate": 3.093071134365037e-06, + "loss": 2.4027674198150635, + "step": 619 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 23.51251674539541, + "learning_rate": 3.075777401198922e-06, + "loss": 1.3853836059570312, + "step": 620 + }, + { + "epoch": 1.9967845659163987, + "grad_norm": 8.708072763645559, + "learning_rate": 3.058510647329688e-06, + "loss": 2.743666410446167, + "step": 621 + }, + { + "epoch": 2.0, + "grad_norm": 11.979852138626141, + "learning_rate": 3.041271114852443e-06, + "loss": 1.1549768447875977, + "step": 622 + }, + { + "epoch": 2.0032154340836015, + "grad_norm": 14.314789124914883, + "learning_rate": 3.02405904548063e-06, + "loss": 1.0811271667480469, + "step": 623 + }, + { + "epoch": 2.0064308681672025, + "grad_norm": 9.87321228153049, + "learning_rate": 3.0068746805426318e-06, + "loss": 0.35157322883605957, + "step": 624 + }, + { + "epoch": 2.009646302250804, + "grad_norm": 9.320230936525673, + "learning_rate": 2.9897182609783905e-06, + "loss": 0.40260130167007446, + "step": 625 + }, + { + "epoch": 2.012861736334405, + "grad_norm": 9.052307743678542, + "learning_rate": 2.97259002733603e-06, + "loss": 0.870284378528595, + "step": 626 + }, + { + "epoch": 2.0160771704180065, + "grad_norm": 8.178905821906115, + "learning_rate": 2.9554902197684843e-06, + "loss": 0.4470030963420868, + "step": 627 + }, + { + "epoch": 2.0192926045016075, + "grad_norm": 12.906748407694202, + "learning_rate": 2.938419078030128e-06, + "loss": 0.6769909858703613, + "step": 628 + }, + { + "epoch": 2.022508038585209, + "grad_norm": 15.160992244136171, + "learning_rate": 2.9213768414734146e-06, + "loss": 0.5105010271072388, + "step": 629 + }, + { + "epoch": 2.0257234726688105, + "grad_norm": 9.593795396914789, + "learning_rate": 2.90436374904552e-06, + "loss": 0.5568506121635437, + "step": 630 + }, + { + "epoch": 2.0289389067524115, + "grad_norm": 8.019441658658858, + "learning_rate": 2.887380039284999e-06, + "loss": 0.21812975406646729, + "step": 631 + }, + { + "epoch": 2.032154340836013, + "grad_norm": 7.294457920425058, + "learning_rate": 2.8704259503184306e-06, + "loss": 0.5240740776062012, + "step": 632 + }, + { + "epoch": 2.035369774919614, + "grad_norm": 8.804193733481519, + "learning_rate": 2.853501719857086e-06, + "loss": 0.3466935455799103, + "step": 633 + }, + { + "epoch": 2.0385852090032155, + "grad_norm": 10.734383468470607, + "learning_rate": 2.8366075851935927e-06, + "loss": 0.6415536403656006, + "step": 634 + }, + { + "epoch": 2.0418006430868165, + "grad_norm": 7.475946116985285, + "learning_rate": 2.8197437831986085e-06, + "loss": 0.4148802161216736, + "step": 635 + }, + { + "epoch": 2.045016077170418, + "grad_norm": 11.258686396792596, + "learning_rate": 2.802910550317506e-06, + "loss": 0.7162201404571533, + "step": 636 + }, + { + "epoch": 2.0482315112540195, + "grad_norm": 9.209880063404333, + "learning_rate": 2.786108122567044e-06, + "loss": 0.5609467029571533, + "step": 637 + }, + { + "epoch": 2.0514469453376205, + "grad_norm": 10.662055158233054, + "learning_rate": 2.769336735532068e-06, + "loss": 0.4859113395214081, + "step": 638 + }, + { + "epoch": 2.054662379421222, + "grad_norm": 8.827327001805601, + "learning_rate": 2.7525966243622105e-06, + "loss": 0.35435792803764343, + "step": 639 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 8.027492720872965, + "learning_rate": 2.7358880237685844e-06, + "loss": 0.2768567204475403, + "step": 640 + }, + { + "epoch": 2.0610932475884245, + "grad_norm": 10.207767409910922, + "learning_rate": 2.7192111680204957e-06, + "loss": 0.24898266792297363, + "step": 641 + }, + { + "epoch": 2.0643086816720255, + "grad_norm": 9.78068569866971, + "learning_rate": 2.7025662909421625e-06, + "loss": 0.7269777059555054, + "step": 642 + }, + { + "epoch": 2.067524115755627, + "grad_norm": 14.513005174422293, + "learning_rate": 2.685953625909432e-06, + "loss": 0.6724145412445068, + "step": 643 + }, + { + "epoch": 2.0707395498392285, + "grad_norm": 12.097896199856809, + "learning_rate": 2.6693734058465105e-06, + "loss": 0.5692760348320007, + "step": 644 + }, + { + "epoch": 2.0739549839228295, + "grad_norm": 13.857845362848131, + "learning_rate": 2.652825863222698e-06, + "loss": 0.5785061717033386, + "step": 645 + }, + { + "epoch": 2.077170418006431, + "grad_norm": 9.786540455953286, + "learning_rate": 2.636311230049125e-06, + "loss": 0.3663535416126251, + "step": 646 + }, + { + "epoch": 2.080385852090032, + "grad_norm": 12.912854147301195, + "learning_rate": 2.619829737875509e-06, + "loss": 0.4806700050830841, + "step": 647 + }, + { + "epoch": 2.0836012861736335, + "grad_norm": 8.68661115707137, + "learning_rate": 2.6033816177868954e-06, + "loss": 0.5164961814880371, + "step": 648 + }, + { + "epoch": 2.0868167202572345, + "grad_norm": 13.93536389564521, + "learning_rate": 2.5869671004004256e-06, + "loss": 0.6894420385360718, + "step": 649 + }, + { + "epoch": 2.090032154340836, + "grad_norm": 7.225443037652673, + "learning_rate": 2.5705864158621008e-06, + "loss": 0.4011325538158417, + "step": 650 + }, + { + "epoch": 2.0932475884244375, + "grad_norm": 23.325451994129086, + "learning_rate": 2.5542397938435574e-06, + "loss": 0.745391845703125, + "step": 651 + }, + { + "epoch": 2.0964630225080385, + "grad_norm": 9.205824646857016, + "learning_rate": 2.537927463538844e-06, + "loss": 0.3058035969734192, + "step": 652 + }, + { + "epoch": 2.09967845659164, + "grad_norm": 14.483317104386071, + "learning_rate": 2.521649653661209e-06, + "loss": 0.33844730257987976, + "step": 653 + }, + { + "epoch": 2.102893890675241, + "grad_norm": 10.748115186443174, + "learning_rate": 2.5054065924398934e-06, + "loss": 0.3529083728790283, + "step": 654 + }, + { + "epoch": 2.1061093247588425, + "grad_norm": 7.134237797900679, + "learning_rate": 2.4891985076169356e-06, + "loss": 0.3711613714694977, + "step": 655 + }, + { + "epoch": 2.1093247588424435, + "grad_norm": 9.555120459648053, + "learning_rate": 2.473025626443969e-06, + "loss": 0.7479931712150574, + "step": 656 + }, + { + "epoch": 2.112540192926045, + "grad_norm": 19.610879263288407, + "learning_rate": 2.4568881756790436e-06, + "loss": 1.7793715000152588, + "step": 657 + }, + { + "epoch": 2.1157556270096465, + "grad_norm": 15.31483056892348, + "learning_rate": 2.4407863815834414e-06, + "loss": 1.3851677179336548, + "step": 658 + }, + { + "epoch": 2.1189710610932475, + "grad_norm": 12.915717617411342, + "learning_rate": 2.42472046991851e-06, + "loss": 1.4731594324111938, + "step": 659 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 7.978020539846432, + "learning_rate": 2.4086906659424904e-06, + "loss": 0.5832240581512451, + "step": 660 + }, + { + "epoch": 2.12540192926045, + "grad_norm": 8.57893982485209, + "learning_rate": 2.392697194407363e-06, + "loss": 0.18587762117385864, + "step": 661 + }, + { + "epoch": 2.1286173633440515, + "grad_norm": 16.040640516050228, + "learning_rate": 2.3767402795556953e-06, + "loss": 1.4967979192733765, + "step": 662 + }, + { + "epoch": 2.1318327974276525, + "grad_norm": 8.800901865323832, + "learning_rate": 2.3608201451175004e-06, + "loss": 0.6785554885864258, + "step": 663 + }, + { + "epoch": 2.135048231511254, + "grad_norm": 8.369254419222456, + "learning_rate": 2.3449370143070948e-06, + "loss": 0.3392030596733093, + "step": 664 + }, + { + "epoch": 2.1382636655948555, + "grad_norm": 13.581695588001903, + "learning_rate": 2.329091109819972e-06, + "loss": 1.3503499031066895, + "step": 665 + }, + { + "epoch": 2.1414790996784565, + "grad_norm": 9.544273690674421, + "learning_rate": 2.313282653829679e-06, + "loss": 0.4895798861980438, + "step": 666 + }, + { + "epoch": 2.144694533762058, + "grad_norm": 7.497690746653366, + "learning_rate": 2.297511867984703e-06, + "loss": 0.5282753109931946, + "step": 667 + }, + { + "epoch": 2.147909967845659, + "grad_norm": 15.108645601163957, + "learning_rate": 2.2817789734053626e-06, + "loss": 0.44572100043296814, + "step": 668 + }, + { + "epoch": 2.1511254019292605, + "grad_norm": 9.522046573188412, + "learning_rate": 2.266084190680707e-06, + "loss": 0.3825170397758484, + "step": 669 + }, + { + "epoch": 2.154340836012862, + "grad_norm": 11.927142058431379, + "learning_rate": 2.250427739865421e-06, + "loss": 0.8217453360557556, + "step": 670 + }, + { + "epoch": 2.157556270096463, + "grad_norm": 11.122450888975413, + "learning_rate": 2.23480984047675e-06, + "loss": 0.32945936918258667, + "step": 671 + }, + { + "epoch": 2.1607717041800645, + "grad_norm": 10.060653422202963, + "learning_rate": 2.219230711491406e-06, + "loss": 0.3554953634738922, + "step": 672 + }, + { + "epoch": 2.1639871382636655, + "grad_norm": 10.224259390532415, + "learning_rate": 2.2036905713425104e-06, + "loss": 0.7275658845901489, + "step": 673 + }, + { + "epoch": 2.167202572347267, + "grad_norm": 9.307333936478546, + "learning_rate": 2.1881896379165253e-06, + "loss": 0.5208015441894531, + "step": 674 + }, + { + "epoch": 2.170418006430868, + "grad_norm": 16.480106084286305, + "learning_rate": 2.172728128550199e-06, + "loss": 0.8410961627960205, + "step": 675 + }, + { + "epoch": 2.1736334405144695, + "grad_norm": 23.43109332519012, + "learning_rate": 2.1573062600275217e-06, + "loss": 0.9196676015853882, + "step": 676 + }, + { + "epoch": 2.176848874598071, + "grad_norm": 8.075311133528524, + "learning_rate": 2.1419242485766834e-06, + "loss": 0.4561595022678375, + "step": 677 + }, + { + "epoch": 2.180064308681672, + "grad_norm": 6.991604551367555, + "learning_rate": 2.126582309867041e-06, + "loss": 0.2470824271440506, + "step": 678 + }, + { + "epoch": 2.1832797427652735, + "grad_norm": 11.89113431006695, + "learning_rate": 2.1112806590061006e-06, + "loss": 0.366765558719635, + "step": 679 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 7.200037824647487, + "learning_rate": 2.0960195105364935e-06, + "loss": 0.11232413351535797, + "step": 680 + }, + { + "epoch": 2.189710610932476, + "grad_norm": 15.235602952570773, + "learning_rate": 2.080799078432972e-06, + "loss": 0.5797847509384155, + "step": 681 + }, + { + "epoch": 2.192926045016077, + "grad_norm": 12.082768147438976, + "learning_rate": 2.0656195760994104e-06, + "loss": 0.3955646753311157, + "step": 682 + }, + { + "epoch": 2.1961414790996785, + "grad_norm": 10.182906221123625, + "learning_rate": 2.0504812163658104e-06, + "loss": 0.27037519216537476, + "step": 683 + }, + { + "epoch": 2.19935691318328, + "grad_norm": 16.725089320535528, + "learning_rate": 2.0353842114853194e-06, + "loss": 1.0738942623138428, + "step": 684 + }, + { + "epoch": 2.202572347266881, + "grad_norm": 14.80956824571125, + "learning_rate": 2.020328773131252e-06, + "loss": 0.48105546832084656, + "step": 685 + }, + { + "epoch": 2.2057877813504825, + "grad_norm": 14.488092854169647, + "learning_rate": 2.005315112394122e-06, + "loss": 0.37764203548431396, + "step": 686 + }, + { + "epoch": 2.2090032154340835, + "grad_norm": 35.11784903119427, + "learning_rate": 1.990343439778691e-06, + "loss": 1.3033102750778198, + "step": 687 + }, + { + "epoch": 2.212218649517685, + "grad_norm": 11.32083821956754, + "learning_rate": 1.9754139652010025e-06, + "loss": 0.3128071427345276, + "step": 688 + }, + { + "epoch": 2.215434083601286, + "grad_norm": 7.739863787904974, + "learning_rate": 1.9605268979854493e-06, + "loss": 0.25044938921928406, + "step": 689 + }, + { + "epoch": 2.2186495176848875, + "grad_norm": 9.336795568467322, + "learning_rate": 1.9456824468618365e-06, + "loss": 0.7681007981300354, + "step": 690 + }, + { + "epoch": 2.221864951768489, + "grad_norm": 8.47441592443782, + "learning_rate": 1.9308808199624518e-06, + "loss": 0.8675619959831238, + "step": 691 + }, + { + "epoch": 2.22508038585209, + "grad_norm": 13.531497358422932, + "learning_rate": 1.9161222248191515e-06, + "loss": 0.3523237109184265, + "step": 692 + }, + { + "epoch": 2.2282958199356915, + "grad_norm": 7.7686044200353646, + "learning_rate": 1.9014068683604475e-06, + "loss": 0.5673654079437256, + "step": 693 + }, + { + "epoch": 2.2315112540192925, + "grad_norm": 12.607843632159877, + "learning_rate": 1.8867349569086064e-06, + "loss": 0.4956747889518738, + "step": 694 + }, + { + "epoch": 2.234726688102894, + "grad_norm": 20.83086061852303, + "learning_rate": 1.8721066961767626e-06, + "loss": 1.0982662439346313, + "step": 695 + }, + { + "epoch": 2.237942122186495, + "grad_norm": 28.0587449883014, + "learning_rate": 1.8575222912660224e-06, + "loss": 0.7824825644493103, + "step": 696 + }, + { + "epoch": 2.2411575562700965, + "grad_norm": 13.190430546984835, + "learning_rate": 1.8429819466625993e-06, + "loss": 0.9699738025665283, + "step": 697 + }, + { + "epoch": 2.244372990353698, + "grad_norm": 9.095739529876795, + "learning_rate": 1.8284858662349391e-06, + "loss": 0.2911728620529175, + "step": 698 + }, + { + "epoch": 2.247588424437299, + "grad_norm": 10.728396018637426, + "learning_rate": 1.8140342532308675e-06, + "loss": 0.21695515513420105, + "step": 699 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 11.998687234131197, + "learning_rate": 1.7996273102747363e-06, + "loss": 0.5445791482925415, + "step": 700 + }, + { + "epoch": 2.2540192926045015, + "grad_norm": 11.429548764717456, + "learning_rate": 1.7852652393645842e-06, + "loss": 0.5194580554962158, + "step": 701 + }, + { + "epoch": 2.257234726688103, + "grad_norm": 7.56653505390186, + "learning_rate": 1.7709482418693036e-06, + "loss": 0.23896172642707825, + "step": 702 + }, + { + "epoch": 2.260450160771704, + "grad_norm": 9.62095075841344, + "learning_rate": 1.7566765185258205e-06, + "loss": 0.35497206449508667, + "step": 703 + }, + { + "epoch": 2.2636655948553055, + "grad_norm": 8.254578062437453, + "learning_rate": 1.7424502694362755e-06, + "loss": 0.5100143551826477, + "step": 704 + }, + { + "epoch": 2.266881028938907, + "grad_norm": 9.058208468637773, + "learning_rate": 1.7282696940652188e-06, + "loss": 0.3519476652145386, + "step": 705 + }, + { + "epoch": 2.270096463022508, + "grad_norm": 10.161898375908322, + "learning_rate": 1.714134991236817e-06, + "loss": 0.317427396774292, + "step": 706 + }, + { + "epoch": 2.2733118971061095, + "grad_norm": 13.112579685570976, + "learning_rate": 1.7000463591320621e-06, + "loss": 0.2504899799823761, + "step": 707 + }, + { + "epoch": 2.2765273311897105, + "grad_norm": 7.423757130841184, + "learning_rate": 1.6860039952859941e-06, + "loss": 0.6729331612586975, + "step": 708 + }, + { + "epoch": 2.279742765273312, + "grad_norm": 10.935414242351301, + "learning_rate": 1.672008096584931e-06, + "loss": 0.2858375310897827, + "step": 709 + }, + { + "epoch": 2.282958199356913, + "grad_norm": 7.614378612646665, + "learning_rate": 1.658058859263708e-06, + "loss": 0.2584810256958008, + "step": 710 + }, + { + "epoch": 2.2861736334405145, + "grad_norm": 9.020570310988946, + "learning_rate": 1.64415647890293e-06, + "loss": 0.6542217135429382, + "step": 711 + }, + { + "epoch": 2.289389067524116, + "grad_norm": 8.199722638862658, + "learning_rate": 1.6303011504262223e-06, + "loss": 0.7301985025405884, + "step": 712 + }, + { + "epoch": 2.292604501607717, + "grad_norm": 16.272283444338548, + "learning_rate": 1.6164930680975021e-06, + "loss": 0.5628085136413574, + "step": 713 + }, + { + "epoch": 2.2958199356913185, + "grad_norm": 10.028333472377934, + "learning_rate": 1.6027324255182547e-06, + "loss": 0.6142044067382812, + "step": 714 + }, + { + "epoch": 2.2990353697749195, + "grad_norm": 8.45235295984565, + "learning_rate": 1.5890194156248178e-06, + "loss": 0.1730177402496338, + "step": 715 + }, + { + "epoch": 2.302250803858521, + "grad_norm": 19.06820769510073, + "learning_rate": 1.5753542306856774e-06, + "loss": 0.44994986057281494, + "step": 716 + }, + { + "epoch": 2.305466237942122, + "grad_norm": 13.015925232226362, + "learning_rate": 1.5617370622987703e-06, + "loss": 0.4153675436973572, + "step": 717 + }, + { + "epoch": 2.3086816720257235, + "grad_norm": 15.815386750824745, + "learning_rate": 1.548168101388799e-06, + "loss": 0.7933076620101929, + "step": 718 + }, + { + "epoch": 2.311897106109325, + "grad_norm": 6.8033416899165475, + "learning_rate": 1.5346475382045578e-06, + "loss": 0.2861023545265198, + "step": 719 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 7.586806061312669, + "learning_rate": 1.5211755623162588e-06, + "loss": 0.21383363008499146, + "step": 720 + }, + { + "epoch": 2.3183279742765275, + "grad_norm": 8.35896871658916, + "learning_rate": 1.507752362612878e-06, + "loss": 0.34175100922584534, + "step": 721 + }, + { + "epoch": 2.3215434083601285, + "grad_norm": 11.144091740266603, + "learning_rate": 1.4943781272995073e-06, + "loss": 0.6520302295684814, + "step": 722 + }, + { + "epoch": 2.32475884244373, + "grad_norm": 9.891660748247679, + "learning_rate": 1.481053043894713e-06, + "loss": 0.47313952445983887, + "step": 723 + }, + { + "epoch": 2.327974276527331, + "grad_norm": 10.01954348978427, + "learning_rate": 1.467777299227911e-06, + "loss": 0.43662163615226746, + "step": 724 + }, + { + "epoch": 2.3311897106109325, + "grad_norm": 11.031433385009539, + "learning_rate": 1.4545510794367413e-06, + "loss": 0.8428820371627808, + "step": 725 + }, + { + "epoch": 2.334405144694534, + "grad_norm": 8.663154574713626, + "learning_rate": 1.4413745699644633e-06, + "loss": 0.3453049063682556, + "step": 726 + }, + { + "epoch": 2.337620578778135, + "grad_norm": 7.803079349836385, + "learning_rate": 1.4282479555573559e-06, + "loss": 0.2745356261730194, + "step": 727 + }, + { + "epoch": 2.3408360128617365, + "grad_norm": 10.72034771606454, + "learning_rate": 1.4151714202621214e-06, + "loss": 0.5829728245735168, + "step": 728 + }, + { + "epoch": 2.3440514469453375, + "grad_norm": 14.559259818421047, + "learning_rate": 1.4021451474233111e-06, + "loss": 0.599088191986084, + "step": 729 + }, + { + "epoch": 2.347266881028939, + "grad_norm": 11.885109180133032, + "learning_rate": 1.389169319680752e-06, + "loss": 0.6071078777313232, + "step": 730 + }, + { + "epoch": 2.35048231511254, + "grad_norm": 7.512662309336336, + "learning_rate": 1.3762441189669855e-06, + "loss": 0.7940813302993774, + "step": 731 + }, + { + "epoch": 2.3536977491961415, + "grad_norm": 9.282805302910258, + "learning_rate": 1.363369726504719e-06, + "loss": 0.6212410926818848, + "step": 732 + }, + { + "epoch": 2.356913183279743, + "grad_norm": 16.59514995951129, + "learning_rate": 1.3505463228042814e-06, + "loss": 0.7771339416503906, + "step": 733 + }, + { + "epoch": 2.360128617363344, + "grad_norm": 8.019573283607059, + "learning_rate": 1.337774087661095e-06, + "loss": 0.3336787819862366, + "step": 734 + }, + { + "epoch": 2.3633440514469455, + "grad_norm": 9.3480016042164, + "learning_rate": 1.3250532001531568e-06, + "loss": 0.3283434510231018, + "step": 735 + }, + { + "epoch": 2.3665594855305465, + "grad_norm": 15.615069116375588, + "learning_rate": 1.31238383863852e-06, + "loss": 0.747290849685669, + "step": 736 + }, + { + "epoch": 2.369774919614148, + "grad_norm": 11.478964562122078, + "learning_rate": 1.2997661807528011e-06, + "loss": 0.4145023226737976, + "step": 737 + }, + { + "epoch": 2.372990353697749, + "grad_norm": 9.674102610648761, + "learning_rate": 1.2872004034066843e-06, + "loss": 0.6085692644119263, + "step": 738 + }, + { + "epoch": 2.3762057877813505, + "grad_norm": 9.158481184369425, + "learning_rate": 1.2746866827834443e-06, + "loss": 0.35022085905075073, + "step": 739 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 10.878685805468756, + "learning_rate": 1.2622251943364733e-06, + "loss": 0.3418879210948944, + "step": 740 + }, + { + "epoch": 2.382636655948553, + "grad_norm": 14.543864763023729, + "learning_rate": 1.2498161127868236e-06, + "loss": 0.48997265100479126, + "step": 741 + }, + { + "epoch": 2.3858520900321545, + "grad_norm": 10.876583627678352, + "learning_rate": 1.237459612120755e-06, + "loss": 0.5536065697669983, + "step": 742 + }, + { + "epoch": 2.3890675241157555, + "grad_norm": 20.351717332652505, + "learning_rate": 1.2251558655873003e-06, + "loss": 0.4042523205280304, + "step": 743 + }, + { + "epoch": 2.392282958199357, + "grad_norm": 8.13192706557856, + "learning_rate": 1.2129050456958296e-06, + "loss": 0.3217351734638214, + "step": 744 + }, + { + "epoch": 2.395498392282958, + "grad_norm": 7.539239428048716, + "learning_rate": 1.2007073242136358e-06, + "loss": 0.7380541563034058, + "step": 745 + }, + { + "epoch": 2.3987138263665595, + "grad_norm": 10.893419775532118, + "learning_rate": 1.1885628721635256e-06, + "loss": 1.3607124090194702, + "step": 746 + }, + { + "epoch": 2.401929260450161, + "grad_norm": 8.269742975582927, + "learning_rate": 1.176471859821421e-06, + "loss": 0.5945987701416016, + "step": 747 + }, + { + "epoch": 2.405144694533762, + "grad_norm": 10.313373636714106, + "learning_rate": 1.1644344567139716e-06, + "loss": 0.44915276765823364, + "step": 748 + }, + { + "epoch": 2.4083601286173635, + "grad_norm": 9.146019028304934, + "learning_rate": 1.1524508316161799e-06, + "loss": 0.38859468698501587, + "step": 749 + }, + { + "epoch": 2.4115755627009645, + "grad_norm": 13.294635461287648, + "learning_rate": 1.1405211525490307e-06, + "loss": 0.20568466186523438, + "step": 750 + }, + { + "epoch": 2.414790996784566, + "grad_norm": 11.70213362190279, + "learning_rate": 1.1286455867771422e-06, + "loss": 0.6367174983024597, + "step": 751 + }, + { + "epoch": 2.418006430868167, + "grad_norm": 10.461363981706414, + "learning_rate": 1.1168243008064123e-06, + "loss": 0.30549776554107666, + "step": 752 + }, + { + "epoch": 2.4212218649517685, + "grad_norm": 10.69031162106857, + "learning_rate": 1.1050574603816905e-06, + "loss": 0.37557756900787354, + "step": 753 + }, + { + "epoch": 2.42443729903537, + "grad_norm": 7.379872280064574, + "learning_rate": 1.0933452304844505e-06, + "loss": 0.5297196507453918, + "step": 754 + }, + { + "epoch": 2.427652733118971, + "grad_norm": 9.7634723677066, + "learning_rate": 1.0816877753304777e-06, + "loss": 0.274686723947525, + "step": 755 + }, + { + "epoch": 2.4308681672025725, + "grad_norm": 7.237746575511637, + "learning_rate": 1.0700852583675708e-06, + "loss": 0.33781081438064575, + "step": 756 + }, + { + "epoch": 2.4340836012861735, + "grad_norm": 10.197522367598072, + "learning_rate": 1.0585378422732435e-06, + "loss": 0.3239028751850128, + "step": 757 + }, + { + "epoch": 2.437299035369775, + "grad_norm": 9.392166371827608, + "learning_rate": 1.0470456889524473e-06, + "loss": 0.34594273567199707, + "step": 758 + }, + { + "epoch": 2.440514469453376, + "grad_norm": 6.796254455681624, + "learning_rate": 1.0356089595353008e-06, + "loss": 0.21343210339546204, + "step": 759 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 7.480509218493879, + "learning_rate": 1.0242278143748307e-06, + "loss": 0.49799197912216187, + "step": 760 + }, + { + "epoch": 2.446945337620579, + "grad_norm": 8.95241961534226, + "learning_rate": 1.012902413044725e-06, + "loss": 0.3868298828601837, + "step": 761 + }, + { + "epoch": 2.45016077170418, + "grad_norm": 10.42651529337544, + "learning_rate": 1.0016329143370929e-06, + "loss": 0.264824241399765, + "step": 762 + }, + { + "epoch": 2.4533762057877815, + "grad_norm": 9.17833598375061, + "learning_rate": 9.904194762602382e-07, + "loss": 0.3985700011253357, + "step": 763 + }, + { + "epoch": 2.4565916398713825, + "grad_norm": 9.674121185768858, + "learning_rate": 9.792622560364467e-07, + "loss": 0.17972898483276367, + "step": 764 + }, + { + "epoch": 2.459807073954984, + "grad_norm": 8.206523942923813, + "learning_rate": 9.681614100997806e-07, + "loss": 0.3820546865463257, + "step": 765 + }, + { + "epoch": 2.463022508038585, + "grad_norm": 8.778341452335091, + "learning_rate": 9.57117094093884e-07, + "loss": 0.13588829338550568, + "step": 766 + }, + { + "epoch": 2.4662379421221865, + "grad_norm": 12.703644433268686, + "learning_rate": 9.46129462869802e-07, + "loss": 0.2940727472305298, + "step": 767 + }, + { + "epoch": 2.469453376205788, + "grad_norm": 10.30882189666681, + "learning_rate": 9.351986704838084e-07, + "loss": 0.6208051443099976, + "step": 768 + }, + { + "epoch": 2.472668810289389, + "grad_norm": 9.88354985018698, + "learning_rate": 9.243248701952489e-07, + "loss": 0.9009281396865845, + "step": 769 + }, + { + "epoch": 2.4758842443729905, + "grad_norm": 7.057277121900955, + "learning_rate": 9.135082144643869e-07, + "loss": 0.3825221061706543, + "step": 770 + }, + { + "epoch": 2.4790996784565915, + "grad_norm": 13.283010516967643, + "learning_rate": 9.0274885495027e-07, + "loss": 0.4601898789405823, + "step": 771 + }, + { + "epoch": 2.482315112540193, + "grad_norm": 9.034062537915768, + "learning_rate": 8.92046942508602e-07, + "loss": 0.31752362847328186, + "step": 772 + }, + { + "epoch": 2.485530546623794, + "grad_norm": 9.886229624024415, + "learning_rate": 8.814026271896275e-07, + "loss": 0.9294736981391907, + "step": 773 + }, + { + "epoch": 2.4887459807073955, + "grad_norm": 13.589832734358895, + "learning_rate": 8.708160582360303e-07, + "loss": 0.324219286441803, + "step": 774 + }, + { + "epoch": 2.491961414790997, + "grad_norm": 11.552870172052325, + "learning_rate": 8.602873840808379e-07, + "loss": 0.9777847528457642, + "step": 775 + }, + { + "epoch": 2.495176848874598, + "grad_norm": 14.788050425785567, + "learning_rate": 8.498167523453404e-07, + "loss": 0.5360197424888611, + "step": 776 + }, + { + "epoch": 2.4983922829581995, + "grad_norm": 13.424208182029288, + "learning_rate": 8.394043098370275e-07, + "loss": 0.31830108165740967, + "step": 777 + }, + { + "epoch": 2.5016077170418005, + "grad_norm": 20.795163896337453, + "learning_rate": 8.290502025475183e-07, + "loss": 0.9196930527687073, + "step": 778 + }, + { + "epoch": 2.504823151125402, + "grad_norm": 13.342352710693937, + "learning_rate": 8.187545756505244e-07, + "loss": 0.5551115870475769, + "step": 779 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 10.381356116441133, + "learning_rate": 8.085175734998091e-07, + "loss": 0.6043642163276672, + "step": 780 + }, + { + "epoch": 2.5112540192926045, + "grad_norm": 6.062655557368801, + "learning_rate": 7.98339339627166e-07, + "loss": 0.22657924890518188, + "step": 781 + }, + { + "epoch": 2.514469453376206, + "grad_norm": 10.807014185724375, + "learning_rate": 7.882200167404047e-07, + "loss": 0.259809672832489, + "step": 782 + }, + { + "epoch": 2.517684887459807, + "grad_norm": 7.872967554652684, + "learning_rate": 7.781597467213514e-07, + "loss": 0.4243828058242798, + "step": 783 + }, + { + "epoch": 2.5209003215434085, + "grad_norm": 14.382598474640337, + "learning_rate": 7.681586706238586e-07, + "loss": 0.5216907858848572, + "step": 784 + }, + { + "epoch": 2.5241157556270095, + "grad_norm": 6.741881997512629, + "learning_rate": 7.582169286718305e-07, + "loss": 0.260081946849823, + "step": 785 + }, + { + "epoch": 2.527331189710611, + "grad_norm": 7.491622137828397, + "learning_rate": 7.483346602572505e-07, + "loss": 0.3229532837867737, + "step": 786 + }, + { + "epoch": 2.530546623794212, + "grad_norm": 12.51190036363299, + "learning_rate": 7.385120039382326e-07, + "loss": 1.6990151405334473, + "step": 787 + }, + { + "epoch": 2.5337620578778135, + "grad_norm": 12.102101725870327, + "learning_rate": 7.287490974370759e-07, + "loss": 0.46230068802833557, + "step": 788 + }, + { + "epoch": 2.536977491961415, + "grad_norm": 7.3789531863557, + "learning_rate": 7.190460776383351e-07, + "loss": 0.1410602331161499, + "step": 789 + }, + { + "epoch": 2.540192926045016, + "grad_norm": 8.791343590023278, + "learning_rate": 7.094030805869001e-07, + "loss": 0.1462668925523758, + "step": 790 + }, + { + "epoch": 2.5434083601286175, + "grad_norm": 11.499145842643497, + "learning_rate": 6.998202414860894e-07, + "loss": 0.4931030869483948, + "step": 791 + }, + { + "epoch": 2.5466237942122185, + "grad_norm": 8.890241677567976, + "learning_rate": 6.902976946957518e-07, + "loss": 0.4790411591529846, + "step": 792 + }, + { + "epoch": 2.54983922829582, + "grad_norm": 7.357169662653574, + "learning_rate": 6.808355737303895e-07, + "loss": 0.25933071970939636, + "step": 793 + }, + { + "epoch": 2.553054662379421, + "grad_norm": 18.008951919388966, + "learning_rate": 6.71434011257277e-07, + "loss": 0.7769885063171387, + "step": 794 + }, + { + "epoch": 2.5562700964630225, + "grad_norm": 15.216768610586033, + "learning_rate": 6.620931390946078e-07, + "loss": 0.9100818634033203, + "step": 795 + }, + { + "epoch": 2.559485530546624, + "grad_norm": 10.749117948962583, + "learning_rate": 6.528130882096418e-07, + "loss": 0.31192898750305176, + "step": 796 + }, + { + "epoch": 2.562700964630225, + "grad_norm": 11.371166952776898, + "learning_rate": 6.435939887168718e-07, + "loss": 0.29440587759017944, + "step": 797 + }, + { + "epoch": 2.5659163987138265, + "grad_norm": 9.195291843958517, + "learning_rate": 6.344359698761998e-07, + "loss": 0.3736717402935028, + "step": 798 + }, + { + "epoch": 2.5691318327974275, + "grad_norm": 13.644043776235835, + "learning_rate": 6.253391600911213e-07, + "loss": 0.6370671391487122, + "step": 799 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 17.84760553493272, + "learning_rate": 6.163036869069267e-07, + "loss": 1.8662174940109253, + "step": 800 + }, + { + "epoch": 2.57556270096463, + "grad_norm": 8.898342133449939, + "learning_rate": 6.073296770089159e-07, + "loss": 0.4110758900642395, + "step": 801 + }, + { + "epoch": 2.5787781350482315, + "grad_norm": 13.204596578832312, + "learning_rate": 5.984172562206164e-07, + "loss": 0.26106947660446167, + "step": 802 + }, + { + "epoch": 2.581993569131833, + "grad_norm": 10.376179325495649, + "learning_rate": 5.895665495020242e-07, + "loss": 0.25730016827583313, + "step": 803 + }, + { + "epoch": 2.585209003215434, + "grad_norm": 13.684680313080825, + "learning_rate": 5.807776809478472e-07, + "loss": 0.3436719477176666, + "step": 804 + }, + { + "epoch": 2.5884244372990355, + "grad_norm": 27.176558442062554, + "learning_rate": 5.720507737857706e-07, + "loss": 1.1672216653823853, + "step": 805 + }, + { + "epoch": 2.5916398713826365, + "grad_norm": 12.887853458109294, + "learning_rate": 5.633859503747241e-07, + "loss": 0.5256634950637817, + "step": 806 + }, + { + "epoch": 2.594855305466238, + "grad_norm": 8.96694135103656, + "learning_rate": 5.547833322031693e-07, + "loss": 0.366617888212204, + "step": 807 + }, + { + "epoch": 2.598070739549839, + "grad_norm": 9.412357022980187, + "learning_rate": 5.462430398873947e-07, + "loss": 0.27508530020713806, + "step": 808 + }, + { + "epoch": 2.6012861736334405, + "grad_norm": 9.133709325093466, + "learning_rate": 5.377651931698275e-07, + "loss": 0.36251911520957947, + "step": 809 + }, + { + "epoch": 2.604501607717042, + "grad_norm": 9.848470581534286, + "learning_rate": 5.293499109173517e-07, + "loss": 0.33935314416885376, + "step": 810 + }, + { + "epoch": 2.607717041800643, + "grad_norm": 14.662987456896733, + "learning_rate": 5.209973111196404e-07, + "loss": 0.4906863570213318, + "step": 811 + }, + { + "epoch": 2.6109324758842445, + "grad_norm": 26.400737908261828, + "learning_rate": 5.127075108875051e-07, + "loss": 1.5506454706192017, + "step": 812 + }, + { + "epoch": 2.6141479099678455, + "grad_norm": 12.023484760258915, + "learning_rate": 5.044806264512525e-07, + "loss": 0.2666461765766144, + "step": 813 + }, + { + "epoch": 2.617363344051447, + "grad_norm": 12.371598029084595, + "learning_rate": 4.963167731590535e-07, + "loss": 0.6199164986610413, + "step": 814 + }, + { + "epoch": 2.620578778135048, + "grad_norm": 9.073073795027149, + "learning_rate": 4.88216065475327e-07, + "loss": 0.2723952531814575, + "step": 815 + }, + { + "epoch": 2.6237942122186495, + "grad_norm": 8.758785690157431, + "learning_rate": 4.801786169791339e-07, + "loss": 0.46878230571746826, + "step": 816 + }, + { + "epoch": 2.627009646302251, + "grad_norm": 9.483949934382007, + "learning_rate": 4.7220454036258803e-07, + "loss": 0.36121273040771484, + "step": 817 + }, + { + "epoch": 2.630225080385852, + "grad_norm": 9.850946982616728, + "learning_rate": 4.642939474292713e-07, + "loss": 0.38482001423835754, + "step": 818 + }, + { + "epoch": 2.6334405144694535, + "grad_norm": 14.43891373480777, + "learning_rate": 4.5644694909266984e-07, + "loss": 0.6713676452636719, + "step": 819 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 10.446474380940323, + "learning_rate": 4.4866365537461543e-07, + "loss": 0.4145408868789673, + "step": 820 + }, + { + "epoch": 2.639871382636656, + "grad_norm": 7.984266402601969, + "learning_rate": 4.4094417540374745e-07, + "loss": 0.18469397723674774, + "step": 821 + }, + { + "epoch": 2.643086816720257, + "grad_norm": 12.587060023396914, + "learning_rate": 4.332886174139794e-07, + "loss": 0.9125012159347534, + "step": 822 + }, + { + "epoch": 2.6463022508038585, + "grad_norm": 7.989834221712064, + "learning_rate": 4.2569708874298123e-07, + "loss": 0.7010893225669861, + "step": 823 + }, + { + "epoch": 2.64951768488746, + "grad_norm": 10.71200284540403, + "learning_rate": 4.1816969583067526e-07, + "loss": 0.22042930126190186, + "step": 824 + }, + { + "epoch": 2.652733118971061, + "grad_norm": 8.632776088215412, + "learning_rate": 4.1070654421774767e-07, + "loss": 0.8344212770462036, + "step": 825 + }, + { + "epoch": 2.6559485530546625, + "grad_norm": 16.33719255777251, + "learning_rate": 4.0330773854416025e-07, + "loss": 0.3559122681617737, + "step": 826 + }, + { + "epoch": 2.6591639871382635, + "grad_norm": 11.020551860792871, + "learning_rate": 3.959733825476908e-07, + "loss": 0.30302202701568604, + "step": 827 + }, + { + "epoch": 2.662379421221865, + "grad_norm": 11.593833422236477, + "learning_rate": 3.8870357906247434e-07, + "loss": 0.37081068754196167, + "step": 828 + }, + { + "epoch": 2.665594855305466, + "grad_norm": 10.818672631896604, + "learning_rate": 3.814984300175645e-07, + "loss": 0.2318996787071228, + "step": 829 + }, + { + "epoch": 2.6688102893890675, + "grad_norm": 10.490748477323145, + "learning_rate": 3.743580364355004e-07, + "loss": 0.27689433097839355, + "step": 830 + }, + { + "epoch": 2.672025723472669, + "grad_norm": 7.557342296482055, + "learning_rate": 3.672824984308948e-07, + "loss": 0.3493078351020813, + "step": 831 + }, + { + "epoch": 2.67524115755627, + "grad_norm": 19.55126309686112, + "learning_rate": 3.602719152090256e-07, + "loss": 0.7462046146392822, + "step": 832 + }, + { + "epoch": 2.6784565916398715, + "grad_norm": 9.221894655944931, + "learning_rate": 3.533263850644508e-07, + "loss": 0.6614691615104675, + "step": 833 + }, + { + "epoch": 2.6816720257234725, + "grad_norm": 9.211741634507469, + "learning_rate": 3.464460053796237e-07, + "loss": 0.29138296842575073, + "step": 834 + }, + { + "epoch": 2.684887459807074, + "grad_norm": 9.734125500615347, + "learning_rate": 3.396308726235326e-07, + "loss": 0.5526795387268066, + "step": 835 + }, + { + "epoch": 2.688102893890675, + "grad_norm": 10.836649082695137, + "learning_rate": 3.328810823503448e-07, + "loss": 0.5932884812355042, + "step": 836 + }, + { + "epoch": 2.6913183279742765, + "grad_norm": 10.19243735016382, + "learning_rate": 3.2619672919807054e-07, + "loss": 0.6854689121246338, + "step": 837 + }, + { + "epoch": 2.694533762057878, + "grad_norm": 14.297447936460108, + "learning_rate": 3.195779068872318e-07, + "loss": 0.2756684422492981, + "step": 838 + }, + { + "epoch": 2.697749196141479, + "grad_norm": 10.266269542405677, + "learning_rate": 3.1302470821955143e-07, + "loss": 0.30567488074302673, + "step": 839 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 10.298881489172567, + "learning_rate": 3.0653722507665016e-07, + "loss": 0.8650332093238831, + "step": 840 + }, + { + "epoch": 2.7041800643086815, + "grad_norm": 12.33005217016299, + "learning_rate": 3.0011554841876236e-07, + "loss": 0.7738863229751587, + "step": 841 + }, + { + "epoch": 2.707395498392283, + "grad_norm": 13.788201933128668, + "learning_rate": 2.9375976828345254e-07, + "loss": 0.4506850838661194, + "step": 842 + }, + { + "epoch": 2.710610932475884, + "grad_norm": 23.437927191055728, + "learning_rate": 2.8746997378436117e-07, + "loss": 0.9781379699707031, + "step": 843 + }, + { + "epoch": 2.7138263665594855, + "grad_norm": 8.156853967408997, + "learning_rate": 2.8124625310995136e-07, + "loss": 0.36554017663002014, + "step": 844 + }, + { + "epoch": 2.717041800643087, + "grad_norm": 36.113062108363714, + "learning_rate": 2.750886935222724e-07, + "loss": 1.2861707210540771, + "step": 845 + }, + { + "epoch": 2.720257234726688, + "grad_norm": 9.707027837669324, + "learning_rate": 2.689973813557367e-07, + "loss": 0.30182671546936035, + "step": 846 + }, + { + "epoch": 2.7234726688102895, + "grad_norm": 8.613290992443563, + "learning_rate": 2.6297240201591025e-07, + "loss": 0.3068884015083313, + "step": 847 + }, + { + "epoch": 2.7266881028938905, + "grad_norm": 10.585291997817569, + "learning_rate": 2.5701383997831284e-07, + "loss": 0.3039591610431671, + "step": 848 + }, + { + "epoch": 2.729903536977492, + "grad_norm": 16.036360689461798, + "learning_rate": 2.5112177878723833e-07, + "loss": 0.4938279986381531, + "step": 849 + }, + { + "epoch": 2.733118971061093, + "grad_norm": 7.7246780557178045, + "learning_rate": 2.452963010545767e-07, + "loss": 0.3451383709907532, + "step": 850 + }, + { + "epoch": 2.7363344051446945, + "grad_norm": 11.562492840025135, + "learning_rate": 2.3953748845866096e-07, + "loss": 0.29905378818511963, + "step": 851 + }, + { + "epoch": 2.739549839228296, + "grad_norm": 13.062794280899158, + "learning_rate": 2.3384542174311908e-07, + "loss": 0.3758173882961273, + "step": 852 + }, + { + "epoch": 2.742765273311897, + "grad_norm": 10.466966734307, + "learning_rate": 2.282201807157436e-07, + "loss": 0.7077566981315613, + "step": 853 + }, + { + "epoch": 2.7459807073954985, + "grad_norm": 23.68854074954466, + "learning_rate": 2.2266184424737214e-07, + "loss": 0.5676212310791016, + "step": 854 + }, + { + "epoch": 2.7491961414790995, + "grad_norm": 10.931312601197227, + "learning_rate": 2.1717049027078106e-07, + "loss": 0.3098456859588623, + "step": 855 + }, + { + "epoch": 2.752411575562701, + "grad_norm": 13.915496014810294, + "learning_rate": 2.1174619577959355e-07, + "loss": 0.5395633578300476, + "step": 856 + }, + { + "epoch": 2.755627009646302, + "grad_norm": 11.883754001722993, + "learning_rate": 2.0638903682719814e-07, + "loss": 0.67840975522995, + "step": 857 + }, + { + "epoch": 2.7588424437299035, + "grad_norm": 9.072301219104835, + "learning_rate": 2.010990885256875e-07, + "loss": 0.22875866293907166, + "step": 858 + }, + { + "epoch": 2.762057877813505, + "grad_norm": 11.388523965575855, + "learning_rate": 1.958764250447981e-07, + "loss": 0.28753116726875305, + "step": 859 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 10.42746808832713, + "learning_rate": 1.9072111961087546e-07, + "loss": 0.49607959389686584, + "step": 860 + }, + { + "epoch": 2.7684887459807075, + "grad_norm": 8.55806623128332, + "learning_rate": 1.856332445058462e-07, + "loss": 0.503407895565033, + "step": 861 + }, + { + "epoch": 2.7717041800643085, + "grad_norm": 12.297262248101239, + "learning_rate": 1.8061287106620308e-07, + "loss": 0.37309232354164124, + "step": 862 + }, + { + "epoch": 2.77491961414791, + "grad_norm": 6.834698795090784, + "learning_rate": 1.7566006968200712e-07, + "loss": 0.15657085180282593, + "step": 863 + }, + { + "epoch": 2.778135048231511, + "grad_norm": 11.42404497830935, + "learning_rate": 1.7077490979589996e-07, + "loss": 0.9316859245300293, + "step": 864 + }, + { + "epoch": 2.7813504823151125, + "grad_norm": 24.35923106389002, + "learning_rate": 1.6595745990212686e-07, + "loss": 0.5050212144851685, + "step": 865 + }, + { + "epoch": 2.784565916398714, + "grad_norm": 8.48025747992941, + "learning_rate": 1.6120778754558418e-07, + "loss": 0.2638796865940094, + "step": 866 + }, + { + "epoch": 2.787781350482315, + "grad_norm": 10.634566725803458, + "learning_rate": 1.5652595932086346e-07, + "loss": 0.3945295214653015, + "step": 867 + }, + { + "epoch": 2.7909967845659165, + "grad_norm": 10.867738538207686, + "learning_rate": 1.519120408713237e-07, + "loss": 0.7213743925094604, + "step": 868 + }, + { + "epoch": 2.7942122186495175, + "grad_norm": 14.418328198181541, + "learning_rate": 1.4736609688816738e-07, + "loss": 0.3248399794101715, + "step": 869 + }, + { + "epoch": 2.797427652733119, + "grad_norm": 8.419980675343098, + "learning_rate": 1.42888191109537e-07, + "loss": 0.22939413785934448, + "step": 870 + }, + { + "epoch": 2.80064308681672, + "grad_norm": 9.19433579405382, + "learning_rate": 1.3847838631961764e-07, + "loss": 0.32842016220092773, + "step": 871 + }, + { + "epoch": 2.8038585209003215, + "grad_norm": 10.639689045879424, + "learning_rate": 1.341367443477598e-07, + "loss": 0.35260647535324097, + "step": 872 + }, + { + "epoch": 2.807073954983923, + "grad_norm": 7.268481225335977, + "learning_rate": 1.2986332606761077e-07, + "loss": 0.14097937941551208, + "step": 873 + }, + { + "epoch": 2.810289389067524, + "grad_norm": 12.654855752821373, + "learning_rate": 1.2565819139626123e-07, + "loss": 0.3869823217391968, + "step": 874 + }, + { + "epoch": 2.8135048231511255, + "grad_norm": 12.375199769540473, + "learning_rate": 1.215213992934061e-07, + "loss": 0.37241262197494507, + "step": 875 + }, + { + "epoch": 2.816720257234727, + "grad_norm": 11.840995986503778, + "learning_rate": 1.1745300776051683e-07, + "loss": 0.4987008273601532, + "step": 876 + }, + { + "epoch": 2.819935691318328, + "grad_norm": 20.150747698775717, + "learning_rate": 1.1345307384002857e-07, + "loss": 0.7997194528579712, + "step": 877 + }, + { + "epoch": 2.823151125401929, + "grad_norm": 11.222276595989268, + "learning_rate": 1.0952165361454103e-07, + "loss": 0.3447033166885376, + "step": 878 + }, + { + "epoch": 2.8263665594855305, + "grad_norm": 10.642677007946524, + "learning_rate": 1.0565880220603009e-07, + "loss": 0.3858156204223633, + "step": 879 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 9.185432192229552, + "learning_rate": 1.0186457377507786e-07, + "loss": 0.34079843759536743, + "step": 880 + }, + { + "epoch": 2.832797427652733, + "grad_norm": 11.088590201340256, + "learning_rate": 9.813902152011112e-08, + "loss": 0.23632663488388062, + "step": 881 + }, + { + "epoch": 2.8360128617363345, + "grad_norm": 8.103072227415286, + "learning_rate": 9.448219767665579e-08, + "loss": 0.27433305978775024, + "step": 882 + }, + { + "epoch": 2.839228295819936, + "grad_norm": 12.74986410220812, + "learning_rate": 9.089415351660635e-08, + "loss": 0.23926009237766266, + "step": 883 + }, + { + "epoch": 2.842443729903537, + "grad_norm": 9.837558952269603, + "learning_rate": 8.737493934750374e-08, + "loss": 0.2923390567302704, + "step": 884 + }, + { + "epoch": 2.845659163987138, + "grad_norm": 8.70335385161108, + "learning_rate": 8.392460451183304e-08, + "loss": 0.41260838508605957, + "step": 885 + }, + { + "epoch": 2.8488745980707395, + "grad_norm": 14.688290879664716, + "learning_rate": 8.05431973863291e-08, + "loss": 1.014516830444336, + "step": 886 + }, + { + "epoch": 2.852090032154341, + "grad_norm": 9.853101244202293, + "learning_rate": 7.723076538130093e-08, + "loss": 0.7642203569412231, + "step": 887 + }, + { + "epoch": 2.855305466237942, + "grad_norm": 21.13860057153763, + "learning_rate": 7.398735493996445e-08, + "loss": 0.5412633419036865, + "step": 888 + }, + { + "epoch": 2.8585209003215435, + "grad_norm": 6.858041915122581, + "learning_rate": 7.081301153779308e-08, + "loss": 0.2149556279182434, + "step": 889 + }, + { + "epoch": 2.861736334405145, + "grad_norm": 8.692720770718445, + "learning_rate": 6.77077796818787e-08, + "loss": 0.2750585675239563, + "step": 890 + }, + { + "epoch": 2.864951768488746, + "grad_norm": 11.520297885410352, + "learning_rate": 6.467170291030999e-08, + "loss": 0.870136022567749, + "step": 891 + }, + { + "epoch": 2.868167202572347, + "grad_norm": 7.431835115778323, + "learning_rate": 6.170482379155907e-08, + "loss": 0.6092904806137085, + "step": 892 + }, + { + "epoch": 2.8713826366559485, + "grad_norm": 6.6845295035921035, + "learning_rate": 5.880718392388518e-08, + "loss": 0.3216220438480377, + "step": 893 + }, + { + "epoch": 2.87459807073955, + "grad_norm": 12.92591336560967, + "learning_rate": 5.597882393475473e-08, + "loss": 0.9828184247016907, + "step": 894 + }, + { + "epoch": 2.877813504823151, + "grad_norm": 12.3805351827977, + "learning_rate": 5.3219783480266685e-08, + "loss": 0.3474922776222229, + "step": 895 + }, + { + "epoch": 2.8810289389067525, + "grad_norm": 8.250380956070305, + "learning_rate": 5.053010124460078e-08, + "loss": 0.6136802434921265, + "step": 896 + }, + { + "epoch": 2.884244372990354, + "grad_norm": 10.183182722886206, + "learning_rate": 4.790981493947244e-08, + "loss": 0.36145269870758057, + "step": 897 + }, + { + "epoch": 2.887459807073955, + "grad_norm": 10.874247692186005, + "learning_rate": 4.5358961303604845e-08, + "loss": 0.582977831363678, + "step": 898 + }, + { + "epoch": 2.890675241157556, + "grad_norm": 7.351347894780587, + "learning_rate": 4.287757610221488e-08, + "loss": 0.26535022258758545, + "step": 899 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 8.630676257377292, + "learning_rate": 4.046569412651025e-08, + "loss": 0.36218592524528503, + "step": 900 + }, + { + "epoch": 2.897106109324759, + "grad_norm": 16.143200059875355, + "learning_rate": 3.8123349193201484e-08, + "loss": 0.6273083686828613, + "step": 901 + }, + { + "epoch": 2.90032154340836, + "grad_norm": 13.043811958366279, + "learning_rate": 3.585057414402959e-08, + "loss": 0.51048743724823, + "step": 902 + }, + { + "epoch": 2.9035369774919615, + "grad_norm": 13.302923874937125, + "learning_rate": 3.364740084530416e-08, + "loss": 0.3014843165874481, + "step": 903 + }, + { + "epoch": 2.906752411575563, + "grad_norm": 14.89983151718059, + "learning_rate": 3.1513860187457055e-08, + "loss": 0.2004130780696869, + "step": 904 + }, + { + "epoch": 2.909967845659164, + "grad_norm": 9.790667693555704, + "learning_rate": 2.9449982084607808e-08, + "loss": 0.39281973242759705, + "step": 905 + }, + { + "epoch": 2.913183279742765, + "grad_norm": 18.644324884396916, + "learning_rate": 2.7455795474147228e-08, + "loss": 0.23955759406089783, + "step": 906 + }, + { + "epoch": 2.9163987138263665, + "grad_norm": 8.927013412093052, + "learning_rate": 2.5531328316328875e-08, + "loss": 0.20128212869167328, + "step": 907 + }, + { + "epoch": 2.919614147909968, + "grad_norm": 15.799239685702284, + "learning_rate": 2.367660759387935e-08, + "loss": 0.7717773914337158, + "step": 908 + }, + { + "epoch": 2.922829581993569, + "grad_norm": 15.772125165413822, + "learning_rate": 2.189165931161752e-08, + "loss": 0.36580389738082886, + "step": 909 + }, + { + "epoch": 2.9260450160771705, + "grad_norm": 9.375529241824653, + "learning_rate": 2.017650849609143e-08, + "loss": 0.20489028096199036, + "step": 910 + }, + { + "epoch": 2.929260450160772, + "grad_norm": 9.02286597270734, + "learning_rate": 1.8531179195227512e-08, + "loss": 0.4565661549568176, + "step": 911 + }, + { + "epoch": 2.932475884244373, + "grad_norm": 8.336347819565956, + "learning_rate": 1.6955694477993055e-08, + "loss": 0.1987374871969223, + "step": 912 + }, + { + "epoch": 2.935691318327974, + "grad_norm": 11.104808526073217, + "learning_rate": 1.545007643407148e-08, + "loss": 0.6139571070671082, + "step": 913 + }, + { + "epoch": 2.9389067524115755, + "grad_norm": 19.520039784387993, + "learning_rate": 1.4014346173555904e-08, + "loss": 0.3137204647064209, + "step": 914 + }, + { + "epoch": 2.942122186495177, + "grad_norm": 11.947768453886892, + "learning_rate": 1.2648523826649384e-08, + "loss": 0.2887860834598541, + "step": 915 + }, + { + "epoch": 2.945337620578778, + "grad_norm": 12.055012811370135, + "learning_rate": 1.1352628543385702e-08, + "loss": 0.20143553614616394, + "step": 916 + }, + { + "epoch": 2.9485530546623795, + "grad_norm": 9.518048446591854, + "learning_rate": 1.0126678493358466e-08, + "loss": 0.4039468765258789, + "step": 917 + }, + { + "epoch": 2.951768488745981, + "grad_norm": 6.847191516049079, + "learning_rate": 8.97069086546798e-09, + "loss": 0.22868111729621887, + "step": 918 + }, + { + "epoch": 2.954983922829582, + "grad_norm": 8.642631440592353, + "learning_rate": 7.884681867679766e-09, + "loss": 0.7044589519500732, + "step": 919 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 7.259843508692505, + "learning_rate": 6.86866672679698e-09, + "loss": 0.17346805334091187, + "step": 920 + }, + { + "epoch": 2.9614147909967845, + "grad_norm": 8.999484586415532, + "learning_rate": 5.9226596882483445e-09, + "loss": 0.6509414911270142, + "step": 921 + }, + { + "epoch": 2.964630225080386, + "grad_norm": 9.496798526213007, + "learning_rate": 5.0466740158849895e-09, + "loss": 0.25824788212776184, + "step": 922 + }, + { + "epoch": 2.967845659163987, + "grad_norm": 8.918273839634464, + "learning_rate": 4.240721991799479e-09, + "loss": 0.40939438343048096, + "step": 923 + }, + { + "epoch": 2.9710610932475885, + "grad_norm": 7.676643145381744, + "learning_rate": 3.5048149161487356e-09, + "loss": 0.2906154692173004, + "step": 924 + }, + { + "epoch": 2.97427652733119, + "grad_norm": 9.78535730660284, + "learning_rate": 2.8389631069986044e-09, + "loss": 0.7982381582260132, + "step": 925 + }, + { + "epoch": 2.977491961414791, + "grad_norm": 8.125146432986423, + "learning_rate": 2.2431759001789734e-09, + "loss": 0.39818036556243896, + "step": 926 + }, + { + "epoch": 2.980707395498392, + "grad_norm": 8.127002382258551, + "learning_rate": 1.7174616491510975e-09, + "loss": 0.311463862657547, + "step": 927 + }, + { + "epoch": 2.9839228295819935, + "grad_norm": 9.126971708317253, + "learning_rate": 1.2618277248921397e-09, + "loss": 0.8928610682487488, + "step": 928 + }, + { + "epoch": 2.987138263665595, + "grad_norm": 13.640832208032544, + "learning_rate": 8.762805157913612e-10, + "loss": 0.5860022306442261, + "step": 929 + }, + { + "epoch": 2.990353697749196, + "grad_norm": 11.44603238864219, + "learning_rate": 5.608254275607516e-10, + "loss": 0.244879812002182, + "step": 930 + }, + { + "epoch": 2.9935691318327975, + "grad_norm": 11.900949930490297, + "learning_rate": 3.1546688315842177e-10, + "loss": 0.37080761790275574, + "step": 931 + }, + { + "epoch": 2.996784565916399, + "grad_norm": 11.46303644524341, + "learning_rate": 1.4020832272754193e-10, + "loss": 0.3129621148109436, + "step": 932 + }, + { + "epoch": 3.0, + "grad_norm": 7.767345085481768, + "learning_rate": 3.505220354749206e-11, + "loss": 0.548541247844696, + "step": 933 + }, + { + "epoch": 3.0, + "step": 933, + "total_flos": 2578979020800.0, + "train_loss": 1.6905082198137384, + "train_runtime": 1266.0882, + "train_samples_per_second": 2.945, + "train_steps_per_second": 0.737 + } + ], + "logging_steps": 1, + "max_steps": 933, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2578979020800.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..dcc154d --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2213129bc37d1ed3d86972874ca3e766bc03a7c1af89ef57118dfe20fe349c3f +size 7096 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..c08a6b8 Binary files /dev/null and b/training_loss.png differ