commit 8b50c89a62517b6139edb0c98b87795ea462b676 Author: ModelHub XC Date: Fri Apr 10 11:37:05 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..5f233b8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,53 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +vocab.json filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +model.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..bbda0e7 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: Qwen/Qwen2.5-1.5B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5 + results: [] +--- + + + +# openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5 + +This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the mlfoundations-dev/openthoughts3_100k dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.00016 +- train_batch_size: 4 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 32 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 1024 +- total_eval_batch_size: 256 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 5.0 + +### Training results + + + +### Framework versions + +- Transformers 4.46.1 +- Pytorch 2.3.0 +- Datasets 3.1.0 +- Tokenizers 0.20.3 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..482ced4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,24 @@ +{ + "": 151658, + "": 151657, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..5b356e6 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 4.961636828644501, + "total_flos": 7065760181780480.0, + "train_loss": 1.075610858509221, + "train_runtime": 69151.8205, + "train_samples_per_second": 7.23, + "train_steps_per_second": 0.007 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..0e002a8 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/configs.yaml b/configs.yaml new file mode 100644 index 0000000..36afb5a --- /dev/null +++ b/configs.yaml @@ -0,0 +1,39 @@ +assistant_tag: gpt +bf16: 'True' +content_tag: value +cutoff_len: '16384' +dataloader_num_workers: '4' +dataloader_persistent_workers: 'True' +dataloader_pin_memory: 'True' +dataset: mlfoundations-dev/openthoughts3_100k +dataset_dir: ONLINE +ddp_timeout: '180000000' +deepspeed: /opt/ml/code/zero3.json +do_train: 'True' +enable_liger_kernel: 'True' +finetuning_type: full +formatting: sharegpt +global_batch_size: '1024' +gradient_accumulation_steps: '8' +hub_model_id: mlfoundations-dev/openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5 +learning_rate: '0.00016' +logging_steps: '1' +lr_scheduler_type: cosine +messages: conversations +model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct +num_train_epochs: '5.0' +output_dir: /opt/ml/model +overwrite_cache: 'True' +per_device_train_batch_size: '4' +plot_loss: 'True' +preprocessing_num_workers: '16' +push_to_db: 'True' +push_to_hub: 'True' +report_to: wandb +role_tag: from +run_name: openthoughts3_100k_qwen25_1b_bsz1024_lr16e5_epochs5 +save_strategy: epoch +stage: sft +template: qwen25 +user_tag: human +warmup_ratio: '0.1' diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..16e88f7 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.1, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.46.1" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..6678e83 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a037d183c5b8bc2409a94e8d84a02273b7be1333efa95685813e81985db394bb +size 3087467144 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..17305b3 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..b84f53a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..5b356e6 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 4.961636828644501, + "total_flos": 7065760181780480.0, + "train_loss": 1.075610858509221, + "train_runtime": 69151.8205, + "train_samples_per_second": 7.23, + "train_steps_per_second": 0.007 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..478f33f --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,486 @@ +{"current_steps": 1, "total_steps": 485, "loss": 1.4918, "lr": 3.2653061224489794e-06, "epoch": 0.010230179028132993, "percentage": 0.21, "elapsed_time": "0:02:28", "remaining_time": "20:00:30"} +{"current_steps": 2, "total_steps": 485, "loss": 1.4922, "lr": 6.530612244897959e-06, "epoch": 0.020460358056265986, "percentage": 0.41, "elapsed_time": "0:04:52", "remaining_time": "19:37:22"} +{"current_steps": 3, "total_steps": 485, "loss": 1.503, "lr": 9.795918367346939e-06, "epoch": 0.030690537084398978, "percentage": 0.62, "elapsed_time": "0:07:14", "remaining_time": "19:23:06"} +{"current_steps": 4, "total_steps": 485, "loss": 1.4675, "lr": 1.3061224489795918e-05, "epoch": 0.04092071611253197, "percentage": 0.82, "elapsed_time": "0:09:37", "remaining_time": "19:17:22"} +{"current_steps": 5, "total_steps": 485, "loss": 1.4533, "lr": 1.63265306122449e-05, "epoch": 0.05115089514066496, "percentage": 1.03, "elapsed_time": "0:12:00", "remaining_time": "19:13:16"} +{"current_steps": 6, "total_steps": 485, "loss": 1.4122, "lr": 1.9591836734693877e-05, "epoch": 0.061381074168797956, "percentage": 1.24, "elapsed_time": "0:14:22", "remaining_time": "19:07:58"} +{"current_steps": 7, "total_steps": 485, "loss": 1.4056, "lr": 2.2857142857142858e-05, "epoch": 0.07161125319693094, "percentage": 1.44, "elapsed_time": "0:16:45", "remaining_time": "19:03:57"} +{"current_steps": 8, "total_steps": 485, "loss": 1.3833, "lr": 2.6122448979591835e-05, "epoch": 0.08184143222506395, "percentage": 1.65, "elapsed_time": "0:19:07", "remaining_time": "19:00:06"} +{"current_steps": 9, "total_steps": 485, "loss": 1.3843, "lr": 2.938775510204082e-05, "epoch": 0.09207161125319693, "percentage": 1.86, "elapsed_time": "0:21:29", "remaining_time": "18:56:59"} +{"current_steps": 10, "total_steps": 485, "loss": 1.3515, "lr": 3.26530612244898e-05, "epoch": 0.10230179028132992, "percentage": 2.06, "elapsed_time": "0:23:52", "remaining_time": "18:54:11"} +{"current_steps": 11, "total_steps": 485, "loss": 1.3224, "lr": 3.591836734693878e-05, "epoch": 0.11253196930946291, "percentage": 2.27, "elapsed_time": "0:26:14", "remaining_time": "18:50:30"} +{"current_steps": 12, "total_steps": 485, "loss": 1.322, "lr": 3.9183673469387755e-05, "epoch": 0.12276214833759591, "percentage": 2.47, "elapsed_time": "0:28:37", "remaining_time": "18:48:05"} +{"current_steps": 13, "total_steps": 485, "loss": 1.2956, "lr": 4.244897959183674e-05, "epoch": 0.1329923273657289, "percentage": 2.68, "elapsed_time": "0:30:59", "remaining_time": "18:45:23"} +{"current_steps": 14, "total_steps": 485, "loss": 1.3067, "lr": 4.5714285714285716e-05, "epoch": 0.1432225063938619, "percentage": 2.89, "elapsed_time": "0:33:21", "remaining_time": "18:42:25"} +{"current_steps": 15, "total_steps": 485, "loss": 1.2961, "lr": 4.89795918367347e-05, "epoch": 0.1534526854219949, "percentage": 3.09, "elapsed_time": "0:35:43", "remaining_time": "18:39:30"} +{"current_steps": 16, "total_steps": 485, "loss": 1.301, "lr": 5.224489795918367e-05, "epoch": 0.1636828644501279, "percentage": 3.3, "elapsed_time": "0:38:05", "remaining_time": "18:36:26"} +{"current_steps": 17, "total_steps": 485, "loss": 1.2656, "lr": 5.551020408163266e-05, "epoch": 0.17391304347826086, "percentage": 3.51, "elapsed_time": "0:40:29", "remaining_time": "18:34:31"} +{"current_steps": 18, "total_steps": 485, "loss": 1.246, "lr": 5.877551020408164e-05, "epoch": 0.18414322250639387, "percentage": 3.71, "elapsed_time": "0:42:51", "remaining_time": "18:31:46"} +{"current_steps": 19, "total_steps": 485, "loss": 1.2565, "lr": 6.204081632653062e-05, "epoch": 0.19437340153452684, "percentage": 3.92, "elapsed_time": "0:45:13", "remaining_time": "18:29:02"} +{"current_steps": 20, "total_steps": 485, "loss": 1.241, "lr": 6.53061224489796e-05, "epoch": 0.20460358056265984, "percentage": 4.12, "elapsed_time": "0:47:34", "remaining_time": "18:26:06"} +{"current_steps": 21, "total_steps": 485, "loss": 1.232, "lr": 6.857142857142857e-05, "epoch": 0.21483375959079284, "percentage": 4.33, "elapsed_time": "0:49:57", "remaining_time": "18:23:43"} +{"current_steps": 22, "total_steps": 485, "loss": 1.2567, "lr": 7.183673469387756e-05, "epoch": 0.22506393861892582, "percentage": 4.54, "elapsed_time": "0:52:19", "remaining_time": "18:21:04"} +{"current_steps": 23, "total_steps": 485, "loss": 1.2448, "lr": 7.510204081632654e-05, "epoch": 0.23529411764705882, "percentage": 4.74, "elapsed_time": "0:54:41", "remaining_time": "18:18:34"} +{"current_steps": 24, "total_steps": 485, "loss": 1.2357, "lr": 7.836734693877551e-05, "epoch": 0.24552429667519182, "percentage": 4.95, "elapsed_time": "0:57:03", "remaining_time": "18:15:52"} +{"current_steps": 25, "total_steps": 485, "loss": 1.2164, "lr": 8.16326530612245e-05, "epoch": 0.2557544757033248, "percentage": 5.15, "elapsed_time": "0:59:25", "remaining_time": "18:13:18"} +{"current_steps": 26, "total_steps": 485, "loss": 1.2115, "lr": 8.489795918367348e-05, "epoch": 0.2659846547314578, "percentage": 5.36, "elapsed_time": "1:01:46", "remaining_time": "18:10:37"} +{"current_steps": 27, "total_steps": 485, "loss": 1.2004, "lr": 8.816326530612245e-05, "epoch": 0.27621483375959077, "percentage": 5.57, "elapsed_time": "1:04:08", "remaining_time": "18:08:01"} +{"current_steps": 28, "total_steps": 485, "loss": 1.2235, "lr": 9.142857142857143e-05, "epoch": 0.2864450127877238, "percentage": 5.77, "elapsed_time": "1:06:30", "remaining_time": "18:05:30"} +{"current_steps": 29, "total_steps": 485, "loss": 1.208, "lr": 9.469387755102041e-05, "epoch": 0.2966751918158568, "percentage": 5.98, "elapsed_time": "1:08:53", "remaining_time": "18:03:20"} +{"current_steps": 30, "total_steps": 485, "loss": 1.2118, "lr": 9.79591836734694e-05, "epoch": 0.3069053708439898, "percentage": 6.19, "elapsed_time": "1:11:14", "remaining_time": "18:00:33"} +{"current_steps": 31, "total_steps": 485, "loss": 1.2032, "lr": 0.00010122448979591839, "epoch": 0.3171355498721228, "percentage": 6.39, "elapsed_time": "1:13:36", "remaining_time": "17:58:03"} +{"current_steps": 32, "total_steps": 485, "loss": 1.1912, "lr": 0.00010448979591836734, "epoch": 0.3273657289002558, "percentage": 6.6, "elapsed_time": "1:15:59", "remaining_time": "17:55:46"} +{"current_steps": 33, "total_steps": 485, "loss": 1.2025, "lr": 0.00010775510204081634, "epoch": 0.3375959079283887, "percentage": 6.8, "elapsed_time": "1:18:21", "remaining_time": "17:53:15"} +{"current_steps": 34, "total_steps": 485, "loss": 1.2118, "lr": 0.00011102040816326532, "epoch": 0.34782608695652173, "percentage": 7.01, "elapsed_time": "1:20:44", "remaining_time": "17:51:00"} +{"current_steps": 35, "total_steps": 485, "loss": 1.1986, "lr": 0.0001142857142857143, "epoch": 0.35805626598465473, "percentage": 7.22, "elapsed_time": "1:23:07", "remaining_time": "17:48:43"} +{"current_steps": 36, "total_steps": 485, "loss": 1.1939, "lr": 0.00011755102040816328, "epoch": 0.36828644501278773, "percentage": 7.42, "elapsed_time": "1:25:30", "remaining_time": "17:46:25"} +{"current_steps": 37, "total_steps": 485, "loss": 1.1997, "lr": 0.00012081632653061224, "epoch": 0.37851662404092073, "percentage": 7.63, "elapsed_time": "1:27:51", "remaining_time": "17:43:51"} +{"current_steps": 38, "total_steps": 485, "loss": 1.1791, "lr": 0.00012408163265306124, "epoch": 0.3887468030690537, "percentage": 7.84, "elapsed_time": "1:30:13", "remaining_time": "17:41:23"} +{"current_steps": 39, "total_steps": 485, "loss": 1.1896, "lr": 0.0001273469387755102, "epoch": 0.3989769820971867, "percentage": 8.04, "elapsed_time": "1:32:35", "remaining_time": "17:38:52"} +{"current_steps": 40, "total_steps": 485, "loss": 1.1876, "lr": 0.0001306122448979592, "epoch": 0.4092071611253197, "percentage": 8.25, "elapsed_time": "1:34:57", "remaining_time": "17:36:28"} +{"current_steps": 41, "total_steps": 485, "loss": 1.1709, "lr": 0.00013387755102040817, "epoch": 0.4194373401534527, "percentage": 8.45, "elapsed_time": "1:37:20", "remaining_time": "17:34:04"} +{"current_steps": 42, "total_steps": 485, "loss": 1.1795, "lr": 0.00013714285714285713, "epoch": 0.4296675191815857, "percentage": 8.66, "elapsed_time": "1:39:42", "remaining_time": "17:31:39"} +{"current_steps": 43, "total_steps": 485, "loss": 1.1753, "lr": 0.00014040816326530613, "epoch": 0.4398976982097187, "percentage": 8.87, "elapsed_time": "1:42:04", "remaining_time": "17:29:13"} +{"current_steps": 44, "total_steps": 485, "loss": 1.2028, "lr": 0.00014367346938775512, "epoch": 0.45012787723785164, "percentage": 9.07, "elapsed_time": "1:44:26", "remaining_time": "17:26:46"} +{"current_steps": 45, "total_steps": 485, "loss": 1.1822, "lr": 0.0001469387755102041, "epoch": 0.46035805626598464, "percentage": 9.28, "elapsed_time": "1:46:47", "remaining_time": "17:24:14"} +{"current_steps": 46, "total_steps": 485, "loss": 1.1836, "lr": 0.00015020408163265308, "epoch": 0.47058823529411764, "percentage": 9.48, "elapsed_time": "1:49:09", "remaining_time": "17:21:49"} +{"current_steps": 47, "total_steps": 485, "loss": 1.1901, "lr": 0.00015346938775510205, "epoch": 0.48081841432225064, "percentage": 9.69, "elapsed_time": "1:51:31", "remaining_time": "17:19:21"} +{"current_steps": 48, "total_steps": 485, "loss": 1.1562, "lr": 0.00015673469387755102, "epoch": 0.49104859335038364, "percentage": 9.9, "elapsed_time": "1:53:53", "remaining_time": "17:16:56"} +{"current_steps": 49, "total_steps": 485, "loss": 1.1794, "lr": 0.00016, "epoch": 0.5012787723785166, "percentage": 10.1, "elapsed_time": "1:56:17", "remaining_time": "17:14:48"} +{"current_steps": 50, "total_steps": 485, "loss": 1.1877, "lr": 0.00015999792324684382, "epoch": 0.5115089514066496, "percentage": 10.31, "elapsed_time": "1:58:40", "remaining_time": "17:12:24"} +{"current_steps": 51, "total_steps": 485, "loss": 1.1585, "lr": 0.00015999169309519789, "epoch": 0.5217391304347826, "percentage": 10.52, "elapsed_time": "2:01:01", "remaining_time": "17:09:56"} +{"current_steps": 52, "total_steps": 485, "loss": 1.1732, "lr": 0.0001599813098685243, "epoch": 0.5319693094629157, "percentage": 10.72, "elapsed_time": "2:03:23", "remaining_time": "17:07:27"} +{"current_steps": 53, "total_steps": 485, "loss": 1.1617, "lr": 0.0001599667741059081, "epoch": 0.5421994884910486, "percentage": 10.93, "elapsed_time": "2:05:44", "remaining_time": "17:04:55"} +{"current_steps": 54, "total_steps": 485, "loss": 1.1584, "lr": 0.00015994808656202904, "epoch": 0.5524296675191815, "percentage": 11.13, "elapsed_time": "2:08:06", "remaining_time": "17:02:31"} +{"current_steps": 55, "total_steps": 485, "loss": 1.1478, "lr": 0.00015992524820712252, "epoch": 0.5626598465473146, "percentage": 11.34, "elapsed_time": "2:10:28", "remaining_time": "17:00:06"} +{"current_steps": 56, "total_steps": 485, "loss": 1.1493, "lr": 0.00015989826022692918, "epoch": 0.5728900255754475, "percentage": 11.55, "elapsed_time": "2:12:51", "remaining_time": "16:57:45"} +{"current_steps": 57, "total_steps": 485, "loss": 1.148, "lr": 0.00015986712402263334, "epoch": 0.5831202046035806, "percentage": 11.75, "elapsed_time": "2:15:13", "remaining_time": "16:55:22"} +{"current_steps": 58, "total_steps": 485, "loss": 1.1532, "lr": 0.00015983184121079024, "epoch": 0.5933503836317136, "percentage": 11.96, "elapsed_time": "2:17:35", "remaining_time": "16:53:00"} +{"current_steps": 59, "total_steps": 485, "loss": 1.1313, "lr": 0.00015979241362324223, "epoch": 0.6035805626598465, "percentage": 12.16, "elapsed_time": "2:19:57", "remaining_time": "16:50:36"} +{"current_steps": 60, "total_steps": 485, "loss": 1.138, "lr": 0.0001597488433070234, "epoch": 0.6138107416879796, "percentage": 12.37, "elapsed_time": "2:22:21", "remaining_time": "16:48:19"} +{"current_steps": 61, "total_steps": 485, "loss": 1.1546, "lr": 0.00015970113252425356, "epoch": 0.6240409207161125, "percentage": 12.58, "elapsed_time": "2:24:42", "remaining_time": "16:45:53"} +{"current_steps": 62, "total_steps": 485, "loss": 1.163, "lr": 0.00015964928375202068, "epoch": 0.6342710997442456, "percentage": 12.78, "elapsed_time": "2:27:04", "remaining_time": "16:43:28"} +{"current_steps": 63, "total_steps": 485, "loss": 1.1564, "lr": 0.00015959329968225232, "epoch": 0.6445012787723785, "percentage": 12.99, "elapsed_time": "2:29:26", "remaining_time": "16:41:03"} +{"current_steps": 64, "total_steps": 485, "loss": 1.1684, "lr": 0.0001595331832215758, "epoch": 0.6547314578005116, "percentage": 13.2, "elapsed_time": "2:31:48", "remaining_time": "16:38:34"} +{"current_steps": 65, "total_steps": 485, "loss": 1.1788, "lr": 0.00015946893749116734, "epoch": 0.6649616368286445, "percentage": 13.4, "elapsed_time": "2:34:10", "remaining_time": "16:36:12"} +{"current_steps": 66, "total_steps": 485, "loss": 1.1537, "lr": 0.00015940056582659006, "epoch": 0.6751918158567775, "percentage": 13.61, "elapsed_time": "2:36:32", "remaining_time": "16:33:48"} +{"current_steps": 67, "total_steps": 485, "loss": 1.1487, "lr": 0.0001593280717776207, "epoch": 0.6854219948849105, "percentage": 13.81, "elapsed_time": "2:38:55", "remaining_time": "16:31:27"} +{"current_steps": 68, "total_steps": 485, "loss": 1.15, "lr": 0.0001592514591080654, "epoch": 0.6956521739130435, "percentage": 14.02, "elapsed_time": "2:41:17", "remaining_time": "16:29:02"} +{"current_steps": 69, "total_steps": 485, "loss": 1.1487, "lr": 0.0001591707317955642, "epoch": 0.7058823529411765, "percentage": 14.23, "elapsed_time": "2:43:38", "remaining_time": "16:26:37"} +{"current_steps": 70, "total_steps": 485, "loss": 1.1766, "lr": 0.00015908589403138468, "epoch": 0.7161125319693095, "percentage": 14.43, "elapsed_time": "2:46:02", "remaining_time": "16:24:20"} +{"current_steps": 71, "total_steps": 485, "loss": 1.1464, "lr": 0.00015899695022020415, "epoch": 0.7263427109974424, "percentage": 14.64, "elapsed_time": "2:48:24", "remaining_time": "16:21:58"} +{"current_steps": 72, "total_steps": 485, "loss": 1.1487, "lr": 0.00015890390497988116, "epoch": 0.7365728900255755, "percentage": 14.85, "elapsed_time": "2:50:47", "remaining_time": "16:19:39"} +{"current_steps": 73, "total_steps": 485, "loss": 1.1447, "lr": 0.0001588067631412156, "epoch": 0.7468030690537084, "percentage": 15.05, "elapsed_time": "2:53:09", "remaining_time": "16:17:15"} +{"current_steps": 74, "total_steps": 485, "loss": 1.1299, "lr": 0.000158705529747698, "epoch": 0.7570332480818415, "percentage": 15.26, "elapsed_time": "2:55:31", "remaining_time": "16:14:52"} +{"current_steps": 75, "total_steps": 485, "loss": 1.1473, "lr": 0.0001586002100552476, "epoch": 0.7672634271099744, "percentage": 15.46, "elapsed_time": "2:57:53", "remaining_time": "16:12:28"} +{"current_steps": 76, "total_steps": 485, "loss": 1.1368, "lr": 0.00015849080953193943, "epoch": 0.7774936061381074, "percentage": 15.67, "elapsed_time": "3:00:16", "remaining_time": "16:10:09"} +{"current_steps": 77, "total_steps": 485, "loss": 1.123, "lr": 0.00015837733385772062, "epoch": 0.7877237851662404, "percentage": 15.88, "elapsed_time": "3:02:38", "remaining_time": "16:07:46"} +{"current_steps": 78, "total_steps": 485, "loss": 1.1403, "lr": 0.00015825978892411522, "epoch": 0.7979539641943734, "percentage": 16.08, "elapsed_time": "3:05:00", "remaining_time": "16:05:24"} +{"current_steps": 79, "total_steps": 485, "loss": 1.1229, "lr": 0.00015813818083391858, "epoch": 0.8081841432225064, "percentage": 16.29, "elapsed_time": "3:07:22", "remaining_time": "16:02:56"} +{"current_steps": 80, "total_steps": 485, "loss": 1.129, "lr": 0.0001580125159008803, "epoch": 0.8184143222506394, "percentage": 16.49, "elapsed_time": "3:09:44", "remaining_time": "16:00:33"} +{"current_steps": 81, "total_steps": 485, "loss": 1.1351, "lr": 0.00015788280064937655, "epoch": 0.8286445012787724, "percentage": 16.7, "elapsed_time": "3:12:06", "remaining_time": "15:58:11"} +{"current_steps": 82, "total_steps": 485, "loss": 1.1334, "lr": 0.00015774904181407127, "epoch": 0.8388746803069054, "percentage": 16.91, "elapsed_time": "3:14:28", "remaining_time": "15:55:48"} +{"current_steps": 83, "total_steps": 485, "loss": 1.1363, "lr": 0.00015761124633956652, "epoch": 0.8491048593350383, "percentage": 17.11, "elapsed_time": "3:16:51", "remaining_time": "15:53:25"} +{"current_steps": 84, "total_steps": 485, "loss": 1.1142, "lr": 0.00015746942138004203, "epoch": 0.8593350383631714, "percentage": 17.32, "elapsed_time": "3:19:12", "remaining_time": "15:50:59"} +{"current_steps": 85, "total_steps": 485, "loss": 1.13, "lr": 0.00015732357429888355, "epoch": 0.8695652173913043, "percentage": 17.53, "elapsed_time": "3:21:34", "remaining_time": "15:48:33"} +{"current_steps": 86, "total_steps": 485, "loss": 1.1225, "lr": 0.00015717371266830076, "epoch": 0.8797953964194374, "percentage": 17.73, "elapsed_time": "3:23:56", "remaining_time": "15:46:11"} +{"current_steps": 87, "total_steps": 485, "loss": 1.1237, "lr": 0.000157019844268934, "epoch": 0.8900255754475703, "percentage": 17.94, "elapsed_time": "3:26:19", "remaining_time": "15:43:51"} +{"current_steps": 88, "total_steps": 485, "loss": 1.1223, "lr": 0.0001568619770894504, "epoch": 0.9002557544757033, "percentage": 18.14, "elapsed_time": "3:28:41", "remaining_time": "15:41:29"} +{"current_steps": 89, "total_steps": 485, "loss": 1.1117, "lr": 0.000156700119326129, "epoch": 0.9104859335038363, "percentage": 18.35, "elapsed_time": "3:31:05", "remaining_time": "15:39:13"} +{"current_steps": 90, "total_steps": 485, "loss": 1.1195, "lr": 0.00015653427938243532, "epoch": 0.9207161125319693, "percentage": 18.56, "elapsed_time": "3:33:27", "remaining_time": "15:36:52"} +{"current_steps": 91, "total_steps": 485, "loss": 1.1112, "lr": 0.0001563644658685851, "epoch": 0.9309462915601023, "percentage": 18.76, "elapsed_time": "3:35:50", "remaining_time": "15:34:29"} +{"current_steps": 92, "total_steps": 485, "loss": 1.1334, "lr": 0.00015619068760109703, "epoch": 0.9411764705882353, "percentage": 18.97, "elapsed_time": "3:38:12", "remaining_time": "15:32:07"} +{"current_steps": 93, "total_steps": 485, "loss": 1.123, "lr": 0.00015601295360233528, "epoch": 0.9514066496163683, "percentage": 19.18, "elapsed_time": "3:40:34", "remaining_time": "15:29:42"} +{"current_steps": 94, "total_steps": 485, "loss": 1.1245, "lr": 0.0001558312731000409, "epoch": 0.9616368286445013, "percentage": 19.38, "elapsed_time": "3:42:56", "remaining_time": "15:27:20"} +{"current_steps": 95, "total_steps": 485, "loss": 1.1159, "lr": 0.00015564565552685282, "epoch": 0.9718670076726342, "percentage": 19.59, "elapsed_time": "3:45:18", "remaining_time": "15:24:58"} +{"current_steps": 96, "total_steps": 485, "loss": 1.1086, "lr": 0.00015545611051981807, "epoch": 0.9820971867007673, "percentage": 19.79, "elapsed_time": "3:47:41", "remaining_time": "15:22:37"} +{"current_steps": 97, "total_steps": 485, "loss": 1.1396, "lr": 0.00015526264791989144, "epoch": 0.9923273657289002, "percentage": 20.0, "elapsed_time": "3:50:03", "remaining_time": "15:20:15"} +{"current_steps": 98, "total_steps": 485, "loss": 1.4022, "lr": 0.00015506527777142453, "epoch": 1.0025575447570332, "percentage": 20.21, "elapsed_time": "3:52:37", "remaining_time": "15:18:39"} +{"current_steps": 99, "total_steps": 485, "loss": 1.0962, "lr": 0.00015486401032164434, "epoch": 1.0127877237851663, "percentage": 20.41, "elapsed_time": "3:55:00", "remaining_time": "15:16:16"} +{"current_steps": 100, "total_steps": 485, "loss": 1.1252, "lr": 0.00015465885602012117, "epoch": 1.0230179028132993, "percentage": 20.62, "elapsed_time": "3:57:22", "remaining_time": "15:13:52"} +{"current_steps": 101, "total_steps": 485, "loss": 1.1044, "lr": 0.00015444982551822604, "epoch": 1.0332480818414322, "percentage": 20.82, "elapsed_time": "3:59:44", "remaining_time": "15:11:29"} +{"current_steps": 102, "total_steps": 485, "loss": 1.1138, "lr": 0.00015423692966857788, "epoch": 1.0434782608695652, "percentage": 21.03, "elapsed_time": "4:02:06", "remaining_time": "15:09:06"} +{"current_steps": 103, "total_steps": 485, "loss": 1.0804, "lr": 0.00015402017952447983, "epoch": 1.0537084398976981, "percentage": 21.24, "elapsed_time": "4:04:29", "remaining_time": "15:06:44"} +{"current_steps": 104, "total_steps": 485, "loss": 1.1212, "lr": 0.00015379958633934555, "epoch": 1.0639386189258313, "percentage": 21.44, "elapsed_time": "4:06:52", "remaining_time": "15:04:23"} +{"current_steps": 105, "total_steps": 485, "loss": 1.1, "lr": 0.0001535751615661149, "epoch": 1.0741687979539642, "percentage": 21.65, "elapsed_time": "4:09:14", "remaining_time": "15:02:00"} +{"current_steps": 106, "total_steps": 485, "loss": 1.096, "lr": 0.00015334691685665928, "epoch": 1.0843989769820972, "percentage": 21.86, "elapsed_time": "4:11:36", "remaining_time": "14:59:37"} +{"current_steps": 107, "total_steps": 485, "loss": 1.0882, "lr": 0.00015311486406117668, "epoch": 1.0946291560102301, "percentage": 22.06, "elapsed_time": "4:13:58", "remaining_time": "14:57:14"} +{"current_steps": 108, "total_steps": 485, "loss": 1.1214, "lr": 0.00015287901522757652, "epoch": 1.104859335038363, "percentage": 22.27, "elapsed_time": "4:16:21", "remaining_time": "14:54:52"} +{"current_steps": 109, "total_steps": 485, "loss": 1.0963, "lr": 0.000152639382600854, "epoch": 1.1150895140664963, "percentage": 22.47, "elapsed_time": "4:18:43", "remaining_time": "14:52:29"} +{"current_steps": 110, "total_steps": 485, "loss": 1.0855, "lr": 0.00015239597862245452, "epoch": 1.1253196930946292, "percentage": 22.68, "elapsed_time": "4:21:06", "remaining_time": "14:50:10"} +{"current_steps": 111, "total_steps": 485, "loss": 1.094, "lr": 0.00015214881592962753, "epoch": 1.1355498721227621, "percentage": 22.89, "elapsed_time": "4:23:29", "remaining_time": "14:47:49"} +{"current_steps": 112, "total_steps": 485, "loss": 1.0819, "lr": 0.00015189790735477062, "epoch": 1.145780051150895, "percentage": 23.09, "elapsed_time": "4:25:51", "remaining_time": "14:45:24"} +{"current_steps": 113, "total_steps": 485, "loss": 1.099, "lr": 0.00015164326592476316, "epoch": 1.156010230179028, "percentage": 23.3, "elapsed_time": "4:28:13", "remaining_time": "14:43:00"} +{"current_steps": 114, "total_steps": 485, "loss": 1.0887, "lr": 0.00015138490486028998, "epoch": 1.1662404092071612, "percentage": 23.51, "elapsed_time": "4:30:36", "remaining_time": "14:40:38"} +{"current_steps": 115, "total_steps": 485, "loss": 1.0879, "lr": 0.000151122837575155, "epoch": 1.1764705882352942, "percentage": 23.71, "elapsed_time": "4:32:58", "remaining_time": "14:38:15"} +{"current_steps": 116, "total_steps": 485, "loss": 1.0974, "lr": 0.00015085707767558475, "epoch": 1.186700767263427, "percentage": 23.92, "elapsed_time": "4:35:20", "remaining_time": "14:35:51"} +{"current_steps": 117, "total_steps": 485, "loss": 1.1016, "lr": 0.00015058763895952194, "epoch": 1.19693094629156, "percentage": 24.12, "elapsed_time": "4:37:41", "remaining_time": "14:33:26"} +{"current_steps": 118, "total_steps": 485, "loss": 1.0789, "lr": 0.00015031453541590925, "epoch": 1.207161125319693, "percentage": 24.33, "elapsed_time": "4:40:03", "remaining_time": "14:31:02"} +{"current_steps": 119, "total_steps": 485, "loss": 1.0851, "lr": 0.00015003778122396277, "epoch": 1.2173913043478262, "percentage": 24.54, "elapsed_time": "4:42:25", "remaining_time": "14:28:39"} +{"current_steps": 120, "total_steps": 485, "loss": 1.1047, "lr": 0.0001497573907524361, "epoch": 1.227621483375959, "percentage": 24.74, "elapsed_time": "4:44:48", "remaining_time": "14:26:18"} +{"current_steps": 121, "total_steps": 485, "loss": 1.0943, "lr": 0.00014947337855887406, "epoch": 1.237851662404092, "percentage": 24.95, "elapsed_time": "4:47:11", "remaining_time": "14:23:56"} +{"current_steps": 122, "total_steps": 485, "loss": 1.0896, "lr": 0.00014918575938885725, "epoch": 1.248081841432225, "percentage": 25.15, "elapsed_time": "4:49:33", "remaining_time": "14:21:33"} +{"current_steps": 123, "total_steps": 485, "loss": 1.0984, "lr": 0.00014889454817523608, "epoch": 1.258312020460358, "percentage": 25.36, "elapsed_time": "4:51:56", "remaining_time": "14:19:11"} +{"current_steps": 124, "total_steps": 485, "loss": 1.091, "lr": 0.00014859976003735572, "epoch": 1.2685421994884911, "percentage": 25.57, "elapsed_time": "4:54:19", "remaining_time": "14:16:52"} +{"current_steps": 125, "total_steps": 485, "loss": 1.0749, "lr": 0.0001483014102802711, "epoch": 1.278772378516624, "percentage": 25.77, "elapsed_time": "4:56:41", "remaining_time": "14:14:29"} +{"current_steps": 126, "total_steps": 485, "loss": 1.0901, "lr": 0.00014799951439395221, "epoch": 1.289002557544757, "percentage": 25.98, "elapsed_time": "4:59:05", "remaining_time": "14:12:09"} +{"current_steps": 127, "total_steps": 485, "loss": 1.0848, "lr": 0.00014769408805247986, "epoch": 1.29923273657289, "percentage": 26.19, "elapsed_time": "5:01:27", "remaining_time": "14:09:46"} +{"current_steps": 128, "total_steps": 485, "loss": 1.0897, "lr": 0.0001473851471132321, "epoch": 1.309462915601023, "percentage": 26.39, "elapsed_time": "5:03:49", "remaining_time": "14:07:23"} +{"current_steps": 129, "total_steps": 485, "loss": 1.0695, "lr": 0.00014707270761606063, "epoch": 1.319693094629156, "percentage": 26.6, "elapsed_time": "5:06:12", "remaining_time": "14:05:02"} +{"current_steps": 130, "total_steps": 485, "loss": 1.0895, "lr": 0.00014675678578245828, "epoch": 1.329923273657289, "percentage": 26.8, "elapsed_time": "5:08:34", "remaining_time": "14:02:39"} +{"current_steps": 131, "total_steps": 485, "loss": 1.1003, "lr": 0.00014643739801471667, "epoch": 1.340153452685422, "percentage": 27.01, "elapsed_time": "5:10:57", "remaining_time": "14:00:17"} +{"current_steps": 132, "total_steps": 485, "loss": 1.098, "lr": 0.00014611456089507464, "epoch": 1.350383631713555, "percentage": 27.22, "elapsed_time": "5:13:20", "remaining_time": "13:57:57"} +{"current_steps": 133, "total_steps": 485, "loss": 1.0698, "lr": 0.00014578829118485742, "epoch": 1.3606138107416879, "percentage": 27.42, "elapsed_time": "5:15:42", "remaining_time": "13:55:34"} +{"current_steps": 134, "total_steps": 485, "loss": 1.1071, "lr": 0.00014545860582360624, "epoch": 1.370843989769821, "percentage": 27.63, "elapsed_time": "5:18:06", "remaining_time": "13:53:14"} +{"current_steps": 135, "total_steps": 485, "loss": 1.0869, "lr": 0.00014512552192819897, "epoch": 1.381074168797954, "percentage": 27.84, "elapsed_time": "5:20:28", "remaining_time": "13:50:51"} +{"current_steps": 136, "total_steps": 485, "loss": 1.0954, "lr": 0.0001447890567919614, "epoch": 1.391304347826087, "percentage": 28.04, "elapsed_time": "5:22:50", "remaining_time": "13:48:29"} +{"current_steps": 137, "total_steps": 485, "loss": 1.0784, "lr": 0.00014444922788376934, "epoch": 1.40153452685422, "percentage": 28.25, "elapsed_time": "5:25:13", "remaining_time": "13:46:06"} +{"current_steps": 138, "total_steps": 485, "loss": 1.0888, "lr": 0.00014410605284714175, "epoch": 1.4117647058823528, "percentage": 28.45, "elapsed_time": "5:27:36", "remaining_time": "13:43:45"} +{"current_steps": 139, "total_steps": 485, "loss": 1.0842, "lr": 0.0001437595494993246, "epoch": 1.421994884910486, "percentage": 28.66, "elapsed_time": "5:29:58", "remaining_time": "13:41:21"} +{"current_steps": 140, "total_steps": 485, "loss": 1.0795, "lr": 0.000143409735830366, "epoch": 1.432225063938619, "percentage": 28.87, "elapsed_time": "5:32:20", "remaining_time": "13:38:58"} +{"current_steps": 141, "total_steps": 485, "loss": 1.0907, "lr": 0.00014305663000218193, "epoch": 1.4424552429667519, "percentage": 29.07, "elapsed_time": "5:34:42", "remaining_time": "13:36:36"} +{"current_steps": 142, "total_steps": 485, "loss": 1.0817, "lr": 0.00014270025034761352, "epoch": 1.452685421994885, "percentage": 29.28, "elapsed_time": "5:37:05", "remaining_time": "13:34:14"} +{"current_steps": 143, "total_steps": 485, "loss": 1.0819, "lr": 0.000142340615369475, "epoch": 1.4629156010230178, "percentage": 29.48, "elapsed_time": "5:39:28", "remaining_time": "13:31:52"} +{"current_steps": 144, "total_steps": 485, "loss": 1.0931, "lr": 0.00014197774373959327, "epoch": 1.473145780051151, "percentage": 29.69, "elapsed_time": "5:41:50", "remaining_time": "13:29:30"} +{"current_steps": 145, "total_steps": 485, "loss": 1.0884, "lr": 0.00014161165429783844, "epoch": 1.4833759590792839, "percentage": 29.9, "elapsed_time": "5:44:12", "remaining_time": "13:27:06"} +{"current_steps": 146, "total_steps": 485, "loss": 1.0924, "lr": 0.0001412423660511456, "epoch": 1.4936061381074168, "percentage": 30.1, "elapsed_time": "5:46:34", "remaining_time": "13:24:44"} +{"current_steps": 147, "total_steps": 485, "loss": 1.0785, "lr": 0.00014086989817252803, "epoch": 1.50383631713555, "percentage": 30.31, "elapsed_time": "5:48:57", "remaining_time": "13:22:21"} +{"current_steps": 148, "total_steps": 485, "loss": 1.0699, "lr": 0.00014049427000008185, "epoch": 1.5140664961636827, "percentage": 30.52, "elapsed_time": "5:51:19", "remaining_time": "13:19:57"} +{"current_steps": 149, "total_steps": 485, "loss": 1.064, "lr": 0.00014011550103598176, "epoch": 1.5242966751918159, "percentage": 30.72, "elapsed_time": "5:53:41", "remaining_time": "13:17:35"} +{"current_steps": 150, "total_steps": 485, "loss": 1.1002, "lr": 0.0001397336109454689, "epoch": 1.5345268542199488, "percentage": 30.93, "elapsed_time": "5:56:04", "remaining_time": "13:15:13"} +{"current_steps": 151, "total_steps": 485, "loss": 1.0709, "lr": 0.0001393486195558295, "epoch": 1.5447570332480818, "percentage": 31.13, "elapsed_time": "5:58:26", "remaining_time": "13:12:51"} +{"current_steps": 152, "total_steps": 485, "loss": 1.0717, "lr": 0.00013896054685536566, "epoch": 1.554987212276215, "percentage": 31.34, "elapsed_time": "6:00:49", "remaining_time": "13:10:28"} +{"current_steps": 153, "total_steps": 485, "loss": 1.0714, "lr": 0.00013856941299235752, "epoch": 1.5652173913043477, "percentage": 31.55, "elapsed_time": "6:03:12", "remaining_time": "13:08:08"} +{"current_steps": 154, "total_steps": 485, "loss": 1.0825, "lr": 0.00013817523827401715, "epoch": 1.5754475703324808, "percentage": 31.75, "elapsed_time": "6:05:34", "remaining_time": "13:05:45"} +{"current_steps": 155, "total_steps": 485, "loss": 1.0583, "lr": 0.00013777804316543438, "epoch": 1.5856777493606138, "percentage": 31.96, "elapsed_time": "6:07:57", "remaining_time": "13:03:22"} +{"current_steps": 156, "total_steps": 485, "loss": 1.0998, "lr": 0.00013737784828851405, "epoch": 1.5959079283887467, "percentage": 32.16, "elapsed_time": "6:10:20", "remaining_time": "13:01:01"} +{"current_steps": 157, "total_steps": 485, "loss": 1.0814, "lr": 0.0001369746744209055, "epoch": 1.60613810741688, "percentage": 32.37, "elapsed_time": "6:12:42", "remaining_time": "12:58:39"} +{"current_steps": 158, "total_steps": 485, "loss": 1.0672, "lr": 0.00013656854249492382, "epoch": 1.6163682864450126, "percentage": 32.58, "elapsed_time": "6:15:05", "remaining_time": "12:56:17"} +{"current_steps": 159, "total_steps": 485, "loss": 1.077, "lr": 0.00013615947359646295, "epoch": 1.6265984654731458, "percentage": 32.78, "elapsed_time": "6:17:28", "remaining_time": "12:53:56"} +{"current_steps": 160, "total_steps": 485, "loss": 1.0831, "lr": 0.00013574748896390105, "epoch": 1.6368286445012787, "percentage": 32.99, "elapsed_time": "6:19:50", "remaining_time": "12:51:33"} +{"current_steps": 161, "total_steps": 485, "loss": 1.0808, "lr": 0.00013533260998699776, "epoch": 1.6470588235294117, "percentage": 33.2, "elapsed_time": "6:22:13", "remaining_time": "12:49:11"} +{"current_steps": 162, "total_steps": 485, "loss": 1.0609, "lr": 0.00013491485820578373, "epoch": 1.6572890025575449, "percentage": 33.4, "elapsed_time": "6:24:35", "remaining_time": "12:46:48"} +{"current_steps": 163, "total_steps": 485, "loss": 1.0822, "lr": 0.00013449425530944218, "epoch": 1.6675191815856778, "percentage": 33.61, "elapsed_time": "6:26:58", "remaining_time": "12:44:26"} +{"current_steps": 164, "total_steps": 485, "loss": 1.0771, "lr": 0.00013407082313518292, "epoch": 1.6777493606138107, "percentage": 33.81, "elapsed_time": "6:29:20", "remaining_time": "12:42:04"} +{"current_steps": 165, "total_steps": 485, "loss": 1.0853, "lr": 0.0001336445836671086, "epoch": 1.6879795396419437, "percentage": 34.02, "elapsed_time": "6:31:43", "remaining_time": "12:39:41"} +{"current_steps": 166, "total_steps": 485, "loss": 1.0838, "lr": 0.0001332155590350732, "epoch": 1.6982097186700766, "percentage": 34.23, "elapsed_time": "6:34:05", "remaining_time": "12:37:19"} +{"current_steps": 167, "total_steps": 485, "loss": 1.082, "lr": 0.0001327837715135332, "epoch": 1.7084398976982098, "percentage": 34.43, "elapsed_time": "6:36:27", "remaining_time": "12:34:56"} +{"current_steps": 168, "total_steps": 485, "loss": 1.0802, "lr": 0.00013234924352039103, "epoch": 1.7186700767263428, "percentage": 34.64, "elapsed_time": "6:38:51", "remaining_time": "12:32:35"} +{"current_steps": 169, "total_steps": 485, "loss": 1.0887, "lr": 0.00013191199761583124, "epoch": 1.7289002557544757, "percentage": 34.85, "elapsed_time": "6:41:14", "remaining_time": "12:30:14"} +{"current_steps": 170, "total_steps": 485, "loss": 1.0718, "lr": 0.00013147205650114913, "epoch": 1.7391304347826086, "percentage": 35.05, "elapsed_time": "6:43:36", "remaining_time": "12:27:52"} +{"current_steps": 171, "total_steps": 485, "loss": 1.0788, "lr": 0.0001310294430175722, "epoch": 1.7493606138107416, "percentage": 35.26, "elapsed_time": "6:45:59", "remaining_time": "12:25:29"} +{"current_steps": 172, "total_steps": 485, "loss": 1.0879, "lr": 0.00013058418014507412, "epoch": 1.7595907928388748, "percentage": 35.46, "elapsed_time": "6:48:21", "remaining_time": "12:23:07"} +{"current_steps": 173, "total_steps": 485, "loss": 1.0721, "lr": 0.00013013629100118183, "epoch": 1.7698209718670077, "percentage": 35.67, "elapsed_time": "6:50:43", "remaining_time": "12:20:44"} +{"current_steps": 174, "total_steps": 485, "loss": 1.0737, "lr": 0.00012968579883977508, "epoch": 1.7800511508951407, "percentage": 35.88, "elapsed_time": "6:53:06", "remaining_time": "12:18:21"} +{"current_steps": 175, "total_steps": 485, "loss": 1.0742, "lr": 0.00012923272704987943, "epoch": 1.7902813299232738, "percentage": 36.08, "elapsed_time": "6:55:28", "remaining_time": "12:15:58"} +{"current_steps": 176, "total_steps": 485, "loss": 1.0721, "lr": 0.00012877709915445155, "epoch": 1.8005115089514065, "percentage": 36.29, "elapsed_time": "6:57:50", "remaining_time": "12:13:35"} +{"current_steps": 177, "total_steps": 485, "loss": 1.0555, "lr": 0.00012831893880915822, "epoch": 1.8107416879795397, "percentage": 36.49, "elapsed_time": "7:00:12", "remaining_time": "12:11:12"} +{"current_steps": 178, "total_steps": 485, "loss": 1.0804, "lr": 0.00012785826980114798, "epoch": 1.8209718670076727, "percentage": 36.7, "elapsed_time": "7:02:34", "remaining_time": "12:08:50"} +{"current_steps": 179, "total_steps": 485, "loss": 1.063, "lr": 0.0001273951160478163, "epoch": 1.8312020460358056, "percentage": 36.91, "elapsed_time": "7:04:57", "remaining_time": "12:06:27"} +{"current_steps": 180, "total_steps": 485, "loss": 1.0666, "lr": 0.00012692950159556358, "epoch": 1.8414322250639388, "percentage": 37.11, "elapsed_time": "7:07:19", "remaining_time": "12:04:04"} +{"current_steps": 181, "total_steps": 485, "loss": 1.0703, "lr": 0.00012646145061854697, "epoch": 1.8516624040920715, "percentage": 37.32, "elapsed_time": "7:09:41", "remaining_time": "12:01:42"} +{"current_steps": 182, "total_steps": 485, "loss": 1.0571, "lr": 0.00012599098741742504, "epoch": 1.8618925831202047, "percentage": 37.53, "elapsed_time": "7:12:05", "remaining_time": "11:59:20"} +{"current_steps": 183, "total_steps": 485, "loss": 1.0706, "lr": 0.00012551813641809622, "epoch": 1.8721227621483376, "percentage": 37.73, "elapsed_time": "7:14:27", "remaining_time": "11:56:58"} +{"current_steps": 184, "total_steps": 485, "loss": 1.0779, "lr": 0.0001250429221704306, "epoch": 1.8823529411764706, "percentage": 37.94, "elapsed_time": "7:16:50", "remaining_time": "11:54:36"} +{"current_steps": 185, "total_steps": 485, "loss": 1.064, "lr": 0.00012456536934699552, "epoch": 1.8925831202046037, "percentage": 38.14, "elapsed_time": "7:19:12", "remaining_time": "11:52:13"} +{"current_steps": 186, "total_steps": 485, "loss": 1.0585, "lr": 0.0001240855027417742, "epoch": 1.9028132992327365, "percentage": 38.35, "elapsed_time": "7:21:34", "remaining_time": "11:49:51"} +{"current_steps": 187, "total_steps": 485, "loss": 1.0672, "lr": 0.00012360334726887887, "epoch": 1.9130434782608696, "percentage": 38.56, "elapsed_time": "7:23:57", "remaining_time": "11:47:28"} +{"current_steps": 188, "total_steps": 485, "loss": 1.0713, "lr": 0.00012311892796125704, "epoch": 1.9232736572890026, "percentage": 38.76, "elapsed_time": "7:26:19", "remaining_time": "11:45:06"} +{"current_steps": 189, "total_steps": 485, "loss": 1.0536, "lr": 0.0001226322699693918, "epoch": 1.9335038363171355, "percentage": 38.97, "elapsed_time": "7:28:42", "remaining_time": "11:42:44"} +{"current_steps": 190, "total_steps": 485, "loss": 1.0807, "lr": 0.00012214339855999624, "epoch": 1.9437340153452687, "percentage": 39.18, "elapsed_time": "7:31:05", "remaining_time": "11:40:22"} +{"current_steps": 191, "total_steps": 485, "loss": 1.0777, "lr": 0.00012165233911470136, "epoch": 1.9539641943734014, "percentage": 39.38, "elapsed_time": "7:33:28", "remaining_time": "11:38:01"} +{"current_steps": 192, "total_steps": 485, "loss": 1.0525, "lr": 0.00012115911712873851, "epoch": 1.9641943734015346, "percentage": 39.59, "elapsed_time": "7:35:51", "remaining_time": "11:35:39"} +{"current_steps": 193, "total_steps": 485, "loss": 1.0617, "lr": 0.00012066375820961558, "epoch": 1.9744245524296675, "percentage": 39.79, "elapsed_time": "7:38:14", "remaining_time": "11:33:17"} +{"current_steps": 194, "total_steps": 485, "loss": 1.0682, "lr": 0.00012016628807578756, "epoch": 1.9846547314578005, "percentage": 40.0, "elapsed_time": "7:40:37", "remaining_time": "11:30:56"} +{"current_steps": 195, "total_steps": 485, "loss": 1.1518, "lr": 0.00011966673255532119, "epoch": 1.9948849104859336, "percentage": 40.21, "elapsed_time": "7:43:01", "remaining_time": "11:28:35"} +{"current_steps": 196, "total_steps": 485, "loss": 1.226, "lr": 0.00011916511758455407, "epoch": 2.0051150895140664, "percentage": 40.41, "elapsed_time": "7:45:33", "remaining_time": "11:26:27"} +{"current_steps": 197, "total_steps": 485, "loss": 1.068, "lr": 0.00011866146920674807, "epoch": 2.0153452685421995, "percentage": 40.62, "elapsed_time": "7:47:56", "remaining_time": "11:24:05"} +{"current_steps": 198, "total_steps": 485, "loss": 1.0502, "lr": 0.0001181558135707371, "epoch": 2.0255754475703327, "percentage": 40.82, "elapsed_time": "7:50:19", "remaining_time": "11:21:44"} +{"current_steps": 199, "total_steps": 485, "loss": 1.0286, "lr": 0.00011764817692956966, "epoch": 2.0358056265984654, "percentage": 41.03, "elapsed_time": "7:52:42", "remaining_time": "11:19:22"} +{"current_steps": 200, "total_steps": 485, "loss": 1.0747, "lr": 0.00011713858563914562, "epoch": 2.0460358056265986, "percentage": 41.24, "elapsed_time": "7:55:05", "remaining_time": "11:17:00"} +{"current_steps": 201, "total_steps": 485, "loss": 1.045, "lr": 0.00011662706615684803, "epoch": 2.0562659846547313, "percentage": 41.44, "elapsed_time": "7:57:27", "remaining_time": "11:14:37"} +{"current_steps": 202, "total_steps": 485, "loss": 1.0678, "lr": 0.00011611364504016935, "epoch": 2.0664961636828645, "percentage": 41.65, "elapsed_time": "7:59:50", "remaining_time": "11:12:15"} +{"current_steps": 203, "total_steps": 485, "loss": 1.0458, "lr": 0.00011559834894533275, "epoch": 2.0767263427109977, "percentage": 41.86, "elapsed_time": "8:02:12", "remaining_time": "11:09:52"} +{"current_steps": 204, "total_steps": 485, "loss": 1.0461, "lr": 0.00011508120462590794, "epoch": 2.0869565217391304, "percentage": 42.06, "elapsed_time": "8:04:35", "remaining_time": "11:07:29"} +{"current_steps": 205, "total_steps": 485, "loss": 1.0407, "lr": 0.00011456223893142238, "epoch": 2.0971867007672635, "percentage": 42.27, "elapsed_time": "8:06:57", "remaining_time": "11:05:06"} +{"current_steps": 206, "total_steps": 485, "loss": 1.0534, "lr": 0.0001140414788059672, "epoch": 2.1074168797953963, "percentage": 42.47, "elapsed_time": "8:09:20", "remaining_time": "11:02:45"} +{"current_steps": 207, "total_steps": 485, "loss": 1.0577, "lr": 0.00011351895128679823, "epoch": 2.1176470588235294, "percentage": 42.68, "elapsed_time": "8:11:42", "remaining_time": "11:00:22"} +{"current_steps": 208, "total_steps": 485, "loss": 1.0592, "lr": 0.00011299468350293232, "epoch": 2.1278772378516626, "percentage": 42.89, "elapsed_time": "8:14:05", "remaining_time": "10:57:59"} +{"current_steps": 209, "total_steps": 485, "loss": 1.069, "lr": 0.00011246870267373885, "epoch": 2.1381074168797953, "percentage": 43.09, "elapsed_time": "8:16:28", "remaining_time": "10:55:38"} +{"current_steps": 210, "total_steps": 485, "loss": 1.0454, "lr": 0.00011194103610752655, "epoch": 2.1483375959079285, "percentage": 43.3, "elapsed_time": "8:18:51", "remaining_time": "10:53:15"} +{"current_steps": 211, "total_steps": 485, "loss": 1.0723, "lr": 0.00011141171120012552, "epoch": 2.1585677749360612, "percentage": 43.51, "elapsed_time": "8:21:13", "remaining_time": "10:50:52"} +{"current_steps": 212, "total_steps": 485, "loss": 1.0428, "lr": 0.0001108807554334651, "epoch": 2.1687979539641944, "percentage": 43.71, "elapsed_time": "8:23:35", "remaining_time": "10:48:29"} +{"current_steps": 213, "total_steps": 485, "loss": 1.061, "lr": 0.00011034819637414686, "epoch": 2.1790281329923276, "percentage": 43.92, "elapsed_time": "8:25:58", "remaining_time": "10:46:07"} +{"current_steps": 214, "total_steps": 485, "loss": 1.0355, "lr": 0.00010981406167201354, "epoch": 2.1892583120204603, "percentage": 44.12, "elapsed_time": "8:28:20", "remaining_time": "10:43:44"} +{"current_steps": 215, "total_steps": 485, "loss": 1.0777, "lr": 0.0001092783790587133, "epoch": 2.1994884910485935, "percentage": 44.33, "elapsed_time": "8:30:43", "remaining_time": "10:41:21"} +{"current_steps": 216, "total_steps": 485, "loss": 1.0541, "lr": 0.00010874117634626011, "epoch": 2.209718670076726, "percentage": 44.54, "elapsed_time": "8:33:07", "remaining_time": "10:39:01"} +{"current_steps": 217, "total_steps": 485, "loss": 1.0435, "lr": 0.00010820248142558965, "epoch": 2.2199488491048593, "percentage": 44.74, "elapsed_time": "8:35:30", "remaining_time": "10:36:39"} +{"current_steps": 218, "total_steps": 485, "loss": 1.0513, "lr": 0.00010766232226511142, "epoch": 2.2301790281329925, "percentage": 44.95, "elapsed_time": "8:37:52", "remaining_time": "10:34:17"} +{"current_steps": 219, "total_steps": 485, "loss": 1.0509, "lr": 0.00010712072690925638, "epoch": 2.2404092071611252, "percentage": 45.15, "elapsed_time": "8:40:15", "remaining_time": "10:31:54"} +{"current_steps": 220, "total_steps": 485, "loss": 1.0325, "lr": 0.00010657772347702118, "epoch": 2.2506393861892584, "percentage": 45.36, "elapsed_time": "8:42:37", "remaining_time": "10:29:31"} +{"current_steps": 221, "total_steps": 485, "loss": 1.0369, "lr": 0.00010603334016050808, "epoch": 2.260869565217391, "percentage": 45.57, "elapsed_time": "8:45:00", "remaining_time": "10:27:09"} +{"current_steps": 222, "total_steps": 485, "loss": 1.0414, "lr": 0.00010548760522346138, "epoch": 2.2710997442455243, "percentage": 45.77, "elapsed_time": "8:47:22", "remaining_time": "10:24:46"} +{"current_steps": 223, "total_steps": 485, "loss": 1.056, "lr": 0.00010494054699979992, "epoch": 2.2813299232736575, "percentage": 45.98, "elapsed_time": "8:49:45", "remaining_time": "10:22:24"} +{"current_steps": 224, "total_steps": 485, "loss": 1.0573, "lr": 0.00010439219389214595, "epoch": 2.29156010230179, "percentage": 46.19, "elapsed_time": "8:52:07", "remaining_time": "10:20:01"} +{"current_steps": 225, "total_steps": 485, "loss": 1.0412, "lr": 0.0001038425743703507, "epoch": 2.3017902813299234, "percentage": 46.39, "elapsed_time": "8:54:30", "remaining_time": "10:17:38"} +{"current_steps": 226, "total_steps": 485, "loss": 1.0366, "lr": 0.00010329171697001608, "epoch": 2.312020460358056, "percentage": 46.6, "elapsed_time": "8:56:53", "remaining_time": "10:15:17"} +{"current_steps": 227, "total_steps": 485, "loss": 1.0451, "lr": 0.0001027396502910132, "epoch": 2.3222506393861893, "percentage": 46.8, "elapsed_time": "8:59:15", "remaining_time": "10:12:54"} +{"current_steps": 228, "total_steps": 485, "loss": 1.0428, "lr": 0.0001021864029959975, "epoch": 2.3324808184143224, "percentage": 47.01, "elapsed_time": "9:01:39", "remaining_time": "10:10:32"} +{"current_steps": 229, "total_steps": 485, "loss": 1.0612, "lr": 0.00010163200380892063, "epoch": 2.342710997442455, "percentage": 47.22, "elapsed_time": "9:04:01", "remaining_time": "10:08:10"} +{"current_steps": 230, "total_steps": 485, "loss": 1.0247, "lr": 0.00010107648151353916, "epoch": 2.3529411764705883, "percentage": 47.42, "elapsed_time": "9:06:25", "remaining_time": "10:05:48"} +{"current_steps": 231, "total_steps": 485, "loss": 1.0363, "lr": 0.00010051986495192008, "epoch": 2.363171355498721, "percentage": 47.63, "elapsed_time": "9:08:47", "remaining_time": "10:03:25"} +{"current_steps": 232, "total_steps": 485, "loss": 1.05, "lr": 9.99621830229434e-05, "epoch": 2.373401534526854, "percentage": 47.84, "elapsed_time": "9:11:09", "remaining_time": "10:01:02"} +{"current_steps": 233, "total_steps": 485, "loss": 1.0537, "lr": 9.94034646808018e-05, "epoch": 2.3836317135549874, "percentage": 48.04, "elapsed_time": "9:13:31", "remaining_time": "9:58:40"} +{"current_steps": 234, "total_steps": 485, "loss": 1.0273, "lr": 9.884373893349725e-05, "epoch": 2.39386189258312, "percentage": 48.25, "elapsed_time": "9:15:54", "remaining_time": "9:56:17"} +{"current_steps": 235, "total_steps": 485, "loss": 1.053, "lr": 9.828303484133515e-05, "epoch": 2.4040920716112533, "percentage": 48.45, "elapsed_time": "9:18:17", "remaining_time": "9:53:55"} +{"current_steps": 236, "total_steps": 485, "loss": 1.0364, "lr": 9.772138151541522e-05, "epoch": 2.414322250639386, "percentage": 48.66, "elapsed_time": "9:20:39", "remaining_time": "9:51:32"} +{"current_steps": 237, "total_steps": 485, "loss": 1.0331, "lr": 9.715880811612044e-05, "epoch": 2.424552429667519, "percentage": 48.87, "elapsed_time": "9:23:02", "remaining_time": "9:49:10"} +{"current_steps": 238, "total_steps": 485, "loss": 1.0323, "lr": 9.659534385160289e-05, "epoch": 2.4347826086956523, "percentage": 49.07, "elapsed_time": "9:25:25", "remaining_time": "9:46:48"} +{"current_steps": 239, "total_steps": 485, "loss": 1.0491, "lr": 9.603101797626729e-05, "epoch": 2.445012787723785, "percentage": 49.28, "elapsed_time": "9:27:47", "remaining_time": "9:44:25"} +{"current_steps": 240, "total_steps": 485, "loss": 1.0127, "lr": 9.546585978925221e-05, "epoch": 2.455242966751918, "percentage": 49.48, "elapsed_time": "9:30:10", "remaining_time": "9:42:02"} +{"current_steps": 241, "total_steps": 485, "loss": 1.0637, "lr": 9.489989863290885e-05, "epoch": 2.4654731457800514, "percentage": 49.69, "elapsed_time": "9:32:32", "remaining_time": "9:39:40"} +{"current_steps": 242, "total_steps": 485, "loss": 1.038, "lr": 9.433316389127768e-05, "epoch": 2.475703324808184, "percentage": 49.9, "elapsed_time": "9:34:55", "remaining_time": "9:37:17"} +{"current_steps": 243, "total_steps": 485, "loss": 1.0441, "lr": 9.37656849885628e-05, "epoch": 2.4859335038363173, "percentage": 50.1, "elapsed_time": "9:37:17", "remaining_time": "9:34:54"} +{"current_steps": 244, "total_steps": 485, "loss": 1.0317, "lr": 9.319749138760424e-05, "epoch": 2.49616368286445, "percentage": 50.31, "elapsed_time": "9:39:39", "remaining_time": "9:32:32"} +{"current_steps": 245, "total_steps": 485, "loss": 1.0353, "lr": 9.262861258834833e-05, "epoch": 2.506393861892583, "percentage": 50.52, "elapsed_time": "9:42:02", "remaining_time": "9:30:09"} +{"current_steps": 246, "total_steps": 485, "loss": 1.0211, "lr": 9.205907812631616e-05, "epoch": 2.516624040920716, "percentage": 50.72, "elapsed_time": "9:44:24", "remaining_time": "9:27:47"} +{"current_steps": 247, "total_steps": 485, "loss": 1.0381, "lr": 9.148891757106999e-05, "epoch": 2.526854219948849, "percentage": 50.93, "elapsed_time": "9:46:48", "remaining_time": "9:25:25"} +{"current_steps": 248, "total_steps": 485, "loss": 1.045, "lr": 9.091816052467817e-05, "epoch": 2.5370843989769822, "percentage": 51.13, "elapsed_time": "9:49:10", "remaining_time": "9:23:02"} +{"current_steps": 249, "total_steps": 485, "loss": 1.0339, "lr": 9.034683662017812e-05, "epoch": 2.547314578005115, "percentage": 51.34, "elapsed_time": "9:51:33", "remaining_time": "9:20:39"} +{"current_steps": 250, "total_steps": 485, "loss": 1.0297, "lr": 8.977497552003785e-05, "epoch": 2.557544757033248, "percentage": 51.55, "elapsed_time": "9:53:55", "remaining_time": "9:18:17"} +{"current_steps": 251, "total_steps": 485, "loss": 1.0474, "lr": 8.920260691461602e-05, "epoch": 2.5677749360613813, "percentage": 51.75, "elapsed_time": "9:56:17", "remaining_time": "9:15:54"} +{"current_steps": 252, "total_steps": 485, "loss": 1.0478, "lr": 8.862976052062034e-05, "epoch": 2.578005115089514, "percentage": 51.96, "elapsed_time": "9:58:40", "remaining_time": "9:13:32"} +{"current_steps": 253, "total_steps": 485, "loss": 1.0384, "lr": 8.805646607956467e-05, "epoch": 2.588235294117647, "percentage": 52.16, "elapsed_time": "10:01:03", "remaining_time": "9:11:09"} +{"current_steps": 254, "total_steps": 485, "loss": 1.0352, "lr": 8.748275335622506e-05, "epoch": 2.59846547314578, "percentage": 52.37, "elapsed_time": "10:03:25", "remaining_time": "9:08:46"} +{"current_steps": 255, "total_steps": 485, "loss": 1.0251, "lr": 8.69086521370942e-05, "epoch": 2.608695652173913, "percentage": 52.58, "elapsed_time": "10:05:47", "remaining_time": "9:06:24"} +{"current_steps": 256, "total_steps": 485, "loss": 1.0388, "lr": 8.633419222883508e-05, "epoch": 2.618925831202046, "percentage": 52.78, "elapsed_time": "10:08:09", "remaining_time": "9:04:01"} +{"current_steps": 257, "total_steps": 485, "loss": 1.0415, "lr": 8.575940345673337e-05, "epoch": 2.629156010230179, "percentage": 52.99, "elapsed_time": "10:10:32", "remaining_time": "9:01:38"} +{"current_steps": 258, "total_steps": 485, "loss": 1.0338, "lr": 8.518431566314901e-05, "epoch": 2.639386189258312, "percentage": 53.2, "elapsed_time": "10:12:54", "remaining_time": "8:59:16"} +{"current_steps": 259, "total_steps": 485, "loss": 1.0455, "lr": 8.460895870596675e-05, "epoch": 2.649616368286445, "percentage": 53.4, "elapsed_time": "10:15:17", "remaining_time": "8:56:53"} +{"current_steps": 260, "total_steps": 485, "loss": 1.0446, "lr": 8.4033362457046e-05, "epoch": 2.659846547314578, "percentage": 53.61, "elapsed_time": "10:17:39", "remaining_time": "8:54:30"} +{"current_steps": 261, "total_steps": 485, "loss": 1.0282, "lr": 8.345755680066993e-05, "epoch": 2.670076726342711, "percentage": 53.81, "elapsed_time": "10:20:02", "remaining_time": "8:52:08"} +{"current_steps": 262, "total_steps": 485, "loss": 1.0278, "lr": 8.288157163199389e-05, "epoch": 2.680306905370844, "percentage": 54.02, "elapsed_time": "10:22:24", "remaining_time": "8:49:45"} +{"current_steps": 263, "total_steps": 485, "loss": 1.0317, "lr": 8.230543685549333e-05, "epoch": 2.690537084398977, "percentage": 54.23, "elapsed_time": "10:24:46", "remaining_time": "8:47:22"} +{"current_steps": 264, "total_steps": 485, "loss": 1.0326, "lr": 8.17291823834111e-05, "epoch": 2.70076726342711, "percentage": 54.43, "elapsed_time": "10:27:09", "remaining_time": "8:45:00"} +{"current_steps": 265, "total_steps": 485, "loss": 1.0375, "lr": 8.115283813420459e-05, "epoch": 2.710997442455243, "percentage": 54.64, "elapsed_time": "10:29:32", "remaining_time": "8:42:38"} +{"current_steps": 266, "total_steps": 485, "loss": 1.0584, "lr": 8.057643403099221e-05, "epoch": 2.7212276214833757, "percentage": 54.85, "elapsed_time": "10:31:54", "remaining_time": "8:40:15"} +{"current_steps": 267, "total_steps": 485, "loss": 1.0395, "lr": 8e-05, "epoch": 2.731457800511509, "percentage": 55.05, "elapsed_time": "10:34:17", "remaining_time": "8:37:53"} +{"current_steps": 268, "total_steps": 485, "loss": 1.0369, "lr": 7.94235659690078e-05, "epoch": 2.741687979539642, "percentage": 55.26, "elapsed_time": "10:36:40", "remaining_time": "8:35:31"} +{"current_steps": 269, "total_steps": 485, "loss": 1.0532, "lr": 7.884716186579545e-05, "epoch": 2.7519181585677748, "percentage": 55.46, "elapsed_time": "10:39:03", "remaining_time": "8:33:08"} +{"current_steps": 270, "total_steps": 485, "loss": 1.0266, "lr": 7.827081761658892e-05, "epoch": 2.762148337595908, "percentage": 55.67, "elapsed_time": "10:41:24", "remaining_time": "8:30:45"} +{"current_steps": 271, "total_steps": 485, "loss": 1.0344, "lr": 7.76945631445067e-05, "epoch": 2.772378516624041, "percentage": 55.88, "elapsed_time": "10:43:47", "remaining_time": "8:28:22"} +{"current_steps": 272, "total_steps": 485, "loss": 1.0285, "lr": 7.711842836800614e-05, "epoch": 2.782608695652174, "percentage": 56.08, "elapsed_time": "10:46:09", "remaining_time": "8:26:00"} +{"current_steps": 273, "total_steps": 485, "loss": 1.0272, "lr": 7.654244319933009e-05, "epoch": 2.792838874680307, "percentage": 56.29, "elapsed_time": "10:48:31", "remaining_time": "8:23:37"} +{"current_steps": 274, "total_steps": 485, "loss": 1.0427, "lr": 7.596663754295404e-05, "epoch": 2.80306905370844, "percentage": 56.49, "elapsed_time": "10:50:54", "remaining_time": "8:21:14"} +{"current_steps": 275, "total_steps": 485, "loss": 1.0474, "lr": 7.539104129403327e-05, "epoch": 2.813299232736573, "percentage": 56.7, "elapsed_time": "10:53:17", "remaining_time": "8:18:52"} +{"current_steps": 276, "total_steps": 485, "loss": 1.0445, "lr": 7.4815684336851e-05, "epoch": 2.8235294117647056, "percentage": 56.91, "elapsed_time": "10:55:40", "remaining_time": "8:16:30"} +{"current_steps": 277, "total_steps": 485, "loss": 1.04, "lr": 7.424059654326664e-05, "epoch": 2.833759590792839, "percentage": 57.11, "elapsed_time": "10:58:02", "remaining_time": "8:14:07"} +{"current_steps": 278, "total_steps": 485, "loss": 1.0406, "lr": 7.366580777116495e-05, "epoch": 2.843989769820972, "percentage": 57.32, "elapsed_time": "11:00:25", "remaining_time": "8:11:45"} +{"current_steps": 279, "total_steps": 485, "loss": 1.0321, "lr": 7.309134786290583e-05, "epoch": 2.8542199488491047, "percentage": 57.53, "elapsed_time": "11:02:47", "remaining_time": "8:09:22"} +{"current_steps": 280, "total_steps": 485, "loss": 1.0371, "lr": 7.251724664377497e-05, "epoch": 2.864450127877238, "percentage": 57.73, "elapsed_time": "11:05:10", "remaining_time": "8:06:59"} +{"current_steps": 281, "total_steps": 485, "loss": 1.039, "lr": 7.194353392043534e-05, "epoch": 2.874680306905371, "percentage": 57.94, "elapsed_time": "11:07:32", "remaining_time": "8:04:37"} +{"current_steps": 282, "total_steps": 485, "loss": 1.0364, "lr": 7.13702394793797e-05, "epoch": 2.8849104859335037, "percentage": 58.14, "elapsed_time": "11:09:54", "remaining_time": "8:02:14"} +{"current_steps": 283, "total_steps": 485, "loss": 1.0277, "lr": 7.079739308538399e-05, "epoch": 2.895140664961637, "percentage": 58.35, "elapsed_time": "11:12:17", "remaining_time": "7:59:51"} +{"current_steps": 284, "total_steps": 485, "loss": 1.0275, "lr": 7.022502447996215e-05, "epoch": 2.90537084398977, "percentage": 58.56, "elapsed_time": "11:14:39", "remaining_time": "7:57:29"} +{"current_steps": 285, "total_steps": 485, "loss": 1.0381, "lr": 6.965316337982191e-05, "epoch": 2.915601023017903, "percentage": 58.76, "elapsed_time": "11:17:02", "remaining_time": "7:55:06"} +{"current_steps": 286, "total_steps": 485, "loss": 1.0342, "lr": 6.908183947532184e-05, "epoch": 2.9258312020460355, "percentage": 58.97, "elapsed_time": "11:19:24", "remaining_time": "7:52:44"} +{"current_steps": 287, "total_steps": 485, "loss": 1.0377, "lr": 6.851108242893002e-05, "epoch": 2.9360613810741687, "percentage": 59.18, "elapsed_time": "11:21:47", "remaining_time": "7:50:21"} +{"current_steps": 288, "total_steps": 485, "loss": 1.0428, "lr": 6.794092187368387e-05, "epoch": 2.946291560102302, "percentage": 59.38, "elapsed_time": "11:24:10", "remaining_time": "7:47:59"} +{"current_steps": 289, "total_steps": 485, "loss": 1.0503, "lr": 6.737138741165168e-05, "epoch": 2.9565217391304346, "percentage": 59.59, "elapsed_time": "11:26:32", "remaining_time": "7:45:36"} +{"current_steps": 290, "total_steps": 485, "loss": 1.035, "lr": 6.680250861239581e-05, "epoch": 2.9667519181585678, "percentage": 59.79, "elapsed_time": "11:28:55", "remaining_time": "7:43:14"} +{"current_steps": 291, "total_steps": 485, "loss": 1.0313, "lr": 6.623431501143723e-05, "epoch": 2.976982097186701, "percentage": 60.0, "elapsed_time": "11:31:17", "remaining_time": "7:40:51"} +{"current_steps": 292, "total_steps": 485, "loss": 1.0564, "lr": 6.566683610872231e-05, "epoch": 2.9872122762148337, "percentage": 60.21, "elapsed_time": "11:33:40", "remaining_time": "7:38:29"} +{"current_steps": 293, "total_steps": 485, "loss": 1.2037, "lr": 6.510010136709118e-05, "epoch": 2.997442455242967, "percentage": 60.41, "elapsed_time": "11:36:02", "remaining_time": "7:36:06"} +{"current_steps": 294, "total_steps": 485, "loss": 1.1394, "lr": 6.453414021074781e-05, "epoch": 3.0076726342710995, "percentage": 60.62, "elapsed_time": "11:38:34", "remaining_time": "7:33:50"} +{"current_steps": 295, "total_steps": 485, "loss": 1.0223, "lr": 6.396898202373277e-05, "epoch": 3.0179028132992327, "percentage": 60.82, "elapsed_time": "11:40:56", "remaining_time": "7:31:27"} +{"current_steps": 296, "total_steps": 485, "loss": 1.0336, "lr": 6.340465614839714e-05, "epoch": 3.028132992327366, "percentage": 61.03, "elapsed_time": "11:43:19", "remaining_time": "7:29:04"} +{"current_steps": 297, "total_steps": 485, "loss": 1.0107, "lr": 6.284119188387957e-05, "epoch": 3.0383631713554986, "percentage": 61.24, "elapsed_time": "11:45:41", "remaining_time": "7:26:42"} +{"current_steps": 298, "total_steps": 485, "loss": 1.0134, "lr": 6.227861848458481e-05, "epoch": 3.0485933503836318, "percentage": 61.44, "elapsed_time": "11:48:03", "remaining_time": "7:24:19"} +{"current_steps": 299, "total_steps": 485, "loss": 1.0289, "lr": 6.171696515866488e-05, "epoch": 3.0588235294117645, "percentage": 61.65, "elapsed_time": "11:50:26", "remaining_time": "7:21:56"} +{"current_steps": 300, "total_steps": 485, "loss": 1.0297, "lr": 6.115626106650273e-05, "epoch": 3.0690537084398977, "percentage": 61.86, "elapsed_time": "11:52:49", "remaining_time": "7:19:34"} +{"current_steps": 301, "total_steps": 485, "loss": 1.0282, "lr": 6.059653531919823e-05, "epoch": 3.079283887468031, "percentage": 62.06, "elapsed_time": "11:55:11", "remaining_time": "7:17:11"} +{"current_steps": 302, "total_steps": 485, "loss": 1.0531, "lr": 6.0037816977056625e-05, "epoch": 3.0895140664961636, "percentage": 62.27, "elapsed_time": "11:57:34", "remaining_time": "7:14:49"} +{"current_steps": 303, "total_steps": 485, "loss": 1.0113, "lr": 5.9480135048079964e-05, "epoch": 3.0997442455242967, "percentage": 62.47, "elapsed_time": "11:59:56", "remaining_time": "7:12:26"} +{"current_steps": 304, "total_steps": 485, "loss": 1.0394, "lr": 5.892351848646087e-05, "epoch": 3.10997442455243, "percentage": 62.68, "elapsed_time": "12:02:19", "remaining_time": "7:10:04"} +{"current_steps": 305, "total_steps": 485, "loss": 1.0365, "lr": 5.836799619107937e-05, "epoch": 3.1202046035805626, "percentage": 62.89, "elapsed_time": "12:04:41", "remaining_time": "7:07:41"} +{"current_steps": 306, "total_steps": 485, "loss": 1.0039, "lr": 5.781359700400254e-05, "epoch": 3.130434782608696, "percentage": 63.09, "elapsed_time": "12:07:04", "remaining_time": "7:05:18"} +{"current_steps": 307, "total_steps": 485, "loss": 1.0243, "lr": 5.726034970898682e-05, "epoch": 3.1406649616368285, "percentage": 63.3, "elapsed_time": "12:09:26", "remaining_time": "7:02:56"} +{"current_steps": 308, "total_steps": 485, "loss": 1.0314, "lr": 5.670828302998393e-05, "epoch": 3.1508951406649617, "percentage": 63.51, "elapsed_time": "12:11:49", "remaining_time": "7:00:33"} +{"current_steps": 309, "total_steps": 485, "loss": 1.0485, "lr": 5.6157425629649314e-05, "epoch": 3.1611253196930944, "percentage": 63.71, "elapsed_time": "12:14:11", "remaining_time": "6:58:10"} +{"current_steps": 310, "total_steps": 485, "loss": 1.018, "lr": 5.560780610785406e-05, "epoch": 3.1713554987212276, "percentage": 63.92, "elapsed_time": "12:16:34", "remaining_time": "6:55:48"} +{"current_steps": 311, "total_steps": 485, "loss": 1.0061, "lr": 5.5059453000200125e-05, "epoch": 3.1815856777493607, "percentage": 64.12, "elapsed_time": "12:18:56", "remaining_time": "6:53:25"} +{"current_steps": 312, "total_steps": 485, "loss": 1.0205, "lr": 5.451239477653864e-05, "epoch": 3.1918158567774935, "percentage": 64.33, "elapsed_time": "12:21:19", "remaining_time": "6:51:03"} +{"current_steps": 313, "total_steps": 485, "loss": 1.0226, "lr": 5.3966659839491936e-05, "epoch": 3.2020460358056266, "percentage": 64.54, "elapsed_time": "12:23:41", "remaining_time": "6:48:40"} +{"current_steps": 314, "total_steps": 485, "loss": 1.001, "lr": 5.342227652297887e-05, "epoch": 3.21227621483376, "percentage": 64.74, "elapsed_time": "12:26:03", "remaining_time": "6:46:17"} +{"current_steps": 315, "total_steps": 485, "loss": 1.031, "lr": 5.287927309074365e-05, "epoch": 3.2225063938618925, "percentage": 64.95, "elapsed_time": "12:28:25", "remaining_time": "6:43:54"} +{"current_steps": 316, "total_steps": 485, "loss": 1.015, "lr": 5.233767773488859e-05, "epoch": 3.2327365728900257, "percentage": 65.15, "elapsed_time": "12:30:48", "remaining_time": "6:41:32"} +{"current_steps": 317, "total_steps": 485, "loss": 1.0053, "lr": 5.179751857441036e-05, "epoch": 3.2429667519181584, "percentage": 65.36, "elapsed_time": "12:33:10", "remaining_time": "6:39:09"} +{"current_steps": 318, "total_steps": 485, "loss": 1.0211, "lr": 5.1258823653739914e-05, "epoch": 3.2531969309462916, "percentage": 65.57, "elapsed_time": "12:35:32", "remaining_time": "6:36:46"} +{"current_steps": 319, "total_steps": 485, "loss": 1.0143, "lr": 5.0721620941286735e-05, "epoch": 3.2634271099744243, "percentage": 65.77, "elapsed_time": "12:37:54", "remaining_time": "6:34:23"} +{"current_steps": 320, "total_steps": 485, "loss": 1.0375, "lr": 5.018593832798649e-05, "epoch": 3.2736572890025575, "percentage": 65.98, "elapsed_time": "12:40:16", "remaining_time": "6:32:01"} +{"current_steps": 321, "total_steps": 485, "loss": 1.0253, "lr": 4.965180362585315e-05, "epoch": 3.2838874680306906, "percentage": 66.19, "elapsed_time": "12:42:39", "remaining_time": "6:29:38"} +{"current_steps": 322, "total_steps": 485, "loss": 1.0209, "lr": 4.911924456653494e-05, "epoch": 3.2941176470588234, "percentage": 66.39, "elapsed_time": "12:45:01", "remaining_time": "6:27:15"} +{"current_steps": 323, "total_steps": 485, "loss": 1.0112, "lr": 4.8588288799874514e-05, "epoch": 3.3043478260869565, "percentage": 66.6, "elapsed_time": "12:47:23", "remaining_time": "6:24:53"} +{"current_steps": 324, "total_steps": 485, "loss": 1.0077, "lr": 4.805896389247348e-05, "epoch": 3.3145780051150897, "percentage": 66.8, "elapsed_time": "12:49:46", "remaining_time": "6:22:30"} +{"current_steps": 325, "total_steps": 485, "loss": 1.0229, "lr": 4.753129732626116e-05, "epoch": 3.3248081841432224, "percentage": 67.01, "elapsed_time": "12:52:08", "remaining_time": "6:20:08"} +{"current_steps": 326, "total_steps": 485, "loss": 1.0184, "lr": 4.70053164970677e-05, "epoch": 3.3350383631713556, "percentage": 67.22, "elapsed_time": "12:54:31", "remaining_time": "6:17:45"} +{"current_steps": 327, "total_steps": 485, "loss": 1.0058, "lr": 4.6481048713201825e-05, "epoch": 3.3452685421994883, "percentage": 67.42, "elapsed_time": "12:56:53", "remaining_time": "6:15:22"} +{"current_steps": 328, "total_steps": 485, "loss": 1.0278, "lr": 4.595852119403282e-05, "epoch": 3.3554987212276215, "percentage": 67.63, "elapsed_time": "12:59:16", "remaining_time": "6:13:00"} +{"current_steps": 329, "total_steps": 485, "loss": 1.012, "lr": 4.543776106857765e-05, "epoch": 3.3657289002557547, "percentage": 67.84, "elapsed_time": "13:01:38", "remaining_time": "6:10:37"} +{"current_steps": 330, "total_steps": 485, "loss": 1.0242, "lr": 4.491879537409211e-05, "epoch": 3.3759590792838874, "percentage": 68.04, "elapsed_time": "13:04:01", "remaining_time": "6:08:15"} +{"current_steps": 331, "total_steps": 485, "loss": 1.0078, "lr": 4.4401651054667274e-05, "epoch": 3.3861892583120206, "percentage": 68.25, "elapsed_time": "13:06:24", "remaining_time": "6:05:52"} +{"current_steps": 332, "total_steps": 485, "loss": 1.0141, "lr": 4.3886354959830625e-05, "epoch": 3.3964194373401533, "percentage": 68.45, "elapsed_time": "13:08:47", "remaining_time": "6:03:30"} +{"current_steps": 333, "total_steps": 485, "loss": 1.031, "lr": 4.3372933843152e-05, "epoch": 3.4066496163682864, "percentage": 68.66, "elapsed_time": "13:11:10", "remaining_time": "6:01:08"} +{"current_steps": 334, "total_steps": 485, "loss": 1.0261, "lr": 4.2861414360854387e-05, "epoch": 3.4168797953964196, "percentage": 68.87, "elapsed_time": "13:13:32", "remaining_time": "5:58:45"} +{"current_steps": 335, "total_steps": 485, "loss": 1.0191, "lr": 4.2351823070430376e-05, "epoch": 3.4271099744245523, "percentage": 69.07, "elapsed_time": "13:15:55", "remaining_time": "5:56:22"} +{"current_steps": 336, "total_steps": 485, "loss": 1.0267, "lr": 4.184418642926289e-05, "epoch": 3.4373401534526855, "percentage": 69.28, "elapsed_time": "13:18:17", "remaining_time": "5:54:00"} +{"current_steps": 337, "total_steps": 485, "loss": 1.025, "lr": 4.133853079325196e-05, "epoch": 3.4475703324808182, "percentage": 69.48, "elapsed_time": "13:20:39", "remaining_time": "5:51:37"} +{"current_steps": 338, "total_steps": 485, "loss": 1.0459, "lr": 4.083488241544595e-05, "epoch": 3.4578005115089514, "percentage": 69.69, "elapsed_time": "13:23:02", "remaining_time": "5:49:15"} +{"current_steps": 339, "total_steps": 485, "loss": 1.0112, "lr": 4.033326744467882e-05, "epoch": 3.4680306905370846, "percentage": 69.9, "elapsed_time": "13:25:24", "remaining_time": "5:46:52"} +{"current_steps": 340, "total_steps": 485, "loss": 1.0306, "lr": 3.983371192421246e-05, "epoch": 3.4782608695652173, "percentage": 70.1, "elapsed_time": "13:27:47", "remaining_time": "5:44:29"} +{"current_steps": 341, "total_steps": 485, "loss": 1.0185, "lr": 3.933624179038446e-05, "epoch": 3.4884910485933505, "percentage": 70.31, "elapsed_time": "13:30:10", "remaining_time": "5:42:07"} +{"current_steps": 342, "total_steps": 485, "loss": 1.0293, "lr": 3.884088287126151e-05, "epoch": 3.498721227621483, "percentage": 70.52, "elapsed_time": "13:32:33", "remaining_time": "5:39:45"} +{"current_steps": 343, "total_steps": 485, "loss": 1.0458, "lr": 3.834766088529867e-05, "epoch": 3.5089514066496164, "percentage": 70.72, "elapsed_time": "13:34:55", "remaining_time": "5:37:22"} +{"current_steps": 344, "total_steps": 485, "loss": 1.0056, "lr": 3.785660144000378e-05, "epoch": 3.5191815856777495, "percentage": 70.93, "elapsed_time": "13:37:18", "remaining_time": "5:34:59"} +{"current_steps": 345, "total_steps": 485, "loss": 1.0297, "lr": 3.736773003060821e-05, "epoch": 3.5294117647058822, "percentage": 71.13, "elapsed_time": "13:39:40", "remaining_time": "5:32:37"} +{"current_steps": 346, "total_steps": 485, "loss": 1.0416, "lr": 3.688107203874301e-05, "epoch": 3.5396419437340154, "percentage": 71.34, "elapsed_time": "13:42:02", "remaining_time": "5:30:14"} +{"current_steps": 347, "total_steps": 485, "loss": 1.0204, "lr": 3.6396652731121136e-05, "epoch": 3.5498721227621486, "percentage": 71.55, "elapsed_time": "13:44:25", "remaining_time": "5:27:52"} +{"current_steps": 348, "total_steps": 485, "loss": 1.0281, "lr": 3.5914497258225815e-05, "epoch": 3.5601023017902813, "percentage": 71.75, "elapsed_time": "13:46:48", "remaining_time": "5:25:29"} +{"current_steps": 349, "total_steps": 485, "loss": 1.0271, "lr": 3.543463065300452e-05, "epoch": 3.5703324808184145, "percentage": 71.96, "elapsed_time": "13:49:10", "remaining_time": "5:23:07"} +{"current_steps": 350, "total_steps": 485, "loss": 1.0253, "lr": 3.49570778295694e-05, "epoch": 3.580562659846547, "percentage": 72.16, "elapsed_time": "13:51:32", "remaining_time": "5:20:44"} +{"current_steps": 351, "total_steps": 485, "loss": 1.0155, "lr": 3.448186358190383e-05, "epoch": 3.5907928388746804, "percentage": 72.37, "elapsed_time": "13:53:55", "remaining_time": "5:18:22"} +{"current_steps": 352, "total_steps": 485, "loss": 1.0316, "lr": 3.400901258257501e-05, "epoch": 3.601023017902813, "percentage": 72.58, "elapsed_time": "13:56:18", "remaining_time": "5:15:59"} +{"current_steps": 353, "total_steps": 485, "loss": 1.0147, "lr": 3.3538549381453046e-05, "epoch": 3.6112531969309463, "percentage": 72.78, "elapsed_time": "13:58:40", "remaining_time": "5:13:36"} +{"current_steps": 354, "total_steps": 485, "loss": 1.0158, "lr": 3.307049840443644e-05, "epoch": 3.6214833759590794, "percentage": 72.99, "elapsed_time": "14:01:03", "remaining_time": "5:11:14"} +{"current_steps": 355, "total_steps": 485, "loss": 1.0219, "lr": 3.2604883952183716e-05, "epoch": 3.631713554987212, "percentage": 73.2, "elapsed_time": "14:03:25", "remaining_time": "5:08:51"} +{"current_steps": 356, "total_steps": 485, "loss": 1.0165, "lr": 3.214173019885202e-05, "epoch": 3.6419437340153453, "percentage": 73.4, "elapsed_time": "14:05:48", "remaining_time": "5:06:29"} +{"current_steps": 357, "total_steps": 485, "loss": 1.0193, "lr": 3.1681061190841806e-05, "epoch": 3.6521739130434785, "percentage": 73.61, "elapsed_time": "14:08:10", "remaining_time": "5:04:06"} +{"current_steps": 358, "total_steps": 485, "loss": 1.0309, "lr": 3.122290084554845e-05, "epoch": 3.662404092071611, "percentage": 73.81, "elapsed_time": "14:10:33", "remaining_time": "5:01:44"} +{"current_steps": 359, "total_steps": 485, "loss": 1.0106, "lr": 3.076727295012059e-05, "epoch": 3.6726342710997444, "percentage": 74.02, "elapsed_time": "14:12:56", "remaining_time": "4:59:21"} +{"current_steps": 360, "total_steps": 485, "loss": 1.0237, "lr": 3.031420116022493e-05, "epoch": 3.682864450127877, "percentage": 74.23, "elapsed_time": "14:15:20", "remaining_time": "4:56:59"} +{"current_steps": 361, "total_steps": 485, "loss": 1.0071, "lr": 2.98637089988182e-05, "epoch": 3.6930946291560103, "percentage": 74.43, "elapsed_time": "14:17:42", "remaining_time": "4:54:36"} +{"current_steps": 362, "total_steps": 485, "loss": 1.0213, "lr": 2.94158198549259e-05, "epoch": 3.703324808184143, "percentage": 74.64, "elapsed_time": "14:20:06", "remaining_time": "4:52:14"} +{"current_steps": 363, "total_steps": 485, "loss": 1.0114, "lr": 2.8970556982427836e-05, "epoch": 3.713554987212276, "percentage": 74.85, "elapsed_time": "14:22:28", "remaining_time": "4:49:52"} +{"current_steps": 364, "total_steps": 485, "loss": 1.0141, "lr": 2.852794349885087e-05, "epoch": 3.7237851662404093, "percentage": 75.05, "elapsed_time": "14:24:50", "remaining_time": "4:47:29"} +{"current_steps": 365, "total_steps": 485, "loss": 1.0309, "lr": 2.8088002384168783e-05, "epoch": 3.734015345268542, "percentage": 75.26, "elapsed_time": "14:27:13", "remaining_time": "4:45:06"} +{"current_steps": 366, "total_steps": 485, "loss": 1.0133, "lr": 2.765075647960898e-05, "epoch": 3.7442455242966752, "percentage": 75.46, "elapsed_time": "14:29:35", "remaining_time": "4:42:44"} +{"current_steps": 367, "total_steps": 485, "loss": 1.0158, "lr": 2.7216228486466856e-05, "epoch": 3.7544757033248084, "percentage": 75.67, "elapsed_time": "14:31:57", "remaining_time": "4:40:21"} +{"current_steps": 368, "total_steps": 485, "loss": 1.02, "lr": 2.678444096492683e-05, "epoch": 3.764705882352941, "percentage": 75.88, "elapsed_time": "14:34:20", "remaining_time": "4:37:59"} +{"current_steps": 369, "total_steps": 485, "loss": 1.0185, "lr": 2.6355416332891404e-05, "epoch": 3.7749360613810743, "percentage": 76.08, "elapsed_time": "14:36:42", "remaining_time": "4:35:36"} +{"current_steps": 370, "total_steps": 485, "loss": 1.0038, "lr": 2.592917686481708e-05, "epoch": 3.785166240409207, "percentage": 76.29, "elapsed_time": "14:39:05", "remaining_time": "4:33:13"} +{"current_steps": 371, "total_steps": 485, "loss": 1.0376, "lr": 2.5505744690557846e-05, "epoch": 3.79539641943734, "percentage": 76.49, "elapsed_time": "14:41:27", "remaining_time": "4:30:51"} +{"current_steps": 372, "total_steps": 485, "loss": 1.0358, "lr": 2.508514179421629e-05, "epoch": 3.805626598465473, "percentage": 76.7, "elapsed_time": "14:43:49", "remaining_time": "4:28:28"} +{"current_steps": 373, "total_steps": 485, "loss": 1.0211, "lr": 2.4667390013002254e-05, "epoch": 3.815856777493606, "percentage": 76.91, "elapsed_time": "14:46:12", "remaining_time": "4:26:05"} +{"current_steps": 374, "total_steps": 485, "loss": 1.0332, "lr": 2.425251103609898e-05, "epoch": 3.8260869565217392, "percentage": 77.11, "elapsed_time": "14:48:35", "remaining_time": "4:23:43"} +{"current_steps": 375, "total_steps": 485, "loss": 1.0143, "lr": 2.3840526403537095e-05, "epoch": 3.836317135549872, "percentage": 77.32, "elapsed_time": "14:50:57", "remaining_time": "4:21:20"} +{"current_steps": 376, "total_steps": 485, "loss": 1.0173, "lr": 2.3431457505076205e-05, "epoch": 3.846547314578005, "percentage": 77.53, "elapsed_time": "14:53:19", "remaining_time": "4:18:58"} +{"current_steps": 377, "total_steps": 485, "loss": 1.0362, "lr": 2.3025325579094498e-05, "epoch": 3.8567774936061383, "percentage": 77.73, "elapsed_time": "14:55:42", "remaining_time": "4:16:35"} +{"current_steps": 378, "total_steps": 485, "loss": 1.0124, "lr": 2.2622151711485962e-05, "epoch": 3.867007672634271, "percentage": 77.94, "elapsed_time": "14:58:04", "remaining_time": "4:14:12"} +{"current_steps": 379, "total_steps": 485, "loss": 1.0139, "lr": 2.2221956834565647e-05, "epoch": 3.877237851662404, "percentage": 78.14, "elapsed_time": "15:00:26", "remaining_time": "4:11:50"} +{"current_steps": 380, "total_steps": 485, "loss": 1.0523, "lr": 2.1824761725982874e-05, "epoch": 3.887468030690537, "percentage": 78.35, "elapsed_time": "15:02:49", "remaining_time": "4:09:27"} +{"current_steps": 381, "total_steps": 485, "loss": 0.991, "lr": 2.1430587007642513e-05, "epoch": 3.89769820971867, "percentage": 78.56, "elapsed_time": "15:05:11", "remaining_time": "4:07:05"} +{"current_steps": 382, "total_steps": 485, "loss": 1.0355, "lr": 2.1039453144634364e-05, "epoch": 3.907928388746803, "percentage": 78.76, "elapsed_time": "15:07:34", "remaining_time": "4:04:42"} +{"current_steps": 383, "total_steps": 485, "loss": 1.015, "lr": 2.0651380444170527e-05, "epoch": 3.918158567774936, "percentage": 78.97, "elapsed_time": "15:09:56", "remaining_time": "4:02:20"} +{"current_steps": 384, "total_steps": 485, "loss": 1.0229, "lr": 2.026638905453111e-05, "epoch": 3.928388746803069, "percentage": 79.18, "elapsed_time": "15:12:19", "remaining_time": "3:59:57"} +{"current_steps": 385, "total_steps": 485, "loss": 1.0135, "lr": 1.9884498964018233e-05, "epoch": 3.938618925831202, "percentage": 79.38, "elapsed_time": "15:14:41", "remaining_time": "3:57:35"} +{"current_steps": 386, "total_steps": 485, "loss": 1.0044, "lr": 1.9505729999918194e-05, "epoch": 3.948849104859335, "percentage": 79.59, "elapsed_time": "15:17:04", "remaining_time": "3:55:12"} +{"current_steps": 387, "total_steps": 485, "loss": 1.0103, "lr": 1.913010182747196e-05, "epoch": 3.959079283887468, "percentage": 79.79, "elapsed_time": "15:19:26", "remaining_time": "3:52:49"} +{"current_steps": 388, "total_steps": 485, "loss": 1.0491, "lr": 1.875763394885441e-05, "epoch": 3.969309462915601, "percentage": 80.0, "elapsed_time": "15:21:49", "remaining_time": "3:50:27"} +{"current_steps": 389, "total_steps": 485, "loss": 1.0355, "lr": 1.8388345702161556e-05, "epoch": 3.979539641943734, "percentage": 80.21, "elapsed_time": "15:24:11", "remaining_time": "3:48:04"} +{"current_steps": 390, "total_steps": 485, "loss": 1.021, "lr": 1.8022256260406756e-05, "epoch": 3.9897698209718673, "percentage": 80.41, "elapsed_time": "15:26:34", "remaining_time": "3:45:42"} +{"current_steps": 391, "total_steps": 485, "loss": 1.288, "lr": 1.765938463052506e-05, "epoch": 4.0, "percentage": 80.62, "elapsed_time": "15:28:56", "remaining_time": "3:43:19"} +{"current_steps": 392, "total_steps": 485, "loss": 1.001, "lr": 1.729974965238651e-05, "epoch": 4.010230179028133, "percentage": 80.82, "elapsed_time": "15:31:27", "remaining_time": "3:40:59"} +{"current_steps": 393, "total_steps": 485, "loss": 1.015, "lr": 1.6943369997818066e-05, "epoch": 4.020460358056266, "percentage": 81.03, "elapsed_time": "15:33:51", "remaining_time": "3:38:36"} +{"current_steps": 394, "total_steps": 485, "loss": 1.0076, "lr": 1.659026416963401e-05, "epoch": 4.030690537084399, "percentage": 81.24, "elapsed_time": "15:36:13", "remaining_time": "3:36:14"} +{"current_steps": 395, "total_steps": 485, "loss": 1.0148, "lr": 1.6240450500675393e-05, "epoch": 4.040920716112532, "percentage": 81.44, "elapsed_time": "15:38:36", "remaining_time": "3:33:51"} +{"current_steps": 396, "total_steps": 485, "loss": 1.0016, "lr": 1.5893947152858285e-05, "epoch": 4.051150895140665, "percentage": 81.65, "elapsed_time": "15:40:58", "remaining_time": "3:31:28"} +{"current_steps": 397, "total_steps": 485, "loss": 1.0043, "lr": 1.55507721162307e-05, "epoch": 4.061381074168798, "percentage": 81.86, "elapsed_time": "15:43:20", "remaining_time": "3:29:06"} +{"current_steps": 398, "total_steps": 485, "loss": 1.0288, "lr": 1.5210943208038634e-05, "epoch": 4.071611253196931, "percentage": 82.06, "elapsed_time": "15:45:44", "remaining_time": "3:26:43"} +{"current_steps": 399, "total_steps": 485, "loss": 1.0302, "lr": 1.4874478071801055e-05, "epoch": 4.081841432225064, "percentage": 82.27, "elapsed_time": "15:48:06", "remaining_time": "3:24:21"} +{"current_steps": 400, "total_steps": 485, "loss": 1.0021, "lr": 1.454139417639377e-05, "epoch": 4.092071611253197, "percentage": 82.47, "elapsed_time": "15:50:29", "remaining_time": "3:21:58"} +{"current_steps": 401, "total_steps": 485, "loss": 1.008, "lr": 1.4211708815142599e-05, "epoch": 4.10230179028133, "percentage": 82.68, "elapsed_time": "15:52:51", "remaining_time": "3:19:36"} +{"current_steps": 402, "total_steps": 485, "loss": 1.0082, "lr": 1.3885439104925387e-05, "epoch": 4.112531969309463, "percentage": 82.89, "elapsed_time": "15:55:13", "remaining_time": "3:17:13"} +{"current_steps": 403, "total_steps": 485, "loss": 1.0103, "lr": 1.3562601985283358e-05, "epoch": 4.122762148337596, "percentage": 83.09, "elapsed_time": "15:57:35", "remaining_time": "3:14:50"} +{"current_steps": 404, "total_steps": 485, "loss": 1.0186, "lr": 1.3243214217541751e-05, "epoch": 4.132992327365729, "percentage": 83.3, "elapsed_time": "15:59:57", "remaining_time": "3:12:28"} +{"current_steps": 405, "total_steps": 485, "loss": 1.0103, "lr": 1.2927292383939407e-05, "epoch": 4.143222506393862, "percentage": 83.51, "elapsed_time": "16:02:20", "remaining_time": "3:10:05"} +{"current_steps": 406, "total_steps": 485, "loss": 1.0172, "lr": 1.2614852886767932e-05, "epoch": 4.153452685421995, "percentage": 83.71, "elapsed_time": "16:04:42", "remaining_time": "3:07:42"} +{"current_steps": 407, "total_steps": 485, "loss": 1.0172, "lr": 1.2305911947520159e-05, "epoch": 4.163682864450128, "percentage": 83.92, "elapsed_time": "16:07:04", "remaining_time": "3:05:20"} +{"current_steps": 408, "total_steps": 485, "loss": 1.0254, "lr": 1.2000485606047837e-05, "epoch": 4.173913043478261, "percentage": 84.12, "elapsed_time": "16:09:27", "remaining_time": "3:02:57"} +{"current_steps": 409, "total_steps": 485, "loss": 1.025, "lr": 1.1698589719728911e-05, "epoch": 4.1841432225063935, "percentage": 84.33, "elapsed_time": "16:11:49", "remaining_time": "3:00:35"} +{"current_steps": 410, "total_steps": 485, "loss": 1.011, "lr": 1.1400239962644294e-05, "epoch": 4.194373401534527, "percentage": 84.54, "elapsed_time": "16:14:12", "remaining_time": "2:58:12"} +{"current_steps": 411, "total_steps": 485, "loss": 1.0064, "lr": 1.1105451824763933e-05, "epoch": 4.20460358056266, "percentage": 84.74, "elapsed_time": "16:16:34", "remaining_time": "2:55:49"} +{"current_steps": 412, "total_steps": 485, "loss": 1.0049, "lr": 1.0814240611142765e-05, "epoch": 4.2148337595907925, "percentage": 84.95, "elapsed_time": "16:18:56", "remaining_time": "2:53:27"} +{"current_steps": 413, "total_steps": 485, "loss": 1.0039, "lr": 1.0526621441125946e-05, "epoch": 4.225063938618926, "percentage": 85.15, "elapsed_time": "16:21:18", "remaining_time": "2:51:04"} +{"current_steps": 414, "total_steps": 485, "loss": 1.0204, "lr": 1.0242609247563924e-05, "epoch": 4.235294117647059, "percentage": 85.36, "elapsed_time": "16:23:41", "remaining_time": "2:48:42"} +{"current_steps": 415, "total_steps": 485, "loss": 1.0178, "lr": 9.962218776037234e-06, "epoch": 4.245524296675192, "percentage": 85.57, "elapsed_time": "16:26:04", "remaining_time": "2:46:19"} +{"current_steps": 416, "total_steps": 485, "loss": 1.0144, "lr": 9.68546458409077e-06, "epoch": 4.255754475703325, "percentage": 85.77, "elapsed_time": "16:28:26", "remaining_time": "2:43:56"} +{"current_steps": 417, "total_steps": 485, "loss": 1.0008, "lr": 9.41236104047806e-06, "epoch": 4.265984654731458, "percentage": 85.98, "elapsed_time": "16:30:49", "remaining_time": "2:41:34"} +{"current_steps": 418, "total_steps": 485, "loss": 1.0102, "lr": 9.14292232441528e-06, "epoch": 4.276214833759591, "percentage": 86.19, "elapsed_time": "16:33:12", "remaining_time": "2:39:11"} +{"current_steps": 419, "total_steps": 485, "loss": 0.997, "lr": 8.877162424845012e-06, "epoch": 4.286445012787723, "percentage": 86.39, "elapsed_time": "16:35:34", "remaining_time": "2:36:49"} +{"current_steps": 420, "total_steps": 485, "loss": 1.0204, "lr": 8.615095139710044e-06, "epoch": 4.296675191815857, "percentage": 86.6, "elapsed_time": "16:37:56", "remaining_time": "2:34:26"} +{"current_steps": 421, "total_steps": 485, "loss": 1.0286, "lr": 8.356734075236858e-06, "epoch": 4.30690537084399, "percentage": 86.8, "elapsed_time": "16:40:19", "remaining_time": "2:32:04"} +{"current_steps": 422, "total_steps": 485, "loss": 0.9999, "lr": 8.102092645229392e-06, "epoch": 4.3171355498721224, "percentage": 87.01, "elapsed_time": "16:42:44", "remaining_time": "2:29:41"} +{"current_steps": 423, "total_steps": 485, "loss": 1.0157, "lr": 7.8511840703725e-06, "epoch": 4.327365728900256, "percentage": 87.22, "elapsed_time": "16:45:06", "remaining_time": "2:27:19"} +{"current_steps": 424, "total_steps": 485, "loss": 1.0177, "lr": 7.604021377545518e-06, "epoch": 4.337595907928389, "percentage": 87.42, "elapsed_time": "16:47:29", "remaining_time": "2:24:56"} +{"current_steps": 425, "total_steps": 485, "loss": 1.0053, "lr": 7.36061739914601e-06, "epoch": 4.3478260869565215, "percentage": 87.63, "elapsed_time": "16:49:51", "remaining_time": "2:22:34"} +{"current_steps": 426, "total_steps": 485, "loss": 1.0116, "lr": 7.120984772423507e-06, "epoch": 4.358056265984655, "percentage": 87.84, "elapsed_time": "16:52:14", "remaining_time": "2:20:11"} +{"current_steps": 427, "total_steps": 485, "loss": 1.0157, "lr": 6.88513593882334e-06, "epoch": 4.368286445012788, "percentage": 88.04, "elapsed_time": "16:54:36", "remaining_time": "2:17:48"} +{"current_steps": 428, "total_steps": 485, "loss": 1.0321, "lr": 6.653083143340748e-06, "epoch": 4.378516624040921, "percentage": 88.25, "elapsed_time": "16:56:58", "remaining_time": "2:15:26"} +{"current_steps": 429, "total_steps": 485, "loss": 1.0166, "lr": 6.4248384338851146e-06, "epoch": 4.388746803069053, "percentage": 88.45, "elapsed_time": "16:59:21", "remaining_time": "2:13:03"} +{"current_steps": 430, "total_steps": 485, "loss": 1.0155, "lr": 6.2004136606544515e-06, "epoch": 4.398976982097187, "percentage": 88.66, "elapsed_time": "17:01:43", "remaining_time": "2:10:41"} +{"current_steps": 431, "total_steps": 485, "loss": 1.0268, "lr": 5.979820475520202e-06, "epoch": 4.40920716112532, "percentage": 88.87, "elapsed_time": "17:04:05", "remaining_time": "2:08:18"} +{"current_steps": 432, "total_steps": 485, "loss": 1.0094, "lr": 5.763070331422151e-06, "epoch": 4.419437340153452, "percentage": 89.07, "elapsed_time": "17:06:28", "remaining_time": "2:05:55"} +{"current_steps": 433, "total_steps": 485, "loss": 1.0117, "lr": 5.550174481773969e-06, "epoch": 4.429667519181586, "percentage": 89.28, "elapsed_time": "17:08:50", "remaining_time": "2:03:33"} +{"current_steps": 434, "total_steps": 485, "loss": 1.024, "lr": 5.341143979878851e-06, "epoch": 4.439897698209719, "percentage": 89.48, "elapsed_time": "17:11:13", "remaining_time": "2:01:10"} +{"current_steps": 435, "total_steps": 485, "loss": 1.0068, "lr": 5.135989678355664e-06, "epoch": 4.450127877237851, "percentage": 89.69, "elapsed_time": "17:13:35", "remaining_time": "1:58:48"} +{"current_steps": 436, "total_steps": 485, "loss": 1.0144, "lr": 4.934722228575481e-06, "epoch": 4.460358056265985, "percentage": 89.9, "elapsed_time": "17:15:57", "remaining_time": "1:56:25"} +{"current_steps": 437, "total_steps": 485, "loss": 1.0149, "lr": 4.7373520801085705e-06, "epoch": 4.470588235294118, "percentage": 90.1, "elapsed_time": "17:18:20", "remaining_time": "1:54:03"} +{"current_steps": 438, "total_steps": 485, "loss": 1.0209, "lr": 4.543889480181944e-06, "epoch": 4.4808184143222505, "percentage": 90.31, "elapsed_time": "17:20:43", "remaining_time": "1:51:40"} +{"current_steps": 439, "total_steps": 485, "loss": 1.0229, "lr": 4.354344473147194e-06, "epoch": 4.491048593350383, "percentage": 90.52, "elapsed_time": "17:23:05", "remaining_time": "1:49:17"} +{"current_steps": 440, "total_steps": 485, "loss": 1.0093, "lr": 4.1687268999591164e-06, "epoch": 4.501278772378517, "percentage": 90.72, "elapsed_time": "17:25:27", "remaining_time": "1:46:55"} +{"current_steps": 441, "total_steps": 485, "loss": 1.0227, "lr": 3.98704639766474e-06, "epoch": 4.5115089514066495, "percentage": 90.93, "elapsed_time": "17:27:50", "remaining_time": "1:44:32"} +{"current_steps": 442, "total_steps": 485, "loss": 1.0206, "lr": 3.809312398903e-06, "epoch": 4.521739130434782, "percentage": 91.13, "elapsed_time": "17:30:12", "remaining_time": "1:42:10"} +{"current_steps": 443, "total_steps": 485, "loss": 1.0061, "lr": 3.6355341314149216e-06, "epoch": 4.531969309462916, "percentage": 91.34, "elapsed_time": "17:32:34", "remaining_time": "1:39:47"} +{"current_steps": 444, "total_steps": 485, "loss": 1.0001, "lr": 3.465720617564676e-06, "epoch": 4.542199488491049, "percentage": 91.55, "elapsed_time": "17:34:57", "remaining_time": "1:37:25"} +{"current_steps": 445, "total_steps": 485, "loss": 1.0179, "lr": 3.299880673871023e-06, "epoch": 4.552429667519181, "percentage": 91.75, "elapsed_time": "17:37:20", "remaining_time": "1:35:02"} +{"current_steps": 446, "total_steps": 485, "loss": 1.0261, "lr": 3.138022910549632e-06, "epoch": 4.562659846547315, "percentage": 91.96, "elapsed_time": "17:39:42", "remaining_time": "1:32:39"} +{"current_steps": 447, "total_steps": 485, "loss": 0.9983, "lr": 2.980155731066017e-06, "epoch": 4.572890025575448, "percentage": 92.16, "elapsed_time": "17:42:05", "remaining_time": "1:30:17"} +{"current_steps": 448, "total_steps": 485, "loss": 1.0232, "lr": 2.8262873316992556e-06, "epoch": 4.58312020460358, "percentage": 92.37, "elapsed_time": "17:44:27", "remaining_time": "1:27:54"} +{"current_steps": 449, "total_steps": 485, "loss": 1.0065, "lr": 2.676425701116463e-06, "epoch": 4.593350383631714, "percentage": 92.58, "elapsed_time": "17:46:49", "remaining_time": "1:25:32"} +{"current_steps": 450, "total_steps": 485, "loss": 1.0117, "lr": 2.530578619957993e-06, "epoch": 4.603580562659847, "percentage": 92.78, "elapsed_time": "17:49:12", "remaining_time": "1:23:09"} +{"current_steps": 451, "total_steps": 485, "loss": 0.9904, "lr": 2.3887536604334784e-06, "epoch": 4.6138107416879794, "percentage": 92.99, "elapsed_time": "17:51:34", "remaining_time": "1:20:47"} +{"current_steps": 452, "total_steps": 485, "loss": 1.018, "lr": 2.2509581859287576e-06, "epoch": 4.624040920716112, "percentage": 93.2, "elapsed_time": "17:53:57", "remaining_time": "1:18:24"} +{"current_steps": 453, "total_steps": 485, "loss": 1.0224, "lr": 2.117199350623462e-06, "epoch": 4.634271099744246, "percentage": 93.4, "elapsed_time": "17:56:19", "remaining_time": "1:16:01"} +{"current_steps": 454, "total_steps": 485, "loss": 1.0256, "lr": 1.987484099119712e-06, "epoch": 4.6445012787723785, "percentage": 93.61, "elapsed_time": "17:58:42", "remaining_time": "1:13:39"} +{"current_steps": 455, "total_steps": 485, "loss": 1.0126, "lr": 1.8618191660814356e-06, "epoch": 4.654731457800511, "percentage": 93.81, "elapsed_time": "18:01:04", "remaining_time": "1:11:16"} +{"current_steps": 456, "total_steps": 485, "loss": 1.0064, "lr": 1.7402110758847834e-06, "epoch": 4.664961636828645, "percentage": 94.02, "elapsed_time": "18:03:26", "remaining_time": "1:08:54"} +{"current_steps": 457, "total_steps": 485, "loss": 1.0015, "lr": 1.6226661422794033e-06, "epoch": 4.675191815856778, "percentage": 94.23, "elapsed_time": "18:05:48", "remaining_time": "1:06:31"} +{"current_steps": 458, "total_steps": 485, "loss": 1.0195, "lr": 1.5091904680605862e-06, "epoch": 4.68542199488491, "percentage": 94.43, "elapsed_time": "18:08:12", "remaining_time": "1:04:09"} +{"current_steps": 459, "total_steps": 485, "loss": 1.0313, "lr": 1.3997899447524277e-06, "epoch": 4.695652173913043, "percentage": 94.64, "elapsed_time": "18:10:34", "remaining_time": "1:01:46"} +{"current_steps": 460, "total_steps": 485, "loss": 1.0074, "lr": 1.294470252302009e-06, "epoch": 4.705882352941177, "percentage": 94.85, "elapsed_time": "18:12:57", "remaining_time": "0:59:23"} +{"current_steps": 461, "total_steps": 485, "loss": 1.0073, "lr": 1.193236858784408e-06, "epoch": 4.716112531969309, "percentage": 95.05, "elapsed_time": "18:15:19", "remaining_time": "0:57:01"} +{"current_steps": 462, "total_steps": 485, "loss": 1.0217, "lr": 1.0960950201188524e-06, "epoch": 4.726342710997442, "percentage": 95.26, "elapsed_time": "18:17:41", "remaining_time": "0:54:38"} +{"current_steps": 463, "total_steps": 485, "loss": 1.0167, "lr": 1.003049779795866e-06, "epoch": 4.736572890025576, "percentage": 95.46, "elapsed_time": "18:20:04", "remaining_time": "0:52:16"} +{"current_steps": 464, "total_steps": 485, "loss": 1.0207, "lr": 9.141059686153419e-07, "epoch": 4.746803069053708, "percentage": 95.67, "elapsed_time": "18:22:26", "remaining_time": "0:49:53"} +{"current_steps": 465, "total_steps": 485, "loss": 1.0169, "lr": 8.292682044358114e-07, "epoch": 4.757033248081841, "percentage": 95.88, "elapsed_time": "18:24:49", "remaining_time": "0:47:31"} +{"current_steps": 466, "total_steps": 485, "loss": 1.0276, "lr": 7.485408919346171e-07, "epoch": 4.767263427109975, "percentage": 96.08, "elapsed_time": "18:27:11", "remaining_time": "0:45:08"} +{"current_steps": 467, "total_steps": 485, "loss": 1.0108, "lr": 6.719282223793056e-07, "epoch": 4.7774936061381075, "percentage": 96.29, "elapsed_time": "18:29:34", "remaining_time": "0:42:46"} +{"current_steps": 468, "total_steps": 485, "loss": 1.0213, "lr": 5.994341734099429e-07, "epoch": 4.78772378516624, "percentage": 96.49, "elapsed_time": "18:31:56", "remaining_time": "0:40:23"} +{"current_steps": 469, "total_steps": 485, "loss": 0.9962, "lr": 5.310625088326671e-07, "epoch": 4.797953964194374, "percentage": 96.7, "elapsed_time": "18:34:18", "remaining_time": "0:38:00"} +{"current_steps": 470, "total_steps": 485, "loss": 1.0079, "lr": 4.6681677842421724e-07, "epoch": 4.8081841432225065, "percentage": 96.91, "elapsed_time": "18:36:40", "remaining_time": "0:35:38"} +{"current_steps": 471, "total_steps": 485, "loss": 1.0025, "lr": 4.067003177476991e-07, "epoch": 4.818414322250639, "percentage": 97.11, "elapsed_time": "18:39:03", "remaining_time": "0:33:15"} +{"current_steps": 472, "total_steps": 485, "loss": 1.0173, "lr": 3.507162479793369e-07, "epoch": 4.828644501278772, "percentage": 97.32, "elapsed_time": "18:41:25", "remaining_time": "0:30:53"} +{"current_steps": 473, "total_steps": 485, "loss": 1.0001, "lr": 2.9886747574646936e-07, "epoch": 4.838874680306906, "percentage": 97.53, "elapsed_time": "18:43:47", "remaining_time": "0:28:30"} +{"current_steps": 474, "total_steps": 485, "loss": 1.0062, "lr": 2.511566929766396e-07, "epoch": 4.849104859335038, "percentage": 97.73, "elapsed_time": "18:46:10", "remaining_time": "0:26:08"} +{"current_steps": 475, "total_steps": 485, "loss": 1.0195, "lr": 2.075863767577957e-07, "epoch": 4.859335038363171, "percentage": 97.94, "elapsed_time": "18:48:32", "remaining_time": "0:23:45"} +{"current_steps": 476, "total_steps": 485, "loss": 1.0159, "lr": 1.681587892097536e-07, "epoch": 4.869565217391305, "percentage": 98.14, "elapsed_time": "18:50:54", "remaining_time": "0:21:22"} +{"current_steps": 477, "total_steps": 485, "loss": 1.0233, "lr": 1.3287597736667323e-07, "epoch": 4.879795396419437, "percentage": 98.35, "elapsed_time": "18:53:16", "remaining_time": "0:19:00"} +{"current_steps": 478, "total_steps": 485, "loss": 1.0188, "lr": 1.0173977307082361e-07, "epoch": 4.89002557544757, "percentage": 98.56, "elapsed_time": "18:55:38", "remaining_time": "0:16:37"} +{"current_steps": 479, "total_steps": 485, "loss": 1.0235, "lr": 7.475179287748547e-08, "epoch": 4.900255754475703, "percentage": 98.76, "elapsed_time": "18:58:01", "remaining_time": "0:14:15"} +{"current_steps": 480, "total_steps": 485, "loss": 1.0018, "lr": 5.191343797096515e-08, "epoch": 4.910485933503836, "percentage": 98.97, "elapsed_time": "19:00:23", "remaining_time": "0:11:52"} +{"current_steps": 481, "total_steps": 485, "loss": 1.009, "lr": 3.322589409190613e-08, "epoch": 4.920716112531969, "percentage": 99.18, "elapsed_time": "19:02:45", "remaining_time": "0:09:30"} +{"current_steps": 482, "total_steps": 485, "loss": 1.0273, "lr": 1.8690131475711527e-08, "epoch": 4.930946291560103, "percentage": 99.38, "elapsed_time": "19:05:08", "remaining_time": "0:07:07"} +{"current_steps": 483, "total_steps": 485, "loss": 1.0322, "lr": 8.306904802148907e-09, "epoch": 4.9411764705882355, "percentage": 99.59, "elapsed_time": "19:07:30", "remaining_time": "0:04:45"} +{"current_steps": 484, "total_steps": 485, "loss": 1.0035, "lr": 2.07675315618161e-09, "epoch": 4.951406649616368, "percentage": 99.79, "elapsed_time": "19:09:52", "remaining_time": "0:02:22"} +{"current_steps": 485, "total_steps": 485, "loss": 1.0124, "lr": 0.0, "epoch": 4.961636828644501, "percentage": 100.0, "elapsed_time": "19:12:15", "remaining_time": "0:00:00"} +{"current_steps": 485, "total_steps": 485, "epoch": 4.961636828644501, "percentage": 100.0, "elapsed_time": "19:12:30", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..d451855 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,3437 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.961636828644501, + "eval_steps": 500, + "global_step": 485, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010230179028132993, + "grad_norm": 2.9682868161196363, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.4918, + "step": 1 + }, + { + "epoch": 0.020460358056265986, + "grad_norm": 2.9796653677109557, + "learning_rate": 6.530612244897959e-06, + "loss": 1.4922, + "step": 2 + }, + { + "epoch": 0.030690537084398978, + "grad_norm": 2.8660911829024167, + "learning_rate": 9.795918367346939e-06, + "loss": 1.503, + "step": 3 + }, + { + "epoch": 0.04092071611253197, + "grad_norm": 1.9805780511052813, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.4675, + "step": 4 + }, + { + "epoch": 0.05115089514066496, + "grad_norm": 1.998834758427541, + "learning_rate": 1.63265306122449e-05, + "loss": 1.4533, + "step": 5 + }, + { + "epoch": 0.061381074168797956, + "grad_norm": 2.050049149995988, + "learning_rate": 1.9591836734693877e-05, + "loss": 1.4122, + "step": 6 + }, + { + "epoch": 0.07161125319693094, + "grad_norm": 2.399845329221154, + "learning_rate": 2.2857142857142858e-05, + "loss": 1.4056, + "step": 7 + }, + { + "epoch": 0.08184143222506395, + "grad_norm": 1.7232075850783724, + "learning_rate": 2.6122448979591835e-05, + "loss": 1.3833, + "step": 8 + }, + { + "epoch": 0.09207161125319693, + "grad_norm": 1.410904151553637, + "learning_rate": 2.938775510204082e-05, + "loss": 1.3843, + "step": 9 + }, + { + "epoch": 0.10230179028132992, + "grad_norm": 1.1900105733063466, + "learning_rate": 3.26530612244898e-05, + "loss": 1.3515, + "step": 10 + }, + { + "epoch": 0.11253196930946291, + "grad_norm": 1.2160713923639859, + "learning_rate": 3.591836734693878e-05, + "loss": 1.3224, + "step": 11 + }, + { + "epoch": 0.12276214833759591, + "grad_norm": 1.0389820905559795, + "learning_rate": 3.9183673469387755e-05, + "loss": 1.322, + "step": 12 + }, + { + "epoch": 0.1329923273657289, + "grad_norm": 0.9606384846443378, + "learning_rate": 4.244897959183674e-05, + "loss": 1.2956, + "step": 13 + }, + { + "epoch": 0.1432225063938619, + "grad_norm": 1.032658088356086, + "learning_rate": 4.5714285714285716e-05, + "loss": 1.3067, + "step": 14 + }, + { + "epoch": 0.1534526854219949, + "grad_norm": 1.006086286942022, + "learning_rate": 4.89795918367347e-05, + "loss": 1.2961, + "step": 15 + }, + { + "epoch": 0.1636828644501279, + "grad_norm": 1.1671471070443895, + "learning_rate": 5.224489795918367e-05, + "loss": 1.301, + "step": 16 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 0.8672174854035504, + "learning_rate": 5.551020408163266e-05, + "loss": 1.2656, + "step": 17 + }, + { + "epoch": 0.18414322250639387, + "grad_norm": 0.7154082384610629, + "learning_rate": 5.877551020408164e-05, + "loss": 1.246, + "step": 18 + }, + { + "epoch": 0.19437340153452684, + "grad_norm": 0.694176256293125, + "learning_rate": 6.204081632653062e-05, + "loss": 1.2565, + "step": 19 + }, + { + "epoch": 0.20460358056265984, + "grad_norm": 0.7538553851540746, + "learning_rate": 6.53061224489796e-05, + "loss": 1.241, + "step": 20 + }, + { + "epoch": 0.21483375959079284, + "grad_norm": 0.7434156335821637, + "learning_rate": 6.857142857142857e-05, + "loss": 1.232, + "step": 21 + }, + { + "epoch": 0.22506393861892582, + "grad_norm": 1.2022520877750795, + "learning_rate": 7.183673469387756e-05, + "loss": 1.2567, + "step": 22 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 1.150528684217217, + "learning_rate": 7.510204081632654e-05, + "loss": 1.2448, + "step": 23 + }, + { + "epoch": 0.24552429667519182, + "grad_norm": 0.8365697619839101, + "learning_rate": 7.836734693877551e-05, + "loss": 1.2357, + "step": 24 + }, + { + "epoch": 0.2557544757033248, + "grad_norm": 1.1469314213901645, + "learning_rate": 8.16326530612245e-05, + "loss": 1.2164, + "step": 25 + }, + { + "epoch": 0.2659846547314578, + "grad_norm": 0.9677681397432715, + "learning_rate": 8.489795918367348e-05, + "loss": 1.2115, + "step": 26 + }, + { + "epoch": 0.27621483375959077, + "grad_norm": 1.106826210998024, + "learning_rate": 8.816326530612245e-05, + "loss": 1.2004, + "step": 27 + }, + { + "epoch": 0.2864450127877238, + "grad_norm": 1.1744803496009477, + "learning_rate": 9.142857142857143e-05, + "loss": 1.2235, + "step": 28 + }, + { + "epoch": 0.2966751918158568, + "grad_norm": 1.3610877277166293, + "learning_rate": 9.469387755102041e-05, + "loss": 1.208, + "step": 29 + }, + { + "epoch": 0.3069053708439898, + "grad_norm": 0.7447757731907216, + "learning_rate": 9.79591836734694e-05, + "loss": 1.2118, + "step": 30 + }, + { + "epoch": 0.3171355498721228, + "grad_norm": 1.3296162158652725, + "learning_rate": 0.00010122448979591839, + "loss": 1.2032, + "step": 31 + }, + { + "epoch": 0.3273657289002558, + "grad_norm": 1.144264777414957, + "learning_rate": 0.00010448979591836734, + "loss": 1.1912, + "step": 32 + }, + { + "epoch": 0.3375959079283887, + "grad_norm": 1.0342650793984924, + "learning_rate": 0.00010775510204081634, + "loss": 1.2025, + "step": 33 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 1.9336445124108894, + "learning_rate": 0.00011102040816326532, + "loss": 1.2118, + "step": 34 + }, + { + "epoch": 0.35805626598465473, + "grad_norm": 0.9195357493046576, + "learning_rate": 0.0001142857142857143, + "loss": 1.1986, + "step": 35 + }, + { + "epoch": 0.36828644501278773, + "grad_norm": 1.588300333871633, + "learning_rate": 0.00011755102040816328, + "loss": 1.1939, + "step": 36 + }, + { + "epoch": 0.37851662404092073, + "grad_norm": 1.4710948512542918, + "learning_rate": 0.00012081632653061224, + "loss": 1.1997, + "step": 37 + }, + { + "epoch": 0.3887468030690537, + "grad_norm": 1.2164757524877552, + "learning_rate": 0.00012408163265306124, + "loss": 1.1791, + "step": 38 + }, + { + "epoch": 0.3989769820971867, + "grad_norm": 1.4375602427525511, + "learning_rate": 0.0001273469387755102, + "loss": 1.1896, + "step": 39 + }, + { + "epoch": 0.4092071611253197, + "grad_norm": 1.5736548612793597, + "learning_rate": 0.0001306122448979592, + "loss": 1.1876, + "step": 40 + }, + { + "epoch": 0.4194373401534527, + "grad_norm": 1.0735537900578096, + "learning_rate": 0.00013387755102040817, + "loss": 1.1709, + "step": 41 + }, + { + "epoch": 0.4296675191815857, + "grad_norm": 1.8510870420688348, + "learning_rate": 0.00013714285714285713, + "loss": 1.1795, + "step": 42 + }, + { + "epoch": 0.4398976982097187, + "grad_norm": 1.1080619334489261, + "learning_rate": 0.00014040816326530613, + "loss": 1.1753, + "step": 43 + }, + { + "epoch": 0.45012787723785164, + "grad_norm": 1.9521299298489614, + "learning_rate": 0.00014367346938775512, + "loss": 1.2028, + "step": 44 + }, + { + "epoch": 0.46035805626598464, + "grad_norm": 1.41657060512908, + "learning_rate": 0.0001469387755102041, + "loss": 1.1822, + "step": 45 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.1977389520948274, + "learning_rate": 0.00015020408163265308, + "loss": 1.1836, + "step": 46 + }, + { + "epoch": 0.48081841432225064, + "grad_norm": 1.5288668435257222, + "learning_rate": 0.00015346938775510205, + "loss": 1.1901, + "step": 47 + }, + { + "epoch": 0.49104859335038364, + "grad_norm": 0.9552067161901755, + "learning_rate": 0.00015673469387755102, + "loss": 1.1562, + "step": 48 + }, + { + "epoch": 0.5012787723785166, + "grad_norm": 1.6547871414680237, + "learning_rate": 0.00016, + "loss": 1.1794, + "step": 49 + }, + { + "epoch": 0.5115089514066496, + "grad_norm": 1.8368364441109388, + "learning_rate": 0.00015999792324684382, + "loss": 1.1877, + "step": 50 + }, + { + "epoch": 0.5217391304347826, + "grad_norm": 1.1718896076543888, + "learning_rate": 0.00015999169309519789, + "loss": 1.1585, + "step": 51 + }, + { + "epoch": 0.5319693094629157, + "grad_norm": 2.015243385621693, + "learning_rate": 0.0001599813098685243, + "loss": 1.1732, + "step": 52 + }, + { + "epoch": 0.5421994884910486, + "grad_norm": 1.638588399609178, + "learning_rate": 0.0001599667741059081, + "loss": 1.1617, + "step": 53 + }, + { + "epoch": 0.5524296675191815, + "grad_norm": 1.1352479883236335, + "learning_rate": 0.00015994808656202904, + "loss": 1.1584, + "step": 54 + }, + { + "epoch": 0.5626598465473146, + "grad_norm": 1.2074962340041349, + "learning_rate": 0.00015992524820712252, + "loss": 1.1478, + "step": 55 + }, + { + "epoch": 0.5728900255754475, + "grad_norm": 1.4410034215063858, + "learning_rate": 0.00015989826022692918, + "loss": 1.1493, + "step": 56 + }, + { + "epoch": 0.5831202046035806, + "grad_norm": 0.8679551934822172, + "learning_rate": 0.00015986712402263334, + "loss": 1.148, + "step": 57 + }, + { + "epoch": 0.5933503836317136, + "grad_norm": 1.3034239933412972, + "learning_rate": 0.00015983184121079024, + "loss": 1.1532, + "step": 58 + }, + { + "epoch": 0.6035805626598465, + "grad_norm": 0.9780209051309727, + "learning_rate": 0.00015979241362324223, + "loss": 1.1313, + "step": 59 + }, + { + "epoch": 0.6138107416879796, + "grad_norm": 1.057690857573897, + "learning_rate": 0.0001597488433070234, + "loss": 1.138, + "step": 60 + }, + { + "epoch": 0.6240409207161125, + "grad_norm": 1.456389761257225, + "learning_rate": 0.00015970113252425356, + "loss": 1.1546, + "step": 61 + }, + { + "epoch": 0.6342710997442456, + "grad_norm": 2.3692148862526485, + "learning_rate": 0.00015964928375202068, + "loss": 1.163, + "step": 62 + }, + { + "epoch": 0.6445012787723785, + "grad_norm": 1.047447123295706, + "learning_rate": 0.00015959329968225232, + "loss": 1.1564, + "step": 63 + }, + { + "epoch": 0.6547314578005116, + "grad_norm": 3.5383718747156534, + "learning_rate": 0.0001595331832215758, + "loss": 1.1684, + "step": 64 + }, + { + "epoch": 0.6649616368286445, + "grad_norm": 2.968800505278846, + "learning_rate": 0.00015946893749116734, + "loss": 1.1788, + "step": 65 + }, + { + "epoch": 0.6751918158567775, + "grad_norm": 1.9185023523212743, + "learning_rate": 0.00015940056582659006, + "loss": 1.1537, + "step": 66 + }, + { + "epoch": 0.6854219948849105, + "grad_norm": 1.404723257524876, + "learning_rate": 0.0001593280717776207, + "loss": 1.1487, + "step": 67 + }, + { + "epoch": 0.6956521739130435, + "grad_norm": 1.9443271708932357, + "learning_rate": 0.0001592514591080654, + "loss": 1.15, + "step": 68 + }, + { + "epoch": 0.7058823529411765, + "grad_norm": 1.0325757666109745, + "learning_rate": 0.0001591707317955642, + "loss": 1.1487, + "step": 69 + }, + { + "epoch": 0.7161125319693095, + "grad_norm": 2.1150324509757006, + "learning_rate": 0.00015908589403138468, + "loss": 1.1766, + "step": 70 + }, + { + "epoch": 0.7263427109974424, + "grad_norm": 1.7663965705197828, + "learning_rate": 0.00015899695022020415, + "loss": 1.1464, + "step": 71 + }, + { + "epoch": 0.7365728900255755, + "grad_norm": 1.4582930405137742, + "learning_rate": 0.00015890390497988116, + "loss": 1.1487, + "step": 72 + }, + { + "epoch": 0.7468030690537084, + "grad_norm": 1.394985742880279, + "learning_rate": 0.0001588067631412156, + "loss": 1.1447, + "step": 73 + }, + { + "epoch": 0.7570332480818415, + "grad_norm": 1.1897882316318342, + "learning_rate": 0.000158705529747698, + "loss": 1.1299, + "step": 74 + }, + { + "epoch": 0.7672634271099744, + "grad_norm": 1.1626082561212874, + "learning_rate": 0.0001586002100552476, + "loss": 1.1473, + "step": 75 + }, + { + "epoch": 0.7774936061381074, + "grad_norm": 1.0736945268719569, + "learning_rate": 0.00015849080953193943, + "loss": 1.1368, + "step": 76 + }, + { + "epoch": 0.7877237851662404, + "grad_norm": 1.5654343283068946, + "learning_rate": 0.00015837733385772062, + "loss": 1.123, + "step": 77 + }, + { + "epoch": 0.7979539641943734, + "grad_norm": 0.8523221287975669, + "learning_rate": 0.00015825978892411522, + "loss": 1.1403, + "step": 78 + }, + { + "epoch": 0.8081841432225064, + "grad_norm": 1.035617914076512, + "learning_rate": 0.00015813818083391858, + "loss": 1.1229, + "step": 79 + }, + { + "epoch": 0.8184143222506394, + "grad_norm": 1.4757207346915424, + "learning_rate": 0.0001580125159008803, + "loss": 1.129, + "step": 80 + }, + { + "epoch": 0.8286445012787724, + "grad_norm": 1.0711086873355355, + "learning_rate": 0.00015788280064937655, + "loss": 1.1351, + "step": 81 + }, + { + "epoch": 0.8388746803069054, + "grad_norm": 1.2110898160193528, + "learning_rate": 0.00015774904181407127, + "loss": 1.1334, + "step": 82 + }, + { + "epoch": 0.8491048593350383, + "grad_norm": 1.2411189602993498, + "learning_rate": 0.00015761124633956652, + "loss": 1.1363, + "step": 83 + }, + { + "epoch": 0.8593350383631714, + "grad_norm": 1.1341606871236802, + "learning_rate": 0.00015746942138004203, + "loss": 1.1142, + "step": 84 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 0.9942900226210764, + "learning_rate": 0.00015732357429888355, + "loss": 1.13, + "step": 85 + }, + { + "epoch": 0.8797953964194374, + "grad_norm": 1.1602205285909497, + "learning_rate": 0.00015717371266830076, + "loss": 1.1225, + "step": 86 + }, + { + "epoch": 0.8900255754475703, + "grad_norm": 1.2773792554197163, + "learning_rate": 0.000157019844268934, + "loss": 1.1237, + "step": 87 + }, + { + "epoch": 0.9002557544757033, + "grad_norm": 1.0698753174294156, + "learning_rate": 0.0001568619770894504, + "loss": 1.1223, + "step": 88 + }, + { + "epoch": 0.9104859335038363, + "grad_norm": 1.1279028063380314, + "learning_rate": 0.000156700119326129, + "loss": 1.1117, + "step": 89 + }, + { + "epoch": 0.9207161125319693, + "grad_norm": 1.047346608848377, + "learning_rate": 0.00015653427938243532, + "loss": 1.1195, + "step": 90 + }, + { + "epoch": 0.9309462915601023, + "grad_norm": 1.1063879184523018, + "learning_rate": 0.0001563644658685851, + "loss": 1.1112, + "step": 91 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 1.403473791127022, + "learning_rate": 0.00015619068760109703, + "loss": 1.1334, + "step": 92 + }, + { + "epoch": 0.9514066496163683, + "grad_norm": 1.1738079096105647, + "learning_rate": 0.00015601295360233528, + "loss": 1.123, + "step": 93 + }, + { + "epoch": 0.9616368286445013, + "grad_norm": 1.2622400384084083, + "learning_rate": 0.0001558312731000409, + "loss": 1.1245, + "step": 94 + }, + { + "epoch": 0.9718670076726342, + "grad_norm": 1.5103980615396817, + "learning_rate": 0.00015564565552685282, + "loss": 1.1159, + "step": 95 + }, + { + "epoch": 0.9820971867007673, + "grad_norm": 0.9924832092739461, + "learning_rate": 0.00015545611051981807, + "loss": 1.1086, + "step": 96 + }, + { + "epoch": 0.9923273657289002, + "grad_norm": 1.2306800508291864, + "learning_rate": 0.00015526264791989144, + "loss": 1.1396, + "step": 97 + }, + { + "epoch": 1.0025575447570332, + "grad_norm": 1.7155461125801181, + "learning_rate": 0.00015506527777142453, + "loss": 1.4022, + "step": 98 + }, + { + "epoch": 1.0127877237851663, + "grad_norm": 1.0329131057981322, + "learning_rate": 0.00015486401032164434, + "loss": 1.0962, + "step": 99 + }, + { + "epoch": 1.0230179028132993, + "grad_norm": 1.5658900134919402, + "learning_rate": 0.00015465885602012117, + "loss": 1.1252, + "step": 100 + }, + { + "epoch": 1.0332480818414322, + "grad_norm": 0.8015741030349912, + "learning_rate": 0.00015444982551822604, + "loss": 1.1044, + "step": 101 + }, + { + "epoch": 1.0434782608695652, + "grad_norm": 1.3138293387151978, + "learning_rate": 0.00015423692966857788, + "loss": 1.1138, + "step": 102 + }, + { + "epoch": 1.0537084398976981, + "grad_norm": 0.8910062603103389, + "learning_rate": 0.00015402017952447983, + "loss": 1.0804, + "step": 103 + }, + { + "epoch": 1.0639386189258313, + "grad_norm": 1.4375718907843533, + "learning_rate": 0.00015379958633934555, + "loss": 1.1212, + "step": 104 + }, + { + "epoch": 1.0741687979539642, + "grad_norm": 1.2721636337229971, + "learning_rate": 0.0001535751615661149, + "loss": 1.1, + "step": 105 + }, + { + "epoch": 1.0843989769820972, + "grad_norm": 1.0105810629121363, + "learning_rate": 0.00015334691685665928, + "loss": 1.096, + "step": 106 + }, + { + "epoch": 1.0946291560102301, + "grad_norm": 1.0900734221547557, + "learning_rate": 0.00015311486406117668, + "loss": 1.0882, + "step": 107 + }, + { + "epoch": 1.104859335038363, + "grad_norm": 1.1336612336243286, + "learning_rate": 0.00015287901522757652, + "loss": 1.1214, + "step": 108 + }, + { + "epoch": 1.1150895140664963, + "grad_norm": 1.1569430105162481, + "learning_rate": 0.000152639382600854, + "loss": 1.0963, + "step": 109 + }, + { + "epoch": 1.1253196930946292, + "grad_norm": 1.1569271586356937, + "learning_rate": 0.00015239597862245452, + "loss": 1.0855, + "step": 110 + }, + { + "epoch": 1.1355498721227621, + "grad_norm": 1.137534239049028, + "learning_rate": 0.00015214881592962753, + "loss": 1.094, + "step": 111 + }, + { + "epoch": 1.145780051150895, + "grad_norm": 1.2673351118799951, + "learning_rate": 0.00015189790735477062, + "loss": 1.0819, + "step": 112 + }, + { + "epoch": 1.156010230179028, + "grad_norm": 0.8230868750468249, + "learning_rate": 0.00015164326592476316, + "loss": 1.099, + "step": 113 + }, + { + "epoch": 1.1662404092071612, + "grad_norm": 1.0550234081618097, + "learning_rate": 0.00015138490486028998, + "loss": 1.0887, + "step": 114 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 1.2808691339484497, + "learning_rate": 0.000151122837575155, + "loss": 1.0879, + "step": 115 + }, + { + "epoch": 1.186700767263427, + "grad_norm": 1.0207057907200907, + "learning_rate": 0.00015085707767558475, + "loss": 1.0974, + "step": 116 + }, + { + "epoch": 1.19693094629156, + "grad_norm": 1.7224394340286069, + "learning_rate": 0.00015058763895952194, + "loss": 1.1016, + "step": 117 + }, + { + "epoch": 1.207161125319693, + "grad_norm": 0.5474668831444041, + "learning_rate": 0.00015031453541590925, + "loss": 1.0789, + "step": 118 + }, + { + "epoch": 1.2173913043478262, + "grad_norm": 1.2675844883943275, + "learning_rate": 0.00015003778122396277, + "loss": 1.0851, + "step": 119 + }, + { + "epoch": 1.227621483375959, + "grad_norm": 1.2798593237994211, + "learning_rate": 0.0001497573907524361, + "loss": 1.1047, + "step": 120 + }, + { + "epoch": 1.237851662404092, + "grad_norm": 1.1502336899283057, + "learning_rate": 0.00014947337855887406, + "loss": 1.0943, + "step": 121 + }, + { + "epoch": 1.248081841432225, + "grad_norm": 1.2423026547868323, + "learning_rate": 0.00014918575938885725, + "loss": 1.0896, + "step": 122 + }, + { + "epoch": 1.258312020460358, + "grad_norm": 0.8127468643676744, + "learning_rate": 0.00014889454817523608, + "loss": 1.0984, + "step": 123 + }, + { + "epoch": 1.2685421994884911, + "grad_norm": 1.1236998604735062, + "learning_rate": 0.00014859976003735572, + "loss": 1.091, + "step": 124 + }, + { + "epoch": 1.278772378516624, + "grad_norm": 1.0625241691652207, + "learning_rate": 0.0001483014102802711, + "loss": 1.0749, + "step": 125 + }, + { + "epoch": 1.289002557544757, + "grad_norm": 1.2427086503303664, + "learning_rate": 0.00014799951439395221, + "loss": 1.0901, + "step": 126 + }, + { + "epoch": 1.29923273657289, + "grad_norm": 0.9886108496701173, + "learning_rate": 0.00014769408805247986, + "loss": 1.0848, + "step": 127 + }, + { + "epoch": 1.309462915601023, + "grad_norm": 1.017219160331291, + "learning_rate": 0.0001473851471132321, + "loss": 1.0897, + "step": 128 + }, + { + "epoch": 1.319693094629156, + "grad_norm": 1.4542703012009928, + "learning_rate": 0.00014707270761606063, + "loss": 1.0695, + "step": 129 + }, + { + "epoch": 1.329923273657289, + "grad_norm": 1.0782706459189146, + "learning_rate": 0.00014675678578245828, + "loss": 1.0895, + "step": 130 + }, + { + "epoch": 1.340153452685422, + "grad_norm": 1.0968316967604133, + "learning_rate": 0.00014643739801471667, + "loss": 1.1003, + "step": 131 + }, + { + "epoch": 1.350383631713555, + "grad_norm": 0.9003561398004416, + "learning_rate": 0.00014611456089507464, + "loss": 1.098, + "step": 132 + }, + { + "epoch": 1.3606138107416879, + "grad_norm": 1.351034619458769, + "learning_rate": 0.00014578829118485742, + "loss": 1.0698, + "step": 133 + }, + { + "epoch": 1.370843989769821, + "grad_norm": 0.8658183503538551, + "learning_rate": 0.00014545860582360624, + "loss": 1.1071, + "step": 134 + }, + { + "epoch": 1.381074168797954, + "grad_norm": 1.0177123952310823, + "learning_rate": 0.00014512552192819897, + "loss": 1.0869, + "step": 135 + }, + { + "epoch": 1.391304347826087, + "grad_norm": 1.4195927565971158, + "learning_rate": 0.0001447890567919614, + "loss": 1.0954, + "step": 136 + }, + { + "epoch": 1.40153452685422, + "grad_norm": 0.9931491307968467, + "learning_rate": 0.00014444922788376934, + "loss": 1.0784, + "step": 137 + }, + { + "epoch": 1.4117647058823528, + "grad_norm": 1.270639141634098, + "learning_rate": 0.00014410605284714175, + "loss": 1.0888, + "step": 138 + }, + { + "epoch": 1.421994884910486, + "grad_norm": 0.8408471380854269, + "learning_rate": 0.0001437595494993246, + "loss": 1.0842, + "step": 139 + }, + { + "epoch": 1.432225063938619, + "grad_norm": 1.380164251857079, + "learning_rate": 0.000143409735830366, + "loss": 1.0795, + "step": 140 + }, + { + "epoch": 1.4424552429667519, + "grad_norm": 0.9896679635139878, + "learning_rate": 0.00014305663000218193, + "loss": 1.0907, + "step": 141 + }, + { + "epoch": 1.452685421994885, + "grad_norm": 1.412072301199199, + "learning_rate": 0.00014270025034761352, + "loss": 1.0817, + "step": 142 + }, + { + "epoch": 1.4629156010230178, + "grad_norm": 0.770940428011719, + "learning_rate": 0.000142340615369475, + "loss": 1.0819, + "step": 143 + }, + { + "epoch": 1.473145780051151, + "grad_norm": 0.7983918736084521, + "learning_rate": 0.00014197774373959327, + "loss": 1.0931, + "step": 144 + }, + { + "epoch": 1.4833759590792839, + "grad_norm": 1.3538690920598053, + "learning_rate": 0.00014161165429783844, + "loss": 1.0884, + "step": 145 + }, + { + "epoch": 1.4936061381074168, + "grad_norm": 0.8169757443568035, + "learning_rate": 0.0001412423660511456, + "loss": 1.0924, + "step": 146 + }, + { + "epoch": 1.50383631713555, + "grad_norm": 0.9256987946838048, + "learning_rate": 0.00014086989817252803, + "loss": 1.0785, + "step": 147 + }, + { + "epoch": 1.5140664961636827, + "grad_norm": 1.1631633494660514, + "learning_rate": 0.00014049427000008185, + "loss": 1.0699, + "step": 148 + }, + { + "epoch": 1.5242966751918159, + "grad_norm": 1.212333243880141, + "learning_rate": 0.00014011550103598176, + "loss": 1.064, + "step": 149 + }, + { + "epoch": 1.5345268542199488, + "grad_norm": 0.9967182911949714, + "learning_rate": 0.0001397336109454689, + "loss": 1.1002, + "step": 150 + }, + { + "epoch": 1.5447570332480818, + "grad_norm": 1.1502313981018193, + "learning_rate": 0.0001393486195558295, + "loss": 1.0709, + "step": 151 + }, + { + "epoch": 1.554987212276215, + "grad_norm": 1.311911957474106, + "learning_rate": 0.00013896054685536566, + "loss": 1.0717, + "step": 152 + }, + { + "epoch": 1.5652173913043477, + "grad_norm": 0.8141917822120857, + "learning_rate": 0.00013856941299235752, + "loss": 1.0714, + "step": 153 + }, + { + "epoch": 1.5754475703324808, + "grad_norm": 0.5826303398204319, + "learning_rate": 0.00013817523827401715, + "loss": 1.0825, + "step": 154 + }, + { + "epoch": 1.5856777493606138, + "grad_norm": 1.1035174246946824, + "learning_rate": 0.00013777804316543438, + "loss": 1.0583, + "step": 155 + }, + { + "epoch": 1.5959079283887467, + "grad_norm": 1.483439389847773, + "learning_rate": 0.00013737784828851405, + "loss": 1.0998, + "step": 156 + }, + { + "epoch": 1.60613810741688, + "grad_norm": 0.5590063439764236, + "learning_rate": 0.0001369746744209055, + "loss": 1.0814, + "step": 157 + }, + { + "epoch": 1.6163682864450126, + "grad_norm": 1.427302602685382, + "learning_rate": 0.00013656854249492382, + "loss": 1.0672, + "step": 158 + }, + { + "epoch": 1.6265984654731458, + "grad_norm": 0.9907320262762309, + "learning_rate": 0.00013615947359646295, + "loss": 1.077, + "step": 159 + }, + { + "epoch": 1.6368286445012787, + "grad_norm": 1.3253726577143705, + "learning_rate": 0.00013574748896390105, + "loss": 1.0831, + "step": 160 + }, + { + "epoch": 1.6470588235294117, + "grad_norm": 0.6170129994977172, + "learning_rate": 0.00013533260998699776, + "loss": 1.0808, + "step": 161 + }, + { + "epoch": 1.6572890025575449, + "grad_norm": 0.8974715249718852, + "learning_rate": 0.00013491485820578373, + "loss": 1.0609, + "step": 162 + }, + { + "epoch": 1.6675191815856778, + "grad_norm": 0.8218782327367921, + "learning_rate": 0.00013449425530944218, + "loss": 1.0822, + "step": 163 + }, + { + "epoch": 1.6777493606138107, + "grad_norm": 0.6783603661700464, + "learning_rate": 0.00013407082313518292, + "loss": 1.0771, + "step": 164 + }, + { + "epoch": 1.6879795396419437, + "grad_norm": 0.6976792612146404, + "learning_rate": 0.0001336445836671086, + "loss": 1.0853, + "step": 165 + }, + { + "epoch": 1.6982097186700766, + "grad_norm": 0.823428019130252, + "learning_rate": 0.0001332155590350732, + "loss": 1.0838, + "step": 166 + }, + { + "epoch": 1.7084398976982098, + "grad_norm": 1.0953340521038164, + "learning_rate": 0.0001327837715135332, + "loss": 1.082, + "step": 167 + }, + { + "epoch": 1.7186700767263428, + "grad_norm": 0.9474195557921864, + "learning_rate": 0.00013234924352039103, + "loss": 1.0802, + "step": 168 + }, + { + "epoch": 1.7289002557544757, + "grad_norm": 0.9444470757389084, + "learning_rate": 0.00013191199761583124, + "loss": 1.0887, + "step": 169 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 1.101212086185109, + "learning_rate": 0.00013147205650114913, + "loss": 1.0718, + "step": 170 + }, + { + "epoch": 1.7493606138107416, + "grad_norm": 1.0553531873256305, + "learning_rate": 0.0001310294430175722, + "loss": 1.0788, + "step": 171 + }, + { + "epoch": 1.7595907928388748, + "grad_norm": 1.05582162764911, + "learning_rate": 0.00013058418014507412, + "loss": 1.0879, + "step": 172 + }, + { + "epoch": 1.7698209718670077, + "grad_norm": 0.9944536264830629, + "learning_rate": 0.00013013629100118183, + "loss": 1.0721, + "step": 173 + }, + { + "epoch": 1.7800511508951407, + "grad_norm": 1.1307659477843484, + "learning_rate": 0.00012968579883977508, + "loss": 1.0737, + "step": 174 + }, + { + "epoch": 1.7902813299232738, + "grad_norm": 1.2420812191214468, + "learning_rate": 0.00012923272704987943, + "loss": 1.0742, + "step": 175 + }, + { + "epoch": 1.8005115089514065, + "grad_norm": 1.0472853919924061, + "learning_rate": 0.00012877709915445155, + "loss": 1.0721, + "step": 176 + }, + { + "epoch": 1.8107416879795397, + "grad_norm": 0.6947884508219546, + "learning_rate": 0.00012831893880915822, + "loss": 1.0555, + "step": 177 + }, + { + "epoch": 1.8209718670076727, + "grad_norm": 0.7757404896202875, + "learning_rate": 0.00012785826980114798, + "loss": 1.0804, + "step": 178 + }, + { + "epoch": 1.8312020460358056, + "grad_norm": 1.253778618594067, + "learning_rate": 0.0001273951160478163, + "loss": 1.063, + "step": 179 + }, + { + "epoch": 1.8414322250639388, + "grad_norm": 0.5979958514242996, + "learning_rate": 0.00012692950159556358, + "loss": 1.0666, + "step": 180 + }, + { + "epoch": 1.8516624040920715, + "grad_norm": 0.8662747748641294, + "learning_rate": 0.00012646145061854697, + "loss": 1.0703, + "step": 181 + }, + { + "epoch": 1.8618925831202047, + "grad_norm": 0.8287006788806506, + "learning_rate": 0.00012599098741742504, + "loss": 1.0571, + "step": 182 + }, + { + "epoch": 1.8721227621483376, + "grad_norm": 1.0265713289334373, + "learning_rate": 0.00012551813641809622, + "loss": 1.0706, + "step": 183 + }, + { + "epoch": 1.8823529411764706, + "grad_norm": 1.2306509484226458, + "learning_rate": 0.0001250429221704306, + "loss": 1.0779, + "step": 184 + }, + { + "epoch": 1.8925831202046037, + "grad_norm": 0.8951251773522786, + "learning_rate": 0.00012456536934699552, + "loss": 1.064, + "step": 185 + }, + { + "epoch": 1.9028132992327365, + "grad_norm": 1.1181307738147266, + "learning_rate": 0.0001240855027417742, + "loss": 1.0585, + "step": 186 + }, + { + "epoch": 1.9130434782608696, + "grad_norm": 0.977542614018667, + "learning_rate": 0.00012360334726887887, + "loss": 1.0672, + "step": 187 + }, + { + "epoch": 1.9232736572890026, + "grad_norm": 1.1637804133002638, + "learning_rate": 0.00012311892796125704, + "loss": 1.0713, + "step": 188 + }, + { + "epoch": 1.9335038363171355, + "grad_norm": 0.445419436951453, + "learning_rate": 0.0001226322699693918, + "loss": 1.0536, + "step": 189 + }, + { + "epoch": 1.9437340153452687, + "grad_norm": 0.9034412973205753, + "learning_rate": 0.00012214339855999624, + "loss": 1.0807, + "step": 190 + }, + { + "epoch": 1.9539641943734014, + "grad_norm": 0.5236086539347802, + "learning_rate": 0.00012165233911470136, + "loss": 1.0777, + "step": 191 + }, + { + "epoch": 1.9641943734015346, + "grad_norm": 0.6745807390622581, + "learning_rate": 0.00012115911712873851, + "loss": 1.0525, + "step": 192 + }, + { + "epoch": 1.9744245524296675, + "grad_norm": 0.5551428173478703, + "learning_rate": 0.00012066375820961558, + "loss": 1.0617, + "step": 193 + }, + { + "epoch": 1.9846547314578005, + "grad_norm": 0.5930456521809883, + "learning_rate": 0.00012016628807578756, + "loss": 1.0682, + "step": 194 + }, + { + "epoch": 1.9948849104859336, + "grad_norm": 0.5333039982670191, + "learning_rate": 0.00011966673255532119, + "loss": 1.1518, + "step": 195 + }, + { + "epoch": 2.0051150895140664, + "grad_norm": 0.7368227183464856, + "learning_rate": 0.00011916511758455407, + "loss": 1.226, + "step": 196 + }, + { + "epoch": 2.0153452685421995, + "grad_norm": 1.0538129513540742, + "learning_rate": 0.00011866146920674807, + "loss": 1.068, + "step": 197 + }, + { + "epoch": 2.0255754475703327, + "grad_norm": 1.2094094160759923, + "learning_rate": 0.0001181558135707371, + "loss": 1.0502, + "step": 198 + }, + { + "epoch": 2.0358056265984654, + "grad_norm": 0.915224534709575, + "learning_rate": 0.00011764817692956966, + "loss": 1.0286, + "step": 199 + }, + { + "epoch": 2.0460358056265986, + "grad_norm": 1.0711022599910567, + "learning_rate": 0.00011713858563914562, + "loss": 1.0747, + "step": 200 + }, + { + "epoch": 2.0562659846547313, + "grad_norm": 1.0787442473268498, + "learning_rate": 0.00011662706615684803, + "loss": 1.045, + "step": 201 + }, + { + "epoch": 2.0664961636828645, + "grad_norm": 0.8778392977870085, + "learning_rate": 0.00011611364504016935, + "loss": 1.0678, + "step": 202 + }, + { + "epoch": 2.0767263427109977, + "grad_norm": 0.9910713045345557, + "learning_rate": 0.00011559834894533275, + "loss": 1.0458, + "step": 203 + }, + { + "epoch": 2.0869565217391304, + "grad_norm": 1.1592891619781418, + "learning_rate": 0.00011508120462590794, + "loss": 1.0461, + "step": 204 + }, + { + "epoch": 2.0971867007672635, + "grad_norm": 0.6931145505443533, + "learning_rate": 0.00011456223893142238, + "loss": 1.0407, + "step": 205 + }, + { + "epoch": 2.1074168797953963, + "grad_norm": 0.8009294768801537, + "learning_rate": 0.0001140414788059672, + "loss": 1.0534, + "step": 206 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 0.6527102431957467, + "learning_rate": 0.00011351895128679823, + "loss": 1.0577, + "step": 207 + }, + { + "epoch": 2.1278772378516626, + "grad_norm": 0.5808810881566437, + "learning_rate": 0.00011299468350293232, + "loss": 1.0592, + "step": 208 + }, + { + "epoch": 2.1381074168797953, + "grad_norm": 0.8896635325022703, + "learning_rate": 0.00011246870267373885, + "loss": 1.069, + "step": 209 + }, + { + "epoch": 2.1483375959079285, + "grad_norm": 1.0873242378114067, + "learning_rate": 0.00011194103610752655, + "loss": 1.0454, + "step": 210 + }, + { + "epoch": 2.1585677749360612, + "grad_norm": 0.9489754319671625, + "learning_rate": 0.00011141171120012552, + "loss": 1.0723, + "step": 211 + }, + { + "epoch": 2.1687979539641944, + "grad_norm": 0.8030522232247485, + "learning_rate": 0.0001108807554334651, + "loss": 1.0428, + "step": 212 + }, + { + "epoch": 2.1790281329923276, + "grad_norm": 0.5529249514757153, + "learning_rate": 0.00011034819637414686, + "loss": 1.061, + "step": 213 + }, + { + "epoch": 2.1892583120204603, + "grad_norm": 0.4631583007632592, + "learning_rate": 0.00010981406167201354, + "loss": 1.0355, + "step": 214 + }, + { + "epoch": 2.1994884910485935, + "grad_norm": 0.4099197978603843, + "learning_rate": 0.0001092783790587133, + "loss": 1.0777, + "step": 215 + }, + { + "epoch": 2.209718670076726, + "grad_norm": 0.4121757237952397, + "learning_rate": 0.00010874117634626011, + "loss": 1.0541, + "step": 216 + }, + { + "epoch": 2.2199488491048593, + "grad_norm": 0.4349846280052258, + "learning_rate": 0.00010820248142558965, + "loss": 1.0435, + "step": 217 + }, + { + "epoch": 2.2301790281329925, + "grad_norm": 0.3960108429870153, + "learning_rate": 0.00010766232226511142, + "loss": 1.0513, + "step": 218 + }, + { + "epoch": 2.2404092071611252, + "grad_norm": 0.4522590868845693, + "learning_rate": 0.00010712072690925638, + "loss": 1.0509, + "step": 219 + }, + { + "epoch": 2.2506393861892584, + "grad_norm": 0.4862701100635931, + "learning_rate": 0.00010657772347702118, + "loss": 1.0325, + "step": 220 + }, + { + "epoch": 2.260869565217391, + "grad_norm": 0.44763620371165586, + "learning_rate": 0.00010603334016050808, + "loss": 1.0369, + "step": 221 + }, + { + "epoch": 2.2710997442455243, + "grad_norm": 0.5580650650662916, + "learning_rate": 0.00010548760522346138, + "loss": 1.0414, + "step": 222 + }, + { + "epoch": 2.2813299232736575, + "grad_norm": 0.6352266377825914, + "learning_rate": 0.00010494054699979992, + "loss": 1.056, + "step": 223 + }, + { + "epoch": 2.29156010230179, + "grad_norm": 0.617790095766774, + "learning_rate": 0.00010439219389214595, + "loss": 1.0573, + "step": 224 + }, + { + "epoch": 2.3017902813299234, + "grad_norm": 0.6417970276744828, + "learning_rate": 0.0001038425743703507, + "loss": 1.0412, + "step": 225 + }, + { + "epoch": 2.312020460358056, + "grad_norm": 0.7082597904375986, + "learning_rate": 0.00010329171697001608, + "loss": 1.0366, + "step": 226 + }, + { + "epoch": 2.3222506393861893, + "grad_norm": 0.8496266841221832, + "learning_rate": 0.0001027396502910132, + "loss": 1.0451, + "step": 227 + }, + { + "epoch": 2.3324808184143224, + "grad_norm": 1.0308330001421908, + "learning_rate": 0.0001021864029959975, + "loss": 1.0428, + "step": 228 + }, + { + "epoch": 2.342710997442455, + "grad_norm": 1.026355847644825, + "learning_rate": 0.00010163200380892063, + "loss": 1.0612, + "step": 229 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.7560985076254375, + "learning_rate": 0.00010107648151353916, + "loss": 1.0247, + "step": 230 + }, + { + "epoch": 2.363171355498721, + "grad_norm": 0.41561156978885305, + "learning_rate": 0.00010051986495192008, + "loss": 1.0363, + "step": 231 + }, + { + "epoch": 2.373401534526854, + "grad_norm": 0.38675030483907014, + "learning_rate": 9.99621830229434e-05, + "loss": 1.05, + "step": 232 + }, + { + "epoch": 2.3836317135549874, + "grad_norm": 0.4804093390079753, + "learning_rate": 9.94034646808018e-05, + "loss": 1.0537, + "step": 233 + }, + { + "epoch": 2.39386189258312, + "grad_norm": 0.5877095225822028, + "learning_rate": 9.884373893349725e-05, + "loss": 1.0273, + "step": 234 + }, + { + "epoch": 2.4040920716112533, + "grad_norm": 0.6010921886045744, + "learning_rate": 9.828303484133515e-05, + "loss": 1.053, + "step": 235 + }, + { + "epoch": 2.414322250639386, + "grad_norm": 0.4918851450838751, + "learning_rate": 9.772138151541522e-05, + "loss": 1.0364, + "step": 236 + }, + { + "epoch": 2.424552429667519, + "grad_norm": 0.3407705594769831, + "learning_rate": 9.715880811612044e-05, + "loss": 1.0331, + "step": 237 + }, + { + "epoch": 2.4347826086956523, + "grad_norm": 0.2468632428242675, + "learning_rate": 9.659534385160289e-05, + "loss": 1.0323, + "step": 238 + }, + { + "epoch": 2.445012787723785, + "grad_norm": 0.26175126594915626, + "learning_rate": 9.603101797626729e-05, + "loss": 1.0491, + "step": 239 + }, + { + "epoch": 2.455242966751918, + "grad_norm": 0.3250566758468044, + "learning_rate": 9.546585978925221e-05, + "loss": 1.0127, + "step": 240 + }, + { + "epoch": 2.4654731457800514, + "grad_norm": 0.3733853550848411, + "learning_rate": 9.489989863290885e-05, + "loss": 1.0637, + "step": 241 + }, + { + "epoch": 2.475703324808184, + "grad_norm": 0.45748627752418863, + "learning_rate": 9.433316389127768e-05, + "loss": 1.038, + "step": 242 + }, + { + "epoch": 2.4859335038363173, + "grad_norm": 0.432153145698656, + "learning_rate": 9.37656849885628e-05, + "loss": 1.0441, + "step": 243 + }, + { + "epoch": 2.49616368286445, + "grad_norm": 0.3361885905419595, + "learning_rate": 9.319749138760424e-05, + "loss": 1.0317, + "step": 244 + }, + { + "epoch": 2.506393861892583, + "grad_norm": 0.26392084893370804, + "learning_rate": 9.262861258834833e-05, + "loss": 1.0353, + "step": 245 + }, + { + "epoch": 2.516624040920716, + "grad_norm": 0.25278206548366783, + "learning_rate": 9.205907812631616e-05, + "loss": 1.0211, + "step": 246 + }, + { + "epoch": 2.526854219948849, + "grad_norm": 0.2853155639467533, + "learning_rate": 9.148891757106999e-05, + "loss": 1.0381, + "step": 247 + }, + { + "epoch": 2.5370843989769822, + "grad_norm": 0.28133618292890095, + "learning_rate": 9.091816052467817e-05, + "loss": 1.045, + "step": 248 + }, + { + "epoch": 2.547314578005115, + "grad_norm": 0.26415138394214893, + "learning_rate": 9.034683662017812e-05, + "loss": 1.0339, + "step": 249 + }, + { + "epoch": 2.557544757033248, + "grad_norm": 0.23100116731609785, + "learning_rate": 8.977497552003785e-05, + "loss": 1.0297, + "step": 250 + }, + { + "epoch": 2.5677749360613813, + "grad_norm": 0.25137211273391913, + "learning_rate": 8.920260691461602e-05, + "loss": 1.0474, + "step": 251 + }, + { + "epoch": 2.578005115089514, + "grad_norm": 0.25925530222353527, + "learning_rate": 8.862976052062034e-05, + "loss": 1.0478, + "step": 252 + }, + { + "epoch": 2.588235294117647, + "grad_norm": 0.21181713461857493, + "learning_rate": 8.805646607956467e-05, + "loss": 1.0384, + "step": 253 + }, + { + "epoch": 2.59846547314578, + "grad_norm": 0.20383781215036986, + "learning_rate": 8.748275335622506e-05, + "loss": 1.0352, + "step": 254 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.22216426938201625, + "learning_rate": 8.69086521370942e-05, + "loss": 1.0251, + "step": 255 + }, + { + "epoch": 2.618925831202046, + "grad_norm": 0.2339550911616179, + "learning_rate": 8.633419222883508e-05, + "loss": 1.0388, + "step": 256 + }, + { + "epoch": 2.629156010230179, + "grad_norm": 0.29608330426201757, + "learning_rate": 8.575940345673337e-05, + "loss": 1.0415, + "step": 257 + }, + { + "epoch": 2.639386189258312, + "grad_norm": 0.282754017428277, + "learning_rate": 8.518431566314901e-05, + "loss": 1.0338, + "step": 258 + }, + { + "epoch": 2.649616368286445, + "grad_norm": 0.21590808755680316, + "learning_rate": 8.460895870596675e-05, + "loss": 1.0455, + "step": 259 + }, + { + "epoch": 2.659846547314578, + "grad_norm": 0.22038275314532527, + "learning_rate": 8.4033362457046e-05, + "loss": 1.0446, + "step": 260 + }, + { + "epoch": 2.670076726342711, + "grad_norm": 0.19903936327716265, + "learning_rate": 8.345755680066993e-05, + "loss": 1.0282, + "step": 261 + }, + { + "epoch": 2.680306905370844, + "grad_norm": 0.17558239320808622, + "learning_rate": 8.288157163199389e-05, + "loss": 1.0278, + "step": 262 + }, + { + "epoch": 2.690537084398977, + "grad_norm": 0.20248735135033116, + "learning_rate": 8.230543685549333e-05, + "loss": 1.0317, + "step": 263 + }, + { + "epoch": 2.70076726342711, + "grad_norm": 0.23742588094542533, + "learning_rate": 8.17291823834111e-05, + "loss": 1.0326, + "step": 264 + }, + { + "epoch": 2.710997442455243, + "grad_norm": 0.3032160882786812, + "learning_rate": 8.115283813420459e-05, + "loss": 1.0375, + "step": 265 + }, + { + "epoch": 2.7212276214833757, + "grad_norm": 0.2962448679587415, + "learning_rate": 8.057643403099221e-05, + "loss": 1.0584, + "step": 266 + }, + { + "epoch": 2.731457800511509, + "grad_norm": 0.30722625928358643, + "learning_rate": 8e-05, + "loss": 1.0395, + "step": 267 + }, + { + "epoch": 2.741687979539642, + "grad_norm": 0.28307960709841534, + "learning_rate": 7.94235659690078e-05, + "loss": 1.0369, + "step": 268 + }, + { + "epoch": 2.7519181585677748, + "grad_norm": 0.21321155330317557, + "learning_rate": 7.884716186579545e-05, + "loss": 1.0532, + "step": 269 + }, + { + "epoch": 2.762148337595908, + "grad_norm": 0.2443283053996415, + "learning_rate": 7.827081761658892e-05, + "loss": 1.0266, + "step": 270 + }, + { + "epoch": 2.772378516624041, + "grad_norm": 0.2904460725842854, + "learning_rate": 7.76945631445067e-05, + "loss": 1.0344, + "step": 271 + }, + { + "epoch": 2.782608695652174, + "grad_norm": 0.25431043958852784, + "learning_rate": 7.711842836800614e-05, + "loss": 1.0285, + "step": 272 + }, + { + "epoch": 2.792838874680307, + "grad_norm": 0.25586265632907745, + "learning_rate": 7.654244319933009e-05, + "loss": 1.0272, + "step": 273 + }, + { + "epoch": 2.80306905370844, + "grad_norm": 0.23485094953105404, + "learning_rate": 7.596663754295404e-05, + "loss": 1.0427, + "step": 274 + }, + { + "epoch": 2.813299232736573, + "grad_norm": 0.17957023788842033, + "learning_rate": 7.539104129403327e-05, + "loss": 1.0474, + "step": 275 + }, + { + "epoch": 2.8235294117647056, + "grad_norm": 0.160508230528404, + "learning_rate": 7.4815684336851e-05, + "loss": 1.0445, + "step": 276 + }, + { + "epoch": 2.833759590792839, + "grad_norm": 0.21356478024874126, + "learning_rate": 7.424059654326664e-05, + "loss": 1.04, + "step": 277 + }, + { + "epoch": 2.843989769820972, + "grad_norm": 0.21888445256566497, + "learning_rate": 7.366580777116495e-05, + "loss": 1.0406, + "step": 278 + }, + { + "epoch": 2.8542199488491047, + "grad_norm": 0.2041365287775187, + "learning_rate": 7.309134786290583e-05, + "loss": 1.0321, + "step": 279 + }, + { + "epoch": 2.864450127877238, + "grad_norm": 0.19151321727068246, + "learning_rate": 7.251724664377497e-05, + "loss": 1.0371, + "step": 280 + }, + { + "epoch": 2.874680306905371, + "grad_norm": 0.18344693741146628, + "learning_rate": 7.194353392043534e-05, + "loss": 1.039, + "step": 281 + }, + { + "epoch": 2.8849104859335037, + "grad_norm": 0.19330950475902522, + "learning_rate": 7.13702394793797e-05, + "loss": 1.0364, + "step": 282 + }, + { + "epoch": 2.895140664961637, + "grad_norm": 0.1990648393315987, + "learning_rate": 7.079739308538399e-05, + "loss": 1.0277, + "step": 283 + }, + { + "epoch": 2.90537084398977, + "grad_norm": 0.2262885121158685, + "learning_rate": 7.022502447996215e-05, + "loss": 1.0275, + "step": 284 + }, + { + "epoch": 2.915601023017903, + "grad_norm": 0.20233291351840074, + "learning_rate": 6.965316337982191e-05, + "loss": 1.0381, + "step": 285 + }, + { + "epoch": 2.9258312020460355, + "grad_norm": 0.19513363820566396, + "learning_rate": 6.908183947532184e-05, + "loss": 1.0342, + "step": 286 + }, + { + "epoch": 2.9360613810741687, + "grad_norm": 0.20380750046128057, + "learning_rate": 6.851108242893002e-05, + "loss": 1.0377, + "step": 287 + }, + { + "epoch": 2.946291560102302, + "grad_norm": 0.1866318852041668, + "learning_rate": 6.794092187368387e-05, + "loss": 1.0428, + "step": 288 + }, + { + "epoch": 2.9565217391304346, + "grad_norm": 0.15744510473495013, + "learning_rate": 6.737138741165168e-05, + "loss": 1.0503, + "step": 289 + }, + { + "epoch": 2.9667519181585678, + "grad_norm": 0.1652822650571465, + "learning_rate": 6.680250861239581e-05, + "loss": 1.035, + "step": 290 + }, + { + "epoch": 2.976982097186701, + "grad_norm": 0.15546583646318948, + "learning_rate": 6.623431501143723e-05, + "loss": 1.0313, + "step": 291 + }, + { + "epoch": 2.9872122762148337, + "grad_norm": 0.29006532173297445, + "learning_rate": 6.566683610872231e-05, + "loss": 1.0564, + "step": 292 + }, + { + "epoch": 2.997442455242967, + "grad_norm": 0.2268887255063016, + "learning_rate": 6.510010136709118e-05, + "loss": 1.2037, + "step": 293 + }, + { + "epoch": 3.0076726342710995, + "grad_norm": 0.2539730951218038, + "learning_rate": 6.453414021074781e-05, + "loss": 1.1394, + "step": 294 + }, + { + "epoch": 3.0179028132992327, + "grad_norm": 0.2231294668730024, + "learning_rate": 6.396898202373277e-05, + "loss": 1.0223, + "step": 295 + }, + { + "epoch": 3.028132992327366, + "grad_norm": 0.20401904180469313, + "learning_rate": 6.340465614839714e-05, + "loss": 1.0336, + "step": 296 + }, + { + "epoch": 3.0383631713554986, + "grad_norm": 0.18979057876237626, + "learning_rate": 6.284119188387957e-05, + "loss": 1.0107, + "step": 297 + }, + { + "epoch": 3.0485933503836318, + "grad_norm": 0.16567071328991922, + "learning_rate": 6.227861848458481e-05, + "loss": 1.0134, + "step": 298 + }, + { + "epoch": 3.0588235294117645, + "grad_norm": 0.2788005001606031, + "learning_rate": 6.171696515866488e-05, + "loss": 1.0289, + "step": 299 + }, + { + "epoch": 3.0690537084398977, + "grad_norm": 0.13520429300745568, + "learning_rate": 6.115626106650273e-05, + "loss": 1.0297, + "step": 300 + }, + { + "epoch": 3.079283887468031, + "grad_norm": 0.21854013343576806, + "learning_rate": 6.059653531919823e-05, + "loss": 1.0282, + "step": 301 + }, + { + "epoch": 3.0895140664961636, + "grad_norm": 0.1818832819994029, + "learning_rate": 6.0037816977056625e-05, + "loss": 1.0531, + "step": 302 + }, + { + "epoch": 3.0997442455242967, + "grad_norm": 0.18589522528315505, + "learning_rate": 5.9480135048079964e-05, + "loss": 1.0113, + "step": 303 + }, + { + "epoch": 3.10997442455243, + "grad_norm": 0.18063564176637392, + "learning_rate": 5.892351848646087e-05, + "loss": 1.0394, + "step": 304 + }, + { + "epoch": 3.1202046035805626, + "grad_norm": 0.147586744880159, + "learning_rate": 5.836799619107937e-05, + "loss": 1.0365, + "step": 305 + }, + { + "epoch": 3.130434782608696, + "grad_norm": 0.17675912154183307, + "learning_rate": 5.781359700400254e-05, + "loss": 1.0039, + "step": 306 + }, + { + "epoch": 3.1406649616368285, + "grad_norm": 0.16586699917434417, + "learning_rate": 5.726034970898682e-05, + "loss": 1.0243, + "step": 307 + }, + { + "epoch": 3.1508951406649617, + "grad_norm": 0.17820259338383218, + "learning_rate": 5.670828302998393e-05, + "loss": 1.0314, + "step": 308 + }, + { + "epoch": 3.1611253196930944, + "grad_norm": 0.15458082339803622, + "learning_rate": 5.6157425629649314e-05, + "loss": 1.0485, + "step": 309 + }, + { + "epoch": 3.1713554987212276, + "grad_norm": 0.1643571380269719, + "learning_rate": 5.560780610785406e-05, + "loss": 1.018, + "step": 310 + }, + { + "epoch": 3.1815856777493607, + "grad_norm": 0.156856618068632, + "learning_rate": 5.5059453000200125e-05, + "loss": 1.0061, + "step": 311 + }, + { + "epoch": 3.1918158567774935, + "grad_norm": 0.14086641088136265, + "learning_rate": 5.451239477653864e-05, + "loss": 1.0205, + "step": 312 + }, + { + "epoch": 3.2020460358056266, + "grad_norm": 0.14820087407296167, + "learning_rate": 5.3966659839491936e-05, + "loss": 1.0226, + "step": 313 + }, + { + "epoch": 3.21227621483376, + "grad_norm": 0.13884477979100748, + "learning_rate": 5.342227652297887e-05, + "loss": 1.001, + "step": 314 + }, + { + "epoch": 3.2225063938618925, + "grad_norm": 0.14309328916359365, + "learning_rate": 5.287927309074365e-05, + "loss": 1.031, + "step": 315 + }, + { + "epoch": 3.2327365728900257, + "grad_norm": 0.15252565103321644, + "learning_rate": 5.233767773488859e-05, + "loss": 1.015, + "step": 316 + }, + { + "epoch": 3.2429667519181584, + "grad_norm": 0.13230625482589622, + "learning_rate": 5.179751857441036e-05, + "loss": 1.0053, + "step": 317 + }, + { + "epoch": 3.2531969309462916, + "grad_norm": 0.1556826070734683, + "learning_rate": 5.1258823653739914e-05, + "loss": 1.0211, + "step": 318 + }, + { + "epoch": 3.2634271099744243, + "grad_norm": 0.1371375107565863, + "learning_rate": 5.0721620941286735e-05, + "loss": 1.0143, + "step": 319 + }, + { + "epoch": 3.2736572890025575, + "grad_norm": 0.13573969073646522, + "learning_rate": 5.018593832798649e-05, + "loss": 1.0375, + "step": 320 + }, + { + "epoch": 3.2838874680306906, + "grad_norm": 0.11657280744271552, + "learning_rate": 4.965180362585315e-05, + "loss": 1.0253, + "step": 321 + }, + { + "epoch": 3.2941176470588234, + "grad_norm": 0.11616693511840327, + "learning_rate": 4.911924456653494e-05, + "loss": 1.0209, + "step": 322 + }, + { + "epoch": 3.3043478260869565, + "grad_norm": 0.12512977851281545, + "learning_rate": 4.8588288799874514e-05, + "loss": 1.0112, + "step": 323 + }, + { + "epoch": 3.3145780051150897, + "grad_norm": 0.10387393823484337, + "learning_rate": 4.805896389247348e-05, + "loss": 1.0077, + "step": 324 + }, + { + "epoch": 3.3248081841432224, + "grad_norm": 0.13924261830307932, + "learning_rate": 4.753129732626116e-05, + "loss": 1.0229, + "step": 325 + }, + { + "epoch": 3.3350383631713556, + "grad_norm": 0.12135011031399273, + "learning_rate": 4.70053164970677e-05, + "loss": 1.0184, + "step": 326 + }, + { + "epoch": 3.3452685421994883, + "grad_norm": 0.1183989328140461, + "learning_rate": 4.6481048713201825e-05, + "loss": 1.0058, + "step": 327 + }, + { + "epoch": 3.3554987212276215, + "grad_norm": 0.1284311973823529, + "learning_rate": 4.595852119403282e-05, + "loss": 1.0278, + "step": 328 + }, + { + "epoch": 3.3657289002557547, + "grad_norm": 0.11430293939389213, + "learning_rate": 4.543776106857765e-05, + "loss": 1.012, + "step": 329 + }, + { + "epoch": 3.3759590792838874, + "grad_norm": 0.1263289266506516, + "learning_rate": 4.491879537409211e-05, + "loss": 1.0242, + "step": 330 + }, + { + "epoch": 3.3861892583120206, + "grad_norm": 0.1316142612111793, + "learning_rate": 4.4401651054667274e-05, + "loss": 1.0078, + "step": 331 + }, + { + "epoch": 3.3964194373401533, + "grad_norm": 0.10490352230994067, + "learning_rate": 4.3886354959830625e-05, + "loss": 1.0141, + "step": 332 + }, + { + "epoch": 3.4066496163682864, + "grad_norm": 0.1587962681364766, + "learning_rate": 4.3372933843152e-05, + "loss": 1.031, + "step": 333 + }, + { + "epoch": 3.4168797953964196, + "grad_norm": 0.14806873434228535, + "learning_rate": 4.2861414360854387e-05, + "loss": 1.0261, + "step": 334 + }, + { + "epoch": 3.4271099744245523, + "grad_norm": 0.12428110117787702, + "learning_rate": 4.2351823070430376e-05, + "loss": 1.0191, + "step": 335 + }, + { + "epoch": 3.4373401534526855, + "grad_norm": 0.1700524980071232, + "learning_rate": 4.184418642926289e-05, + "loss": 1.0267, + "step": 336 + }, + { + "epoch": 3.4475703324808182, + "grad_norm": 0.15240023162277883, + "learning_rate": 4.133853079325196e-05, + "loss": 1.025, + "step": 337 + }, + { + "epoch": 3.4578005115089514, + "grad_norm": 0.11788415451483955, + "learning_rate": 4.083488241544595e-05, + "loss": 1.0459, + "step": 338 + }, + { + "epoch": 3.4680306905370846, + "grad_norm": 0.16921030624641467, + "learning_rate": 4.033326744467882e-05, + "loss": 1.0112, + "step": 339 + }, + { + "epoch": 3.4782608695652173, + "grad_norm": 0.11606787523018822, + "learning_rate": 3.983371192421246e-05, + "loss": 1.0306, + "step": 340 + }, + { + "epoch": 3.4884910485933505, + "grad_norm": 0.1682374876165679, + "learning_rate": 3.933624179038446e-05, + "loss": 1.0185, + "step": 341 + }, + { + "epoch": 3.498721227621483, + "grad_norm": 0.1565962574632331, + "learning_rate": 3.884088287126151e-05, + "loss": 1.0293, + "step": 342 + }, + { + "epoch": 3.5089514066496164, + "grad_norm": 0.14324776346066015, + "learning_rate": 3.834766088529867e-05, + "loss": 1.0458, + "step": 343 + }, + { + "epoch": 3.5191815856777495, + "grad_norm": 0.19529159534261803, + "learning_rate": 3.785660144000378e-05, + "loss": 1.0056, + "step": 344 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.12801441909483788, + "learning_rate": 3.736773003060821e-05, + "loss": 1.0297, + "step": 345 + }, + { + "epoch": 3.5396419437340154, + "grad_norm": 0.1503941568635443, + "learning_rate": 3.688107203874301e-05, + "loss": 1.0416, + "step": 346 + }, + { + "epoch": 3.5498721227621486, + "grad_norm": 0.13361192505091346, + "learning_rate": 3.6396652731121136e-05, + "loss": 1.0204, + "step": 347 + }, + { + "epoch": 3.5601023017902813, + "grad_norm": 0.12402122906765294, + "learning_rate": 3.5914497258225815e-05, + "loss": 1.0281, + "step": 348 + }, + { + "epoch": 3.5703324808184145, + "grad_norm": 0.1370545977754261, + "learning_rate": 3.543463065300452e-05, + "loss": 1.0271, + "step": 349 + }, + { + "epoch": 3.580562659846547, + "grad_norm": 0.10522140449656789, + "learning_rate": 3.49570778295694e-05, + "loss": 1.0253, + "step": 350 + }, + { + "epoch": 3.5907928388746804, + "grad_norm": 0.13288330038665777, + "learning_rate": 3.448186358190383e-05, + "loss": 1.0155, + "step": 351 + }, + { + "epoch": 3.601023017902813, + "grad_norm": 0.11850947440760301, + "learning_rate": 3.400901258257501e-05, + "loss": 1.0316, + "step": 352 + }, + { + "epoch": 3.6112531969309463, + "grad_norm": 0.1207639690615278, + "learning_rate": 3.3538549381453046e-05, + "loss": 1.0147, + "step": 353 + }, + { + "epoch": 3.6214833759590794, + "grad_norm": 0.10436214175934275, + "learning_rate": 3.307049840443644e-05, + "loss": 1.0158, + "step": 354 + }, + { + "epoch": 3.631713554987212, + "grad_norm": 0.10939591730050287, + "learning_rate": 3.2604883952183716e-05, + "loss": 1.0219, + "step": 355 + }, + { + "epoch": 3.6419437340153453, + "grad_norm": 0.10305381344145671, + "learning_rate": 3.214173019885202e-05, + "loss": 1.0165, + "step": 356 + }, + { + "epoch": 3.6521739130434785, + "grad_norm": 0.10617153493263774, + "learning_rate": 3.1681061190841806e-05, + "loss": 1.0193, + "step": 357 + }, + { + "epoch": 3.662404092071611, + "grad_norm": 0.1052450380146473, + "learning_rate": 3.122290084554845e-05, + "loss": 1.0309, + "step": 358 + }, + { + "epoch": 3.6726342710997444, + "grad_norm": 0.09779940640870793, + "learning_rate": 3.076727295012059e-05, + "loss": 1.0106, + "step": 359 + }, + { + "epoch": 3.682864450127877, + "grad_norm": 0.09865111890866658, + "learning_rate": 3.031420116022493e-05, + "loss": 1.0237, + "step": 360 + }, + { + "epoch": 3.6930946291560103, + "grad_norm": 0.09761460812236314, + "learning_rate": 2.98637089988182e-05, + "loss": 1.0071, + "step": 361 + }, + { + "epoch": 3.703324808184143, + "grad_norm": 0.09657195902847425, + "learning_rate": 2.94158198549259e-05, + "loss": 1.0213, + "step": 362 + }, + { + "epoch": 3.713554987212276, + "grad_norm": 0.10306766572443878, + "learning_rate": 2.8970556982427836e-05, + "loss": 1.0114, + "step": 363 + }, + { + "epoch": 3.7237851662404093, + "grad_norm": 0.0892256925639781, + "learning_rate": 2.852794349885087e-05, + "loss": 1.0141, + "step": 364 + }, + { + "epoch": 3.734015345268542, + "grad_norm": 0.0984967903902541, + "learning_rate": 2.8088002384168783e-05, + "loss": 1.0309, + "step": 365 + }, + { + "epoch": 3.7442455242966752, + "grad_norm": 0.09702833968633048, + "learning_rate": 2.765075647960898e-05, + "loss": 1.0133, + "step": 366 + }, + { + "epoch": 3.7544757033248084, + "grad_norm": 0.10259443786625837, + "learning_rate": 2.7216228486466856e-05, + "loss": 1.0158, + "step": 367 + }, + { + "epoch": 3.764705882352941, + "grad_norm": 0.09444360606393558, + "learning_rate": 2.678444096492683e-05, + "loss": 1.02, + "step": 368 + }, + { + "epoch": 3.7749360613810743, + "grad_norm": 0.11306816830082422, + "learning_rate": 2.6355416332891404e-05, + "loss": 1.0185, + "step": 369 + }, + { + "epoch": 3.785166240409207, + "grad_norm": 0.09142683583600977, + "learning_rate": 2.592917686481708e-05, + "loss": 1.0038, + "step": 370 + }, + { + "epoch": 3.79539641943734, + "grad_norm": 0.10949950748671738, + "learning_rate": 2.5505744690557846e-05, + "loss": 1.0376, + "step": 371 + }, + { + "epoch": 3.805626598465473, + "grad_norm": 0.11343467361764166, + "learning_rate": 2.508514179421629e-05, + "loss": 1.0358, + "step": 372 + }, + { + "epoch": 3.815856777493606, + "grad_norm": 0.09342791259699781, + "learning_rate": 2.4667390013002254e-05, + "loss": 1.0211, + "step": 373 + }, + { + "epoch": 3.8260869565217392, + "grad_norm": 0.10858137240897216, + "learning_rate": 2.425251103609898e-05, + "loss": 1.0332, + "step": 374 + }, + { + "epoch": 3.836317135549872, + "grad_norm": 0.0886186909107238, + "learning_rate": 2.3840526403537095e-05, + "loss": 1.0143, + "step": 375 + }, + { + "epoch": 3.846547314578005, + "grad_norm": 0.09895029034827527, + "learning_rate": 2.3431457505076205e-05, + "loss": 1.0173, + "step": 376 + }, + { + "epoch": 3.8567774936061383, + "grad_norm": 0.10546368891288044, + "learning_rate": 2.3025325579094498e-05, + "loss": 1.0362, + "step": 377 + }, + { + "epoch": 3.867007672634271, + "grad_norm": 0.08194245174545557, + "learning_rate": 2.2622151711485962e-05, + "loss": 1.0124, + "step": 378 + }, + { + "epoch": 3.877237851662404, + "grad_norm": 0.1114016593112589, + "learning_rate": 2.2221956834565647e-05, + "loss": 1.0139, + "step": 379 + }, + { + "epoch": 3.887468030690537, + "grad_norm": 0.09816527002580688, + "learning_rate": 2.1824761725982874e-05, + "loss": 1.0523, + "step": 380 + }, + { + "epoch": 3.89769820971867, + "grad_norm": 0.0799950179310954, + "learning_rate": 2.1430587007642513e-05, + "loss": 0.991, + "step": 381 + }, + { + "epoch": 3.907928388746803, + "grad_norm": 0.1080492642683735, + "learning_rate": 2.1039453144634364e-05, + "loss": 1.0355, + "step": 382 + }, + { + "epoch": 3.918158567774936, + "grad_norm": 0.09178099069537385, + "learning_rate": 2.0651380444170527e-05, + "loss": 1.015, + "step": 383 + }, + { + "epoch": 3.928388746803069, + "grad_norm": 0.09031457518884235, + "learning_rate": 2.026638905453111e-05, + "loss": 1.0229, + "step": 384 + }, + { + "epoch": 3.938618925831202, + "grad_norm": 0.09836817148107584, + "learning_rate": 1.9884498964018233e-05, + "loss": 1.0135, + "step": 385 + }, + { + "epoch": 3.948849104859335, + "grad_norm": 0.08002277345563297, + "learning_rate": 1.9505729999918194e-05, + "loss": 1.0044, + "step": 386 + }, + { + "epoch": 3.959079283887468, + "grad_norm": 0.08738866259057297, + "learning_rate": 1.913010182747196e-05, + "loss": 1.0103, + "step": 387 + }, + { + "epoch": 3.969309462915601, + "grad_norm": 0.08459856390870199, + "learning_rate": 1.875763394885441e-05, + "loss": 1.0491, + "step": 388 + }, + { + "epoch": 3.979539641943734, + "grad_norm": 0.07894640689767103, + "learning_rate": 1.8388345702161556e-05, + "loss": 1.0355, + "step": 389 + }, + { + "epoch": 3.9897698209718673, + "grad_norm": 0.07643936790355793, + "learning_rate": 1.8022256260406756e-05, + "loss": 1.021, + "step": 390 + }, + { + "epoch": 4.0, + "grad_norm": 0.09661289153754662, + "learning_rate": 1.765938463052506e-05, + "loss": 1.288, + "step": 391 + }, + { + "epoch": 4.010230179028133, + "grad_norm": 0.08762181265193057, + "learning_rate": 1.729974965238651e-05, + "loss": 1.001, + "step": 392 + }, + { + "epoch": 4.020460358056266, + "grad_norm": 0.08647609139764888, + "learning_rate": 1.6943369997818066e-05, + "loss": 1.015, + "step": 393 + }, + { + "epoch": 4.030690537084399, + "grad_norm": 0.08788066772345464, + "learning_rate": 1.659026416963401e-05, + "loss": 1.0076, + "step": 394 + }, + { + "epoch": 4.040920716112532, + "grad_norm": 0.07750381562018518, + "learning_rate": 1.6240450500675393e-05, + "loss": 1.0148, + "step": 395 + }, + { + "epoch": 4.051150895140665, + "grad_norm": 0.07635694926329481, + "learning_rate": 1.5893947152858285e-05, + "loss": 1.0016, + "step": 396 + }, + { + "epoch": 4.061381074168798, + "grad_norm": 0.08148076505063037, + "learning_rate": 1.55507721162307e-05, + "loss": 1.0043, + "step": 397 + }, + { + "epoch": 4.071611253196931, + "grad_norm": 0.07751240865988411, + "learning_rate": 1.5210943208038634e-05, + "loss": 1.0288, + "step": 398 + }, + { + "epoch": 4.081841432225064, + "grad_norm": 0.07904341557445363, + "learning_rate": 1.4874478071801055e-05, + "loss": 1.0302, + "step": 399 + }, + { + "epoch": 4.092071611253197, + "grad_norm": 0.08036549522985713, + "learning_rate": 1.454139417639377e-05, + "loss": 1.0021, + "step": 400 + }, + { + "epoch": 4.10230179028133, + "grad_norm": 0.07597724481740194, + "learning_rate": 1.4211708815142599e-05, + "loss": 1.008, + "step": 401 + }, + { + "epoch": 4.112531969309463, + "grad_norm": 0.07493385254201147, + "learning_rate": 1.3885439104925387e-05, + "loss": 1.0082, + "step": 402 + }, + { + "epoch": 4.122762148337596, + "grad_norm": 0.08492760833944613, + "learning_rate": 1.3562601985283358e-05, + "loss": 1.0103, + "step": 403 + }, + { + "epoch": 4.132992327365729, + "grad_norm": 0.0819518128021678, + "learning_rate": 1.3243214217541751e-05, + "loss": 1.0186, + "step": 404 + }, + { + "epoch": 4.143222506393862, + "grad_norm": 0.07813515029694298, + "learning_rate": 1.2927292383939407e-05, + "loss": 1.0103, + "step": 405 + }, + { + "epoch": 4.153452685421995, + "grad_norm": 0.07571122126240162, + "learning_rate": 1.2614852886767932e-05, + "loss": 1.0172, + "step": 406 + }, + { + "epoch": 4.163682864450128, + "grad_norm": 0.07867401677851067, + "learning_rate": 1.2305911947520159e-05, + "loss": 1.0172, + "step": 407 + }, + { + "epoch": 4.173913043478261, + "grad_norm": 0.07427552294515202, + "learning_rate": 1.2000485606047837e-05, + "loss": 1.0254, + "step": 408 + }, + { + "epoch": 4.1841432225063935, + "grad_norm": 0.07918066825847282, + "learning_rate": 1.1698589719728911e-05, + "loss": 1.025, + "step": 409 + }, + { + "epoch": 4.194373401534527, + "grad_norm": 0.0745101976937812, + "learning_rate": 1.1400239962644294e-05, + "loss": 1.011, + "step": 410 + }, + { + "epoch": 4.20460358056266, + "grad_norm": 0.07635541094438233, + "learning_rate": 1.1105451824763933e-05, + "loss": 1.0064, + "step": 411 + }, + { + "epoch": 4.2148337595907925, + "grad_norm": 0.07409705314572482, + "learning_rate": 1.0814240611142765e-05, + "loss": 1.0049, + "step": 412 + }, + { + "epoch": 4.225063938618926, + "grad_norm": 0.07875185704588439, + "learning_rate": 1.0526621441125946e-05, + "loss": 1.0039, + "step": 413 + }, + { + "epoch": 4.235294117647059, + "grad_norm": 0.07576883769832815, + "learning_rate": 1.0242609247563924e-05, + "loss": 1.0204, + "step": 414 + }, + { + "epoch": 4.245524296675192, + "grad_norm": 0.07808157523983991, + "learning_rate": 9.962218776037234e-06, + "loss": 1.0178, + "step": 415 + }, + { + "epoch": 4.255754475703325, + "grad_norm": 0.0706208132598418, + "learning_rate": 9.68546458409077e-06, + "loss": 1.0144, + "step": 416 + }, + { + "epoch": 4.265984654731458, + "grad_norm": 0.0816397013567019, + "learning_rate": 9.41236104047806e-06, + "loss": 1.0008, + "step": 417 + }, + { + "epoch": 4.276214833759591, + "grad_norm": 0.07454955996130302, + "learning_rate": 9.14292232441528e-06, + "loss": 1.0102, + "step": 418 + }, + { + "epoch": 4.286445012787723, + "grad_norm": 0.0715010447940489, + "learning_rate": 8.877162424845012e-06, + "loss": 0.997, + "step": 419 + }, + { + "epoch": 4.296675191815857, + "grad_norm": 0.07752947453647589, + "learning_rate": 8.615095139710044e-06, + "loss": 1.0204, + "step": 420 + }, + { + "epoch": 4.30690537084399, + "grad_norm": 0.07999927748669688, + "learning_rate": 8.356734075236858e-06, + "loss": 1.0286, + "step": 421 + }, + { + "epoch": 4.3171355498721224, + "grad_norm": 0.07470822337555567, + "learning_rate": 8.102092645229392e-06, + "loss": 0.9999, + "step": 422 + }, + { + "epoch": 4.327365728900256, + "grad_norm": 0.06986020359409462, + "learning_rate": 7.8511840703725e-06, + "loss": 1.0157, + "step": 423 + }, + { + "epoch": 4.337595907928389, + "grad_norm": 0.07546615544304432, + "learning_rate": 7.604021377545518e-06, + "loss": 1.0177, + "step": 424 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 0.07303012390571562, + "learning_rate": 7.36061739914601e-06, + "loss": 1.0053, + "step": 425 + }, + { + "epoch": 4.358056265984655, + "grad_norm": 0.07983971604823199, + "learning_rate": 7.120984772423507e-06, + "loss": 1.0116, + "step": 426 + }, + { + "epoch": 4.368286445012788, + "grad_norm": 0.07412392301940252, + "learning_rate": 6.88513593882334e-06, + "loss": 1.0157, + "step": 427 + }, + { + "epoch": 4.378516624040921, + "grad_norm": 0.07540589914642663, + "learning_rate": 6.653083143340748e-06, + "loss": 1.0321, + "step": 428 + }, + { + "epoch": 4.388746803069053, + "grad_norm": 0.0734695901154156, + "learning_rate": 6.4248384338851146e-06, + "loss": 1.0166, + "step": 429 + }, + { + "epoch": 4.398976982097187, + "grad_norm": 0.07148917346240428, + "learning_rate": 6.2004136606544515e-06, + "loss": 1.0155, + "step": 430 + }, + { + "epoch": 4.40920716112532, + "grad_norm": 0.06576015271509353, + "learning_rate": 5.979820475520202e-06, + "loss": 1.0268, + "step": 431 + }, + { + "epoch": 4.419437340153452, + "grad_norm": 0.06602815168374586, + "learning_rate": 5.763070331422151e-06, + "loss": 1.0094, + "step": 432 + }, + { + "epoch": 4.429667519181586, + "grad_norm": 0.06806818912226788, + "learning_rate": 5.550174481773969e-06, + "loss": 1.0117, + "step": 433 + }, + { + "epoch": 4.439897698209719, + "grad_norm": 0.0654860198424932, + "learning_rate": 5.341143979878851e-06, + "loss": 1.024, + "step": 434 + }, + { + "epoch": 4.450127877237851, + "grad_norm": 0.06347915481210827, + "learning_rate": 5.135989678355664e-06, + "loss": 1.0068, + "step": 435 + }, + { + "epoch": 4.460358056265985, + "grad_norm": 0.06673007006524383, + "learning_rate": 4.934722228575481e-06, + "loss": 1.0144, + "step": 436 + }, + { + "epoch": 4.470588235294118, + "grad_norm": 0.06276490466658945, + "learning_rate": 4.7373520801085705e-06, + "loss": 1.0149, + "step": 437 + }, + { + "epoch": 4.4808184143222505, + "grad_norm": 0.06462800206504453, + "learning_rate": 4.543889480181944e-06, + "loss": 1.0209, + "step": 438 + }, + { + "epoch": 4.491048593350383, + "grad_norm": 0.06638702716213918, + "learning_rate": 4.354344473147194e-06, + "loss": 1.0229, + "step": 439 + }, + { + "epoch": 4.501278772378517, + "grad_norm": 0.06786708229119566, + "learning_rate": 4.1687268999591164e-06, + "loss": 1.0093, + "step": 440 + }, + { + "epoch": 4.5115089514066495, + "grad_norm": 0.06344929556504937, + "learning_rate": 3.98704639766474e-06, + "loss": 1.0227, + "step": 441 + }, + { + "epoch": 4.521739130434782, + "grad_norm": 0.061257597771025976, + "learning_rate": 3.809312398903e-06, + "loss": 1.0206, + "step": 442 + }, + { + "epoch": 4.531969309462916, + "grad_norm": 0.06207018671567817, + "learning_rate": 3.6355341314149216e-06, + "loss": 1.0061, + "step": 443 + }, + { + "epoch": 4.542199488491049, + "grad_norm": 0.0697177559048247, + "learning_rate": 3.465720617564676e-06, + "loss": 1.0001, + "step": 444 + }, + { + "epoch": 4.552429667519181, + "grad_norm": 0.06909570953584229, + "learning_rate": 3.299880673871023e-06, + "loss": 1.0179, + "step": 445 + }, + { + "epoch": 4.562659846547315, + "grad_norm": 0.0660073639280078, + "learning_rate": 3.138022910549632e-06, + "loss": 1.0261, + "step": 446 + }, + { + "epoch": 4.572890025575448, + "grad_norm": 0.061208022344978574, + "learning_rate": 2.980155731066017e-06, + "loss": 0.9983, + "step": 447 + }, + { + "epoch": 4.58312020460358, + "grad_norm": 0.05925952496152496, + "learning_rate": 2.8262873316992556e-06, + "loss": 1.0232, + "step": 448 + }, + { + "epoch": 4.593350383631714, + "grad_norm": 0.06402981619720209, + "learning_rate": 2.676425701116463e-06, + "loss": 1.0065, + "step": 449 + }, + { + "epoch": 4.603580562659847, + "grad_norm": 0.058782758741933706, + "learning_rate": 2.530578619957993e-06, + "loss": 1.0117, + "step": 450 + }, + { + "epoch": 4.6138107416879794, + "grad_norm": 0.05836448954212346, + "learning_rate": 2.3887536604334784e-06, + "loss": 0.9904, + "step": 451 + }, + { + "epoch": 4.624040920716112, + "grad_norm": 0.05866978691158495, + "learning_rate": 2.2509581859287576e-06, + "loss": 1.018, + "step": 452 + }, + { + "epoch": 4.634271099744246, + "grad_norm": 0.05897537411295091, + "learning_rate": 2.117199350623462e-06, + "loss": 1.0224, + "step": 453 + }, + { + "epoch": 4.6445012787723785, + "grad_norm": 0.0576826219117585, + "learning_rate": 1.987484099119712e-06, + "loss": 1.0256, + "step": 454 + }, + { + "epoch": 4.654731457800511, + "grad_norm": 0.058074308794142736, + "learning_rate": 1.8618191660814356e-06, + "loss": 1.0126, + "step": 455 + }, + { + "epoch": 4.664961636828645, + "grad_norm": 0.057718766623449665, + "learning_rate": 1.7402110758847834e-06, + "loss": 1.0064, + "step": 456 + }, + { + "epoch": 4.675191815856778, + "grad_norm": 0.055268606367167225, + "learning_rate": 1.6226661422794033e-06, + "loss": 1.0015, + "step": 457 + }, + { + "epoch": 4.68542199488491, + "grad_norm": 0.06120460420865381, + "learning_rate": 1.5091904680605862e-06, + "loss": 1.0195, + "step": 458 + }, + { + "epoch": 4.695652173913043, + "grad_norm": 0.057198583474872174, + "learning_rate": 1.3997899447524277e-06, + "loss": 1.0313, + "step": 459 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.054125320926830395, + "learning_rate": 1.294470252302009e-06, + "loss": 1.0074, + "step": 460 + }, + { + "epoch": 4.716112531969309, + "grad_norm": 0.05767289138418469, + "learning_rate": 1.193236858784408e-06, + "loss": 1.0073, + "step": 461 + }, + { + "epoch": 4.726342710997442, + "grad_norm": 0.058374045513090105, + "learning_rate": 1.0960950201188524e-06, + "loss": 1.0217, + "step": 462 + }, + { + "epoch": 4.736572890025576, + "grad_norm": 0.05554209835620826, + "learning_rate": 1.003049779795866e-06, + "loss": 1.0167, + "step": 463 + }, + { + "epoch": 4.746803069053708, + "grad_norm": 0.06184311865928885, + "learning_rate": 9.141059686153419e-07, + "loss": 1.0207, + "step": 464 + }, + { + "epoch": 4.757033248081841, + "grad_norm": 0.056079764830628674, + "learning_rate": 8.292682044358114e-07, + "loss": 1.0169, + "step": 465 + }, + { + "epoch": 4.767263427109975, + "grad_norm": 0.05903827365785868, + "learning_rate": 7.485408919346171e-07, + "loss": 1.0276, + "step": 466 + }, + { + "epoch": 4.7774936061381075, + "grad_norm": 0.05665451559441624, + "learning_rate": 6.719282223793056e-07, + "loss": 1.0108, + "step": 467 + }, + { + "epoch": 4.78772378516624, + "grad_norm": 0.05541913708216884, + "learning_rate": 5.994341734099429e-07, + "loss": 1.0213, + "step": 468 + }, + { + "epoch": 4.797953964194374, + "grad_norm": 0.0531727158843823, + "learning_rate": 5.310625088326671e-07, + "loss": 0.9962, + "step": 469 + }, + { + "epoch": 4.8081841432225065, + "grad_norm": 0.056474278323680495, + "learning_rate": 4.6681677842421724e-07, + "loss": 1.0079, + "step": 470 + }, + { + "epoch": 4.818414322250639, + "grad_norm": 0.05529573865410167, + "learning_rate": 4.067003177476991e-07, + "loss": 1.0025, + "step": 471 + }, + { + "epoch": 4.828644501278772, + "grad_norm": 0.055929732769464856, + "learning_rate": 3.507162479793369e-07, + "loss": 1.0173, + "step": 472 + }, + { + "epoch": 4.838874680306906, + "grad_norm": 0.0546056253906124, + "learning_rate": 2.9886747574646936e-07, + "loss": 1.0001, + "step": 473 + }, + { + "epoch": 4.849104859335038, + "grad_norm": 0.05682769159587846, + "learning_rate": 2.511566929766396e-07, + "loss": 1.0062, + "step": 474 + }, + { + "epoch": 4.859335038363171, + "grad_norm": 0.05365397819606409, + "learning_rate": 2.075863767577957e-07, + "loss": 1.0195, + "step": 475 + }, + { + "epoch": 4.869565217391305, + "grad_norm": 0.05538514973032816, + "learning_rate": 1.681587892097536e-07, + "loss": 1.0159, + "step": 476 + }, + { + "epoch": 4.879795396419437, + "grad_norm": 0.05391134086338163, + "learning_rate": 1.3287597736667323e-07, + "loss": 1.0233, + "step": 477 + }, + { + "epoch": 4.89002557544757, + "grad_norm": 0.054055617154126184, + "learning_rate": 1.0173977307082361e-07, + "loss": 1.0188, + "step": 478 + }, + { + "epoch": 4.900255754475703, + "grad_norm": 0.05372397484026782, + "learning_rate": 7.475179287748547e-08, + "loss": 1.0235, + "step": 479 + }, + { + "epoch": 4.910485933503836, + "grad_norm": 0.054794275718811285, + "learning_rate": 5.191343797096515e-08, + "loss": 1.0018, + "step": 480 + }, + { + "epoch": 4.920716112531969, + "grad_norm": 0.05350058384849903, + "learning_rate": 3.322589409190613e-08, + "loss": 1.009, + "step": 481 + }, + { + "epoch": 4.930946291560103, + "grad_norm": 0.05480646007195536, + "learning_rate": 1.8690131475711527e-08, + "loss": 1.0273, + "step": 482 + }, + { + "epoch": 4.9411764705882355, + "grad_norm": 0.05416557096319236, + "learning_rate": 8.306904802148907e-09, + "loss": 1.0322, + "step": 483 + }, + { + "epoch": 4.951406649616368, + "grad_norm": 0.053492193014158126, + "learning_rate": 2.07675315618161e-09, + "loss": 1.0035, + "step": 484 + }, + { + "epoch": 4.961636828644501, + "grad_norm": 0.055105663696246775, + "learning_rate": 0.0, + "loss": 1.0124, + "step": 485 + }, + { + "epoch": 4.961636828644501, + "step": 485, + "total_flos": 7065760181780480.0, + "train_loss": 1.075610858509221, + "train_runtime": 69151.8205, + "train_samples_per_second": 7.23, + "train_steps_per_second": 0.007 + } + ], + "logging_steps": 1.0, + "max_steps": 485, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7065760181780480.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..8775f18 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f53657f362937f57b3c763e3a8626ec8db0ce2d9eb097eb6b0bc915a57bb83e +size 7224 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..a621b07 Binary files /dev/null and b/training_loss.png differ diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833