commit f11041f846b537920a397af4e061c9f696e1931c Author: ModelHub XC Date: Wed Jun 3 23:19:11 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: DaDing777/qwen2.5-VL-3B-atm-finetune-cot-full Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..15af783 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,54 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy 
filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6863933 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +library_name: transformers +license: other +base_model: Qwen/Qwen2.5-VL-3B-Instruct +tags: +- llama-factory +- full +- generated_from_trainer +model-index: +- name: Qwen2.5-VL-3B-Instruct-cot-full + results: [] +--- + + + +# Qwen2.5-VL-3B-Instruct-cot-full + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) on the atm_finetune_cot dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 1e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1.0 + +### Training results + + + +### Framework versions + +- Transformers 4.52.4 +- Pytorch 2.6.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.1 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..482ced4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,24 @@ +{ + "</tool_call>": 151658, + "<tool_call>": 151657, + "<|box_end|>": 151649, + "<|box_start|>": 151648, + "<|endoftext|>": 151643, + "<|file_sep|>": 151664, + "<|fim_middle|>": 151660, + "<|fim_pad|>": 151662, + "<|fim_prefix|>": 151659, + "<|fim_suffix|>": 151661, + "<|im_end|>": 151645, + "<|im_start|>": 151644, + "<|image_pad|>": 151655, + "<|object_ref_end|>": 151647, + "<|object_ref_start|>": 151646, + "<|quad_end|>": 151651, + "<|quad_start|>": 151650, + "<|repo_name|>": 151663, + "<|video_pad|>": 151656, + "<|vision_end|>": 151653, + "<|vision_pad|>": 151654, + "<|vision_start|>": 151652 +} diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..54e5cda --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "total_flos": 669099333058560.0, + "train_loss": 0.32129256987551813, + "train_runtime": 70988.8215, + "train_samples_per_second": 0.742, + "train_steps_per_second": 0.023 +} \ No newline at end of file diff --git a/chat_template.jinja 
b/chat_template.jinja new file mode 100644 index 0000000..6c22663 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..6def60c --- /dev/null +++ b/config.json @@ -0,0 +1,105 @@ +{ + "architectures": [ + "Qwen2_5_VLForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "image_token_id": 151655, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 128000, + "max_window_layers": 70, + "model_type": "qwen2_5_vl", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "mrope_section": [ + 16, + 24, + 24 + ], + "rope_type": "default", + "type": "default" + }, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "text_config": { + "architectures": 
[ + "Qwen2_5_VLForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 2048, + "image_token_id": null, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 128000, + "max_window_layers": 70, + "model_type": "qwen2_5_vl_text", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "mrope_section": [ + 16, + 24, + 24 + ], + "rope_type": "default", + "type": "default" + }, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "use_cache": false, + "use_sliding_window": false, + "video_token_id": null, + "vision_end_token_id": 151653, + "vision_start_token_id": 151652, + "vision_token_id": 151654, + "vocab_size": 151936 + }, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": false, + "use_sliding_window": false, + "video_token_id": 151656, + "vision_config": { + "depth": 32, + "fullatt_block_indexes": [ + 7, + 15, + 23, + 31 + ], + "hidden_act": "silu", + "hidden_size": 1280, + "in_channels": 3, + "in_chans": 3, + "initializer_range": 0.02, + "intermediate_size": 3420, + "model_type": "qwen2_5_vl", + "num_heads": 16, + "out_hidden_size": 2048, + "patch_size": 14, + "spatial_merge_size": 2, + "spatial_patch_size": 14, + "temporal_patch_size": 2, + "tokens_per_second": 2, + "torch_dtype": "float32", + "window_size": 112 + }, + "vision_end_token_id": 151653, + "vision_start_token_id": 151652, + "vision_token_id": 151654, + "vocab_size": 151936 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..3a6d425 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework":"Pytorch","task":"image-text-to-text"} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..c110271 --- 
/dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 1e-06, + "transformers_version": "4.52.4" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..80c1a19 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000..aee3c2f --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432b56b44eff6f252f9964a986375d546acab6feba366384d96c4bbf1a504d33 +size 4997750760 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000..0075fe6 --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051960960d6febce048df2c1d91f5ffc0121d4b4add1336f0ac6cf957eea18bf +size 2511587184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..97a2999 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,831 @@ +{ + "metadata": { + "total_size": 7509245952 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.bias": 
"model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", 
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.input_layernorm.weight": 
"model-00002-of-00002.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.gate_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.mlp.up_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + 
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors", + "visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.0.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.0.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.0.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.mlp.up_proj.bias": 
"model-00001-of-00002.safetensors", + "visual.blocks.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.11.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.11.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.11.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors", + 
"visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.14.attn.proj.weight": 
"model-00001-of-00002.safetensors", + "visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.14.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.14.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.14.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors", + 
"visual.blocks.16.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.16.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.16.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.mlp.gate_proj.bias": 
"model-00001-of-00002.safetensors", + "visual.blocks.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + 
"visual.blocks.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.21.norm2.weight": 
"model-00001-of-00002.safetensors", + "visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.22.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.22.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.22.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.24.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors", + 
"visual.blocks.24.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.24.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.24.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.24.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.24.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.24.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.25.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.25.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.25.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.26.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.26.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.mlp.down_proj.bias": 
"model-00001-of-00002.safetensors", + "visual.blocks.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.26.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.27.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.27.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.27.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.27.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.27.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.27.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.28.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.28.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.28.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + 
"visual.blocks.28.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.28.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.28.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.29.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.29.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.29.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.29.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.29.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.29.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.3.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.3.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.3.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.3.mlp.up_proj.weight": 
"model-00001-of-00002.safetensors", + "visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.30.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.30.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.30.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.30.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.30.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.30.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.31.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.31.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.31.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.31.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.31.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.31.norm2.weight": "model-00001-of-00002.safetensors", + 
"visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.4.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.4.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.4.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors", + 
"visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.8.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.8.mlp.down_proj.weight": 
"model-00001-of-00002.safetensors", + "visual.blocks.8.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.8.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors", + "visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.mlp.down_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.mlp.gate_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.mlp.up_proj.bias": "model-00001-of-00002.safetensors", + "visual.blocks.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors", + "visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors", + "visual.merger.ln_q.weight": "model-00001-of-00002.safetensors", + "visual.merger.mlp.0.bias": "model-00001-of-00002.safetensors", + "visual.merger.mlp.0.weight": "model-00001-of-00002.safetensors", + "visual.merger.mlp.2.bias": "model-00001-of-00002.safetensors", + "visual.merger.mlp.2.weight": "model-00001-of-00002.safetensors", + "visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..1c234b7 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,36 @@ +{ + "crop_size": null, + "data_format": 
"channels_first", + "default_to_square": true, + "device": null, + "do_center_crop": null, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "input_data_format": null, + "max_pixels": 12845056, + "merge_size": 2, + "min_pixels": 3136, + "patch_size": 14, + "processor_class": "Qwen2_5_VLProcessor", + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_tensors": null, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 +} diff --git a/runs/Dec12_16-04-06_n1/events.out.tfevents.1765555619.n1 b/runs/Dec12_16-04-06_n1/events.out.tfevents.1765555619.n1 new file mode 100644 index 0000000..0eb18c2 --- /dev/null +++ b/runs/Dec12_16-04-06_n1/events.out.tfevents.1765555619.n1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed69e687eee22a91ec3dc83c48af7629d2e4c56c348c8e7f714611af3543e619 +size 354620 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..230f071 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,209 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + 
"<|video_pad|>" + ], + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..54e5cda --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "total_flos": 669099333058560.0, + "train_loss": 0.32129256987551813, + "train_runtime": 70988.8215, + "train_samples_per_second": 0.742, + "train_steps_per_second": 0.023 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..4adde03 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1648 @@ +{"current_steps": 1, "total_steps": 1647, "loss": 1.3011, "lr": 0.0, "epoch": 0.0006074411541381929, "percentage": 0.06, "elapsed_time": "0:00:55", "remaining_time": "1 day, 1:16:01"} +{"current_steps": 2, "total_steps": 1647, "loss": 1.3126, "lr": 6.060606060606061e-08, "epoch": 0.0012148823082763858, "percentage": 0.12, "elapsed_time": "0:01:38", "remaining_time": "22:35:56"} +{"current_steps": 3, "total_steps": 1647, "loss": 1.2803, "lr": 1.2121212121212122e-07, "epoch": 0.0018223234624145787, "percentage": 0.18, "elapsed_time": "0:02:24", "remaining_time": "21:59:36"} +{"current_steps": 4, "total_steps": 1647, "loss": 1.3162, "lr": 1.8181818181818183e-07, "epoch": 0.0024297646165527716, "percentage": 0.24, "elapsed_time": "0:03:07", "remaining_time": "21:26:23"} +{"current_steps": 5, "total_steps": 1647, "loss": 1.3803, "lr": 2.4242424242424244e-07, "epoch": 0.0030372057706909645, "percentage": 0.3, "elapsed_time": "0:03:51", "remaining_time": "21:08:44"} +{"current_steps": 6, "total_steps": 1647, "loss": 1.3463, "lr": 3.0303030303030305e-07, "epoch": 
0.0036446469248291574, "percentage": 0.36, "elapsed_time": "0:04:38", "remaining_time": "21:08:28"} +{"current_steps": 7, "total_steps": 1647, "loss": 1.3397, "lr": 3.6363636363636366e-07, "epoch": 0.00425208807896735, "percentage": 0.43, "elapsed_time": "0:05:24", "remaining_time": "21:05:43"} +{"current_steps": 8, "total_steps": 1647, "loss": 1.3189, "lr": 4.242424242424243e-07, "epoch": 0.004859529233105543, "percentage": 0.49, "elapsed_time": "0:06:10", "remaining_time": "21:06:08"} +{"current_steps": 9, "total_steps": 1647, "loss": 1.2963, "lr": 4.848484848484849e-07, "epoch": 0.005466970387243736, "percentage": 0.55, "elapsed_time": "0:06:55", "remaining_time": "21:00:52"} +{"current_steps": 10, "total_steps": 1647, "loss": 1.2897, "lr": 5.454545454545455e-07, "epoch": 0.006074411541381929, "percentage": 0.61, "elapsed_time": "0:07:37", "remaining_time": "20:48:53"} +{"current_steps": 11, "total_steps": 1647, "loss": 1.2556, "lr": 6.060606060606061e-07, "epoch": 0.006681852695520122, "percentage": 0.67, "elapsed_time": "0:08:24", "remaining_time": "20:50:36"} +{"current_steps": 12, "total_steps": 1647, "loss": 1.2826, "lr": 6.666666666666667e-07, "epoch": 0.007289293849658315, "percentage": 0.73, "elapsed_time": "0:09:05", "remaining_time": "20:38:51"} +{"current_steps": 13, "total_steps": 1647, "loss": 1.3033, "lr": 7.272727272727273e-07, "epoch": 0.007896735003796507, "percentage": 0.79, "elapsed_time": "0:09:46", "remaining_time": "20:28:10"} +{"current_steps": 14, "total_steps": 1647, "loss": 1.3182, "lr": 7.878787878787879e-07, "epoch": 0.0085041761579347, "percentage": 0.85, "elapsed_time": "0:10:33", "remaining_time": "20:30:49"} +{"current_steps": 15, "total_steps": 1647, "loss": 1.2456, "lr": 8.484848484848486e-07, "epoch": 0.009111617312072893, "percentage": 0.91, "elapsed_time": "0:11:17", "remaining_time": "20:27:59"} +{"current_steps": 16, "total_steps": 1647, "loss": 1.3312, "lr": 9.090909090909091e-07, "epoch": 0.009719058466211086, 
"percentage": 0.97, "elapsed_time": "0:12:03", "remaining_time": "20:29:39"} +{"current_steps": 17, "total_steps": 1647, "loss": 1.2702, "lr": 9.696969696969698e-07, "epoch": 0.010326499620349278, "percentage": 1.03, "elapsed_time": "0:12:44", "remaining_time": "20:21:33"} +{"current_steps": 18, "total_steps": 1647, "loss": 1.2982, "lr": 1.0303030303030304e-06, "epoch": 0.010933940774487472, "percentage": 1.09, "elapsed_time": "0:13:28", "remaining_time": "20:19:04"} +{"current_steps": 19, "total_steps": 1647, "loss": 1.2928, "lr": 1.090909090909091e-06, "epoch": 0.011541381928625664, "percentage": 1.15, "elapsed_time": "0:14:12", "remaining_time": "20:17:34"} +{"current_steps": 20, "total_steps": 1647, "loss": 1.3399, "lr": 1.1515151515151516e-06, "epoch": 0.012148823082763858, "percentage": 1.21, "elapsed_time": "0:14:53", "remaining_time": "20:10:52"} +{"current_steps": 21, "total_steps": 1647, "loss": 1.201, "lr": 1.2121212121212122e-06, "epoch": 0.01275626423690205, "percentage": 1.28, "elapsed_time": "0:15:38", "remaining_time": "20:11:04"} +{"current_steps": 22, "total_steps": 1647, "loss": 1.3176, "lr": 1.2727272727272728e-06, "epoch": 0.013363705391040244, "percentage": 1.34, "elapsed_time": "0:16:21", "remaining_time": "20:08:04"} +{"current_steps": 23, "total_steps": 1647, "loss": 1.2253, "lr": 1.3333333333333334e-06, "epoch": 0.013971146545178436, "percentage": 1.4, "elapsed_time": "0:17:04", "remaining_time": "20:06:11"} +{"current_steps": 24, "total_steps": 1647, "loss": 1.2849, "lr": 1.3939393939393942e-06, "epoch": 0.01457858769931663, "percentage": 1.46, "elapsed_time": "0:17:43", "remaining_time": "19:59:07"} +{"current_steps": 25, "total_steps": 1647, "loss": 1.0856, "lr": 1.4545454545454546e-06, "epoch": 0.015186028853454821, "percentage": 1.52, "elapsed_time": "0:18:25", "remaining_time": "19:55:55"} +{"current_steps": 26, "total_steps": 1647, "loss": 1.1542, "lr": 1.5151515151515152e-06, "epoch": 0.015793470007593013, "percentage": 1.58, 
"elapsed_time": "0:19:09", "remaining_time": "19:53:59"} +{"current_steps": 27, "total_steps": 1647, "loss": 1.1332, "lr": 1.5757575757575759e-06, "epoch": 0.01640091116173121, "percentage": 1.64, "elapsed_time": "0:19:51", "remaining_time": "19:51:17"} +{"current_steps": 28, "total_steps": 1647, "loss": 1.0859, "lr": 1.6363636363636365e-06, "epoch": 0.0170083523158694, "percentage": 1.7, "elapsed_time": "0:20:35", "remaining_time": "19:50:14"} +{"current_steps": 29, "total_steps": 1647, "loss": 1.1462, "lr": 1.6969696969696973e-06, "epoch": 0.017615793470007593, "percentage": 1.76, "elapsed_time": "0:21:19", "remaining_time": "19:50:01"} +{"current_steps": 30, "total_steps": 1647, "loss": 1.1017, "lr": 1.7575757575757577e-06, "epoch": 0.018223234624145785, "percentage": 1.82, "elapsed_time": "0:22:01", "remaining_time": "19:46:56"} +{"current_steps": 31, "total_steps": 1647, "loss": 1.1052, "lr": 1.8181818181818183e-06, "epoch": 0.018830675778283977, "percentage": 1.88, "elapsed_time": "0:22:41", "remaining_time": "19:42:47"} +{"current_steps": 32, "total_steps": 1647, "loss": 1.0702, "lr": 1.878787878787879e-06, "epoch": 0.019438116932422173, "percentage": 1.94, "elapsed_time": "0:23:23", "remaining_time": "19:40:48"} +{"current_steps": 33, "total_steps": 1647, "loss": 1.0783, "lr": 1.9393939393939395e-06, "epoch": 0.020045558086560365, "percentage": 2.0, "elapsed_time": "0:24:07", "remaining_time": "19:39:37"} +{"current_steps": 34, "total_steps": 1647, "loss": 0.9578, "lr": 2.0000000000000003e-06, "epoch": 0.020652999240698557, "percentage": 2.06, "elapsed_time": "0:24:53", "remaining_time": "19:40:48"} +{"current_steps": 35, "total_steps": 1647, "loss": 1.0325, "lr": 2.0606060606060607e-06, "epoch": 0.02126044039483675, "percentage": 2.13, "elapsed_time": "0:25:36", "remaining_time": "19:39:34"} +{"current_steps": 36, "total_steps": 1647, "loss": 0.9263, "lr": 2.1212121212121216e-06, "epoch": 0.021867881548974944, "percentage": 2.19, "elapsed_time": "0:26:21", 
"remaining_time": "19:39:32"} +{"current_steps": 37, "total_steps": 1647, "loss": 0.9275, "lr": 2.181818181818182e-06, "epoch": 0.022475322703113136, "percentage": 2.25, "elapsed_time": "0:27:06", "remaining_time": "19:39:15"} +{"current_steps": 38, "total_steps": 1647, "loss": 0.9474, "lr": 2.2424242424242428e-06, "epoch": 0.023082763857251328, "percentage": 2.31, "elapsed_time": "0:27:51", "remaining_time": "19:39:27"} +{"current_steps": 39, "total_steps": 1647, "loss": 0.9734, "lr": 2.303030303030303e-06, "epoch": 0.02369020501138952, "percentage": 2.37, "elapsed_time": "0:28:37", "remaining_time": "19:40:22"} +{"current_steps": 40, "total_steps": 1647, "loss": 0.8991, "lr": 2.363636363636364e-06, "epoch": 0.024297646165527716, "percentage": 2.43, "elapsed_time": "0:29:19", "remaining_time": "19:38:17"} +{"current_steps": 41, "total_steps": 1647, "loss": 0.8963, "lr": 2.4242424242424244e-06, "epoch": 0.024905087319665908, "percentage": 2.49, "elapsed_time": "0:30:03", "remaining_time": "19:37:42"} +{"current_steps": 42, "total_steps": 1647, "loss": 0.8778, "lr": 2.4848484848484848e-06, "epoch": 0.0255125284738041, "percentage": 2.55, "elapsed_time": "0:30:48", "remaining_time": "19:37:25"} +{"current_steps": 43, "total_steps": 1647, "loss": 0.8738, "lr": 2.5454545454545456e-06, "epoch": 0.026119969627942292, "percentage": 2.61, "elapsed_time": "0:31:29", "remaining_time": "19:34:26"} +{"current_steps": 44, "total_steps": 1647, "loss": 0.8107, "lr": 2.6060606060606064e-06, "epoch": 0.026727410782080487, "percentage": 2.67, "elapsed_time": "0:32:15", "remaining_time": "19:35:07"} +{"current_steps": 45, "total_steps": 1647, "loss": 0.8709, "lr": 2.666666666666667e-06, "epoch": 0.02733485193621868, "percentage": 2.73, "elapsed_time": "0:32:58", "remaining_time": "19:34:11"} +{"current_steps": 46, "total_steps": 1647, "loss": 0.8008, "lr": 2.7272727272727272e-06, "epoch": 0.02794229309035687, "percentage": 2.79, "elapsed_time": "0:33:42", "remaining_time": 
"19:33:22"} +{"current_steps": 47, "total_steps": 1647, "loss": 0.8585, "lr": 2.7878787878787885e-06, "epoch": 0.028549734244495063, "percentage": 2.85, "elapsed_time": "0:34:23", "remaining_time": "19:30:32"} +{"current_steps": 48, "total_steps": 1647, "loss": 0.799, "lr": 2.848484848484849e-06, "epoch": 0.02915717539863326, "percentage": 2.91, "elapsed_time": "0:35:07", "remaining_time": "19:29:53"} +{"current_steps": 49, "total_steps": 1647, "loss": 0.7643, "lr": 2.9090909090909093e-06, "epoch": 0.02976461655277145, "percentage": 2.98, "elapsed_time": "0:35:49", "remaining_time": "19:28:34"} +{"current_steps": 50, "total_steps": 1647, "loss": 0.7075, "lr": 2.96969696969697e-06, "epoch": 0.030372057706909643, "percentage": 3.04, "elapsed_time": "0:36:34", "remaining_time": "19:28:11"} +{"current_steps": 51, "total_steps": 1647, "loss": 0.752, "lr": 3.0303030303030305e-06, "epoch": 0.030979498861047835, "percentage": 3.1, "elapsed_time": "0:37:19", "remaining_time": "19:28:10"} +{"current_steps": 52, "total_steps": 1647, "loss": 0.7298, "lr": 3.090909090909091e-06, "epoch": 0.03158694001518603, "percentage": 3.16, "elapsed_time": "0:37:59", "remaining_time": "19:25:22"} +{"current_steps": 53, "total_steps": 1647, "loss": 0.6652, "lr": 3.1515151515151517e-06, "epoch": 0.03219438116932422, "percentage": 3.22, "elapsed_time": "0:38:40", "remaining_time": "19:23:19"} +{"current_steps": 54, "total_steps": 1647, "loss": 0.6731, "lr": 3.2121212121212125e-06, "epoch": 0.03280182232346242, "percentage": 3.28, "elapsed_time": "0:39:22", "remaining_time": "19:21:29"} +{"current_steps": 55, "total_steps": 1647, "loss": 0.7381, "lr": 3.272727272727273e-06, "epoch": 0.033409263477600606, "percentage": 3.34, "elapsed_time": "0:40:05", "remaining_time": "19:20:29"} +{"current_steps": 56, "total_steps": 1647, "loss": 0.6703, "lr": 3.3333333333333333e-06, "epoch": 0.0340167046317388, "percentage": 3.4, "elapsed_time": "0:40:48", "remaining_time": "19:19:31"} +{"current_steps": 57, 
"total_steps": 1647, "loss": 0.6278, "lr": 3.3939393939393946e-06, "epoch": 0.03462414578587699, "percentage": 3.46, "elapsed_time": "0:41:37", "remaining_time": "19:21:05"} +{"current_steps": 58, "total_steps": 1647, "loss": 0.6501, "lr": 3.454545454545455e-06, "epoch": 0.035231586940015186, "percentage": 3.52, "elapsed_time": "0:42:19", "remaining_time": "19:19:33"} +{"current_steps": 59, "total_steps": 1647, "loss": 0.6557, "lr": 3.5151515151515154e-06, "epoch": 0.03583902809415338, "percentage": 3.58, "elapsed_time": "0:43:06", "remaining_time": "19:20:10"} +{"current_steps": 60, "total_steps": 1647, "loss": 0.605, "lr": 3.575757575757576e-06, "epoch": 0.03644646924829157, "percentage": 3.64, "elapsed_time": "0:43:48", "remaining_time": "19:18:56"} +{"current_steps": 61, "total_steps": 1647, "loss": 0.6313, "lr": 3.6363636363636366e-06, "epoch": 0.037053910402429766, "percentage": 3.7, "elapsed_time": "0:44:31", "remaining_time": "19:17:51"} +{"current_steps": 62, "total_steps": 1647, "loss": 0.6025, "lr": 3.6969696969696974e-06, "epoch": 0.037661351556567954, "percentage": 3.76, "elapsed_time": "0:45:15", "remaining_time": "19:17:02"} +{"current_steps": 63, "total_steps": 1647, "loss": 0.5803, "lr": 3.757575757575758e-06, "epoch": 0.03826879271070615, "percentage": 3.83, "elapsed_time": "0:46:00", "remaining_time": "19:16:50"} +{"current_steps": 64, "total_steps": 1647, "loss": 0.6036, "lr": 3.818181818181819e-06, "epoch": 0.038876233864844345, "percentage": 3.89, "elapsed_time": "0:46:41", "remaining_time": "19:15:00"} +{"current_steps": 65, "total_steps": 1647, "loss": 0.5619, "lr": 3.878787878787879e-06, "epoch": 0.039483675018982534, "percentage": 3.95, "elapsed_time": "0:47:23", "remaining_time": "19:13:34"} +{"current_steps": 66, "total_steps": 1647, "loss": 0.6117, "lr": 3.93939393939394e-06, "epoch": 0.04009111617312073, "percentage": 4.01, "elapsed_time": "0:48:05", "remaining_time": "19:12:08"} +{"current_steps": 67, "total_steps": 1647, "loss": 
0.6115, "lr": 4.000000000000001e-06, "epoch": 0.040698557327258925, "percentage": 4.07, "elapsed_time": "0:48:46", "remaining_time": "19:10:04"} +{"current_steps": 68, "total_steps": 1647, "loss": 0.555, "lr": 4.060606060606061e-06, "epoch": 0.04130599848139711, "percentage": 4.13, "elapsed_time": "0:49:29", "remaining_time": "19:09:09"} +{"current_steps": 69, "total_steps": 1647, "loss": 0.5281, "lr": 4.1212121212121215e-06, "epoch": 0.04191343963553531, "percentage": 4.19, "elapsed_time": "0:50:14", "remaining_time": "19:08:57"} +{"current_steps": 70, "total_steps": 1647, "loss": 0.4976, "lr": 4.181818181818182e-06, "epoch": 0.0425208807896735, "percentage": 4.25, "elapsed_time": "0:50:52", "remaining_time": "19:06:17"} +{"current_steps": 71, "total_steps": 1647, "loss": 0.5599, "lr": 4.242424242424243e-06, "epoch": 0.04312832194381169, "percentage": 4.31, "elapsed_time": "0:51:37", "remaining_time": "19:05:49"} +{"current_steps": 72, "total_steps": 1647, "loss": 0.5043, "lr": 4.303030303030303e-06, "epoch": 0.04373576309794989, "percentage": 4.37, "elapsed_time": "0:52:21", "remaining_time": "19:05:27"} +{"current_steps": 73, "total_steps": 1647, "loss": 0.5835, "lr": 4.363636363636364e-06, "epoch": 0.04434320425208808, "percentage": 4.43, "elapsed_time": "0:53:03", "remaining_time": "19:03:52"} +{"current_steps": 74, "total_steps": 1647, "loss": 0.5495, "lr": 4.424242424242425e-06, "epoch": 0.04495064540622627, "percentage": 4.49, "elapsed_time": "0:53:48", "remaining_time": "19:03:42"} +{"current_steps": 75, "total_steps": 1647, "loss": 0.5371, "lr": 4.4848484848484855e-06, "epoch": 0.04555808656036447, "percentage": 4.55, "elapsed_time": "0:54:33", "remaining_time": "19:03:42"} +{"current_steps": 76, "total_steps": 1647, "loss": 0.4778, "lr": 4.5454545454545455e-06, "epoch": 0.046165527714502656, "percentage": 4.61, "elapsed_time": "0:55:18", "remaining_time": "19:03:13"} +{"current_steps": 77, "total_steps": 1647, "loss": 0.4913, "lr": 4.606060606060606e-06, 
"epoch": 0.04677296886864085, "percentage": 4.68, "elapsed_time": "0:55:59", "remaining_time": "19:01:45"} +{"current_steps": 78, "total_steps": 1647, "loss": 0.453, "lr": 4.666666666666667e-06, "epoch": 0.04738041002277904, "percentage": 4.74, "elapsed_time": "0:56:41", "remaining_time": "19:00:22"} +{"current_steps": 79, "total_steps": 1647, "loss": 0.559, "lr": 4.727272727272728e-06, "epoch": 0.047987851176917236, "percentage": 4.8, "elapsed_time": "0:57:23", "remaining_time": "18:59:14"} +{"current_steps": 80, "total_steps": 1647, "loss": 0.4996, "lr": 4.787878787878788e-06, "epoch": 0.04859529233105543, "percentage": 4.86, "elapsed_time": "0:58:05", "remaining_time": "18:57:44"} +{"current_steps": 81, "total_steps": 1647, "loss": 0.5501, "lr": 4.848484848484849e-06, "epoch": 0.04920273348519362, "percentage": 4.92, "elapsed_time": "0:58:46", "remaining_time": "18:56:11"} +{"current_steps": 82, "total_steps": 1647, "loss": 0.5637, "lr": 4.90909090909091e-06, "epoch": 0.049810174639331815, "percentage": 4.98, "elapsed_time": "0:59:30", "remaining_time": "18:55:47"} +{"current_steps": 83, "total_steps": 1647, "loss": 0.4904, "lr": 4.9696969696969696e-06, "epoch": 0.05041761579347001, "percentage": 5.04, "elapsed_time": "1:00:15", "remaining_time": "18:55:34"} +{"current_steps": 84, "total_steps": 1647, "loss": 0.485, "lr": 5.030303030303031e-06, "epoch": 0.0510250569476082, "percentage": 5.1, "elapsed_time": "1:00:57", "remaining_time": "18:54:21"} +{"current_steps": 85, "total_steps": 1647, "loss": 0.4898, "lr": 5.090909090909091e-06, "epoch": 0.051632498101746395, "percentage": 5.16, "elapsed_time": "1:01:40", "remaining_time": "18:53:16"} +{"current_steps": 86, "total_steps": 1647, "loss": 0.4896, "lr": 5.151515151515152e-06, "epoch": 0.052239939255884583, "percentage": 5.22, "elapsed_time": "1:02:21", "remaining_time": "18:51:57"} +{"current_steps": 87, "total_steps": 1647, "loss": 0.4677, "lr": 5.212121212121213e-06, "epoch": 0.05284738041002278, 
"percentage": 5.28, "elapsed_time": "1:03:05", "remaining_time": "18:51:23"} +{"current_steps": 88, "total_steps": 1647, "loss": 0.4233, "lr": 5.272727272727273e-06, "epoch": 0.053454821564160974, "percentage": 5.34, "elapsed_time": "1:03:48", "remaining_time": "18:50:19"} +{"current_steps": 89, "total_steps": 1647, "loss": 0.4784, "lr": 5.333333333333334e-06, "epoch": 0.05406226271829916, "percentage": 5.4, "elapsed_time": "1:04:33", "remaining_time": "18:50:16"} +{"current_steps": 90, "total_steps": 1647, "loss": 0.4646, "lr": 5.3939393939393945e-06, "epoch": 0.05466970387243736, "percentage": 5.46, "elapsed_time": "1:05:17", "remaining_time": "18:49:39"} +{"current_steps": 91, "total_steps": 1647, "loss": 0.4966, "lr": 5.4545454545454545e-06, "epoch": 0.05527714502657555, "percentage": 5.53, "elapsed_time": "1:05:59", "remaining_time": "18:48:28"} +{"current_steps": 92, "total_steps": 1647, "loss": 0.5535, "lr": 5.515151515151515e-06, "epoch": 0.05588458618071374, "percentage": 5.59, "elapsed_time": "1:06:40", "remaining_time": "18:46:58"} +{"current_steps": 93, "total_steps": 1647, "loss": 0.4948, "lr": 5.575757575757577e-06, "epoch": 0.05649202733485194, "percentage": 5.65, "elapsed_time": "1:07:23", "remaining_time": "18:46:02"} +{"current_steps": 94, "total_steps": 1647, "loss": 0.4807, "lr": 5.636363636363636e-06, "epoch": 0.05709946848899013, "percentage": 5.71, "elapsed_time": "1:08:05", "remaining_time": "18:44:56"} +{"current_steps": 95, "total_steps": 1647, "loss": 0.4591, "lr": 5.696969696969698e-06, "epoch": 0.05770690964312832, "percentage": 5.77, "elapsed_time": "1:08:45", "remaining_time": "18:43:19"} +{"current_steps": 96, "total_steps": 1647, "loss": 0.477, "lr": 5.7575757575757586e-06, "epoch": 0.05831435079726652, "percentage": 5.83, "elapsed_time": "1:09:28", "remaining_time": "18:42:34"} +{"current_steps": 97, "total_steps": 1647, "loss": 0.5028, "lr": 5.8181818181818185e-06, "epoch": 0.058921791951404706, "percentage": 5.89, "elapsed_time": 
"1:10:10", "remaining_time": "18:41:13"} +{"current_steps": 98, "total_steps": 1647, "loss": 0.4828, "lr": 5.878787878787879e-06, "epoch": 0.0595292331055429, "percentage": 5.95, "elapsed_time": "1:10:51", "remaining_time": "18:39:56"} +{"current_steps": 99, "total_steps": 1647, "loss": 0.4249, "lr": 5.93939393939394e-06, "epoch": 0.06013667425968109, "percentage": 6.01, "elapsed_time": "1:11:33", "remaining_time": "18:39:00"} +{"current_steps": 100, "total_steps": 1647, "loss": 0.3987, "lr": 6e-06, "epoch": 0.060744115413819286, "percentage": 6.07, "elapsed_time": "1:12:14", "remaining_time": "18:37:36"} +{"current_steps": 101, "total_steps": 1647, "loss": 0.459, "lr": 6.060606060606061e-06, "epoch": 0.06135155656795748, "percentage": 6.13, "elapsed_time": "1:12:56", "remaining_time": "18:36:33"} +{"current_steps": 102, "total_steps": 1647, "loss": 0.4439, "lr": 6.121212121212121e-06, "epoch": 0.06195899772209567, "percentage": 6.19, "elapsed_time": "1:13:38", "remaining_time": "18:35:19"} +{"current_steps": 103, "total_steps": 1647, "loss": 0.529, "lr": 6.181818181818182e-06, "epoch": 0.06256643887623387, "percentage": 6.25, "elapsed_time": "1:14:22", "remaining_time": "18:34:53"} +{"current_steps": 104, "total_steps": 1647, "loss": 0.4752, "lr": 6.2424242424242434e-06, "epoch": 0.06317388003037205, "percentage": 6.31, "elapsed_time": "1:15:04", "remaining_time": "18:33:55"} +{"current_steps": 105, "total_steps": 1647, "loss": 0.4559, "lr": 6.303030303030303e-06, "epoch": 0.06378132118451026, "percentage": 6.38, "elapsed_time": "1:15:47", "remaining_time": "18:33:00"} +{"current_steps": 106, "total_steps": 1647, "loss": 0.4081, "lr": 6.363636363636364e-06, "epoch": 0.06438876233864844, "percentage": 6.44, "elapsed_time": "1:16:27", "remaining_time": "18:31:38"} +{"current_steps": 107, "total_steps": 1647, "loss": 0.4906, "lr": 6.424242424242425e-06, "epoch": 0.06499620349278663, "percentage": 6.5, "elapsed_time": "1:17:07", "remaining_time": "18:30:01"} 
+{"current_steps": 108, "total_steps": 1647, "loss": 0.5131, "lr": 6.484848484848485e-06, "epoch": 0.06560364464692484, "percentage": 6.56, "elapsed_time": "1:17:48", "remaining_time": "18:28:48"} +{"current_steps": 109, "total_steps": 1647, "loss": 0.4312, "lr": 6.545454545454546e-06, "epoch": 0.06621108580106302, "percentage": 6.62, "elapsed_time": "1:18:30", "remaining_time": "18:27:40"} +{"current_steps": 110, "total_steps": 1647, "loss": 0.4083, "lr": 6.606060606060607e-06, "epoch": 0.06681852695520121, "percentage": 6.68, "elapsed_time": "1:19:12", "remaining_time": "18:26:48"} +{"current_steps": 111, "total_steps": 1647, "loss": 0.4338, "lr": 6.666666666666667e-06, "epoch": 0.0674259681093394, "percentage": 6.74, "elapsed_time": "1:19:53", "remaining_time": "18:25:33"} +{"current_steps": 112, "total_steps": 1647, "loss": 0.4259, "lr": 6.7272727272727275e-06, "epoch": 0.0680334092634776, "percentage": 6.8, "elapsed_time": "1:20:38", "remaining_time": "18:25:07"} +{"current_steps": 113, "total_steps": 1647, "loss": 0.4359, "lr": 6.787878787878789e-06, "epoch": 0.06864085041761579, "percentage": 6.86, "elapsed_time": "1:21:24", "remaining_time": "18:25:04"} +{"current_steps": 114, "total_steps": 1647, "loss": 0.4215, "lr": 6.848484848484849e-06, "epoch": 0.06924829157175398, "percentage": 6.92, "elapsed_time": "1:22:05", "remaining_time": "18:23:54"} +{"current_steps": 115, "total_steps": 1647, "loss": 0.4414, "lr": 6.90909090909091e-06, "epoch": 0.06985573272589218, "percentage": 6.98, "elapsed_time": "1:22:44", "remaining_time": "18:22:15"} +{"current_steps": 116, "total_steps": 1647, "loss": 0.4297, "lr": 6.969696969696971e-06, "epoch": 0.07046317388003037, "percentage": 7.04, "elapsed_time": "1:23:25", "remaining_time": "18:21:02"} +{"current_steps": 117, "total_steps": 1647, "loss": 0.4479, "lr": 7.030303030303031e-06, "epoch": 0.07107061503416856, "percentage": 7.1, "elapsed_time": "1:24:08", "remaining_time": "18:20:14"} +{"current_steps": 118, 
"total_steps": 1647, "loss": 0.4278, "lr": 7.0909090909090916e-06, "epoch": 0.07167805618830676, "percentage": 7.16, "elapsed_time": "1:24:50", "remaining_time": "18:19:20"} +{"current_steps": 119, "total_steps": 1647, "loss": 0.4375, "lr": 7.151515151515152e-06, "epoch": 0.07228549734244495, "percentage": 7.23, "elapsed_time": "1:25:33", "remaining_time": "18:18:31"} +{"current_steps": 120, "total_steps": 1647, "loss": 0.3987, "lr": 7.212121212121212e-06, "epoch": 0.07289293849658314, "percentage": 7.29, "elapsed_time": "1:26:17", "remaining_time": "18:18:05"} +{"current_steps": 121, "total_steps": 1647, "loss": 0.4095, "lr": 7.272727272727273e-06, "epoch": 0.07350037965072134, "percentage": 7.35, "elapsed_time": "1:27:00", "remaining_time": "18:17:20"} +{"current_steps": 122, "total_steps": 1647, "loss": 0.4165, "lr": 7.333333333333333e-06, "epoch": 0.07410782080485953, "percentage": 7.41, "elapsed_time": "1:27:46", "remaining_time": "18:17:05"} +{"current_steps": 123, "total_steps": 1647, "loss": 0.4534, "lr": 7.393939393939395e-06, "epoch": 0.07471526195899772, "percentage": 7.47, "elapsed_time": "1:28:29", "remaining_time": "18:16:26"} +{"current_steps": 124, "total_steps": 1647, "loss": 0.4315, "lr": 7.454545454545456e-06, "epoch": 0.07532270311313591, "percentage": 7.53, "elapsed_time": "1:29:13", "remaining_time": "18:15:49"} +{"current_steps": 125, "total_steps": 1647, "loss": 0.4044, "lr": 7.515151515151516e-06, "epoch": 0.07593014426727411, "percentage": 7.59, "elapsed_time": "1:29:55", "remaining_time": "18:14:51"} +{"current_steps": 126, "total_steps": 1647, "loss": 0.4153, "lr": 7.5757575757575764e-06, "epoch": 0.0765375854214123, "percentage": 7.65, "elapsed_time": "1:30:35", "remaining_time": "18:13:36"} +{"current_steps": 127, "total_steps": 1647, "loss": 0.3912, "lr": 7.636363636363638e-06, "epoch": 0.07714502657555049, "percentage": 7.71, "elapsed_time": "1:31:21", "remaining_time": "18:13:26"} +{"current_steps": 128, "total_steps": 1647, "loss": 
0.3989, "lr": 7.696969696969696e-06, "epoch": 0.07775246772968869, "percentage": 7.77, "elapsed_time": "1:32:05", "remaining_time": "18:12:54"} +{"current_steps": 129, "total_steps": 1647, "loss": 0.4357, "lr": 7.757575757575758e-06, "epoch": 0.07835990888382688, "percentage": 7.83, "elapsed_time": "1:32:49", "remaining_time": "18:12:13"} +{"current_steps": 130, "total_steps": 1647, "loss": 0.3616, "lr": 7.81818181818182e-06, "epoch": 0.07896735003796507, "percentage": 7.89, "elapsed_time": "1:33:29", "remaining_time": "18:11:00"} +{"current_steps": 131, "total_steps": 1647, "loss": 0.4498, "lr": 7.87878787878788e-06, "epoch": 0.07957479119210327, "percentage": 7.95, "elapsed_time": "1:34:11", "remaining_time": "18:10:01"} +{"current_steps": 132, "total_steps": 1647, "loss": 0.4247, "lr": 7.93939393939394e-06, "epoch": 0.08018223234624146, "percentage": 8.01, "elapsed_time": "1:34:53", "remaining_time": "18:09:09"} +{"current_steps": 133, "total_steps": 1647, "loss": 0.3744, "lr": 8.000000000000001e-06, "epoch": 0.08078967350037965, "percentage": 8.08, "elapsed_time": "1:35:35", "remaining_time": "18:08:08"} +{"current_steps": 134, "total_steps": 1647, "loss": 0.4373, "lr": 8.060606060606061e-06, "epoch": 0.08139711465451785, "percentage": 8.14, "elapsed_time": "1:36:17", "remaining_time": "18:07:11"} +{"current_steps": 135, "total_steps": 1647, "loss": 0.4294, "lr": 8.121212121212121e-06, "epoch": 0.08200455580865604, "percentage": 8.2, "elapsed_time": "1:37:02", "remaining_time": "18:06:51"} +{"current_steps": 136, "total_steps": 1647, "loss": 0.4106, "lr": 8.181818181818183e-06, "epoch": 0.08261199696279423, "percentage": 8.26, "elapsed_time": "1:37:49", "remaining_time": "18:06:57"} +{"current_steps": 137, "total_steps": 1647, "loss": 0.4537, "lr": 8.242424242424243e-06, "epoch": 0.08321943811693243, "percentage": 8.32, "elapsed_time": "1:38:31", "remaining_time": "18:05:51"} +{"current_steps": 138, "total_steps": 1647, "loss": 0.3788, "lr": 
8.303030303030305e-06, "epoch": 0.08382687927107062, "percentage": 8.38, "elapsed_time": "1:39:12", "remaining_time": "18:04:47"} +{"current_steps": 139, "total_steps": 1647, "loss": 0.3867, "lr": 8.363636363636365e-06, "epoch": 0.0844343204252088, "percentage": 8.44, "elapsed_time": "1:39:57", "remaining_time": "18:04:28"} +{"current_steps": 140, "total_steps": 1647, "loss": 0.4355, "lr": 8.424242424242425e-06, "epoch": 0.085041761579347, "percentage": 8.5, "elapsed_time": "1:40:38", "remaining_time": "18:03:19"} +{"current_steps": 141, "total_steps": 1647, "loss": 0.4044, "lr": 8.484848484848486e-06, "epoch": 0.0856492027334852, "percentage": 8.56, "elapsed_time": "1:41:24", "remaining_time": "18:03:03"} +{"current_steps": 142, "total_steps": 1647, "loss": 0.4603, "lr": 8.545454545454546e-06, "epoch": 0.08625664388762339, "percentage": 8.62, "elapsed_time": "1:42:05", "remaining_time": "18:01:58"} +{"current_steps": 143, "total_steps": 1647, "loss": 0.3701, "lr": 8.606060606060606e-06, "epoch": 0.08686408504176157, "percentage": 8.68, "elapsed_time": "1:42:47", "remaining_time": "18:01:03"} +{"current_steps": 144, "total_steps": 1647, "loss": 0.4052, "lr": 8.666666666666668e-06, "epoch": 0.08747152619589978, "percentage": 8.74, "elapsed_time": "1:43:33", "remaining_time": "18:00:48"} +{"current_steps": 145, "total_steps": 1647, "loss": 0.3743, "lr": 8.727272727272728e-06, "epoch": 0.08807896735003796, "percentage": 8.8, "elapsed_time": "1:44:19", "remaining_time": "18:00:43"} +{"current_steps": 146, "total_steps": 1647, "loss": 0.4156, "lr": 8.787878787878788e-06, "epoch": 0.08868640850417615, "percentage": 8.86, "elapsed_time": "1:45:02", "remaining_time": "17:59:54"} +{"current_steps": 147, "total_steps": 1647, "loss": 0.403, "lr": 8.84848484848485e-06, "epoch": 0.08929384965831436, "percentage": 8.93, "elapsed_time": "1:45:45", "remaining_time": "17:59:08"} +{"current_steps": 148, "total_steps": 1647, "loss": 0.4241, "lr": 8.90909090909091e-06, "epoch": 
0.08990129081245254, "percentage": 8.99, "elapsed_time": "1:46:28", "remaining_time": "17:58:21"} +{"current_steps": 149, "total_steps": 1647, "loss": 0.3843, "lr": 8.969696969696971e-06, "epoch": 0.09050873196659073, "percentage": 9.05, "elapsed_time": "1:47:12", "remaining_time": "17:57:55"} +{"current_steps": 150, "total_steps": 1647, "loss": 0.3963, "lr": 9.030303030303031e-06, "epoch": 0.09111617312072894, "percentage": 9.11, "elapsed_time": "1:47:54", "remaining_time": "17:56:54"} +{"current_steps": 151, "total_steps": 1647, "loss": 0.3909, "lr": 9.090909090909091e-06, "epoch": 0.09172361427486712, "percentage": 9.17, "elapsed_time": "1:48:35", "remaining_time": "17:55:53"} +{"current_steps": 152, "total_steps": 1647, "loss": 0.3864, "lr": 9.151515151515153e-06, "epoch": 0.09233105542900531, "percentage": 9.23, "elapsed_time": "1:49:22", "remaining_time": "17:55:42"} +{"current_steps": 153, "total_steps": 1647, "loss": 0.3766, "lr": 9.212121212121213e-06, "epoch": 0.0929384965831435, "percentage": 9.29, "elapsed_time": "1:50:06", "remaining_time": "17:55:15"} +{"current_steps": 154, "total_steps": 1647, "loss": 0.352, "lr": 9.272727272727273e-06, "epoch": 0.0935459377372817, "percentage": 9.35, "elapsed_time": "1:50:48", "remaining_time": "17:54:13"} +{"current_steps": 155, "total_steps": 1647, "loss": 0.3938, "lr": 9.333333333333334e-06, "epoch": 0.09415337889141989, "percentage": 9.41, "elapsed_time": "1:51:30", "remaining_time": "17:53:19"} +{"current_steps": 156, "total_steps": 1647, "loss": 0.3722, "lr": 9.393939393939396e-06, "epoch": 0.09476082004555808, "percentage": 9.47, "elapsed_time": "1:52:15", "remaining_time": "17:52:53"} +{"current_steps": 157, "total_steps": 1647, "loss": 0.4142, "lr": 9.454545454545456e-06, "epoch": 0.09536826119969628, "percentage": 9.53, "elapsed_time": "1:52:57", "remaining_time": "17:51:58"} +{"current_steps": 158, "total_steps": 1647, "loss": 0.3723, "lr": 9.515151515151516e-06, "epoch": 0.09597570235383447, 
"percentage": 9.59, "elapsed_time": "1:53:36", "remaining_time": "17:50:36"} +{"current_steps": 159, "total_steps": 1647, "loss": 0.4198, "lr": 9.575757575757576e-06, "epoch": 0.09658314350797266, "percentage": 9.65, "elapsed_time": "1:54:21", "remaining_time": "17:50:17"} +{"current_steps": 160, "total_steps": 1647, "loss": 0.3832, "lr": 9.636363636363638e-06, "epoch": 0.09719058466211086, "percentage": 9.71, "elapsed_time": "1:55:06", "remaining_time": "17:49:45"} +{"current_steps": 161, "total_steps": 1647, "loss": 0.3825, "lr": 9.696969696969698e-06, "epoch": 0.09779802581624905, "percentage": 9.78, "elapsed_time": "1:55:49", "remaining_time": "17:48:58"} +{"current_steps": 162, "total_steps": 1647, "loss": 0.3418, "lr": 9.757575757575758e-06, "epoch": 0.09840546697038724, "percentage": 9.84, "elapsed_time": "1:56:27", "remaining_time": "17:47:31"} +{"current_steps": 163, "total_steps": 1647, "loss": 0.3554, "lr": 9.81818181818182e-06, "epoch": 0.09901290812452544, "percentage": 9.9, "elapsed_time": "1:57:08", "remaining_time": "17:46:32"} +{"current_steps": 164, "total_steps": 1647, "loss": 0.4433, "lr": 9.87878787878788e-06, "epoch": 0.09962034927866363, "percentage": 9.96, "elapsed_time": "1:57:50", "remaining_time": "17:45:33"} +{"current_steps": 165, "total_steps": 1647, "loss": 0.3988, "lr": 9.939393939393939e-06, "epoch": 0.10022779043280182, "percentage": 10.02, "elapsed_time": "1:58:28", "remaining_time": "17:44:05"} +{"current_steps": 166, "total_steps": 1647, "loss": 0.4318, "lr": 1e-05, "epoch": 0.10083523158694002, "percentage": 10.08, "elapsed_time": "1:59:13", "remaining_time": "17:43:45"} +{"current_steps": 167, "total_steps": 1647, "loss": 0.3901, "lr": 9.999988765773283e-06, "epoch": 0.10144267274107821, "percentage": 10.14, "elapsed_time": "1:59:56", "remaining_time": "17:42:55"} +{"current_steps": 168, "total_steps": 1647, "loss": 0.3733, "lr": 9.99995506314361e-06, "epoch": 0.1020501138952164, "percentage": 10.2, "elapsed_time": "2:00:37", 
"remaining_time": "17:41:58"} +{"current_steps": 169, "total_steps": 1647, "loss": 0.3702, "lr": 9.999898892262433e-06, "epoch": 0.10265755504935459, "percentage": 10.26, "elapsed_time": "2:01:23", "remaining_time": "17:41:36"} +{"current_steps": 170, "total_steps": 1647, "loss": 0.3925, "lr": 9.99982025338217e-06, "epoch": 0.10326499620349279, "percentage": 10.32, "elapsed_time": "2:02:10", "remaining_time": "17:41:25"} +{"current_steps": 171, "total_steps": 1647, "loss": 0.3601, "lr": 9.999719146856191e-06, "epoch": 0.10387243735763098, "percentage": 10.38, "elapsed_time": "2:02:54", "remaining_time": "17:40:49"} +{"current_steps": 172, "total_steps": 1647, "loss": 0.3676, "lr": 9.999595573138845e-06, "epoch": 0.10447987851176917, "percentage": 10.44, "elapsed_time": "2:03:36", "remaining_time": "17:40:02"} +{"current_steps": 173, "total_steps": 1647, "loss": 0.3896, "lr": 9.99944953278543e-06, "epoch": 0.10508731966590737, "percentage": 10.5, "elapsed_time": "2:04:23", "remaining_time": "17:39:51"} +{"current_steps": 174, "total_steps": 1647, "loss": 0.3931, "lr": 9.99928102645221e-06, "epoch": 0.10569476082004556, "percentage": 10.56, "elapsed_time": "2:05:05", "remaining_time": "17:38:59"} +{"current_steps": 175, "total_steps": 1647, "loss": 0.4084, "lr": 9.999090054896397e-06, "epoch": 0.10630220197418375, "percentage": 10.63, "elapsed_time": "2:05:47", "remaining_time": "17:38:06"} +{"current_steps": 176, "total_steps": 1647, "loss": 0.4019, "lr": 9.99887661897616e-06, "epoch": 0.10690964312832195, "percentage": 10.69, "elapsed_time": "2:06:30", "remaining_time": "17:37:24"} +{"current_steps": 177, "total_steps": 1647, "loss": 0.353, "lr": 9.998640719650609e-06, "epoch": 0.10751708428246014, "percentage": 10.75, "elapsed_time": "2:07:12", "remaining_time": "17:36:29"} +{"current_steps": 178, "total_steps": 1647, "loss": 0.3934, "lr": 9.99838235797981e-06, "epoch": 0.10812452543659833, "percentage": 10.81, "elapsed_time": "2:07:57", "remaining_time": 
"17:36:03"} +{"current_steps": 179, "total_steps": 1647, "loss": 0.3784, "lr": 9.998101535124758e-06, "epoch": 0.10873196659073653, "percentage": 10.87, "elapsed_time": "2:08:39", "remaining_time": "17:35:05"} +{"current_steps": 180, "total_steps": 1647, "loss": 0.3829, "lr": 9.997798252347382e-06, "epoch": 0.10933940774487472, "percentage": 10.93, "elapsed_time": "2:09:22", "remaining_time": "17:34:24"} +{"current_steps": 181, "total_steps": 1647, "loss": 0.3468, "lr": 9.997472511010543e-06, "epoch": 0.1099468488990129, "percentage": 10.99, "elapsed_time": "2:10:07", "remaining_time": "17:33:54"} +{"current_steps": 182, "total_steps": 1647, "loss": 0.3942, "lr": 9.99712431257802e-06, "epoch": 0.1105542900531511, "percentage": 11.05, "elapsed_time": "2:10:46", "remaining_time": "17:32:40"} +{"current_steps": 183, "total_steps": 1647, "loss": 0.3493, "lr": 9.99675365861451e-06, "epoch": 0.1111617312072893, "percentage": 11.11, "elapsed_time": "2:11:27", "remaining_time": "17:31:38"} +{"current_steps": 184, "total_steps": 1647, "loss": 0.3748, "lr": 9.996360550785619e-06, "epoch": 0.11176917236142749, "percentage": 11.17, "elapsed_time": "2:12:13", "remaining_time": "17:31:17"} +{"current_steps": 185, "total_steps": 1647, "loss": 0.3929, "lr": 9.995944990857848e-06, "epoch": 0.11237661351556567, "percentage": 11.23, "elapsed_time": "2:12:55", "remaining_time": "17:30:24"} +{"current_steps": 186, "total_steps": 1647, "loss": 0.3832, "lr": 9.9955069806986e-06, "epoch": 0.11298405466970388, "percentage": 11.29, "elapsed_time": "2:13:37", "remaining_time": "17:29:35"} +{"current_steps": 187, "total_steps": 1647, "loss": 0.3726, "lr": 9.995046522276152e-06, "epoch": 0.11359149582384206, "percentage": 11.35, "elapsed_time": "2:14:24", "remaining_time": "17:29:20"} +{"current_steps": 188, "total_steps": 1647, "loss": 0.4186, "lr": 9.994563617659665e-06, "epoch": 0.11419893697798025, "percentage": 11.41, "elapsed_time": "2:15:04", "remaining_time": "17:28:12"} 
+{"current_steps": 189, "total_steps": 1647, "loss": 0.3649, "lr": 9.994058269019163e-06, "epoch": 0.11480637813211846, "percentage": 11.48, "elapsed_time": "2:15:46", "remaining_time": "17:27:22"} +{"current_steps": 190, "total_steps": 1647, "loss": 0.3889, "lr": 9.993530478625524e-06, "epoch": 0.11541381928625664, "percentage": 11.54, "elapsed_time": "2:16:28", "remaining_time": "17:26:34"} +{"current_steps": 191, "total_steps": 1647, "loss": 0.3505, "lr": 9.992980248850476e-06, "epoch": 0.11602126044039483, "percentage": 11.6, "elapsed_time": "2:17:10", "remaining_time": "17:25:43"} +{"current_steps": 192, "total_steps": 1647, "loss": 0.3716, "lr": 9.992407582166582e-06, "epoch": 0.11662870159453304, "percentage": 11.66, "elapsed_time": "2:17:53", "remaining_time": "17:24:57"} +{"current_steps": 193, "total_steps": 1647, "loss": 0.353, "lr": 9.99181248114723e-06, "epoch": 0.11723614274867122, "percentage": 11.72, "elapsed_time": "2:18:41", "remaining_time": "17:24:49"} +{"current_steps": 194, "total_steps": 1647, "loss": 0.3212, "lr": 9.991194948466615e-06, "epoch": 0.11784358390280941, "percentage": 11.78, "elapsed_time": "2:19:24", "remaining_time": "17:24:07"} +{"current_steps": 195, "total_steps": 1647, "loss": 0.3383, "lr": 9.990554986899745e-06, "epoch": 0.11845102505694761, "percentage": 11.84, "elapsed_time": "2:20:09", "remaining_time": "17:23:39"} +{"current_steps": 196, "total_steps": 1647, "loss": 0.4176, "lr": 9.989892599322404e-06, "epoch": 0.1190584662110858, "percentage": 11.9, "elapsed_time": "2:20:50", "remaining_time": "17:22:41"} +{"current_steps": 197, "total_steps": 1647, "loss": 0.3655, "lr": 9.98920778871116e-06, "epoch": 0.11966590736522399, "percentage": 11.96, "elapsed_time": "2:21:36", "remaining_time": "17:22:16"} +{"current_steps": 198, "total_steps": 1647, "loss": 0.3844, "lr": 9.988500558143337e-06, "epoch": 0.12027334851936218, "percentage": 12.02, "elapsed_time": "2:22:20", "remaining_time": "17:21:44"} +{"current_steps": 199, 
"total_steps": 1647, "loss": 0.4128, "lr": 9.987770910797014e-06, "epoch": 0.12088078967350038, "percentage": 12.08, "elapsed_time": "2:23:06", "remaining_time": "17:21:16"} +{"current_steps": 200, "total_steps": 1647, "loss": 0.3962, "lr": 9.987018849950996e-06, "epoch": 0.12148823082763857, "percentage": 12.14, "elapsed_time": "2:23:47", "remaining_time": "17:20:19"} +{"current_steps": 201, "total_steps": 1647, "loss": 0.3691, "lr": 9.986244378984817e-06, "epoch": 0.12209567198177676, "percentage": 12.2, "elapsed_time": "2:24:27", "remaining_time": "17:19:16"} +{"current_steps": 202, "total_steps": 1647, "loss": 0.3566, "lr": 9.985447501378706e-06, "epoch": 0.12270311313591496, "percentage": 12.26, "elapsed_time": "2:25:10", "remaining_time": "17:18:32"} +{"current_steps": 203, "total_steps": 1647, "loss": 0.33, "lr": 9.984628220713587e-06, "epoch": 0.12331055429005315, "percentage": 12.33, "elapsed_time": "2:25:57", "remaining_time": "17:18:16"} +{"current_steps": 204, "total_steps": 1647, "loss": 0.3745, "lr": 9.983786540671052e-06, "epoch": 0.12391799544419134, "percentage": 12.39, "elapsed_time": "2:26:41", "remaining_time": "17:17:37"} +{"current_steps": 205, "total_steps": 1647, "loss": 0.3863, "lr": 9.98292246503335e-06, "epoch": 0.12452543659832954, "percentage": 12.45, "elapsed_time": "2:27:23", "remaining_time": "17:16:44"} +{"current_steps": 206, "total_steps": 1647, "loss": 0.3449, "lr": 9.982035997683372e-06, "epoch": 0.12513287775246773, "percentage": 12.51, "elapsed_time": "2:28:05", "remaining_time": "17:15:54"} +{"current_steps": 207, "total_steps": 1647, "loss": 0.3553, "lr": 9.981127142604628e-06, "epoch": 0.12574031890660592, "percentage": 12.57, "elapsed_time": "2:28:46", "remaining_time": "17:14:59"} +{"current_steps": 208, "total_steps": 1647, "loss": 0.3929, "lr": 9.980195903881231e-06, "epoch": 0.1263477600607441, "percentage": 12.63, "elapsed_time": "2:29:28", "remaining_time": "17:14:09"} +{"current_steps": 209, "total_steps": 1647, 
"loss": 0.3894, "lr": 9.979242285697878e-06, "epoch": 0.1269552012148823, "percentage": 12.69, "elapsed_time": "2:30:14", "remaining_time": "17:13:42"} +{"current_steps": 210, "total_steps": 1647, "loss": 0.3462, "lr": 9.978266292339838e-06, "epoch": 0.1275626423690205, "percentage": 12.75, "elapsed_time": "2:31:00", "remaining_time": "17:13:22"} +{"current_steps": 211, "total_steps": 1647, "loss": 0.3756, "lr": 9.97726792819292e-06, "epoch": 0.1281700835231587, "percentage": 12.81, "elapsed_time": "2:31:44", "remaining_time": "17:12:43"} +{"current_steps": 212, "total_steps": 1647, "loss": 0.3517, "lr": 9.976247197743465e-06, "epoch": 0.1287775246772969, "percentage": 12.87, "elapsed_time": "2:32:26", "remaining_time": "17:11:54"} +{"current_steps": 213, "total_steps": 1647, "loss": 0.3524, "lr": 9.975204105578318e-06, "epoch": 0.12938496583143508, "percentage": 12.93, "elapsed_time": "2:33:12", "remaining_time": "17:11:28"} +{"current_steps": 214, "total_steps": 1647, "loss": 0.3703, "lr": 9.974138656384815e-06, "epoch": 0.12999240698557327, "percentage": 12.99, "elapsed_time": "2:33:56", "remaining_time": "17:10:47"} +{"current_steps": 215, "total_steps": 1647, "loss": 0.4107, "lr": 9.973050854950756e-06, "epoch": 0.13059984813971146, "percentage": 13.05, "elapsed_time": "2:34:39", "remaining_time": "17:10:06"} +{"current_steps": 216, "total_steps": 1647, "loss": 0.3704, "lr": 9.97194070616438e-06, "epoch": 0.13120728929384967, "percentage": 13.11, "elapsed_time": "2:35:23", "remaining_time": "17:09:28"} +{"current_steps": 217, "total_steps": 1647, "loss": 0.3616, "lr": 9.970808215014357e-06, "epoch": 0.13181473044798786, "percentage": 13.18, "elapsed_time": "2:36:06", "remaining_time": "17:08:43"} +{"current_steps": 218, "total_steps": 1647, "loss": 0.3476, "lr": 9.969653386589749e-06, "epoch": 0.13242217160212605, "percentage": 13.24, "elapsed_time": "2:36:47", "remaining_time": "17:07:45"} +{"current_steps": 219, "total_steps": 1647, "loss": 0.3658, "lr": 
9.968476226079997e-06, "epoch": 0.13302961275626424, "percentage": 13.3, "elapsed_time": "2:37:30", "remaining_time": "17:07:04"} +{"current_steps": 220, "total_steps": 1647, "loss": 0.3559, "lr": 9.967276738774897e-06, "epoch": 0.13363705391040243, "percentage": 13.36, "elapsed_time": "2:38:14", "remaining_time": "17:06:27"} +{"current_steps": 221, "total_steps": 1647, "loss": 0.3464, "lr": 9.966054930064577e-06, "epoch": 0.13424449506454061, "percentage": 13.42, "elapsed_time": "2:39:00", "remaining_time": "17:05:58"} +{"current_steps": 222, "total_steps": 1647, "loss": 0.3835, "lr": 9.964810805439464e-06, "epoch": 0.1348519362186788, "percentage": 13.48, "elapsed_time": "2:39:41", "remaining_time": "17:05:05"} +{"current_steps": 223, "total_steps": 1647, "loss": 0.3649, "lr": 9.96354437049027e-06, "epoch": 0.13545937737281702, "percentage": 13.54, "elapsed_time": "2:40:28", "remaining_time": "17:04:45"} +{"current_steps": 224, "total_steps": 1647, "loss": 0.306, "lr": 9.962255630907964e-06, "epoch": 0.1360668185269552, "percentage": 13.6, "elapsed_time": "2:41:11", "remaining_time": "17:03:59"} +{"current_steps": 225, "total_steps": 1647, "loss": 0.4094, "lr": 9.96094459248374e-06, "epoch": 0.1366742596810934, "percentage": 13.66, "elapsed_time": "2:41:56", "remaining_time": "17:03:27"} +{"current_steps": 226, "total_steps": 1647, "loss": 0.3601, "lr": 9.959611261108999e-06, "epoch": 0.13728170083523158, "percentage": 13.72, "elapsed_time": "2:42:39", "remaining_time": "17:02:41"} +{"current_steps": 227, "total_steps": 1647, "loss": 0.3532, "lr": 9.95825564277532e-06, "epoch": 0.13788914198936977, "percentage": 13.78, "elapsed_time": "2:43:23", "remaining_time": "17:02:08"} +{"current_steps": 228, "total_steps": 1647, "loss": 0.3384, "lr": 9.956877743574437e-06, "epoch": 0.13849658314350796, "percentage": 13.84, "elapsed_time": "2:44:04", "remaining_time": "17:01:08"} +{"current_steps": 229, "total_steps": 1647, "loss": 0.3367, "lr": 9.955477569698197e-06, 
"epoch": 0.13910402429764618, "percentage": 13.9, "elapsed_time": "2:44:46", "remaining_time": "17:00:21"} +{"current_steps": 230, "total_steps": 1647, "loss": 0.3673, "lr": 9.954055127438554e-06, "epoch": 0.13971146545178437, "percentage": 13.96, "elapsed_time": "2:45:28", "remaining_time": "16:59:28"} +{"current_steps": 231, "total_steps": 1647, "loss": 0.4095, "lr": 9.952610423187516e-06, "epoch": 0.14031890660592256, "percentage": 14.03, "elapsed_time": "2:46:11", "remaining_time": "16:58:43"} +{"current_steps": 232, "total_steps": 1647, "loss": 0.3503, "lr": 9.951143463437145e-06, "epoch": 0.14092634776006074, "percentage": 14.09, "elapsed_time": "2:46:54", "remaining_time": "16:58:01"} +{"current_steps": 233, "total_steps": 1647, "loss": 0.3897, "lr": 9.949654254779499e-06, "epoch": 0.14153378891419893, "percentage": 14.15, "elapsed_time": "2:47:37", "remaining_time": "16:57:18"} +{"current_steps": 234, "total_steps": 1647, "loss": 0.3596, "lr": 9.948142803906623e-06, "epoch": 0.14214123006833712, "percentage": 14.21, "elapsed_time": "2:48:23", "remaining_time": "16:56:51"} +{"current_steps": 235, "total_steps": 1647, "loss": 0.3311, "lr": 9.946609117610508e-06, "epoch": 0.1427486712224753, "percentage": 14.27, "elapsed_time": "2:49:06", "remaining_time": "16:56:06"} +{"current_steps": 236, "total_steps": 1647, "loss": 0.3417, "lr": 9.94505320278307e-06, "epoch": 0.14335611237661353, "percentage": 14.33, "elapsed_time": "2:49:46", "remaining_time": "16:55:02"} +{"current_steps": 237, "total_steps": 1647, "loss": 0.3246, "lr": 9.943475066416105e-06, "epoch": 0.14396355353075171, "percentage": 14.39, "elapsed_time": "2:50:27", "remaining_time": "16:54:08"} +{"current_steps": 238, "total_steps": 1647, "loss": 0.336, "lr": 9.94187471560127e-06, "epoch": 0.1445709946848899, "percentage": 14.45, "elapsed_time": "2:51:16", "remaining_time": "16:53:56"} +{"current_steps": 239, "total_steps": 1647, "loss": 0.3728, "lr": 9.940252157530048e-06, "epoch": 
0.1451784358390281, "percentage": 14.51, "elapsed_time": "2:51:59", "remaining_time": "16:53:15"} +{"current_steps": 240, "total_steps": 1647, "loss": 0.3349, "lr": 9.938607399493714e-06, "epoch": 0.14578587699316628, "percentage": 14.57, "elapsed_time": "2:52:44", "remaining_time": "16:52:42"} +{"current_steps": 241, "total_steps": 1647, "loss": 0.3732, "lr": 9.936940448883299e-06, "epoch": 0.14639331814730447, "percentage": 14.63, "elapsed_time": "2:53:31", "remaining_time": "16:52:19"} +{"current_steps": 242, "total_steps": 1647, "loss": 0.3614, "lr": 9.935251313189564e-06, "epoch": 0.14700075930144268, "percentage": 14.69, "elapsed_time": "2:54:18", "remaining_time": "16:52:01"} +{"current_steps": 243, "total_steps": 1647, "loss": 0.3495, "lr": 9.933540000002966e-06, "epoch": 0.14760820045558087, "percentage": 14.75, "elapsed_time": "2:55:05", "remaining_time": "16:51:41"} +{"current_steps": 244, "total_steps": 1647, "loss": 0.3846, "lr": 9.931806517013612e-06, "epoch": 0.14821564160971906, "percentage": 14.81, "elapsed_time": "2:55:48", "remaining_time": "16:50:56"} +{"current_steps": 245, "total_steps": 1647, "loss": 0.3927, "lr": 9.930050872011242e-06, "epoch": 0.14882308276385725, "percentage": 14.88, "elapsed_time": "2:56:30", "remaining_time": "16:50:05"} +{"current_steps": 246, "total_steps": 1647, "loss": 0.347, "lr": 9.92827307288518e-06, "epoch": 0.14943052391799544, "percentage": 14.94, "elapsed_time": "2:57:11", "remaining_time": "16:49:10"} +{"current_steps": 247, "total_steps": 1647, "loss": 0.3099, "lr": 9.926473127624306e-06, "epoch": 0.15003796507213363, "percentage": 15.0, "elapsed_time": "2:57:59", "remaining_time": "16:48:49"} +{"current_steps": 248, "total_steps": 1647, "loss": 0.3476, "lr": 9.924651044317017e-06, "epoch": 0.15064540622627182, "percentage": 15.06, "elapsed_time": "2:58:40", "remaining_time": "16:47:58"} +{"current_steps": 249, "total_steps": 1647, "loss": 0.3829, "lr": 9.922806831151192e-06, "epoch": 0.15125284738041003, 
"percentage": 15.12, "elapsed_time": "2:59:21", "remaining_time": "16:47:00"} +{"current_steps": 250, "total_steps": 1647, "loss": 0.3414, "lr": 9.920940496414153e-06, "epoch": 0.15186028853454822, "percentage": 15.18, "elapsed_time": "3:00:03", "remaining_time": "16:46:12"} +{"current_steps": 251, "total_steps": 1647, "loss": 0.329, "lr": 9.919052048492633e-06, "epoch": 0.1524677296886864, "percentage": 15.24, "elapsed_time": "3:00:45", "remaining_time": "16:45:20"} +{"current_steps": 252, "total_steps": 1647, "loss": 0.3112, "lr": 9.917141495872733e-06, "epoch": 0.1530751708428246, "percentage": 15.3, "elapsed_time": "3:01:35", "remaining_time": "16:45:16"} +{"current_steps": 253, "total_steps": 1647, "loss": 0.3576, "lr": 9.915208847139883e-06, "epoch": 0.1536826119969628, "percentage": 15.36, "elapsed_time": "3:02:20", "remaining_time": "16:44:42"} +{"current_steps": 254, "total_steps": 1647, "loss": 0.3669, "lr": 9.913254110978812e-06, "epoch": 0.15429005315110098, "percentage": 15.42, "elapsed_time": "3:03:03", "remaining_time": "16:43:54"} +{"current_steps": 255, "total_steps": 1647, "loss": 0.3572, "lr": 9.911277296173498e-06, "epoch": 0.1548974943052392, "percentage": 15.48, "elapsed_time": "3:03:46", "remaining_time": "16:43:10"} +{"current_steps": 256, "total_steps": 1647, "loss": 0.3432, "lr": 9.909278411607134e-06, "epoch": 0.15550493545937738, "percentage": 15.54, "elapsed_time": "3:04:26", "remaining_time": "16:42:12"} +{"current_steps": 257, "total_steps": 1647, "loss": 0.3096, "lr": 9.90725746626209e-06, "epoch": 0.15611237661351557, "percentage": 15.6, "elapsed_time": "3:05:11", "remaining_time": "16:41:38"} +{"current_steps": 258, "total_steps": 1647, "loss": 0.3067, "lr": 9.90521446921987e-06, "epoch": 0.15671981776765376, "percentage": 15.66, "elapsed_time": "3:05:54", "remaining_time": "16:40:50"} +{"current_steps": 259, "total_steps": 1647, "loss": 0.3666, "lr": 9.903149429661072e-06, "epoch": 0.15732725892179195, "percentage": 15.73, 
"elapsed_time": "3:06:33", "remaining_time": "16:39:48"} +{"current_steps": 260, "total_steps": 1647, "loss": 0.3506, "lr": 9.90106235686534e-06, "epoch": 0.15793470007593013, "percentage": 15.79, "elapsed_time": "3:07:18", "remaining_time": "16:39:14"} +{"current_steps": 261, "total_steps": 1647, "loss": 0.3249, "lr": 9.89895326021134e-06, "epoch": 0.15854214123006835, "percentage": 15.85, "elapsed_time": "3:07:59", "remaining_time": "16:38:20"} +{"current_steps": 262, "total_steps": 1647, "loss": 0.3318, "lr": 9.896822149176695e-06, "epoch": 0.15914958238420654, "percentage": 15.91, "elapsed_time": "3:08:44", "remaining_time": "16:37:44"} +{"current_steps": 263, "total_steps": 1647, "loss": 0.396, "lr": 9.894669033337962e-06, "epoch": 0.15975702353834473, "percentage": 15.97, "elapsed_time": "3:09:26", "remaining_time": "16:36:55"} +{"current_steps": 264, "total_steps": 1647, "loss": 0.3188, "lr": 9.892493922370575e-06, "epoch": 0.16036446469248292, "percentage": 16.03, "elapsed_time": "3:10:10", "remaining_time": "16:36:13"} +{"current_steps": 265, "total_steps": 1647, "loss": 0.3379, "lr": 9.89029682604881e-06, "epoch": 0.1609719058466211, "percentage": 16.09, "elapsed_time": "3:10:53", "remaining_time": "16:35:32"} +{"current_steps": 266, "total_steps": 1647, "loss": 0.3493, "lr": 9.888077754245741e-06, "epoch": 0.1615793470007593, "percentage": 16.15, "elapsed_time": "3:11:33", "remaining_time": "16:34:32"} +{"current_steps": 267, "total_steps": 1647, "loss": 0.3608, "lr": 9.88583671693319e-06, "epoch": 0.16218678815489748, "percentage": 16.21, "elapsed_time": "3:12:17", "remaining_time": "16:33:50"} +{"current_steps": 268, "total_steps": 1647, "loss": 0.3795, "lr": 9.883573724181683e-06, "epoch": 0.1627942293090357, "percentage": 16.27, "elapsed_time": "3:13:02", "remaining_time": "16:33:16"} +{"current_steps": 269, "total_steps": 1647, "loss": 0.3669, "lr": 9.881288786160413e-06, "epoch": 0.1634016704631739, "percentage": 16.33, "elapsed_time": "3:13:46", 
"remaining_time": "16:32:39"} +{"current_steps": 270, "total_steps": 1647, "loss": 0.3045, "lr": 9.878981913137178e-06, "epoch": 0.16400911161731208, "percentage": 16.39, "elapsed_time": "3:14:30", "remaining_time": "16:32:01"} +{"current_steps": 271, "total_steps": 1647, "loss": 0.3748, "lr": 9.87665311547836e-06, "epoch": 0.16461655277145026, "percentage": 16.45, "elapsed_time": "3:15:17", "remaining_time": "16:31:35"} +{"current_steps": 272, "total_steps": 1647, "loss": 0.317, "lr": 9.87430240364885e-06, "epoch": 0.16522399392558845, "percentage": 16.51, "elapsed_time": "3:15:57", "remaining_time": "16:30:34"} +{"current_steps": 273, "total_steps": 1647, "loss": 0.3444, "lr": 9.871929788212022e-06, "epoch": 0.16583143507972664, "percentage": 16.58, "elapsed_time": "3:16:39", "remaining_time": "16:29:45"} +{"current_steps": 274, "total_steps": 1647, "loss": 0.3606, "lr": 9.869535279829674e-06, "epoch": 0.16643887623386486, "percentage": 16.64, "elapsed_time": "3:17:18", "remaining_time": "16:28:43"} +{"current_steps": 275, "total_steps": 1647, "loss": 0.3473, "lr": 9.867118889261988e-06, "epoch": 0.16704631738800305, "percentage": 16.7, "elapsed_time": "3:17:59", "remaining_time": "16:27:46"} +{"current_steps": 276, "total_steps": 1647, "loss": 0.3278, "lr": 9.864680627367476e-06, "epoch": 0.16765375854214123, "percentage": 16.76, "elapsed_time": "3:18:38", "remaining_time": "16:26:44"} +{"current_steps": 277, "total_steps": 1647, "loss": 0.3521, "lr": 9.862220505102933e-06, "epoch": 0.16826119969627942, "percentage": 16.82, "elapsed_time": "3:19:22", "remaining_time": "16:26:05"} +{"current_steps": 278, "total_steps": 1647, "loss": 0.319, "lr": 9.859738533523384e-06, "epoch": 0.1688686408504176, "percentage": 16.88, "elapsed_time": "3:20:04", "remaining_time": "16:25:15"} +{"current_steps": 279, "total_steps": 1647, "loss": 0.3352, "lr": 9.857234723782044e-06, "epoch": 0.1694760820045558, "percentage": 16.94, "elapsed_time": "3:20:43", "remaining_time": 
"16:24:10"} +{"current_steps": 280, "total_steps": 1647, "loss": 0.3139, "lr": 9.854709087130261e-06, "epoch": 0.170083523158694, "percentage": 17.0, "elapsed_time": "3:21:23", "remaining_time": "16:23:12"} +{"current_steps": 281, "total_steps": 1647, "loss": 0.3349, "lr": 9.852161634917463e-06, "epoch": 0.1706909643128322, "percentage": 17.06, "elapsed_time": "3:22:08", "remaining_time": "16:22:37"} +{"current_steps": 282, "total_steps": 1647, "loss": 0.3077, "lr": 9.849592378591113e-06, "epoch": 0.1712984054669704, "percentage": 17.12, "elapsed_time": "3:22:53", "remaining_time": "16:22:04"} +{"current_steps": 283, "total_steps": 1647, "loss": 0.3069, "lr": 9.847001329696653e-06, "epoch": 0.17190584662110858, "percentage": 17.18, "elapsed_time": "3:23:38", "remaining_time": "16:21:29"} +{"current_steps": 284, "total_steps": 1647, "loss": 0.3291, "lr": 9.844388499877457e-06, "epoch": 0.17251328777524677, "percentage": 17.24, "elapsed_time": "3:24:18", "remaining_time": "16:20:34"} +{"current_steps": 285, "total_steps": 1647, "loss": 0.3289, "lr": 9.841753900874774e-06, "epoch": 0.17312072892938496, "percentage": 17.3, "elapsed_time": "3:25:03", "remaining_time": "16:19:59"} +{"current_steps": 286, "total_steps": 1647, "loss": 0.3267, "lr": 9.839097544527674e-06, "epoch": 0.17372817008352315, "percentage": 17.36, "elapsed_time": "3:25:45", "remaining_time": "16:19:09"} +{"current_steps": 287, "total_steps": 1647, "loss": 0.3443, "lr": 9.836419442773004e-06, "epoch": 0.17433561123766136, "percentage": 17.43, "elapsed_time": "3:26:29", "remaining_time": "16:18:31"} +{"current_steps": 288, "total_steps": 1647, "loss": 0.3241, "lr": 9.833719607645325e-06, "epoch": 0.17494305239179955, "percentage": 17.49, "elapsed_time": "3:27:09", "remaining_time": "16:17:31"} +{"current_steps": 289, "total_steps": 1647, "loss": 0.3541, "lr": 9.830998051276858e-06, "epoch": 0.17555049354593774, "percentage": 17.55, "elapsed_time": "3:27:49", "remaining_time": "16:16:35"} 
+{"current_steps": 290, "total_steps": 1647, "loss": 0.3666, "lr": 9.82825478589744e-06, "epoch": 0.17615793470007593, "percentage": 17.61, "elapsed_time": "3:28:36", "remaining_time": "16:16:06"} +{"current_steps": 291, "total_steps": 1647, "loss": 0.3162, "lr": 9.825489823834454e-06, "epoch": 0.17676537585421412, "percentage": 17.67, "elapsed_time": "3:29:16", "remaining_time": "16:15:11"} +{"current_steps": 292, "total_steps": 1647, "loss": 0.3281, "lr": 9.822703177512783e-06, "epoch": 0.1773728170083523, "percentage": 17.73, "elapsed_time": "3:29:58", "remaining_time": "16:14:21"} +{"current_steps": 293, "total_steps": 1647, "loss": 0.2902, "lr": 9.819894859454756e-06, "epoch": 0.1779802581624905, "percentage": 17.79, "elapsed_time": "3:30:38", "remaining_time": "16:13:25"} +{"current_steps": 294, "total_steps": 1647, "loss": 0.3872, "lr": 9.817064882280085e-06, "epoch": 0.1785876993166287, "percentage": 17.85, "elapsed_time": "3:31:22", "remaining_time": "16:12:43"} +{"current_steps": 295, "total_steps": 1647, "loss": 0.4009, "lr": 9.814213258705813e-06, "epoch": 0.1791951404707669, "percentage": 17.91, "elapsed_time": "3:32:06", "remaining_time": "16:12:04"} +{"current_steps": 296, "total_steps": 1647, "loss": 0.335, "lr": 9.811340001546252e-06, "epoch": 0.1798025816249051, "percentage": 17.97, "elapsed_time": "3:32:46", "remaining_time": "16:11:08"} +{"current_steps": 297, "total_steps": 1647, "loss": 0.3789, "lr": 9.808445123712934e-06, "epoch": 0.18041002277904328, "percentage": 18.03, "elapsed_time": "3:33:28", "remaining_time": "16:10:19"} +{"current_steps": 298, "total_steps": 1647, "loss": 0.365, "lr": 9.805528638214543e-06, "epoch": 0.18101746393318147, "percentage": 18.09, "elapsed_time": "3:34:14", "remaining_time": "16:09:52"} +{"current_steps": 299, "total_steps": 1647, "loss": 0.3267, "lr": 9.802590558156863e-06, "epoch": 0.18162490508731965, "percentage": 18.15, "elapsed_time": "3:35:01", "remaining_time": "16:09:23"} +{"current_steps": 300, 
"total_steps": 1647, "loss": 0.3258, "lr": 9.799630896742716e-06, "epoch": 0.18223234624145787, "percentage": 18.21, "elapsed_time": "3:35:45", "remaining_time": "16:08:45"} +{"current_steps": 301, "total_steps": 1647, "loss": 0.3588, "lr": 9.796649667271905e-06, "epoch": 0.18283978739559606, "percentage": 18.28, "elapsed_time": "3:36:32", "remaining_time": "16:08:21"} +{"current_steps": 302, "total_steps": 1647, "loss": 0.32, "lr": 9.793646883141155e-06, "epoch": 0.18344722854973425, "percentage": 18.34, "elapsed_time": "3:37:16", "remaining_time": "16:07:40"} +{"current_steps": 303, "total_steps": 1647, "loss": 0.3561, "lr": 9.790622557844047e-06, "epoch": 0.18405466970387244, "percentage": 18.4, "elapsed_time": "3:37:59", "remaining_time": "16:06:56"} +{"current_steps": 304, "total_steps": 1647, "loss": 0.343, "lr": 9.787576704970965e-06, "epoch": 0.18466211085801063, "percentage": 18.46, "elapsed_time": "3:38:41", "remaining_time": "16:06:05"} +{"current_steps": 305, "total_steps": 1647, "loss": 0.339, "lr": 9.784509338209026e-06, "epoch": 0.1852695520121488, "percentage": 18.52, "elapsed_time": "3:39:23", "remaining_time": "16:05:20"} +{"current_steps": 306, "total_steps": 1647, "loss": 0.3204, "lr": 9.781420471342035e-06, "epoch": 0.185876993166287, "percentage": 18.58, "elapsed_time": "3:40:09", "remaining_time": "16:04:47"} +{"current_steps": 307, "total_steps": 1647, "loss": 0.3598, "lr": 9.778310118250397e-06, "epoch": 0.18648443432042522, "percentage": 18.64, "elapsed_time": "3:40:52", "remaining_time": "16:04:02"} +{"current_steps": 308, "total_steps": 1647, "loss": 0.3397, "lr": 9.77517829291108e-06, "epoch": 0.1870918754745634, "percentage": 18.7, "elapsed_time": "3:41:36", "remaining_time": "16:03:26"} +{"current_steps": 309, "total_steps": 1647, "loss": 0.3291, "lr": 9.772025009397538e-06, "epoch": 0.1876993166287016, "percentage": 18.76, "elapsed_time": "3:42:22", "remaining_time": "16:02:53"} +{"current_steps": 310, "total_steps": 1647, "loss": 
0.3297, "lr": 9.768850281879651e-06, "epoch": 0.18830675778283978, "percentage": 18.82, "elapsed_time": "3:43:05", "remaining_time": "16:02:09"} +{"current_steps": 311, "total_steps": 1647, "loss": 0.3317, "lr": 9.765654124623664e-06, "epoch": 0.18891419893697797, "percentage": 18.88, "elapsed_time": "3:43:47", "remaining_time": "16:01:20"} +{"current_steps": 312, "total_steps": 1647, "loss": 0.3545, "lr": 9.762436551992117e-06, "epoch": 0.18952164009111616, "percentage": 18.94, "elapsed_time": "3:44:35", "remaining_time": "16:00:59"} +{"current_steps": 313, "total_steps": 1647, "loss": 0.3282, "lr": 9.759197578443787e-06, "epoch": 0.19012908124525438, "percentage": 19.0, "elapsed_time": "3:45:16", "remaining_time": "16:00:06"} +{"current_steps": 314, "total_steps": 1647, "loss": 0.348, "lr": 9.755937218533622e-06, "epoch": 0.19073652239939257, "percentage": 19.06, "elapsed_time": "3:45:58", "remaining_time": "15:59:20"} +{"current_steps": 315, "total_steps": 1647, "loss": 0.3258, "lr": 9.752655486912666e-06, "epoch": 0.19134396355353075, "percentage": 19.13, "elapsed_time": "3:46:41", "remaining_time": "15:58:33"} +{"current_steps": 316, "total_steps": 1647, "loss": 0.3441, "lr": 9.74935239832801e-06, "epoch": 0.19195140470766894, "percentage": 19.19, "elapsed_time": "3:47:23", "remaining_time": "15:57:46"} +{"current_steps": 317, "total_steps": 1647, "loss": 0.3322, "lr": 9.746027967622709e-06, "epoch": 0.19255884586180713, "percentage": 19.25, "elapsed_time": "3:48:07", "remaining_time": "15:57:06"} +{"current_steps": 318, "total_steps": 1647, "loss": 0.3387, "lr": 9.742682209735727e-06, "epoch": 0.19316628701594532, "percentage": 19.31, "elapsed_time": "3:48:52", "remaining_time": "15:56:31"} +{"current_steps": 319, "total_steps": 1647, "loss": 0.3234, "lr": 9.739315139701868e-06, "epoch": 0.19377372817008354, "percentage": 19.37, "elapsed_time": "3:49:33", "remaining_time": "15:55:39"} +{"current_steps": 320, "total_steps": 1647, "loss": 0.3182, "lr": 
9.735926772651703e-06, "epoch": 0.19438116932422173, "percentage": 19.43, "elapsed_time": "3:50:15", "remaining_time": "15:54:51"} +{"current_steps": 321, "total_steps": 1647, "loss": 0.3267, "lr": 9.732517123811502e-06, "epoch": 0.19498861047835991, "percentage": 19.49, "elapsed_time": "3:51:02", "remaining_time": "15:54:22"} +{"current_steps": 322, "total_steps": 1647, "loss": 0.3439, "lr": 9.729086208503174e-06, "epoch": 0.1955960516324981, "percentage": 19.55, "elapsed_time": "3:51:47", "remaining_time": "15:53:47"} +{"current_steps": 323, "total_steps": 1647, "loss": 0.3035, "lr": 9.725634042144192e-06, "epoch": 0.1962034927866363, "percentage": 19.61, "elapsed_time": "3:52:29", "remaining_time": "15:52:59"} +{"current_steps": 324, "total_steps": 1647, "loss": 0.3402, "lr": 9.722160640247523e-06, "epoch": 0.19681093394077448, "percentage": 19.67, "elapsed_time": "3:53:09", "remaining_time": "15:52:02"} +{"current_steps": 325, "total_steps": 1647, "loss": 0.3596, "lr": 9.71866601842156e-06, "epoch": 0.19741837509491267, "percentage": 19.73, "elapsed_time": "3:53:53", "remaining_time": "15:51:25"} +{"current_steps": 326, "total_steps": 1647, "loss": 0.3378, "lr": 9.715150192370054e-06, "epoch": 0.19802581624905088, "percentage": 19.79, "elapsed_time": "3:54:36", "remaining_time": "15:50:40"} +{"current_steps": 327, "total_steps": 1647, "loss": 0.312, "lr": 9.71161317789204e-06, "epoch": 0.19863325740318907, "percentage": 19.85, "elapsed_time": "3:55:24", "remaining_time": "15:50:14"} +{"current_steps": 328, "total_steps": 1647, "loss": 0.3028, "lr": 9.708054990881763e-06, "epoch": 0.19924069855732726, "percentage": 19.91, "elapsed_time": "3:56:07", "remaining_time": "15:49:33"} +{"current_steps": 329, "total_steps": 1647, "loss": 0.3161, "lr": 9.70447564732862e-06, "epoch": 0.19984813971146545, "percentage": 19.98, "elapsed_time": "3:56:49", "remaining_time": "15:48:45"} +{"current_steps": 330, "total_steps": 1647, "loss": 0.3159, "lr": 9.700875163317072e-06, 
"epoch": 0.20045558086560364, "percentage": 20.04, "elapsed_time": "3:57:33", "remaining_time": "15:48:03"} +{"current_steps": 331, "total_steps": 1647, "loss": 0.3555, "lr": 9.69725355502658e-06, "epoch": 0.20106302201974183, "percentage": 20.1, "elapsed_time": "3:58:16", "remaining_time": "15:47:21"} +{"current_steps": 332, "total_steps": 1647, "loss": 0.3256, "lr": 9.693610838731532e-06, "epoch": 0.20167046317388004, "percentage": 20.16, "elapsed_time": "3:58:56", "remaining_time": "15:46:24"} +{"current_steps": 333, "total_steps": 1647, "loss": 0.358, "lr": 9.689947030801168e-06, "epoch": 0.20227790432801823, "percentage": 20.22, "elapsed_time": "3:59:36", "remaining_time": "15:45:30"} +{"current_steps": 334, "total_steps": 1647, "loss": 0.3648, "lr": 9.686262147699507e-06, "epoch": 0.20288534548215642, "percentage": 20.28, "elapsed_time": "4:00:21", "remaining_time": "15:44:51"} +{"current_steps": 335, "total_steps": 1647, "loss": 0.3197, "lr": 9.682556205985274e-06, "epoch": 0.2034927866362946, "percentage": 20.34, "elapsed_time": "4:01:03", "remaining_time": "15:44:03"} +{"current_steps": 336, "total_steps": 1647, "loss": 0.304, "lr": 9.678829222311827e-06, "epoch": 0.2041002277904328, "percentage": 20.4, "elapsed_time": "4:01:47", "remaining_time": "15:43:26"} +{"current_steps": 337, "total_steps": 1647, "loss": 0.3282, "lr": 9.675081213427076e-06, "epoch": 0.204707668944571, "percentage": 20.46, "elapsed_time": "4:02:30", "remaining_time": "15:42:40"} +{"current_steps": 338, "total_steps": 1647, "loss": 0.328, "lr": 9.671312196173413e-06, "epoch": 0.20531511009870917, "percentage": 20.52, "elapsed_time": "4:03:09", "remaining_time": "15:41:40"} +{"current_steps": 339, "total_steps": 1647, "loss": 0.3352, "lr": 9.667522187487635e-06, "epoch": 0.2059225512528474, "percentage": 20.58, "elapsed_time": "4:03:51", "remaining_time": "15:40:53"} +{"current_steps": 340, "total_steps": 1647, "loss": 0.3575, "lr": 9.663711204400872e-06, "epoch": 0.20652999240698558, 
"percentage": 20.64, "elapsed_time": "4:04:35", "remaining_time": "15:40:12"} +{"current_steps": 341, "total_steps": 1647, "loss": 0.365, "lr": 9.659879264038499e-06, "epoch": 0.20713743356112377, "percentage": 20.7, "elapsed_time": "4:05:21", "remaining_time": "15:39:43"} +{"current_steps": 342, "total_steps": 1647, "loss": 0.3445, "lr": 9.656026383620076e-06, "epoch": 0.20774487471526196, "percentage": 20.77, "elapsed_time": "4:06:03", "remaining_time": "15:38:53"} +{"current_steps": 343, "total_steps": 1647, "loss": 0.2948, "lr": 9.65215258045925e-06, "epoch": 0.20835231586940015, "percentage": 20.83, "elapsed_time": "4:06:46", "remaining_time": "15:38:11"} +{"current_steps": 344, "total_steps": 1647, "loss": 0.3139, "lr": 9.6482578719637e-06, "epoch": 0.20895975702353833, "percentage": 20.89, "elapsed_time": "4:07:29", "remaining_time": "15:37:26"} +{"current_steps": 345, "total_steps": 1647, "loss": 0.3015, "lr": 9.644342275635036e-06, "epoch": 0.20956719817767655, "percentage": 20.95, "elapsed_time": "4:08:10", "remaining_time": "15:36:35"} +{"current_steps": 346, "total_steps": 1647, "loss": 0.3228, "lr": 9.640405809068743e-06, "epoch": 0.21017463933181474, "percentage": 21.01, "elapsed_time": "4:08:53", "remaining_time": "15:35:53"} +{"current_steps": 347, "total_steps": 1647, "loss": 0.307, "lr": 9.636448489954077e-06, "epoch": 0.21078208048595293, "percentage": 21.07, "elapsed_time": "4:09:37", "remaining_time": "15:35:12"} +{"current_steps": 348, "total_steps": 1647, "loss": 0.3284, "lr": 9.632470336074009e-06, "epoch": 0.21138952164009112, "percentage": 21.13, "elapsed_time": "4:10:22", "remaining_time": "15:34:33"} +{"current_steps": 349, "total_steps": 1647, "loss": 0.3437, "lr": 9.628471365305134e-06, "epoch": 0.2119969627942293, "percentage": 21.19, "elapsed_time": "4:11:07", "remaining_time": "15:33:57"} +{"current_steps": 350, "total_steps": 1647, "loss": 0.3185, "lr": 9.624451595617588e-06, "epoch": 0.2126044039483675, "percentage": 21.25, 
"elapsed_time": "4:11:47", "remaining_time": "15:33:03"} +{"current_steps": 351, "total_steps": 1647, "loss": 0.3626, "lr": 9.620411045074972e-06, "epoch": 0.21321184510250568, "percentage": 21.31, "elapsed_time": "4:12:30", "remaining_time": "15:32:20"} +{"current_steps": 352, "total_steps": 1647, "loss": 0.3225, "lr": 9.616349731834271e-06, "epoch": 0.2138192862566439, "percentage": 21.37, "elapsed_time": "4:13:16", "remaining_time": "15:31:48"} +{"current_steps": 353, "total_steps": 1647, "loss": 0.3534, "lr": 9.612267674145772e-06, "epoch": 0.2144267274107821, "percentage": 21.43, "elapsed_time": "4:14:00", "remaining_time": "15:31:06"} +{"current_steps": 354, "total_steps": 1647, "loss": 0.3459, "lr": 9.608164890352977e-06, "epoch": 0.21503416856492027, "percentage": 21.49, "elapsed_time": "4:14:40", "remaining_time": "15:30:12"} +{"current_steps": 355, "total_steps": 1647, "loss": 0.3288, "lr": 9.604041398892528e-06, "epoch": 0.21564160971905846, "percentage": 21.55, "elapsed_time": "4:15:24", "remaining_time": "15:29:32"} +{"current_steps": 356, "total_steps": 1647, "loss": 0.3509, "lr": 9.599897218294122e-06, "epoch": 0.21624905087319665, "percentage": 21.62, "elapsed_time": "4:16:05", "remaining_time": "15:28:39"} +{"current_steps": 357, "total_steps": 1647, "loss": 0.3173, "lr": 9.595732367180422e-06, "epoch": 0.21685649202733484, "percentage": 21.68, "elapsed_time": "4:16:50", "remaining_time": "15:28:04"} +{"current_steps": 358, "total_steps": 1647, "loss": 0.3507, "lr": 9.591546864266983e-06, "epoch": 0.21746393318147306, "percentage": 21.74, "elapsed_time": "4:17:35", "remaining_time": "15:27:27"} +{"current_steps": 359, "total_steps": 1647, "loss": 0.3001, "lr": 9.58734072836216e-06, "epoch": 0.21807137433561125, "percentage": 21.8, "elapsed_time": "4:18:18", "remaining_time": "15:26:46"} +{"current_steps": 360, "total_steps": 1647, "loss": 0.2957, "lr": 9.583113978367026e-06, "epoch": 0.21867881548974943, "percentage": 21.86, "elapsed_time": 
"4:19:02", "remaining_time": "15:26:05"} +{"current_steps": 361, "total_steps": 1647, "loss": 0.3383, "lr": 9.578866633275289e-06, "epoch": 0.21928625664388762, "percentage": 21.92, "elapsed_time": "4:19:48", "remaining_time": "15:25:31"} +{"current_steps": 362, "total_steps": 1647, "loss": 0.2735, "lr": 9.574598712173202e-06, "epoch": 0.2198936977980258, "percentage": 21.98, "elapsed_time": "4:20:31", "remaining_time": "15:24:46"} +{"current_steps": 363, "total_steps": 1647, "loss": 0.3166, "lr": 9.570310234239483e-06, "epoch": 0.220501138952164, "percentage": 22.04, "elapsed_time": "4:21:15", "remaining_time": "15:24:06"} +{"current_steps": 364, "total_steps": 1647, "loss": 0.3249, "lr": 9.56600121874523e-06, "epoch": 0.2211085801063022, "percentage": 22.1, "elapsed_time": "4:21:55", "remaining_time": "15:23:12"} +{"current_steps": 365, "total_steps": 1647, "loss": 0.3467, "lr": 9.561671685053818e-06, "epoch": 0.2217160212604404, "percentage": 22.16, "elapsed_time": "4:22:39", "remaining_time": "15:22:34"} +{"current_steps": 366, "total_steps": 1647, "loss": 0.3077, "lr": 9.557321652620839e-06, "epoch": 0.2223234624145786, "percentage": 22.22, "elapsed_time": "4:23:24", "remaining_time": "15:21:54"} +{"current_steps": 367, "total_steps": 1647, "loss": 0.3294, "lr": 9.55295114099399e-06, "epoch": 0.22293090356871678, "percentage": 22.28, "elapsed_time": "4:24:01", "remaining_time": "15:20:51"} +{"current_steps": 368, "total_steps": 1647, "loss": 0.3167, "lr": 9.548560169812997e-06, "epoch": 0.22353834472285497, "percentage": 22.34, "elapsed_time": "4:24:45", "remaining_time": "15:20:11"} +{"current_steps": 369, "total_steps": 1647, "loss": 0.3193, "lr": 9.544148758809528e-06, "epoch": 0.22414578587699316, "percentage": 22.4, "elapsed_time": "4:25:31", "remaining_time": "15:19:37"} +{"current_steps": 370, "total_steps": 1647, "loss": 0.3093, "lr": 9.539716927807102e-06, "epoch": 0.22475322703113135, "percentage": 22.47, "elapsed_time": "4:26:11", "remaining_time": 
"15:18:42"} +{"current_steps": 371, "total_steps": 1647, "loss": 0.3253, "lr": 9.535264696720993e-06, "epoch": 0.22536066818526956, "percentage": 22.53, "elapsed_time": "4:26:54", "remaining_time": "15:17:59"} +{"current_steps": 372, "total_steps": 1647, "loss": 0.3558, "lr": 9.530792085558151e-06, "epoch": 0.22596810933940775, "percentage": 22.59, "elapsed_time": "4:27:33", "remaining_time": "15:17:03"} +{"current_steps": 373, "total_steps": 1647, "loss": 0.3253, "lr": 9.526299114417108e-06, "epoch": 0.22657555049354594, "percentage": 22.65, "elapsed_time": "4:28:15", "remaining_time": "15:16:16"} +{"current_steps": 374, "total_steps": 1647, "loss": 0.3178, "lr": 9.521785803487888e-06, "epoch": 0.22718299164768413, "percentage": 22.71, "elapsed_time": "4:28:55", "remaining_time": "15:15:22"} +{"current_steps": 375, "total_steps": 1647, "loss": 0.3066, "lr": 9.517252173051912e-06, "epoch": 0.22779043280182232, "percentage": 22.77, "elapsed_time": "4:29:36", "remaining_time": "15:14:30"} +{"current_steps": 376, "total_steps": 1647, "loss": 0.3087, "lr": 9.512698243481914e-06, "epoch": 0.2283978739559605, "percentage": 22.83, "elapsed_time": "4:30:20", "remaining_time": "15:13:50"} +{"current_steps": 377, "total_steps": 1647, "loss": 0.3001, "lr": 9.508124035241843e-06, "epoch": 0.22900531511009872, "percentage": 22.89, "elapsed_time": "4:31:04", "remaining_time": "15:13:09"} +{"current_steps": 378, "total_steps": 1647, "loss": 0.3393, "lr": 9.50352956888678e-06, "epoch": 0.2296127562642369, "percentage": 22.95, "elapsed_time": "4:31:50", "remaining_time": "15:12:36"} +{"current_steps": 379, "total_steps": 1647, "loss": 0.334, "lr": 9.498914865062831e-06, "epoch": 0.2302201974183751, "percentage": 23.01, "elapsed_time": "4:32:30", "remaining_time": "15:11:42"} +{"current_steps": 380, "total_steps": 1647, "loss": 0.3285, "lr": 9.49427994450705e-06, "epoch": 0.2308276385725133, "percentage": 23.07, "elapsed_time": "4:33:14", "remaining_time": "15:11:02"} 
+{"current_steps": 381, "total_steps": 1647, "loss": 0.3137, "lr": 9.489624828047336e-06, "epoch": 0.23143507972665148, "percentage": 23.13, "elapsed_time": "4:33:55", "remaining_time": "15:10:11"} +{"current_steps": 382, "total_steps": 1647, "loss": 0.3505, "lr": 9.484949536602343e-06, "epoch": 0.23204252088078967, "percentage": 23.19, "elapsed_time": "4:34:35", "remaining_time": "15:09:20"} +{"current_steps": 383, "total_steps": 1647, "loss": 0.3441, "lr": 9.480254091181385e-06, "epoch": 0.23264996203492785, "percentage": 23.25, "elapsed_time": "4:35:19", "remaining_time": "15:08:39"} +{"current_steps": 384, "total_steps": 1647, "loss": 0.328, "lr": 9.47553851288434e-06, "epoch": 0.23325740318906607, "percentage": 23.32, "elapsed_time": "4:36:03", "remaining_time": "15:07:58"} +{"current_steps": 385, "total_steps": 1647, "loss": 0.2914, "lr": 9.470802822901558e-06, "epoch": 0.23386484434320426, "percentage": 23.38, "elapsed_time": "4:36:47", "remaining_time": "15:07:19"} +{"current_steps": 386, "total_steps": 1647, "loss": 0.3194, "lr": 9.466047042513767e-06, "epoch": 0.23447228549734245, "percentage": 23.44, "elapsed_time": "4:37:28", "remaining_time": "15:06:29"} +{"current_steps": 387, "total_steps": 1647, "loss": 0.3329, "lr": 9.461271193091971e-06, "epoch": 0.23507972665148064, "percentage": 23.5, "elapsed_time": "4:38:10", "remaining_time": "15:05:41"} +{"current_steps": 388, "total_steps": 1647, "loss": 0.3295, "lr": 9.45647529609736e-06, "epoch": 0.23568716780561882, "percentage": 23.56, "elapsed_time": "4:38:53", "remaining_time": "15:04:58"} +{"current_steps": 389, "total_steps": 1647, "loss": 0.3447, "lr": 9.451659373081214e-06, "epoch": 0.236294608959757, "percentage": 23.62, "elapsed_time": "4:39:38", "remaining_time": "15:04:20"} +{"current_steps": 390, "total_steps": 1647, "loss": 0.3337, "lr": 9.4468234456848e-06, "epoch": 0.23690205011389523, "percentage": 23.68, "elapsed_time": "4:40:22", "remaining_time": "15:03:39"} +{"current_steps": 391, 
"total_steps": 1647, "loss": 0.2838, "lr": 9.44196753563928e-06, "epoch": 0.23750949126803342, "percentage": 23.74, "elapsed_time": "4:41:08", "remaining_time": "15:03:06"} +{"current_steps": 392, "total_steps": 1647, "loss": 0.3256, "lr": 9.437091664765611e-06, "epoch": 0.2381169324221716, "percentage": 23.8, "elapsed_time": "4:41:51", "remaining_time": "15:02:22"} +{"current_steps": 393, "total_steps": 1647, "loss": 0.2924, "lr": 9.43219585497445e-06, "epoch": 0.2387243735763098, "percentage": 23.86, "elapsed_time": "4:42:38", "remaining_time": "15:01:50"} +{"current_steps": 394, "total_steps": 1647, "loss": 0.3159, "lr": 9.427280128266049e-06, "epoch": 0.23933181473044798, "percentage": 23.92, "elapsed_time": "4:43:19", "remaining_time": "15:01:01"} +{"current_steps": 395, "total_steps": 1647, "loss": 0.3223, "lr": 9.422344506730168e-06, "epoch": 0.23993925588458617, "percentage": 23.98, "elapsed_time": "4:43:57", "remaining_time": "15:00:03"} +{"current_steps": 396, "total_steps": 1647, "loss": 0.3114, "lr": 9.41738901254596e-06, "epoch": 0.24054669703872436, "percentage": 24.04, "elapsed_time": "4:44:41", "remaining_time": "14:59:22"} +{"current_steps": 397, "total_steps": 1647, "loss": 0.365, "lr": 9.412413667981884e-06, "epoch": 0.24115413819286258, "percentage": 24.1, "elapsed_time": "4:45:22", "remaining_time": "14:58:31"} +{"current_steps": 398, "total_steps": 1647, "loss": 0.3723, "lr": 9.4074184953956e-06, "epoch": 0.24176157934700077, "percentage": 24.17, "elapsed_time": "4:46:06", "remaining_time": "14:57:53"} +{"current_steps": 399, "total_steps": 1647, "loss": 0.3455, "lr": 9.402403517233867e-06, "epoch": 0.24236902050113895, "percentage": 24.23, "elapsed_time": "4:46:47", "remaining_time": "14:57:02"} +{"current_steps": 400, "total_steps": 1647, "loss": 0.3453, "lr": 9.397368756032445e-06, "epoch": 0.24297646165527714, "percentage": 24.29, "elapsed_time": "4:47:33", "remaining_time": "14:56:26"} +{"current_steps": 401, "total_steps": 1647, "loss": 
0.3047, "lr": 9.392314234415999e-06, "epoch": 0.24358390280941533, "percentage": 24.35, "elapsed_time": "4:48:14", "remaining_time": "14:55:37"} +{"current_steps": 402, "total_steps": 1647, "loss": 0.313, "lr": 9.38723997509798e-06, "epoch": 0.24419134396355352, "percentage": 24.41, "elapsed_time": "4:48:56", "remaining_time": "14:54:50"} +{"current_steps": 403, "total_steps": 1647, "loss": 0.3285, "lr": 9.38214600088054e-06, "epoch": 0.24479878511769174, "percentage": 24.47, "elapsed_time": "4:49:41", "remaining_time": "14:54:15"} +{"current_steps": 404, "total_steps": 1647, "loss": 0.369, "lr": 9.37703233465443e-06, "epoch": 0.24540622627182992, "percentage": 24.53, "elapsed_time": "4:50:23", "remaining_time": "14:53:26"} +{"current_steps": 405, "total_steps": 1647, "loss": 0.3527, "lr": 9.371898999398876e-06, "epoch": 0.2460136674259681, "percentage": 24.59, "elapsed_time": "4:51:04", "remaining_time": "14:52:38"} +{"current_steps": 406, "total_steps": 1647, "loss": 0.3277, "lr": 9.366746018181503e-06, "epoch": 0.2466211085801063, "percentage": 24.65, "elapsed_time": "4:51:48", "remaining_time": "14:51:58"} +{"current_steps": 407, "total_steps": 1647, "loss": 0.3229, "lr": 9.361573414158215e-06, "epoch": 0.2472285497342445, "percentage": 24.71, "elapsed_time": "4:52:31", "remaining_time": "14:51:14"} +{"current_steps": 408, "total_steps": 1647, "loss": 0.2919, "lr": 9.356381210573092e-06, "epoch": 0.24783599088838268, "percentage": 24.77, "elapsed_time": "4:53:12", "remaining_time": "14:50:24"} +{"current_steps": 409, "total_steps": 1647, "loss": 0.3438, "lr": 9.351169430758293e-06, "epoch": 0.24844343204252087, "percentage": 24.83, "elapsed_time": "4:53:57", "remaining_time": "14:49:46"} +{"current_steps": 410, "total_steps": 1647, "loss": 0.3262, "lr": 9.345938098133946e-06, "epoch": 0.24905087319665908, "percentage": 24.89, "elapsed_time": "4:54:40", "remaining_time": "14:49:04"} +{"current_steps": 411, "total_steps": 1647, "loss": 0.3011, "lr": 
9.340687236208037e-06, "epoch": 0.24965831435079727, "percentage": 24.95, "elapsed_time": "4:55:26", "remaining_time": "14:48:28"} +{"current_steps": 412, "total_steps": 1647, "loss": 0.3663, "lr": 9.33541686857632e-06, "epoch": 0.25026575550493546, "percentage": 25.02, "elapsed_time": "4:56:12", "remaining_time": "14:47:55"} +{"current_steps": 413, "total_steps": 1647, "loss": 0.3212, "lr": 9.330127018922195e-06, "epoch": 0.25087319665907365, "percentage": 25.08, "elapsed_time": "4:56:52", "remaining_time": "14:47:01"} +{"current_steps": 414, "total_steps": 1647, "loss": 0.3419, "lr": 9.324817711016609e-06, "epoch": 0.25148063781321184, "percentage": 25.14, "elapsed_time": "4:57:37", "remaining_time": "14:46:23"} +{"current_steps": 415, "total_steps": 1647, "loss": 0.3348, "lr": 9.31948896871795e-06, "epoch": 0.25208807896735, "percentage": 25.2, "elapsed_time": "4:58:20", "remaining_time": "14:45:42"} +{"current_steps": 416, "total_steps": 1647, "loss": 0.3512, "lr": 9.31414081597194e-06, "epoch": 0.2526955201214882, "percentage": 25.26, "elapsed_time": "4:59:03", "remaining_time": "14:44:56"} +{"current_steps": 417, "total_steps": 1647, "loss": 0.3503, "lr": 9.30877327681152e-06, "epoch": 0.2533029612756264, "percentage": 25.32, "elapsed_time": "4:59:44", "remaining_time": "14:44:06"} +{"current_steps": 418, "total_steps": 1647, "loss": 0.3162, "lr": 9.303386375356752e-06, "epoch": 0.2539104024297646, "percentage": 25.38, "elapsed_time": "5:00:28", "remaining_time": "14:43:28"} +{"current_steps": 419, "total_steps": 1647, "loss": 0.3103, "lr": 9.297980135814706e-06, "epoch": 0.25451784358390284, "percentage": 25.44, "elapsed_time": "5:01:14", "remaining_time": "14:42:53"} +{"current_steps": 420, "total_steps": 1647, "loss": 0.3187, "lr": 9.292554582479349e-06, "epoch": 0.255125284738041, "percentage": 25.5, "elapsed_time": "5:01:54", "remaining_time": "14:42:00"} +{"current_steps": 421, "total_steps": 1647, "loss": 0.3267, "lr": 9.28710973973144e-06, "epoch": 
0.2557327258921792, "percentage": 25.56, "elapsed_time": "5:02:35", "remaining_time": "14:41:10"} +{"current_steps": 422, "total_steps": 1647, "loss": 0.3329, "lr": 9.281645632038417e-06, "epoch": 0.2563401670463174, "percentage": 25.62, "elapsed_time": "5:03:19", "remaining_time": "14:40:31"} +{"current_steps": 423, "total_steps": 1647, "loss": 0.3148, "lr": 9.276162283954293e-06, "epoch": 0.2569476082004556, "percentage": 25.68, "elapsed_time": "5:04:04", "remaining_time": "14:39:51"} +{"current_steps": 424, "total_steps": 1647, "loss": 0.3431, "lr": 9.270659720119533e-06, "epoch": 0.2575550493545938, "percentage": 25.74, "elapsed_time": "5:04:47", "remaining_time": "14:39:08"} +{"current_steps": 425, "total_steps": 1647, "loss": 0.3422, "lr": 9.265137965260962e-06, "epoch": 0.25816249050873197, "percentage": 25.8, "elapsed_time": "5:05:31", "remaining_time": "14:38:27"} +{"current_steps": 426, "total_steps": 1647, "loss": 0.3195, "lr": 9.259597044191635e-06, "epoch": 0.25876993166287016, "percentage": 25.87, "elapsed_time": "5:06:17", "remaining_time": "14:37:54"} +{"current_steps": 427, "total_steps": 1647, "loss": 0.3238, "lr": 9.254036981810741e-06, "epoch": 0.25937737281700834, "percentage": 25.93, "elapsed_time": "5:07:00", "remaining_time": "14:37:10"} +{"current_steps": 428, "total_steps": 1647, "loss": 0.309, "lr": 9.248457803103476e-06, "epoch": 0.25998481397114653, "percentage": 25.99, "elapsed_time": "5:07:43", "remaining_time": "14:36:26"} +{"current_steps": 429, "total_steps": 1647, "loss": 0.3031, "lr": 9.242859533140947e-06, "epoch": 0.2605922551252847, "percentage": 26.05, "elapsed_time": "5:08:29", "remaining_time": "14:35:51"} +{"current_steps": 430, "total_steps": 1647, "loss": 0.2901, "lr": 9.237242197080045e-06, "epoch": 0.2611996962794229, "percentage": 26.11, "elapsed_time": "5:09:09", "remaining_time": "14:35:00"} +{"current_steps": 431, "total_steps": 1647, "loss": 0.2932, "lr": 9.231605820163343e-06, "epoch": 0.2618071374335611, 
"percentage": 26.17, "elapsed_time": "5:09:54", "remaining_time": "14:34:20"} +{"current_steps": 432, "total_steps": 1647, "loss": 0.3333, "lr": 9.225950427718974e-06, "epoch": 0.26241457858769934, "percentage": 26.23, "elapsed_time": "5:10:35", "remaining_time": "14:33:33"} +{"current_steps": 433, "total_steps": 1647, "loss": 0.3098, "lr": 9.220276045160524e-06, "epoch": 0.26302201974183753, "percentage": 26.29, "elapsed_time": "5:11:22", "remaining_time": "14:33:01"} +{"current_steps": 434, "total_steps": 1647, "loss": 0.2914, "lr": 9.21458269798691e-06, "epoch": 0.2636294608959757, "percentage": 26.35, "elapsed_time": "5:12:05", "remaining_time": "14:32:17"} +{"current_steps": 435, "total_steps": 1647, "loss": 0.3191, "lr": 9.208870411782276e-06, "epoch": 0.2642369020501139, "percentage": 26.41, "elapsed_time": "5:12:51", "remaining_time": "14:31:40"} +{"current_steps": 436, "total_steps": 1647, "loss": 0.3397, "lr": 9.203139212215868e-06, "epoch": 0.2648443432042521, "percentage": 26.47, "elapsed_time": "5:13:36", "remaining_time": "14:31:04"} +{"current_steps": 437, "total_steps": 1647, "loss": 0.3696, "lr": 9.197389125041925e-06, "epoch": 0.2654517843583903, "percentage": 26.53, "elapsed_time": "5:14:22", "remaining_time": "14:30:29"} +{"current_steps": 438, "total_steps": 1647, "loss": 0.2926, "lr": 9.191620176099559e-06, "epoch": 0.2660592255125285, "percentage": 26.59, "elapsed_time": "5:15:08", "remaining_time": "14:29:53"} +{"current_steps": 439, "total_steps": 1647, "loss": 0.3532, "lr": 9.185832391312644e-06, "epoch": 0.26666666666666666, "percentage": 26.65, "elapsed_time": "5:15:53", "remaining_time": "14:29:14"} +{"current_steps": 440, "total_steps": 1647, "loss": 0.3313, "lr": 9.180025796689692e-06, "epoch": 0.26727410782080485, "percentage": 26.72, "elapsed_time": "5:16:37", "remaining_time": "14:28:34"} +{"current_steps": 441, "total_steps": 1647, "loss": 0.2886, "lr": 9.174200418323746e-06, "epoch": 0.26788154897494304, "percentage": 26.78, 
"elapsed_time": "5:17:21", "remaining_time": "14:27:52"} +{"current_steps": 442, "total_steps": 1647, "loss": 0.2921, "lr": 9.168356282392253e-06, "epoch": 0.26848899012908123, "percentage": 26.84, "elapsed_time": "5:18:00", "remaining_time": "14:26:58"} +{"current_steps": 443, "total_steps": 1647, "loss": 0.3391, "lr": 9.16249341515695e-06, "epoch": 0.2690964312832194, "percentage": 26.9, "elapsed_time": "5:18:43", "remaining_time": "14:26:13"} +{"current_steps": 444, "total_steps": 1647, "loss": 0.3159, "lr": 9.156611842963753e-06, "epoch": 0.2697038724373576, "percentage": 26.96, "elapsed_time": "5:19:28", "remaining_time": "14:25:35"} +{"current_steps": 445, "total_steps": 1647, "loss": 0.3654, "lr": 9.150711592242627e-06, "epoch": 0.27031131359149585, "percentage": 27.02, "elapsed_time": "5:20:11", "remaining_time": "14:24:53"} +{"current_steps": 446, "total_steps": 1647, "loss": 0.3107, "lr": 9.144792689507471e-06, "epoch": 0.27091875474563404, "percentage": 27.08, "elapsed_time": "5:20:54", "remaining_time": "14:24:09"} +{"current_steps": 447, "total_steps": 1647, "loss": 0.3219, "lr": 9.138855161356006e-06, "epoch": 0.2715261958997722, "percentage": 27.14, "elapsed_time": "5:21:38", "remaining_time": "14:23:27"} +{"current_steps": 448, "total_steps": 1647, "loss": 0.3262, "lr": 9.132899034469648e-06, "epoch": 0.2721336370539104, "percentage": 27.2, "elapsed_time": "5:22:19", "remaining_time": "14:22:39"} +{"current_steps": 449, "total_steps": 1647, "loss": 0.2947, "lr": 9.126924335613385e-06, "epoch": 0.2727410782080486, "percentage": 27.26, "elapsed_time": "5:23:00", "remaining_time": "14:21:50"} +{"current_steps": 450, "total_steps": 1647, "loss": 0.2982, "lr": 9.120931091635669e-06, "epoch": 0.2733485193621868, "percentage": 27.32, "elapsed_time": "5:23:45", "remaining_time": "14:21:10"} +{"current_steps": 451, "total_steps": 1647, "loss": 0.3189, "lr": 9.114919329468283e-06, "epoch": 0.273955960516325, "percentage": 27.38, "elapsed_time": "5:24:25", 
"remaining_time": "14:20:19"} +{"current_steps": 452, "total_steps": 1647, "loss": 0.2653, "lr": 9.108889076126226e-06, "epoch": 0.27456340167046317, "percentage": 27.44, "elapsed_time": "5:25:05", "remaining_time": "14:19:28"} +{"current_steps": 453, "total_steps": 1647, "loss": 0.2785, "lr": 9.102840358707594e-06, "epoch": 0.27517084282460136, "percentage": 27.5, "elapsed_time": "5:25:47", "remaining_time": "14:18:43"} +{"current_steps": 454, "total_steps": 1647, "loss": 0.3484, "lr": 9.09677320439345e-06, "epoch": 0.27577828397873955, "percentage": 27.57, "elapsed_time": "5:26:29", "remaining_time": "14:17:57"} +{"current_steps": 455, "total_steps": 1647, "loss": 0.3039, "lr": 9.090687640447709e-06, "epoch": 0.27638572513287774, "percentage": 27.63, "elapsed_time": "5:27:09", "remaining_time": "14:17:05"} +{"current_steps": 456, "total_steps": 1647, "loss": 0.3368, "lr": 9.084583694217012e-06, "epoch": 0.2769931662870159, "percentage": 27.69, "elapsed_time": "5:27:50", "remaining_time": "14:16:16"} +{"current_steps": 457, "total_steps": 1647, "loss": 0.3416, "lr": 9.07846139313061e-06, "epoch": 0.2776006074411541, "percentage": 27.75, "elapsed_time": "5:28:33", "remaining_time": "14:15:32"} +{"current_steps": 458, "total_steps": 1647, "loss": 0.2921, "lr": 9.072320764700223e-06, "epoch": 0.27820804859529236, "percentage": 27.81, "elapsed_time": "5:29:17", "remaining_time": "14:14:51"} +{"current_steps": 459, "total_steps": 1647, "loss": 0.2738, "lr": 9.066161836519942e-06, "epoch": 0.27881548974943055, "percentage": 27.87, "elapsed_time": "5:30:01", "remaining_time": "14:14:10"} +{"current_steps": 460, "total_steps": 1647, "loss": 0.3244, "lr": 9.059984636266082e-06, "epoch": 0.27942293090356873, "percentage": 27.93, "elapsed_time": "5:30:40", "remaining_time": "14:13:17"} +{"current_steps": 461, "total_steps": 1647, "loss": 0.2867, "lr": 9.053789191697072e-06, "epoch": 0.2800303720577069, "percentage": 27.99, "elapsed_time": "5:31:24", "remaining_time": 
"14:12:36"} +{"current_steps": 462, "total_steps": 1647, "loss": 0.2914, "lr": 9.047575530653324e-06, "epoch": 0.2806378132118451, "percentage": 28.05, "elapsed_time": "5:32:07", "remaining_time": "14:11:53"} +{"current_steps": 463, "total_steps": 1647, "loss": 0.2882, "lr": 9.041343681057106e-06, "epoch": 0.2812452543659833, "percentage": 28.11, "elapsed_time": "5:32:53", "remaining_time": "14:11:17"} +{"current_steps": 464, "total_steps": 1647, "loss": 0.2814, "lr": 9.035093670912424e-06, "epoch": 0.2818526955201215, "percentage": 28.17, "elapsed_time": "5:33:35", "remaining_time": "14:10:29"} +{"current_steps": 465, "total_steps": 1647, "loss": 0.3444, "lr": 9.028825528304892e-06, "epoch": 0.2824601366742597, "percentage": 28.23, "elapsed_time": "5:34:17", "remaining_time": "14:09:46"} +{"current_steps": 466, "total_steps": 1647, "loss": 0.3403, "lr": 9.022539281401601e-06, "epoch": 0.28306757782839786, "percentage": 28.29, "elapsed_time": "5:35:00", "remaining_time": "14:09:01"} +{"current_steps": 467, "total_steps": 1647, "loss": 0.3225, "lr": 9.016234958451002e-06, "epoch": 0.28367501898253605, "percentage": 28.35, "elapsed_time": "5:35:43", "remaining_time": "14:08:18"} +{"current_steps": 468, "total_steps": 1647, "loss": 0.2972, "lr": 9.009912587782772e-06, "epoch": 0.28428246013667424, "percentage": 28.42, "elapsed_time": "5:36:25", "remaining_time": "14:07:30"} +{"current_steps": 469, "total_steps": 1647, "loss": 0.3025, "lr": 9.00357219780769e-06, "epoch": 0.28488990129081243, "percentage": 28.48, "elapsed_time": "5:37:06", "remaining_time": "14:06:44"} +{"current_steps": 470, "total_steps": 1647, "loss": 0.3368, "lr": 8.997213817017508e-06, "epoch": 0.2854973424449506, "percentage": 28.54, "elapsed_time": "5:37:50", "remaining_time": "14:06:02"} +{"current_steps": 471, "total_steps": 1647, "loss": 0.3208, "lr": 8.990837473984818e-06, "epoch": 0.28610478359908886, "percentage": 28.6, "elapsed_time": "5:38:33", "remaining_time": "14:05:18"} 
+{"current_steps": 472, "total_steps": 1647, "loss": 0.2963, "lr": 8.984443197362938e-06, "epoch": 0.28671222475322705, "percentage": 28.66, "elapsed_time": "5:39:19", "remaining_time": "14:04:43"} +{"current_steps": 473, "total_steps": 1647, "loss": 0.3049, "lr": 8.978031015885767e-06, "epoch": 0.28731966590736524, "percentage": 28.72, "elapsed_time": "5:40:04", "remaining_time": "14:04:04"} +{"current_steps": 474, "total_steps": 1647, "loss": 0.2873, "lr": 8.971600958367668e-06, "epoch": 0.28792710706150343, "percentage": 28.78, "elapsed_time": "5:40:50", "remaining_time": "14:03:29"} +{"current_steps": 475, "total_steps": 1647, "loss": 0.2933, "lr": 8.965153053703325e-06, "epoch": 0.2885345482156416, "percentage": 28.84, "elapsed_time": "5:41:33", "remaining_time": "14:02:44"} +{"current_steps": 476, "total_steps": 1647, "loss": 0.3211, "lr": 8.958687330867634e-06, "epoch": 0.2891419893697798, "percentage": 28.9, "elapsed_time": "5:42:13", "remaining_time": "14:01:53"} +{"current_steps": 477, "total_steps": 1647, "loss": 0.3216, "lr": 8.952203818915548e-06, "epoch": 0.289749430523918, "percentage": 28.96, "elapsed_time": "5:42:58", "remaining_time": "14:01:16"} +{"current_steps": 478, "total_steps": 1647, "loss": 0.2862, "lr": 8.94570254698197e-06, "epoch": 0.2903568716780562, "percentage": 29.02, "elapsed_time": "5:43:38", "remaining_time": "14:00:25"} +{"current_steps": 479, "total_steps": 1647, "loss": 0.3105, "lr": 8.939183544281597e-06, "epoch": 0.29096431283219437, "percentage": 29.08, "elapsed_time": "5:44:19", "remaining_time": "13:59:36"} +{"current_steps": 480, "total_steps": 1647, "loss": 0.3272, "lr": 8.932646840108818e-06, "epoch": 0.29157175398633256, "percentage": 29.14, "elapsed_time": "5:45:03", "remaining_time": "13:58:56"} +{"current_steps": 481, "total_steps": 1647, "loss": 0.2761, "lr": 8.926092463837557e-06, "epoch": 0.29217919514047075, "percentage": 29.2, "elapsed_time": "5:45:48", "remaining_time": "13:58:16"} +{"current_steps": 482, 
"total_steps": 1647, "loss": 0.3064, "lr": 8.919520444921153e-06, "epoch": 0.29278663629460894, "percentage": 29.27, "elapsed_time": "5:46:30", "remaining_time": "13:57:30"} +{"current_steps": 483, "total_steps": 1647, "loss": 0.2865, "lr": 8.912930812892228e-06, "epoch": 0.2933940774487471, "percentage": 29.33, "elapsed_time": "5:47:12", "remaining_time": "13:56:45"} +{"current_steps": 484, "total_steps": 1647, "loss": 0.2686, "lr": 8.906323597362547e-06, "epoch": 0.29400151860288537, "percentage": 29.39, "elapsed_time": "5:47:56", "remaining_time": "13:56:04"} +{"current_steps": 485, "total_steps": 1647, "loss": 0.2879, "lr": 8.899698828022895e-06, "epoch": 0.29460895975702356, "percentage": 29.45, "elapsed_time": "5:48:40", "remaining_time": "13:55:22"} +{"current_steps": 486, "total_steps": 1647, "loss": 0.3086, "lr": 8.893056534642938e-06, "epoch": 0.29521640091116175, "percentage": 29.51, "elapsed_time": "5:49:23", "remaining_time": "13:54:38"} +{"current_steps": 487, "total_steps": 1647, "loss": 0.3277, "lr": 8.886396747071085e-06, "epoch": 0.29582384206529994, "percentage": 29.57, "elapsed_time": "5:50:07", "remaining_time": "13:53:57"} +{"current_steps": 488, "total_steps": 1647, "loss": 0.3181, "lr": 8.879719495234363e-06, "epoch": 0.2964312832194381, "percentage": 29.63, "elapsed_time": "5:50:48", "remaining_time": "13:53:11"} +{"current_steps": 489, "total_steps": 1647, "loss": 0.3102, "lr": 8.873024809138272e-06, "epoch": 0.2970387243735763, "percentage": 29.69, "elapsed_time": "5:51:28", "remaining_time": "13:52:20"} +{"current_steps": 490, "total_steps": 1647, "loss": 0.2998, "lr": 8.866312718866669e-06, "epoch": 0.2976461655277145, "percentage": 29.75, "elapsed_time": "5:52:12", "remaining_time": "13:51:37"} +{"current_steps": 491, "total_steps": 1647, "loss": 0.3099, "lr": 8.859583254581604e-06, "epoch": 0.2982536066818527, "percentage": 29.81, "elapsed_time": "5:52:55", "remaining_time": "13:50:55"} +{"current_steps": 492, "total_steps": 1647, 
"loss": 0.3386, "lr": 8.852836446523213e-06, "epoch": 0.2988610478359909, "percentage": 29.87, "elapsed_time": "5:53:42", "remaining_time": "13:50:20"} +{"current_steps": 493, "total_steps": 1647, "loss": 0.2987, "lr": 8.846072325009562e-06, "epoch": 0.29946848899012907, "percentage": 29.93, "elapsed_time": "5:54:25", "remaining_time": "13:49:36"} +{"current_steps": 494, "total_steps": 1647, "loss": 0.3282, "lr": 8.83929092043652e-06, "epoch": 0.30007593014426726, "percentage": 29.99, "elapsed_time": "5:55:09", "remaining_time": "13:48:55"} +{"current_steps": 495, "total_steps": 1647, "loss": 0.331, "lr": 8.832492263277624e-06, "epoch": 0.30068337129840544, "percentage": 30.05, "elapsed_time": "5:55:49", "remaining_time": "13:48:07"} +{"current_steps": 496, "total_steps": 1647, "loss": 0.3073, "lr": 8.825676384083936e-06, "epoch": 0.30129081245254363, "percentage": 30.12, "elapsed_time": "5:56:31", "remaining_time": "13:47:21"} +{"current_steps": 497, "total_steps": 1647, "loss": 0.2886, "lr": 8.818843313483907e-06, "epoch": 0.3018982536066819, "percentage": 30.18, "elapsed_time": "5:57:17", "remaining_time": "13:46:43"} +{"current_steps": 498, "total_steps": 1647, "loss": 0.2974, "lr": 8.811993082183243e-06, "epoch": 0.30250569476082007, "percentage": 30.24, "elapsed_time": "5:58:01", "remaining_time": "13:46:02"} +{"current_steps": 499, "total_steps": 1647, "loss": 0.2997, "lr": 8.805125720964766e-06, "epoch": 0.30311313591495825, "percentage": 30.3, "elapsed_time": "5:58:41", "remaining_time": "13:45:11"} +{"current_steps": 500, "total_steps": 1647, "loss": 0.3188, "lr": 8.798241260688273e-06, "epoch": 0.30372057706909644, "percentage": 30.36, "elapsed_time": "5:59:21", "remaining_time": "13:44:21"} +{"current_steps": 501, "total_steps": 1647, "loss": 0.3156, "lr": 8.791339732290398e-06, "epoch": 0.30432801822323463, "percentage": 30.42, "elapsed_time": "6:00:05", "remaining_time": "13:43:41"} +{"current_steps": 502, "total_steps": 1647, "loss": 0.3006, "lr": 
8.784421166784476e-06, "epoch": 0.3049354593773728, "percentage": 30.48, "elapsed_time": "6:00:48", "remaining_time": "13:42:58"} +{"current_steps": 503, "total_steps": 1647, "loss": 0.2795, "lr": 8.7774855952604e-06, "epoch": 0.305542900531511, "percentage": 30.54, "elapsed_time": "6:01:30", "remaining_time": "13:42:11"} +{"current_steps": 504, "total_steps": 1647, "loss": 0.3045, "lr": 8.770533048884483e-06, "epoch": 0.3061503416856492, "percentage": 30.6, "elapsed_time": "6:02:13", "remaining_time": "13:41:28"} +{"current_steps": 505, "total_steps": 1647, "loss": 0.2759, "lr": 8.763563558899317e-06, "epoch": 0.3067577828397874, "percentage": 30.66, "elapsed_time": "6:02:54", "remaining_time": "13:40:41"} +{"current_steps": 506, "total_steps": 1647, "loss": 0.3117, "lr": 8.756577156623636e-06, "epoch": 0.3073652239939256, "percentage": 30.72, "elapsed_time": "6:03:35", "remaining_time": "13:39:53"} +{"current_steps": 507, "total_steps": 1647, "loss": 0.2716, "lr": 8.749573873452169e-06, "epoch": 0.30797266514806376, "percentage": 30.78, "elapsed_time": "6:04:19", "remaining_time": "13:39:11"} +{"current_steps": 508, "total_steps": 1647, "loss": 0.2851, "lr": 8.742553740855507e-06, "epoch": 0.30858010630220195, "percentage": 30.84, "elapsed_time": "6:05:04", "remaining_time": "13:38:33"} +{"current_steps": 509, "total_steps": 1647, "loss": 0.2897, "lr": 8.735516790379952e-06, "epoch": 0.30918754745634014, "percentage": 30.9, "elapsed_time": "6:05:45", "remaining_time": "13:37:45"} +{"current_steps": 510, "total_steps": 1647, "loss": 0.2584, "lr": 8.728463053647382e-06, "epoch": 0.3097949886104784, "percentage": 30.97, "elapsed_time": "6:06:22", "remaining_time": "13:36:49"} +{"current_steps": 511, "total_steps": 1647, "loss": 0.3144, "lr": 8.721392562355113e-06, "epoch": 0.31040242976461657, "percentage": 31.03, "elapsed_time": "6:07:03", "remaining_time": "13:35:59"} +{"current_steps": 512, "total_steps": 1647, "loss": 0.3046, "lr": 8.71430534827574e-06, "epoch": 
0.31100987091875476, "percentage": 31.09, "elapsed_time": "6:07:48", "remaining_time": "13:35:21"} +{"current_steps": 513, "total_steps": 1647, "loss": 0.3096, "lr": 8.707201443257015e-06, "epoch": 0.31161731207289295, "percentage": 31.15, "elapsed_time": "6:08:30", "remaining_time": "13:34:35"} +{"current_steps": 514, "total_steps": 1647, "loss": 0.3344, "lr": 8.700080879221689e-06, "epoch": 0.31222475322703114, "percentage": 31.21, "elapsed_time": "6:09:12", "remaining_time": "13:33:50"} +{"current_steps": 515, "total_steps": 1647, "loss": 0.3317, "lr": 8.692943688167371e-06, "epoch": 0.3128321943811693, "percentage": 31.27, "elapsed_time": "6:09:53", "remaining_time": "13:33:02"} +{"current_steps": 516, "total_steps": 1647, "loss": 0.3035, "lr": 8.685789902166395e-06, "epoch": 0.3134396355353075, "percentage": 31.33, "elapsed_time": "6:10:40", "remaining_time": "13:32:28"} +{"current_steps": 517, "total_steps": 1647, "loss": 0.305, "lr": 8.67861955336566e-06, "epoch": 0.3140470766894457, "percentage": 31.39, "elapsed_time": "6:11:23", "remaining_time": "13:31:44"} +{"current_steps": 518, "total_steps": 1647, "loss": 0.3161, "lr": 8.671432673986493e-06, "epoch": 0.3146545178435839, "percentage": 31.45, "elapsed_time": "6:12:06", "remaining_time": "13:31:01"} +{"current_steps": 519, "total_steps": 1647, "loss": 0.3157, "lr": 8.664229296324514e-06, "epoch": 0.3152619589977221, "percentage": 31.51, "elapsed_time": "6:12:47", "remaining_time": "13:30:13"} +{"current_steps": 520, "total_steps": 1647, "loss": 0.3048, "lr": 8.657009452749466e-06, "epoch": 0.31586940015186027, "percentage": 31.57, "elapsed_time": "6:13:34", "remaining_time": "13:29:39"} +{"current_steps": 521, "total_steps": 1647, "loss": 0.2668, "lr": 8.649773175705099e-06, "epoch": 0.31647684130599846, "percentage": 31.63, "elapsed_time": "6:14:17", "remaining_time": "13:28:56"} +{"current_steps": 522, "total_steps": 1647, "loss": 0.3098, "lr": 8.642520497709001e-06, "epoch": 0.3170842824601367, 
"percentage": 31.69, "elapsed_time": "6:14:59", "remaining_time": "13:28:10"} +{"current_steps": 523, "total_steps": 1647, "loss": 0.3015, "lr": 8.635251451352463e-06, "epoch": 0.3176917236142749, "percentage": 31.75, "elapsed_time": "6:15:42", "remaining_time": "13:27:27"} +{"current_steps": 524, "total_steps": 1647, "loss": 0.3245, "lr": 8.627966069300332e-06, "epoch": 0.3182991647684131, "percentage": 31.82, "elapsed_time": "6:16:25", "remaining_time": "13:26:43"} +{"current_steps": 525, "total_steps": 1647, "loss": 0.3039, "lr": 8.620664384290863e-06, "epoch": 0.31890660592255127, "percentage": 31.88, "elapsed_time": "6:17:08", "remaining_time": "13:26:00"} +{"current_steps": 526, "total_steps": 1647, "loss": 0.3078, "lr": 8.613346429135567e-06, "epoch": 0.31951404707668946, "percentage": 31.94, "elapsed_time": "6:17:50", "remaining_time": "13:25:15"} +{"current_steps": 527, "total_steps": 1647, "loss": 0.3385, "lr": 8.606012236719073e-06, "epoch": 0.32012148823082764, "percentage": 32.0, "elapsed_time": "6:18:34", "remaining_time": "13:24:32"} +{"current_steps": 528, "total_steps": 1647, "loss": 0.2775, "lr": 8.598661839998972e-06, "epoch": 0.32072892938496583, "percentage": 32.06, "elapsed_time": "6:19:20", "remaining_time": "13:23:56"} +{"current_steps": 529, "total_steps": 1647, "loss": 0.2942, "lr": 8.591295272005674e-06, "epoch": 0.321336370539104, "percentage": 32.12, "elapsed_time": "6:20:00", "remaining_time": "13:23:06"} +{"current_steps": 530, "total_steps": 1647, "loss": 0.2957, "lr": 8.583912565842258e-06, "epoch": 0.3219438116932422, "percentage": 32.18, "elapsed_time": "6:20:43", "remaining_time": "13:22:24"} +{"current_steps": 531, "total_steps": 1647, "loss": 0.2871, "lr": 8.576513754684318e-06, "epoch": 0.3225512528473804, "percentage": 32.24, "elapsed_time": "6:21:29", "remaining_time": "13:21:46"} +{"current_steps": 532, "total_steps": 1647, "loss": 0.3159, "lr": 8.569098871779828e-06, "epoch": 0.3231586940015186, "percentage": 32.3, 
"elapsed_time": "6:22:11", "remaining_time": "13:21:01"} +{"current_steps": 533, "total_steps": 1647, "loss": 0.3274, "lr": 8.561667950448973e-06, "epoch": 0.3237661351556568, "percentage": 32.36, "elapsed_time": "6:22:53", "remaining_time": "13:20:15"} +{"current_steps": 534, "total_steps": 1647, "loss": 0.2923, "lr": 8.554221024084019e-06, "epoch": 0.32437357630979496, "percentage": 32.42, "elapsed_time": "6:23:38", "remaining_time": "13:19:36"} +{"current_steps": 535, "total_steps": 1647, "loss": 0.3172, "lr": 8.546758126149148e-06, "epoch": 0.3249810174639332, "percentage": 32.48, "elapsed_time": "6:24:18", "remaining_time": "13:18:48"} +{"current_steps": 536, "total_steps": 1647, "loss": 0.3294, "lr": 8.539279290180315e-06, "epoch": 0.3255884586180714, "percentage": 32.54, "elapsed_time": "6:25:01", "remaining_time": "13:18:03"} +{"current_steps": 537, "total_steps": 1647, "loss": 0.3524, "lr": 8.531784549785098e-06, "epoch": 0.3261958997722096, "percentage": 32.6, "elapsed_time": "6:25:42", "remaining_time": "13:17:16"} +{"current_steps": 538, "total_steps": 1647, "loss": 0.3158, "lr": 8.524273938642539e-06, "epoch": 0.3268033409263478, "percentage": 32.67, "elapsed_time": "6:26:25", "remaining_time": "13:16:33"} +{"current_steps": 539, "total_steps": 1647, "loss": 0.3318, "lr": 8.516747490503001e-06, "epoch": 0.32741078208048596, "percentage": 32.73, "elapsed_time": "6:27:05", "remaining_time": "13:15:43"} +{"current_steps": 540, "total_steps": 1647, "loss": 0.3034, "lr": 8.509205239188017e-06, "epoch": 0.32801822323462415, "percentage": 32.79, "elapsed_time": "6:27:49", "remaining_time": "13:15:02"} +{"current_steps": 541, "total_steps": 1647, "loss": 0.3249, "lr": 8.501647218590127e-06, "epoch": 0.32862566438876234, "percentage": 32.85, "elapsed_time": "6:28:32", "remaining_time": "13:14:18"} +{"current_steps": 542, "total_steps": 1647, "loss": 0.3245, "lr": 8.494073462672743e-06, "epoch": 0.32923310554290053, "percentage": 32.91, "elapsed_time": 
"6:29:15", "remaining_time": "13:13:35"} +{"current_steps": 543, "total_steps": 1647, "loss": 0.3111, "lr": 8.486484005469977e-06, "epoch": 0.3298405466970387, "percentage": 32.97, "elapsed_time": "6:29:58", "remaining_time": "13:12:53"} +{"current_steps": 544, "total_steps": 1647, "loss": 0.2774, "lr": 8.478878881086505e-06, "epoch": 0.3304479878511769, "percentage": 33.03, "elapsed_time": "6:30:43", "remaining_time": "13:12:13"} +{"current_steps": 545, "total_steps": 1647, "loss": 0.3591, "lr": 8.471258123697403e-06, "epoch": 0.3310554290053151, "percentage": 33.09, "elapsed_time": "6:31:24", "remaining_time": "13:11:25"} +{"current_steps": 546, "total_steps": 1647, "loss": 0.2964, "lr": 8.463621767547998e-06, "epoch": 0.3316628701594533, "percentage": 33.15, "elapsed_time": "6:32:09", "remaining_time": "13:10:47"} +{"current_steps": 547, "total_steps": 1647, "loss": 0.2782, "lr": 8.455969846953711e-06, "epoch": 0.33227031131359147, "percentage": 33.21, "elapsed_time": "6:32:52", "remaining_time": "13:10:03"} +{"current_steps": 548, "total_steps": 1647, "loss": 0.2923, "lr": 8.448302396299906e-06, "epoch": 0.3328777524677297, "percentage": 33.27, "elapsed_time": "6:33:35", "remaining_time": "13:09:20"} +{"current_steps": 549, "total_steps": 1647, "loss": 0.256, "lr": 8.440619450041736e-06, "epoch": 0.3334851936218679, "percentage": 33.33, "elapsed_time": "6:34:20", "remaining_time": "13:08:41"} +{"current_steps": 550, "total_steps": 1647, "loss": 0.2978, "lr": 8.432921042703985e-06, "epoch": 0.3340926347760061, "percentage": 33.39, "elapsed_time": "6:34:58", "remaining_time": "13:07:48"} +{"current_steps": 551, "total_steps": 1647, "loss": 0.3307, "lr": 8.425207208880914e-06, "epoch": 0.3347000759301443, "percentage": 33.45, "elapsed_time": "6:35:42", "remaining_time": "13:07:06"} +{"current_steps": 552, "total_steps": 1647, "loss": 0.3149, "lr": 8.417477983236107e-06, "epoch": 0.33530751708428247, "percentage": 33.52, "elapsed_time": "6:36:25", "remaining_time": 
"13:06:22"} +{"current_steps": 553, "total_steps": 1647, "loss": 0.3152, "lr": 8.409733400502311e-06, "epoch": 0.33591495823842066, "percentage": 33.58, "elapsed_time": "6:37:10", "remaining_time": "13:05:43"} +{"current_steps": 554, "total_steps": 1647, "loss": 0.2706, "lr": 8.401973495481289e-06, "epoch": 0.33652239939255885, "percentage": 33.64, "elapsed_time": "6:37:53", "remaining_time": "13:05:01"} +{"current_steps": 555, "total_steps": 1647, "loss": 0.2847, "lr": 8.39419830304365e-06, "epoch": 0.33712984054669703, "percentage": 33.7, "elapsed_time": "6:38:37", "remaining_time": "13:04:19"} +{"current_steps": 556, "total_steps": 1647, "loss": 0.2851, "lr": 8.386407858128707e-06, "epoch": 0.3377372817008352, "percentage": 33.76, "elapsed_time": "6:39:17", "remaining_time": "13:03:29"} +{"current_steps": 557, "total_steps": 1647, "loss": 0.3078, "lr": 8.378602195744308e-06, "epoch": 0.3383447228549734, "percentage": 33.82, "elapsed_time": "6:39:58", "remaining_time": "13:02:43"} +{"current_steps": 558, "total_steps": 1647, "loss": 0.3397, "lr": 8.370781350966683e-06, "epoch": 0.3389521640091116, "percentage": 33.88, "elapsed_time": "6:40:42", "remaining_time": "13:02:00"} +{"current_steps": 559, "total_steps": 1647, "loss": 0.2842, "lr": 8.362945358940295e-06, "epoch": 0.3395596051632498, "percentage": 33.94, "elapsed_time": "6:41:24", "remaining_time": "13:01:16"} +{"current_steps": 560, "total_steps": 1647, "loss": 0.2658, "lr": 8.355094254877665e-06, "epoch": 0.340167046317388, "percentage": 34.0, "elapsed_time": "6:42:08", "remaining_time": "13:00:34"} +{"current_steps": 561, "total_steps": 1647, "loss": 0.3266, "lr": 8.347228074059227e-06, "epoch": 0.3407744874715262, "percentage": 34.06, "elapsed_time": "6:42:49", "remaining_time": "12:59:47"} +{"current_steps": 562, "total_steps": 1647, "loss": 0.2889, "lr": 8.339346851833163e-06, "epoch": 0.3413819286256644, "percentage": 34.12, "elapsed_time": "6:43:31", "remaining_time": "12:59:03"} +{"current_steps": 
563, "total_steps": 1647, "loss": 0.2993, "lr": 8.33145062361525e-06, "epoch": 0.3419893697798026, "percentage": 34.18, "elapsed_time": "6:44:14", "remaining_time": "12:58:19"} +{"current_steps": 564, "total_steps": 1647, "loss": 0.3011, "lr": 8.323539424888695e-06, "epoch": 0.3425968109339408, "percentage": 34.24, "elapsed_time": "6:44:55", "remaining_time": "12:57:33"} +{"current_steps": 565, "total_steps": 1647, "loss": 0.2745, "lr": 8.315613291203977e-06, "epoch": 0.343204252088079, "percentage": 34.3, "elapsed_time": "6:45:41", "remaining_time": "12:56:54"} +{"current_steps": 566, "total_steps": 1647, "loss": 0.3015, "lr": 8.30767225817869e-06, "epoch": 0.34381169324221716, "percentage": 34.37, "elapsed_time": "6:46:24", "remaining_time": "12:56:12"} +{"current_steps": 567, "total_steps": 1647, "loss": 0.2937, "lr": 8.299716361497377e-06, "epoch": 0.34441913439635535, "percentage": 34.43, "elapsed_time": "6:47:05", "remaining_time": "12:55:23"} +{"current_steps": 568, "total_steps": 1647, "loss": 0.3104, "lr": 8.291745636911382e-06, "epoch": 0.34502657555049354, "percentage": 34.49, "elapsed_time": "6:47:44", "remaining_time": "12:54:34"} +{"current_steps": 569, "total_steps": 1647, "loss": 0.3077, "lr": 8.283760120238672e-06, "epoch": 0.34563401670463173, "percentage": 34.55, "elapsed_time": "6:48:29", "remaining_time": "12:53:53"} +{"current_steps": 570, "total_steps": 1647, "loss": 0.2701, "lr": 8.27575984736369e-06, "epoch": 0.3462414578587699, "percentage": 34.61, "elapsed_time": "6:49:11", "remaining_time": "12:53:10"} +{"current_steps": 571, "total_steps": 1647, "loss": 0.3196, "lr": 8.26774485423719e-06, "epoch": 0.3468488990129081, "percentage": 34.67, "elapsed_time": "6:49:54", "remaining_time": "12:52:25"} +{"current_steps": 572, "total_steps": 1647, "loss": 0.2782, "lr": 8.259715176876069e-06, "epoch": 0.3474563401670463, "percentage": 34.73, "elapsed_time": "6:50:38", "remaining_time": "12:51:44"} +{"current_steps": 573, "total_steps": 1647, 
"loss": 0.3346, "lr": 8.251670851363214e-06, "epoch": 0.3480637813211845, "percentage": 34.79, "elapsed_time": "6:51:19", "remaining_time": "12:50:57"} +{"current_steps": 574, "total_steps": 1647, "loss": 0.2824, "lr": 8.243611913847337e-06, "epoch": 0.34867122247532273, "percentage": 34.85, "elapsed_time": "6:52:03", "remaining_time": "12:50:16"} +{"current_steps": 575, "total_steps": 1647, "loss": 0.27, "lr": 8.235538400542809e-06, "epoch": 0.3492786636294609, "percentage": 34.91, "elapsed_time": "6:52:45", "remaining_time": "12:49:32"} +{"current_steps": 576, "total_steps": 1647, "loss": 0.2719, "lr": 8.2274503477295e-06, "epoch": 0.3498861047835991, "percentage": 34.97, "elapsed_time": "6:53:25", "remaining_time": "12:48:42"} +{"current_steps": 577, "total_steps": 1647, "loss": 0.3191, "lr": 8.21934779175262e-06, "epoch": 0.3504935459377373, "percentage": 35.03, "elapsed_time": "6:54:07", "remaining_time": "12:47:56"} +{"current_steps": 578, "total_steps": 1647, "loss": 0.302, "lr": 8.211230769022552e-06, "epoch": 0.3511009870918755, "percentage": 35.09, "elapsed_time": "6:54:51", "remaining_time": "12:47:16"} +{"current_steps": 579, "total_steps": 1647, "loss": 0.3236, "lr": 8.203099316014679e-06, "epoch": 0.35170842824601367, "percentage": 35.15, "elapsed_time": "6:55:34", "remaining_time": "12:46:33"} +{"current_steps": 580, "total_steps": 1647, "loss": 0.3174, "lr": 8.19495346926924e-06, "epoch": 0.35231586940015186, "percentage": 35.22, "elapsed_time": "6:56:13", "remaining_time": "12:45:42"} +{"current_steps": 581, "total_steps": 1647, "loss": 0.2984, "lr": 8.18679326539115e-06, "epoch": 0.35292331055429005, "percentage": 35.28, "elapsed_time": "6:56:55", "remaining_time": "12:44:57"} +{"current_steps": 582, "total_steps": 1647, "loss": 0.3142, "lr": 8.178618741049841e-06, "epoch": 0.35353075170842824, "percentage": 35.34, "elapsed_time": "6:57:42", "remaining_time": "12:44:21"} +{"current_steps": 583, "total_steps": 1647, "loss": 0.3118, "lr": 
8.170429932979097e-06, "epoch": 0.3541381928625664, "percentage": 35.4, "elapsed_time": "6:58:25", "remaining_time": "12:43:38"} +{"current_steps": 584, "total_steps": 1647, "loss": 0.3114, "lr": 8.162226877976886e-06, "epoch": 0.3547456340167046, "percentage": 35.46, "elapsed_time": "6:59:07", "remaining_time": "12:42:53"} +{"current_steps": 585, "total_steps": 1647, "loss": 0.3252, "lr": 8.154009612905205e-06, "epoch": 0.3553530751708428, "percentage": 35.52, "elapsed_time": "6:59:51", "remaining_time": "12:42:12"} +{"current_steps": 586, "total_steps": 1647, "loss": 0.3388, "lr": 8.145778174689897e-06, "epoch": 0.355960516324981, "percentage": 35.58, "elapsed_time": "7:00:34", "remaining_time": "12:41:28"} +{"current_steps": 587, "total_steps": 1647, "loss": 0.2955, "lr": 8.137532600320502e-06, "epoch": 0.35656795747911924, "percentage": 35.64, "elapsed_time": "7:01:22", "remaining_time": "12:40:54"} +{"current_steps": 588, "total_steps": 1647, "loss": 0.2949, "lr": 8.129272926850079e-06, "epoch": 0.3571753986332574, "percentage": 35.7, "elapsed_time": "7:02:09", "remaining_time": "12:40:19"} +{"current_steps": 589, "total_steps": 1647, "loss": 0.2819, "lr": 8.120999191395048e-06, "epoch": 0.3577828397873956, "percentage": 35.76, "elapsed_time": "7:02:52", "remaining_time": "12:39:36"} +{"current_steps": 590, "total_steps": 1647, "loss": 0.288, "lr": 8.112711431135014e-06, "epoch": 0.3583902809415338, "percentage": 35.82, "elapsed_time": "7:03:36", "remaining_time": "12:38:54"} +{"current_steps": 591, "total_steps": 1647, "loss": 0.2897, "lr": 8.10440968331261e-06, "epoch": 0.358997722095672, "percentage": 35.88, "elapsed_time": "7:04:16", "remaining_time": "12:38:05"} +{"current_steps": 592, "total_steps": 1647, "loss": 0.3182, "lr": 8.096093985233323e-06, "epoch": 0.3596051632498102, "percentage": 35.94, "elapsed_time": "7:05:00", "remaining_time": "12:37:24"} +{"current_steps": 593, "total_steps": 1647, "loss": 0.3171, "lr": 8.087764374265325e-06, "epoch": 
0.36021260440394837, "percentage": 36.0, "elapsed_time": "7:05:44", "remaining_time": "12:36:42"} +{"current_steps": 594, "total_steps": 1647, "loss": 0.2841, "lr": 8.079420887839316e-06, "epoch": 0.36082004555808656, "percentage": 36.07, "elapsed_time": "7:06:30", "remaining_time": "12:36:05"} +{"current_steps": 595, "total_steps": 1647, "loss": 0.2975, "lr": 8.071063563448341e-06, "epoch": 0.36142748671222474, "percentage": 36.13, "elapsed_time": "7:07:12", "remaining_time": "12:35:20"} +{"current_steps": 596, "total_steps": 1647, "loss": 0.3001, "lr": 8.062692438647628e-06, "epoch": 0.36203492786636293, "percentage": 36.19, "elapsed_time": "7:07:57", "remaining_time": "12:34:39"} +{"current_steps": 597, "total_steps": 1647, "loss": 0.3006, "lr": 8.054307551054427e-06, "epoch": 0.3626423690205011, "percentage": 36.25, "elapsed_time": "7:08:41", "remaining_time": "12:33:58"} +{"current_steps": 598, "total_steps": 1647, "loss": 0.2829, "lr": 8.045908938347828e-06, "epoch": 0.3632498101746393, "percentage": 36.31, "elapsed_time": "7:09:22", "remaining_time": "12:33:12"} +{"current_steps": 599, "total_steps": 1647, "loss": 0.3338, "lr": 8.037496638268599e-06, "epoch": 0.3638572513287775, "percentage": 36.37, "elapsed_time": "7:10:05", "remaining_time": "12:32:28"} +{"current_steps": 600, "total_steps": 1647, "loss": 0.2817, "lr": 8.029070688619013e-06, "epoch": 0.36446469248291574, "percentage": 36.43, "elapsed_time": "7:10:49", "remaining_time": "12:31:46"} +{"current_steps": 601, "total_steps": 1647, "loss": 0.2928, "lr": 8.020631127262681e-06, "epoch": 0.36507213363705393, "percentage": 36.49, "elapsed_time": "7:11:33", "remaining_time": "12:31:05"} +{"current_steps": 602, "total_steps": 1647, "loss": 0.3163, "lr": 8.012177992124385e-06, "epoch": 0.3656795747911921, "percentage": 36.55, "elapsed_time": "7:12:18", "remaining_time": "12:30:26"} +{"current_steps": 603, "total_steps": 1647, "loss": 0.3026, "lr": 8.003711321189895e-06, "epoch": 0.3662870159453303, 
"percentage": 36.61, "elapsed_time": "7:12:59", "remaining_time": "12:29:39"} +{"current_steps": 604, "total_steps": 1647, "loss": 0.278, "lr": 7.995231152505815e-06, "epoch": 0.3668944570994685, "percentage": 36.67, "elapsed_time": "7:13:40", "remaining_time": "12:28:52"} +{"current_steps": 605, "total_steps": 1647, "loss": 0.3198, "lr": 7.986737524179398e-06, "epoch": 0.3675018982536067, "percentage": 36.73, "elapsed_time": "7:14:21", "remaining_time": "12:28:05"} +{"current_steps": 606, "total_steps": 1647, "loss": 0.2896, "lr": 7.978230474378383e-06, "epoch": 0.3681093394077449, "percentage": 36.79, "elapsed_time": "7:15:05", "remaining_time": "12:27:24"} +{"current_steps": 607, "total_steps": 1647, "loss": 0.2973, "lr": 7.96971004133082e-06, "epoch": 0.36871678056188306, "percentage": 36.85, "elapsed_time": "7:15:48", "remaining_time": "12:26:41"} +{"current_steps": 608, "total_steps": 1647, "loss": 0.2702, "lr": 7.961176263324902e-06, "epoch": 0.36932422171602125, "percentage": 36.92, "elapsed_time": "7:16:29", "remaining_time": "12:25:54"} +{"current_steps": 609, "total_steps": 1647, "loss": 0.3086, "lr": 7.952629178708783e-06, "epoch": 0.36993166287015944, "percentage": 36.98, "elapsed_time": "7:17:11", "remaining_time": "12:25:10"} +{"current_steps": 610, "total_steps": 1647, "loss": 0.2844, "lr": 7.944068825890424e-06, "epoch": 0.3705391040242976, "percentage": 37.04, "elapsed_time": "7:17:54", "remaining_time": "12:24:25"} +{"current_steps": 611, "total_steps": 1647, "loss": 0.2996, "lr": 7.935495243337397e-06, "epoch": 0.3711465451784358, "percentage": 37.1, "elapsed_time": "7:18:34", "remaining_time": "12:23:38"} +{"current_steps": 612, "total_steps": 1647, "loss": 0.2564, "lr": 7.92690846957673e-06, "epoch": 0.371753986332574, "percentage": 37.16, "elapsed_time": "7:19:15", "remaining_time": "12:22:51"} +{"current_steps": 613, "total_steps": 1647, "loss": 0.2789, "lr": 7.918308543194735e-06, "epoch": 0.37236142748671225, "percentage": 37.22, 
"elapsed_time": "7:19:56", "remaining_time": "12:22:05"} +{"current_steps": 614, "total_steps": 1647, "loss": 0.3291, "lr": 7.909695502836814e-06, "epoch": 0.37296886864085044, "percentage": 37.28, "elapsed_time": "7:20:39", "remaining_time": "12:21:21"} +{"current_steps": 615, "total_steps": 1647, "loss": 0.2658, "lr": 7.90106938720731e-06, "epoch": 0.3735763097949886, "percentage": 37.34, "elapsed_time": "7:21:24", "remaining_time": "12:20:42"} +{"current_steps": 616, "total_steps": 1647, "loss": 0.3267, "lr": 7.892430235069317e-06, "epoch": 0.3741837509491268, "percentage": 37.4, "elapsed_time": "7:22:09", "remaining_time": "12:20:02"} +{"current_steps": 617, "total_steps": 1647, "loss": 0.3302, "lr": 7.883778085244514e-06, "epoch": 0.374791192103265, "percentage": 37.46, "elapsed_time": "7:22:54", "remaining_time": "12:19:22"} +{"current_steps": 618, "total_steps": 1647, "loss": 0.2825, "lr": 7.875112976612984e-06, "epoch": 0.3753986332574032, "percentage": 37.52, "elapsed_time": "7:23:39", "remaining_time": "12:18:42"} +{"current_steps": 619, "total_steps": 1647, "loss": 0.2988, "lr": 7.866434948113046e-06, "epoch": 0.3760060744115414, "percentage": 37.58, "elapsed_time": "7:24:22", "remaining_time": "12:17:59"} +{"current_steps": 620, "total_steps": 1647, "loss": 0.3192, "lr": 7.857744038741076e-06, "epoch": 0.37661351556567957, "percentage": 37.64, "elapsed_time": "7:25:05", "remaining_time": "12:17:16"} +{"current_steps": 621, "total_steps": 1647, "loss": 0.3149, "lr": 7.849040287551331e-06, "epoch": 0.37722095671981776, "percentage": 37.7, "elapsed_time": "7:25:49", "remaining_time": "12:16:34"} +{"current_steps": 622, "total_steps": 1647, "loss": 0.2815, "lr": 7.84032373365578e-06, "epoch": 0.37782839787395595, "percentage": 37.77, "elapsed_time": "7:26:30", "remaining_time": "12:15:48"} +{"current_steps": 623, "total_steps": 1647, "loss": 0.3008, "lr": 7.831594416223916e-06, "epoch": 0.37843583902809413, "percentage": 37.83, "elapsed_time": "7:27:13", 
"remaining_time": "12:15:05"} +{"current_steps": 624, "total_steps": 1647, "loss": 0.3148, "lr": 7.822852374482597e-06, "epoch": 0.3790432801822323, "percentage": 37.89, "elapsed_time": "7:27:58", "remaining_time": "12:14:25"} +{"current_steps": 625, "total_steps": 1647, "loss": 0.3058, "lr": 7.814097647715848e-06, "epoch": 0.37965072133637057, "percentage": 37.95, "elapsed_time": "7:28:39", "remaining_time": "12:13:38"} +{"current_steps": 626, "total_steps": 1647, "loss": 0.2889, "lr": 7.805330275264707e-06, "epoch": 0.38025816249050876, "percentage": 38.01, "elapsed_time": "7:29:25", "remaining_time": "12:13:00"} +{"current_steps": 627, "total_steps": 1647, "loss": 0.2636, "lr": 7.796550296527032e-06, "epoch": 0.38086560364464694, "percentage": 38.07, "elapsed_time": "7:30:08", "remaining_time": "12:12:16"} +{"current_steps": 628, "total_steps": 1647, "loss": 0.3026, "lr": 7.787757750957335e-06, "epoch": 0.38147304479878513, "percentage": 38.13, "elapsed_time": "7:30:49", "remaining_time": "12:11:30"} +{"current_steps": 629, "total_steps": 1647, "loss": 0.2613, "lr": 7.778952678066591e-06, "epoch": 0.3820804859529233, "percentage": 38.19, "elapsed_time": "7:31:32", "remaining_time": "12:10:48"} +{"current_steps": 630, "total_steps": 1647, "loss": 0.3146, "lr": 7.77013511742208e-06, "epoch": 0.3826879271070615, "percentage": 38.25, "elapsed_time": "7:32:15", "remaining_time": "12:10:04"} +{"current_steps": 631, "total_steps": 1647, "loss": 0.2957, "lr": 7.761305108647188e-06, "epoch": 0.3832953682611997, "percentage": 38.31, "elapsed_time": "7:32:59", "remaining_time": "12:09:23"} +{"current_steps": 632, "total_steps": 1647, "loss": 0.2947, "lr": 7.752462691421245e-06, "epoch": 0.3839028094153379, "percentage": 38.37, "elapsed_time": "7:33:40", "remaining_time": "12:08:36"} +{"current_steps": 633, "total_steps": 1647, "loss": 0.3063, "lr": 7.743607905479338e-06, "epoch": 0.3845102505694761, "percentage": 38.43, "elapsed_time": "7:34:24", "remaining_time": 
"12:07:54"} +{"current_steps": 634, "total_steps": 1647, "loss": 0.2824, "lr": 7.734740790612137e-06, "epoch": 0.38511769172361426, "percentage": 38.49, "elapsed_time": "7:35:04", "remaining_time": "12:07:07"} +{"current_steps": 635, "total_steps": 1647, "loss": 0.2546, "lr": 7.72586138666571e-06, "epoch": 0.38572513287775245, "percentage": 38.55, "elapsed_time": "7:35:46", "remaining_time": "12:06:22"} +{"current_steps": 636, "total_steps": 1647, "loss": 0.2704, "lr": 7.716969733541357e-06, "epoch": 0.38633257403189064, "percentage": 38.62, "elapsed_time": "7:36:29", "remaining_time": "12:05:38"} +{"current_steps": 637, "total_steps": 1647, "loss": 0.2606, "lr": 7.708065871195413e-06, "epoch": 0.38694001518602883, "percentage": 38.68, "elapsed_time": "7:37:15", "remaining_time": "12:04:59"} +{"current_steps": 638, "total_steps": 1647, "loss": 0.3175, "lr": 7.699149839639086e-06, "epoch": 0.3875474563401671, "percentage": 38.74, "elapsed_time": "7:37:57", "remaining_time": "12:04:15"} +{"current_steps": 639, "total_steps": 1647, "loss": 0.3641, "lr": 7.690221678938258e-06, "epoch": 0.38815489749430526, "percentage": 38.8, "elapsed_time": "7:38:41", "remaining_time": "12:03:33"} +{"current_steps": 640, "total_steps": 1647, "loss": 0.2731, "lr": 7.681281429213328e-06, "epoch": 0.38876233864844345, "percentage": 38.86, "elapsed_time": "7:39:27", "remaining_time": "12:02:56"} +{"current_steps": 641, "total_steps": 1647, "loss": 0.2791, "lr": 7.672329130639007e-06, "epoch": 0.38936977980258164, "percentage": 38.92, "elapsed_time": "7:40:06", "remaining_time": "12:02:06"} +{"current_steps": 642, "total_steps": 1647, "loss": 0.3173, "lr": 7.663364823444157e-06, "epoch": 0.38997722095671983, "percentage": 38.98, "elapsed_time": "7:40:49", "remaining_time": "12:01:23"} +{"current_steps": 643, "total_steps": 1647, "loss": 0.3198, "lr": 7.654388547911605e-06, "epoch": 0.390584662110858, "percentage": 39.04, "elapsed_time": "7:41:31", "remaining_time": "12:00:38"} 
+{"current_steps": 644, "total_steps": 1647, "loss": 0.2446, "lr": 7.645400344377953e-06, "epoch": 0.3911921032649962, "percentage": 39.1, "elapsed_time": "7:42:11", "remaining_time": "11:59:51"} +{"current_steps": 645, "total_steps": 1647, "loss": 0.2897, "lr": 7.63640025323341e-06, "epoch": 0.3917995444191344, "percentage": 39.16, "elapsed_time": "7:42:56", "remaining_time": "11:59:10"} +{"current_steps": 646, "total_steps": 1647, "loss": 0.2964, "lr": 7.627388314921602e-06, "epoch": 0.3924069855732726, "percentage": 39.22, "elapsed_time": "7:43:38", "remaining_time": "11:58:25"} +{"current_steps": 647, "total_steps": 1647, "loss": 0.28, "lr": 7.61836456993939e-06, "epoch": 0.39301442672741077, "percentage": 39.28, "elapsed_time": "7:44:20", "remaining_time": "11:57:41"} +{"current_steps": 648, "total_steps": 1647, "loss": 0.3354, "lr": 7.609329058836694e-06, "epoch": 0.39362186788154896, "percentage": 39.34, "elapsed_time": "7:45:04", "remaining_time": "11:56:58"} +{"current_steps": 649, "total_steps": 1647, "loss": 0.312, "lr": 7.600281822216307e-06, "epoch": 0.39422930903568715, "percentage": 39.4, "elapsed_time": "7:45:47", "remaining_time": "11:56:15"} +{"current_steps": 650, "total_steps": 1647, "loss": 0.2691, "lr": 7.59122290073371e-06, "epoch": 0.39483675018982534, "percentage": 39.47, "elapsed_time": "7:46:26", "remaining_time": "11:55:27"} +{"current_steps": 651, "total_steps": 1647, "loss": 0.2817, "lr": 7.582152335096896e-06, "epoch": 0.3954441913439636, "percentage": 39.53, "elapsed_time": "7:47:12", "remaining_time": "11:54:47"} +{"current_steps": 652, "total_steps": 1647, "loss": 0.2804, "lr": 7.5730701660661795e-06, "epoch": 0.39605163249810177, "percentage": 39.59, "elapsed_time": "7:47:54", "remaining_time": "11:54:03"} +{"current_steps": 653, "total_steps": 1647, "loss": 0.2674, "lr": 7.563976434454021e-06, "epoch": 0.39665907365223996, "percentage": 39.65, "elapsed_time": "7:48:38", "remaining_time": "11:53:21"} +{"current_steps": 654, 
"total_steps": 1647, "loss": 0.2842, "lr": 7.554871181124836e-06, "epoch": 0.39726651480637815, "percentage": 39.71, "elapsed_time": "7:49:22", "remaining_time": "11:52:39"} +{"current_steps": 655, "total_steps": 1647, "loss": 0.2891, "lr": 7.5457544469948164e-06, "epoch": 0.39787395596051633, "percentage": 39.77, "elapsed_time": "7:50:05", "remaining_time": "11:51:57"} +{"current_steps": 656, "total_steps": 1647, "loss": 0.2815, "lr": 7.536626273031747e-06, "epoch": 0.3984813971146545, "percentage": 39.83, "elapsed_time": "7:50:48", "remaining_time": "11:51:14"} +{"current_steps": 657, "total_steps": 1647, "loss": 0.2666, "lr": 7.5274867002548154e-06, "epoch": 0.3990888382687927, "percentage": 39.89, "elapsed_time": "7:51:30", "remaining_time": "11:50:29"} +{"current_steps": 658, "total_steps": 1647, "loss": 0.2834, "lr": 7.5183357697344395e-06, "epoch": 0.3996962794229309, "percentage": 39.95, "elapsed_time": "7:52:12", "remaining_time": "11:49:44"} +{"current_steps": 659, "total_steps": 1647, "loss": 0.3175, "lr": 7.509173522592066e-06, "epoch": 0.4003037205770691, "percentage": 40.01, "elapsed_time": "7:52:51", "remaining_time": "11:48:56"} +{"current_steps": 660, "total_steps": 1647, "loss": 0.2918, "lr": 7.500000000000001e-06, "epoch": 0.4009111617312073, "percentage": 40.07, "elapsed_time": "7:53:36", "remaining_time": "11:48:16"} +{"current_steps": 661, "total_steps": 1647, "loss": 0.2636, "lr": 7.4908152431812175e-06, "epoch": 0.40151860288534547, "percentage": 40.13, "elapsed_time": "7:54:19", "remaining_time": "11:47:32"} +{"current_steps": 662, "total_steps": 1647, "loss": 0.2656, "lr": 7.481619293409173e-06, "epoch": 0.40212604403948365, "percentage": 40.19, "elapsed_time": "7:55:04", "remaining_time": "11:46:52"} +{"current_steps": 663, "total_steps": 1647, "loss": 0.2734, "lr": 7.472412192007619e-06, "epoch": 0.40273348519362184, "percentage": 40.26, "elapsed_time": "7:55:48", "remaining_time": "11:46:10"} +{"current_steps": 664, "total_steps": 1647, 
"loss": 0.2411, "lr": 7.4631939803504215e-06, "epoch": 0.4033409263477601, "percentage": 40.32, "elapsed_time": "7:56:30", "remaining_time": "11:45:25"} +{"current_steps": 665, "total_steps": 1647, "loss": 0.2621, "lr": 7.453964699861376e-06, "epoch": 0.4039483675018983, "percentage": 40.38, "elapsed_time": "7:57:13", "remaining_time": "11:44:42"} +{"current_steps": 666, "total_steps": 1647, "loss": 0.3353, "lr": 7.44472439201401e-06, "epoch": 0.40455580865603646, "percentage": 40.44, "elapsed_time": "7:57:58", "remaining_time": "11:44:02"} +{"current_steps": 667, "total_steps": 1647, "loss": 0.309, "lr": 7.435473098331411e-06, "epoch": 0.40516324981017465, "percentage": 40.5, "elapsed_time": "7:58:37", "remaining_time": "11:43:13"} +{"current_steps": 668, "total_steps": 1647, "loss": 0.2863, "lr": 7.426210860386032e-06, "epoch": 0.40577069096431284, "percentage": 40.56, "elapsed_time": "7:59:20", "remaining_time": "11:42:30"} +{"current_steps": 669, "total_steps": 1647, "loss": 0.3162, "lr": 7.416937719799502e-06, "epoch": 0.40637813211845103, "percentage": 40.62, "elapsed_time": "8:00:01", "remaining_time": "11:41:44"} +{"current_steps": 670, "total_steps": 1647, "loss": 0.2835, "lr": 7.407653718242449e-06, "epoch": 0.4069855732725892, "percentage": 40.68, "elapsed_time": "8:00:44", "remaining_time": "11:41:01"} +{"current_steps": 671, "total_steps": 1647, "loss": 0.2995, "lr": 7.398358897434303e-06, "epoch": 0.4075930144267274, "percentage": 40.74, "elapsed_time": "8:01:28", "remaining_time": "11:40:19"} +{"current_steps": 672, "total_steps": 1647, "loss": 0.2815, "lr": 7.3890532991431174e-06, "epoch": 0.4082004555808656, "percentage": 40.8, "elapsed_time": "8:02:10", "remaining_time": "11:39:34"} +{"current_steps": 673, "total_steps": 1647, "loss": 0.3244, "lr": 7.379736965185369e-06, "epoch": 0.4088078967350038, "percentage": 40.86, "elapsed_time": "8:02:51", "remaining_time": "11:38:49"} +{"current_steps": 674, "total_steps": 1647, "loss": 0.2994, "lr": 
7.370409937425781e-06, "epoch": 0.409415337889142, "percentage": 40.92, "elapsed_time": "8:03:34", "remaining_time": "11:38:05"} +{"current_steps": 675, "total_steps": 1647, "loss": 0.3046, "lr": 7.361072257777132e-06, "epoch": 0.41002277904328016, "percentage": 40.98, "elapsed_time": "8:04:20", "remaining_time": "11:37:27"} +{"current_steps": 676, "total_steps": 1647, "loss": 0.2676, "lr": 7.3517239682000675e-06, "epoch": 0.41063022019741835, "percentage": 41.04, "elapsed_time": "8:05:04", "remaining_time": "11:36:44"} +{"current_steps": 677, "total_steps": 1647, "loss": 0.2233, "lr": 7.342365110702907e-06, "epoch": 0.4112376613515566, "percentage": 41.11, "elapsed_time": "8:05:46", "remaining_time": "11:36:00"} +{"current_steps": 678, "total_steps": 1647, "loss": 0.3282, "lr": 7.332995727341462e-06, "epoch": 0.4118451025056948, "percentage": 41.17, "elapsed_time": "8:06:30", "remaining_time": "11:35:18"} +{"current_steps": 679, "total_steps": 1647, "loss": 0.2366, "lr": 7.323615860218844e-06, "epoch": 0.41245254365983297, "percentage": 41.23, "elapsed_time": "8:07:12", "remaining_time": "11:34:34"} +{"current_steps": 680, "total_steps": 1647, "loss": 0.2424, "lr": 7.314225551485273e-06, "epoch": 0.41305998481397116, "percentage": 41.29, "elapsed_time": "8:07:52", "remaining_time": "11:33:47"} +{"current_steps": 681, "total_steps": 1647, "loss": 0.3075, "lr": 7.304824843337893e-06, "epoch": 0.41366742596810935, "percentage": 41.35, "elapsed_time": "8:08:31", "remaining_time": "11:32:58"} +{"current_steps": 682, "total_steps": 1647, "loss": 0.2862, "lr": 7.295413778020579e-06, "epoch": 0.41427486712224754, "percentage": 41.41, "elapsed_time": "8:09:17", "remaining_time": "11:32:19"} +{"current_steps": 683, "total_steps": 1647, "loss": 0.3039, "lr": 7.285992397823747e-06, "epoch": 0.4148823082763857, "percentage": 41.47, "elapsed_time": "8:09:58", "remaining_time": "11:31:32"} +{"current_steps": 684, "total_steps": 1647, "loss": 0.2982, "lr": 7.276560745084167e-06, 
"epoch": 0.4154897494305239, "percentage": 41.53, "elapsed_time": "8:10:45", "remaining_time": "11:30:56"} +{"current_steps": 685, "total_steps": 1647, "loss": 0.2769, "lr": 7.267118862184767e-06, "epoch": 0.4160971905846621, "percentage": 41.59, "elapsed_time": "8:11:25", "remaining_time": "11:30:09"} +{"current_steps": 686, "total_steps": 1647, "loss": 0.3057, "lr": 7.257666791554448e-06, "epoch": 0.4167046317388003, "percentage": 41.65, "elapsed_time": "8:12:08", "remaining_time": "11:29:26"} +{"current_steps": 687, "total_steps": 1647, "loss": 0.2686, "lr": 7.248204575667893e-06, "epoch": 0.4173120728929385, "percentage": 41.71, "elapsed_time": "8:12:55", "remaining_time": "11:28:48"} +{"current_steps": 688, "total_steps": 1647, "loss": 0.2819, "lr": 7.2387322570453724e-06, "epoch": 0.41791951404707667, "percentage": 41.77, "elapsed_time": "8:13:38", "remaining_time": "11:28:05"} +{"current_steps": 689, "total_steps": 1647, "loss": 0.2659, "lr": 7.229249878252558e-06, "epoch": 0.41852695520121486, "percentage": 41.83, "elapsed_time": "8:14:21", "remaining_time": "11:27:21"} +{"current_steps": 690, "total_steps": 1647, "loss": 0.2878, "lr": 7.219757481900325e-06, "epoch": 0.4191343963553531, "percentage": 41.89, "elapsed_time": "8:15:06", "remaining_time": "11:26:41"} +{"current_steps": 691, "total_steps": 1647, "loss": 0.3096, "lr": 7.210255110644569e-06, "epoch": 0.4197418375094913, "percentage": 41.96, "elapsed_time": "8:15:50", "remaining_time": "11:26:00"} +{"current_steps": 692, "total_steps": 1647, "loss": 0.3036, "lr": 7.2007428071860045e-06, "epoch": 0.4203492786636295, "percentage": 42.02, "elapsed_time": "8:16:31", "remaining_time": "11:25:14"} +{"current_steps": 693, "total_steps": 1647, "loss": 0.2748, "lr": 7.191220614269981e-06, "epoch": 0.42095671981776767, "percentage": 42.08, "elapsed_time": "8:17:10", "remaining_time": "11:24:25"} +{"current_steps": 694, "total_steps": 1647, "loss": 0.294, "lr": 7.181688574686292e-06, "epoch": 
0.42156416097190585, "percentage": 42.14, "elapsed_time": "8:17:50", "remaining_time": "11:23:38"} +{"current_steps": 695, "total_steps": 1647, "loss": 0.2817, "lr": 7.17214673126897e-06, "epoch": 0.42217160212604404, "percentage": 42.2, "elapsed_time": "8:18:32", "remaining_time": "11:22:53"} +{"current_steps": 696, "total_steps": 1647, "loss": 0.2699, "lr": 7.162595126896111e-06, "epoch": 0.42277904328018223, "percentage": 42.26, "elapsed_time": "8:19:14", "remaining_time": "11:22:09"} +{"current_steps": 697, "total_steps": 1647, "loss": 0.2666, "lr": 7.15303380448967e-06, "epoch": 0.4233864844343204, "percentage": 42.32, "elapsed_time": "8:19:56", "remaining_time": "11:21:25"} +{"current_steps": 698, "total_steps": 1647, "loss": 0.2996, "lr": 7.143462807015271e-06, "epoch": 0.4239939255884586, "percentage": 42.38, "elapsed_time": "8:20:39", "remaining_time": "11:20:41"} +{"current_steps": 699, "total_steps": 1647, "loss": 0.283, "lr": 7.133882177482019e-06, "epoch": 0.4246013667425968, "percentage": 42.44, "elapsed_time": "8:21:24", "remaining_time": "11:20:01"} +{"current_steps": 700, "total_steps": 1647, "loss": 0.283, "lr": 7.1242919589422974e-06, "epoch": 0.425208807896735, "percentage": 42.5, "elapsed_time": "8:22:08", "remaining_time": "11:19:19"} +{"current_steps": 701, "total_steps": 1647, "loss": 0.2771, "lr": 7.114692194491583e-06, "epoch": 0.4258162490508732, "percentage": 42.56, "elapsed_time": "8:22:48", "remaining_time": "11:18:32"} +{"current_steps": 702, "total_steps": 1647, "loss": 0.3462, "lr": 7.105082927268247e-06, "epoch": 0.42642369020501136, "percentage": 42.62, "elapsed_time": "8:23:32", "remaining_time": "11:17:51"} +{"current_steps": 703, "total_steps": 1647, "loss": 0.2657, "lr": 7.095464200453366e-06, "epoch": 0.4270311313591496, "percentage": 42.68, "elapsed_time": "8:24:16", "remaining_time": "11:17:08"} +{"current_steps": 704, "total_steps": 1647, "loss": 0.3113, "lr": 7.085836057270521e-06, "epoch": 0.4276385725132878, 
"percentage": 42.74, "elapsed_time": "8:24:57", "remaining_time": "11:16:23"} +{"current_steps": 705, "total_steps": 1647, "loss": 0.2622, "lr": 7.07619854098561e-06, "epoch": 0.428246013667426, "percentage": 42.81, "elapsed_time": "8:25:39", "remaining_time": "11:15:38"} +{"current_steps": 706, "total_steps": 1647, "loss": 0.254, "lr": 7.066551694906651e-06, "epoch": 0.4288534548215642, "percentage": 42.87, "elapsed_time": "8:26:25", "remaining_time": "11:14:59"} +{"current_steps": 707, "total_steps": 1647, "loss": 0.283, "lr": 7.056895562383585e-06, "epoch": 0.42946089597570236, "percentage": 42.93, "elapsed_time": "8:27:08", "remaining_time": "11:14:16"} +{"current_steps": 708, "total_steps": 1647, "loss": 0.2979, "lr": 7.047230186808085e-06, "epoch": 0.43006833712984055, "percentage": 42.99, "elapsed_time": "8:27:50", "remaining_time": "11:13:31"} +{"current_steps": 709, "total_steps": 1647, "loss": 0.3149, "lr": 7.0375556116133605e-06, "epoch": 0.43067577828397874, "percentage": 43.05, "elapsed_time": "8:28:32", "remaining_time": "11:12:47"} +{"current_steps": 710, "total_steps": 1647, "loss": 0.267, "lr": 7.027871880273959e-06, "epoch": 0.4312832194381169, "percentage": 43.11, "elapsed_time": "8:29:16", "remaining_time": "11:12:05"} +{"current_steps": 711, "total_steps": 1647, "loss": 0.2777, "lr": 7.018179036305574e-06, "epoch": 0.4318906605922551, "percentage": 43.17, "elapsed_time": "8:29:57", "remaining_time": "11:11:19"} +{"current_steps": 712, "total_steps": 1647, "loss": 0.2881, "lr": 7.008477123264849e-06, "epoch": 0.4324981017463933, "percentage": 43.23, "elapsed_time": "8:30:39", "remaining_time": "11:10:36"} +{"current_steps": 713, "total_steps": 1647, "loss": 0.2688, "lr": 6.9987661847491786e-06, "epoch": 0.4331055429005315, "percentage": 43.29, "elapsed_time": "8:31:23", "remaining_time": "11:09:53"} +{"current_steps": 714, "total_steps": 1647, "loss": 0.3073, "lr": 6.989046264396516e-06, "epoch": 0.4337129840546697, "percentage": 43.35, 
"elapsed_time": "8:32:04", "remaining_time": "11:09:08"} +{"current_steps": 715, "total_steps": 1647, "loss": 0.2942, "lr": 6.9793174058851805e-06, "epoch": 0.43432042520880787, "percentage": 43.41, "elapsed_time": "8:32:48", "remaining_time": "11:08:26"} +{"current_steps": 716, "total_steps": 1647, "loss": 0.2603, "lr": 6.96957965293365e-06, "epoch": 0.4349278663629461, "percentage": 43.47, "elapsed_time": "8:33:29", "remaining_time": "11:07:40"} +{"current_steps": 717, "total_steps": 1647, "loss": 0.2659, "lr": 6.959833049300376e-06, "epoch": 0.4355353075170843, "percentage": 43.53, "elapsed_time": "8:34:12", "remaining_time": "11:06:57"} +{"current_steps": 718, "total_steps": 1647, "loss": 0.2626, "lr": 6.9500776387835785e-06, "epoch": 0.4361427486712225, "percentage": 43.59, "elapsed_time": "8:34:53", "remaining_time": "11:06:12"} +{"current_steps": 719, "total_steps": 1647, "loss": 0.283, "lr": 6.940313465221057e-06, "epoch": 0.4367501898253607, "percentage": 43.66, "elapsed_time": "8:35:35", "remaining_time": "11:05:27"} +{"current_steps": 720, "total_steps": 1647, "loss": 0.2878, "lr": 6.9305405724899876e-06, "epoch": 0.43735763097949887, "percentage": 43.72, "elapsed_time": "8:36:17", "remaining_time": "11:04:43"} +{"current_steps": 721, "total_steps": 1647, "loss": 0.2381, "lr": 6.920759004506723e-06, "epoch": 0.43796507213363706, "percentage": 43.78, "elapsed_time": "8:37:00", "remaining_time": "11:04:00"} +{"current_steps": 722, "total_steps": 1647, "loss": 0.3125, "lr": 6.91096880522661e-06, "epoch": 0.43857251328777525, "percentage": 43.84, "elapsed_time": "8:37:41", "remaining_time": "11:03:15"} +{"current_steps": 723, "total_steps": 1647, "loss": 0.2778, "lr": 6.90117001864377e-06, "epoch": 0.43917995444191343, "percentage": 43.9, "elapsed_time": "8:38:24", "remaining_time": "11:02:31"} +{"current_steps": 724, "total_steps": 1647, "loss": 0.2741, "lr": 6.891362688790925e-06, "epoch": 0.4397873955960516, "percentage": 43.96, "elapsed_time": "8:39:08", 
"remaining_time": "11:01:50"} +{"current_steps": 725, "total_steps": 1647, "loss": 0.2961, "lr": 6.8815468597391785e-06, "epoch": 0.4403948367501898, "percentage": 44.02, "elapsed_time": "8:39:53", "remaining_time": "11:01:08"} +{"current_steps": 726, "total_steps": 1647, "loss": 0.2806, "lr": 6.871722575597829e-06, "epoch": 0.441002277904328, "percentage": 44.08, "elapsed_time": "8:40:38", "remaining_time": "11:00:29"} +{"current_steps": 727, "total_steps": 1647, "loss": 0.2788, "lr": 6.8618898805141744e-06, "epoch": 0.4416097190584662, "percentage": 44.14, "elapsed_time": "8:41:24", "remaining_time": "10:59:49"} +{"current_steps": 728, "total_steps": 1647, "loss": 0.2916, "lr": 6.8520488186733e-06, "epoch": 0.4422171602126044, "percentage": 44.2, "elapsed_time": "8:42:08", "remaining_time": "10:59:07"} +{"current_steps": 729, "total_steps": 1647, "loss": 0.297, "lr": 6.8421994342979e-06, "epoch": 0.4428246013667426, "percentage": 44.26, "elapsed_time": "8:42:51", "remaining_time": "10:58:24"} +{"current_steps": 730, "total_steps": 1647, "loss": 0.2934, "lr": 6.832341771648057e-06, "epoch": 0.4434320425208808, "percentage": 44.32, "elapsed_time": "8:43:34", "remaining_time": "10:57:41"} +{"current_steps": 731, "total_steps": 1647, "loss": 0.2924, "lr": 6.822475875021057e-06, "epoch": 0.444039483675019, "percentage": 44.38, "elapsed_time": "8:44:16", "remaining_time": "10:56:57"} +{"current_steps": 732, "total_steps": 1647, "loss": 0.2875, "lr": 6.812601788751192e-06, "epoch": 0.4446469248291572, "percentage": 44.44, "elapsed_time": "8:44:57", "remaining_time": "10:56:11"} +{"current_steps": 733, "total_steps": 1647, "loss": 0.2723, "lr": 6.802719557209547e-06, "epoch": 0.4452543659832954, "percentage": 44.51, "elapsed_time": "8:45:41", "remaining_time": "10:55:29"} +{"current_steps": 734, "total_steps": 1647, "loss": 0.2902, "lr": 6.792829224803816e-06, "epoch": 0.44586180713743356, "percentage": 44.57, "elapsed_time": "8:46:25", "remaining_time": "10:54:48"} 
+{"current_steps": 735, "total_steps": 1647, "loss": 0.3298, "lr": 6.782930835978094e-06, "epoch": 0.44646924829157175, "percentage": 44.63, "elapsed_time": "8:47:06", "remaining_time": "10:54:02"} +{"current_steps": 736, "total_steps": 1647, "loss": 0.2654, "lr": 6.773024435212678e-06, "epoch": 0.44707668944570994, "percentage": 44.69, "elapsed_time": "8:47:49", "remaining_time": "10:53:19"} +{"current_steps": 737, "total_steps": 1647, "loss": 0.27, "lr": 6.76311006702387e-06, "epoch": 0.44768413059984813, "percentage": 44.75, "elapsed_time": "8:48:32", "remaining_time": "10:52:36"} +{"current_steps": 738, "total_steps": 1647, "loss": 0.245, "lr": 6.753187775963773e-06, "epoch": 0.4482915717539863, "percentage": 44.81, "elapsed_time": "8:49:15", "remaining_time": "10:51:54"} +{"current_steps": 739, "total_steps": 1647, "loss": 0.2551, "lr": 6.743257606620094e-06, "epoch": 0.4488990129081245, "percentage": 44.87, "elapsed_time": "8:50:00", "remaining_time": "10:51:12"} +{"current_steps": 740, "total_steps": 1647, "loss": 0.274, "lr": 6.733319603615941e-06, "epoch": 0.4495064540622627, "percentage": 44.93, "elapsed_time": "8:50:42", "remaining_time": "10:50:28"} +{"current_steps": 741, "total_steps": 1647, "loss": 0.2698, "lr": 6.723373811609628e-06, "epoch": 0.45011389521640094, "percentage": 44.99, "elapsed_time": "8:51:23", "remaining_time": "10:49:42"} +{"current_steps": 742, "total_steps": 1647, "loss": 0.3096, "lr": 6.713420275294467e-06, "epoch": 0.4507213363705391, "percentage": 45.05, "elapsed_time": "8:52:10", "remaining_time": "10:49:04"} +{"current_steps": 743, "total_steps": 1647, "loss": 0.3101, "lr": 6.703459039398571e-06, "epoch": 0.4513287775246773, "percentage": 45.11, "elapsed_time": "8:52:54", "remaining_time": "10:48:22"} +{"current_steps": 744, "total_steps": 1647, "loss": 0.2478, "lr": 6.693490148684654e-06, "epoch": 0.4519362186788155, "percentage": 45.17, "elapsed_time": "8:53:35", "remaining_time": "10:47:37"} +{"current_steps": 745, 
"total_steps": 1647, "loss": 0.3075, "lr": 6.683513647949826e-06, "epoch": 0.4525436598329537, "percentage": 45.23, "elapsed_time": "8:54:17", "remaining_time": "10:46:53"} +{"current_steps": 746, "total_steps": 1647, "loss": 0.2737, "lr": 6.673529582025398e-06, "epoch": 0.4531511009870919, "percentage": 45.29, "elapsed_time": "8:55:03", "remaining_time": "10:46:13"} +{"current_steps": 747, "total_steps": 1647, "loss": 0.2791, "lr": 6.66353799577667e-06, "epoch": 0.45375854214123007, "percentage": 45.36, "elapsed_time": "8:55:47", "remaining_time": "10:45:32"} +{"current_steps": 748, "total_steps": 1647, "loss": 0.3014, "lr": 6.653538934102743e-06, "epoch": 0.45436598329536826, "percentage": 45.42, "elapsed_time": "8:56:28", "remaining_time": "10:44:46"} +{"current_steps": 749, "total_steps": 1647, "loss": 0.2749, "lr": 6.643532441936307e-06, "epoch": 0.45497342444950645, "percentage": 45.48, "elapsed_time": "8:57:12", "remaining_time": "10:44:04"} +{"current_steps": 750, "total_steps": 1647, "loss": 0.2971, "lr": 6.633518564243442e-06, "epoch": 0.45558086560364464, "percentage": 45.54, "elapsed_time": "8:57:55", "remaining_time": "10:43:21"} +{"current_steps": 751, "total_steps": 1647, "loss": 0.2868, "lr": 6.6234973460234184e-06, "epoch": 0.4561883067577828, "percentage": 45.6, "elapsed_time": "8:58:37", "remaining_time": "10:42:37"} +{"current_steps": 752, "total_steps": 1647, "loss": 0.2731, "lr": 6.6134688323084884e-06, "epoch": 0.456795747911921, "percentage": 45.66, "elapsed_time": "8:59:19", "remaining_time": "10:41:53"} +{"current_steps": 753, "total_steps": 1647, "loss": 0.2616, "lr": 6.603433068163694e-06, "epoch": 0.4574031890660592, "percentage": 45.72, "elapsed_time": "9:00:00", "remaining_time": "10:41:07"} +{"current_steps": 754, "total_steps": 1647, "loss": 0.2907, "lr": 6.593390098686653e-06, "epoch": 0.45801063022019745, "percentage": 45.78, "elapsed_time": "9:00:44", "remaining_time": "10:40:26"} +{"current_steps": 755, "total_steps": 1647, 
"loss": 0.3044, "lr": 6.583339969007364e-06, "epoch": 0.45861807137433563, "percentage": 45.84, "elapsed_time": "9:01:28", "remaining_time": "10:39:43"} +{"current_steps": 756, "total_steps": 1647, "loss": 0.2728, "lr": 6.573282724288001e-06, "epoch": 0.4592255125284738, "percentage": 45.9, "elapsed_time": "9:02:12", "remaining_time": "10:39:01"} +{"current_steps": 757, "total_steps": 1647, "loss": 0.276, "lr": 6.563218409722712e-06, "epoch": 0.459832953682612, "percentage": 45.96, "elapsed_time": "9:02:57", "remaining_time": "10:38:20"} +{"current_steps": 758, "total_steps": 1647, "loss": 0.2777, "lr": 6.553147070537413e-06, "epoch": 0.4604403948367502, "percentage": 46.02, "elapsed_time": "9:03:39", "remaining_time": "10:37:36"} +{"current_steps": 759, "total_steps": 1647, "loss": 0.2765, "lr": 6.543068751989585e-06, "epoch": 0.4610478359908884, "percentage": 46.08, "elapsed_time": "9:04:24", "remaining_time": "10:36:55"} +{"current_steps": 760, "total_steps": 1647, "loss": 0.2931, "lr": 6.532983499368078e-06, "epoch": 0.4616552771450266, "percentage": 46.14, "elapsed_time": "9:05:11", "remaining_time": "10:36:17"} +{"current_steps": 761, "total_steps": 1647, "loss": 0.2519, "lr": 6.522891357992895e-06, "epoch": 0.46226271829916477, "percentage": 46.21, "elapsed_time": "9:05:55", "remaining_time": "10:35:35"} +{"current_steps": 762, "total_steps": 1647, "loss": 0.2804, "lr": 6.512792373215e-06, "epoch": 0.46287015945330295, "percentage": 46.27, "elapsed_time": "9:06:44", "remaining_time": "10:34:59"} +{"current_steps": 763, "total_steps": 1647, "loss": 0.2734, "lr": 6.502686590416105e-06, "epoch": 0.46347760060744114, "percentage": 46.33, "elapsed_time": "9:07:33", "remaining_time": "10:34:23"} +{"current_steps": 764, "total_steps": 1647, "loss": 0.2671, "lr": 6.492574055008474e-06, "epoch": 0.46408504176157933, "percentage": 46.39, "elapsed_time": "9:08:15", "remaining_time": "10:33:39"} +{"current_steps": 765, "total_steps": 1647, "loss": 0.2843, "lr": 
6.482454812434711e-06, "epoch": 0.4646924829157175, "percentage": 46.45, "elapsed_time": "9:08:58", "remaining_time": "10:32:56"} +{"current_steps": 766, "total_steps": 1647, "loss": 0.2744, "lr": 6.472328908167562e-06, "epoch": 0.4652999240698557, "percentage": 46.51, "elapsed_time": "9:09:40", "remaining_time": "10:32:11"} +{"current_steps": 767, "total_steps": 1647, "loss": 0.2838, "lr": 6.4621963877097105e-06, "epoch": 0.46590736522399395, "percentage": 46.57, "elapsed_time": "9:10:20", "remaining_time": "10:31:25"} +{"current_steps": 768, "total_steps": 1647, "loss": 0.3075, "lr": 6.452057296593568e-06, "epoch": 0.46651480637813214, "percentage": 46.63, "elapsed_time": "9:11:02", "remaining_time": "10:30:41"} +{"current_steps": 769, "total_steps": 1647, "loss": 0.2803, "lr": 6.441911680381074e-06, "epoch": 0.46712224753227033, "percentage": 46.69, "elapsed_time": "9:11:47", "remaining_time": "10:30:00"} +{"current_steps": 770, "total_steps": 1647, "loss": 0.2594, "lr": 6.431759584663492e-06, "epoch": 0.4677296886864085, "percentage": 46.75, "elapsed_time": "9:12:32", "remaining_time": "10:29:19"} +{"current_steps": 771, "total_steps": 1647, "loss": 0.294, "lr": 6.421601055061195e-06, "epoch": 0.4683371298405467, "percentage": 46.81, "elapsed_time": "9:13:20", "remaining_time": "10:28:41"} +{"current_steps": 772, "total_steps": 1647, "loss": 0.276, "lr": 6.411436137223479e-06, "epoch": 0.4689445709946849, "percentage": 46.87, "elapsed_time": "9:14:04", "remaining_time": "10:28:00"} +{"current_steps": 773, "total_steps": 1647, "loss": 0.2739, "lr": 6.401264876828335e-06, "epoch": 0.4695520121488231, "percentage": 46.93, "elapsed_time": "9:14:47", "remaining_time": "10:27:16"} +{"current_steps": 774, "total_steps": 1647, "loss": 0.2689, "lr": 6.391087319582264e-06, "epoch": 0.47015945330296127, "percentage": 46.99, "elapsed_time": "9:15:28", "remaining_time": "10:26:31"} +{"current_steps": 775, "total_steps": 1647, "loss": 0.2523, "lr": 6.38090351122006e-06, 
"epoch": 0.47076689445709946, "percentage": 47.06, "elapsed_time": "9:16:11", "remaining_time": "10:25:48"} +{"current_steps": 776, "total_steps": 1647, "loss": 0.2443, "lr": 6.370713497504607e-06, "epoch": 0.47137433561123765, "percentage": 47.12, "elapsed_time": "9:16:56", "remaining_time": "10:25:07"} +{"current_steps": 777, "total_steps": 1647, "loss": 0.2783, "lr": 6.360517324226676e-06, "epoch": 0.47198177676537584, "percentage": 47.18, "elapsed_time": "9:17:39", "remaining_time": "10:24:24"} +{"current_steps": 778, "total_steps": 1647, "loss": 0.272, "lr": 6.350315037204714e-06, "epoch": 0.472589217919514, "percentage": 47.24, "elapsed_time": "9:18:26", "remaining_time": "10:23:45"} +{"current_steps": 779, "total_steps": 1647, "loss": 0.2838, "lr": 6.340106682284645e-06, "epoch": 0.4731966590736522, "percentage": 47.3, "elapsed_time": "9:19:06", "remaining_time": "10:22:59"} +{"current_steps": 780, "total_steps": 1647, "loss": 0.2557, "lr": 6.329892305339659e-06, "epoch": 0.47380410022779046, "percentage": 47.36, "elapsed_time": "9:19:48", "remaining_time": "10:22:14"} +{"current_steps": 781, "total_steps": 1647, "loss": 0.2729, "lr": 6.319671952270004e-06, "epoch": 0.47441154138192865, "percentage": 47.42, "elapsed_time": "9:20:33", "remaining_time": "10:21:33"} +{"current_steps": 782, "total_steps": 1647, "loss": 0.2493, "lr": 6.309445669002787e-06, "epoch": 0.47501898253606684, "percentage": 47.48, "elapsed_time": "9:21:17", "remaining_time": "10:20:52"} +{"current_steps": 783, "total_steps": 1647, "loss": 0.3008, "lr": 6.299213501491761e-06, "epoch": 0.475626423690205, "percentage": 47.54, "elapsed_time": "9:22:02", "remaining_time": "10:20:10"} +{"current_steps": 784, "total_steps": 1647, "loss": 0.2867, "lr": 6.288975495717124e-06, "epoch": 0.4762338648443432, "percentage": 47.6, "elapsed_time": "9:22:45", "remaining_time": "10:19:28"} +{"current_steps": 785, "total_steps": 1647, "loss": 0.2495, "lr": 6.2787316976853045e-06, "epoch": 
0.4768413059984814, "percentage": 47.66, "elapsed_time": "9:23:26", "remaining_time": "10:18:42"} +{"current_steps": 786, "total_steps": 1647, "loss": 0.2348, "lr": 6.268482153428763e-06, "epoch": 0.4774487471526196, "percentage": 47.72, "elapsed_time": "9:24:11", "remaining_time": "10:18:01"} +{"current_steps": 787, "total_steps": 1647, "loss": 0.2809, "lr": 6.258226909005783e-06, "epoch": 0.4780561883067578, "percentage": 47.78, "elapsed_time": "9:24:54", "remaining_time": "10:17:18"} +{"current_steps": 788, "total_steps": 1647, "loss": 0.2794, "lr": 6.247966010500258e-06, "epoch": 0.47866362946089597, "percentage": 47.84, "elapsed_time": "9:25:38", "remaining_time": "10:16:36"} +{"current_steps": 789, "total_steps": 1647, "loss": 0.2892, "lr": 6.237699504021495e-06, "epoch": 0.47927107061503416, "percentage": 47.91, "elapsed_time": "9:26:25", "remaining_time": "10:15:57"} +{"current_steps": 790, "total_steps": 1647, "loss": 0.277, "lr": 6.227427435703997e-06, "epoch": 0.47987851176917234, "percentage": 47.97, "elapsed_time": "9:27:09", "remaining_time": "10:15:15"} +{"current_steps": 791, "total_steps": 1647, "loss": 0.253, "lr": 6.217149851707261e-06, "epoch": 0.48048595292331053, "percentage": 48.03, "elapsed_time": "9:27:51", "remaining_time": "10:14:31"} +{"current_steps": 792, "total_steps": 1647, "loss": 0.2675, "lr": 6.206866798215571e-06, "epoch": 0.4810933940774487, "percentage": 48.09, "elapsed_time": "9:28:34", "remaining_time": "10:13:47"} +{"current_steps": 793, "total_steps": 1647, "loss": 0.2498, "lr": 6.1965783214377895e-06, "epoch": 0.48170083523158697, "percentage": 48.15, "elapsed_time": "9:29:16", "remaining_time": "10:13:04"} +{"current_steps": 794, "total_steps": 1647, "loss": 0.2594, "lr": 6.186284467607149e-06, "epoch": 0.48230827638572515, "percentage": 48.21, "elapsed_time": "9:29:57", "remaining_time": "10:12:18"} +{"current_steps": 795, "total_steps": 1647, "loss": 0.2644, "lr": 6.175985282981042e-06, "epoch": 0.48291571753986334, 
"percentage": 48.27, "elapsed_time": "9:30:43", "remaining_time": "10:11:38"} +{"current_steps": 796, "total_steps": 1647, "loss": 0.2546, "lr": 6.165680813840822e-06, "epoch": 0.48352315869400153, "percentage": 48.33, "elapsed_time": "9:31:29", "remaining_time": "10:10:58"} +{"current_steps": 797, "total_steps": 1647, "loss": 0.3234, "lr": 6.155371106491584e-06, "epoch": 0.4841305998481397, "percentage": 48.39, "elapsed_time": "9:32:08", "remaining_time": "10:10:10"} +{"current_steps": 798, "total_steps": 1647, "loss": 0.245, "lr": 6.1450562072619635e-06, "epoch": 0.4847380410022779, "percentage": 48.45, "elapsed_time": "9:32:54", "remaining_time": "10:09:30"} +{"current_steps": 799, "total_steps": 1647, "loss": 0.2631, "lr": 6.134736162503929e-06, "epoch": 0.4853454821564161, "percentage": 48.51, "elapsed_time": "9:33:38", "remaining_time": "10:08:48"} +{"current_steps": 800, "total_steps": 1647, "loss": 0.2632, "lr": 6.124411018592568e-06, "epoch": 0.4859529233105543, "percentage": 48.57, "elapsed_time": "9:34:22", "remaining_time": "10:08:07"} +{"current_steps": 801, "total_steps": 1647, "loss": 0.272, "lr": 6.114080821925885e-06, "epoch": 0.4865603644646925, "percentage": 48.63, "elapsed_time": "9:35:04", "remaining_time": "10:07:22"} +{"current_steps": 802, "total_steps": 1647, "loss": 0.2577, "lr": 6.103745618924587e-06, "epoch": 0.48716780561883066, "percentage": 48.69, "elapsed_time": "9:35:50", "remaining_time": "10:06:42"} +{"current_steps": 803, "total_steps": 1647, "loss": 0.2925, "lr": 6.09340545603188e-06, "epoch": 0.48777524677296885, "percentage": 48.76, "elapsed_time": "9:36:37", "remaining_time": "10:06:04"} +{"current_steps": 804, "total_steps": 1647, "loss": 0.2582, "lr": 6.0830603797132574e-06, "epoch": 0.48838268792710704, "percentage": 48.82, "elapsed_time": "9:37:17", "remaining_time": "10:05:17"} +{"current_steps": 805, "total_steps": 1647, "loss": 0.2832, "lr": 6.072710436456293e-06, "epoch": 0.48899012908124523, "percentage": 48.88, 
"elapsed_time": "9:38:00", "remaining_time": "10:04:34"} +{"current_steps": 806, "total_steps": 1647, "loss": 0.2676, "lr": 6.0623556727704306e-06, "epoch": 0.4895975702353835, "percentage": 48.94, "elapsed_time": "9:38:45", "remaining_time": "10:03:53"} +{"current_steps": 807, "total_steps": 1647, "loss": 0.289, "lr": 6.051996135186774e-06, "epoch": 0.49020501138952166, "percentage": 49.0, "elapsed_time": "9:39:28", "remaining_time": "10:03:09"} +{"current_steps": 808, "total_steps": 1647, "loss": 0.2847, "lr": 6.041631870257882e-06, "epoch": 0.49081245254365985, "percentage": 49.06, "elapsed_time": "9:40:07", "remaining_time": "10:02:23"} +{"current_steps": 809, "total_steps": 1647, "loss": 0.3195, "lr": 6.0312629245575534e-06, "epoch": 0.49141989369779804, "percentage": 49.12, "elapsed_time": "9:40:49", "remaining_time": "10:01:39"} +{"current_steps": 810, "total_steps": 1647, "loss": 0.3136, "lr": 6.020889344680627e-06, "epoch": 0.4920273348519362, "percentage": 49.18, "elapsed_time": "9:41:29", "remaining_time": "10:00:52"} +{"current_steps": 811, "total_steps": 1647, "loss": 0.285, "lr": 6.010511177242757e-06, "epoch": 0.4926347760060744, "percentage": 49.24, "elapsed_time": "9:42:10", "remaining_time": "10:00:07"} +{"current_steps": 812, "total_steps": 1647, "loss": 0.2651, "lr": 6.000128468880223e-06, "epoch": 0.4932422171602126, "percentage": 49.3, "elapsed_time": "9:42:56", "remaining_time": "9:59:27"} +{"current_steps": 813, "total_steps": 1647, "loss": 0.2961, "lr": 5.989741266249701e-06, "epoch": 0.4938496583143508, "percentage": 49.36, "elapsed_time": "9:43:37", "remaining_time": "9:58:41"} +{"current_steps": 814, "total_steps": 1647, "loss": 0.3049, "lr": 5.979349616028067e-06, "epoch": 0.494457099468489, "percentage": 49.42, "elapsed_time": "9:44:19", "remaining_time": "9:57:57"} +{"current_steps": 815, "total_steps": 1647, "loss": 0.2891, "lr": 5.9689535649121855e-06, "epoch": 0.49506454062262717, "percentage": 49.48, "elapsed_time": "9:45:02", 
"remaining_time": "9:57:14"} +{"current_steps": 816, "total_steps": 1647, "loss": 0.2619, "lr": 5.958553159618693e-06, "epoch": 0.49567198177676536, "percentage": 49.54, "elapsed_time": "9:45:43", "remaining_time": "9:56:29"} +{"current_steps": 817, "total_steps": 1647, "loss": 0.2753, "lr": 5.948148446883794e-06, "epoch": 0.49627942293090355, "percentage": 49.61, "elapsed_time": "9:46:26", "remaining_time": "9:55:46"} +{"current_steps": 818, "total_steps": 1647, "loss": 0.3255, "lr": 5.937739473463047e-06, "epoch": 0.49688686408504174, "percentage": 49.67, "elapsed_time": "9:47:09", "remaining_time": "9:55:03"} +{"current_steps": 819, "total_steps": 1647, "loss": 0.2774, "lr": 5.927326286131162e-06, "epoch": 0.49749430523918, "percentage": 49.73, "elapsed_time": "9:47:52", "remaining_time": "9:54:19"} +{"current_steps": 820, "total_steps": 1647, "loss": 0.2771, "lr": 5.916908931681781e-06, "epoch": 0.49810174639331817, "percentage": 49.79, "elapsed_time": "9:48:38", "remaining_time": "9:53:40"} +{"current_steps": 821, "total_steps": 1647, "loss": 0.2949, "lr": 5.906487456927273e-06, "epoch": 0.49870918754745636, "percentage": 49.85, "elapsed_time": "9:49:21", "remaining_time": "9:52:57"} +{"current_steps": 822, "total_steps": 1647, "loss": 0.2771, "lr": 5.896061908698521e-06, "epoch": 0.49931662870159454, "percentage": 49.91, "elapsed_time": "9:50:03", "remaining_time": "9:52:12"} +{"current_steps": 823, "total_steps": 1647, "loss": 0.2746, "lr": 5.885632333844714e-06, "epoch": 0.49992406985573273, "percentage": 49.97, "elapsed_time": "9:50:47", "remaining_time": "9:51:30"} +{"current_steps": 824, "total_steps": 1647, "loss": 0.2728, "lr": 5.8751987792331365e-06, "epoch": 0.5005315110098709, "percentage": 50.03, "elapsed_time": "9:51:29", "remaining_time": "9:50:46"} +{"current_steps": 825, "total_steps": 1647, "loss": 0.2669, "lr": 5.864761291748956e-06, "epoch": 0.5011389521640092, "percentage": 50.09, "elapsed_time": "9:52:11", "remaining_time": "9:50:02"} 
+{"current_steps": 826, "total_steps": 1647, "loss": 0.2711, "lr": 5.854319918295012e-06, "epoch": 0.5017463933181473, "percentage": 50.15, "elapsed_time": "9:52:54", "remaining_time": "9:49:19"} +{"current_steps": 827, "total_steps": 1647, "loss": 0.2463, "lr": 5.843874705791607e-06, "epoch": 0.5023538344722855, "percentage": 50.21, "elapsed_time": "9:53:35", "remaining_time": "9:48:34"} +{"current_steps": 828, "total_steps": 1647, "loss": 0.3234, "lr": 5.833425701176294e-06, "epoch": 0.5029612756264237, "percentage": 50.27, "elapsed_time": "9:54:18", "remaining_time": "9:47:50"} +{"current_steps": 829, "total_steps": 1647, "loss": 0.2757, "lr": 5.82297295140367e-06, "epoch": 0.5035687167805619, "percentage": 50.33, "elapsed_time": "9:55:01", "remaining_time": "9:47:07"} +{"current_steps": 830, "total_steps": 1647, "loss": 0.2555, "lr": 5.812516503445158e-06, "epoch": 0.5041761579347, "percentage": 50.39, "elapsed_time": "9:55:46", "remaining_time": "9:46:26"} +{"current_steps": 831, "total_steps": 1647, "loss": 0.2864, "lr": 5.8020564042888015e-06, "epoch": 0.5047835990888383, "percentage": 50.46, "elapsed_time": "9:56:27", "remaining_time": "9:45:41"} +{"current_steps": 832, "total_steps": 1647, "loss": 0.2871, "lr": 5.79159270093905e-06, "epoch": 0.5053910402429764, "percentage": 50.52, "elapsed_time": "9:57:10", "remaining_time": "9:44:58"} +{"current_steps": 833, "total_steps": 1647, "loss": 0.2611, "lr": 5.781125440416552e-06, "epoch": 0.5059984813971147, "percentage": 50.58, "elapsed_time": "9:57:53", "remaining_time": "9:44:15"} +{"current_steps": 834, "total_steps": 1647, "loss": 0.2938, "lr": 5.770654669757935e-06, "epoch": 0.5066059225512528, "percentage": 50.64, "elapsed_time": "9:58:36", "remaining_time": "9:43:32"} +{"current_steps": 835, "total_steps": 1647, "loss": 0.2726, "lr": 5.760180436015604e-06, "epoch": 0.507213363705391, "percentage": 50.7, "elapsed_time": "9:59:21", "remaining_time": "9:42:51"} +{"current_steps": 836, "total_steps": 1647, 
"loss": 0.2808, "lr": 5.749702786257529e-06, "epoch": 0.5078208048595292, "percentage": 50.76, "elapsed_time": "10:00:08", "remaining_time": "9:42:11"} +{"current_steps": 837, "total_steps": 1647, "loss": 0.2515, "lr": 5.739221767567025e-06, "epoch": 0.5084282460136674, "percentage": 50.82, "elapsed_time": "10:00:55", "remaining_time": "9:41:32"} +{"current_steps": 838, "total_steps": 1647, "loss": 0.2764, "lr": 5.7287374270425475e-06, "epoch": 0.5090356871678057, "percentage": 50.88, "elapsed_time": "10:01:40", "remaining_time": "9:40:51"} +{"current_steps": 839, "total_steps": 1647, "loss": 0.2895, "lr": 5.718249811797482e-06, "epoch": 0.5096431283219438, "percentage": 50.94, "elapsed_time": "10:02:25", "remaining_time": "9:40:10"} +{"current_steps": 840, "total_steps": 1647, "loss": 0.2746, "lr": 5.707758968959923e-06, "epoch": 0.510250569476082, "percentage": 51.0, "elapsed_time": "10:03:10", "remaining_time": "9:39:28"} +{"current_steps": 841, "total_steps": 1647, "loss": 0.2646, "lr": 5.69726494567248e-06, "epoch": 0.5108580106302202, "percentage": 51.06, "elapsed_time": "10:03:58", "remaining_time": "9:38:50"} +{"current_steps": 842, "total_steps": 1647, "loss": 0.2927, "lr": 5.686767789092041e-06, "epoch": 0.5114654517843584, "percentage": 51.12, "elapsed_time": "10:04:38", "remaining_time": "9:38:04"} +{"current_steps": 843, "total_steps": 1647, "loss": 0.255, "lr": 5.676267546389587e-06, "epoch": 0.5120728929384966, "percentage": 51.18, "elapsed_time": "10:05:21", "remaining_time": "9:37:21"} +{"current_steps": 844, "total_steps": 1647, "loss": 0.2825, "lr": 5.6657642647499545e-06, "epoch": 0.5126803340926348, "percentage": 51.24, "elapsed_time": "10:06:07", "remaining_time": "9:36:41"} +{"current_steps": 845, "total_steps": 1647, "loss": 0.2614, "lr": 5.655257991371646e-06, "epoch": 0.5132877752467729, "percentage": 51.31, "elapsed_time": "10:06:50", "remaining_time": "9:35:57"} +{"current_steps": 846, "total_steps": 1647, "loss": 0.2739, "lr": 
5.644748773466606e-06, "epoch": 0.5138952164009112, "percentage": 51.37, "elapsed_time": "10:07:34", "remaining_time": "9:35:15"} +{"current_steps": 847, "total_steps": 1647, "loss": 0.3136, "lr": 5.6342366582600035e-06, "epoch": 0.5145026575550493, "percentage": 51.43, "elapsed_time": "10:08:15", "remaining_time": "9:34:30"} +{"current_steps": 848, "total_steps": 1647, "loss": 0.2931, "lr": 5.62372169299004e-06, "epoch": 0.5151100987091876, "percentage": 51.49, "elapsed_time": "10:08:57", "remaining_time": "9:33:46"} +{"current_steps": 849, "total_steps": 1647, "loss": 0.2635, "lr": 5.613203924907711e-06, "epoch": 0.5157175398633257, "percentage": 51.55, "elapsed_time": "10:09:37", "remaining_time": "9:33:00"} +{"current_steps": 850, "total_steps": 1647, "loss": 0.2523, "lr": 5.6026834012766155e-06, "epoch": 0.5163249810174639, "percentage": 51.61, "elapsed_time": "10:10:20", "remaining_time": "9:32:17"} +{"current_steps": 851, "total_steps": 1647, "loss": 0.2884, "lr": 5.592160169372734e-06, "epoch": 0.5169324221716022, "percentage": 51.67, "elapsed_time": "10:11:05", "remaining_time": "9:31:36"} +{"current_steps": 852, "total_steps": 1647, "loss": 0.2701, "lr": 5.581634276484211e-06, "epoch": 0.5175398633257403, "percentage": 51.73, "elapsed_time": "10:11:51", "remaining_time": "9:30:55"} +{"current_steps": 853, "total_steps": 1647, "loss": 0.3305, "lr": 5.571105769911159e-06, "epoch": 0.5181473044798786, "percentage": 51.79, "elapsed_time": "10:12:36", "remaining_time": "9:30:14"} +{"current_steps": 854, "total_steps": 1647, "loss": 0.2401, "lr": 5.560574696965425e-06, "epoch": 0.5187547456340167, "percentage": 51.85, "elapsed_time": "10:13:23", "remaining_time": "9:29:34"} +{"current_steps": 855, "total_steps": 1647, "loss": 0.2647, "lr": 5.550041104970398e-06, "epoch": 0.5193621867881549, "percentage": 51.91, "elapsed_time": "10:14:09", "remaining_time": "9:28:53"} +{"current_steps": 856, "total_steps": 1647, "loss": 0.2867, "lr": 5.539505041260779e-06, 
"epoch": 0.5199696279422931, "percentage": 51.97, "elapsed_time": "10:14:52", "remaining_time": "9:28:10"} +{"current_steps": 857, "total_steps": 1647, "loss": 0.2492, "lr": 5.528966553182379e-06, "epoch": 0.5205770690964313, "percentage": 52.03, "elapsed_time": "10:15:32", "remaining_time": "9:27:25"} +{"current_steps": 858, "total_steps": 1647, "loss": 0.2945, "lr": 5.518425688091906e-06, "epoch": 0.5211845102505694, "percentage": 52.09, "elapsed_time": "10:16:16", "remaining_time": "9:26:42"} +{"current_steps": 859, "total_steps": 1647, "loss": 0.2579, "lr": 5.507882493356745e-06, "epoch": 0.5217919514047077, "percentage": 52.16, "elapsed_time": "10:17:01", "remaining_time": "9:26:01"} +{"current_steps": 860, "total_steps": 1647, "loss": 0.2843, "lr": 5.497337016354757e-06, "epoch": 0.5223993925588458, "percentage": 52.22, "elapsed_time": "10:17:44", "remaining_time": "9:25:18"} +{"current_steps": 861, "total_steps": 1647, "loss": 0.2463, "lr": 5.486789304474047e-06, "epoch": 0.5230068337129841, "percentage": 52.28, "elapsed_time": "10:18:28", "remaining_time": "9:24:35"} +{"current_steps": 862, "total_steps": 1647, "loss": 0.2961, "lr": 5.476239405112775e-06, "epoch": 0.5236142748671222, "percentage": 52.34, "elapsed_time": "10:19:10", "remaining_time": "9:23:52"} +{"current_steps": 863, "total_steps": 1647, "loss": 0.2883, "lr": 5.465687365678921e-06, "epoch": 0.5242217160212604, "percentage": 52.4, "elapsed_time": "10:19:51", "remaining_time": "9:23:07"} +{"current_steps": 864, "total_steps": 1647, "loss": 0.3138, "lr": 5.45513323359009e-06, "epoch": 0.5248291571753987, "percentage": 52.46, "elapsed_time": "10:20:32", "remaining_time": "9:22:22"} +{"current_steps": 865, "total_steps": 1647, "loss": 0.2755, "lr": 5.444577056273284e-06, "epoch": 0.5254365983295368, "percentage": 52.52, "elapsed_time": "10:21:16", "remaining_time": "9:21:40"} +{"current_steps": 866, "total_steps": 1647, "loss": 0.2701, "lr": 5.434018881164702e-06, "epoch": 0.5260440394836751, 
"percentage": 52.58, "elapsed_time": "10:21:56", "remaining_time": "9:20:53"} +{"current_steps": 867, "total_steps": 1647, "loss": 0.284, "lr": 5.423458755709516e-06, "epoch": 0.5266514806378132, "percentage": 52.64, "elapsed_time": "10:22:40", "remaining_time": "9:20:11"} +{"current_steps": 868, "total_steps": 1647, "loss": 0.2381, "lr": 5.412896727361663e-06, "epoch": 0.5272589217919514, "percentage": 52.7, "elapsed_time": "10:23:20", "remaining_time": "9:19:25"} +{"current_steps": 869, "total_steps": 1647, "loss": 0.2748, "lr": 5.402332843583631e-06, "epoch": 0.5278663629460896, "percentage": 52.76, "elapsed_time": "10:24:01", "remaining_time": "9:18:40"} +{"current_steps": 870, "total_steps": 1647, "loss": 0.2717, "lr": 5.391767151846247e-06, "epoch": 0.5284738041002278, "percentage": 52.82, "elapsed_time": "10:24:45", "remaining_time": "9:17:58"} +{"current_steps": 871, "total_steps": 1647, "loss": 0.2982, "lr": 5.381199699628459e-06, "epoch": 0.529081245254366, "percentage": 52.88, "elapsed_time": "10:25:31", "remaining_time": "9:17:17"} +{"current_steps": 872, "total_steps": 1647, "loss": 0.2531, "lr": 5.370630534417133e-06, "epoch": 0.5296886864085042, "percentage": 52.94, "elapsed_time": "10:26:15", "remaining_time": "9:16:35"} +{"current_steps": 873, "total_steps": 1647, "loss": 0.2995, "lr": 5.360059703706823e-06, "epoch": 0.5302961275626423, "percentage": 53.01, "elapsed_time": "10:26:58", "remaining_time": "9:15:52"} +{"current_steps": 874, "total_steps": 1647, "loss": 0.2959, "lr": 5.349487254999579e-06, "epoch": 0.5309035687167806, "percentage": 53.07, "elapsed_time": "10:27:38", "remaining_time": "9:15:06"} +{"current_steps": 875, "total_steps": 1647, "loss": 0.28, "lr": 5.3389132358047115e-06, "epoch": 0.5315110098709187, "percentage": 53.13, "elapsed_time": "10:28:21", "remaining_time": "9:14:23"} +{"current_steps": 876, "total_steps": 1647, "loss": 0.2856, "lr": 5.328337693638591e-06, "epoch": 0.532118451025057, "percentage": 53.19, 
"elapsed_time": "10:29:05", "remaining_time": "9:13:40"} +{"current_steps": 877, "total_steps": 1647, "loss": 0.2757, "lr": 5.317760676024436e-06, "epoch": 0.5327258921791952, "percentage": 53.25, "elapsed_time": "10:29:48", "remaining_time": "9:12:58"} +{"current_steps": 878, "total_steps": 1647, "loss": 0.2757, "lr": 5.307182230492089e-06, "epoch": 0.5333333333333333, "percentage": 53.31, "elapsed_time": "10:30:33", "remaining_time": "9:12:16"} +{"current_steps": 879, "total_steps": 1647, "loss": 0.2455, "lr": 5.296602404577814e-06, "epoch": 0.5339407744874716, "percentage": 53.37, "elapsed_time": "10:31:17", "remaining_time": "9:11:34"} +{"current_steps": 880, "total_steps": 1647, "loss": 0.2947, "lr": 5.286021245824075e-06, "epoch": 0.5345482156416097, "percentage": 53.43, "elapsed_time": "10:31:57", "remaining_time": "9:10:48"} +{"current_steps": 881, "total_steps": 1647, "loss": 0.2687, "lr": 5.275438801779328e-06, "epoch": 0.535155656795748, "percentage": 53.49, "elapsed_time": "10:32:45", "remaining_time": "9:10:09"} +{"current_steps": 882, "total_steps": 1647, "loss": 0.283, "lr": 5.264855119997803e-06, "epoch": 0.5357630979498861, "percentage": 53.55, "elapsed_time": "10:33:28", "remaining_time": "9:09:26"} +{"current_steps": 883, "total_steps": 1647, "loss": 0.2448, "lr": 5.254270248039291e-06, "epoch": 0.5363705391040243, "percentage": 53.61, "elapsed_time": "10:34:08", "remaining_time": "9:08:41"} +{"current_steps": 884, "total_steps": 1647, "loss": 0.285, "lr": 5.243684233468933e-06, "epoch": 0.5369779802581625, "percentage": 53.67, "elapsed_time": "10:34:50", "remaining_time": "9:07:56"} +{"current_steps": 885, "total_steps": 1647, "loss": 0.2556, "lr": 5.233097123857004e-06, "epoch": 0.5375854214123007, "percentage": 53.73, "elapsed_time": "10:35:29", "remaining_time": "9:07:10"} +{"current_steps": 886, "total_steps": 1647, "loss": 0.2484, "lr": 5.222508966778702e-06, "epoch": 0.5381928625664388, "percentage": 53.79, "elapsed_time": "10:36:11", 
"remaining_time": "9:06:25"} +{"current_steps": 887, "total_steps": 1647, "loss": 0.2568, "lr": 5.211919809813927e-06, "epoch": 0.5388003037205771, "percentage": 53.86, "elapsed_time": "10:36:52", "remaining_time": "9:05:40"} +{"current_steps": 888, "total_steps": 1647, "loss": 0.296, "lr": 5.201329700547077e-06, "epoch": 0.5394077448747152, "percentage": 53.92, "elapsed_time": "10:37:35", "remaining_time": "9:04:58"} +{"current_steps": 889, "total_steps": 1647, "loss": 0.2641, "lr": 5.190738686566826e-06, "epoch": 0.5400151860288535, "percentage": 53.98, "elapsed_time": "10:38:19", "remaining_time": "9:04:15"} +{"current_steps": 890, "total_steps": 1647, "loss": 0.28, "lr": 5.180146815465915e-06, "epoch": 0.5406226271829917, "percentage": 54.04, "elapsed_time": "10:39:04", "remaining_time": "9:03:34"} +{"current_steps": 891, "total_steps": 1647, "loss": 0.2646, "lr": 5.169554134840937e-06, "epoch": 0.5412300683371298, "percentage": 54.1, "elapsed_time": "10:39:46", "remaining_time": "9:02:50"} +{"current_steps": 892, "total_steps": 1647, "loss": 0.267, "lr": 5.158960692292122e-06, "epoch": 0.5418375094912681, "percentage": 54.16, "elapsed_time": "10:40:27", "remaining_time": "9:02:05"} +{"current_steps": 893, "total_steps": 1647, "loss": 0.2777, "lr": 5.148366535423126e-06, "epoch": 0.5424449506454062, "percentage": 54.22, "elapsed_time": "10:41:11", "remaining_time": "9:01:23"} +{"current_steps": 894, "total_steps": 1647, "loss": 0.2678, "lr": 5.137771711840811e-06, "epoch": 0.5430523917995445, "percentage": 54.28, "elapsed_time": "10:41:53", "remaining_time": "9:00:39"} +{"current_steps": 895, "total_steps": 1647, "loss": 0.2639, "lr": 5.1271762691550375e-06, "epoch": 0.5436598329536826, "percentage": 54.34, "elapsed_time": "10:42:37", "remaining_time": "8:59:56"} +{"current_steps": 896, "total_steps": 1647, "loss": 0.2659, "lr": 5.116580254978447e-06, "epoch": 0.5442672741078208, "percentage": 54.4, "elapsed_time": "10:43:18", "remaining_time": "8:59:12"} 
+{"current_steps": 897, "total_steps": 1647, "loss": 0.2657, "lr": 5.1059837169262506e-06, "epoch": 0.544874715261959, "percentage": 54.46, "elapsed_time": "10:44:00", "remaining_time": "8:58:28"} +{"current_steps": 898, "total_steps": 1647, "loss": 0.2737, "lr": 5.095386702616012e-06, "epoch": 0.5454821564160972, "percentage": 54.52, "elapsed_time": "10:44:40", "remaining_time": "8:57:42"} +{"current_steps": 899, "total_steps": 1647, "loss": 0.2229, "lr": 5.084789259667437e-06, "epoch": 0.5460895975702353, "percentage": 54.58, "elapsed_time": "10:45:25", "remaining_time": "8:57:01"} +{"current_steps": 900, "total_steps": 1647, "loss": 0.2621, "lr": 5.074191435702155e-06, "epoch": 0.5466970387243736, "percentage": 54.64, "elapsed_time": "10:46:10", "remaining_time": "8:56:19"} +{"current_steps": 901, "total_steps": 1647, "loss": 0.2735, "lr": 5.06359327834351e-06, "epoch": 0.5473044798785117, "percentage": 54.71, "elapsed_time": "10:46:57", "remaining_time": "8:55:39"} +{"current_steps": 902, "total_steps": 1647, "loss": 0.2804, "lr": 5.05299483521634e-06, "epoch": 0.54791192103265, "percentage": 54.77, "elapsed_time": "10:47:43", "remaining_time": "8:54:58"} +{"current_steps": 903, "total_steps": 1647, "loss": 0.251, "lr": 5.0423961539467754e-06, "epoch": 0.5485193621867882, "percentage": 54.83, "elapsed_time": "10:48:22", "remaining_time": "8:54:12"} +{"current_steps": 904, "total_steps": 1647, "loss": 0.275, "lr": 5.031797282162007e-06, "epoch": 0.5491268033409263, "percentage": 54.89, "elapsed_time": "10:49:06", "remaining_time": "8:53:29"} +{"current_steps": 905, "total_steps": 1647, "loss": 0.3109, "lr": 5.021198267490088e-06, "epoch": 0.5497342444950646, "percentage": 54.95, "elapsed_time": "10:49:49", "remaining_time": "8:52:46"} +{"current_steps": 906, "total_steps": 1647, "loss": 0.2744, "lr": 5.010599157559713e-06, "epoch": 0.5503416856492027, "percentage": 55.01, "elapsed_time": "10:50:31", "remaining_time": "8:52:02"} +{"current_steps": 907, 
"total_steps": 1647, "loss": 0.2833, "lr": 5e-06, "epoch": 0.550949126803341, "percentage": 55.07, "elapsed_time": "10:51:13", "remaining_time": "8:51:19"} +{"current_steps": 908, "total_steps": 1647, "loss": 0.2738, "lr": 4.98940084244029e-06, "epoch": 0.5515565679574791, "percentage": 55.13, "elapsed_time": "10:51:56", "remaining_time": "8:50:36"} +{"current_steps": 909, "total_steps": 1647, "loss": 0.2902, "lr": 4.9788017325099134e-06, "epoch": 0.5521640091116173, "percentage": 55.19, "elapsed_time": "10:52:40", "remaining_time": "8:49:53"} +{"current_steps": 910, "total_steps": 1647, "loss": 0.2448, "lr": 4.968202717837996e-06, "epoch": 0.5527714502657555, "percentage": 55.25, "elapsed_time": "10:53:25", "remaining_time": "8:49:11"} +{"current_steps": 911, "total_steps": 1647, "loss": 0.2777, "lr": 4.957603846053225e-06, "epoch": 0.5533788914198937, "percentage": 55.31, "elapsed_time": "10:54:06", "remaining_time": "8:48:27"} +{"current_steps": 912, "total_steps": 1647, "loss": 0.252, "lr": 4.947005164783661e-06, "epoch": 0.5539863325740318, "percentage": 55.37, "elapsed_time": "10:54:46", "remaining_time": "8:47:41"} +{"current_steps": 913, "total_steps": 1647, "loss": 0.262, "lr": 4.936406721656492e-06, "epoch": 0.5545937737281701, "percentage": 55.43, "elapsed_time": "10:55:27", "remaining_time": "8:46:57"} +{"current_steps": 914, "total_steps": 1647, "loss": 0.2809, "lr": 4.925808564297847e-06, "epoch": 0.5552012148823082, "percentage": 55.49, "elapsed_time": "10:56:10", "remaining_time": "8:46:14"} +{"current_steps": 915, "total_steps": 1647, "loss": 0.2727, "lr": 4.915210740332564e-06, "epoch": 0.5558086560364465, "percentage": 55.56, "elapsed_time": "10:56:55", "remaining_time": "8:45:32"} +{"current_steps": 916, "total_steps": 1647, "loss": 0.3337, "lr": 4.9046132973839895e-06, "epoch": 0.5564160971905847, "percentage": 55.62, "elapsed_time": "10:57:39", "remaining_time": "8:44:50"} +{"current_steps": 917, "total_steps": 1647, "loss": 0.2598, "lr": 
4.894016283073753e-06, "epoch": 0.5570235383447228, "percentage": 55.68, "elapsed_time": "10:58:23", "remaining_time": "8:44:07"} +{"current_steps": 918, "total_steps": 1647, "loss": 0.2483, "lr": 4.883419745021554e-06, "epoch": 0.5576309794988611, "percentage": 55.74, "elapsed_time": "10:59:04", "remaining_time": "8:43:23"} +{"current_steps": 919, "total_steps": 1647, "loss": 0.255, "lr": 4.872823730844966e-06, "epoch": 0.5582384206529992, "percentage": 55.8, "elapsed_time": "10:59:45", "remaining_time": "8:42:37"} +{"current_steps": 920, "total_steps": 1647, "loss": 0.2804, "lr": 4.862228288159191e-06, "epoch": 0.5588458618071375, "percentage": 55.86, "elapsed_time": "11:00:29", "remaining_time": "8:41:55"} +{"current_steps": 921, "total_steps": 1647, "loss": 0.2487, "lr": 4.851633464576876e-06, "epoch": 0.5594533029612756, "percentage": 55.92, "elapsed_time": "11:01:09", "remaining_time": "8:41:10"} +{"current_steps": 922, "total_steps": 1647, "loss": 0.2567, "lr": 4.841039307707878e-06, "epoch": 0.5600607441154138, "percentage": 55.98, "elapsed_time": "11:01:49", "remaining_time": "8:40:25"} +{"current_steps": 923, "total_steps": 1647, "loss": 0.2736, "lr": 4.8304458651590645e-06, "epoch": 0.560668185269552, "percentage": 56.04, "elapsed_time": "11:02:30", "remaining_time": "8:39:40"} +{"current_steps": 924, "total_steps": 1647, "loss": 0.2638, "lr": 4.819853184534085e-06, "epoch": 0.5612756264236902, "percentage": 56.1, "elapsed_time": "11:03:13", "remaining_time": "8:38:57"} +{"current_steps": 925, "total_steps": 1647, "loss": 0.2582, "lr": 4.809261313433176e-06, "epoch": 0.5618830675778284, "percentage": 56.16, "elapsed_time": "11:03:58", "remaining_time": "8:38:15"} +{"current_steps": 926, "total_steps": 1647, "loss": 0.2606, "lr": 4.798670299452926e-06, "epoch": 0.5624905087319666, "percentage": 56.22, "elapsed_time": "11:04:41", "remaining_time": "8:37:32"} +{"current_steps": 927, "total_steps": 1647, "loss": 0.2725, "lr": 4.788080190186075e-06, "epoch": 
0.5630979498861047, "percentage": 56.28, "elapsed_time": "11:05:23", "remaining_time": "8:36:48"} +{"current_steps": 928, "total_steps": 1647, "loss": 0.2889, "lr": 4.7774910332213005e-06, "epoch": 0.563705391040243, "percentage": 56.34, "elapsed_time": "11:06:06", "remaining_time": "8:36:05"} +{"current_steps": 929, "total_steps": 1647, "loss": 0.2536, "lr": 4.766902876142996e-06, "epoch": 0.5643128321943812, "percentage": 56.41, "elapsed_time": "11:06:50", "remaining_time": "8:35:22"} +{"current_steps": 930, "total_steps": 1647, "loss": 0.2748, "lr": 4.756315766531069e-06, "epoch": 0.5649202733485194, "percentage": 56.47, "elapsed_time": "11:07:31", "remaining_time": "8:34:38"} +{"current_steps": 931, "total_steps": 1647, "loss": 0.2453, "lr": 4.74572975196071e-06, "epoch": 0.5655277145026576, "percentage": 56.53, "elapsed_time": "11:08:16", "remaining_time": "8:33:56"} +{"current_steps": 932, "total_steps": 1647, "loss": 0.2834, "lr": 4.735144880002199e-06, "epoch": 0.5661351556567957, "percentage": 56.59, "elapsed_time": "11:09:02", "remaining_time": "8:33:16"} +{"current_steps": 933, "total_steps": 1647, "loss": 0.2525, "lr": 4.724561198220672e-06, "epoch": 0.566742596810934, "percentage": 56.65, "elapsed_time": "11:09:52", "remaining_time": "8:32:38"} +{"current_steps": 934, "total_steps": 1647, "loss": 0.2698, "lr": 4.713978754175926e-06, "epoch": 0.5673500379650721, "percentage": 56.71, "elapsed_time": "11:10:36", "remaining_time": "8:31:56"} +{"current_steps": 935, "total_steps": 1647, "loss": 0.2674, "lr": 4.703397595422188e-06, "epoch": 0.5679574791192104, "percentage": 56.77, "elapsed_time": "11:11:21", "remaining_time": "8:31:13"} +{"current_steps": 936, "total_steps": 1647, "loss": 0.2684, "lr": 4.692817769507912e-06, "epoch": 0.5685649202733485, "percentage": 56.83, "elapsed_time": "11:12:04", "remaining_time": "8:30:31"} +{"current_steps": 937, "total_steps": 1647, "loss": 0.2558, "lr": 4.682239323975566e-06, "epoch": 0.5691723614274867, 
"percentage": 56.89, "elapsed_time": "11:12:50", "remaining_time": "8:29:50"} +{"current_steps": 938, "total_steps": 1647, "loss": 0.2935, "lr": 4.671662306361409e-06, "epoch": 0.5697798025816249, "percentage": 56.95, "elapsed_time": "11:13:34", "remaining_time": "8:29:07"} +{"current_steps": 939, "total_steps": 1647, "loss": 0.294, "lr": 4.66108676419529e-06, "epoch": 0.5703872437357631, "percentage": 57.01, "elapsed_time": "11:14:16", "remaining_time": "8:28:23"} +{"current_steps": 940, "total_steps": 1647, "loss": 0.2632, "lr": 4.6505127450004216e-06, "epoch": 0.5709946848899012, "percentage": 57.07, "elapsed_time": "11:14:59", "remaining_time": "8:27:41"} +{"current_steps": 941, "total_steps": 1647, "loss": 0.2688, "lr": 4.6399402962931775e-06, "epoch": 0.5716021260440395, "percentage": 57.13, "elapsed_time": "11:15:42", "remaining_time": "8:26:57"} +{"current_steps": 942, "total_steps": 1647, "loss": 0.2712, "lr": 4.62936946558287e-06, "epoch": 0.5722095671981777, "percentage": 57.19, "elapsed_time": "11:16:25", "remaining_time": "8:26:14"} +{"current_steps": 943, "total_steps": 1647, "loss": 0.2545, "lr": 4.618800300371543e-06, "epoch": 0.5728170083523159, "percentage": 57.26, "elapsed_time": "11:17:06", "remaining_time": "8:25:29"} +{"current_steps": 944, "total_steps": 1647, "loss": 0.2412, "lr": 4.608232848153757e-06, "epoch": 0.5734244495064541, "percentage": 57.32, "elapsed_time": "11:17:49", "remaining_time": "8:24:46"} +{"current_steps": 945, "total_steps": 1647, "loss": 0.2893, "lr": 4.597667156416371e-06, "epoch": 0.5740318906605922, "percentage": 57.38, "elapsed_time": "11:18:32", "remaining_time": "8:24:03"} +{"current_steps": 946, "total_steps": 1647, "loss": 0.272, "lr": 4.587103272638339e-06, "epoch": 0.5746393318147305, "percentage": 57.44, "elapsed_time": "11:19:15", "remaining_time": "8:23:20"} +{"current_steps": 947, "total_steps": 1647, "loss": 0.2735, "lr": 4.576541244290484e-06, "epoch": 0.5752467729688686, "percentage": 57.5, 
"elapsed_time": "11:19:59", "remaining_time": "8:22:38"} +{"current_steps": 948, "total_steps": 1647, "loss": 0.2747, "lr": 4.565981118835299e-06, "epoch": 0.5758542141230069, "percentage": 57.56, "elapsed_time": "11:20:38", "remaining_time": "8:21:51"} +{"current_steps": 949, "total_steps": 1647, "loss": 0.318, "lr": 4.555422943726715e-06, "epoch": 0.576461655277145, "percentage": 57.62, "elapsed_time": "11:21:18", "remaining_time": "8:21:06"} +{"current_steps": 950, "total_steps": 1647, "loss": 0.2991, "lr": 4.5448667664099125e-06, "epoch": 0.5770690964312832, "percentage": 57.68, "elapsed_time": "11:22:06", "remaining_time": "8:20:27"} +{"current_steps": 951, "total_steps": 1647, "loss": 0.2748, "lr": 4.534312634321081e-06, "epoch": 0.5776765375854214, "percentage": 57.74, "elapsed_time": "11:22:50", "remaining_time": "8:19:44"} +{"current_steps": 952, "total_steps": 1647, "loss": 0.246, "lr": 4.523760594887228e-06, "epoch": 0.5782839787395596, "percentage": 57.8, "elapsed_time": "11:23:36", "remaining_time": "8:19:03"} +{"current_steps": 953, "total_steps": 1647, "loss": 0.2521, "lr": 4.513210695525954e-06, "epoch": 0.5788914198936977, "percentage": 57.86, "elapsed_time": "11:24:18", "remaining_time": "8:18:19"} +{"current_steps": 954, "total_steps": 1647, "loss": 0.2965, "lr": 4.5026629836452445e-06, "epoch": 0.579498861047836, "percentage": 57.92, "elapsed_time": "11:25:02", "remaining_time": "8:17:37"} +{"current_steps": 955, "total_steps": 1647, "loss": 0.2487, "lr": 4.492117506643256e-06, "epoch": 0.5801063022019742, "percentage": 57.98, "elapsed_time": "11:25:43", "remaining_time": "8:16:52"} +{"current_steps": 956, "total_steps": 1647, "loss": 0.2714, "lr": 4.481574311908096e-06, "epoch": 0.5807137433561124, "percentage": 58.04, "elapsed_time": "11:26:23", "remaining_time": "8:16:07"} +{"current_steps": 957, "total_steps": 1647, "loss": 0.2645, "lr": 4.471033446817623e-06, "epoch": 0.5813211845102506, "percentage": 58.11, "elapsed_time": "11:27:06", 
"remaining_time": "8:15:24"} +{"current_steps": 958, "total_steps": 1647, "loss": 0.2827, "lr": 4.460494958739223e-06, "epoch": 0.5819286256643887, "percentage": 58.17, "elapsed_time": "11:27:47", "remaining_time": "8:14:40"} +{"current_steps": 959, "total_steps": 1647, "loss": 0.2889, "lr": 4.449958895029604e-06, "epoch": 0.582536066818527, "percentage": 58.23, "elapsed_time": "11:28:29", "remaining_time": "8:13:55"} +{"current_steps": 960, "total_steps": 1647, "loss": 0.2438, "lr": 4.439425303034576e-06, "epoch": 0.5831435079726651, "percentage": 58.29, "elapsed_time": "11:29:11", "remaining_time": "8:13:11"} +{"current_steps": 961, "total_steps": 1647, "loss": 0.2567, "lr": 4.428894230088842e-06, "epoch": 0.5837509491268034, "percentage": 58.35, "elapsed_time": "11:29:53", "remaining_time": "8:12:28"} +{"current_steps": 962, "total_steps": 1647, "loss": 0.2777, "lr": 4.418365723515791e-06, "epoch": 0.5843583902809415, "percentage": 58.41, "elapsed_time": "11:30:35", "remaining_time": "8:11:44"} +{"current_steps": 963, "total_steps": 1647, "loss": 0.2723, "lr": 4.407839830627269e-06, "epoch": 0.5849658314350797, "percentage": 58.47, "elapsed_time": "11:31:18", "remaining_time": "8:11:01"} +{"current_steps": 964, "total_steps": 1647, "loss": 0.261, "lr": 4.397316598723385e-06, "epoch": 0.5855732725892179, "percentage": 58.53, "elapsed_time": "11:31:59", "remaining_time": "8:10:17"} +{"current_steps": 965, "total_steps": 1647, "loss": 0.2785, "lr": 4.38679607509229e-06, "epoch": 0.5861807137433561, "percentage": 58.59, "elapsed_time": "11:32:46", "remaining_time": "8:09:36"} +{"current_steps": 966, "total_steps": 1647, "loss": 0.3026, "lr": 4.376278307009962e-06, "epoch": 0.5867881548974943, "percentage": 58.65, "elapsed_time": "11:33:29", "remaining_time": "8:08:53"} +{"current_steps": 967, "total_steps": 1647, "loss": 0.2583, "lr": 4.365763341739996e-06, "epoch": 0.5873955960516325, "percentage": 58.71, "elapsed_time": "11:34:09", "remaining_time": "8:08:07"} 
+{"current_steps": 968, "total_steps": 1647, "loss": 0.2825, "lr": 4.355251226533396e-06, "epoch": 0.5880030372057707, "percentage": 58.77, "elapsed_time": "11:34:54", "remaining_time": "8:07:26"} +{"current_steps": 969, "total_steps": 1647, "loss": 0.2525, "lr": 4.344742008628356e-06, "epoch": 0.5886104783599089, "percentage": 58.83, "elapsed_time": "11:35:41", "remaining_time": "8:06:45"} +{"current_steps": 970, "total_steps": 1647, "loss": 0.2289, "lr": 4.334235735250047e-06, "epoch": 0.5892179195140471, "percentage": 58.89, "elapsed_time": "11:36:25", "remaining_time": "8:06:03"} +{"current_steps": 971, "total_steps": 1647, "loss": 0.2478, "lr": 4.3237324536104165e-06, "epoch": 0.5898253606681853, "percentage": 58.96, "elapsed_time": "11:37:08", "remaining_time": "8:05:20"} +{"current_steps": 972, "total_steps": 1647, "loss": 0.2898, "lr": 4.313232210907959e-06, "epoch": 0.5904328018223235, "percentage": 59.02, "elapsed_time": "11:37:48", "remaining_time": "8:04:35"} +{"current_steps": 973, "total_steps": 1647, "loss": 0.2682, "lr": 4.302735054327523e-06, "epoch": 0.5910402429764616, "percentage": 59.08, "elapsed_time": "11:38:30", "remaining_time": "8:03:51"} +{"current_steps": 974, "total_steps": 1647, "loss": 0.2485, "lr": 4.292241031040077e-06, "epoch": 0.5916476841305999, "percentage": 59.14, "elapsed_time": "11:39:13", "remaining_time": "8:03:08"} +{"current_steps": 975, "total_steps": 1647, "loss": 0.249, "lr": 4.28175018820252e-06, "epoch": 0.592255125284738, "percentage": 59.2, "elapsed_time": "11:39:58", "remaining_time": "8:02:26"} +{"current_steps": 976, "total_steps": 1647, "loss": 0.2877, "lr": 4.271262572957453e-06, "epoch": 0.5928625664388762, "percentage": 59.26, "elapsed_time": "11:40:38", "remaining_time": "8:01:41"} +{"current_steps": 977, "total_steps": 1647, "loss": 0.3063, "lr": 4.2607782324329776e-06, "epoch": 0.5934700075930144, "percentage": 59.32, "elapsed_time": "11:41:19", "remaining_time": "8:00:57"} +{"current_steps": 978, 
"total_steps": 1647, "loss": 0.2405, "lr": 4.250297213742473e-06, "epoch": 0.5940774487471526, "percentage": 59.38, "elapsed_time": "11:42:06", "remaining_time": "8:00:16"} +{"current_steps": 979, "total_steps": 1647, "loss": 0.304, "lr": 4.239819563984397e-06, "epoch": 0.5946848899012908, "percentage": 59.44, "elapsed_time": "11:42:48", "remaining_time": "7:59:33"} +{"current_steps": 980, "total_steps": 1647, "loss": 0.2592, "lr": 4.229345330242067e-06, "epoch": 0.595292331055429, "percentage": 59.5, "elapsed_time": "11:43:36", "remaining_time": "7:58:52"} +{"current_steps": 981, "total_steps": 1647, "loss": 0.2656, "lr": 4.21887455958345e-06, "epoch": 0.5958997722095672, "percentage": 59.56, "elapsed_time": "11:44:19", "remaining_time": "7:58:10"} +{"current_steps": 982, "total_steps": 1647, "loss": 0.2793, "lr": 4.2084072990609505e-06, "epoch": 0.5965072133637054, "percentage": 59.62, "elapsed_time": "11:45:06", "remaining_time": "7:57:29"} +{"current_steps": 983, "total_steps": 1647, "loss": 0.2836, "lr": 4.1979435957111984e-06, "epoch": 0.5971146545178436, "percentage": 59.68, "elapsed_time": "11:45:54", "remaining_time": "7:56:49"} +{"current_steps": 984, "total_steps": 1647, "loss": 0.2583, "lr": 4.187483496554844e-06, "epoch": 0.5977220956719818, "percentage": 59.74, "elapsed_time": "11:46:35", "remaining_time": "7:56:05"} +{"current_steps": 985, "total_steps": 1647, "loss": 0.2675, "lr": 4.17702704859633e-06, "epoch": 0.59832953682612, "percentage": 59.81, "elapsed_time": "11:47:17", "remaining_time": "7:55:21"} +{"current_steps": 986, "total_steps": 1647, "loss": 0.2676, "lr": 4.166574298823707e-06, "epoch": 0.5989369779802581, "percentage": 59.87, "elapsed_time": "11:47:59", "remaining_time": "7:54:37"} +{"current_steps": 987, "total_steps": 1647, "loss": 0.242, "lr": 4.156125294208396e-06, "epoch": 0.5995444191343964, "percentage": 59.93, "elapsed_time": "11:48:41", "remaining_time": "7:53:53"} +{"current_steps": 988, "total_steps": 1647, "loss": 
0.2516, "lr": 4.145680081704989e-06, "epoch": 0.6001518602885345, "percentage": 59.99, "elapsed_time": "11:49:27", "remaining_time": "7:53:12"} +{"current_steps": 989, "total_steps": 1647, "loss": 0.2758, "lr": 4.135238708251045e-06, "epoch": 0.6007593014426728, "percentage": 60.05, "elapsed_time": "11:50:15", "remaining_time": "7:52:32"} +{"current_steps": 990, "total_steps": 1647, "loss": 0.2565, "lr": 4.1248012207668635e-06, "epoch": 0.6013667425968109, "percentage": 60.11, "elapsed_time": "11:50:57", "remaining_time": "7:51:49"} +{"current_steps": 991, "total_steps": 1647, "loss": 0.2828, "lr": 4.1143676661552876e-06, "epoch": 0.6019741837509491, "percentage": 60.17, "elapsed_time": "11:51:40", "remaining_time": "7:51:05"} +{"current_steps": 992, "total_steps": 1647, "loss": 0.2374, "lr": 4.103938091301479e-06, "epoch": 0.6025816249050873, "percentage": 60.23, "elapsed_time": "11:52:20", "remaining_time": "7:50:20"} +{"current_steps": 993, "total_steps": 1647, "loss": 0.256, "lr": 4.093512543072729e-06, "epoch": 0.6031890660592255, "percentage": 60.29, "elapsed_time": "11:52:59", "remaining_time": "7:49:35"} +{"current_steps": 994, "total_steps": 1647, "loss": 0.2732, "lr": 4.08309106831822e-06, "epoch": 0.6037965072133638, "percentage": 60.35, "elapsed_time": "11:53:41", "remaining_time": "7:48:51"} +{"current_steps": 995, "total_steps": 1647, "loss": 0.2808, "lr": 4.07267371386884e-06, "epoch": 0.6044039483675019, "percentage": 60.41, "elapsed_time": "11:54:22", "remaining_time": "7:48:06"} +{"current_steps": 996, "total_steps": 1647, "loss": 0.2936, "lr": 4.062260526536955e-06, "epoch": 0.6050113895216401, "percentage": 60.47, "elapsed_time": "11:55:07", "remaining_time": "7:47:24"} +{"current_steps": 997, "total_steps": 1647, "loss": 0.2797, "lr": 4.051851553116208e-06, "epoch": 0.6056188306757783, "percentage": 60.53, "elapsed_time": "11:55:48", "remaining_time": "7:46:40"} +{"current_steps": 998, "total_steps": 1647, "loss": 0.2847, "lr": 
4.041446840381309e-06, "epoch": 0.6062262718299165, "percentage": 60.6, "elapsed_time": "11:56:27", "remaining_time": "7:45:54"} +{"current_steps": 999, "total_steps": 1647, "loss": 0.2803, "lr": 4.0310464350878145e-06, "epoch": 0.6068337129840546, "percentage": 60.66, "elapsed_time": "11:57:07", "remaining_time": "7:45:09"} +{"current_steps": 1000, "total_steps": 1647, "loss": 0.2762, "lr": 4.0206503839719335e-06, "epoch": 0.6074411541381929, "percentage": 60.72, "elapsed_time": "11:57:46", "remaining_time": "7:44:24"} +{"current_steps": 1001, "total_steps": 1647, "loss": 0.2813, "lr": 4.0102587337503e-06, "epoch": 0.608048595292331, "percentage": 60.78, "elapsed_time": "11:59:17", "remaining_time": "7:44:11"} +{"current_steps": 1002, "total_steps": 1647, "loss": 0.2791, "lr": 3.999871531119779e-06, "epoch": 0.6086560364464693, "percentage": 60.84, "elapsed_time": "11:59:57", "remaining_time": "7:43:26"} +{"current_steps": 1003, "total_steps": 1647, "loss": 0.2529, "lr": 3.989488822757244e-06, "epoch": 0.6092634776006074, "percentage": 60.9, "elapsed_time": "12:00:41", "remaining_time": "7:42:43"} +{"current_steps": 1004, "total_steps": 1647, "loss": 0.2681, "lr": 3.9791106553193746e-06, "epoch": 0.6098709187547456, "percentage": 60.96, "elapsed_time": "12:01:26", "remaining_time": "7:42:02"} +{"current_steps": 1005, "total_steps": 1647, "loss": 0.2774, "lr": 3.968737075442449e-06, "epoch": 0.6104783599088838, "percentage": 61.02, "elapsed_time": "12:02:06", "remaining_time": "7:41:17"} +{"current_steps": 1006, "total_steps": 1647, "loss": 0.2738, "lr": 3.9583681297421194e-06, "epoch": 0.611085801063022, "percentage": 61.08, "elapsed_time": "12:02:52", "remaining_time": "7:40:35"} +{"current_steps": 1007, "total_steps": 1647, "loss": 0.258, "lr": 3.9480038648132285e-06, "epoch": 0.6116932422171603, "percentage": 61.14, "elapsed_time": "12:03:33", "remaining_time": "7:39:51"} +{"current_steps": 1008, "total_steps": 1647, "loss": 0.256, "lr": 3.937644327229572e-06, 
"epoch": 0.6123006833712984, "percentage": 61.2, "elapsed_time": "12:04:17", "remaining_time": "7:39:09"} +{"current_steps": 1009, "total_steps": 1647, "loss": 0.2139, "lr": 3.927289563543709e-06, "epoch": 0.6129081245254366, "percentage": 61.26, "elapsed_time": "12:05:04", "remaining_time": "7:38:28"} +{"current_steps": 1010, "total_steps": 1647, "loss": 0.3042, "lr": 3.916939620286743e-06, "epoch": 0.6135155656795748, "percentage": 61.32, "elapsed_time": "12:05:44", "remaining_time": "7:37:43"} +{"current_steps": 1011, "total_steps": 1647, "loss": 0.2461, "lr": 3.906594543968122e-06, "epoch": 0.614123006833713, "percentage": 61.38, "elapsed_time": "12:06:27", "remaining_time": "7:36:59"} +{"current_steps": 1012, "total_steps": 1647, "loss": 0.2135, "lr": 3.896254381075416e-06, "epoch": 0.6147304479878511, "percentage": 61.45, "elapsed_time": "12:07:13", "remaining_time": "7:36:18"} +{"current_steps": 1013, "total_steps": 1647, "loss": 0.2656, "lr": 3.885919178074116e-06, "epoch": 0.6153378891419894, "percentage": 61.51, "elapsed_time": "12:08:00", "remaining_time": "7:35:38"} +{"current_steps": 1014, "total_steps": 1647, "loss": 0.2696, "lr": 3.875588981407433e-06, "epoch": 0.6159453302961275, "percentage": 61.57, "elapsed_time": "12:08:43", "remaining_time": "7:34:54"} +{"current_steps": 1015, "total_steps": 1647, "loss": 0.2807, "lr": 3.865263837496072e-06, "epoch": 0.6165527714502658, "percentage": 61.63, "elapsed_time": "12:09:26", "remaining_time": "7:34:11"} +{"current_steps": 1016, "total_steps": 1647, "loss": 0.2724, "lr": 3.854943792738037e-06, "epoch": 0.6171602126044039, "percentage": 61.69, "elapsed_time": "12:10:12", "remaining_time": "7:33:30"} +{"current_steps": 1017, "total_steps": 1647, "loss": 0.2849, "lr": 3.844628893508417e-06, "epoch": 0.6177676537585421, "percentage": 61.75, "elapsed_time": "12:10:55", "remaining_time": "7:32:47"} +{"current_steps": 1018, "total_steps": 1647, "loss": 0.2807, "lr": 3.834319186159179e-06, "epoch": 
0.6183750949126803, "percentage": 61.81, "elapsed_time": "12:11:37", "remaining_time": "7:32:03"} +{"current_steps": 1019, "total_steps": 1647, "loss": 0.2674, "lr": 3.8240147170189575e-06, "epoch": 0.6189825360668185, "percentage": 61.87, "elapsed_time": "12:12:19", "remaining_time": "7:31:19"} +{"current_steps": 1020, "total_steps": 1647, "loss": 0.2801, "lr": 3.8137155323928526e-06, "epoch": 0.6195899772209568, "percentage": 61.93, "elapsed_time": "12:13:02", "remaining_time": "7:30:36"} +{"current_steps": 1021, "total_steps": 1647, "loss": 0.2464, "lr": 3.803421678562213e-06, "epoch": 0.6201974183750949, "percentage": 61.99, "elapsed_time": "12:13:46", "remaining_time": "7:29:53"} +{"current_steps": 1022, "total_steps": 1647, "loss": 0.2219, "lr": 3.7931332017844302e-06, "epoch": 0.6208048595292331, "percentage": 62.05, "elapsed_time": "12:14:28", "remaining_time": "7:29:10"} +{"current_steps": 1023, "total_steps": 1647, "loss": 0.2841, "lr": 3.7828501482927416e-06, "epoch": 0.6214123006833713, "percentage": 62.11, "elapsed_time": "12:15:14", "remaining_time": "7:28:28"} +{"current_steps": 1024, "total_steps": 1647, "loss": 0.2977, "lr": 3.7725725642960047e-06, "epoch": 0.6220197418375095, "percentage": 62.17, "elapsed_time": "12:15:57", "remaining_time": "7:27:45"} +{"current_steps": 1025, "total_steps": 1647, "loss": 0.2373, "lr": 3.7623004959785066e-06, "epoch": 0.6226271829916477, "percentage": 62.23, "elapsed_time": "12:16:43", "remaining_time": "7:27:03"} +{"current_steps": 1026, "total_steps": 1647, "loss": 0.2786, "lr": 3.752033989499742e-06, "epoch": 0.6232346241457859, "percentage": 62.3, "elapsed_time": "12:17:26", "remaining_time": "7:26:20"} +{"current_steps": 1027, "total_steps": 1647, "loss": 0.231, "lr": 3.7417730909942184e-06, "epoch": 0.623842065299924, "percentage": 62.36, "elapsed_time": "12:18:11", "remaining_time": "7:25:38"} +{"current_steps": 1028, "total_steps": 1647, "loss": 0.2623, "lr": 3.7315178465712364e-06, "epoch": 
0.6244495064540623, "percentage": 62.42, "elapsed_time": "12:18:55", "remaining_time": "7:24:56"} +{"current_steps": 1029, "total_steps": 1647, "loss": 0.262, "lr": 3.721268302314698e-06, "epoch": 0.6250569476082004, "percentage": 62.48, "elapsed_time": "12:19:37", "remaining_time": "7:24:12"} +{"current_steps": 1030, "total_steps": 1647, "loss": 0.2576, "lr": 3.7110245042828786e-06, "epoch": 0.6256643887623387, "percentage": 62.54, "elapsed_time": "12:20:23", "remaining_time": "7:23:31"} +{"current_steps": 1031, "total_steps": 1647, "loss": 0.2911, "lr": 3.70078649850824e-06, "epoch": 0.6262718299164769, "percentage": 62.6, "elapsed_time": "12:21:06", "remaining_time": "7:22:47"} +{"current_steps": 1032, "total_steps": 1647, "loss": 0.2699, "lr": 3.690554330997215e-06, "epoch": 0.626879271070615, "percentage": 62.66, "elapsed_time": "12:21:51", "remaining_time": "7:22:05"} +{"current_steps": 1033, "total_steps": 1647, "loss": 0.2449, "lr": 3.6803280477299975e-06, "epoch": 0.6274867122247533, "percentage": 62.72, "elapsed_time": "12:22:36", "remaining_time": "7:21:23"} +{"current_steps": 1034, "total_steps": 1647, "loss": 0.3003, "lr": 3.670107694660343e-06, "epoch": 0.6280941533788914, "percentage": 62.78, "elapsed_time": "12:23:20", "remaining_time": "7:20:41"} +{"current_steps": 1035, "total_steps": 1647, "loss": 0.2712, "lr": 3.659893317715355e-06, "epoch": 0.6287015945330297, "percentage": 62.84, "elapsed_time": "12:24:07", "remaining_time": "7:20:00"} +{"current_steps": 1036, "total_steps": 1647, "loss": 0.2838, "lr": 3.6496849627952875e-06, "epoch": 0.6293090356871678, "percentage": 62.9, "elapsed_time": "12:24:52", "remaining_time": "7:19:18"} +{"current_steps": 1037, "total_steps": 1647, "loss": 0.2729, "lr": 3.639482675773324e-06, "epoch": 0.629916476841306, "percentage": 62.96, "elapsed_time": "12:25:36", "remaining_time": "7:18:35"} +{"current_steps": 1038, "total_steps": 1647, "loss": 0.2541, "lr": 3.6292865024953945e-06, "epoch": 0.6305239179954442, 
"percentage": 63.02, "elapsed_time": "12:26:19", "remaining_time": "7:17:52"} +{"current_steps": 1039, "total_steps": 1647, "loss": 0.3177, "lr": 3.6190964887799418e-06, "epoch": 0.6311313591495824, "percentage": 63.08, "elapsed_time": "12:27:02", "remaining_time": "7:17:08"} +{"current_steps": 1040, "total_steps": 1647, "loss": 0.2253, "lr": 3.6089126804177373e-06, "epoch": 0.6317388003037205, "percentage": 63.15, "elapsed_time": "12:27:48", "remaining_time": "7:16:27"} +{"current_steps": 1041, "total_steps": 1647, "loss": 0.2484, "lr": 3.5987351231716665e-06, "epoch": 0.6323462414578588, "percentage": 63.21, "elapsed_time": "12:28:30", "remaining_time": "7:15:44"} +{"current_steps": 1042, "total_steps": 1647, "loss": 0.2747, "lr": 3.5885638627765228e-06, "epoch": 0.6329536826119969, "percentage": 63.27, "elapsed_time": "12:29:15", "remaining_time": "7:15:01"} +{"current_steps": 1043, "total_steps": 1647, "loss": 0.2631, "lr": 3.5783989449388063e-06, "epoch": 0.6335611237661352, "percentage": 63.33, "elapsed_time": "12:29:59", "remaining_time": "7:14:19"} +{"current_steps": 1044, "total_steps": 1647, "loss": 0.2438, "lr": 3.568240415336509e-06, "epoch": 0.6341685649202734, "percentage": 63.39, "elapsed_time": "12:30:47", "remaining_time": "7:13:38"} +{"current_steps": 1045, "total_steps": 1647, "loss": 0.2784, "lr": 3.5580883196189265e-06, "epoch": 0.6347760060744115, "percentage": 63.45, "elapsed_time": "12:31:27", "remaining_time": "7:12:53"} +{"current_steps": 1046, "total_steps": 1647, "loss": 0.2494, "lr": 3.547942703406433e-06, "epoch": 0.6353834472285498, "percentage": 63.51, "elapsed_time": "12:32:10", "remaining_time": "7:12:10"} +{"current_steps": 1047, "total_steps": 1647, "loss": 0.2277, "lr": 3.5378036122902907e-06, "epoch": 0.6359908883826879, "percentage": 63.57, "elapsed_time": "12:32:51", "remaining_time": "7:11:26"} +{"current_steps": 1048, "total_steps": 1647, "loss": 0.2479, "lr": 3.52767109183244e-06, "epoch": 0.6365983295368262, "percentage": 
63.63, "elapsed_time": "12:33:33", "remaining_time": "7:10:42"} +{"current_steps": 1049, "total_steps": 1647, "loss": 0.3218, "lr": 3.5175451875652906e-06, "epoch": 0.6372057706909643, "percentage": 63.69, "elapsed_time": "12:34:18", "remaining_time": "7:10:00"} +{"current_steps": 1050, "total_steps": 1647, "loss": 0.2782, "lr": 3.507425944991529e-06, "epoch": 0.6378132118451025, "percentage": 63.75, "elapsed_time": "12:35:02", "remaining_time": "7:09:17"} +{"current_steps": 1051, "total_steps": 1647, "loss": 0.2587, "lr": 3.4973134095838943e-06, "epoch": 0.6384206529992407, "percentage": 63.81, "elapsed_time": "12:35:47", "remaining_time": "7:08:35"} +{"current_steps": 1052, "total_steps": 1647, "loss": 0.2541, "lr": 3.4872076267850015e-06, "epoch": 0.6390280941533789, "percentage": 63.87, "elapsed_time": "12:36:30", "remaining_time": "7:07:52"} +{"current_steps": 1053, "total_steps": 1647, "loss": 0.2664, "lr": 3.4771086420071053e-06, "epoch": 0.639635535307517, "percentage": 63.93, "elapsed_time": "12:37:13", "remaining_time": "7:07:09"} +{"current_steps": 1054, "total_steps": 1647, "loss": 0.2799, "lr": 3.4670165006319236e-06, "epoch": 0.6402429764616553, "percentage": 64.0, "elapsed_time": "12:37:52", "remaining_time": "7:06:23"} +{"current_steps": 1055, "total_steps": 1647, "loss": 0.2829, "lr": 3.4569312480104157e-06, "epoch": 0.6408504176157934, "percentage": 64.06, "elapsed_time": "12:38:33", "remaining_time": "7:05:39"} +{"current_steps": 1056, "total_steps": 1647, "loss": 0.2574, "lr": 3.4468529294625895e-06, "epoch": 0.6414578587699317, "percentage": 64.12, "elapsed_time": "12:39:16", "remaining_time": "7:04:56"} +{"current_steps": 1057, "total_steps": 1647, "loss": 0.2562, "lr": 3.4367815902772917e-06, "epoch": 0.6420652999240699, "percentage": 64.18, "elapsed_time": "12:39:56", "remaining_time": "7:04:11"} +{"current_steps": 1058, "total_steps": 1647, "loss": 0.2635, "lr": 3.4267172757120005e-06, "epoch": 0.642672741078208, "percentage": 64.24, 
"elapsed_time": "12:40:36", "remaining_time": "7:03:26"} +{"current_steps": 1059, "total_steps": 1647, "loss": 0.2631, "lr": 3.416660030992639e-06, "epoch": 0.6432801822323463, "percentage": 64.3, "elapsed_time": "12:41:18", "remaining_time": "7:02:42"} +{"current_steps": 1060, "total_steps": 1647, "loss": 0.2716, "lr": 3.406609901313349e-06, "epoch": 0.6438876233864844, "percentage": 64.36, "elapsed_time": "12:42:01", "remaining_time": "7:01:59"} +{"current_steps": 1061, "total_steps": 1647, "loss": 0.2633, "lr": 3.396566931836308e-06, "epoch": 0.6444950645406227, "percentage": 64.42, "elapsed_time": "12:42:44", "remaining_time": "7:01:15"} +{"current_steps": 1062, "total_steps": 1647, "loss": 0.2551, "lr": 3.386531167691512e-06, "epoch": 0.6451025056947608, "percentage": 64.48, "elapsed_time": "12:43:26", "remaining_time": "7:00:32"} +{"current_steps": 1063, "total_steps": 1647, "loss": 0.2484, "lr": 3.3765026539765832e-06, "epoch": 0.645709946848899, "percentage": 64.54, "elapsed_time": "12:44:13", "remaining_time": "6:59:51"} +{"current_steps": 1064, "total_steps": 1647, "loss": 0.2724, "lr": 3.36648143575656e-06, "epoch": 0.6463173880030372, "percentage": 64.6, "elapsed_time": "12:44:55", "remaining_time": "6:59:07"} +{"current_steps": 1065, "total_steps": 1647, "loss": 0.2544, "lr": 3.3564675580636946e-06, "epoch": 0.6469248291571754, "percentage": 64.66, "elapsed_time": "12:45:36", "remaining_time": "6:58:23"} +{"current_steps": 1066, "total_steps": 1647, "loss": 0.2518, "lr": 3.3464610658972584e-06, "epoch": 0.6475322703113136, "percentage": 64.72, "elapsed_time": "12:46:17", "remaining_time": "6:57:38"} +{"current_steps": 1067, "total_steps": 1647, "loss": 0.2362, "lr": 3.3364620042233316e-06, "epoch": 0.6481397114654518, "percentage": 64.78, "elapsed_time": "12:46:59", "remaining_time": "6:56:55"} +{"current_steps": 1068, "total_steps": 1647, "loss": 0.2417, "lr": 3.326470417974604e-06, "epoch": 0.6487471526195899, "percentage": 64.85, "elapsed_time": 
"12:47:39", "remaining_time": "6:56:10"} +{"current_steps": 1069, "total_steps": 1647, "loss": 0.2289, "lr": 3.3164863520501744e-06, "epoch": 0.6493545937737282, "percentage": 64.91, "elapsed_time": "12:48:19", "remaining_time": "6:55:25"} +{"current_steps": 1070, "total_steps": 1647, "loss": 0.2839, "lr": 3.3065098513153473e-06, "epoch": 0.6499620349278664, "percentage": 64.97, "elapsed_time": "12:49:02", "remaining_time": "6:54:42"} +{"current_steps": 1071, "total_steps": 1647, "loss": 0.2758, "lr": 3.29654096060143e-06, "epoch": 0.6505694760820045, "percentage": 65.03, "elapsed_time": "12:49:44", "remaining_time": "6:53:58"} +{"current_steps": 1072, "total_steps": 1647, "loss": 0.2662, "lr": 3.2865797247055354e-06, "epoch": 0.6511769172361428, "percentage": 65.09, "elapsed_time": "12:50:29", "remaining_time": "6:53:16"} +{"current_steps": 1073, "total_steps": 1647, "loss": 0.2549, "lr": 3.2766261883903744e-06, "epoch": 0.6517843583902809, "percentage": 65.15, "elapsed_time": "12:51:13", "remaining_time": "6:52:33"} +{"current_steps": 1074, "total_steps": 1647, "loss": 0.293, "lr": 3.266680396384061e-06, "epoch": 0.6523917995444192, "percentage": 65.21, "elapsed_time": "12:51:54", "remaining_time": "6:51:49"} +{"current_steps": 1075, "total_steps": 1647, "loss": 0.225, "lr": 3.256742393379909e-06, "epoch": 0.6529992406985573, "percentage": 65.27, "elapsed_time": "12:52:36", "remaining_time": "6:51:06"} +{"current_steps": 1076, "total_steps": 1647, "loss": 0.224, "lr": 3.2468122240362287e-06, "epoch": 0.6536066818526955, "percentage": 65.33, "elapsed_time": "12:53:20", "remaining_time": "6:50:23"} +{"current_steps": 1077, "total_steps": 1647, "loss": 0.2607, "lr": 3.2368899329761316e-06, "epoch": 0.6542141230068337, "percentage": 65.39, "elapsed_time": "12:54:02", "remaining_time": "6:49:39"} +{"current_steps": 1078, "total_steps": 1647, "loss": 0.2276, "lr": 3.226975564787322e-06, "epoch": 0.6548215641609719, "percentage": 65.45, "elapsed_time": "12:54:51", 
"remaining_time": "6:48:59"} +{"current_steps": 1079, "total_steps": 1647, "loss": 0.2648, "lr": 3.2170691640219077e-06, "epoch": 0.6554290053151101, "percentage": 65.51, "elapsed_time": "12:55:35", "remaining_time": "6:48:16"} +{"current_steps": 1080, "total_steps": 1647, "loss": 0.2785, "lr": 3.2071707751961838e-06, "epoch": 0.6560364464692483, "percentage": 65.57, "elapsed_time": "12:56:19", "remaining_time": "6:47:34"} +{"current_steps": 1081, "total_steps": 1647, "loss": 0.2503, "lr": 3.197280442790455e-06, "epoch": 0.6566438876233864, "percentage": 65.63, "elapsed_time": "12:57:02", "remaining_time": "6:46:50"} +{"current_steps": 1082, "total_steps": 1647, "loss": 0.2367, "lr": 3.187398211248811e-06, "epoch": 0.6572513287775247, "percentage": 65.7, "elapsed_time": "12:57:42", "remaining_time": "6:46:06"} +{"current_steps": 1083, "total_steps": 1647, "loss": 0.254, "lr": 3.1775241249789434e-06, "epoch": 0.6578587699316629, "percentage": 65.76, "elapsed_time": "12:58:26", "remaining_time": "6:45:23"} +{"current_steps": 1084, "total_steps": 1647, "loss": 0.2279, "lr": 3.1676582283519454e-06, "epoch": 0.6584662110858011, "percentage": 65.82, "elapsed_time": "12:59:07", "remaining_time": "6:44:39"} +{"current_steps": 1085, "total_steps": 1647, "loss": 0.2285, "lr": 3.1578005657021004e-06, "epoch": 0.6590736522399393, "percentage": 65.88, "elapsed_time": "12:59:48", "remaining_time": "6:43:55"} +{"current_steps": 1086, "total_steps": 1647, "loss": 0.2655, "lr": 3.1479511813267006e-06, "epoch": 0.6596810933940774, "percentage": 65.94, "elapsed_time": "13:00:31", "remaining_time": "6:43:11"} +{"current_steps": 1087, "total_steps": 1647, "loss": 0.2407, "lr": 3.1381101194858264e-06, "epoch": 0.6602885345482157, "percentage": 66.0, "elapsed_time": "13:01:14", "remaining_time": "6:42:28"} +{"current_steps": 1088, "total_steps": 1647, "loss": 0.2604, "lr": 3.1282774244021717e-06, "epoch": 0.6608959757023538, "percentage": 66.06, "elapsed_time": "13:02:01", 
"remaining_time": "6:41:47"} +{"current_steps": 1089, "total_steps": 1647, "loss": 0.284, "lr": 3.118453140260823e-06, "epoch": 0.661503416856492, "percentage": 66.12, "elapsed_time": "13:02:41", "remaining_time": "6:41:03"} +{"current_steps": 1090, "total_steps": 1647, "loss": 0.2523, "lr": 3.1086373112090762e-06, "epoch": 0.6621108580106302, "percentage": 66.18, "elapsed_time": "13:03:22", "remaining_time": "6:40:18"} +{"current_steps": 1091, "total_steps": 1647, "loss": 0.2783, "lr": 3.0988299813562304e-06, "epoch": 0.6627182991647684, "percentage": 66.24, "elapsed_time": "13:04:06", "remaining_time": "6:39:36"} +{"current_steps": 1092, "total_steps": 1647, "loss": 0.2502, "lr": 3.089031194773392e-06, "epoch": 0.6633257403189066, "percentage": 66.3, "elapsed_time": "13:04:46", "remaining_time": "6:38:51"} +{"current_steps": 1093, "total_steps": 1647, "loss": 0.2479, "lr": 3.079240995493279e-06, "epoch": 0.6639331814730448, "percentage": 66.36, "elapsed_time": "13:05:28", "remaining_time": "6:38:07"} +{"current_steps": 1094, "total_steps": 1647, "loss": 0.2442, "lr": 3.069459427510014e-06, "epoch": 0.6645406226271829, "percentage": 66.42, "elapsed_time": "13:06:12", "remaining_time": "6:37:24"} +{"current_steps": 1095, "total_steps": 1647, "loss": 0.2722, "lr": 3.0596865347789444e-06, "epoch": 0.6651480637813212, "percentage": 66.48, "elapsed_time": "13:06:51", "remaining_time": "6:36:39"} +{"current_steps": 1096, "total_steps": 1647, "loss": 0.2425, "lr": 3.049922361216422e-06, "epoch": 0.6657555049354594, "percentage": 66.55, "elapsed_time": "13:07:30", "remaining_time": "6:35:54"} +{"current_steps": 1097, "total_steps": 1647, "loss": 0.2496, "lr": 3.040166950699626e-06, "epoch": 0.6663629460895976, "percentage": 66.61, "elapsed_time": "13:08:14", "remaining_time": "6:35:12"} +{"current_steps": 1098, "total_steps": 1647, "loss": 0.2619, "lr": 3.0304203470663507e-06, "epoch": 0.6669703872437358, "percentage": 66.67, "elapsed_time": "13:08:57", "remaining_time": 
"6:34:28"} +{"current_steps": 1099, "total_steps": 1647, "loss": 0.3065, "lr": 3.0206825941148203e-06, "epoch": 0.6675778283978739, "percentage": 66.73, "elapsed_time": "13:09:41", "remaining_time": "6:33:46"} +{"current_steps": 1100, "total_steps": 1647, "loss": 0.2737, "lr": 3.0109537356034856e-06, "epoch": 0.6681852695520122, "percentage": 66.79, "elapsed_time": "13:10:22", "remaining_time": "6:33:01"} +{"current_steps": 1101, "total_steps": 1647, "loss": 0.2899, "lr": 3.001233815250823e-06, "epoch": 0.6687927107061503, "percentage": 66.85, "elapsed_time": "13:11:04", "remaining_time": "6:32:18"} +{"current_steps": 1102, "total_steps": 1647, "loss": 0.2624, "lr": 2.991522876735154e-06, "epoch": 0.6694001518602886, "percentage": 66.91, "elapsed_time": "13:11:47", "remaining_time": "6:31:35"} +{"current_steps": 1103, "total_steps": 1647, "loss": 0.2301, "lr": 2.981820963694427e-06, "epoch": 0.6700075930144267, "percentage": 66.97, "elapsed_time": "13:12:27", "remaining_time": "6:30:50"} +{"current_steps": 1104, "total_steps": 1647, "loss": 0.2864, "lr": 2.9721281197260427e-06, "epoch": 0.6706150341685649, "percentage": 67.03, "elapsed_time": "13:13:11", "remaining_time": "6:30:07"} +{"current_steps": 1105, "total_steps": 1647, "loss": 0.2441, "lr": 2.9624443883866403e-06, "epoch": 0.6712224753227031, "percentage": 67.09, "elapsed_time": "13:13:55", "remaining_time": "6:29:25"} +{"current_steps": 1106, "total_steps": 1647, "loss": 0.2891, "lr": 2.9527698131919156e-06, "epoch": 0.6718299164768413, "percentage": 67.15, "elapsed_time": "13:14:36", "remaining_time": "6:28:41"} +{"current_steps": 1107, "total_steps": 1647, "loss": 0.2978, "lr": 2.9431044376164165e-06, "epoch": 0.6724373576309794, "percentage": 67.21, "elapsed_time": "13:15:19", "remaining_time": "6:27:57"} +{"current_steps": 1108, "total_steps": 1647, "loss": 0.2507, "lr": 2.9334483050933506e-06, "epoch": 0.6730447987851177, "percentage": 67.27, "elapsed_time": "13:16:04", "remaining_time": "6:27:15"} 
+{"current_steps": 1109, "total_steps": 1647, "loss": 0.2376, "lr": 2.9238014590143925e-06, "epoch": 0.6736522399392559, "percentage": 67.33, "elapsed_time": "13:16:47", "remaining_time": "6:26:32"} +{"current_steps": 1110, "total_steps": 1647, "loss": 0.2582, "lr": 2.91416394272948e-06, "epoch": 0.6742596810933941, "percentage": 67.4, "elapsed_time": "13:17:32", "remaining_time": "6:25:50"} +{"current_steps": 1111, "total_steps": 1647, "loss": 0.2177, "lr": 2.904535799546636e-06, "epoch": 0.6748671222475323, "percentage": 67.46, "elapsed_time": "13:18:14", "remaining_time": "6:25:06"} +{"current_steps": 1112, "total_steps": 1647, "loss": 0.2607, "lr": 2.894917072731753e-06, "epoch": 0.6754745634016704, "percentage": 67.52, "elapsed_time": "13:18:55", "remaining_time": "6:24:22"} +{"current_steps": 1113, "total_steps": 1647, "loss": 0.2588, "lr": 2.8853078055084192e-06, "epoch": 0.6760820045558087, "percentage": 67.58, "elapsed_time": "13:19:35", "remaining_time": "6:23:38"} +{"current_steps": 1114, "total_steps": 1647, "loss": 0.2701, "lr": 2.8757080410577042e-06, "epoch": 0.6766894457099468, "percentage": 67.64, "elapsed_time": "13:20:17", "remaining_time": "6:22:54"} +{"current_steps": 1115, "total_steps": 1647, "loss": 0.2078, "lr": 2.866117822517982e-06, "epoch": 0.6772968868640851, "percentage": 67.7, "elapsed_time": "13:21:02", "remaining_time": "6:22:11"} +{"current_steps": 1116, "total_steps": 1647, "loss": 0.2519, "lr": 2.8565371929847286e-06, "epoch": 0.6779043280182232, "percentage": 67.76, "elapsed_time": "13:21:44", "remaining_time": "6:21:28"} +{"current_steps": 1117, "total_steps": 1647, "loss": 0.2586, "lr": 2.846966195510332e-06, "epoch": 0.6785117691723614, "percentage": 67.82, "elapsed_time": "13:22:26", "remaining_time": "6:20:44"} +{"current_steps": 1118, "total_steps": 1647, "loss": 0.2651, "lr": 2.83740487310389e-06, "epoch": 0.6791192103264996, "percentage": 67.88, "elapsed_time": "13:23:10", "remaining_time": "6:20:02"} +{"current_steps": 
1119, "total_steps": 1647, "loss": 0.2593, "lr": 2.82785326873103e-06, "epoch": 0.6797266514806378, "percentage": 67.94, "elapsed_time": "13:23:53", "remaining_time": "6:19:19"} +{"current_steps": 1120, "total_steps": 1647, "loss": 0.2597, "lr": 2.81831142531371e-06, "epoch": 0.680334092634776, "percentage": 68.0, "elapsed_time": "13:24:33", "remaining_time": "6:18:34"} +{"current_steps": 1121, "total_steps": 1647, "loss": 0.2682, "lr": 2.8087793857300193e-06, "epoch": 0.6809415337889142, "percentage": 68.06, "elapsed_time": "13:25:19", "remaining_time": "6:17:52"} +{"current_steps": 1122, "total_steps": 1647, "loss": 0.2481, "lr": 2.7992571928139984e-06, "epoch": 0.6815489749430524, "percentage": 68.12, "elapsed_time": "13:26:01", "remaining_time": "6:17:09"} +{"current_steps": 1123, "total_steps": 1647, "loss": 0.2581, "lr": 2.7897448893554335e-06, "epoch": 0.6821564160971906, "percentage": 68.18, "elapsed_time": "13:26:45", "remaining_time": "6:16:26"} +{"current_steps": 1124, "total_steps": 1647, "loss": 0.2503, "lr": 2.780242518099675e-06, "epoch": 0.6827638572513288, "percentage": 68.25, "elapsed_time": "13:27:30", "remaining_time": "6:15:44"} +{"current_steps": 1125, "total_steps": 1647, "loss": 0.2744, "lr": 2.7707501217474443e-06, "epoch": 0.683371298405467, "percentage": 68.31, "elapsed_time": "13:28:12", "remaining_time": "6:15:00"} +{"current_steps": 1126, "total_steps": 1647, "loss": 0.2524, "lr": 2.761267742954629e-06, "epoch": 0.6839787395596052, "percentage": 68.37, "elapsed_time": "13:28:55", "remaining_time": "6:14:17"} +{"current_steps": 1127, "total_steps": 1647, "loss": 0.2659, "lr": 2.7517954243321097e-06, "epoch": 0.6845861807137433, "percentage": 68.43, "elapsed_time": "13:29:35", "remaining_time": "6:13:33"} +{"current_steps": 1128, "total_steps": 1647, "loss": 0.2851, "lr": 2.7423332084455543e-06, "epoch": 0.6851936218678816, "percentage": 68.49, "elapsed_time": "13:30:17", "remaining_time": "6:12:49"} +{"current_steps": 1129, 
"total_steps": 1647, "loss": 0.2557, "lr": 2.7328811378152355e-06, "epoch": 0.6858010630220197, "percentage": 68.55, "elapsed_time": "13:31:01", "remaining_time": "6:12:06"} +{"current_steps": 1130, "total_steps": 1647, "loss": 0.275, "lr": 2.723439254915834e-06, "epoch": 0.686408504176158, "percentage": 68.61, "elapsed_time": "13:31:43", "remaining_time": "6:11:23"} +{"current_steps": 1131, "total_steps": 1647, "loss": 0.2413, "lr": 2.714007602176254e-06, "epoch": 0.6870159453302961, "percentage": 68.67, "elapsed_time": "13:32:30", "remaining_time": "6:10:41"} +{"current_steps": 1132, "total_steps": 1647, "loss": 0.2645, "lr": 2.704586221979422e-06, "epoch": 0.6876233864844343, "percentage": 68.73, "elapsed_time": "13:33:11", "remaining_time": "6:09:57"} +{"current_steps": 1133, "total_steps": 1647, "loss": 0.2574, "lr": 2.695175156662107e-06, "epoch": 0.6882308276385725, "percentage": 68.79, "elapsed_time": "13:33:56", "remaining_time": "6:09:15"} +{"current_steps": 1134, "total_steps": 1647, "loss": 0.2383, "lr": 2.6857744485147286e-06, "epoch": 0.6888382687927107, "percentage": 68.85, "elapsed_time": "13:34:40", "remaining_time": "6:08:32"} +{"current_steps": 1135, "total_steps": 1647, "loss": 0.2735, "lr": 2.6763841397811576e-06, "epoch": 0.689445709946849, "percentage": 68.91, "elapsed_time": "13:35:23", "remaining_time": "6:07:49"} +{"current_steps": 1136, "total_steps": 1647, "loss": 0.2768, "lr": 2.667004272658541e-06, "epoch": 0.6900531511009871, "percentage": 68.97, "elapsed_time": "13:36:06", "remaining_time": "6:07:06"} +{"current_steps": 1137, "total_steps": 1647, "loss": 0.2636, "lr": 2.6576348892970947e-06, "epoch": 0.6906605922551253, "percentage": 69.03, "elapsed_time": "13:36:49", "remaining_time": "6:06:23"} +{"current_steps": 1138, "total_steps": 1647, "loss": 0.2559, "lr": 2.6482760317999338e-06, "epoch": 0.6912680334092635, "percentage": 69.1, "elapsed_time": "13:37:31", "remaining_time": "6:05:39"} +{"current_steps": 1139, "total_steps": 
1647, "loss": 0.2537, "lr": 2.638927742222868e-06, "epoch": 0.6918754745634017, "percentage": 69.16, "elapsed_time": "13:38:17", "remaining_time": "6:04:57"} +{"current_steps": 1140, "total_steps": 1647, "loss": 0.2656, "lr": 2.629590062574221e-06, "epoch": 0.6924829157175398, "percentage": 69.22, "elapsed_time": "13:39:01", "remaining_time": "6:04:15"} +{"current_steps": 1141, "total_steps": 1647, "loss": 0.2899, "lr": 2.6202630348146323e-06, "epoch": 0.6930903568716781, "percentage": 69.28, "elapsed_time": "13:39:44", "remaining_time": "6:03:31"} +{"current_steps": 1142, "total_steps": 1647, "loss": 0.267, "lr": 2.610946700856885e-06, "epoch": 0.6936977980258162, "percentage": 69.34, "elapsed_time": "13:40:27", "remaining_time": "6:02:48"} +{"current_steps": 1143, "total_steps": 1647, "loss": 0.2535, "lr": 2.6016411025656973e-06, "epoch": 0.6943052391799545, "percentage": 69.4, "elapsed_time": "13:41:12", "remaining_time": "6:02:06"} +{"current_steps": 1144, "total_steps": 1647, "loss": 0.2509, "lr": 2.592346281757552e-06, "epoch": 0.6949126803340926, "percentage": 69.46, "elapsed_time": "13:41:55", "remaining_time": "6:01:23"} +{"current_steps": 1145, "total_steps": 1647, "loss": 0.2593, "lr": 2.583062280200501e-06, "epoch": 0.6955201214882308, "percentage": 69.52, "elapsed_time": "13:42:39", "remaining_time": "6:00:40"} +{"current_steps": 1146, "total_steps": 1647, "loss": 0.255, "lr": 2.5737891396139713e-06, "epoch": 0.696127562642369, "percentage": 69.58, "elapsed_time": "13:43:23", "remaining_time": "5:59:58"} +{"current_steps": 1147, "total_steps": 1647, "loss": 0.2704, "lr": 2.5645269016685905e-06, "epoch": 0.6967350037965072, "percentage": 69.64, "elapsed_time": "13:44:05", "remaining_time": "5:59:14"} +{"current_steps": 1148, "total_steps": 1647, "loss": 0.2594, "lr": 2.5552756079859904e-06, "epoch": 0.6973424449506455, "percentage": 69.7, "elapsed_time": "13:44:50", "remaining_time": "5:58:31"} +{"current_steps": 1149, "total_steps": 1647, "loss": 
0.2529, "lr": 2.5460353001386263e-06, "epoch": 0.6979498861047836, "percentage": 69.76, "elapsed_time": "13:45:34", "remaining_time": "5:57:49"} +{"current_steps": 1150, "total_steps": 1647, "loss": 0.2564, "lr": 2.5368060196495785e-06, "epoch": 0.6985573272589218, "percentage": 69.82, "elapsed_time": "13:46:17", "remaining_time": "5:57:06"} +{"current_steps": 1151, "total_steps": 1647, "loss": 0.2387, "lr": 2.527587807992383e-06, "epoch": 0.69916476841306, "percentage": 69.88, "elapsed_time": "13:46:59", "remaining_time": "5:56:22"} +{"current_steps": 1152, "total_steps": 1647, "loss": 0.2542, "lr": 2.5183807065908296e-06, "epoch": 0.6997722095671982, "percentage": 69.95, "elapsed_time": "13:47:39", "remaining_time": "5:55:38"} +{"current_steps": 1153, "total_steps": 1647, "loss": 0.2281, "lr": 2.5091847568187834e-06, "epoch": 0.7003796507213363, "percentage": 70.01, "elapsed_time": "13:48:24", "remaining_time": "5:54:55"} +{"current_steps": 1154, "total_steps": 1647, "loss": 0.241, "lr": 2.5000000000000015e-06, "epoch": 0.7009870918754746, "percentage": 70.07, "elapsed_time": "13:49:06", "remaining_time": "5:54:12"} +{"current_steps": 1155, "total_steps": 1647, "loss": 0.2605, "lr": 2.4908264774079355e-06, "epoch": 0.7015945330296127, "percentage": 70.13, "elapsed_time": "13:49:52", "remaining_time": "5:53:30"} +{"current_steps": 1156, "total_steps": 1647, "loss": 0.2541, "lr": 2.4816642302655634e-06, "epoch": 0.702201974183751, "percentage": 70.19, "elapsed_time": "13:50:37", "remaining_time": "5:52:47"} +{"current_steps": 1157, "total_steps": 1647, "loss": 0.2601, "lr": 2.4725132997451833e-06, "epoch": 0.7028094153378891, "percentage": 70.25, "elapsed_time": "13:51:19", "remaining_time": "5:52:04"} +{"current_steps": 1158, "total_steps": 1647, "loss": 0.3022, "lr": 2.4633737269682546e-06, "epoch": 0.7034168564920273, "percentage": 70.31, "elapsed_time": "13:52:04", "remaining_time": "5:51:21"} +{"current_steps": 1159, "total_steps": 1647, "loss": 0.2643, "lr": 
2.454245553005184e-06, "epoch": 0.7040242976461655, "percentage": 70.37, "elapsed_time": "13:52:43", "remaining_time": "5:50:37"} +{"current_steps": 1160, "total_steps": 1647, "loss": 0.2852, "lr": 2.445128818875166e-06, "epoch": 0.7046317388003037, "percentage": 70.43, "elapsed_time": "13:53:28", "remaining_time": "5:49:54"} +{"current_steps": 1161, "total_steps": 1647, "loss": 0.3014, "lr": 2.4360235655459804e-06, "epoch": 0.705239179954442, "percentage": 70.49, "elapsed_time": "13:54:13", "remaining_time": "5:49:12"} +{"current_steps": 1162, "total_steps": 1647, "loss": 0.2464, "lr": 2.4269298339338205e-06, "epoch": 0.7058466211085801, "percentage": 70.55, "elapsed_time": "13:54:56", "remaining_time": "5:48:29"} +{"current_steps": 1163, "total_steps": 1647, "loss": 0.2611, "lr": 2.4178476649031057e-06, "epoch": 0.7064540622627183, "percentage": 70.61, "elapsed_time": "13:55:39", "remaining_time": "5:47:46"} +{"current_steps": 1164, "total_steps": 1647, "loss": 0.2628, "lr": 2.408777099266291e-06, "epoch": 0.7070615034168565, "percentage": 70.67, "elapsed_time": "13:56:27", "remaining_time": "5:47:05"} +{"current_steps": 1165, "total_steps": 1647, "loss": 0.3069, "lr": 2.3997181777836955e-06, "epoch": 0.7076689445709947, "percentage": 70.73, "elapsed_time": "13:57:12", "remaining_time": "5:46:22"} +{"current_steps": 1166, "total_steps": 1647, "loss": 0.2405, "lr": 2.3906709411633073e-06, "epoch": 0.7082763857251329, "percentage": 70.8, "elapsed_time": "13:57:54", "remaining_time": "5:45:39"} +{"current_steps": 1167, "total_steps": 1647, "loss": 0.28, "lr": 2.381635430060611e-06, "epoch": 0.7088838268792711, "percentage": 70.86, "elapsed_time": "13:58:36", "remaining_time": "5:44:55"} +{"current_steps": 1168, "total_steps": 1647, "loss": 0.2696, "lr": 2.3726116850783987e-06, "epoch": 0.7094912680334092, "percentage": 70.92, "elapsed_time": "13:59:19", "remaining_time": "5:44:12"} +{"current_steps": 1169, "total_steps": 1647, "loss": 0.2527, "lr": 
2.3635997467665905e-06, "epoch": 0.7100987091875475, "percentage": 70.98, "elapsed_time": "14:00:10", "remaining_time": "5:43:32"} +{"current_steps": 1170, "total_steps": 1647, "loss": 0.2425, "lr": 2.354599655622049e-06, "epoch": 0.7107061503416856, "percentage": 71.04, "elapsed_time": "14:00:58", "remaining_time": "5:42:51"} +{"current_steps": 1171, "total_steps": 1647, "loss": 0.2478, "lr": 2.3456114520883956e-06, "epoch": 0.7113135914958238, "percentage": 71.1, "elapsed_time": "14:01:42", "remaining_time": "5:42:08"} +{"current_steps": 1172, "total_steps": 1647, "loss": 0.2552, "lr": 2.3366351765558437e-06, "epoch": 0.711921032649962, "percentage": 71.16, "elapsed_time": "14:02:25", "remaining_time": "5:41:25"} +{"current_steps": 1173, "total_steps": 1647, "loss": 0.2798, "lr": 2.3276708693609947e-06, "epoch": 0.7125284738041002, "percentage": 71.22, "elapsed_time": "14:03:07", "remaining_time": "5:40:42"} +{"current_steps": 1174, "total_steps": 1647, "loss": 0.2463, "lr": 2.318718570786675e-06, "epoch": 0.7131359149582385, "percentage": 71.28, "elapsed_time": "14:03:51", "remaining_time": "5:39:59"} +{"current_steps": 1175, "total_steps": 1647, "loss": 0.2416, "lr": 2.309778321061742e-06, "epoch": 0.7137433561123766, "percentage": 71.34, "elapsed_time": "14:04:36", "remaining_time": "5:39:16"} +{"current_steps": 1176, "total_steps": 1647, "loss": 0.275, "lr": 2.3008501603609147e-06, "epoch": 0.7143507972665148, "percentage": 71.4, "elapsed_time": "14:05:20", "remaining_time": "5:38:34"} +{"current_steps": 1177, "total_steps": 1647, "loss": 0.2502, "lr": 2.2919341288045853e-06, "epoch": 0.714958238420653, "percentage": 71.46, "elapsed_time": "14:06:01", "remaining_time": "5:37:50"} +{"current_steps": 1178, "total_steps": 1647, "loss": 0.2754, "lr": 2.283030266458644e-06, "epoch": 0.7155656795747912, "percentage": 71.52, "elapsed_time": "14:06:45", "remaining_time": "5:37:07"} +{"current_steps": 1179, "total_steps": 1647, "loss": 0.2505, "lr": 
2.2741386133342923e-06, "epoch": 0.7161731207289294, "percentage": 71.58, "elapsed_time": "14:07:28", "remaining_time": "5:36:23"} +{"current_steps": 1180, "total_steps": 1647, "loss": 0.2304, "lr": 2.265259209387867e-06, "epoch": 0.7167805618830676, "percentage": 71.65, "elapsed_time": "14:08:09", "remaining_time": "5:35:40"} +{"current_steps": 1181, "total_steps": 1647, "loss": 0.2697, "lr": 2.256392094520664e-06, "epoch": 0.7173880030372057, "percentage": 71.71, "elapsed_time": "14:08:52", "remaining_time": "5:34:56"} +{"current_steps": 1182, "total_steps": 1647, "loss": 0.2644, "lr": 2.2475373085787568e-06, "epoch": 0.717995444191344, "percentage": 71.77, "elapsed_time": "14:09:37", "remaining_time": "5:34:14"} +{"current_steps": 1183, "total_steps": 1647, "loss": 0.2637, "lr": 2.238694891352814e-06, "epoch": 0.7186028853454821, "percentage": 71.83, "elapsed_time": "14:10:21", "remaining_time": "5:33:31"} +{"current_steps": 1184, "total_steps": 1647, "loss": 0.2303, "lr": 2.229864882577921e-06, "epoch": 0.7192103264996204, "percentage": 71.89, "elapsed_time": "14:11:07", "remaining_time": "5:32:49"} +{"current_steps": 1185, "total_steps": 1647, "loss": 0.255, "lr": 2.2210473219334083e-06, "epoch": 0.7198177676537585, "percentage": 71.95, "elapsed_time": "14:11:50", "remaining_time": "5:32:06"} +{"current_steps": 1186, "total_steps": 1647, "loss": 0.2384, "lr": 2.2122422490426676e-06, "epoch": 0.7204252088078967, "percentage": 72.01, "elapsed_time": "14:12:33", "remaining_time": "5:31:23"} +{"current_steps": 1187, "total_steps": 1647, "loss": 0.268, "lr": 2.203449703472969e-06, "epoch": 0.721032649962035, "percentage": 72.07, "elapsed_time": "14:13:16", "remaining_time": "5:30:40"} +{"current_steps": 1188, "total_steps": 1647, "loss": 0.2755, "lr": 2.194669724735296e-06, "epoch": 0.7216400911161731, "percentage": 72.13, "elapsed_time": "14:13:58", "remaining_time": "5:29:56"} +{"current_steps": 1189, "total_steps": 1647, "loss": 0.2327, "lr": 
2.1859023522841543e-06, "epoch": 0.7222475322703114, "percentage": 72.19, "elapsed_time": "14:14:42", "remaining_time": "5:29:14"} +{"current_steps": 1190, "total_steps": 1647, "loss": 0.2735, "lr": 2.1771476255174056e-06, "epoch": 0.7228549734244495, "percentage": 72.25, "elapsed_time": "14:15:26", "remaining_time": "5:28:31"} +{"current_steps": 1191, "total_steps": 1647, "loss": 0.2757, "lr": 2.1684055837760837e-06, "epoch": 0.7234624145785877, "percentage": 72.31, "elapsed_time": "14:16:07", "remaining_time": "5:27:47"} +{"current_steps": 1192, "total_steps": 1647, "loss": 0.268, "lr": 2.159676266344222e-06, "epoch": 0.7240698557327259, "percentage": 72.37, "elapsed_time": "14:16:52", "remaining_time": "5:27:04"} +{"current_steps": 1193, "total_steps": 1647, "loss": 0.2367, "lr": 2.1509597124486693e-06, "epoch": 0.7246772968868641, "percentage": 72.43, "elapsed_time": "14:17:32", "remaining_time": "5:26:20"} +{"current_steps": 1194, "total_steps": 1647, "loss": 0.2964, "lr": 2.1422559612589266e-06, "epoch": 0.7252847380410022, "percentage": 72.5, "elapsed_time": "14:18:14", "remaining_time": "5:25:36"} +{"current_steps": 1195, "total_steps": 1647, "loss": 0.2625, "lr": 2.1335650518869555e-06, "epoch": 0.7258921791951405, "percentage": 72.56, "elapsed_time": "14:18:59", "remaining_time": "5:24:54"} +{"current_steps": 1196, "total_steps": 1647, "loss": 0.2974, "lr": 2.124887023387017e-06, "epoch": 0.7264996203492786, "percentage": 72.62, "elapsed_time": "14:19:43", "remaining_time": "5:24:11"} +{"current_steps": 1197, "total_steps": 1647, "loss": 0.2858, "lr": 2.1162219147554884e-06, "epoch": 0.7271070615034169, "percentage": 72.68, "elapsed_time": "14:20:25", "remaining_time": "5:23:27"} +{"current_steps": 1198, "total_steps": 1647, "loss": 0.2651, "lr": 2.1075697649306838e-06, "epoch": 0.727714502657555, "percentage": 72.74, "elapsed_time": "14:21:10", "remaining_time": "5:22:45"} +{"current_steps": 1199, "total_steps": 1647, "loss": 0.2611, "lr": 
2.09893061279269e-06, "epoch": 0.7283219438116932, "percentage": 72.8, "elapsed_time": "14:21:59", "remaining_time": "5:22:04"} +{"current_steps": 1200, "total_steps": 1647, "loss": 0.2498, "lr": 2.0903044971631854e-06, "epoch": 0.7289293849658315, "percentage": 72.86, "elapsed_time": "14:22:40", "remaining_time": "5:21:20"} +{"current_steps": 1201, "total_steps": 1647, "loss": 0.2549, "lr": 2.0816914568052664e-06, "epoch": 0.7295368261199696, "percentage": 72.92, "elapsed_time": "14:23:22", "remaining_time": "5:20:37"} +{"current_steps": 1202, "total_steps": 1647, "loss": 0.2753, "lr": 2.0730915304232692e-06, "epoch": 0.7301442672741079, "percentage": 72.98, "elapsed_time": "14:24:04", "remaining_time": "5:19:53"} +{"current_steps": 1203, "total_steps": 1647, "loss": 0.2429, "lr": 2.0645047566626057e-06, "epoch": 0.730751708428246, "percentage": 73.04, "elapsed_time": "14:24:46", "remaining_time": "5:19:10"} +{"current_steps": 1204, "total_steps": 1647, "loss": 0.2923, "lr": 2.055931174109579e-06, "epoch": 0.7313591495823842, "percentage": 73.1, "elapsed_time": "14:25:31", "remaining_time": "5:18:27"} +{"current_steps": 1205, "total_steps": 1647, "loss": 0.2416, "lr": 2.0473708212912167e-06, "epoch": 0.7319665907365224, "percentage": 73.16, "elapsed_time": "14:26:18", "remaining_time": "5:17:45"} +{"current_steps": 1206, "total_steps": 1647, "loss": 0.2538, "lr": 2.0388237366751005e-06, "epoch": 0.7325740318906606, "percentage": 73.22, "elapsed_time": "14:27:05", "remaining_time": "5:17:04"} +{"current_steps": 1207, "total_steps": 1647, "loss": 0.2649, "lr": 2.030289958669181e-06, "epoch": 0.7331814730447987, "percentage": 73.28, "elapsed_time": "14:27:49", "remaining_time": "5:16:21"} +{"current_steps": 1208, "total_steps": 1647, "loss": 0.2517, "lr": 2.02176952562162e-06, "epoch": 0.733788914198937, "percentage": 73.35, "elapsed_time": "14:28:34", "remaining_time": "5:15:39"} +{"current_steps": 1209, "total_steps": 1647, "loss": 0.2716, "lr": 
2.013262475820602e-06, "epoch": 0.7343963553530751, "percentage": 73.41, "elapsed_time": "14:29:21", "remaining_time": "5:14:57"} +{"current_steps": 1210, "total_steps": 1647, "loss": 0.2365, "lr": 2.004768847494186e-06, "epoch": 0.7350037965072134, "percentage": 73.47, "elapsed_time": "14:30:04", "remaining_time": "5:14:14"} +{"current_steps": 1211, "total_steps": 1647, "loss": 0.2632, "lr": 1.996288678810105e-06, "epoch": 0.7356112376613515, "percentage": 73.53, "elapsed_time": "14:30:45", "remaining_time": "5:13:30"} +{"current_steps": 1212, "total_steps": 1647, "loss": 0.2675, "lr": 1.987822007875617e-06, "epoch": 0.7362186788154897, "percentage": 73.59, "elapsed_time": "14:31:29", "remaining_time": "5:12:47"} +{"current_steps": 1213, "total_steps": 1647, "loss": 0.2282, "lr": 1.979368872737319e-06, "epoch": 0.736826119969628, "percentage": 73.65, "elapsed_time": "14:32:12", "remaining_time": "5:12:03"} +{"current_steps": 1214, "total_steps": 1647, "loss": 0.237, "lr": 1.9709293113809876e-06, "epoch": 0.7374335611237661, "percentage": 73.71, "elapsed_time": "14:32:55", "remaining_time": "5:11:20"} +{"current_steps": 1215, "total_steps": 1647, "loss": 0.2347, "lr": 1.962503361731403e-06, "epoch": 0.7380410022779044, "percentage": 73.77, "elapsed_time": "14:33:36", "remaining_time": "5:10:37"} +{"current_steps": 1216, "total_steps": 1647, "loss": 0.249, "lr": 1.954091061652172e-06, "epoch": 0.7386484434320425, "percentage": 73.83, "elapsed_time": "14:34:17", "remaining_time": "5:09:53"} +{"current_steps": 1217, "total_steps": 1647, "loss": 0.2684, "lr": 1.945692448945574e-06, "epoch": 0.7392558845861807, "percentage": 73.89, "elapsed_time": "14:35:04", "remaining_time": "5:09:11"} +{"current_steps": 1218, "total_steps": 1647, "loss": 0.269, "lr": 1.9373075613523728e-06, "epoch": 0.7398633257403189, "percentage": 73.95, "elapsed_time": "14:35:45", "remaining_time": "5:08:27"} +{"current_steps": 1219, "total_steps": 1647, "loss": 0.2422, "lr": 
1.928936436551661e-06, "epoch": 0.7404707668944571, "percentage": 74.01, "elapsed_time": "14:36:28", "remaining_time": "5:07:44"} +{"current_steps": 1220, "total_steps": 1647, "loss": 0.2199, "lr": 1.920579112160685e-06, "epoch": 0.7410782080485953, "percentage": 74.07, "elapsed_time": "14:37:10", "remaining_time": "5:07:00"} +{"current_steps": 1221, "total_steps": 1647, "loss": 0.2854, "lr": 1.912235625734676e-06, "epoch": 0.7416856492027335, "percentage": 74.13, "elapsed_time": "14:37:57", "remaining_time": "5:06:18"} +{"current_steps": 1222, "total_steps": 1647, "loss": 0.2761, "lr": 1.903906014766681e-06, "epoch": 0.7422930903568716, "percentage": 74.2, "elapsed_time": "14:38:41", "remaining_time": "5:05:35"} +{"current_steps": 1223, "total_steps": 1647, "loss": 0.25, "lr": 1.8955903166873924e-06, "epoch": 0.7429005315110099, "percentage": 74.26, "elapsed_time": "14:39:26", "remaining_time": "5:04:53"} +{"current_steps": 1224, "total_steps": 1647, "loss": 0.2876, "lr": 1.8872885688649879e-06, "epoch": 0.743507972665148, "percentage": 74.32, "elapsed_time": "14:40:12", "remaining_time": "5:04:11"} +{"current_steps": 1225, "total_steps": 1647, "loss": 0.255, "lr": 1.8790008086049534e-06, "epoch": 0.7441154138192863, "percentage": 74.38, "elapsed_time": "14:40:58", "remaining_time": "5:03:29"} +{"current_steps": 1226, "total_steps": 1647, "loss": 0.2401, "lr": 1.8707270731499223e-06, "epoch": 0.7447228549734245, "percentage": 74.44, "elapsed_time": "14:41:46", "remaining_time": "5:02:47"} +{"current_steps": 1227, "total_steps": 1647, "loss": 0.2855, "lr": 1.862467399679499e-06, "epoch": 0.7453302961275626, "percentage": 74.5, "elapsed_time": "14:42:29", "remaining_time": "5:02:04"} +{"current_steps": 1228, "total_steps": 1647, "loss": 0.2376, "lr": 1.854221825310103e-06, "epoch": 0.7459377372817009, "percentage": 74.56, "elapsed_time": "14:43:13", "remaining_time": "5:01:21"} +{"current_steps": 1229, "total_steps": 1647, "loss": 0.277, "lr": 
1.8459903870947954e-06, "epoch": 0.746545178435839, "percentage": 74.62, "elapsed_time": "14:43:57", "remaining_time": "5:00:38"} +{"current_steps": 1230, "total_steps": 1647, "loss": 0.2506, "lr": 1.8377731220231144e-06, "epoch": 0.7471526195899773, "percentage": 74.68, "elapsed_time": "14:44:40", "remaining_time": "4:59:55"} +{"current_steps": 1231, "total_steps": 1647, "loss": 0.2448, "lr": 1.829570067020906e-06, "epoch": 0.7477600607441154, "percentage": 74.74, "elapsed_time": "14:45:28", "remaining_time": "4:59:14"} +{"current_steps": 1232, "total_steps": 1647, "loss": 0.2547, "lr": 1.8213812589501611e-06, "epoch": 0.7483675018982536, "percentage": 74.8, "elapsed_time": "14:46:13", "remaining_time": "4:58:31"} +{"current_steps": 1233, "total_steps": 1647, "loss": 0.2603, "lr": 1.813206734608851e-06, "epoch": 0.7489749430523918, "percentage": 74.86, "elapsed_time": "14:46:59", "remaining_time": "4:57:49"} +{"current_steps": 1234, "total_steps": 1647, "loss": 0.2461, "lr": 1.8050465307307602e-06, "epoch": 0.74958238420653, "percentage": 74.92, "elapsed_time": "14:47:38", "remaining_time": "4:57:04"} +{"current_steps": 1235, "total_steps": 1647, "loss": 0.2226, "lr": 1.7969006839853227e-06, "epoch": 0.7501898253606681, "percentage": 74.98, "elapsed_time": "14:48:21", "remaining_time": "4:56:21"} +{"current_steps": 1236, "total_steps": 1647, "loss": 0.2553, "lr": 1.78876923097745e-06, "epoch": 0.7507972665148064, "percentage": 75.05, "elapsed_time": "14:49:00", "remaining_time": "4:55:37"} +{"current_steps": 1237, "total_steps": 1647, "loss": 0.2549, "lr": 1.7806522082473809e-06, "epoch": 0.7514047076689445, "percentage": 75.11, "elapsed_time": "14:49:45", "remaining_time": "4:54:54"} +{"current_steps": 1238, "total_steps": 1647, "loss": 0.2582, "lr": 1.7725496522704998e-06, "epoch": 0.7520121488230828, "percentage": 75.17, "elapsed_time": "14:50:28", "remaining_time": "4:54:11"} +{"current_steps": 1239, "total_steps": 1647, "loss": 0.2509, "lr": 
1.7644615994571934e-06, "epoch": 0.752619589977221, "percentage": 75.23, "elapsed_time": "14:51:08", "remaining_time": "4:53:26"} +{"current_steps": 1240, "total_steps": 1647, "loss": 0.2444, "lr": 1.7563880861526656e-06, "epoch": 0.7532270311313591, "percentage": 75.29, "elapsed_time": "14:51:47", "remaining_time": "4:52:42"} +{"current_steps": 1241, "total_steps": 1647, "loss": 0.2236, "lr": 1.748329148636787e-06, "epoch": 0.7538344722854974, "percentage": 75.35, "elapsed_time": "14:52:29", "remaining_time": "4:51:58"} +{"current_steps": 1242, "total_steps": 1647, "loss": 0.2544, "lr": 1.7402848231239317e-06, "epoch": 0.7544419134396355, "percentage": 75.41, "elapsed_time": "14:53:12", "remaining_time": "4:51:15"} +{"current_steps": 1243, "total_steps": 1647, "loss": 0.2665, "lr": 1.73225514576281e-06, "epoch": 0.7550493545937738, "percentage": 75.47, "elapsed_time": "14:53:53", "remaining_time": "4:50:31"} +{"current_steps": 1244, "total_steps": 1647, "loss": 0.2745, "lr": 1.7242401526363095e-06, "epoch": 0.7556567957479119, "percentage": 75.53, "elapsed_time": "14:54:33", "remaining_time": "4:49:47"} +{"current_steps": 1245, "total_steps": 1647, "loss": 0.251, "lr": 1.7162398797613284e-06, "epoch": 0.7562642369020501, "percentage": 75.59, "elapsed_time": "14:55:17", "remaining_time": "4:49:04"} +{"current_steps": 1246, "total_steps": 1647, "loss": 0.2699, "lr": 1.70825436308862e-06, "epoch": 0.7568716780561883, "percentage": 75.65, "elapsed_time": "14:56:00", "remaining_time": "4:48:21"} +{"current_steps": 1247, "total_steps": 1647, "loss": 0.2429, "lr": 1.7002836385026234e-06, "epoch": 0.7574791192103265, "percentage": 75.71, "elapsed_time": "14:56:43", "remaining_time": "4:47:38"} +{"current_steps": 1248, "total_steps": 1647, "loss": 0.2733, "lr": 1.692327741821312e-06, "epoch": 0.7580865603644646, "percentage": 75.77, "elapsed_time": "14:57:28", "remaining_time": "4:46:56"} +{"current_steps": 1249, "total_steps": 1647, "loss": 0.2671, "lr": 
1.6843867087960252e-06, "epoch": 0.7586940015186029, "percentage": 75.83, "elapsed_time": "14:58:12", "remaining_time": "4:46:13"} +{"current_steps": 1250, "total_steps": 1647, "loss": 0.2515, "lr": 1.676460575111306e-06, "epoch": 0.7593014426727411, "percentage": 75.9, "elapsed_time": "14:58:56", "remaining_time": "4:45:30"} +{"current_steps": 1251, "total_steps": 1647, "loss": 0.259, "lr": 1.6685493763847515e-06, "epoch": 0.7599088838268793, "percentage": 75.96, "elapsed_time": "14:59:41", "remaining_time": "4:44:47"} +{"current_steps": 1252, "total_steps": 1647, "loss": 0.2633, "lr": 1.6606531481668364e-06, "epoch": 0.7605163249810175, "percentage": 76.02, "elapsed_time": "15:00:21", "remaining_time": "4:44:03"} +{"current_steps": 1253, "total_steps": 1647, "loss": 0.249, "lr": 1.6527719259407743e-06, "epoch": 0.7611237661351556, "percentage": 76.08, "elapsed_time": "15:01:01", "remaining_time": "4:43:19"} +{"current_steps": 1254, "total_steps": 1647, "loss": 0.253, "lr": 1.6449057451223354e-06, "epoch": 0.7617312072892939, "percentage": 76.14, "elapsed_time": "15:01:42", "remaining_time": "4:42:35"} +{"current_steps": 1255, "total_steps": 1647, "loss": 0.2799, "lr": 1.6370546410597066e-06, "epoch": 0.762338648443432, "percentage": 76.2, "elapsed_time": "15:02:24", "remaining_time": "4:41:51"} +{"current_steps": 1256, "total_steps": 1647, "loss": 0.265, "lr": 1.6292186490333172e-06, "epoch": 0.7629460895975703, "percentage": 76.26, "elapsed_time": "15:03:07", "remaining_time": "4:41:08"} +{"current_steps": 1257, "total_steps": 1647, "loss": 0.2319, "lr": 1.6213978042556938e-06, "epoch": 0.7635535307517084, "percentage": 76.32, "elapsed_time": "15:03:48", "remaining_time": "4:40:24"} +{"current_steps": 1258, "total_steps": 1647, "loss": 0.2512, "lr": 1.6135921418712959e-06, "epoch": 0.7641609719058466, "percentage": 76.38, "elapsed_time": "15:04:30", "remaining_time": "4:39:41"} +{"current_steps": 1259, "total_steps": 1647, "loss": 0.2598, "lr": 
1.6058016969563512e-06, "epoch": 0.7647684130599848, "percentage": 76.44, "elapsed_time": "15:05:15", "remaining_time": "4:38:59"} +{"current_steps": 1260, "total_steps": 1647, "loss": 0.2707, "lr": 1.5980265045187139e-06, "epoch": 0.765375854214123, "percentage": 76.5, "elapsed_time": "15:06:00", "remaining_time": "4:38:16"} +{"current_steps": 1261, "total_steps": 1647, "loss": 0.269, "lr": 1.5902665994976896e-06, "epoch": 0.7659832953682612, "percentage": 76.56, "elapsed_time": "15:06:41", "remaining_time": "4:37:32"} +{"current_steps": 1262, "total_steps": 1647, "loss": 0.2215, "lr": 1.5825220167638945e-06, "epoch": 0.7665907365223994, "percentage": 76.62, "elapsed_time": "15:07:25", "remaining_time": "4:36:49"} +{"current_steps": 1263, "total_steps": 1647, "loss": 0.2713, "lr": 1.5747927911190858e-06, "epoch": 0.7671981776765376, "percentage": 76.68, "elapsed_time": "15:08:04", "remaining_time": "4:36:05"} +{"current_steps": 1264, "total_steps": 1647, "loss": 0.266, "lr": 1.567078957296016e-06, "epoch": 0.7678056188306758, "percentage": 76.75, "elapsed_time": "15:08:44", "remaining_time": "4:35:21"} +{"current_steps": 1265, "total_steps": 1647, "loss": 0.2365, "lr": 1.5593805499582659e-06, "epoch": 0.768413059984814, "percentage": 76.81, "elapsed_time": "15:09:25", "remaining_time": "4:34:37"} +{"current_steps": 1266, "total_steps": 1647, "loss": 0.2188, "lr": 1.5516976037000941e-06, "epoch": 0.7690205011389522, "percentage": 76.87, "elapsed_time": "15:10:06", "remaining_time": "4:33:53"} +{"current_steps": 1267, "total_steps": 1647, "loss": 0.2567, "lr": 1.544030153046291e-06, "epoch": 0.7696279422930904, "percentage": 76.93, "elapsed_time": "15:10:47", "remaining_time": "4:33:09"} +{"current_steps": 1268, "total_steps": 1647, "loss": 0.2803, "lr": 1.5363782324520033e-06, "epoch": 0.7702353834472285, "percentage": 76.99, "elapsed_time": "15:11:29", "remaining_time": "4:32:26"} +{"current_steps": 1269, "total_steps": 1647, "loss": 0.2772, "lr": 
1.528741876302598e-06, "epoch": 0.7708428246013668, "percentage": 77.05, "elapsed_time": "15:12:13", "remaining_time": "4:31:43"} +{"current_steps": 1270, "total_steps": 1647, "loss": 0.2478, "lr": 1.5211211189134955e-06, "epoch": 0.7714502657555049, "percentage": 77.11, "elapsed_time": "15:12:52", "remaining_time": "4:30:59"} +{"current_steps": 1271, "total_steps": 1647, "loss": 0.2401, "lr": 1.5135159945300232e-06, "epoch": 0.7720577069096431, "percentage": 77.17, "elapsed_time": "15:13:37", "remaining_time": "4:30:16"} +{"current_steps": 1272, "total_steps": 1647, "loss": 0.2617, "lr": 1.5059265373272574e-06, "epoch": 0.7726651480637813, "percentage": 77.23, "elapsed_time": "15:14:19", "remaining_time": "4:29:33"} +{"current_steps": 1273, "total_steps": 1647, "loss": 0.267, "lr": 1.4983527814098736e-06, "epoch": 0.7732725892179195, "percentage": 77.29, "elapsed_time": "15:15:00", "remaining_time": "4:28:49"} +{"current_steps": 1274, "total_steps": 1647, "loss": 0.2421, "lr": 1.4907947608119866e-06, "epoch": 0.7738800303720577, "percentage": 77.35, "elapsed_time": "15:15:43", "remaining_time": "4:28:06"} +{"current_steps": 1275, "total_steps": 1647, "loss": 0.2452, "lr": 1.4832525094970007e-06, "epoch": 0.7744874715261959, "percentage": 77.41, "elapsed_time": "15:16:24", "remaining_time": "4:27:22"} +{"current_steps": 1276, "total_steps": 1647, "loss": 0.2166, "lr": 1.475726061357463e-06, "epoch": 0.7750949126803341, "percentage": 77.47, "elapsed_time": "15:17:04", "remaining_time": "4:26:38"} +{"current_steps": 1277, "total_steps": 1647, "loss": 0.2415, "lr": 1.4682154502149025e-06, "epoch": 0.7757023538344723, "percentage": 77.53, "elapsed_time": "15:17:42", "remaining_time": "4:25:54"} +{"current_steps": 1278, "total_steps": 1647, "loss": 0.2569, "lr": 1.4607207098196851e-06, "epoch": 0.7763097949886105, "percentage": 77.6, "elapsed_time": "15:18:30", "remaining_time": "4:25:12"} +{"current_steps": 1279, "total_steps": 1647, "loss": 0.2648, "lr": 
1.4532418738508525e-06, "epoch": 0.7769172361427487, "percentage": 77.66, "elapsed_time": "15:19:10", "remaining_time": "4:24:28"} +{"current_steps": 1280, "total_steps": 1647, "loss": 0.2018, "lr": 1.4457789759159813e-06, "epoch": 0.7775246772968869, "percentage": 77.72, "elapsed_time": "15:19:53", "remaining_time": "4:23:44"} +{"current_steps": 1281, "total_steps": 1647, "loss": 0.2616, "lr": 1.4383320495510267e-06, "epoch": 0.778132118451025, "percentage": 77.78, "elapsed_time": "15:20:37", "remaining_time": "4:23:02"} +{"current_steps": 1282, "total_steps": 1647, "loss": 0.2529, "lr": 1.430901128220174e-06, "epoch": 0.7787395596051633, "percentage": 77.84, "elapsed_time": "15:21:17", "remaining_time": "4:22:18"} +{"current_steps": 1283, "total_steps": 1647, "loss": 0.2756, "lr": 1.4234862453156839e-06, "epoch": 0.7793470007593014, "percentage": 77.9, "elapsed_time": "15:22:02", "remaining_time": "4:21:35"} +{"current_steps": 1284, "total_steps": 1647, "loss": 0.2484, "lr": 1.4160874341577447e-06, "epoch": 0.7799544419134397, "percentage": 77.96, "elapsed_time": "15:22:46", "remaining_time": "4:20:52"} +{"current_steps": 1285, "total_steps": 1647, "loss": 0.2687, "lr": 1.4087047279943267e-06, "epoch": 0.7805618830675778, "percentage": 78.02, "elapsed_time": "15:23:26", "remaining_time": "4:20:08"} +{"current_steps": 1286, "total_steps": 1647, "loss": 0.2563, "lr": 1.4013381600010278e-06, "epoch": 0.781169324221716, "percentage": 78.08, "elapsed_time": "15:24:11", "remaining_time": "4:19:26"} +{"current_steps": 1287, "total_steps": 1647, "loss": 0.2866, "lr": 1.3939877632809279e-06, "epoch": 0.7817767653758542, "percentage": 78.14, "elapsed_time": "15:24:52", "remaining_time": "4:18:42"} +{"current_steps": 1288, "total_steps": 1647, "loss": 0.2418, "lr": 1.3866535708644335e-06, "epoch": 0.7823842065299924, "percentage": 78.2, "elapsed_time": "15:25:38", "remaining_time": "4:17:59"} +{"current_steps": 1289, "total_steps": 1647, "loss": 0.2582, "lr": 
1.3793356157091387e-06, "epoch": 0.7829916476841307, "percentage": 78.26, "elapsed_time": "15:26:20", "remaining_time": "4:17:16"} +{"current_steps": 1290, "total_steps": 1647, "loss": 0.2834, "lr": 1.3720339306996666e-06, "epoch": 0.7835990888382688, "percentage": 78.32, "elapsed_time": "15:27:03", "remaining_time": "4:16:33"} +{"current_steps": 1291, "total_steps": 1647, "loss": 0.2374, "lr": 1.3647485486475376e-06, "epoch": 0.784206529992407, "percentage": 78.38, "elapsed_time": "15:27:47", "remaining_time": "4:15:50"} +{"current_steps": 1292, "total_steps": 1647, "loss": 0.2531, "lr": 1.3574795022910014e-06, "epoch": 0.7848139711465452, "percentage": 78.45, "elapsed_time": "15:28:27", "remaining_time": "4:15:06"} +{"current_steps": 1293, "total_steps": 1647, "loss": 0.2575, "lr": 1.3502268242949025e-06, "epoch": 0.7854214123006834, "percentage": 78.51, "elapsed_time": "15:29:09", "remaining_time": "4:14:23"} +{"current_steps": 1294, "total_steps": 1647, "loss": 0.2383, "lr": 1.3429905472505344e-06, "epoch": 0.7860288534548215, "percentage": 78.57, "elapsed_time": "15:29:52", "remaining_time": "4:13:40"} +{"current_steps": 1295, "total_steps": 1647, "loss": 0.2585, "lr": 1.3357707036754875e-06, "epoch": 0.7866362946089598, "percentage": 78.63, "elapsed_time": "15:30:34", "remaining_time": "4:12:56"} +{"current_steps": 1296, "total_steps": 1647, "loss": 0.2452, "lr": 1.3285673260135073e-06, "epoch": 0.7872437357630979, "percentage": 78.69, "elapsed_time": "15:31:16", "remaining_time": "4:12:13"} +{"current_steps": 1297, "total_steps": 1647, "loss": 0.2514, "lr": 1.321380446634342e-06, "epoch": 0.7878511769172362, "percentage": 78.75, "elapsed_time": "15:31:59", "remaining_time": "4:11:30"} +{"current_steps": 1298, "total_steps": 1647, "loss": 0.2698, "lr": 1.314210097833607e-06, "epoch": 0.7884586180713743, "percentage": 78.81, "elapsed_time": "15:32:42", "remaining_time": "4:10:46"} +{"current_steps": 1299, "total_steps": 1647, "loss": 0.2623, "lr": 
1.3070563118326295e-06, "epoch": 0.7890660592255125, "percentage": 78.87, "elapsed_time": "15:33:23", "remaining_time": "4:10:03"} +{"current_steps": 1300, "total_steps": 1647, "loss": 0.2227, "lr": 1.2999191207783129e-06, "epoch": 0.7896735003796507, "percentage": 78.93, "elapsed_time": "15:34:07", "remaining_time": "4:09:20"} +{"current_steps": 1301, "total_steps": 1647, "loss": 0.2386, "lr": 1.2927985567429868e-06, "epoch": 0.7902809415337889, "percentage": 78.99, "elapsed_time": "15:34:50", "remaining_time": "4:08:37"} +{"current_steps": 1302, "total_steps": 1647, "loss": 0.2299, "lr": 1.2856946517242608e-06, "epoch": 0.7908883826879272, "percentage": 79.05, "elapsed_time": "15:35:38", "remaining_time": "4:07:55"} +{"current_steps": 1303, "total_steps": 1647, "loss": 0.2536, "lr": 1.27860743764489e-06, "epoch": 0.7914958238420653, "percentage": 79.11, "elapsed_time": "15:36:19", "remaining_time": "4:07:11"} +{"current_steps": 1304, "total_steps": 1647, "loss": 0.2485, "lr": 1.2715369463526173e-06, "epoch": 0.7921032649962035, "percentage": 79.17, "elapsed_time": "15:37:02", "remaining_time": "4:06:28"} +{"current_steps": 1305, "total_steps": 1647, "loss": 0.241, "lr": 1.2644832096200498e-06, "epoch": 0.7927107061503417, "percentage": 79.23, "elapsed_time": "15:37:42", "remaining_time": "4:05:44"} +{"current_steps": 1306, "total_steps": 1647, "loss": 0.2236, "lr": 1.257446259144494e-06, "epoch": 0.7933181473044799, "percentage": 79.3, "elapsed_time": "15:38:30", "remaining_time": "4:05:02"} +{"current_steps": 1307, "total_steps": 1647, "loss": 0.2552, "lr": 1.2504261265478324e-06, "epoch": 0.793925588458618, "percentage": 79.36, "elapsed_time": "15:39:15", "remaining_time": "4:04:20"} +{"current_steps": 1308, "total_steps": 1647, "loss": 0.2469, "lr": 1.2434228433763657e-06, "epoch": 0.7945330296127563, "percentage": 79.42, "elapsed_time": "15:39:58", "remaining_time": "4:03:36"} +{"current_steps": 1309, "total_steps": 1647, "loss": 0.2437, "lr": 
1.2364364411006841e-06, "epoch": 0.7951404707668944, "percentage": 79.48, "elapsed_time": "15:40:40", "remaining_time": "4:02:53"} +{"current_steps": 1310, "total_steps": 1647, "loss": 0.2327, "lr": 1.2294669511155193e-06, "epoch": 0.7957479119210327, "percentage": 79.54, "elapsed_time": "15:41:22", "remaining_time": "4:02:10"} +{"current_steps": 1311, "total_steps": 1647, "loss": 0.2627, "lr": 1.2225144047396015e-06, "epoch": 0.7963553530751708, "percentage": 79.6, "elapsed_time": "15:42:08", "remaining_time": "4:01:27"} +{"current_steps": 1312, "total_steps": 1647, "loss": 0.262, "lr": 1.215578833215526e-06, "epoch": 0.796962794229309, "percentage": 79.66, "elapsed_time": "15:42:49", "remaining_time": "4:00:44"} +{"current_steps": 1313, "total_steps": 1647, "loss": 0.2416, "lr": 1.2086602677096033e-06, "epoch": 0.7975702353834472, "percentage": 79.72, "elapsed_time": "15:43:33", "remaining_time": "4:00:01"} +{"current_steps": 1314, "total_steps": 1647, "loss": 0.2478, "lr": 1.201758739311728e-06, "epoch": 0.7981776765375854, "percentage": 79.78, "elapsed_time": "15:44:16", "remaining_time": "3:59:18"} +{"current_steps": 1315, "total_steps": 1647, "loss": 0.2663, "lr": 1.1948742790352342e-06, "epoch": 0.7987851176917237, "percentage": 79.84, "elapsed_time": "15:45:00", "remaining_time": "3:58:35"} +{"current_steps": 1316, "total_steps": 1647, "loss": 0.2271, "lr": 1.1880069178167586e-06, "epoch": 0.7993925588458618, "percentage": 79.9, "elapsed_time": "15:45:42", "remaining_time": "3:57:51"} +{"current_steps": 1317, "total_steps": 1647, "loss": 0.222, "lr": 1.1811566865160961e-06, "epoch": 0.8, "percentage": 79.96, "elapsed_time": "15:46:25", "remaining_time": "3:57:08"} +{"current_steps": 1318, "total_steps": 1647, "loss": 0.2592, "lr": 1.1743236159160654e-06, "epoch": 0.8006074411541382, "percentage": 80.02, "elapsed_time": "15:47:09", "remaining_time": "3:56:25"} +{"current_steps": 1319, "total_steps": 1647, "loss": 0.266, "lr": 1.167507736722377e-06, "epoch": 
0.8012148823082764, "percentage": 80.09, "elapsed_time": "15:47:49", "remaining_time": "3:55:41"} +{"current_steps": 1320, "total_steps": 1647, "loss": 0.249, "lr": 1.1607090795634802e-06, "epoch": 0.8018223234624146, "percentage": 80.15, "elapsed_time": "15:48:31", "remaining_time": "3:54:58"} +{"current_steps": 1321, "total_steps": 1647, "loss": 0.273, "lr": 1.15392767499044e-06, "epoch": 0.8024297646165528, "percentage": 80.21, "elapsed_time": "15:49:14", "remaining_time": "3:54:15"} +{"current_steps": 1322, "total_steps": 1647, "loss": 0.2713, "lr": 1.1471635534767877e-06, "epoch": 0.8030372057706909, "percentage": 80.27, "elapsed_time": "15:49:56", "remaining_time": "3:53:31"} +{"current_steps": 1323, "total_steps": 1647, "loss": 0.2509, "lr": 1.1404167454183957e-06, "epoch": 0.8036446469248292, "percentage": 80.33, "elapsed_time": "15:50:37", "remaining_time": "3:52:48"} +{"current_steps": 1324, "total_steps": 1647, "loss": 0.2414, "lr": 1.133687281133331e-06, "epoch": 0.8042520880789673, "percentage": 80.39, "elapsed_time": "15:51:20", "remaining_time": "3:52:05"} +{"current_steps": 1325, "total_steps": 1647, "loss": 0.2382, "lr": 1.1269751908617277e-06, "epoch": 0.8048595292331056, "percentage": 80.45, "elapsed_time": "15:52:03", "remaining_time": "3:51:22"} +{"current_steps": 1326, "total_steps": 1647, "loss": 0.2336, "lr": 1.1202805047656406e-06, "epoch": 0.8054669703872437, "percentage": 80.51, "elapsed_time": "15:52:44", "remaining_time": "3:50:38"} +{"current_steps": 1327, "total_steps": 1647, "loss": 0.2576, "lr": 1.113603252928917e-06, "epoch": 0.8060744115413819, "percentage": 80.57, "elapsed_time": "15:53:27", "remaining_time": "3:49:55"} +{"current_steps": 1328, "total_steps": 1647, "loss": 0.2703, "lr": 1.1069434653570633e-06, "epoch": 0.8066818526955202, "percentage": 80.63, "elapsed_time": "15:54:11", "remaining_time": "3:49:12"} +{"current_steps": 1329, "total_steps": 1647, "loss": 0.251, "lr": 1.1003011719771046e-06, "epoch": 
0.8072892938496583, "percentage": 80.69, "elapsed_time": "15:54:55", "remaining_time": "3:48:29"} +{"current_steps": 1330, "total_steps": 1647, "loss": 0.2523, "lr": 1.0936764026374547e-06, "epoch": 0.8078967350037966, "percentage": 80.75, "elapsed_time": "15:55:40", "remaining_time": "3:47:46"} +{"current_steps": 1331, "total_steps": 1647, "loss": 0.2573, "lr": 1.0870691871077738e-06, "epoch": 0.8085041761579347, "percentage": 80.81, "elapsed_time": "15:56:23", "remaining_time": "3:47:03"} +{"current_steps": 1332, "total_steps": 1647, "loss": 0.2727, "lr": 1.0804795550788473e-06, "epoch": 0.8091116173120729, "percentage": 80.87, "elapsed_time": "15:57:05", "remaining_time": "3:46:20"} +{"current_steps": 1333, "total_steps": 1647, "loss": 0.2499, "lr": 1.073907536162443e-06, "epoch": 0.8097190584662111, "percentage": 80.94, "elapsed_time": "15:57:49", "remaining_time": "3:45:37"} +{"current_steps": 1334, "total_steps": 1647, "loss": 0.2077, "lr": 1.0673531598911824e-06, "epoch": 0.8103264996203493, "percentage": 81.0, "elapsed_time": "15:58:29", "remaining_time": "3:44:53"} +{"current_steps": 1335, "total_steps": 1647, "loss": 0.2733, "lr": 1.0608164557184042e-06, "epoch": 0.8109339407744874, "percentage": 81.06, "elapsed_time": "15:59:15", "remaining_time": "3:44:11"} +{"current_steps": 1336, "total_steps": 1647, "loss": 0.2712, "lr": 1.0542974530180327e-06, "epoch": 0.8115413819286257, "percentage": 81.12, "elapsed_time": "15:59:58", "remaining_time": "3:43:27"} +{"current_steps": 1337, "total_steps": 1647, "loss": 0.281, "lr": 1.0477961810844517e-06, "epoch": 0.8121488230827638, "percentage": 81.18, "elapsed_time": "16:00:40", "remaining_time": "3:42:44"} +{"current_steps": 1338, "total_steps": 1647, "loss": 0.2521, "lr": 1.0413126691323667e-06, "epoch": 0.8127562642369021, "percentage": 81.24, "elapsed_time": "16:01:25", "remaining_time": "3:42:02"} +{"current_steps": 1339, "total_steps": 1647, "loss": 0.2869, "lr": 1.0348469462966753e-06, "epoch": 
0.8133637053910402, "percentage": 81.3, "elapsed_time": "16:02:08", "remaining_time": "3:41:18"} +{"current_steps": 1340, "total_steps": 1647, "loss": 0.2747, "lr": 1.0283990416323336e-06, "epoch": 0.8139711465451784, "percentage": 81.36, "elapsed_time": "16:02:52", "remaining_time": "3:40:35"} +{"current_steps": 1341, "total_steps": 1647, "loss": 0.2071, "lr": 1.0219689841142343e-06, "epoch": 0.8145785876993167, "percentage": 81.42, "elapsed_time": "16:03:34", "remaining_time": "3:39:52"} +{"current_steps": 1342, "total_steps": 1647, "loss": 0.2345, "lr": 1.0155568026370637e-06, "epoch": 0.8151860288534548, "percentage": 81.48, "elapsed_time": "16:04:19", "remaining_time": "3:39:09"} +{"current_steps": 1343, "total_steps": 1647, "loss": 0.2435, "lr": 1.0091625260151827e-06, "epoch": 0.8157934700075931, "percentage": 81.54, "elapsed_time": "16:05:06", "remaining_time": "3:38:27"} +{"current_steps": 1344, "total_steps": 1647, "loss": 0.287, "lr": 1.0027861829824953e-06, "epoch": 0.8164009111617312, "percentage": 81.6, "elapsed_time": "16:05:47", "remaining_time": "3:37:43"} +{"current_steps": 1345, "total_steps": 1647, "loss": 0.2605, "lr": 9.964278021923107e-07, "epoch": 0.8170083523158694, "percentage": 81.66, "elapsed_time": "16:06:29", "remaining_time": "3:37:00"} +{"current_steps": 1346, "total_steps": 1647, "loss": 0.2647, "lr": 9.900874122172294e-07, "epoch": 0.8176157934700076, "percentage": 81.72, "elapsed_time": "16:07:09", "remaining_time": "3:36:16"} +{"current_steps": 1347, "total_steps": 1647, "loss": 0.2707, "lr": 9.83765041548998e-07, "epoch": 0.8182232346241458, "percentage": 81.79, "elapsed_time": "16:07:54", "remaining_time": "3:35:34"} +{"current_steps": 1348, "total_steps": 1647, "loss": 0.2515, "lr": 9.774607185984004e-07, "epoch": 0.818830675778284, "percentage": 81.85, "elapsed_time": "16:08:37", "remaining_time": "3:34:50"} +{"current_steps": 1349, "total_steps": 1647, "loss": 0.241, "lr": 9.711744716951093e-07, "epoch": 0.8194381169324222, 
"percentage": 81.91, "elapsed_time": "16:09:17", "remaining_time": "3:34:07"} +{"current_steps": 1350, "total_steps": 1647, "loss": 0.2197, "lr": 9.649063290875771e-07, "epoch": 0.8200455580865603, "percentage": 81.97, "elapsed_time": "16:09:59", "remaining_time": "3:33:23"} +{"current_steps": 1351, "total_steps": 1647, "loss": 0.2367, "lr": 9.586563189428954e-07, "epoch": 0.8206529992406986, "percentage": 82.03, "elapsed_time": "16:10:39", "remaining_time": "3:32:40"} +{"current_steps": 1352, "total_steps": 1647, "loss": 0.2391, "lr": 9.524244693466773e-07, "epoch": 0.8212604403948367, "percentage": 82.09, "elapsed_time": "16:11:21", "remaining_time": "3:31:56"} +{"current_steps": 1353, "total_steps": 1647, "loss": 0.247, "lr": 9.462108083029287e-07, "epoch": 0.8218678815489749, "percentage": 82.15, "elapsed_time": "16:12:06", "remaining_time": "3:31:14"} +{"current_steps": 1354, "total_steps": 1647, "loss": 0.2908, "lr": 9.400153637339182e-07, "epoch": 0.8224753227031132, "percentage": 82.21, "elapsed_time": "16:12:47", "remaining_time": "3:30:30"} +{"current_steps": 1355, "total_steps": 1647, "loss": 0.2264, "lr": 9.338381634800597e-07, "epoch": 0.8230827638572513, "percentage": 82.27, "elapsed_time": "16:13:32", "remaining_time": "3:29:47"} +{"current_steps": 1356, "total_steps": 1647, "loss": 0.2334, "lr": 9.276792352997782e-07, "epoch": 0.8236902050113896, "percentage": 82.33, "elapsed_time": "16:14:12", "remaining_time": "3:29:04"} +{"current_steps": 1357, "total_steps": 1647, "loss": 0.2554, "lr": 9.215386068693927e-07, "epoch": 0.8242976461655277, "percentage": 82.39, "elapsed_time": "16:14:57", "remaining_time": "3:28:21"} +{"current_steps": 1358, "total_steps": 1647, "loss": 0.2328, "lr": 9.154163057829879e-07, "epoch": 0.8249050873196659, "percentage": 82.45, "elapsed_time": "16:15:41", "remaining_time": "3:27:38"} +{"current_steps": 1359, "total_steps": 1647, "loss": 0.2641, "lr": 9.093123595522929e-07, "epoch": 0.8255125284738041, "percentage": 82.51, 
"elapsed_time": "16:16:23", "remaining_time": "3:26:54"} +{"current_steps": 1360, "total_steps": 1647, "loss": 0.2168, "lr": 9.032267956065516e-07, "epoch": 0.8261199696279423, "percentage": 82.57, "elapsed_time": "16:17:07", "remaining_time": "3:26:12"} +{"current_steps": 1361, "total_steps": 1647, "loss": 0.2665, "lr": 8.971596412924067e-07, "epoch": 0.8267274107820805, "percentage": 82.64, "elapsed_time": "16:17:47", "remaining_time": "3:25:28"} +{"current_steps": 1362, "total_steps": 1647, "loss": 0.2654, "lr": 8.911109238737748e-07, "epoch": 0.8273348519362187, "percentage": 82.7, "elapsed_time": "16:18:30", "remaining_time": "3:24:45"} +{"current_steps": 1363, "total_steps": 1647, "loss": 0.2572, "lr": 8.850806705317183e-07, "epoch": 0.8279422930903568, "percentage": 82.76, "elapsed_time": "16:19:12", "remaining_time": "3:24:01"} +{"current_steps": 1364, "total_steps": 1647, "loss": 0.2762, "lr": 8.790689083643328e-07, "epoch": 0.8285497342444951, "percentage": 82.82, "elapsed_time": "16:19:55", "remaining_time": "3:23:18"} +{"current_steps": 1365, "total_steps": 1647, "loss": 0.2728, "lr": 8.730756643866157e-07, "epoch": 0.8291571753986332, "percentage": 82.88, "elapsed_time": "16:20:41", "remaining_time": "3:22:36"} +{"current_steps": 1366, "total_steps": 1647, "loss": 0.2876, "lr": 8.671009655303531e-07, "epoch": 0.8297646165527715, "percentage": 82.94, "elapsed_time": "16:21:25", "remaining_time": "3:21:53"} +{"current_steps": 1367, "total_steps": 1647, "loss": 0.2582, "lr": 8.611448386439936e-07, "epoch": 0.8303720577069097, "percentage": 83.0, "elapsed_time": "16:22:05", "remaining_time": "3:21:09"} +{"current_steps": 1368, "total_steps": 1647, "loss": 0.2428, "lr": 8.552073104925296e-07, "epoch": 0.8309794988610478, "percentage": 83.06, "elapsed_time": "16:22:49", "remaining_time": "3:20:26"} +{"current_steps": 1369, "total_steps": 1647, "loss": 0.2568, "lr": 8.492884077573749e-07, "epoch": 0.8315869400151861, "percentage": 83.12, "elapsed_time": 
"16:23:34", "remaining_time": "3:19:43"} +{"current_steps": 1370, "total_steps": 1647, "loss": 0.2176, "lr": 8.433881570362484e-07, "epoch": 0.8321943811693242, "percentage": 83.18, "elapsed_time": "16:24:21", "remaining_time": "3:19:01"} +{"current_steps": 1371, "total_steps": 1647, "loss": 0.274, "lr": 8.375065848430508e-07, "epoch": 0.8328018223234624, "percentage": 83.24, "elapsed_time": "16:25:10", "remaining_time": "3:18:19"} +{"current_steps": 1372, "total_steps": 1647, "loss": 0.2649, "lr": 8.316437176077491e-07, "epoch": 0.8334092634776006, "percentage": 83.3, "elapsed_time": "16:25:54", "remaining_time": "3:17:36"} +{"current_steps": 1373, "total_steps": 1647, "loss": 0.238, "lr": 8.257995816762559e-07, "epoch": 0.8340167046317388, "percentage": 83.36, "elapsed_time": "16:26:41", "remaining_time": "3:16:54"} +{"current_steps": 1374, "total_steps": 1647, "loss": 0.22, "lr": 8.199742033103091e-07, "epoch": 0.834624145785877, "percentage": 83.42, "elapsed_time": "16:27:28", "remaining_time": "3:16:12"} +{"current_steps": 1375, "total_steps": 1647, "loss": 0.2523, "lr": 8.141676086873574e-07, "epoch": 0.8352315869400152, "percentage": 83.49, "elapsed_time": "16:28:08", "remaining_time": "3:15:28"} +{"current_steps": 1376, "total_steps": 1647, "loss": 0.2601, "lr": 8.083798239004408e-07, "epoch": 0.8358390280941533, "percentage": 83.55, "elapsed_time": "16:28:52", "remaining_time": "3:14:45"} +{"current_steps": 1377, "total_steps": 1647, "loss": 0.2538, "lr": 8.026108749580758e-07, "epoch": 0.8364464692482916, "percentage": 83.61, "elapsed_time": "16:29:34", "remaining_time": "3:14:02"} +{"current_steps": 1378, "total_steps": 1647, "loss": 0.2844, "lr": 7.968607877841333e-07, "epoch": 0.8370539104024297, "percentage": 83.67, "elapsed_time": "16:30:17", "remaining_time": "3:13:18"} +{"current_steps": 1379, "total_steps": 1647, "loss": 0.236, "lr": 7.911295882177256e-07, "epoch": 0.837661351556568, "percentage": 83.73, "elapsed_time": "16:31:04", 
"remaining_time": "3:12:36"} +{"current_steps": 1380, "total_steps": 1647, "loss": 0.2403, "lr": 7.854173020130906e-07, "epoch": 0.8382687927107062, "percentage": 83.79, "elapsed_time": "16:31:45", "remaining_time": "3:11:53"} +{"current_steps": 1381, "total_steps": 1647, "loss": 0.2287, "lr": 7.79723954839477e-07, "epoch": 0.8388762338648443, "percentage": 83.85, "elapsed_time": "16:32:24", "remaining_time": "3:11:09"} +{"current_steps": 1382, "total_steps": 1647, "loss": 0.2435, "lr": 7.740495722810271e-07, "epoch": 0.8394836750189826, "percentage": 83.91, "elapsed_time": "16:33:06", "remaining_time": "3:10:25"} +{"current_steps": 1383, "total_steps": 1647, "loss": 0.2906, "lr": 7.683941798366578e-07, "epoch": 0.8400911161731207, "percentage": 83.97, "elapsed_time": "16:33:52", "remaining_time": "3:09:43"} +{"current_steps": 1384, "total_steps": 1647, "loss": 0.2498, "lr": 7.627578029199562e-07, "epoch": 0.840698557327259, "percentage": 84.03, "elapsed_time": "16:34:41", "remaining_time": "3:09:01"} +{"current_steps": 1385, "total_steps": 1647, "loss": 0.2742, "lr": 7.571404668590532e-07, "epoch": 0.8413059984813971, "percentage": 84.09, "elapsed_time": "16:35:26", "remaining_time": "3:08:18"} +{"current_steps": 1386, "total_steps": 1647, "loss": 0.2285, "lr": 7.515421968965242e-07, "epoch": 0.8419134396355353, "percentage": 84.15, "elapsed_time": "16:36:12", "remaining_time": "3:07:35"} +{"current_steps": 1387, "total_steps": 1647, "loss": 0.262, "lr": 7.459630181892608e-07, "epoch": 0.8425208807896735, "percentage": 84.21, "elapsed_time": "16:36:54", "remaining_time": "3:06:52"} +{"current_steps": 1388, "total_steps": 1647, "loss": 0.2675, "lr": 7.404029558083653e-07, "epoch": 0.8431283219438117, "percentage": 84.27, "elapsed_time": "16:37:33", "remaining_time": "3:06:08"} +{"current_steps": 1389, "total_steps": 1647, "loss": 0.2855, "lr": 7.348620347390384e-07, "epoch": 0.8437357630979498, "percentage": 84.34, "elapsed_time": "16:38:18", "remaining_time": 
"3:05:25"} +{"current_steps": 1390, "total_steps": 1647, "loss": 0.2345, "lr": 7.293402798804667e-07, "epoch": 0.8443432042520881, "percentage": 84.4, "elapsed_time": "16:39:02", "remaining_time": "3:04:42"} +{"current_steps": 1391, "total_steps": 1647, "loss": 0.2645, "lr": 7.238377160457094e-07, "epoch": 0.8449506454062262, "percentage": 84.46, "elapsed_time": "16:39:43", "remaining_time": "3:03:59"} +{"current_steps": 1392, "total_steps": 1647, "loss": 0.2626, "lr": 7.183543679615834e-07, "epoch": 0.8455580865603645, "percentage": 84.52, "elapsed_time": "16:40:24", "remaining_time": "3:03:15"} +{"current_steps": 1393, "total_steps": 1647, "loss": 0.2709, "lr": 7.128902602685617e-07, "epoch": 0.8461655277145027, "percentage": 84.58, "elapsed_time": "16:41:04", "remaining_time": "3:02:32"} +{"current_steps": 1394, "total_steps": 1647, "loss": 0.2464, "lr": 7.074454175206524e-07, "epoch": 0.8467729688686408, "percentage": 84.64, "elapsed_time": "16:41:46", "remaining_time": "3:01:48"} +{"current_steps": 1395, "total_steps": 1647, "loss": 0.2447, "lr": 7.020198641852949e-07, "epoch": 0.8473804100227791, "percentage": 84.7, "elapsed_time": "16:42:27", "remaining_time": "3:01:05"} +{"current_steps": 1396, "total_steps": 1647, "loss": 0.2647, "lr": 6.966136246432492e-07, "epoch": 0.8479878511769172, "percentage": 84.76, "elapsed_time": "16:43:08", "remaining_time": "3:00:21"} +{"current_steps": 1397, "total_steps": 1647, "loss": 0.266, "lr": 6.912267231884817e-07, "epoch": 0.8485952923310555, "percentage": 84.82, "elapsed_time": "16:43:50", "remaining_time": "2:59:38"} +{"current_steps": 1398, "total_steps": 1647, "loss": 0.2891, "lr": 6.858591840280627e-07, "epoch": 0.8492027334851936, "percentage": 84.88, "elapsed_time": "16:44:32", "remaining_time": "2:58:55"} +{"current_steps": 1399, "total_steps": 1647, "loss": 0.2545, "lr": 6.805110312820501e-07, "epoch": 0.8498101746393318, "percentage": 84.94, "elapsed_time": "16:45:17", "remaining_time": "2:58:12"} 
+{"current_steps": 1400, "total_steps": 1647, "loss": 0.2522, "lr": 6.751822889833926e-07, "epoch": 0.85041761579347, "percentage": 85.0, "elapsed_time": "16:45:59", "remaining_time": "2:57:29"} +{"current_steps": 1401, "total_steps": 1647, "loss": 0.2411, "lr": 6.698729810778065e-07, "epoch": 0.8510250569476082, "percentage": 85.06, "elapsed_time": "16:46:41", "remaining_time": "2:56:45"} +{"current_steps": 1402, "total_steps": 1647, "loss": 0.2484, "lr": 6.645831314236817e-07, "epoch": 0.8516324981017463, "percentage": 85.12, "elapsed_time": "16:47:26", "remaining_time": "2:56:03"} +{"current_steps": 1403, "total_steps": 1647, "loss": 0.2852, "lr": 6.593127637919633e-07, "epoch": 0.8522399392558846, "percentage": 85.19, "elapsed_time": "16:48:13", "remaining_time": "2:55:20"} +{"current_steps": 1404, "total_steps": 1647, "loss": 0.2512, "lr": 6.540619018660555e-07, "epoch": 0.8528473804100227, "percentage": 85.25, "elapsed_time": "16:48:58", "remaining_time": "2:54:37"} +{"current_steps": 1405, "total_steps": 1647, "loss": 0.2538, "lr": 6.488305692417074e-07, "epoch": 0.853454821564161, "percentage": 85.31, "elapsed_time": "16:49:39", "remaining_time": "2:53:54"} +{"current_steps": 1406, "total_steps": 1647, "loss": 0.2412, "lr": 6.436187894269086e-07, "epoch": 0.8540622627182992, "percentage": 85.37, "elapsed_time": "16:50:23", "remaining_time": "2:53:11"} +{"current_steps": 1407, "total_steps": 1647, "loss": 0.2618, "lr": 6.384265858417877e-07, "epoch": 0.8546697038724373, "percentage": 85.43, "elapsed_time": "16:51:05", "remaining_time": "2:52:28"} +{"current_steps": 1408, "total_steps": 1647, "loss": 0.2363, "lr": 6.332539818184985e-07, "epoch": 0.8552771450265756, "percentage": 85.49, "elapsed_time": "16:51:47", "remaining_time": "2:51:44"} +{"current_steps": 1409, "total_steps": 1647, "loss": 0.2511, "lr": 6.281010006011256e-07, "epoch": 0.8558845861807137, "percentage": 85.55, "elapsed_time": "16:52:28", "remaining_time": "2:51:01"} +{"current_steps": 
1410, "total_steps": 1647, "loss": 0.2861, "lr": 6.229676653455719e-07, "epoch": 0.856492027334852, "percentage": 85.61, "elapsed_time": "16:53:11", "remaining_time": "2:50:18"} +{"current_steps": 1411, "total_steps": 1647, "loss": 0.2562, "lr": 6.178539991194599e-07, "epoch": 0.8570994684889901, "percentage": 85.67, "elapsed_time": "16:53:56", "remaining_time": "2:49:35"} +{"current_steps": 1412, "total_steps": 1647, "loss": 0.2394, "lr": 6.127600249020216e-07, "epoch": 0.8577069096431283, "percentage": 85.73, "elapsed_time": "16:54:39", "remaining_time": "2:48:52"} +{"current_steps": 1413, "total_steps": 1647, "loss": 0.2844, "lr": 6.076857655840024e-07, "epoch": 0.8583143507972665, "percentage": 85.79, "elapsed_time": "16:55:22", "remaining_time": "2:48:09"} +{"current_steps": 1414, "total_steps": 1647, "loss": 0.2221, "lr": 6.026312439675553e-07, "epoch": 0.8589217919514047, "percentage": 85.85, "elapsed_time": "16:56:05", "remaining_time": "2:47:25"} +{"current_steps": 1415, "total_steps": 1647, "loss": 0.2839, "lr": 5.975964827661346e-07, "epoch": 0.8595292331055429, "percentage": 85.91, "elapsed_time": "16:56:49", "remaining_time": "2:46:42"} +{"current_steps": 1416, "total_steps": 1647, "loss": 0.283, "lr": 5.925815046044026e-07, "epoch": 0.8601366742596811, "percentage": 85.97, "elapsed_time": "16:57:36", "remaining_time": "2:46:00"} +{"current_steps": 1417, "total_steps": 1647, "loss": 0.2386, "lr": 5.875863320181175e-07, "epoch": 0.8607441154138192, "percentage": 86.04, "elapsed_time": "16:58:25", "remaining_time": "2:45:18"} +{"current_steps": 1418, "total_steps": 1647, "loss": 0.2672, "lr": 5.826109874540409e-07, "epoch": 0.8613515565679575, "percentage": 86.1, "elapsed_time": "16:59:07", "remaining_time": "2:44:35"} +{"current_steps": 1419, "total_steps": 1647, "loss": 0.2645, "lr": 5.776554932698325e-07, "epoch": 0.8619589977220957, "percentage": 86.16, "elapsed_time": "16:59:52", "remaining_time": "2:43:52"} +{"current_steps": 1420, "total_steps": 
1647, "loss": 0.2326, "lr": 5.727198717339511e-07, "epoch": 0.8625664388762339, "percentage": 86.22, "elapsed_time": "17:00:32", "remaining_time": "2:43:08"} +{"current_steps": 1421, "total_steps": 1647, "loss": 0.2629, "lr": 5.678041450255512e-07, "epoch": 0.8631738800303721, "percentage": 86.28, "elapsed_time": "17:01:17", "remaining_time": "2:42:25"} +{"current_steps": 1422, "total_steps": 1647, "loss": 0.2641, "lr": 5.6290833523439e-07, "epoch": 0.8637813211845102, "percentage": 86.34, "elapsed_time": "17:02:00", "remaining_time": "2:41:42"} +{"current_steps": 1423, "total_steps": 1647, "loss": 0.2803, "lr": 5.58032464360721e-07, "epoch": 0.8643887623386485, "percentage": 86.4, "elapsed_time": "17:02:43", "remaining_time": "2:40:59"} +{"current_steps": 1424, "total_steps": 1647, "loss": 0.2356, "lr": 5.531765543152002e-07, "epoch": 0.8649962034927866, "percentage": 86.46, "elapsed_time": "17:03:24", "remaining_time": "2:40:16"} +{"current_steps": 1425, "total_steps": 1647, "loss": 0.2189, "lr": 5.483406269187869e-07, "epoch": 0.8656036446469249, "percentage": 86.52, "elapsed_time": "17:04:07", "remaining_time": "2:39:32"} +{"current_steps": 1426, "total_steps": 1647, "loss": 0.2094, "lr": 5.435247039026398e-07, "epoch": 0.866211085801063, "percentage": 86.58, "elapsed_time": "17:04:50", "remaining_time": "2:38:49"} +{"current_steps": 1427, "total_steps": 1647, "loss": 0.231, "lr": 5.387288069080298e-07, "epoch": 0.8668185269552012, "percentage": 86.64, "elapsed_time": "17:05:33", "remaining_time": "2:38:06"} +{"current_steps": 1428, "total_steps": 1647, "loss": 0.2333, "lr": 5.33952957486234e-07, "epoch": 0.8674259681093394, "percentage": 86.7, "elapsed_time": "17:06:15", "remaining_time": "2:37:23"} +{"current_steps": 1429, "total_steps": 1647, "loss": 0.2958, "lr": 5.291971770984428e-07, "epoch": 0.8680334092634776, "percentage": 86.76, "elapsed_time": "17:07:03", "remaining_time": "2:36:40"} +{"current_steps": 1430, "total_steps": 1647, "loss": 0.2405, "lr": 
5.244614871156612e-07, "epoch": 0.8686408504176157, "percentage": 86.82, "elapsed_time": "17:07:50", "remaining_time": "2:35:58"} +{"current_steps": 1431, "total_steps": 1647, "loss": 0.221, "lr": 5.197459088186163e-07, "epoch": 0.869248291571754, "percentage": 86.89, "elapsed_time": "17:08:35", "remaining_time": "2:35:15"} +{"current_steps": 1432, "total_steps": 1647, "loss": 0.2859, "lr": 5.150504633976572e-07, "epoch": 0.8698557327258922, "percentage": 86.95, "elapsed_time": "17:09:18", "remaining_time": "2:34:32"} +{"current_steps": 1433, "total_steps": 1647, "loss": 0.2239, "lr": 5.103751719526639e-07, "epoch": 0.8704631738800304, "percentage": 87.01, "elapsed_time": "17:10:02", "remaining_time": "2:33:49"} +{"current_steps": 1434, "total_steps": 1647, "loss": 0.2574, "lr": 5.057200554929509e-07, "epoch": 0.8710706150341686, "percentage": 87.07, "elapsed_time": "17:10:45", "remaining_time": "2:33:06"} +{"current_steps": 1435, "total_steps": 1647, "loss": 0.2639, "lr": 5.010851349371704e-07, "epoch": 0.8716780561883067, "percentage": 87.13, "elapsed_time": "17:11:30", "remaining_time": "2:32:23"} +{"current_steps": 1436, "total_steps": 1647, "loss": 0.2488, "lr": 4.964704311132224e-07, "epoch": 0.872285497342445, "percentage": 87.19, "elapsed_time": "17:12:10", "remaining_time": "2:31:39"} +{"current_steps": 1437, "total_steps": 1647, "loss": 0.2581, "lr": 4.918759647581578e-07, "epoch": 0.8728929384965831, "percentage": 87.25, "elapsed_time": "17:12:53", "remaining_time": "2:30:56"} +{"current_steps": 1438, "total_steps": 1647, "loss": 0.2578, "lr": 4.873017565180871e-07, "epoch": 0.8735003796507214, "percentage": 87.31, "elapsed_time": "17:13:37", "remaining_time": "2:30:13"} +{"current_steps": 1439, "total_steps": 1647, "loss": 0.2405, "lr": 4.827478269480895e-07, "epoch": 0.8741078208048595, "percentage": 87.37, "elapsed_time": "17:14:18", "remaining_time": "2:29:30"} +{"current_steps": 1440, "total_steps": 1647, "loss": 0.2701, "lr": 4.782141965121129e-07, 
"epoch": 0.8747152619589977, "percentage": 87.43, "elapsed_time": "17:15:06", "remaining_time": "2:28:47"} +{"current_steps": 1441, "total_steps": 1647, "loss": 0.2886, "lr": 4.7370088558289175e-07, "epoch": 0.8753227031131359, "percentage": 87.49, "elapsed_time": "17:15:46", "remaining_time": "2:28:04"} +{"current_steps": 1442, "total_steps": 1647, "loss": 0.2471, "lr": 4.6920791444184934e-07, "epoch": 0.8759301442672741, "percentage": 87.55, "elapsed_time": "17:16:27", "remaining_time": "2:27:20"} +{"current_steps": 1443, "total_steps": 1647, "loss": 0.2414, "lr": 4.647353032790086e-07, "epoch": 0.8765375854214122, "percentage": 87.61, "elapsed_time": "17:17:07", "remaining_time": "2:26:37"} +{"current_steps": 1444, "total_steps": 1647, "loss": 0.2079, "lr": 4.602830721928997e-07, "epoch": 0.8771450265755505, "percentage": 87.67, "elapsed_time": "17:17:49", "remaining_time": "2:25:53"} +{"current_steps": 1445, "total_steps": 1647, "loss": 0.2949, "lr": 4.558512411904731e-07, "epoch": 0.8777524677296887, "percentage": 87.74, "elapsed_time": "17:18:33", "remaining_time": "2:25:10"} +{"current_steps": 1446, "total_steps": 1647, "loss": 0.249, "lr": 4.5143983018700485e-07, "epoch": 0.8783599088838269, "percentage": 87.8, "elapsed_time": "17:19:13", "remaining_time": "2:24:27"} +{"current_steps": 1447, "total_steps": 1647, "loss": 0.2422, "lr": 4.4704885900601236e-07, "epoch": 0.8789673500379651, "percentage": 87.86, "elapsed_time": "17:19:54", "remaining_time": "2:23:44"} +{"current_steps": 1448, "total_steps": 1647, "loss": 0.2516, "lr": 4.4267834737916295e-07, "epoch": 0.8795747911921032, "percentage": 87.92, "elapsed_time": "17:20:37", "remaining_time": "2:23:00"} +{"current_steps": 1449, "total_steps": 1647, "loss": 0.2585, "lr": 4.3832831494618255e-07, "epoch": 0.8801822323462415, "percentage": 87.98, "elapsed_time": "17:21:17", "remaining_time": "2:22:17"} +{"current_steps": 1450, "total_steps": 1647, "loss": 0.2508, "lr": 4.33998781254773e-07, "epoch": 
0.8807896735003796, "percentage": 88.04, "elapsed_time": "17:22:02", "remaining_time": "2:21:34"} +{"current_steps": 1451, "total_steps": 1647, "loss": 0.2848, "lr": 4.2968976576051703e-07, "epoch": 0.8813971146545179, "percentage": 88.1, "elapsed_time": "17:22:48", "remaining_time": "2:20:51"} +{"current_steps": 1452, "total_steps": 1647, "loss": 0.2185, "lr": 4.2540128782679934e-07, "epoch": 0.882004555808656, "percentage": 88.16, "elapsed_time": "17:23:34", "remaining_time": "2:20:08"} +{"current_steps": 1453, "total_steps": 1647, "loss": 0.2464, "lr": 4.211333667247125e-07, "epoch": 0.8826119969627942, "percentage": 88.22, "elapsed_time": "17:24:16", "remaining_time": "2:19:25"} +{"current_steps": 1454, "total_steps": 1647, "loss": 0.2666, "lr": 4.1688602163297564e-07, "epoch": 0.8832194381169324, "percentage": 88.28, "elapsed_time": "17:24:59", "remaining_time": "2:18:42"} +{"current_steps": 1455, "total_steps": 1647, "loss": 0.2296, "lr": 4.126592716378408e-07, "epoch": 0.8838268792710706, "percentage": 88.34, "elapsed_time": "17:25:42", "remaining_time": "2:17:59"} +{"current_steps": 1456, "total_steps": 1647, "loss": 0.2682, "lr": 4.0845313573301736e-07, "epoch": 0.8844343204252088, "percentage": 88.4, "elapsed_time": "17:26:24", "remaining_time": "2:17:16"} +{"current_steps": 1457, "total_steps": 1647, "loss": 0.2643, "lr": 4.042676328195788e-07, "epoch": 0.885041761579347, "percentage": 88.46, "elapsed_time": "17:27:07", "remaining_time": "2:16:32"} +{"current_steps": 1458, "total_steps": 1647, "loss": 0.238, "lr": 4.001027817058789e-07, "epoch": 0.8856492027334852, "percentage": 88.52, "elapsed_time": "17:27:48", "remaining_time": "2:15:49"} +{"current_steps": 1459, "total_steps": 1647, "loss": 0.2155, "lr": 3.959586011074729e-07, "epoch": 0.8862566438876234, "percentage": 88.59, "elapsed_time": "17:28:32", "remaining_time": "2:15:06"} +{"current_steps": 1460, "total_steps": 1647, "loss": 0.2418, "lr": 3.9183510964702463e-07, "epoch": 0.8868640850417616, 
"percentage": 88.65, "elapsed_time": "17:29:16", "remaining_time": "2:14:23"} +{"current_steps": 1461, "total_steps": 1647, "loss": 0.2297, "lr": 3.8773232585422924e-07, "epoch": 0.8874715261958998, "percentage": 88.71, "elapsed_time": "17:30:00", "remaining_time": "2:13:40"} +{"current_steps": 1462, "total_steps": 1647, "loss": 0.2462, "lr": 3.836502681657289e-07, "epoch": 0.888078967350038, "percentage": 88.77, "elapsed_time": "17:30:43", "remaining_time": "2:12:57"} +{"current_steps": 1463, "total_steps": 1647, "loss": 0.2171, "lr": 3.795889549250292e-07, "epoch": 0.8886864085041761, "percentage": 88.83, "elapsed_time": "17:31:26", "remaining_time": "2:12:14"} +{"current_steps": 1464, "total_steps": 1647, "loss": 0.2243, "lr": 3.755484043824131e-07, "epoch": 0.8892938496583144, "percentage": 88.89, "elapsed_time": "17:32:07", "remaining_time": "2:11:30"} +{"current_steps": 1465, "total_steps": 1647, "loss": 0.2149, "lr": 3.715286346948671e-07, "epoch": 0.8899012908124525, "percentage": 88.95, "elapsed_time": "17:32:48", "remaining_time": "2:10:47"} +{"current_steps": 1466, "total_steps": 1647, "loss": 0.228, "lr": 3.675296639259912e-07, "epoch": 0.8905087319665907, "percentage": 89.01, "elapsed_time": "17:33:32", "remaining_time": "2:10:04"} +{"current_steps": 1467, "total_steps": 1647, "loss": 0.2233, "lr": 3.6355151004592414e-07, "epoch": 0.8911161731207289, "percentage": 89.07, "elapsed_time": "17:34:20", "remaining_time": "2:09:22"} +{"current_steps": 1468, "total_steps": 1647, "loss": 0.2603, "lr": 3.595941909312595e-07, "epoch": 0.8917236142748671, "percentage": 89.13, "elapsed_time": "17:35:04", "remaining_time": "2:08:39"} +{"current_steps": 1469, "total_steps": 1647, "loss": 0.2269, "lr": 3.5565772436496336e-07, "epoch": 0.8923310554290053, "percentage": 89.19, "elapsed_time": "17:35:46", "remaining_time": "2:07:55"} +{"current_steps": 1470, "total_steps": 1647, "loss": 0.2477, "lr": 3.517421280363004e-07, "epoch": 0.8929384965831435, "percentage": 
89.25, "elapsed_time": "17:36:29", "remaining_time": "2:07:12"} +{"current_steps": 1471, "total_steps": 1647, "loss": 0.2645, "lr": 3.4784741954074884e-07, "epoch": 0.8935459377372817, "percentage": 89.31, "elapsed_time": "17:37:12", "remaining_time": "2:06:29"} +{"current_steps": 1472, "total_steps": 1647, "loss": 0.2331, "lr": 3.439736163799251e-07, "epoch": 0.8941533788914199, "percentage": 89.37, "elapsed_time": "17:37:54", "remaining_time": "2:05:46"} +{"current_steps": 1473, "total_steps": 1647, "loss": 0.235, "lr": 3.4012073596150106e-07, "epoch": 0.8947608200455581, "percentage": 89.44, "elapsed_time": "17:38:34", "remaining_time": "2:05:02"} +{"current_steps": 1474, "total_steps": 1647, "loss": 0.2408, "lr": 3.362887955991301e-07, "epoch": 0.8953682611996963, "percentage": 89.5, "elapsed_time": "17:39:19", "remaining_time": "2:04:19"} +{"current_steps": 1475, "total_steps": 1647, "loss": 0.256, "lr": 3.3247781251236623e-07, "epoch": 0.8959757023538345, "percentage": 89.56, "elapsed_time": "17:40:01", "remaining_time": "2:03:36"} +{"current_steps": 1476, "total_steps": 1647, "loss": 0.2259, "lr": 3.2868780382658895e-07, "epoch": 0.8965831435079726, "percentage": 89.62, "elapsed_time": "17:40:44", "remaining_time": "2:02:53"} +{"current_steps": 1477, "total_steps": 1647, "loss": 0.2552, "lr": 3.2491878657292643e-07, "epoch": 0.8971905846621109, "percentage": 89.68, "elapsed_time": "17:41:25", "remaining_time": "2:02:10"} +{"current_steps": 1478, "total_steps": 1647, "loss": 0.2271, "lr": 3.2117077768817395e-07, "epoch": 0.897798025816249, "percentage": 89.74, "elapsed_time": "17:42:08", "remaining_time": "2:01:26"} +{"current_steps": 1479, "total_steps": 1647, "loss": 0.2447, "lr": 3.174437940147268e-07, "epoch": 0.8984054669703873, "percentage": 89.8, "elapsed_time": "17:42:50", "remaining_time": "2:00:43"} +{"current_steps": 1480, "total_steps": 1647, "loss": 0.2718, "lr": 3.1373785230049356e-07, "epoch": 0.8990129081245254, "percentage": 89.86, 
"elapsed_time": "17:43:32", "remaining_time": "2:00:00"} +{"current_steps": 1481, "total_steps": 1647, "loss": 0.2563, "lr": 3.1005296919883354e-07, "epoch": 0.8996203492786636, "percentage": 89.92, "elapsed_time": "17:44:17", "remaining_time": "1:59:17"} +{"current_steps": 1482, "total_steps": 1647, "loss": 0.2488, "lr": 3.0638916126846885e-07, "epoch": 0.9002277904328019, "percentage": 89.98, "elapsed_time": "17:44:58", "remaining_time": "1:58:34"} +{"current_steps": 1483, "total_steps": 1647, "loss": 0.2304, "lr": 3.0274644497342133e-07, "epoch": 0.90083523158694, "percentage": 90.04, "elapsed_time": "17:45:42", "remaining_time": "1:57:51"} +{"current_steps": 1484, "total_steps": 1647, "loss": 0.2141, "lr": 2.991248366829291e-07, "epoch": 0.9014426727410783, "percentage": 90.1, "elapsed_time": "17:46:24", "remaining_time": "1:57:07"} +{"current_steps": 1485, "total_steps": 1647, "loss": 0.2382, "lr": 2.955243526713808e-07, "epoch": 0.9020501138952164, "percentage": 90.16, "elapsed_time": "17:47:08", "remaining_time": "1:56:24"} +{"current_steps": 1486, "total_steps": 1647, "loss": 0.2527, "lr": 2.91945009118238e-07, "epoch": 0.9026575550493546, "percentage": 90.22, "elapsed_time": "17:47:50", "remaining_time": "1:55:41"} +{"current_steps": 1487, "total_steps": 1647, "loss": 0.3125, "lr": 2.883868221079628e-07, "epoch": 0.9032649962034928, "percentage": 90.29, "elapsed_time": "17:48:33", "remaining_time": "1:54:58"} +{"current_steps": 1488, "total_steps": 1647, "loss": 0.2788, "lr": 2.848498076299483e-07, "epoch": 0.903872437357631, "percentage": 90.35, "elapsed_time": "17:49:12", "remaining_time": "1:54:15"} +{"current_steps": 1489, "total_steps": 1647, "loss": 0.2439, "lr": 2.813339815784416e-07, "epoch": 0.9044798785117691, "percentage": 90.41, "elapsed_time": "17:49:56", "remaining_time": "1:53:31"} +{"current_steps": 1490, "total_steps": 1647, "loss": 0.2679, "lr": 2.7783935975247867e-07, "epoch": 0.9050873196659074, "percentage": 90.47, "elapsed_time": 
"17:50:40", "remaining_time": "1:52:48"} +{"current_steps": 1491, "total_steps": 1647, "loss": 0.2375, "lr": 2.743659578558089e-07, "epoch": 0.9056947608200455, "percentage": 90.53, "elapsed_time": "17:51:19", "remaining_time": "1:52:05"} +{"current_steps": 1492, "total_steps": 1647, "loss": 0.2293, "lr": 2.7091379149682683e-07, "epoch": 0.9063022019741838, "percentage": 90.59, "elapsed_time": "17:52:02", "remaining_time": "1:51:22"} +{"current_steps": 1493, "total_steps": 1647, "loss": 0.2409, "lr": 2.6748287618849957e-07, "epoch": 0.9069096431283219, "percentage": 90.65, "elapsed_time": "17:52:50", "remaining_time": "1:50:39"} +{"current_steps": 1494, "total_steps": 1647, "loss": 0.236, "lr": 2.6407322734829763e-07, "epoch": 0.9075170842824601, "percentage": 90.71, "elapsed_time": "17:53:29", "remaining_time": "1:49:56"} +{"current_steps": 1495, "total_steps": 1647, "loss": 0.2356, "lr": 2.6068486029813154e-07, "epoch": 0.9081245254365984, "percentage": 90.77, "elapsed_time": "17:54:10", "remaining_time": "1:49:12"} +{"current_steps": 1496, "total_steps": 1647, "loss": 0.2245, "lr": 2.573177902642726e-07, "epoch": 0.9087319665907365, "percentage": 90.83, "elapsed_time": "17:54:55", "remaining_time": "1:48:29"} +{"current_steps": 1497, "total_steps": 1647, "loss": 0.2416, "lr": 2.539720323772926e-07, "epoch": 0.9093394077448748, "percentage": 90.89, "elapsed_time": "17:55:36", "remaining_time": "1:47:46"} +{"current_steps": 1498, "total_steps": 1647, "loss": 0.2341, "lr": 2.506476016719922e-07, "epoch": 0.9099468488990129, "percentage": 90.95, "elapsed_time": "17:56:18", "remaining_time": "1:47:03"} +{"current_steps": 1499, "total_steps": 1647, "loss": 0.2628, "lr": 2.473445130873353e-07, "epoch": 0.9105542900531511, "percentage": 91.01, "elapsed_time": "17:57:05", "remaining_time": "1:46:20"} +{"current_steps": 1500, "total_steps": 1647, "loss": 0.235, "lr": 2.440627814663804e-07, "epoch": 0.9111617312072893, "percentage": 91.07, "elapsed_time": "17:57:47", 
"remaining_time": "1:45:37"} +{"current_steps": 1501, "total_steps": 1647, "loss": 0.2469, "lr": 2.4080242155621327e-07, "epoch": 0.9117691723614275, "percentage": 91.14, "elapsed_time": "17:58:26", "remaining_time": "1:44:53"} +{"current_steps": 1502, "total_steps": 1647, "loss": 0.2474, "lr": 2.3756344800788421e-07, "epoch": 0.9123766135155656, "percentage": 91.2, "elapsed_time": "17:59:07", "remaining_time": "1:44:10"} +{"current_steps": 1503, "total_steps": 1647, "loss": 0.2242, "lr": 2.343458753763378e-07, "epoch": 0.9129840546697039, "percentage": 91.26, "elapsed_time": "17:59:51", "remaining_time": "1:43:27"} +{"current_steps": 1504, "total_steps": 1647, "loss": 0.2504, "lr": 2.3114971812034981e-07, "epoch": 0.913591495823842, "percentage": 91.32, "elapsed_time": "18:00:33", "remaining_time": "1:42:44"} +{"current_steps": 1505, "total_steps": 1647, "loss": 0.2517, "lr": 2.2797499060246253e-07, "epoch": 0.9141989369779803, "percentage": 91.38, "elapsed_time": "18:01:14", "remaining_time": "1:42:01"} +{"current_steps": 1506, "total_steps": 1647, "loss": 0.2333, "lr": 2.2482170708892083e-07, "epoch": 0.9148063781321184, "percentage": 91.44, "elapsed_time": "18:01:57", "remaining_time": "1:41:17"} +{"current_steps": 1507, "total_steps": 1647, "loss": 0.242, "lr": 2.2168988174960382e-07, "epoch": 0.9154138192862566, "percentage": 91.5, "elapsed_time": "18:02:44", "remaining_time": "1:40:35"} +{"current_steps": 1508, "total_steps": 1647, "loss": 0.2494, "lr": 2.1857952865796616e-07, "epoch": 0.9160212604403949, "percentage": 91.56, "elapsed_time": "18:03:24", "remaining_time": "1:39:51"} +{"current_steps": 1509, "total_steps": 1647, "loss": 0.2581, "lr": 2.1549066179097355e-07, "epoch": 0.916628701594533, "percentage": 91.62, "elapsed_time": "18:04:05", "remaining_time": "1:39:08"} +{"current_steps": 1510, "total_steps": 1647, "loss": 0.2536, "lr": 2.124232950290367e-07, "epoch": 0.9172361427486713, "percentage": 91.68, "elapsed_time": "18:04:50", 
"remaining_time": "1:38:25"} +{"current_steps": 1511, "total_steps": 1647, "loss": 0.2409, "lr": 2.0937744215595467e-07, "epoch": 0.9178435839028094, "percentage": 91.74, "elapsed_time": "18:05:30", "remaining_time": "1:37:42"} +{"current_steps": 1512, "total_steps": 1647, "loss": 0.2095, "lr": 2.0635311685884675e-07, "epoch": 0.9184510250569476, "percentage": 91.8, "elapsed_time": "18:06:15", "remaining_time": "1:36:59"} +{"current_steps": 1513, "total_steps": 1647, "loss": 0.2757, "lr": 2.0335033272809612e-07, "epoch": 0.9190584662110858, "percentage": 91.86, "elapsed_time": "18:06:59", "remaining_time": "1:36:16"} +{"current_steps": 1514, "total_steps": 1647, "loss": 0.2397, "lr": 2.0036910325728521e-07, "epoch": 0.919665907365224, "percentage": 91.92, "elapsed_time": "18:07:42", "remaining_time": "1:35:33"} +{"current_steps": 1515, "total_steps": 1647, "loss": 0.256, "lr": 1.9740944184313882e-07, "epoch": 0.9202733485193622, "percentage": 91.99, "elapsed_time": "18:08:31", "remaining_time": "1:34:50"} +{"current_steps": 1516, "total_steps": 1647, "loss": 0.2351, "lr": 1.9447136178545766e-07, "epoch": 0.9208807896735004, "percentage": 92.05, "elapsed_time": "18:09:15", "remaining_time": "1:34:07"} +{"current_steps": 1517, "total_steps": 1647, "loss": 0.2149, "lr": 1.9155487628706672e-07, "epoch": 0.9214882308276385, "percentage": 92.11, "elapsed_time": "18:09:58", "remaining_time": "1:33:24"} +{"current_steps": 1518, "total_steps": 1647, "loss": 0.2401, "lr": 1.8865999845374794e-07, "epoch": 0.9220956719817768, "percentage": 92.17, "elapsed_time": "18:10:43", "remaining_time": "1:32:41"} +{"current_steps": 1519, "total_steps": 1647, "loss": 0.2259, "lr": 1.857867412941883e-07, "epoch": 0.9227031131359149, "percentage": 92.23, "elapsed_time": "18:11:27", "remaining_time": "1:31:58"} +{"current_steps": 1520, "total_steps": 1647, "loss": 0.2562, "lr": 1.8293511771991624e-07, "epoch": 0.9233105542900532, "percentage": 92.29, "elapsed_time": "18:12:11", 
"remaining_time": "1:31:15"} +{"current_steps": 1521, "total_steps": 1647, "loss": 0.2496, "lr": 1.8010514054524531e-07, "epoch": 0.9239179954441914, "percentage": 92.35, "elapsed_time": "18:12:55", "remaining_time": "1:30:32"} +{"current_steps": 1522, "total_steps": 1647, "loss": 0.2193, "lr": 1.7729682248721848e-07, "epoch": 0.9245254365983295, "percentage": 92.41, "elapsed_time": "18:13:39", "remaining_time": "1:29:49"} +{"current_steps": 1523, "total_steps": 1647, "loss": 0.2346, "lr": 1.7451017616554822e-07, "epoch": 0.9251328777524678, "percentage": 92.47, "elapsed_time": "18:14:24", "remaining_time": "1:29:06"} +{"current_steps": 1524, "total_steps": 1647, "loss": 0.2739, "lr": 1.7174521410256162e-07, "epoch": 0.9257403189066059, "percentage": 92.53, "elapsed_time": "18:15:10", "remaining_time": "1:28:23"} +{"current_steps": 1525, "total_steps": 1647, "loss": 0.2709, "lr": 1.69001948723142e-07, "epoch": 0.9263477600607442, "percentage": 92.59, "elapsed_time": "18:15:55", "remaining_time": "1:27:40"} +{"current_steps": 1526, "total_steps": 1647, "loss": 0.2472, "lr": 1.6628039235467686e-07, "epoch": 0.9269552012148823, "percentage": 92.65, "elapsed_time": "18:16:35", "remaining_time": "1:26:57"} +{"current_steps": 1527, "total_steps": 1647, "loss": 0.2376, "lr": 1.6358055722699662e-07, "epoch": 0.9275626423690205, "percentage": 92.71, "elapsed_time": "18:17:16", "remaining_time": "1:26:13"} +{"current_steps": 1528, "total_steps": 1647, "loss": 0.2445, "lr": 1.6090245547232707e-07, "epoch": 0.9281700835231587, "percentage": 92.77, "elapsed_time": "18:18:00", "remaining_time": "1:25:30"} +{"current_steps": 1529, "total_steps": 1647, "loss": 0.2495, "lr": 1.5824609912522825e-07, "epoch": 0.9287775246772969, "percentage": 92.84, "elapsed_time": "18:18:43", "remaining_time": "1:24:47"} +{"current_steps": 1530, "total_steps": 1647, "loss": 0.252, "lr": 1.5561150012254446e-07, "epoch": 0.929384965831435, "percentage": 92.9, "elapsed_time": "18:19:24", 
"remaining_time": "1:24:04"} +{"current_steps": 1531, "total_steps": 1647, "loss": 0.2544, "lr": 1.5299867030334815e-07, "epoch": 0.9299924069855733, "percentage": 92.96, "elapsed_time": "18:20:07", "remaining_time": "1:23:21"} +{"current_steps": 1532, "total_steps": 1647, "loss": 0.2509, "lr": 1.5040762140888843e-07, "epoch": 0.9305998481397114, "percentage": 93.02, "elapsed_time": "18:20:47", "remaining_time": "1:22:37"} +{"current_steps": 1533, "total_steps": 1647, "loss": 0.219, "lr": 1.4783836508253823e-07, "epoch": 0.9312072892938497, "percentage": 93.08, "elapsed_time": "18:21:31", "remaining_time": "1:21:54"} +{"current_steps": 1534, "total_steps": 1647, "loss": 0.2584, "lr": 1.4529091286973994e-07, "epoch": 0.9318147304479879, "percentage": 93.14, "elapsed_time": "18:22:13", "remaining_time": "1:21:11"} +{"current_steps": 1535, "total_steps": 1647, "loss": 0.2477, "lr": 1.4276527621795655e-07, "epoch": 0.932422171602126, "percentage": 93.2, "elapsed_time": "18:22:59", "remaining_time": "1:20:28"} +{"current_steps": 1536, "total_steps": 1647, "loss": 0.2515, "lr": 1.402614664766172e-07, "epoch": 0.9330296127562643, "percentage": 93.26, "elapsed_time": "18:23:40", "remaining_time": "1:19:45"} +{"current_steps": 1537, "total_steps": 1647, "loss": 0.231, "lr": 1.3777949489706898e-07, "epoch": 0.9336370539104024, "percentage": 93.32, "elapsed_time": "18:24:19", "remaining_time": "1:19:02"} +{"current_steps": 1538, "total_steps": 1647, "loss": 0.2503, "lr": 1.353193726325247e-07, "epoch": 0.9342444950645407, "percentage": 93.38, "elapsed_time": "18:25:05", "remaining_time": "1:18:19"} +{"current_steps": 1539, "total_steps": 1647, "loss": 0.2784, "lr": 1.3288111073801235e-07, "epoch": 0.9348519362186788, "percentage": 93.44, "elapsed_time": "18:25:49", "remaining_time": "1:17:36"} +{"current_steps": 1540, "total_steps": 1647, "loss": 0.2418, "lr": 1.3046472017032685e-07, "epoch": 0.935459377372817, "percentage": 93.5, "elapsed_time": "18:26:34", "remaining_time": 
"1:16:53"} +{"current_steps": 1541, "total_steps": 1647, "loss": 0.2397, "lr": 1.280702117879795e-07, "epoch": 0.9360668185269552, "percentage": 93.56, "elapsed_time": "18:27:16", "remaining_time": "1:16:09"} +{"current_steps": 1542, "total_steps": 1647, "loss": 0.2582, "lr": 1.2569759635115086e-07, "epoch": 0.9366742596810934, "percentage": 93.62, "elapsed_time": "18:28:00", "remaining_time": "1:15:26"} +{"current_steps": 1543, "total_steps": 1647, "loss": 0.2575, "lr": 1.2334688452164122e-07, "epoch": 0.9372817008352315, "percentage": 93.69, "elapsed_time": "18:28:43", "remaining_time": "1:14:43"} +{"current_steps": 1544, "total_steps": 1647, "loss": 0.271, "lr": 1.210180868628219e-07, "epoch": 0.9378891419893698, "percentage": 93.75, "elapsed_time": "18:29:28", "remaining_time": "1:14:00"} +{"current_steps": 1545, "total_steps": 1647, "loss": 0.2392, "lr": 1.1871121383958961e-07, "epoch": 0.9384965831435079, "percentage": 93.81, "elapsed_time": "18:30:10", "remaining_time": "1:13:17"} +{"current_steps": 1546, "total_steps": 1647, "loss": 0.2533, "lr": 1.1642627581831767e-07, "epoch": 0.9391040242976462, "percentage": 93.87, "elapsed_time": "18:30:51", "remaining_time": "1:12:34"} +{"current_steps": 1547, "total_steps": 1647, "loss": 0.2665, "lr": 1.1416328306681046e-07, "epoch": 0.9397114654517844, "percentage": 93.93, "elapsed_time": "18:31:35", "remaining_time": "1:11:51"} +{"current_steps": 1548, "total_steps": 1647, "loss": 0.2233, "lr": 1.1192224575425848e-07, "epoch": 0.9403189066059225, "percentage": 93.99, "elapsed_time": "18:32:18", "remaining_time": "1:11:08"} +{"current_steps": 1549, "total_steps": 1647, "loss": 0.2722, "lr": 1.0970317395119001e-07, "epoch": 0.9409263477600608, "percentage": 94.05, "elapsed_time": "18:33:00", "remaining_time": "1:10:24"} +{"current_steps": 1550, "total_steps": 1647, "loss": 0.2374, "lr": 1.0750607762942622e-07, "epoch": 0.9415337889141989, "percentage": 94.11, "elapsed_time": "18:33:48", "remaining_time": "1:09:42"} 
+{"current_steps": 1551, "total_steps": 1647, "loss": 0.247, "lr": 1.0533096666203946e-07, "epoch": 0.9421412300683372, "percentage": 94.17, "elapsed_time": "18:34:30", "remaining_time": "1:08:59"} +{"current_steps": 1552, "total_steps": 1647, "loss": 0.2415, "lr": 1.0317785082330555e-07, "epoch": 0.9427486712224753, "percentage": 94.23, "elapsed_time": "18:35:14", "remaining_time": "1:08:15"} +{"current_steps": 1553, "total_steps": 1647, "loss": 0.2131, "lr": 1.0104673978866164e-07, "epoch": 0.9433561123766135, "percentage": 94.29, "elapsed_time": "18:35:58", "remaining_time": "1:07:32"} +{"current_steps": 1554, "total_steps": 1647, "loss": 0.276, "lr": 9.89376431346606e-08, "epoch": 0.9439635535307517, "percentage": 94.35, "elapsed_time": "18:36:38", "remaining_time": "1:06:49"} +{"current_steps": 1555, "total_steps": 1647, "loss": 0.2582, "lr": 9.685057033892998e-08, "epoch": 0.9445709946848899, "percentage": 94.41, "elapsed_time": "18:37:24", "remaining_time": "1:06:06"} +{"current_steps": 1556, "total_steps": 1647, "loss": 0.2553, "lr": 9.478553078013042e-08, "epoch": 0.945178435839028, "percentage": 94.47, "elapsed_time": "18:38:07", "remaining_time": "1:05:23"} +{"current_steps": 1557, "total_steps": 1647, "loss": 0.2555, "lr": 9.274253373791064e-08, "epoch": 0.9457858769931663, "percentage": 94.54, "elapsed_time": "18:38:49", "remaining_time": "1:04:40"} +{"current_steps": 1558, "total_steps": 1647, "loss": 0.2405, "lr": 9.072158839286748e-08, "epoch": 0.9463933181473044, "percentage": 94.6, "elapsed_time": "18:39:35", "remaining_time": "1:03:57"} +{"current_steps": 1559, "total_steps": 1647, "loss": 0.2397, "lr": 8.872270382650372e-08, "epoch": 0.9470007593014427, "percentage": 94.66, "elapsed_time": "18:40:19", "remaining_time": "1:03:14"} +{"current_steps": 1560, "total_steps": 1647, "loss": 0.2581, "lr": 8.674588902118919e-08, "epoch": 0.9476082004555809, "percentage": 94.72, "elapsed_time": "18:41:03", "remaining_time": "1:02:31"} +{"current_steps": 
1561, "total_steps": 1647, "loss": 0.2578, "lr": 8.479115286011752e-08, "epoch": 0.948215641609719, "percentage": 94.78, "elapsed_time": "18:41:47", "remaining_time": "1:01:48"} +{"current_steps": 1562, "total_steps": 1647, "loss": 0.2768, "lr": 8.285850412726837e-08, "epoch": 0.9488230827638573, "percentage": 94.84, "elapsed_time": "18:42:26", "remaining_time": "1:01:04"} +{"current_steps": 1563, "total_steps": 1647, "loss": 0.2124, "lr": 8.094795150736745e-08, "epoch": 0.9494305239179954, "percentage": 94.9, "elapsed_time": "18:43:09", "remaining_time": "1:00:21"} +{"current_steps": 1564, "total_steps": 1647, "loss": 0.2358, "lr": 7.905950358584768e-08, "epoch": 0.9500379650721337, "percentage": 94.96, "elapsed_time": "18:43:52", "remaining_time": "0:59:38"} +{"current_steps": 1565, "total_steps": 1647, "loss": 0.2615, "lr": 7.719316884880922e-08, "epoch": 0.9506454062262718, "percentage": 95.02, "elapsed_time": "18:44:33", "remaining_time": "0:58:55"} +{"current_steps": 1566, "total_steps": 1647, "loss": 0.2352, "lr": 7.534895568298395e-08, "epoch": 0.95125284738041, "percentage": 95.08, "elapsed_time": "18:45:14", "remaining_time": "0:58:12"} +{"current_steps": 1567, "total_steps": 1647, "loss": 0.2557, "lr": 7.352687237569489e-08, "epoch": 0.9518602885345482, "percentage": 95.14, "elapsed_time": "18:45:55", "remaining_time": "0:57:28"} +{"current_steps": 1568, "total_steps": 1647, "loss": 0.2156, "lr": 7.172692711482022e-08, "epoch": 0.9524677296886864, "percentage": 95.2, "elapsed_time": "18:46:39", "remaining_time": "0:56:45"} +{"current_steps": 1569, "total_steps": 1647, "loss": 0.2465, "lr": 6.994912798875875e-08, "epoch": 0.9530751708428246, "percentage": 95.26, "elapsed_time": "18:47:19", "remaining_time": "0:56:02"} +{"current_steps": 1570, "total_steps": 1647, "loss": 0.2816, "lr": 6.819348298638839e-08, "epoch": 0.9536826119969628, "percentage": 95.32, "elapsed_time": "18:47:59", "remaining_time": "0:55:19"} +{"current_steps": 1571, "total_steps": 
1647, "loss": 0.2412, "lr": 6.6459999997035e-08, "epoch": 0.9542900531511009, "percentage": 95.39, "elapsed_time": "18:48:44", "remaining_time": "0:54:36"} +{"current_steps": 1572, "total_steps": 1647, "loss": 0.2782, "lr": 6.474868681043578e-08, "epoch": 0.9548974943052392, "percentage": 95.45, "elapsed_time": "18:49:23", "remaining_time": "0:53:53"} +{"current_steps": 1573, "total_steps": 1647, "loss": 0.241, "lr": 6.305955111670204e-08, "epoch": 0.9555049354593774, "percentage": 95.51, "elapsed_time": "18:50:05", "remaining_time": "0:53:09"} +{"current_steps": 1574, "total_steps": 1647, "loss": 0.2506, "lr": 6.13926005062876e-08, "epoch": 0.9561123766135156, "percentage": 95.57, "elapsed_time": "18:50:47", "remaining_time": "0:52:26"} +{"current_steps": 1575, "total_steps": 1647, "loss": 0.2117, "lr": 5.974784246995214e-08, "epoch": 0.9567198177676538, "percentage": 95.63, "elapsed_time": "18:51:30", "remaining_time": "0:51:43"} +{"current_steps": 1576, "total_steps": 1647, "loss": 0.2237, "lr": 5.8125284398730666e-08, "epoch": 0.9573272589217919, "percentage": 95.69, "elapsed_time": "18:52:12", "remaining_time": "0:51:00"} +{"current_steps": 1577, "total_steps": 1647, "loss": 0.2126, "lr": 5.6524933583896326e-08, "epoch": 0.9579347000759302, "percentage": 95.75, "elapsed_time": "18:52:55", "remaining_time": "0:50:17"} +{"current_steps": 1578, "total_steps": 1647, "loss": 0.2545, "lr": 5.4946797216931524e-08, "epoch": 0.9585421412300683, "percentage": 95.81, "elapsed_time": "18:53:36", "remaining_time": "0:49:34"} +{"current_steps": 1579, "total_steps": 1647, "loss": 0.2354, "lr": 5.339088238949186e-08, "epoch": 0.9591495823842066, "percentage": 95.87, "elapsed_time": "18:54:21", "remaining_time": "0:48:51"} +{"current_steps": 1580, "total_steps": 1647, "loss": 0.2342, "lr": 5.185719609337836e-08, "epoch": 0.9597570235383447, "percentage": 95.93, "elapsed_time": "18:55:04", "remaining_time": "0:48:07"} +{"current_steps": 1581, "total_steps": 1647, "loss": 
0.2467, "lr": 5.034574522050251e-08, "epoch": 0.9603644646924829, "percentage": 95.99, "elapsed_time": "18:55:47", "remaining_time": "0:47:24"} +{"current_steps": 1582, "total_steps": 1647, "loss": 0.254, "lr": 4.885653656285627e-08, "epoch": 0.9609719058466211, "percentage": 96.05, "elapsed_time": "18:56:29", "remaining_time": "0:46:41"} +{"current_steps": 1583, "total_steps": 1647, "loss": 0.2441, "lr": 4.73895768124838e-08, "epoch": 0.9615793470007593, "percentage": 96.11, "elapsed_time": "18:57:14", "remaining_time": "0:45:58"} +{"current_steps": 1584, "total_steps": 1647, "loss": 0.2331, "lr": 4.5944872561448084e-08, "epoch": 0.9621867881548974, "percentage": 96.17, "elapsed_time": "18:57:54", "remaining_time": "0:45:15"} +{"current_steps": 1585, "total_steps": 1647, "loss": 0.2402, "lr": 4.45224303018027e-08, "epoch": 0.9627942293090357, "percentage": 96.24, "elapsed_time": "18:58:36", "remaining_time": "0:44:32"} +{"current_steps": 1586, "total_steps": 1647, "loss": 0.266, "lr": 4.3122256425563444e-08, "epoch": 0.9634016704631739, "percentage": 96.3, "elapsed_time": "18:59:18", "remaining_time": "0:43:49"} +{"current_steps": 1587, "total_steps": 1647, "loss": 0.2625, "lr": 4.174435722467951e-08, "epoch": 0.9640091116173121, "percentage": 96.36, "elapsed_time": "19:00:06", "remaining_time": "0:43:06"} +{"current_steps": 1588, "total_steps": 1647, "loss": 0.2642, "lr": 4.038873889100237e-08, "epoch": 0.9646165527714503, "percentage": 96.42, "elapsed_time": "19:00:48", "remaining_time": "0:42:23"} +{"current_steps": 1589, "total_steps": 1647, "loss": 0.2472, "lr": 3.905540751626191e-08, "epoch": 0.9652239939255884, "percentage": 96.48, "elapsed_time": "19:01:32", "remaining_time": "0:41:40"} +{"current_steps": 1590, "total_steps": 1647, "loss": 0.2307, "lr": 3.77443690920376e-08, "epoch": 0.9658314350797267, "percentage": 96.54, "elapsed_time": "19:02:15", "remaining_time": "0:40:56"} +{"current_steps": 1591, "total_steps": 1647, "loss": 0.2495, "lr": 
3.645562950973014e-08, "epoch": 0.9664388762338648, "percentage": 96.6, "elapsed_time": "19:02:54", "remaining_time": "0:40:13"} +{"current_steps": 1592, "total_steps": 1647, "loss": 0.2738, "lr": 3.518919456053649e-08, "epoch": 0.9670463173880031, "percentage": 96.66, "elapsed_time": "19:03:36", "remaining_time": "0:39:30"} +{"current_steps": 1593, "total_steps": 1647, "loss": 0.2449, "lr": 3.3945069935423234e-08, "epoch": 0.9676537585421412, "percentage": 96.72, "elapsed_time": "19:04:14", "remaining_time": "0:38:47"} +{"current_steps": 1594, "total_steps": 1647, "loss": 0.2377, "lr": 3.2723261225102164e-08, "epoch": 0.9682611996962794, "percentage": 96.78, "elapsed_time": "19:04:59", "remaining_time": "0:38:04"} +{"current_steps": 1595, "total_steps": 1647, "loss": 0.2671, "lr": 3.152377392000361e-08, "epoch": 0.9688686408504176, "percentage": 96.84, "elapsed_time": "19:05:46", "remaining_time": "0:37:21"} +{"current_steps": 1596, "total_steps": 1647, "loss": 0.231, "lr": 3.034661341025258e-08, "epoch": 0.9694760820045558, "percentage": 96.9, "elapsed_time": "19:06:25", "remaining_time": "0:36:38"} +{"current_steps": 1597, "total_steps": 1647, "loss": 0.241, "lr": 2.9191784985644345e-08, "epoch": 0.970083523158694, "percentage": 96.96, "elapsed_time": "19:07:06", "remaining_time": "0:35:54"} +{"current_steps": 1598, "total_steps": 1647, "loss": 0.2349, "lr": 2.8059293835620006e-08, "epoch": 0.9706909643128322, "percentage": 97.02, "elapsed_time": "19:07:46", "remaining_time": "0:35:11"} +{"current_steps": 1599, "total_steps": 1647, "loss": 0.249, "lr": 2.6949145049245396e-08, "epoch": 0.9712984054669704, "percentage": 97.09, "elapsed_time": "19:08:34", "remaining_time": "0:34:28"} +{"current_steps": 1600, "total_steps": 1647, "loss": 0.245, "lr": 2.5861343615184997e-08, "epoch": 0.9719058466211086, "percentage": 97.15, "elapsed_time": "19:09:17", "remaining_time": "0:33:45"} +{"current_steps": 1601, "total_steps": 1647, "loss": 0.2333, "lr": 
2.479589442168251e-08, "epoch": 0.9725132877752468, "percentage": 97.21, "elapsed_time": "19:10:00", "remaining_time": "0:33:02"} +{"current_steps": 1602, "total_steps": 1647, "loss": 0.2683, "lr": 2.3752802256536423e-08, "epoch": 0.973120728929385, "percentage": 97.27, "elapsed_time": "19:10:44", "remaining_time": "0:32:19"} +{"current_steps": 1603, "total_steps": 1647, "loss": 0.2285, "lr": 2.2732071807081147e-08, "epoch": 0.9737281700835232, "percentage": 97.33, "elapsed_time": "19:11:27", "remaining_time": "0:31:36"} +{"current_steps": 1604, "total_steps": 1647, "loss": 0.2315, "lr": 2.173370766016314e-08, "epoch": 0.9743356112376613, "percentage": 97.39, "elapsed_time": "19:12:12", "remaining_time": "0:30:53"} +{"current_steps": 1605, "total_steps": 1647, "loss": 0.241, "lr": 2.0757714302122035e-08, "epoch": 0.9749430523917996, "percentage": 97.45, "elapsed_time": "19:12:56", "remaining_time": "0:30:10"} +{"current_steps": 1606, "total_steps": 1647, "loss": 0.2703, "lr": 1.98040961187701e-08, "epoch": 0.9755504935459377, "percentage": 97.51, "elapsed_time": "19:13:39", "remaining_time": "0:29:27"} +{"current_steps": 1607, "total_steps": 1647, "loss": 0.2597, "lr": 1.8872857395372812e-08, "epoch": 0.976157934700076, "percentage": 97.57, "elapsed_time": "19:14:20", "remaining_time": "0:28:43"} +{"current_steps": 1608, "total_steps": 1647, "loss": 0.2916, "lr": 1.7964002316628316e-08, "epoch": 0.9767653758542141, "percentage": 97.63, "elapsed_time": "19:15:01", "remaining_time": "0:28:00"} +{"current_steps": 1609, "total_steps": 1647, "loss": 0.2558, "lr": 1.7077534966650767e-08, "epoch": 0.9773728170083523, "percentage": 97.69, "elapsed_time": "19:15:47", "remaining_time": "0:27:17"} +{"current_steps": 1610, "total_steps": 1647, "loss": 0.244, "lr": 1.6213459328950355e-08, "epoch": 0.9779802581624905, "percentage": 97.75, "elapsed_time": "19:16:30", "remaining_time": "0:26:34"} +{"current_steps": 1611, "total_steps": 1647, "loss": 0.2462, "lr": 
1.537177928641498e-08, "epoch": 0.9785876993166287, "percentage": 97.81, "elapsed_time": "19:17:14", "remaining_time": "0:25:51"} +{"current_steps": 1612, "total_steps": 1647, "loss": 0.2535, "lr": 1.4552498621295264e-08, "epoch": 0.979195140470767, "percentage": 97.87, "elapsed_time": "19:17:54", "remaining_time": "0:25:08"} +{"current_steps": 1613, "total_steps": 1647, "loss": 0.2515, "lr": 1.3755621015184018e-08, "epoch": 0.9798025816249051, "percentage": 97.94, "elapsed_time": "19:18:41", "remaining_time": "0:24:25"} +{"current_steps": 1614, "total_steps": 1647, "loss": 0.2327, "lr": 1.2981150049004021e-08, "epoch": 0.9804100227790433, "percentage": 98.0, "elapsed_time": "19:19:24", "remaining_time": "0:23:42"} +{"current_steps": 1615, "total_steps": 1647, "loss": 0.2317, "lr": 1.2229089202987487e-08, "epoch": 0.9810174639331815, "percentage": 98.06, "elapsed_time": "19:20:05", "remaining_time": "0:22:59"} +{"current_steps": 1616, "total_steps": 1647, "loss": 0.2516, "lr": 1.1499441856663296e-08, "epoch": 0.9816249050873197, "percentage": 98.12, "elapsed_time": "19:20:49", "remaining_time": "0:22:16"} +{"current_steps": 1617, "total_steps": 1647, "loss": 0.2599, "lr": 1.0792211288841447e-08, "epoch": 0.9822323462414578, "percentage": 98.18, "elapsed_time": "19:21:29", "remaining_time": "0:21:32"} +{"current_steps": 1618, "total_steps": 1647, "loss": 0.2547, "lr": 1.0107400677596413e-08, "epoch": 0.9828397873955961, "percentage": 98.24, "elapsed_time": "19:22:13", "remaining_time": "0:20:49"} +{"current_steps": 1619, "total_steps": 1647, "loss": 0.2519, "lr": 9.44501310025603e-09, "epoch": 0.9834472285497342, "percentage": 98.3, "elapsed_time": "19:22:54", "remaining_time": "0:20:06"} +{"current_steps": 1620, "total_steps": 1647, "loss": 0.2315, "lr": 8.805051533384846e-09, "epoch": 0.9840546697038725, "percentage": 98.36, "elapsed_time": "19:23:37", "remaining_time": "0:19:23"} +{"current_steps": 1621, "total_steps": 1647, "loss": 0.2636, "lr": 
8.187518852771914e-09, "epoch": 0.9846621108580106, "percentage": 98.42, "elapsed_time": "19:24:20", "remaining_time": "0:18:40"} +{"current_steps": 1622, "total_steps": 1647, "loss": 0.2535, "lr": 7.59241783341913e-09, "epoch": 0.9852695520121488, "percentage": 98.48, "elapsed_time": "19:25:03", "remaining_time": "0:17:57"} +{"current_steps": 1623, "total_steps": 1647, "loss": 0.2098, "lr": 7.019751149525133e-09, "epoch": 0.985876993166287, "percentage": 98.54, "elapsed_time": "19:25:46", "remaining_time": "0:17:14"} +{"current_steps": 1624, "total_steps": 1647, "loss": 0.2739, "lr": 6.469521374477539e-09, "epoch": 0.9864844343204252, "percentage": 98.6, "elapsed_time": "19:26:28", "remaining_time": "0:16:31"} +{"current_steps": 1625, "total_steps": 1647, "loss": 0.2423, "lr": 5.941730980839056e-09, "epoch": 0.9870918754745635, "percentage": 98.66, "elapsed_time": "19:27:13", "remaining_time": "0:15:48"} +{"current_steps": 1626, "total_steps": 1647, "loss": 0.2275, "lr": 5.436382340335833e-09, "epoch": 0.9876993166287016, "percentage": 98.72, "elapsed_time": "19:27:55", "remaining_time": "0:15:05"} +{"current_steps": 1627, "total_steps": 1647, "loss": 0.2458, "lr": 4.9534777238485764e-09, "epoch": 0.9883067577828398, "percentage": 98.79, "elapsed_time": "19:28:35", "remaining_time": "0:14:21"} +{"current_steps": 1628, "total_steps": 1647, "loss": 0.2413, "lr": 4.493019301401447e-09, "epoch": 0.988914198936978, "percentage": 98.85, "elapsed_time": "19:29:18", "remaining_time": "0:13:38"} +{"current_steps": 1629, "total_steps": 1647, "loss": 0.2717, "lr": 4.055009142152066e-09, "epoch": 0.9895216400911162, "percentage": 98.91, "elapsed_time": "19:30:03", "remaining_time": "0:12:55"} +{"current_steps": 1630, "total_steps": 1647, "loss": 0.2631, "lr": 3.6394492143820847e-09, "epoch": 0.9901290812452543, "percentage": 98.97, "elapsed_time": "19:30:46", "remaining_time": "0:12:12"} +{"current_steps": 1631, "total_steps": 1647, "loss": 0.2356, "lr": 
3.2463413854899594e-09, "epoch": 0.9907365223993926, "percentage": 99.03, "elapsed_time": "19:31:26", "remaining_time": "0:11:29"} +{"current_steps": 1632, "total_steps": 1647, "loss": 0.2435, "lr": 2.875687421980966e-09, "epoch": 0.9913439635535307, "percentage": 99.09, "elapsed_time": "19:32:09", "remaining_time": "0:10:46"} +{"current_steps": 1633, "total_steps": 1647, "loss": 0.2455, "lr": 2.5274889894583156e-09, "epoch": 0.991951404707669, "percentage": 99.15, "elapsed_time": "19:32:54", "remaining_time": "0:10:03"} +{"current_steps": 1634, "total_steps": 1647, "loss": 0.2713, "lr": 2.201747652618713e-09, "epoch": 0.9925588458618071, "percentage": 99.21, "elapsed_time": "19:33:38", "remaining_time": "0:09:20"} +{"current_steps": 1635, "total_steps": 1647, "loss": 0.2716, "lr": 1.8984648752429222e-09, "epoch": 0.9931662870159453, "percentage": 99.27, "elapsed_time": "19:34:20", "remaining_time": "0:08:37"} +{"current_steps": 1636, "total_steps": 1647, "loss": 0.2427, "lr": 1.6176420201902132e-09, "epoch": 0.9937737281700835, "percentage": 99.33, "elapsed_time": "19:35:08", "remaining_time": "0:07:54"} +{"current_steps": 1637, "total_steps": 1647, "loss": 0.2492, "lr": 1.3592803493905904e-09, "epoch": 0.9943811693242217, "percentage": 99.39, "elapsed_time": "19:35:49", "remaining_time": "0:07:10"} +{"current_steps": 1638, "total_steps": 1647, "loss": 0.2329, "lr": 1.1233810238425735e-09, "epoch": 0.99498861047836, "percentage": 99.45, "elapsed_time": "19:36:28", "remaining_time": "0:06:27"} +{"current_steps": 1639, "total_steps": 1647, "loss": 0.2464, "lr": 9.099451036048701e-10, "epoch": 0.9955960516324981, "percentage": 99.51, "elapsed_time": "19:37:07", "remaining_time": "0:05:44"} +{"current_steps": 1640, "total_steps": 1647, "loss": 0.243, "lr": 7.189735477913795e-10, "epoch": 0.9962034927866363, "percentage": 99.57, "elapsed_time": "19:37:49", "remaining_time": "0:05:01"} +{"current_steps": 1641, "total_steps": 1647, "loss": 0.294, "lr": 
5.504672145700829e-10, "epoch": 0.9968109339407745, "percentage": 99.64, "elapsed_time": "19:38:33", "remaining_time": "0:04:18"} +{"current_steps": 1642, "total_steps": 1647, "loss": 0.2607, "lr": 4.0442686115582665e-10, "epoch": 0.9974183750949127, "percentage": 99.7, "elapsed_time": "19:39:21", "remaining_time": "0:03:35"} +{"current_steps": 1643, "total_steps": 1647, "loss": 0.2626, "lr": 2.8085314380976725e-10, "epoch": 0.9980258162490508, "percentage": 99.76, "elapsed_time": "19:40:04", "remaining_time": "0:02:52"} +{"current_steps": 1644, "total_steps": 1647, "loss": 0.2622, "lr": 1.797466178327101e-10, "epoch": 0.9986332574031891, "percentage": 99.82, "elapsed_time": "19:40:48", "remaining_time": "0:02:09"} +{"current_steps": 1645, "total_steps": 1647, "loss": 0.2386, "lr": 1.011077375662195e-10, "epoch": 0.9992406985573272, "percentage": 99.88, "elapsed_time": "19:41:33", "remaining_time": "0:01:26"} +{"current_steps": 1646, "total_steps": 1647, "loss": 0.2989, "lr": 4.4936856390398465e-11, "epoch": 0.9998481397114655, "percentage": 99.94, "elapsed_time": "19:42:12", "remaining_time": "0:00:43"} +{"current_steps": 1647, "total_steps": 1647, "loss": 0.0578, "lr": 1.1234226718337405e-11, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "19:42:23", "remaining_time": "0:00:00"} +{"current_steps": 1647, "total_steps": 1647, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "19:43:08", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..2e86c7f --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,11572 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1647, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006074411541381929, + "grad_norm": 4.187357914113359, + "learning_rate": 0.0, + "loss": 1.3011, + "step": 1 + }, + 
{ + "epoch": 0.0012148823082763858, + "grad_norm": 4.498512475534671, + "learning_rate": 6.060606060606061e-08, + "loss": 1.3126, + "step": 2 + }, + { + "epoch": 0.0018223234624145787, + "grad_norm": 4.0434971741932495, + "learning_rate": 1.2121212121212122e-07, + "loss": 1.2803, + "step": 3 + }, + { + "epoch": 0.0024297646165527716, + "grad_norm": 4.162307471665992, + "learning_rate": 1.8181818181818183e-07, + "loss": 1.3162, + "step": 4 + }, + { + "epoch": 0.0030372057706909645, + "grad_norm": 4.278843392925189, + "learning_rate": 2.4242424242424244e-07, + "loss": 1.3803, + "step": 5 + }, + { + "epoch": 0.0036446469248291574, + "grad_norm": 4.123864416498828, + "learning_rate": 3.0303030303030305e-07, + "loss": 1.3463, + "step": 6 + }, + { + "epoch": 0.00425208807896735, + "grad_norm": 4.304171763132361, + "learning_rate": 3.6363636363636366e-07, + "loss": 1.3397, + "step": 7 + }, + { + "epoch": 0.004859529233105543, + "grad_norm": 3.9277126463319307, + "learning_rate": 4.242424242424243e-07, + "loss": 1.3189, + "step": 8 + }, + { + "epoch": 0.005466970387243736, + "grad_norm": 3.922407631596546, + "learning_rate": 4.848484848484849e-07, + "loss": 1.2963, + "step": 9 + }, + { + "epoch": 0.006074411541381929, + "grad_norm": 3.895403645466344, + "learning_rate": 5.454545454545455e-07, + "loss": 1.2897, + "step": 10 + }, + { + "epoch": 0.006681852695520122, + "grad_norm": 4.0159782701678814, + "learning_rate": 6.060606060606061e-07, + "loss": 1.2556, + "step": 11 + }, + { + "epoch": 0.007289293849658315, + "grad_norm": 4.3312492479849976, + "learning_rate": 6.666666666666667e-07, + "loss": 1.2826, + "step": 12 + }, + { + "epoch": 0.007896735003796507, + "grad_norm": 3.8648934637566277, + "learning_rate": 7.272727272727273e-07, + "loss": 1.3033, + "step": 13 + }, + { + "epoch": 0.0085041761579347, + "grad_norm": 4.286961260584155, + "learning_rate": 7.878787878787879e-07, + "loss": 1.3182, + "step": 14 + }, + { + "epoch": 0.009111617312072893, + "grad_norm": 
3.9563681075623696, + "learning_rate": 8.484848484848486e-07, + "loss": 1.2456, + "step": 15 + }, + { + "epoch": 0.009719058466211086, + "grad_norm": 4.609717367893727, + "learning_rate": 9.090909090909091e-07, + "loss": 1.3312, + "step": 16 + }, + { + "epoch": 0.010326499620349278, + "grad_norm": 3.8182264142948212, + "learning_rate": 9.696969696969698e-07, + "loss": 1.2702, + "step": 17 + }, + { + "epoch": 0.010933940774487472, + "grad_norm": 3.9030184249626765, + "learning_rate": 1.0303030303030304e-06, + "loss": 1.2982, + "step": 18 + }, + { + "epoch": 0.011541381928625664, + "grad_norm": 3.630670067928241, + "learning_rate": 1.090909090909091e-06, + "loss": 1.2928, + "step": 19 + }, + { + "epoch": 0.012148823082763858, + "grad_norm": 3.627122940350712, + "learning_rate": 1.1515151515151516e-06, + "loss": 1.3399, + "step": 20 + }, + { + "epoch": 0.01275626423690205, + "grad_norm": 3.4178400978584498, + "learning_rate": 1.2121212121212122e-06, + "loss": 1.201, + "step": 21 + }, + { + "epoch": 0.013363705391040244, + "grad_norm": 3.8805835742800308, + "learning_rate": 1.2727272727272728e-06, + "loss": 1.3176, + "step": 22 + }, + { + "epoch": 0.013971146545178436, + "grad_norm": 5.15609913207104, + "learning_rate": 1.3333333333333334e-06, + "loss": 1.2253, + "step": 23 + }, + { + "epoch": 0.01457858769931663, + "grad_norm": 3.361843882497272, + "learning_rate": 1.3939393939393942e-06, + "loss": 1.2849, + "step": 24 + }, + { + "epoch": 0.015186028853454821, + "grad_norm": 3.419552970347977, + "learning_rate": 1.4545454545454546e-06, + "loss": 1.0856, + "step": 25 + }, + { + "epoch": 0.015793470007593013, + "grad_norm": 4.990297184335331, + "learning_rate": 1.5151515151515152e-06, + "loss": 1.1542, + "step": 26 + }, + { + "epoch": 0.01640091116173121, + "grad_norm": 3.6910946706226024, + "learning_rate": 1.5757575757575759e-06, + "loss": 1.1332, + "step": 27 + }, + { + "epoch": 0.0170083523158694, + "grad_norm": 3.2707017108314105, + "learning_rate": 
1.6363636363636365e-06, + "loss": 1.0859, + "step": 28 + }, + { + "epoch": 0.017615793470007593, + "grad_norm": 3.6848753932789022, + "learning_rate": 1.6969696969696973e-06, + "loss": 1.1462, + "step": 29 + }, + { + "epoch": 0.018223234624145785, + "grad_norm": 3.539975633137712, + "learning_rate": 1.7575757575757577e-06, + "loss": 1.1017, + "step": 30 + }, + { + "epoch": 0.018830675778283977, + "grad_norm": 5.020856655134608, + "learning_rate": 1.8181818181818183e-06, + "loss": 1.1052, + "step": 31 + }, + { + "epoch": 0.019438116932422173, + "grad_norm": 2.8322427452989403, + "learning_rate": 1.878787878787879e-06, + "loss": 1.0702, + "step": 32 + }, + { + "epoch": 0.020045558086560365, + "grad_norm": 11.978063076522911, + "learning_rate": 1.9393939393939395e-06, + "loss": 1.0783, + "step": 33 + }, + { + "epoch": 0.020652999240698557, + "grad_norm": 3.0620204118757615, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.9578, + "step": 34 + }, + { + "epoch": 0.02126044039483675, + "grad_norm": 2.6396729812241992, + "learning_rate": 2.0606060606060607e-06, + "loss": 1.0325, + "step": 35 + }, + { + "epoch": 0.021867881548974944, + "grad_norm": 2.605944141300131, + "learning_rate": 2.1212121212121216e-06, + "loss": 0.9263, + "step": 36 + }, + { + "epoch": 0.022475322703113136, + "grad_norm": 2.253997875811574, + "learning_rate": 2.181818181818182e-06, + "loss": 0.9275, + "step": 37 + }, + { + "epoch": 0.023082763857251328, + "grad_norm": 2.713661141656442, + "learning_rate": 2.2424242424242428e-06, + "loss": 0.9474, + "step": 38 + }, + { + "epoch": 0.02369020501138952, + "grad_norm": 3.6449984569878833, + "learning_rate": 2.303030303030303e-06, + "loss": 0.9734, + "step": 39 + }, + { + "epoch": 0.024297646165527716, + "grad_norm": 2.810830857915748, + "learning_rate": 2.363636363636364e-06, + "loss": 0.8991, + "step": 40 + }, + { + "epoch": 0.024905087319665908, + "grad_norm": 2.567407308011712, + "learning_rate": 2.4242424242424244e-06, + "loss": 0.8963, + 
"step": 41 + }, + { + "epoch": 0.0255125284738041, + "grad_norm": 2.637128320235312, + "learning_rate": 2.4848484848484848e-06, + "loss": 0.8778, + "step": 42 + }, + { + "epoch": 0.026119969627942292, + "grad_norm": 3.0903128789155754, + "learning_rate": 2.5454545454545456e-06, + "loss": 0.8738, + "step": 43 + }, + { + "epoch": 0.026727410782080487, + "grad_norm": 2.6726103064757214, + "learning_rate": 2.6060606060606064e-06, + "loss": 0.8107, + "step": 44 + }, + { + "epoch": 0.02733485193621868, + "grad_norm": 2.4879173840412006, + "learning_rate": 2.666666666666667e-06, + "loss": 0.8709, + "step": 45 + }, + { + "epoch": 0.02794229309035687, + "grad_norm": 2.7470444516164294, + "learning_rate": 2.7272727272727272e-06, + "loss": 0.8008, + "step": 46 + }, + { + "epoch": 0.028549734244495063, + "grad_norm": 3.0832323670913566, + "learning_rate": 2.7878787878787885e-06, + "loss": 0.8585, + "step": 47 + }, + { + "epoch": 0.02915717539863326, + "grad_norm": 2.3479372726052334, + "learning_rate": 2.848484848484849e-06, + "loss": 0.799, + "step": 48 + }, + { + "epoch": 0.02976461655277145, + "grad_norm": 3.333892504748674, + "learning_rate": 2.9090909090909093e-06, + "loss": 0.7643, + "step": 49 + }, + { + "epoch": 0.030372057706909643, + "grad_norm": 4.005716160822946, + "learning_rate": 2.96969696969697e-06, + "loss": 0.7075, + "step": 50 + }, + { + "epoch": 0.030979498861047835, + "grad_norm": 2.9637475493256393, + "learning_rate": 3.0303030303030305e-06, + "loss": 0.752, + "step": 51 + }, + { + "epoch": 0.03158694001518603, + "grad_norm": 6.448028143148418, + "learning_rate": 3.090909090909091e-06, + "loss": 0.7298, + "step": 52 + }, + { + "epoch": 0.03219438116932422, + "grad_norm": 2.5226109840726267, + "learning_rate": 3.1515151515151517e-06, + "loss": 0.6652, + "step": 53 + }, + { + "epoch": 0.03280182232346242, + "grad_norm": 2.493123673145167, + "learning_rate": 3.2121212121212125e-06, + "loss": 0.6731, + "step": 54 + }, + { + "epoch": 0.033409263477600606, + 
"grad_norm": 2.605987670786732, + "learning_rate": 3.272727272727273e-06, + "loss": 0.7381, + "step": 55 + }, + { + "epoch": 0.0340167046317388, + "grad_norm": 2.29936704850675, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6703, + "step": 56 + }, + { + "epoch": 0.03462414578587699, + "grad_norm": 5.095296867406457, + "learning_rate": 3.3939393939393946e-06, + "loss": 0.6278, + "step": 57 + }, + { + "epoch": 0.035231586940015186, + "grad_norm": 4.103295631128622, + "learning_rate": 3.454545454545455e-06, + "loss": 0.6501, + "step": 58 + }, + { + "epoch": 0.03583902809415338, + "grad_norm": 3.266586264244061, + "learning_rate": 3.5151515151515154e-06, + "loss": 0.6557, + "step": 59 + }, + { + "epoch": 0.03644646924829157, + "grad_norm": 1.996000103297209, + "learning_rate": 3.575757575757576e-06, + "loss": 0.605, + "step": 60 + }, + { + "epoch": 0.037053910402429766, + "grad_norm": 2.616817368318823, + "learning_rate": 3.6363636363636366e-06, + "loss": 0.6313, + "step": 61 + }, + { + "epoch": 0.037661351556567954, + "grad_norm": 2.977002184120951, + "learning_rate": 3.6969696969696974e-06, + "loss": 0.6025, + "step": 62 + }, + { + "epoch": 0.03826879271070615, + "grad_norm": 1.8839479352203694, + "learning_rate": 3.757575757575758e-06, + "loss": 0.5803, + "step": 63 + }, + { + "epoch": 0.038876233864844345, + "grad_norm": 1.8084677913612675, + "learning_rate": 3.818181818181819e-06, + "loss": 0.6036, + "step": 64 + }, + { + "epoch": 0.039483675018982534, + "grad_norm": 3.298041724732867, + "learning_rate": 3.878787878787879e-06, + "loss": 0.5619, + "step": 65 + }, + { + "epoch": 0.04009111617312073, + "grad_norm": 3.210051259243264, + "learning_rate": 3.93939393939394e-06, + "loss": 0.6117, + "step": 66 + }, + { + "epoch": 0.040698557327258925, + "grad_norm": 1.7548473190282035, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6115, + "step": 67 + }, + { + "epoch": 0.04130599848139711, + "grad_norm": 4.027003074298192, + "learning_rate": 
4.060606060606061e-06, + "loss": 0.555, + "step": 68 + }, + { + "epoch": 0.04191343963553531, + "grad_norm": 1.7049695945671344, + "learning_rate": 4.1212121212121215e-06, + "loss": 0.5281, + "step": 69 + }, + { + "epoch": 0.0425208807896735, + "grad_norm": 1.9295753107848288, + "learning_rate": 4.181818181818182e-06, + "loss": 0.4976, + "step": 70 + }, + { + "epoch": 0.04312832194381169, + "grad_norm": 5.6724940525137315, + "learning_rate": 4.242424242424243e-06, + "loss": 0.5599, + "step": 71 + }, + { + "epoch": 0.04373576309794989, + "grad_norm": 2.5165077043031925, + "learning_rate": 4.303030303030303e-06, + "loss": 0.5043, + "step": 72 + }, + { + "epoch": 0.04434320425208808, + "grad_norm": 2.7600416718167438, + "learning_rate": 4.363636363636364e-06, + "loss": 0.5835, + "step": 73 + }, + { + "epoch": 0.04495064540622627, + "grad_norm": 4.577709098881687, + "learning_rate": 4.424242424242425e-06, + "loss": 0.5495, + "step": 74 + }, + { + "epoch": 0.04555808656036447, + "grad_norm": 3.185211056113932, + "learning_rate": 4.4848484848484855e-06, + "loss": 0.5371, + "step": 75 + }, + { + "epoch": 0.046165527714502656, + "grad_norm": 1.7788256477959088, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.4778, + "step": 76 + }, + { + "epoch": 0.04677296886864085, + "grad_norm": 2.0763650855771254, + "learning_rate": 4.606060606060606e-06, + "loss": 0.4913, + "step": 77 + }, + { + "epoch": 0.04738041002277904, + "grad_norm": 2.1625612516573605, + "learning_rate": 4.666666666666667e-06, + "loss": 0.453, + "step": 78 + }, + { + "epoch": 0.047987851176917236, + "grad_norm": 2.186861003935316, + "learning_rate": 4.727272727272728e-06, + "loss": 0.559, + "step": 79 + }, + { + "epoch": 0.04859529233105543, + "grad_norm": 2.7651744089253056, + "learning_rate": 4.787878787878788e-06, + "loss": 0.4996, + "step": 80 + }, + { + "epoch": 0.04920273348519362, + "grad_norm": 1.8551809178974643, + "learning_rate": 4.848484848484849e-06, + "loss": 0.5501, + "step": 81 + }, + { + 
"epoch": 0.049810174639331815, + "grad_norm": 1.8080650695028413, + "learning_rate": 4.90909090909091e-06, + "loss": 0.5637, + "step": 82 + }, + { + "epoch": 0.05041761579347001, + "grad_norm": 1.830828959106882, + "learning_rate": 4.9696969696969696e-06, + "loss": 0.4904, + "step": 83 + }, + { + "epoch": 0.0510250569476082, + "grad_norm": 1.7016759327222268, + "learning_rate": 5.030303030303031e-06, + "loss": 0.485, + "step": 84 + }, + { + "epoch": 0.051632498101746395, + "grad_norm": 1.5379148608918123, + "learning_rate": 5.090909090909091e-06, + "loss": 0.4898, + "step": 85 + }, + { + "epoch": 0.052239939255884583, + "grad_norm": 1.8661306362602696, + "learning_rate": 5.151515151515152e-06, + "loss": 0.4896, + "step": 86 + }, + { + "epoch": 0.05284738041002278, + "grad_norm": 2.0602919860599482, + "learning_rate": 5.212121212121213e-06, + "loss": 0.4677, + "step": 87 + }, + { + "epoch": 0.053454821564160974, + "grad_norm": 1.772568608105656, + "learning_rate": 5.272727272727273e-06, + "loss": 0.4233, + "step": 88 + }, + { + "epoch": 0.05406226271829916, + "grad_norm": 1.6245327242364143, + "learning_rate": 5.333333333333334e-06, + "loss": 0.4784, + "step": 89 + }, + { + "epoch": 0.05466970387243736, + "grad_norm": 1.4521657126765681, + "learning_rate": 5.3939393939393945e-06, + "loss": 0.4646, + "step": 90 + }, + { + "epoch": 0.05527714502657555, + "grad_norm": 6.093340972704043, + "learning_rate": 5.4545454545454545e-06, + "loss": 0.4966, + "step": 91 + }, + { + "epoch": 0.05588458618071374, + "grad_norm": 3.72118634286479, + "learning_rate": 5.515151515151515e-06, + "loss": 0.5535, + "step": 92 + }, + { + "epoch": 0.05649202733485194, + "grad_norm": 2.058084505025063, + "learning_rate": 5.575757575757577e-06, + "loss": 0.4948, + "step": 93 + }, + { + "epoch": 0.05709946848899013, + "grad_norm": 1.6056271512619456, + "learning_rate": 5.636363636363636e-06, + "loss": 0.4807, + "step": 94 + }, + { + "epoch": 0.05770690964312832, + "grad_norm": 2.583298049219201, 
+ "learning_rate": 5.696969696969698e-06, + "loss": 0.4591, + "step": 95 + }, + { + "epoch": 0.05831435079726652, + "grad_norm": 1.5715554881698062, + "learning_rate": 5.7575757575757586e-06, + "loss": 0.477, + "step": 96 + }, + { + "epoch": 0.058921791951404706, + "grad_norm": 2.8917057921549425, + "learning_rate": 5.8181818181818185e-06, + "loss": 0.5028, + "step": 97 + }, + { + "epoch": 0.0595292331055429, + "grad_norm": 1.620684043715009, + "learning_rate": 5.878787878787879e-06, + "loss": 0.4828, + "step": 98 + }, + { + "epoch": 0.06013667425968109, + "grad_norm": 1.375161799406732, + "learning_rate": 5.93939393939394e-06, + "loss": 0.4249, + "step": 99 + }, + { + "epoch": 0.060744115413819286, + "grad_norm": 1.4172846210611314, + "learning_rate": 6e-06, + "loss": 0.3987, + "step": 100 + }, + { + "epoch": 0.06135155656795748, + "grad_norm": 1.8383152351112293, + "learning_rate": 6.060606060606061e-06, + "loss": 0.459, + "step": 101 + }, + { + "epoch": 0.06195899772209567, + "grad_norm": 1.609009063333299, + "learning_rate": 6.121212121212121e-06, + "loss": 0.4439, + "step": 102 + }, + { + "epoch": 0.06256643887623387, + "grad_norm": 2.8231836394185734, + "learning_rate": 6.181818181818182e-06, + "loss": 0.529, + "step": 103 + }, + { + "epoch": 0.06317388003037205, + "grad_norm": 1.4932765424378436, + "learning_rate": 6.2424242424242434e-06, + "loss": 0.4752, + "step": 104 + }, + { + "epoch": 0.06378132118451026, + "grad_norm": 1.4265072751982764, + "learning_rate": 6.303030303030303e-06, + "loss": 0.4559, + "step": 105 + }, + { + "epoch": 0.06438876233864844, + "grad_norm": 1.7232201589379095, + "learning_rate": 6.363636363636364e-06, + "loss": 0.4081, + "step": 106 + }, + { + "epoch": 0.06499620349278663, + "grad_norm": 1.9090397829519035, + "learning_rate": 6.424242424242425e-06, + "loss": 0.4906, + "step": 107 + }, + { + "epoch": 0.06560364464692484, + "grad_norm": 5.566791403419221, + "learning_rate": 6.484848484848485e-06, + "loss": 0.5131, + "step": 108 
+ }, + { + "epoch": 0.06621108580106302, + "grad_norm": 2.1515254306159153, + "learning_rate": 6.545454545454546e-06, + "loss": 0.4312, + "step": 109 + }, + { + "epoch": 0.06681852695520121, + "grad_norm": 1.53243371760272, + "learning_rate": 6.606060606060607e-06, + "loss": 0.4083, + "step": 110 + }, + { + "epoch": 0.0674259681093394, + "grad_norm": 1.5854107306879113, + "learning_rate": 6.666666666666667e-06, + "loss": 0.4338, + "step": 111 + }, + { + "epoch": 0.0680334092634776, + "grad_norm": 1.7854671288961894, + "learning_rate": 6.7272727272727275e-06, + "loss": 0.4259, + "step": 112 + }, + { + "epoch": 0.06864085041761579, + "grad_norm": 1.603406823497176, + "learning_rate": 6.787878787878789e-06, + "loss": 0.4359, + "step": 113 + }, + { + "epoch": 0.06924829157175398, + "grad_norm": 1.4707223980643627, + "learning_rate": 6.848484848484849e-06, + "loss": 0.4215, + "step": 114 + }, + { + "epoch": 0.06985573272589218, + "grad_norm": 1.6534737889131494, + "learning_rate": 6.90909090909091e-06, + "loss": 0.4414, + "step": 115 + }, + { + "epoch": 0.07046317388003037, + "grad_norm": 1.532739371631904, + "learning_rate": 6.969696969696971e-06, + "loss": 0.4297, + "step": 116 + }, + { + "epoch": 0.07107061503416856, + "grad_norm": 1.4297734448243231, + "learning_rate": 7.030303030303031e-06, + "loss": 0.4479, + "step": 117 + }, + { + "epoch": 0.07167805618830676, + "grad_norm": 1.4036156057391842, + "learning_rate": 7.0909090909090916e-06, + "loss": 0.4278, + "step": 118 + }, + { + "epoch": 0.07228549734244495, + "grad_norm": 1.6693103514376786, + "learning_rate": 7.151515151515152e-06, + "loss": 0.4375, + "step": 119 + }, + { + "epoch": 0.07289293849658314, + "grad_norm": 1.8164935073923107, + "learning_rate": 7.212121212121212e-06, + "loss": 0.3987, + "step": 120 + }, + { + "epoch": 0.07350037965072134, + "grad_norm": 1.6026752209068431, + "learning_rate": 7.272727272727273e-06, + "loss": 0.4095, + "step": 121 + }, + { + "epoch": 0.07410782080485953, + 
"grad_norm": 1.652701468229036, + "learning_rate": 7.333333333333333e-06, + "loss": 0.4165, + "step": 122 + }, + { + "epoch": 0.07471526195899772, + "grad_norm": 1.5322103898261112, + "learning_rate": 7.393939393939395e-06, + "loss": 0.4534, + "step": 123 + }, + { + "epoch": 0.07532270311313591, + "grad_norm": 1.5253210647459732, + "learning_rate": 7.454545454545456e-06, + "loss": 0.4315, + "step": 124 + }, + { + "epoch": 0.07593014426727411, + "grad_norm": 1.774830478931673, + "learning_rate": 7.515151515151516e-06, + "loss": 0.4044, + "step": 125 + }, + { + "epoch": 0.0765375854214123, + "grad_norm": 1.887705476184069, + "learning_rate": 7.5757575757575764e-06, + "loss": 0.4153, + "step": 126 + }, + { + "epoch": 0.07714502657555049, + "grad_norm": 2.38958143214645, + "learning_rate": 7.636363636363638e-06, + "loss": 0.3912, + "step": 127 + }, + { + "epoch": 0.07775246772968869, + "grad_norm": 1.6032007121408827, + "learning_rate": 7.696969696969696e-06, + "loss": 0.3989, + "step": 128 + }, + { + "epoch": 0.07835990888382688, + "grad_norm": 1.3782945465938736, + "learning_rate": 7.757575757575758e-06, + "loss": 0.4357, + "step": 129 + }, + { + "epoch": 0.07896735003796507, + "grad_norm": 1.332527605688991, + "learning_rate": 7.81818181818182e-06, + "loss": 0.3616, + "step": 130 + }, + { + "epoch": 0.07957479119210327, + "grad_norm": 1.8131621493966852, + "learning_rate": 7.87878787878788e-06, + "loss": 0.4498, + "step": 131 + }, + { + "epoch": 0.08018223234624146, + "grad_norm": 4.745812951213747, + "learning_rate": 7.93939393939394e-06, + "loss": 0.4247, + "step": 132 + }, + { + "epoch": 0.08078967350037965, + "grad_norm": 2.294695141070265, + "learning_rate": 8.000000000000001e-06, + "loss": 0.3744, + "step": 133 + }, + { + "epoch": 0.08139711465451785, + "grad_norm": 1.6356983798797735, + "learning_rate": 8.060606060606061e-06, + "loss": 0.4373, + "step": 134 + }, + { + "epoch": 0.08200455580865604, + "grad_norm": 1.9911745307305873, + "learning_rate": 
8.121212121212121e-06, + "loss": 0.4294, + "step": 135 + }, + { + "epoch": 0.08261199696279423, + "grad_norm": 1.3737076521844085, + "learning_rate": 8.181818181818183e-06, + "loss": 0.4106, + "step": 136 + }, + { + "epoch": 0.08321943811693243, + "grad_norm": 1.5919226800718935, + "learning_rate": 8.242424242424243e-06, + "loss": 0.4537, + "step": 137 + }, + { + "epoch": 0.08382687927107062, + "grad_norm": 1.4783198785793517, + "learning_rate": 8.303030303030305e-06, + "loss": 0.3788, + "step": 138 + }, + { + "epoch": 0.0844343204252088, + "grad_norm": 1.6410317800155916, + "learning_rate": 8.363636363636365e-06, + "loss": 0.3867, + "step": 139 + }, + { + "epoch": 0.085041761579347, + "grad_norm": 1.6735556190365386, + "learning_rate": 8.424242424242425e-06, + "loss": 0.4355, + "step": 140 + }, + { + "epoch": 0.0856492027334852, + "grad_norm": 1.5607650619109157, + "learning_rate": 8.484848484848486e-06, + "loss": 0.4044, + "step": 141 + }, + { + "epoch": 0.08625664388762339, + "grad_norm": 1.4477035531105342, + "learning_rate": 8.545454545454546e-06, + "loss": 0.4603, + "step": 142 + }, + { + "epoch": 0.08686408504176157, + "grad_norm": 2.315589018866609, + "learning_rate": 8.606060606060606e-06, + "loss": 0.3701, + "step": 143 + }, + { + "epoch": 0.08747152619589978, + "grad_norm": 1.282913362768846, + "learning_rate": 8.666666666666668e-06, + "loss": 0.4052, + "step": 144 + }, + { + "epoch": 0.08807896735003796, + "grad_norm": 1.441795861741466, + "learning_rate": 8.727272727272728e-06, + "loss": 0.3743, + "step": 145 + }, + { + "epoch": 0.08868640850417615, + "grad_norm": 1.6705399490739978, + "learning_rate": 8.787878787878788e-06, + "loss": 0.4156, + "step": 146 + }, + { + "epoch": 0.08929384965831436, + "grad_norm": 1.3022330139774325, + "learning_rate": 8.84848484848485e-06, + "loss": 0.403, + "step": 147 + }, + { + "epoch": 0.08990129081245254, + "grad_norm": 1.3289138423968723, + "learning_rate": 8.90909090909091e-06, + "loss": 0.4241, + "step": 148 + }, 
+ { + "epoch": 0.09050873196659073, + "grad_norm": 1.9110745404266152, + "learning_rate": 8.969696969696971e-06, + "loss": 0.3843, + "step": 149 + }, + { + "epoch": 0.09111617312072894, + "grad_norm": 1.4735930505959485, + "learning_rate": 9.030303030303031e-06, + "loss": 0.3963, + "step": 150 + }, + { + "epoch": 0.09172361427486712, + "grad_norm": 1.542399354189347, + "learning_rate": 9.090909090909091e-06, + "loss": 0.3909, + "step": 151 + }, + { + "epoch": 0.09233105542900531, + "grad_norm": 3.7339191436590244, + "learning_rate": 9.151515151515153e-06, + "loss": 0.3864, + "step": 152 + }, + { + "epoch": 0.0929384965831435, + "grad_norm": 1.3925338316644487, + "learning_rate": 9.212121212121213e-06, + "loss": 0.3766, + "step": 153 + }, + { + "epoch": 0.0935459377372817, + "grad_norm": 1.4555873503122925, + "learning_rate": 9.272727272727273e-06, + "loss": 0.352, + "step": 154 + }, + { + "epoch": 0.09415337889141989, + "grad_norm": 1.5097093818651814, + "learning_rate": 9.333333333333334e-06, + "loss": 0.3938, + "step": 155 + }, + { + "epoch": 0.09476082004555808, + "grad_norm": 1.5316324465377855, + "learning_rate": 9.393939393939396e-06, + "loss": 0.3722, + "step": 156 + }, + { + "epoch": 0.09536826119969628, + "grad_norm": 1.3962340669474915, + "learning_rate": 9.454545454545456e-06, + "loss": 0.4142, + "step": 157 + }, + { + "epoch": 0.09597570235383447, + "grad_norm": 1.2746895088209749, + "learning_rate": 9.515151515151516e-06, + "loss": 0.3723, + "step": 158 + }, + { + "epoch": 0.09658314350797266, + "grad_norm": 1.818570241099621, + "learning_rate": 9.575757575757576e-06, + "loss": 0.4198, + "step": 159 + }, + { + "epoch": 0.09719058466211086, + "grad_norm": 1.2766595136162489, + "learning_rate": 9.636363636363638e-06, + "loss": 0.3832, + "step": 160 + }, + { + "epoch": 0.09779802581624905, + "grad_norm": 1.2291199390553869, + "learning_rate": 9.696969696969698e-06, + "loss": 0.3825, + "step": 161 + }, + { + "epoch": 0.09840546697038724, + "grad_norm": 
1.316405861184458, + "learning_rate": 9.757575757575758e-06, + "loss": 0.3418, + "step": 162 + }, + { + "epoch": 0.09901290812452544, + "grad_norm": 1.9801803743837283, + "learning_rate": 9.81818181818182e-06, + "loss": 0.3554, + "step": 163 + }, + { + "epoch": 0.09962034927866363, + "grad_norm": 1.3735171287256052, + "learning_rate": 9.87878787878788e-06, + "loss": 0.4433, + "step": 164 + }, + { + "epoch": 0.10022779043280182, + "grad_norm": 1.548447451053059, + "learning_rate": 9.939393939393939e-06, + "loss": 0.3988, + "step": 165 + }, + { + "epoch": 0.10083523158694002, + "grad_norm": 1.9424963562307047, + "learning_rate": 1e-05, + "loss": 0.4318, + "step": 166 + }, + { + "epoch": 0.10144267274107821, + "grad_norm": 1.4796993703531827, + "learning_rate": 9.999988765773283e-06, + "loss": 0.3901, + "step": 167 + }, + { + "epoch": 0.1020501138952164, + "grad_norm": 2.718931345910638, + "learning_rate": 9.99995506314361e-06, + "loss": 0.3733, + "step": 168 + }, + { + "epoch": 0.10265755504935459, + "grad_norm": 1.5993151857382264, + "learning_rate": 9.999898892262433e-06, + "loss": 0.3702, + "step": 169 + }, + { + "epoch": 0.10326499620349279, + "grad_norm": 2.0474737975378914, + "learning_rate": 9.99982025338217e-06, + "loss": 0.3925, + "step": 170 + }, + { + "epoch": 0.10387243735763098, + "grad_norm": 1.36801258921155, + "learning_rate": 9.999719146856191e-06, + "loss": 0.3601, + "step": 171 + }, + { + "epoch": 0.10447987851176917, + "grad_norm": 2.62979787342245, + "learning_rate": 9.999595573138845e-06, + "loss": 0.3676, + "step": 172 + }, + { + "epoch": 0.10508731966590737, + "grad_norm": 1.651611235990868, + "learning_rate": 9.99944953278543e-06, + "loss": 0.3896, + "step": 173 + }, + { + "epoch": 0.10569476082004556, + "grad_norm": 1.537437823682991, + "learning_rate": 9.99928102645221e-06, + "loss": 0.3931, + "step": 174 + }, + { + "epoch": 0.10630220197418375, + "grad_norm": 1.483529775109398, + "learning_rate": 9.999090054896397e-06, + "loss": 0.4084, + 
"step": 175 + }, + { + "epoch": 0.10690964312832195, + "grad_norm": 1.846243435186876, + "learning_rate": 9.99887661897616e-06, + "loss": 0.4019, + "step": 176 + }, + { + "epoch": 0.10751708428246014, + "grad_norm": 1.9400365455774309, + "learning_rate": 9.998640719650609e-06, + "loss": 0.353, + "step": 177 + }, + { + "epoch": 0.10812452543659833, + "grad_norm": 1.6331047828876988, + "learning_rate": 9.99838235797981e-06, + "loss": 0.3934, + "step": 178 + }, + { + "epoch": 0.10873196659073653, + "grad_norm": 1.3201503553736853, + "learning_rate": 9.998101535124758e-06, + "loss": 0.3784, + "step": 179 + }, + { + "epoch": 0.10933940774487472, + "grad_norm": 1.5036507542871547, + "learning_rate": 9.997798252347382e-06, + "loss": 0.3829, + "step": 180 + }, + { + "epoch": 0.1099468488990129, + "grad_norm": 1.8873362444241242, + "learning_rate": 9.997472511010543e-06, + "loss": 0.3468, + "step": 181 + }, + { + "epoch": 0.1105542900531511, + "grad_norm": 1.7817085438588727, + "learning_rate": 9.99712431257802e-06, + "loss": 0.3942, + "step": 182 + }, + { + "epoch": 0.1111617312072893, + "grad_norm": 1.4579688480639608, + "learning_rate": 9.99675365861451e-06, + "loss": 0.3493, + "step": 183 + }, + { + "epoch": 0.11176917236142749, + "grad_norm": 1.3333541821120902, + "learning_rate": 9.996360550785619e-06, + "loss": 0.3748, + "step": 184 + }, + { + "epoch": 0.11237661351556567, + "grad_norm": 1.4462662425656174, + "learning_rate": 9.995944990857848e-06, + "loss": 0.3929, + "step": 185 + }, + { + "epoch": 0.11298405466970388, + "grad_norm": 2.8011059160646785, + "learning_rate": 9.9955069806986e-06, + "loss": 0.3832, + "step": 186 + }, + { + "epoch": 0.11359149582384206, + "grad_norm": 1.795646163613219, + "learning_rate": 9.995046522276152e-06, + "loss": 0.3726, + "step": 187 + }, + { + "epoch": 0.11419893697798025, + "grad_norm": 1.6154980564971173, + "learning_rate": 9.994563617659665e-06, + "loss": 0.4186, + "step": 188 + }, + { + "epoch": 0.11480637813211846, + 
"grad_norm": 4.098449801468239, + "learning_rate": 9.994058269019163e-06, + "loss": 0.3649, + "step": 189 + }, + { + "epoch": 0.11541381928625664, + "grad_norm": 2.0366760130819452, + "learning_rate": 9.993530478625524e-06, + "loss": 0.3889, + "step": 190 + }, + { + "epoch": 0.11602126044039483, + "grad_norm": 1.3683556243295762, + "learning_rate": 9.992980248850476e-06, + "loss": 0.3505, + "step": 191 + }, + { + "epoch": 0.11662870159453304, + "grad_norm": 1.631109364001529, + "learning_rate": 9.992407582166582e-06, + "loss": 0.3716, + "step": 192 + }, + { + "epoch": 0.11723614274867122, + "grad_norm": 1.2863914949680042, + "learning_rate": 9.99181248114723e-06, + "loss": 0.353, + "step": 193 + }, + { + "epoch": 0.11784358390280941, + "grad_norm": 2.3770367523137512, + "learning_rate": 9.991194948466615e-06, + "loss": 0.3212, + "step": 194 + }, + { + "epoch": 0.11845102505694761, + "grad_norm": 1.251810621948992, + "learning_rate": 9.990554986899745e-06, + "loss": 0.3383, + "step": 195 + }, + { + "epoch": 0.1190584662110858, + "grad_norm": 1.3642089088744158, + "learning_rate": 9.989892599322404e-06, + "loss": 0.4176, + "step": 196 + }, + { + "epoch": 0.11966590736522399, + "grad_norm": 1.6922372698689354, + "learning_rate": 9.98920778871116e-06, + "loss": 0.3655, + "step": 197 + }, + { + "epoch": 0.12027334851936218, + "grad_norm": 3.407996768283883, + "learning_rate": 9.988500558143337e-06, + "loss": 0.3844, + "step": 198 + }, + { + "epoch": 0.12088078967350038, + "grad_norm": 1.609402206597202, + "learning_rate": 9.987770910797014e-06, + "loss": 0.4128, + "step": 199 + }, + { + "epoch": 0.12148823082763857, + "grad_norm": 1.3140316757802264, + "learning_rate": 9.987018849950996e-06, + "loss": 0.3962, + "step": 200 + }, + { + "epoch": 0.12209567198177676, + "grad_norm": 1.7469090493735866, + "learning_rate": 9.986244378984817e-06, + "loss": 0.3691, + "step": 201 + }, + { + "epoch": 0.12270311313591496, + "grad_norm": 1.33925380250546, + "learning_rate": 
9.985447501378706e-06, + "loss": 0.3566, + "step": 202 + }, + { + "epoch": 0.12331055429005315, + "grad_norm": 1.809637543111782, + "learning_rate": 9.984628220713587e-06, + "loss": 0.33, + "step": 203 + }, + { + "epoch": 0.12391799544419134, + "grad_norm": 8.810976676413983, + "learning_rate": 9.983786540671052e-06, + "loss": 0.3745, + "step": 204 + }, + { + "epoch": 0.12452543659832954, + "grad_norm": 1.5678141251242255, + "learning_rate": 9.98292246503335e-06, + "loss": 0.3863, + "step": 205 + }, + { + "epoch": 0.12513287775246773, + "grad_norm": 1.2444290855149283, + "learning_rate": 9.982035997683372e-06, + "loss": 0.3449, + "step": 206 + }, + { + "epoch": 0.12574031890660592, + "grad_norm": 1.262107824746014, + "learning_rate": 9.981127142604628e-06, + "loss": 0.3553, + "step": 207 + }, + { + "epoch": 0.1263477600607441, + "grad_norm": 2.261971867644215, + "learning_rate": 9.980195903881231e-06, + "loss": 0.3929, + "step": 208 + }, + { + "epoch": 0.1269552012148823, + "grad_norm": 1.8939164327645688, + "learning_rate": 9.979242285697878e-06, + "loss": 0.3894, + "step": 209 + }, + { + "epoch": 0.1275626423690205, + "grad_norm": 1.3109993122933488, + "learning_rate": 9.978266292339838e-06, + "loss": 0.3462, + "step": 210 + }, + { + "epoch": 0.1281700835231587, + "grad_norm": 1.2689263598262492, + "learning_rate": 9.97726792819292e-06, + "loss": 0.3756, + "step": 211 + }, + { + "epoch": 0.1287775246772969, + "grad_norm": 1.413911853511702, + "learning_rate": 9.976247197743465e-06, + "loss": 0.3517, + "step": 212 + }, + { + "epoch": 0.12938496583143508, + "grad_norm": 1.1477594568471046, + "learning_rate": 9.975204105578318e-06, + "loss": 0.3524, + "step": 213 + }, + { + "epoch": 0.12999240698557327, + "grad_norm": 1.3268983856893921, + "learning_rate": 9.974138656384815e-06, + "loss": 0.3703, + "step": 214 + }, + { + "epoch": 0.13059984813971146, + "grad_norm": 1.6085096632167164, + "learning_rate": 9.973050854950756e-06, + "loss": 0.4107, + "step": 215 + }, + { 
+ "epoch": 0.13120728929384967, + "grad_norm": 1.2710644494087506, + "learning_rate": 9.97194070616438e-06, + "loss": 0.3704, + "step": 216 + }, + { + "epoch": 0.13181473044798786, + "grad_norm": 1.7007435080900732, + "learning_rate": 9.970808215014357e-06, + "loss": 0.3616, + "step": 217 + }, + { + "epoch": 0.13242217160212605, + "grad_norm": 2.8035141258704965, + "learning_rate": 9.969653386589749e-06, + "loss": 0.3476, + "step": 218 + }, + { + "epoch": 0.13302961275626424, + "grad_norm": 1.4530105606551262, + "learning_rate": 9.968476226079997e-06, + "loss": 0.3658, + "step": 219 + }, + { + "epoch": 0.13363705391040243, + "grad_norm": 1.365931702075457, + "learning_rate": 9.967276738774897e-06, + "loss": 0.3559, + "step": 220 + }, + { + "epoch": 0.13424449506454061, + "grad_norm": 2.4105147491908845, + "learning_rate": 9.966054930064577e-06, + "loss": 0.3464, + "step": 221 + }, + { + "epoch": 0.1348519362186788, + "grad_norm": 1.460590918478701, + "learning_rate": 9.964810805439464e-06, + "loss": 0.3835, + "step": 222 + }, + { + "epoch": 0.13545937737281702, + "grad_norm": 1.4592160813082191, + "learning_rate": 9.96354437049027e-06, + "loss": 0.3649, + "step": 223 + }, + { + "epoch": 0.1360668185269552, + "grad_norm": 1.3706484221475523, + "learning_rate": 9.962255630907964e-06, + "loss": 0.306, + "step": 224 + }, + { + "epoch": 0.1366742596810934, + "grad_norm": 1.9923965820088605, + "learning_rate": 9.96094459248374e-06, + "loss": 0.4094, + "step": 225 + }, + { + "epoch": 0.13728170083523158, + "grad_norm": 1.212563998181567, + "learning_rate": 9.959611261108999e-06, + "loss": 0.3601, + "step": 226 + }, + { + "epoch": 0.13788914198936977, + "grad_norm": 1.0692124626260124, + "learning_rate": 9.95825564277532e-06, + "loss": 0.3532, + "step": 227 + }, + { + "epoch": 0.13849658314350796, + "grad_norm": 1.1033511924540673, + "learning_rate": 9.956877743574437e-06, + "loss": 0.3384, + "step": 228 + }, + { + "epoch": 0.13910402429764618, + "grad_norm": 
1.2707398286483838, + "learning_rate": 9.955477569698197e-06, + "loss": 0.3367, + "step": 229 + }, + { + "epoch": 0.13971146545178437, + "grad_norm": 1.405772249647566, + "learning_rate": 9.954055127438554e-06, + "loss": 0.3673, + "step": 230 + }, + { + "epoch": 0.14031890660592256, + "grad_norm": 1.8473504788016004, + "learning_rate": 9.952610423187516e-06, + "loss": 0.4095, + "step": 231 + }, + { + "epoch": 0.14092634776006074, + "grad_norm": 1.4016757128131039, + "learning_rate": 9.951143463437145e-06, + "loss": 0.3503, + "step": 232 + }, + { + "epoch": 0.14153378891419893, + "grad_norm": 1.3608962633812787, + "learning_rate": 9.949654254779499e-06, + "loss": 0.3897, + "step": 233 + }, + { + "epoch": 0.14214123006833712, + "grad_norm": 1.2731854733891155, + "learning_rate": 9.948142803906623e-06, + "loss": 0.3596, + "step": 234 + }, + { + "epoch": 0.1427486712224753, + "grad_norm": 2.744204155126872, + "learning_rate": 9.946609117610508e-06, + "loss": 0.3311, + "step": 235 + }, + { + "epoch": 0.14335611237661353, + "grad_norm": 1.4126443556826698, + "learning_rate": 9.94505320278307e-06, + "loss": 0.3417, + "step": 236 + }, + { + "epoch": 0.14396355353075171, + "grad_norm": 1.8771416206071374, + "learning_rate": 9.943475066416105e-06, + "loss": 0.3246, + "step": 237 + }, + { + "epoch": 0.1445709946848899, + "grad_norm": 1.662514482004372, + "learning_rate": 9.94187471560127e-06, + "loss": 0.336, + "step": 238 + }, + { + "epoch": 0.1451784358390281, + "grad_norm": 1.3252537752081341, + "learning_rate": 9.940252157530048e-06, + "loss": 0.3728, + "step": 239 + }, + { + "epoch": 0.14578587699316628, + "grad_norm": 1.4379575767599655, + "learning_rate": 9.938607399493714e-06, + "loss": 0.3349, + "step": 240 + }, + { + "epoch": 0.14639331814730447, + "grad_norm": 1.7085189808679144, + "learning_rate": 9.936940448883299e-06, + "loss": 0.3732, + "step": 241 + }, + { + "epoch": 0.14700075930144268, + "grad_norm": 1.3475867031172948, + "learning_rate": 
9.935251313189564e-06, + "loss": 0.3614, + "step": 242 + }, + { + "epoch": 0.14760820045558087, + "grad_norm": 1.3757718265613308, + "learning_rate": 9.933540000002966e-06, + "loss": 0.3495, + "step": 243 + }, + { + "epoch": 0.14821564160971906, + "grad_norm": 2.5569808145548016, + "learning_rate": 9.931806517013612e-06, + "loss": 0.3846, + "step": 244 + }, + { + "epoch": 0.14882308276385725, + "grad_norm": 2.3675332462526724, + "learning_rate": 9.930050872011242e-06, + "loss": 0.3927, + "step": 245 + }, + { + "epoch": 0.14943052391799544, + "grad_norm": 1.257087872816816, + "learning_rate": 9.92827307288518e-06, + "loss": 0.347, + "step": 246 + }, + { + "epoch": 0.15003796507213363, + "grad_norm": 1.1999532829094337, + "learning_rate": 9.926473127624306e-06, + "loss": 0.3099, + "step": 247 + }, + { + "epoch": 0.15064540622627182, + "grad_norm": 1.3143658832484064, + "learning_rate": 9.924651044317017e-06, + "loss": 0.3476, + "step": 248 + }, + { + "epoch": 0.15125284738041003, + "grad_norm": 1.2916975919820823, + "learning_rate": 9.922806831151192e-06, + "loss": 0.3829, + "step": 249 + }, + { + "epoch": 0.15186028853454822, + "grad_norm": 1.3792160543399081, + "learning_rate": 9.920940496414153e-06, + "loss": 0.3414, + "step": 250 + }, + { + "epoch": 0.1524677296886864, + "grad_norm": 1.552844179932476, + "learning_rate": 9.919052048492633e-06, + "loss": 0.329, + "step": 251 + }, + { + "epoch": 0.1530751708428246, + "grad_norm": 1.2281468240128537, + "learning_rate": 9.917141495872733e-06, + "loss": 0.3112, + "step": 252 + }, + { + "epoch": 0.1536826119969628, + "grad_norm": 1.2302352301120831, + "learning_rate": 9.915208847139883e-06, + "loss": 0.3576, + "step": 253 + }, + { + "epoch": 0.15429005315110098, + "grad_norm": 1.618659315298819, + "learning_rate": 9.913254110978812e-06, + "loss": 0.3669, + "step": 254 + }, + { + "epoch": 0.1548974943052392, + "grad_norm": 1.8288474552445757, + "learning_rate": 9.911277296173498e-06, + "loss": 0.3572, + "step": 255 + }, 
+ { + "epoch": 0.15550493545937738, + "grad_norm": 1.1302723791543823, + "learning_rate": 9.909278411607134e-06, + "loss": 0.3432, + "step": 256 + }, + { + "epoch": 0.15611237661351557, + "grad_norm": 1.1877481410718438, + "learning_rate": 9.90725746626209e-06, + "loss": 0.3096, + "step": 257 + }, + { + "epoch": 0.15671981776765376, + "grad_norm": 1.168952450453289, + "learning_rate": 9.90521446921987e-06, + "loss": 0.3067, + "step": 258 + }, + { + "epoch": 0.15732725892179195, + "grad_norm": 1.3157190721989047, + "learning_rate": 9.903149429661072e-06, + "loss": 0.3666, + "step": 259 + }, + { + "epoch": 0.15793470007593013, + "grad_norm": 1.1390654533947697, + "learning_rate": 9.90106235686534e-06, + "loss": 0.3506, + "step": 260 + }, + { + "epoch": 0.15854214123006835, + "grad_norm": 4.00051251257636, + "learning_rate": 9.89895326021134e-06, + "loss": 0.3249, + "step": 261 + }, + { + "epoch": 0.15914958238420654, + "grad_norm": 1.2234262044458446, + "learning_rate": 9.896822149176695e-06, + "loss": 0.3318, + "step": 262 + }, + { + "epoch": 0.15975702353834473, + "grad_norm": 1.3897253859010028, + "learning_rate": 9.894669033337962e-06, + "loss": 0.396, + "step": 263 + }, + { + "epoch": 0.16036446469248292, + "grad_norm": 1.7242712965980627, + "learning_rate": 9.892493922370575e-06, + "loss": 0.3188, + "step": 264 + }, + { + "epoch": 0.1609719058466211, + "grad_norm": 6.4297649243853225, + "learning_rate": 9.89029682604881e-06, + "loss": 0.3379, + "step": 265 + }, + { + "epoch": 0.1615793470007593, + "grad_norm": 1.1537471226413523, + "learning_rate": 9.888077754245741e-06, + "loss": 0.3493, + "step": 266 + }, + { + "epoch": 0.16218678815489748, + "grad_norm": 1.6875687617066326, + "learning_rate": 9.88583671693319e-06, + "loss": 0.3608, + "step": 267 + }, + { + "epoch": 0.1627942293090357, + "grad_norm": 1.2568745648537023, + "learning_rate": 9.883573724181683e-06, + "loss": 0.3795, + "step": 268 + }, + { + "epoch": 0.1634016704631739, + "grad_norm": 
3.273702543597702, + "learning_rate": 9.881288786160413e-06, + "loss": 0.3669, + "step": 269 + }, + { + "epoch": 0.16400911161731208, + "grad_norm": 1.6681243106945143, + "learning_rate": 9.878981913137178e-06, + "loss": 0.3045, + "step": 270 + }, + { + "epoch": 0.16461655277145026, + "grad_norm": 1.3040049020132651, + "learning_rate": 9.87665311547836e-06, + "loss": 0.3748, + "step": 271 + }, + { + "epoch": 0.16522399392558845, + "grad_norm": 1.4186566442108688, + "learning_rate": 9.87430240364885e-06, + "loss": 0.317, + "step": 272 + }, + { + "epoch": 0.16583143507972664, + "grad_norm": 1.2927229360317918, + "learning_rate": 9.871929788212022e-06, + "loss": 0.3444, + "step": 273 + }, + { + "epoch": 0.16643887623386486, + "grad_norm": 1.2231558908365099, + "learning_rate": 9.869535279829674e-06, + "loss": 0.3606, + "step": 274 + }, + { + "epoch": 0.16704631738800305, + "grad_norm": 1.9640328612851339, + "learning_rate": 9.867118889261988e-06, + "loss": 0.3473, + "step": 275 + }, + { + "epoch": 0.16765375854214123, + "grad_norm": 1.627402530642274, + "learning_rate": 9.864680627367476e-06, + "loss": 0.3278, + "step": 276 + }, + { + "epoch": 0.16826119969627942, + "grad_norm": 1.2427133272428312, + "learning_rate": 9.862220505102933e-06, + "loss": 0.3521, + "step": 277 + }, + { + "epoch": 0.1688686408504176, + "grad_norm": 1.3565240591384948, + "learning_rate": 9.859738533523384e-06, + "loss": 0.319, + "step": 278 + }, + { + "epoch": 0.1694760820045558, + "grad_norm": 1.2107487526492342, + "learning_rate": 9.857234723782044e-06, + "loss": 0.3352, + "step": 279 + }, + { + "epoch": 0.170083523158694, + "grad_norm": 1.3180319945310117, + "learning_rate": 9.854709087130261e-06, + "loss": 0.3139, + "step": 280 + }, + { + "epoch": 0.1706909643128322, + "grad_norm": 3.2813293101863916, + "learning_rate": 9.852161634917463e-06, + "loss": 0.3349, + "step": 281 + }, + { + "epoch": 0.1712984054669704, + "grad_norm": 1.245803428057021, + "learning_rate": 9.849592378591113e-06, 
+ "loss": 0.3077, + "step": 282 + }, + { + "epoch": 0.17190584662110858, + "grad_norm": 1.223977954013305, + "learning_rate": 9.847001329696653e-06, + "loss": 0.3069, + "step": 283 + }, + { + "epoch": 0.17251328777524677, + "grad_norm": 1.3069739682646992, + "learning_rate": 9.844388499877457e-06, + "loss": 0.3291, + "step": 284 + }, + { + "epoch": 0.17312072892938496, + "grad_norm": 1.131190253610025, + "learning_rate": 9.841753900874774e-06, + "loss": 0.3289, + "step": 285 + }, + { + "epoch": 0.17372817008352315, + "grad_norm": 1.34440810872577, + "learning_rate": 9.839097544527674e-06, + "loss": 0.3267, + "step": 286 + }, + { + "epoch": 0.17433561123766136, + "grad_norm": 1.3644537366382798, + "learning_rate": 9.836419442773004e-06, + "loss": 0.3443, + "step": 287 + }, + { + "epoch": 0.17494305239179955, + "grad_norm": 1.2569085833757287, + "learning_rate": 9.833719607645325e-06, + "loss": 0.3241, + "step": 288 + }, + { + "epoch": 0.17555049354593774, + "grad_norm": 1.2077036555513847, + "learning_rate": 9.830998051276858e-06, + "loss": 0.3541, + "step": 289 + }, + { + "epoch": 0.17615793470007593, + "grad_norm": 3.9279665228825187, + "learning_rate": 9.82825478589744e-06, + "loss": 0.3666, + "step": 290 + }, + { + "epoch": 0.17676537585421412, + "grad_norm": 1.3633793584504903, + "learning_rate": 9.825489823834454e-06, + "loss": 0.3162, + "step": 291 + }, + { + "epoch": 0.1773728170083523, + "grad_norm": 1.190852630500219, + "learning_rate": 9.822703177512783e-06, + "loss": 0.3281, + "step": 292 + }, + { + "epoch": 0.1779802581624905, + "grad_norm": 1.032612295087311, + "learning_rate": 9.819894859454756e-06, + "loss": 0.2902, + "step": 293 + }, + { + "epoch": 0.1785876993166287, + "grad_norm": 2.2666599075970058, + "learning_rate": 9.817064882280085e-06, + "loss": 0.3872, + "step": 294 + }, + { + "epoch": 0.1791951404707669, + "grad_norm": 1.5056944572723148, + "learning_rate": 9.814213258705813e-06, + "loss": 0.4009, + "step": 295 + }, + { + "epoch": 
0.1798025816249051, + "grad_norm": 1.2750096010881427, + "learning_rate": 9.811340001546252e-06, + "loss": 0.335, + "step": 296 + }, + { + "epoch": 0.18041002277904328, + "grad_norm": 1.3167579603123851, + "learning_rate": 9.808445123712934e-06, + "loss": 0.3789, + "step": 297 + }, + { + "epoch": 0.18101746393318147, + "grad_norm": 1.2917298455538913, + "learning_rate": 9.805528638214543e-06, + "loss": 0.365, + "step": 298 + }, + { + "epoch": 0.18162490508731965, + "grad_norm": 1.2249270654309992, + "learning_rate": 9.802590558156863e-06, + "loss": 0.3267, + "step": 299 + }, + { + "epoch": 0.18223234624145787, + "grad_norm": 1.28147771791881, + "learning_rate": 9.799630896742716e-06, + "loss": 0.3258, + "step": 300 + }, + { + "epoch": 0.18283978739559606, + "grad_norm": 2.076161195627259, + "learning_rate": 9.796649667271905e-06, + "loss": 0.3588, + "step": 301 + }, + { + "epoch": 0.18344722854973425, + "grad_norm": 1.1215708430697366, + "learning_rate": 9.793646883141155e-06, + "loss": 0.32, + "step": 302 + }, + { + "epoch": 0.18405466970387244, + "grad_norm": 1.2637743993804484, + "learning_rate": 9.790622557844047e-06, + "loss": 0.3561, + "step": 303 + }, + { + "epoch": 0.18466211085801063, + "grad_norm": 2.4961674050461635, + "learning_rate": 9.787576704970965e-06, + "loss": 0.343, + "step": 304 + }, + { + "epoch": 0.1852695520121488, + "grad_norm": 1.8062025541980924, + "learning_rate": 9.784509338209026e-06, + "loss": 0.339, + "step": 305 + }, + { + "epoch": 0.185876993166287, + "grad_norm": 1.1705663644423028, + "learning_rate": 9.781420471342035e-06, + "loss": 0.3204, + "step": 306 + }, + { + "epoch": 0.18648443432042522, + "grad_norm": 1.3501675244896367, + "learning_rate": 9.778310118250397e-06, + "loss": 0.3598, + "step": 307 + }, + { + "epoch": 0.1870918754745634, + "grad_norm": 1.2093391302114258, + "learning_rate": 9.77517829291108e-06, + "loss": 0.3397, + "step": 308 + }, + { + "epoch": 0.1876993166287016, + "grad_norm": 1.3119917853957324, + 
"learning_rate": 9.772025009397538e-06, + "loss": 0.3291, + "step": 309 + }, + { + "epoch": 0.18830675778283978, + "grad_norm": 3.4574677223030217, + "learning_rate": 9.768850281879651e-06, + "loss": 0.3297, + "step": 310 + }, + { + "epoch": 0.18891419893697797, + "grad_norm": 1.3155763470245156, + "learning_rate": 9.765654124623664e-06, + "loss": 0.3317, + "step": 311 + }, + { + "epoch": 0.18952164009111616, + "grad_norm": 1.2868677150111685, + "learning_rate": 9.762436551992117e-06, + "loss": 0.3545, + "step": 312 + }, + { + "epoch": 0.19012908124525438, + "grad_norm": 1.2047620112147, + "learning_rate": 9.759197578443787e-06, + "loss": 0.3282, + "step": 313 + }, + { + "epoch": 0.19073652239939257, + "grad_norm": 1.341450099932963, + "learning_rate": 9.755937218533622e-06, + "loss": 0.348, + "step": 314 + }, + { + "epoch": 0.19134396355353075, + "grad_norm": 1.3549028797747085, + "learning_rate": 9.752655486912666e-06, + "loss": 0.3258, + "step": 315 + }, + { + "epoch": 0.19195140470766894, + "grad_norm": 1.2140305820046362, + "learning_rate": 9.74935239832801e-06, + "loss": 0.3441, + "step": 316 + }, + { + "epoch": 0.19255884586180713, + "grad_norm": 1.3240982666908445, + "learning_rate": 9.746027967622709e-06, + "loss": 0.3322, + "step": 317 + }, + { + "epoch": 0.19316628701594532, + "grad_norm": 1.1866493550648762, + "learning_rate": 9.742682209735727e-06, + "loss": 0.3387, + "step": 318 + }, + { + "epoch": 0.19377372817008354, + "grad_norm": 1.4708765807963506, + "learning_rate": 9.739315139701868e-06, + "loss": 0.3234, + "step": 319 + }, + { + "epoch": 0.19438116932422173, + "grad_norm": 1.0795492879673514, + "learning_rate": 9.735926772651703e-06, + "loss": 0.3182, + "step": 320 + }, + { + "epoch": 0.19498861047835991, + "grad_norm": 1.1995704100657156, + "learning_rate": 9.732517123811502e-06, + "loss": 0.3267, + "step": 321 + }, + { + "epoch": 0.1955960516324981, + "grad_norm": 1.2807722640717565, + "learning_rate": 9.729086208503174e-06, + "loss": 
0.3439, + "step": 322 + }, + { + "epoch": 0.1962034927866363, + "grad_norm": 1.1439191799968789, + "learning_rate": 9.725634042144192e-06, + "loss": 0.3035, + "step": 323 + }, + { + "epoch": 0.19681093394077448, + "grad_norm": 1.123574240810596, + "learning_rate": 9.722160640247523e-06, + "loss": 0.3402, + "step": 324 + }, + { + "epoch": 0.19741837509491267, + "grad_norm": 1.9809732054403608, + "learning_rate": 9.71866601842156e-06, + "loss": 0.3596, + "step": 325 + }, + { + "epoch": 0.19802581624905088, + "grad_norm": 3.2675821795654474, + "learning_rate": 9.715150192370054e-06, + "loss": 0.3378, + "step": 326 + }, + { + "epoch": 0.19863325740318907, + "grad_norm": 1.1513389970747174, + "learning_rate": 9.71161317789204e-06, + "loss": 0.312, + "step": 327 + }, + { + "epoch": 0.19924069855732726, + "grad_norm": 1.0732590413444016, + "learning_rate": 9.708054990881763e-06, + "loss": 0.3028, + "step": 328 + }, + { + "epoch": 0.19984813971146545, + "grad_norm": 1.130054869998939, + "learning_rate": 9.70447564732862e-06, + "loss": 0.3161, + "step": 329 + }, + { + "epoch": 0.20045558086560364, + "grad_norm": 1.4279288053692063, + "learning_rate": 9.700875163317072e-06, + "loss": 0.3159, + "step": 330 + }, + { + "epoch": 0.20106302201974183, + "grad_norm": 1.4180293061625155, + "learning_rate": 9.69725355502658e-06, + "loss": 0.3555, + "step": 331 + }, + { + "epoch": 0.20167046317388004, + "grad_norm": 1.5620650901784414, + "learning_rate": 9.693610838731532e-06, + "loss": 0.3256, + "step": 332 + }, + { + "epoch": 0.20227790432801823, + "grad_norm": 1.3488433094850794, + "learning_rate": 9.689947030801168e-06, + "loss": 0.358, + "step": 333 + }, + { + "epoch": 0.20288534548215642, + "grad_norm": 1.2086835793396953, + "learning_rate": 9.686262147699507e-06, + "loss": 0.3648, + "step": 334 + }, + { + "epoch": 0.2034927866362946, + "grad_norm": 1.0080244863547254, + "learning_rate": 9.682556205985274e-06, + "loss": 0.3197, + "step": 335 + }, + { + "epoch": 
0.2041002277904328, + "grad_norm": 1.0405594952124566, + "learning_rate": 9.678829222311827e-06, + "loss": 0.304, + "step": 336 + }, + { + "epoch": 0.204707668944571, + "grad_norm": 1.669499151030841, + "learning_rate": 9.675081213427076e-06, + "loss": 0.3282, + "step": 337 + }, + { + "epoch": 0.20531511009870917, + "grad_norm": 1.162339657589905, + "learning_rate": 9.671312196173413e-06, + "loss": 0.328, + "step": 338 + }, + { + "epoch": 0.2059225512528474, + "grad_norm": 1.1111055689988498, + "learning_rate": 9.667522187487635e-06, + "loss": 0.3352, + "step": 339 + }, + { + "epoch": 0.20652999240698558, + "grad_norm": 1.3779787045612117, + "learning_rate": 9.663711204400872e-06, + "loss": 0.3575, + "step": 340 + }, + { + "epoch": 0.20713743356112377, + "grad_norm": 1.6323496019886752, + "learning_rate": 9.659879264038499e-06, + "loss": 0.365, + "step": 341 + }, + { + "epoch": 0.20774487471526196, + "grad_norm": 1.493535511167361, + "learning_rate": 9.656026383620076e-06, + "loss": 0.3445, + "step": 342 + }, + { + "epoch": 0.20835231586940015, + "grad_norm": 1.3056823613349453, + "learning_rate": 9.65215258045925e-06, + "loss": 0.2948, + "step": 343 + }, + { + "epoch": 0.20895975702353833, + "grad_norm": 1.0670662511449958, + "learning_rate": 9.6482578719637e-06, + "loss": 0.3139, + "step": 344 + }, + { + "epoch": 0.20956719817767655, + "grad_norm": 1.3642861541498819, + "learning_rate": 9.644342275635036e-06, + "loss": 0.3015, + "step": 345 + }, + { + "epoch": 0.21017463933181474, + "grad_norm": 1.0747742911930387, + "learning_rate": 9.640405809068743e-06, + "loss": 0.3228, + "step": 346 + }, + { + "epoch": 0.21078208048595293, + "grad_norm": 1.1565608956431175, + "learning_rate": 9.636448489954077e-06, + "loss": 0.307, + "step": 347 + }, + { + "epoch": 0.21138952164009112, + "grad_norm": 1.195151098550731, + "learning_rate": 9.632470336074009e-06, + "loss": 0.3284, + "step": 348 + }, + { + "epoch": 0.2119969627942293, + "grad_norm": 1.1885220245152495, + 
"learning_rate": 9.628471365305134e-06, + "loss": 0.3437, + "step": 349 + }, + { + "epoch": 0.2126044039483675, + "grad_norm": 1.0344142275699475, + "learning_rate": 9.624451595617588e-06, + "loss": 0.3185, + "step": 350 + }, + { + "epoch": 0.21321184510250568, + "grad_norm": 1.2656391323297032, + "learning_rate": 9.620411045074972e-06, + "loss": 0.3626, + "step": 351 + }, + { + "epoch": 0.2138192862566439, + "grad_norm": 1.0752778164280428, + "learning_rate": 9.616349731834271e-06, + "loss": 0.3225, + "step": 352 + }, + { + "epoch": 0.2144267274107821, + "grad_norm": 1.2178645720798402, + "learning_rate": 9.612267674145772e-06, + "loss": 0.3534, + "step": 353 + }, + { + "epoch": 0.21503416856492027, + "grad_norm": 1.4072309869153488, + "learning_rate": 9.608164890352977e-06, + "loss": 0.3459, + "step": 354 + }, + { + "epoch": 0.21564160971905846, + "grad_norm": 1.1875602285502396, + "learning_rate": 9.604041398892528e-06, + "loss": 0.3288, + "step": 355 + }, + { + "epoch": 0.21624905087319665, + "grad_norm": 1.2188563316023242, + "learning_rate": 9.599897218294122e-06, + "loss": 0.3509, + "step": 356 + }, + { + "epoch": 0.21685649202733484, + "grad_norm": 1.1569315648201919, + "learning_rate": 9.595732367180422e-06, + "loss": 0.3173, + "step": 357 + }, + { + "epoch": 0.21746393318147306, + "grad_norm": 1.5018233135579402, + "learning_rate": 9.591546864266983e-06, + "loss": 0.3507, + "step": 358 + }, + { + "epoch": 0.21807137433561125, + "grad_norm": 1.0272557252775882, + "learning_rate": 9.58734072836216e-06, + "loss": 0.3001, + "step": 359 + }, + { + "epoch": 0.21867881548974943, + "grad_norm": 1.245040066414171, + "learning_rate": 9.583113978367026e-06, + "loss": 0.2957, + "step": 360 + }, + { + "epoch": 0.21928625664388762, + "grad_norm": 1.3275806437802142, + "learning_rate": 9.578866633275289e-06, + "loss": 0.3383, + "step": 361 + }, + { + "epoch": 0.2198936977980258, + "grad_norm": 1.1727768085477153, + "learning_rate": 9.574598712173202e-06, + "loss": 
0.2735, + "step": 362 + }, + { + "epoch": 0.220501138952164, + "grad_norm": 1.22146926918798, + "learning_rate": 9.570310234239483e-06, + "loss": 0.3166, + "step": 363 + }, + { + "epoch": 0.2211085801063022, + "grad_norm": 1.2281212541536195, + "learning_rate": 9.56600121874523e-06, + "loss": 0.3249, + "step": 364 + }, + { + "epoch": 0.2217160212604404, + "grad_norm": 2.3299644415456036, + "learning_rate": 9.561671685053818e-06, + "loss": 0.3467, + "step": 365 + }, + { + "epoch": 0.2223234624145786, + "grad_norm": 1.1619894211736224, + "learning_rate": 9.557321652620839e-06, + "loss": 0.3077, + "step": 366 + }, + { + "epoch": 0.22293090356871678, + "grad_norm": 1.1786921246836153, + "learning_rate": 9.55295114099399e-06, + "loss": 0.3294, + "step": 367 + }, + { + "epoch": 0.22353834472285497, + "grad_norm": 1.1859186341534969, + "learning_rate": 9.548560169812997e-06, + "loss": 0.3167, + "step": 368 + }, + { + "epoch": 0.22414578587699316, + "grad_norm": 1.1441591110703015, + "learning_rate": 9.544148758809528e-06, + "loss": 0.3193, + "step": 369 + }, + { + "epoch": 0.22475322703113135, + "grad_norm": 1.1128313603192685, + "learning_rate": 9.539716927807102e-06, + "loss": 0.3093, + "step": 370 + }, + { + "epoch": 0.22536066818526956, + "grad_norm": 1.4910675750318487, + "learning_rate": 9.535264696720993e-06, + "loss": 0.3253, + "step": 371 + }, + { + "epoch": 0.22596810933940775, + "grad_norm": 1.189666585401165, + "learning_rate": 9.530792085558151e-06, + "loss": 0.3558, + "step": 372 + }, + { + "epoch": 0.22657555049354594, + "grad_norm": 1.2061368174942724, + "learning_rate": 9.526299114417108e-06, + "loss": 0.3253, + "step": 373 + }, + { + "epoch": 0.22718299164768413, + "grad_norm": 1.0468374477355344, + "learning_rate": 9.521785803487888e-06, + "loss": 0.3178, + "step": 374 + }, + { + "epoch": 0.22779043280182232, + "grad_norm": 1.2500589524530483, + "learning_rate": 9.517252173051912e-06, + "loss": 0.3066, + "step": 375 + }, + { + "epoch": 
0.2283978739559605, + "grad_norm": 2.396102762626989, + "learning_rate": 9.512698243481914e-06, + "loss": 0.3087, + "step": 376 + }, + { + "epoch": 0.22900531511009872, + "grad_norm": 13.857607430818112, + "learning_rate": 9.508124035241843e-06, + "loss": 0.3001, + "step": 377 + }, + { + "epoch": 0.2296127562642369, + "grad_norm": 1.5821099485242913, + "learning_rate": 9.50352956888678e-06, + "loss": 0.3393, + "step": 378 + }, + { + "epoch": 0.2302201974183751, + "grad_norm": 1.2074450897701234, + "learning_rate": 9.498914865062831e-06, + "loss": 0.334, + "step": 379 + }, + { + "epoch": 0.2308276385725133, + "grad_norm": 1.1721133873450802, + "learning_rate": 9.49427994450705e-06, + "loss": 0.3285, + "step": 380 + }, + { + "epoch": 0.23143507972665148, + "grad_norm": 1.3430574212868829, + "learning_rate": 9.489624828047336e-06, + "loss": 0.3137, + "step": 381 + }, + { + "epoch": 0.23204252088078967, + "grad_norm": 1.5915750295490272, + "learning_rate": 9.484949536602343e-06, + "loss": 0.3505, + "step": 382 + }, + { + "epoch": 0.23264996203492785, + "grad_norm": 1.440309732703764, + "learning_rate": 9.480254091181385e-06, + "loss": 0.3441, + "step": 383 + }, + { + "epoch": 0.23325740318906607, + "grad_norm": 1.2699897467088066, + "learning_rate": 9.47553851288434e-06, + "loss": 0.328, + "step": 384 + }, + { + "epoch": 0.23386484434320426, + "grad_norm": 1.118564432611521, + "learning_rate": 9.470802822901558e-06, + "loss": 0.2914, + "step": 385 + }, + { + "epoch": 0.23447228549734245, + "grad_norm": 2.267610070857606, + "learning_rate": 9.466047042513767e-06, + "loss": 0.3194, + "step": 386 + }, + { + "epoch": 0.23507972665148064, + "grad_norm": 1.1933730736468051, + "learning_rate": 9.461271193091971e-06, + "loss": 0.3329, + "step": 387 + }, + { + "epoch": 0.23568716780561882, + "grad_norm": 1.2144519426747942, + "learning_rate": 9.45647529609736e-06, + "loss": 0.3295, + "step": 388 + }, + { + "epoch": 0.236294608959757, + "grad_norm": 1.3580165469389258, + 
"learning_rate": 9.451659373081214e-06, + "loss": 0.3447, + "step": 389 + }, + { + "epoch": 0.23690205011389523, + "grad_norm": 1.1100635616742127, + "learning_rate": 9.4468234456848e-06, + "loss": 0.3337, + "step": 390 + }, + { + "epoch": 0.23750949126803342, + "grad_norm": 1.3428354458732323, + "learning_rate": 9.44196753563928e-06, + "loss": 0.2838, + "step": 391 + }, + { + "epoch": 0.2381169324221716, + "grad_norm": 1.3921865014793011, + "learning_rate": 9.437091664765611e-06, + "loss": 0.3256, + "step": 392 + }, + { + "epoch": 0.2387243735763098, + "grad_norm": 1.0558795045496834, + "learning_rate": 9.43219585497445e-06, + "loss": 0.2924, + "step": 393 + }, + { + "epoch": 0.23933181473044798, + "grad_norm": 1.0852216391713467, + "learning_rate": 9.427280128266049e-06, + "loss": 0.3159, + "step": 394 + }, + { + "epoch": 0.23993925588458617, + "grad_norm": 1.2663888095002538, + "learning_rate": 9.422344506730168e-06, + "loss": 0.3223, + "step": 395 + }, + { + "epoch": 0.24054669703872436, + "grad_norm": 1.2598248186127823, + "learning_rate": 9.41738901254596e-06, + "loss": 0.3114, + "step": 396 + }, + { + "epoch": 0.24115413819286258, + "grad_norm": 1.2212737926468238, + "learning_rate": 9.412413667981884e-06, + "loss": 0.365, + "step": 397 + }, + { + "epoch": 0.24176157934700077, + "grad_norm": 1.1959984881869385, + "learning_rate": 9.4074184953956e-06, + "loss": 0.3723, + "step": 398 + }, + { + "epoch": 0.24236902050113895, + "grad_norm": 1.291676435173795, + "learning_rate": 9.402403517233867e-06, + "loss": 0.3455, + "step": 399 + }, + { + "epoch": 0.24297646165527714, + "grad_norm": 1.1960840171763427, + "learning_rate": 9.397368756032445e-06, + "loss": 0.3453, + "step": 400 + }, + { + "epoch": 0.24358390280941533, + "grad_norm": 1.0826676561819115, + "learning_rate": 9.392314234415999e-06, + "loss": 0.3047, + "step": 401 + }, + { + "epoch": 0.24419134396355352, + "grad_norm": 1.1755710795468963, + "learning_rate": 9.38723997509798e-06, + "loss": 0.313, + 
"step": 402 + }, + { + "epoch": 0.24479878511769174, + "grad_norm": 1.3499086111109924, + "learning_rate": 9.38214600088054e-06, + "loss": 0.3285, + "step": 403 + }, + { + "epoch": 0.24540622627182992, + "grad_norm": 1.3200873501528223, + "learning_rate": 9.37703233465443e-06, + "loss": 0.369, + "step": 404 + }, + { + "epoch": 0.2460136674259681, + "grad_norm": 1.3826162413858059, + "learning_rate": 9.371898999398876e-06, + "loss": 0.3527, + "step": 405 + }, + { + "epoch": 0.2466211085801063, + "grad_norm": 1.2278576280498825, + "learning_rate": 9.366746018181503e-06, + "loss": 0.3277, + "step": 406 + }, + { + "epoch": 0.2472285497342445, + "grad_norm": 1.1329545797723328, + "learning_rate": 9.361573414158215e-06, + "loss": 0.3229, + "step": 407 + }, + { + "epoch": 0.24783599088838268, + "grad_norm": 1.0477461146627898, + "learning_rate": 9.356381210573092e-06, + "loss": 0.2919, + "step": 408 + }, + { + "epoch": 0.24844343204252087, + "grad_norm": 2.4554214547877704, + "learning_rate": 9.351169430758293e-06, + "loss": 0.3438, + "step": 409 + }, + { + "epoch": 0.24905087319665908, + "grad_norm": 3.5352794640669685, + "learning_rate": 9.345938098133946e-06, + "loss": 0.3262, + "step": 410 + }, + { + "epoch": 0.24965831435079727, + "grad_norm": 2.021698412413005, + "learning_rate": 9.340687236208037e-06, + "loss": 0.3011, + "step": 411 + }, + { + "epoch": 0.25026575550493546, + "grad_norm": 2.2039087798756465, + "learning_rate": 9.33541686857632e-06, + "loss": 0.3663, + "step": 412 + }, + { + "epoch": 0.25087319665907365, + "grad_norm": 1.2067385283486605, + "learning_rate": 9.330127018922195e-06, + "loss": 0.3212, + "step": 413 + }, + { + "epoch": 0.25148063781321184, + "grad_norm": 1.2316558886892726, + "learning_rate": 9.324817711016609e-06, + "loss": 0.3419, + "step": 414 + }, + { + "epoch": 0.25208807896735, + "grad_norm": 1.4875274199476636, + "learning_rate": 9.31948896871795e-06, + "loss": 0.3348, + "step": 415 + }, + { + "epoch": 0.2526955201214882, + 
"grad_norm": 1.1115019195847107, + "learning_rate": 9.31414081597194e-06, + "loss": 0.3512, + "step": 416 + }, + { + "epoch": 0.2533029612756264, + "grad_norm": 1.2295268471367569, + "learning_rate": 9.30877327681152e-06, + "loss": 0.3503, + "step": 417 + }, + { + "epoch": 0.2539104024297646, + "grad_norm": 1.0786831844077154, + "learning_rate": 9.303386375356752e-06, + "loss": 0.3162, + "step": 418 + }, + { + "epoch": 0.25451784358390284, + "grad_norm": 1.0475351605214382, + "learning_rate": 9.297980135814706e-06, + "loss": 0.3103, + "step": 419 + }, + { + "epoch": 0.255125284738041, + "grad_norm": 1.0774726768233147, + "learning_rate": 9.292554582479349e-06, + "loss": 0.3187, + "step": 420 + }, + { + "epoch": 0.2557327258921792, + "grad_norm": 1.1341288746675326, + "learning_rate": 9.28710973973144e-06, + "loss": 0.3267, + "step": 421 + }, + { + "epoch": 0.2563401670463174, + "grad_norm": 1.7271345636020505, + "learning_rate": 9.281645632038417e-06, + "loss": 0.3329, + "step": 422 + }, + { + "epoch": 0.2569476082004556, + "grad_norm": 1.1865979526898363, + "learning_rate": 9.276162283954293e-06, + "loss": 0.3148, + "step": 423 + }, + { + "epoch": 0.2575550493545938, + "grad_norm": 1.050767524282803, + "learning_rate": 9.270659720119533e-06, + "loss": 0.3431, + "step": 424 + }, + { + "epoch": 0.25816249050873197, + "grad_norm": 1.196771620264985, + "learning_rate": 9.265137965260962e-06, + "loss": 0.3422, + "step": 425 + }, + { + "epoch": 0.25876993166287016, + "grad_norm": 1.0955500339958768, + "learning_rate": 9.259597044191635e-06, + "loss": 0.3195, + "step": 426 + }, + { + "epoch": 0.25937737281700834, + "grad_norm": 1.115960598799573, + "learning_rate": 9.254036981810741e-06, + "loss": 0.3238, + "step": 427 + }, + { + "epoch": 0.25998481397114653, + "grad_norm": 1.0005694364136173, + "learning_rate": 9.248457803103476e-06, + "loss": 0.309, + "step": 428 + }, + { + "epoch": 0.2605922551252847, + "grad_norm": 1.0726223391150986, + "learning_rate": 
9.242859533140947e-06, + "loss": 0.3031, + "step": 429 + }, + { + "epoch": 0.2611996962794229, + "grad_norm": 1.1657341953685123, + "learning_rate": 9.237242197080045e-06, + "loss": 0.2901, + "step": 430 + }, + { + "epoch": 0.2618071374335611, + "grad_norm": 1.013927670160624, + "learning_rate": 9.231605820163343e-06, + "loss": 0.2932, + "step": 431 + }, + { + "epoch": 0.26241457858769934, + "grad_norm": 1.1498383770847378, + "learning_rate": 9.225950427718974e-06, + "loss": 0.3333, + "step": 432 + }, + { + "epoch": 0.26302201974183753, + "grad_norm": 1.2571593199046411, + "learning_rate": 9.220276045160524e-06, + "loss": 0.3098, + "step": 433 + }, + { + "epoch": 0.2636294608959757, + "grad_norm": 1.004792456164235, + "learning_rate": 9.21458269798691e-06, + "loss": 0.2914, + "step": 434 + }, + { + "epoch": 0.2642369020501139, + "grad_norm": 1.0509819009913075, + "learning_rate": 9.208870411782276e-06, + "loss": 0.3191, + "step": 435 + }, + { + "epoch": 0.2648443432042521, + "grad_norm": 1.0808764700945872, + "learning_rate": 9.203139212215868e-06, + "loss": 0.3397, + "step": 436 + }, + { + "epoch": 0.2654517843583903, + "grad_norm": 1.9069945576857954, + "learning_rate": 9.197389125041925e-06, + "loss": 0.3696, + "step": 437 + }, + { + "epoch": 0.2660592255125285, + "grad_norm": 0.9868084753868711, + "learning_rate": 9.191620176099559e-06, + "loss": 0.2926, + "step": 438 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 1.236174799741463, + "learning_rate": 9.185832391312644e-06, + "loss": 0.3532, + "step": 439 + }, + { + "epoch": 0.26727410782080485, + "grad_norm": 1.085032203623591, + "learning_rate": 9.180025796689692e-06, + "loss": 0.3313, + "step": 440 + }, + { + "epoch": 0.26788154897494304, + "grad_norm": 0.9582279787310294, + "learning_rate": 9.174200418323746e-06, + "loss": 0.2886, + "step": 441 + }, + { + "epoch": 0.26848899012908123, + "grad_norm": 0.9169437747174185, + "learning_rate": 9.168356282392253e-06, + "loss": 0.2921, + "step": 442 + }, + 
{ + "epoch": 0.2690964312832194, + "grad_norm": 1.0775595515767766, + "learning_rate": 9.16249341515695e-06, + "loss": 0.3391, + "step": 443 + }, + { + "epoch": 0.2697038724373576, + "grad_norm": 1.0089375719934834, + "learning_rate": 9.156611842963753e-06, + "loss": 0.3159, + "step": 444 + }, + { + "epoch": 0.27031131359149585, + "grad_norm": 1.2417432186550288, + "learning_rate": 9.150711592242627e-06, + "loss": 0.3654, + "step": 445 + }, + { + "epoch": 0.27091875474563404, + "grad_norm": 1.0274792842367653, + "learning_rate": 9.144792689507471e-06, + "loss": 0.3107, + "step": 446 + }, + { + "epoch": 0.2715261958997722, + "grad_norm": 1.1641760376218897, + "learning_rate": 9.138855161356006e-06, + "loss": 0.3219, + "step": 447 + }, + { + "epoch": 0.2721336370539104, + "grad_norm": 1.038602346175002, + "learning_rate": 9.132899034469648e-06, + "loss": 0.3262, + "step": 448 + }, + { + "epoch": 0.2727410782080486, + "grad_norm": 1.0193170343688194, + "learning_rate": 9.126924335613385e-06, + "loss": 0.2947, + "step": 449 + }, + { + "epoch": 0.2733485193621868, + "grad_norm": 1.0412898987806527, + "learning_rate": 9.120931091635669e-06, + "loss": 0.2982, + "step": 450 + }, + { + "epoch": 0.273955960516325, + "grad_norm": 1.0086545373941327, + "learning_rate": 9.114919329468283e-06, + "loss": 0.3189, + "step": 451 + }, + { + "epoch": 0.27456340167046317, + "grad_norm": 0.9279048193591232, + "learning_rate": 9.108889076126226e-06, + "loss": 0.2653, + "step": 452 + }, + { + "epoch": 0.27517084282460136, + "grad_norm": 0.995776916618753, + "learning_rate": 9.102840358707594e-06, + "loss": 0.2785, + "step": 453 + }, + { + "epoch": 0.27577828397873955, + "grad_norm": 1.1209448523987642, + "learning_rate": 9.09677320439345e-06, + "loss": 0.3484, + "step": 454 + }, + { + "epoch": 0.27638572513287774, + "grad_norm": 1.0086795340037955, + "learning_rate": 9.090687640447709e-06, + "loss": 0.3039, + "step": 455 + }, + { + "epoch": 0.2769931662870159, + "grad_norm": 
1.0635853354673632, + "learning_rate": 9.084583694217012e-06, + "loss": 0.3368, + "step": 456 + }, + { + "epoch": 0.2776006074411541, + "grad_norm": 1.3855504059825723, + "learning_rate": 9.07846139313061e-06, + "loss": 0.3416, + "step": 457 + }, + { + "epoch": 0.27820804859529236, + "grad_norm": 1.0574619979291167, + "learning_rate": 9.072320764700223e-06, + "loss": 0.2921, + "step": 458 + }, + { + "epoch": 0.27881548974943055, + "grad_norm": 0.9945293438865139, + "learning_rate": 9.066161836519942e-06, + "loss": 0.2738, + "step": 459 + }, + { + "epoch": 0.27942293090356873, + "grad_norm": 1.0200515276803628, + "learning_rate": 9.059984636266082e-06, + "loss": 0.3244, + "step": 460 + }, + { + "epoch": 0.2800303720577069, + "grad_norm": 0.9552331626040478, + "learning_rate": 9.053789191697072e-06, + "loss": 0.2867, + "step": 461 + }, + { + "epoch": 0.2806378132118451, + "grad_norm": 0.9682413028760446, + "learning_rate": 9.047575530653324e-06, + "loss": 0.2914, + "step": 462 + }, + { + "epoch": 0.2812452543659833, + "grad_norm": 1.0309908640635839, + "learning_rate": 9.041343681057106e-06, + "loss": 0.2882, + "step": 463 + }, + { + "epoch": 0.2818526955201215, + "grad_norm": 1.038796502390337, + "learning_rate": 9.035093670912424e-06, + "loss": 0.2814, + "step": 464 + }, + { + "epoch": 0.2824601366742597, + "grad_norm": 1.1394035890796435, + "learning_rate": 9.028825528304892e-06, + "loss": 0.3444, + "step": 465 + }, + { + "epoch": 0.28306757782839786, + "grad_norm": 1.4464664575484871, + "learning_rate": 9.022539281401601e-06, + "loss": 0.3403, + "step": 466 + }, + { + "epoch": 0.28367501898253605, + "grad_norm": 1.725019753808846, + "learning_rate": 9.016234958451002e-06, + "loss": 0.3225, + "step": 467 + }, + { + "epoch": 0.28428246013667424, + "grad_norm": 1.390207557379254, + "learning_rate": 9.009912587782772e-06, + "loss": 0.2972, + "step": 468 + }, + { + "epoch": 0.28488990129081243, + "grad_norm": 1.1656375132150465, + "learning_rate": 
9.00357219780769e-06, + "loss": 0.3025, + "step": 469 + }, + { + "epoch": 0.2854973424449506, + "grad_norm": 2.458767731063712, + "learning_rate": 8.997213817017508e-06, + "loss": 0.3368, + "step": 470 + }, + { + "epoch": 0.28610478359908886, + "grad_norm": 1.0665249956470793, + "learning_rate": 8.990837473984818e-06, + "loss": 0.3208, + "step": 471 + }, + { + "epoch": 0.28671222475322705, + "grad_norm": 1.1467648154057113, + "learning_rate": 8.984443197362938e-06, + "loss": 0.2963, + "step": 472 + }, + { + "epoch": 0.28731966590736524, + "grad_norm": 1.188817694944298, + "learning_rate": 8.978031015885767e-06, + "loss": 0.3049, + "step": 473 + }, + { + "epoch": 0.28792710706150343, + "grad_norm": 0.9664646995536351, + "learning_rate": 8.971600958367668e-06, + "loss": 0.2873, + "step": 474 + }, + { + "epoch": 0.2885345482156416, + "grad_norm": 1.0103760175347862, + "learning_rate": 8.965153053703325e-06, + "loss": 0.2933, + "step": 475 + }, + { + "epoch": 0.2891419893697798, + "grad_norm": 0.969093412655753, + "learning_rate": 8.958687330867634e-06, + "loss": 0.3211, + "step": 476 + }, + { + "epoch": 0.289749430523918, + "grad_norm": 1.025864636561286, + "learning_rate": 8.952203818915548e-06, + "loss": 0.3216, + "step": 477 + }, + { + "epoch": 0.2903568716780562, + "grad_norm": 1.109916705902771, + "learning_rate": 8.94570254698197e-06, + "loss": 0.2862, + "step": 478 + }, + { + "epoch": 0.29096431283219437, + "grad_norm": 0.9713323047580559, + "learning_rate": 8.939183544281597e-06, + "loss": 0.3105, + "step": 479 + }, + { + "epoch": 0.29157175398633256, + "grad_norm": 1.081777308673117, + "learning_rate": 8.932646840108818e-06, + "loss": 0.3272, + "step": 480 + }, + { + "epoch": 0.29217919514047075, + "grad_norm": 0.967596641805163, + "learning_rate": 8.926092463837557e-06, + "loss": 0.2761, + "step": 481 + }, + { + "epoch": 0.29278663629460894, + "grad_norm": 1.155987695983836, + "learning_rate": 8.919520444921153e-06, + "loss": 0.3064, + "step": 482 + }, + { + 
"epoch": 0.2933940774487471, + "grad_norm": 1.3696382864069947, + "learning_rate": 8.912930812892228e-06, + "loss": 0.2865, + "step": 483 + }, + { + "epoch": 0.29400151860288537, + "grad_norm": 1.0400446945814443, + "learning_rate": 8.906323597362547e-06, + "loss": 0.2686, + "step": 484 + }, + { + "epoch": 0.29460895975702356, + "grad_norm": 1.0325058068789128, + "learning_rate": 8.899698828022895e-06, + "loss": 0.2879, + "step": 485 + }, + { + "epoch": 0.29521640091116175, + "grad_norm": 1.0137176891852828, + "learning_rate": 8.893056534642938e-06, + "loss": 0.3086, + "step": 486 + }, + { + "epoch": 0.29582384206529994, + "grad_norm": 1.2122910237107214, + "learning_rate": 8.886396747071085e-06, + "loss": 0.3277, + "step": 487 + }, + { + "epoch": 0.2964312832194381, + "grad_norm": 1.073984642104128, + "learning_rate": 8.879719495234363e-06, + "loss": 0.3181, + "step": 488 + }, + { + "epoch": 0.2970387243735763, + "grad_norm": 1.095543075839678, + "learning_rate": 8.873024809138272e-06, + "loss": 0.3102, + "step": 489 + }, + { + "epoch": 0.2976461655277145, + "grad_norm": 1.01242570236515, + "learning_rate": 8.866312718866669e-06, + "loss": 0.2998, + "step": 490 + }, + { + "epoch": 0.2982536066818527, + "grad_norm": 0.9488282118596906, + "learning_rate": 8.859583254581604e-06, + "loss": 0.3099, + "step": 491 + }, + { + "epoch": 0.2988610478359909, + "grad_norm": 0.9722813663051023, + "learning_rate": 8.852836446523213e-06, + "loss": 0.3386, + "step": 492 + }, + { + "epoch": 0.29946848899012907, + "grad_norm": 1.0326428240517584, + "learning_rate": 8.846072325009562e-06, + "loss": 0.2987, + "step": 493 + }, + { + "epoch": 0.30007593014426726, + "grad_norm": 1.0268489993356678, + "learning_rate": 8.83929092043652e-06, + "loss": 0.3282, + "step": 494 + }, + { + "epoch": 0.30068337129840544, + "grad_norm": 1.0504404390194961, + "learning_rate": 8.832492263277624e-06, + "loss": 0.331, + "step": 495 + }, + { + "epoch": 0.30129081245254363, + "grad_norm": 
1.0223568612198137, + "learning_rate": 8.825676384083936e-06, + "loss": 0.3073, + "step": 496 + }, + { + "epoch": 0.3018982536066819, + "grad_norm": 0.9798177174047413, + "learning_rate": 8.818843313483907e-06, + "loss": 0.2886, + "step": 497 + }, + { + "epoch": 0.30250569476082007, + "grad_norm": 1.0532247166652768, + "learning_rate": 8.811993082183243e-06, + "loss": 0.2974, + "step": 498 + }, + { + "epoch": 0.30311313591495825, + "grad_norm": 0.975121772940988, + "learning_rate": 8.805125720964766e-06, + "loss": 0.2997, + "step": 499 + }, + { + "epoch": 0.30372057706909644, + "grad_norm": 1.137742002586217, + "learning_rate": 8.798241260688273e-06, + "loss": 0.3188, + "step": 500 + }, + { + "epoch": 0.30432801822323463, + "grad_norm": 2.779671078767294, + "learning_rate": 8.791339732290398e-06, + "loss": 0.3156, + "step": 501 + }, + { + "epoch": 0.3049354593773728, + "grad_norm": 1.2822627424235575, + "learning_rate": 8.784421166784476e-06, + "loss": 0.3006, + "step": 502 + }, + { + "epoch": 0.305542900531511, + "grad_norm": 0.9483433674131612, + "learning_rate": 8.7774855952604e-06, + "loss": 0.2795, + "step": 503 + }, + { + "epoch": 0.3061503416856492, + "grad_norm": 1.0109525806584068, + "learning_rate": 8.770533048884483e-06, + "loss": 0.3045, + "step": 504 + }, + { + "epoch": 0.3067577828397874, + "grad_norm": 0.9625557196195801, + "learning_rate": 8.763563558899317e-06, + "loss": 0.2759, + "step": 505 + }, + { + "epoch": 0.3073652239939256, + "grad_norm": 1.0453305774145718, + "learning_rate": 8.756577156623636e-06, + "loss": 0.3117, + "step": 506 + }, + { + "epoch": 0.30797266514806376, + "grad_norm": 0.8875823603586328, + "learning_rate": 8.749573873452169e-06, + "loss": 0.2716, + "step": 507 + }, + { + "epoch": 0.30858010630220195, + "grad_norm": 0.968540908249718, + "learning_rate": 8.742553740855507e-06, + "loss": 0.2851, + "step": 508 + }, + { + "epoch": 0.30918754745634014, + "grad_norm": 0.9903789827417551, + "learning_rate": 8.735516790379952e-06, 
+ "loss": 0.2897, + "step": 509 + }, + { + "epoch": 0.3097949886104784, + "grad_norm": 1.0711211077943226, + "learning_rate": 8.728463053647382e-06, + "loss": 0.2584, + "step": 510 + }, + { + "epoch": 0.31040242976461657, + "grad_norm": 1.0473520524330442, + "learning_rate": 8.721392562355113e-06, + "loss": 0.3144, + "step": 511 + }, + { + "epoch": 0.31100987091875476, + "grad_norm": 1.0396429716589337, + "learning_rate": 8.71430534827574e-06, + "loss": 0.3046, + "step": 512 + }, + { + "epoch": 0.31161731207289295, + "grad_norm": 2.22911229260033, + "learning_rate": 8.707201443257015e-06, + "loss": 0.3096, + "step": 513 + }, + { + "epoch": 0.31222475322703114, + "grad_norm": 1.0461555035207433, + "learning_rate": 8.700080879221689e-06, + "loss": 0.3344, + "step": 514 + }, + { + "epoch": 0.3128321943811693, + "grad_norm": 1.0243099223034418, + "learning_rate": 8.692943688167371e-06, + "loss": 0.3317, + "step": 515 + }, + { + "epoch": 0.3134396355353075, + "grad_norm": 1.0001264307440594, + "learning_rate": 8.685789902166395e-06, + "loss": 0.3035, + "step": 516 + }, + { + "epoch": 0.3140470766894457, + "grad_norm": 1.067152048322313, + "learning_rate": 8.67861955336566e-06, + "loss": 0.305, + "step": 517 + }, + { + "epoch": 0.3146545178435839, + "grad_norm": 1.030162408720675, + "learning_rate": 8.671432673986493e-06, + "loss": 0.3161, + "step": 518 + }, + { + "epoch": 0.3152619589977221, + "grad_norm": 0.9760695402458939, + "learning_rate": 8.664229296324514e-06, + "loss": 0.3157, + "step": 519 + }, + { + "epoch": 0.31586940015186027, + "grad_norm": 0.9705147238493051, + "learning_rate": 8.657009452749466e-06, + "loss": 0.3048, + "step": 520 + }, + { + "epoch": 0.31647684130599846, + "grad_norm": 0.8938796766666607, + "learning_rate": 8.649773175705099e-06, + "loss": 0.2668, + "step": 521 + }, + { + "epoch": 0.3170842824601367, + "grad_norm": 1.011233752497999, + "learning_rate": 8.642520497709001e-06, + "loss": 0.3098, + "step": 522 + }, + { + "epoch": 
0.3176917236142749, + "grad_norm": 1.137929526930306, + "learning_rate": 8.635251451352463e-06, + "loss": 0.3015, + "step": 523 + }, + { + "epoch": 0.3182991647684131, + "grad_norm": 1.0130770256069037, + "learning_rate": 8.627966069300332e-06, + "loss": 0.3245, + "step": 524 + }, + { + "epoch": 0.31890660592255127, + "grad_norm": 1.1354507509321872, + "learning_rate": 8.620664384290863e-06, + "loss": 0.3039, + "step": 525 + }, + { + "epoch": 0.31951404707668946, + "grad_norm": 1.2939991366637584, + "learning_rate": 8.613346429135567e-06, + "loss": 0.3078, + "step": 526 + }, + { + "epoch": 0.32012148823082764, + "grad_norm": 1.1684109323065632, + "learning_rate": 8.606012236719073e-06, + "loss": 0.3385, + "step": 527 + }, + { + "epoch": 0.32072892938496583, + "grad_norm": 1.9632990216219541, + "learning_rate": 8.598661839998972e-06, + "loss": 0.2775, + "step": 528 + }, + { + "epoch": 0.321336370539104, + "grad_norm": 1.0999059016474897, + "learning_rate": 8.591295272005674e-06, + "loss": 0.2942, + "step": 529 + }, + { + "epoch": 0.3219438116932422, + "grad_norm": 1.1924126987732822, + "learning_rate": 8.583912565842258e-06, + "loss": 0.2957, + "step": 530 + }, + { + "epoch": 0.3225512528473804, + "grad_norm": 0.9830056635559057, + "learning_rate": 8.576513754684318e-06, + "loss": 0.2871, + "step": 531 + }, + { + "epoch": 0.3231586940015186, + "grad_norm": 1.003723254647249, + "learning_rate": 8.569098871779828e-06, + "loss": 0.3159, + "step": 532 + }, + { + "epoch": 0.3237661351556568, + "grad_norm": 1.1546050485627521, + "learning_rate": 8.561667950448973e-06, + "loss": 0.3274, + "step": 533 + }, + { + "epoch": 0.32437357630979496, + "grad_norm": 7.898389008169903, + "learning_rate": 8.554221024084019e-06, + "loss": 0.2923, + "step": 534 + }, + { + "epoch": 0.3249810174639332, + "grad_norm": 1.066007060725776, + "learning_rate": 8.546758126149148e-06, + "loss": 0.3172, + "step": 535 + }, + { + "epoch": 0.3255884586180714, + "grad_norm": 1.0571265266645584, + 
"learning_rate": 8.539279290180315e-06, + "loss": 0.3294, + "step": 536 + }, + { + "epoch": 0.3261958997722096, + "grad_norm": 1.1006676423033956, + "learning_rate": 8.531784549785098e-06, + "loss": 0.3524, + "step": 537 + }, + { + "epoch": 0.3268033409263478, + "grad_norm": 1.1991261856518842, + "learning_rate": 8.524273938642539e-06, + "loss": 0.3158, + "step": 538 + }, + { + "epoch": 0.32741078208048596, + "grad_norm": 1.0695433507748728, + "learning_rate": 8.516747490503001e-06, + "loss": 0.3318, + "step": 539 + }, + { + "epoch": 0.32801822323462415, + "grad_norm": 1.009828314645369, + "learning_rate": 8.509205239188017e-06, + "loss": 0.3034, + "step": 540 + }, + { + "epoch": 0.32862566438876234, + "grad_norm": 1.0614942016734776, + "learning_rate": 8.501647218590127e-06, + "loss": 0.3249, + "step": 541 + }, + { + "epoch": 0.32923310554290053, + "grad_norm": 1.0535816660890713, + "learning_rate": 8.494073462672743e-06, + "loss": 0.3245, + "step": 542 + }, + { + "epoch": 0.3298405466970387, + "grad_norm": 1.1116235063360127, + "learning_rate": 8.486484005469977e-06, + "loss": 0.3111, + "step": 543 + }, + { + "epoch": 0.3304479878511769, + "grad_norm": 1.2499481577520084, + "learning_rate": 8.478878881086505e-06, + "loss": 0.2774, + "step": 544 + }, + { + "epoch": 0.3310554290053151, + "grad_norm": 1.2434383006954242, + "learning_rate": 8.471258123697403e-06, + "loss": 0.3591, + "step": 545 + }, + { + "epoch": 0.3316628701594533, + "grad_norm": 2.629111128401328, + "learning_rate": 8.463621767547998e-06, + "loss": 0.2964, + "step": 546 + }, + { + "epoch": 0.33227031131359147, + "grad_norm": 1.0085205462907783, + "learning_rate": 8.455969846953711e-06, + "loss": 0.2782, + "step": 547 + }, + { + "epoch": 0.3328777524677297, + "grad_norm": 1.0091177434087597, + "learning_rate": 8.448302396299906e-06, + "loss": 0.2923, + "step": 548 + }, + { + "epoch": 0.3334851936218679, + "grad_norm": 0.9548710150038249, + "learning_rate": 8.440619450041736e-06, + "loss": 0.256, + 
"step": 549 + }, + { + "epoch": 0.3340926347760061, + "grad_norm": 1.0061487620910332, + "learning_rate": 8.432921042703985e-06, + "loss": 0.2978, + "step": 550 + }, + { + "epoch": 0.3347000759301443, + "grad_norm": 1.0205034766401575, + "learning_rate": 8.425207208880914e-06, + "loss": 0.3307, + "step": 551 + }, + { + "epoch": 0.33530751708428247, + "grad_norm": 0.9995329253562294, + "learning_rate": 8.417477983236107e-06, + "loss": 0.3149, + "step": 552 + }, + { + "epoch": 0.33591495823842066, + "grad_norm": 0.9454465727784129, + "learning_rate": 8.409733400502311e-06, + "loss": 0.3152, + "step": 553 + }, + { + "epoch": 0.33652239939255885, + "grad_norm": 1.1810082686737633, + "learning_rate": 8.401973495481289e-06, + "loss": 0.2706, + "step": 554 + }, + { + "epoch": 0.33712984054669703, + "grad_norm": 0.8806178339494458, + "learning_rate": 8.39419830304365e-06, + "loss": 0.2847, + "step": 555 + }, + { + "epoch": 0.3377372817008352, + "grad_norm": 1.1386249132431194, + "learning_rate": 8.386407858128707e-06, + "loss": 0.2851, + "step": 556 + }, + { + "epoch": 0.3383447228549734, + "grad_norm": 1.028806142818622, + "learning_rate": 8.378602195744308e-06, + "loss": 0.3078, + "step": 557 + }, + { + "epoch": 0.3389521640091116, + "grad_norm": 1.0457874244613665, + "learning_rate": 8.370781350966683e-06, + "loss": 0.3397, + "step": 558 + }, + { + "epoch": 0.3395596051632498, + "grad_norm": 0.8684256285368641, + "learning_rate": 8.362945358940295e-06, + "loss": 0.2842, + "step": 559 + }, + { + "epoch": 0.340167046317388, + "grad_norm": 0.9167915425454639, + "learning_rate": 8.355094254877665e-06, + "loss": 0.2658, + "step": 560 + }, + { + "epoch": 0.3407744874715262, + "grad_norm": 1.0174829618995838, + "learning_rate": 8.347228074059227e-06, + "loss": 0.3266, + "step": 561 + }, + { + "epoch": 0.3413819286256644, + "grad_norm": 0.960163727054609, + "learning_rate": 8.339346851833163e-06, + "loss": 0.2889, + "step": 562 + }, + { + "epoch": 0.3419893697798026, + 
"grad_norm": 0.9190612974975497, + "learning_rate": 8.33145062361525e-06, + "loss": 0.2993, + "step": 563 + }, + { + "epoch": 0.3425968109339408, + "grad_norm": 1.260688931755127, + "learning_rate": 8.323539424888695e-06, + "loss": 0.3011, + "step": 564 + }, + { + "epoch": 0.343204252088079, + "grad_norm": 0.8862857315713791, + "learning_rate": 8.315613291203977e-06, + "loss": 0.2745, + "step": 565 + }, + { + "epoch": 0.34381169324221716, + "grad_norm": 1.534384818793313, + "learning_rate": 8.30767225817869e-06, + "loss": 0.3015, + "step": 566 + }, + { + "epoch": 0.34441913439635535, + "grad_norm": 1.0421030425662317, + "learning_rate": 8.299716361497377e-06, + "loss": 0.2937, + "step": 567 + }, + { + "epoch": 0.34502657555049354, + "grad_norm": 1.078821559929711, + "learning_rate": 8.291745636911382e-06, + "loss": 0.3104, + "step": 568 + }, + { + "epoch": 0.34563401670463173, + "grad_norm": 1.0058600245745741, + "learning_rate": 8.283760120238672e-06, + "loss": 0.3077, + "step": 569 + }, + { + "epoch": 0.3462414578587699, + "grad_norm": 0.8608607521318042, + "learning_rate": 8.27575984736369e-06, + "loss": 0.2701, + "step": 570 + }, + { + "epoch": 0.3468488990129081, + "grad_norm": 1.0527010355534043, + "learning_rate": 8.26774485423719e-06, + "loss": 0.3196, + "step": 571 + }, + { + "epoch": 0.3474563401670463, + "grad_norm": 0.92875430774757, + "learning_rate": 8.259715176876069e-06, + "loss": 0.2782, + "step": 572 + }, + { + "epoch": 0.3480637813211845, + "grad_norm": 1.3921020583644004, + "learning_rate": 8.251670851363214e-06, + "loss": 0.3346, + "step": 573 + }, + { + "epoch": 0.34867122247532273, + "grad_norm": 0.9590626440697687, + "learning_rate": 8.243611913847337e-06, + "loss": 0.2824, + "step": 574 + }, + { + "epoch": 0.3492786636294609, + "grad_norm": 0.9520186243816708, + "learning_rate": 8.235538400542809e-06, + "loss": 0.27, + "step": 575 + }, + { + "epoch": 0.3498861047835991, + "grad_norm": 0.9591343436311952, + "learning_rate": 
8.2274503477295e-06, + "loss": 0.2719, + "step": 576 + }, + { + "epoch": 0.3504935459377373, + "grad_norm": 1.0279410178241586, + "learning_rate": 8.21934779175262e-06, + "loss": 0.3191, + "step": 577 + }, + { + "epoch": 0.3511009870918755, + "grad_norm": 0.941703751218152, + "learning_rate": 8.211230769022552e-06, + "loss": 0.302, + "step": 578 + }, + { + "epoch": 0.35170842824601367, + "grad_norm": 0.9536297735317896, + "learning_rate": 8.203099316014679e-06, + "loss": 0.3236, + "step": 579 + }, + { + "epoch": 0.35231586940015186, + "grad_norm": 1.0851175895827145, + "learning_rate": 8.19495346926924e-06, + "loss": 0.3174, + "step": 580 + }, + { + "epoch": 0.35292331055429005, + "grad_norm": 0.9375847401656001, + "learning_rate": 8.18679326539115e-06, + "loss": 0.2984, + "step": 581 + }, + { + "epoch": 0.35353075170842824, + "grad_norm": 1.2513870142868544, + "learning_rate": 8.178618741049841e-06, + "loss": 0.3142, + "step": 582 + }, + { + "epoch": 0.3541381928625664, + "grad_norm": 1.0326942039827636, + "learning_rate": 8.170429932979097e-06, + "loss": 0.3118, + "step": 583 + }, + { + "epoch": 0.3547456340167046, + "grad_norm": 0.97117910690105, + "learning_rate": 8.162226877976886e-06, + "loss": 0.3114, + "step": 584 + }, + { + "epoch": 0.3553530751708428, + "grad_norm": 1.105458493777876, + "learning_rate": 8.154009612905205e-06, + "loss": 0.3252, + "step": 585 + }, + { + "epoch": 0.355960516324981, + "grad_norm": 1.1341020674749815, + "learning_rate": 8.145778174689897e-06, + "loss": 0.3388, + "step": 586 + }, + { + "epoch": 0.35656795747911924, + "grad_norm": 0.8971554688120575, + "learning_rate": 8.137532600320502e-06, + "loss": 0.2955, + "step": 587 + }, + { + "epoch": 0.3571753986332574, + "grad_norm": 1.0151980524067494, + "learning_rate": 8.129272926850079e-06, + "loss": 0.2949, + "step": 588 + }, + { + "epoch": 0.3577828397873956, + "grad_norm": 0.941672453155367, + "learning_rate": 8.120999191395048e-06, + "loss": 0.2819, + "step": 589 + }, + { + 
"epoch": 0.3583902809415338, + "grad_norm": 1.057217581283722, + "learning_rate": 8.112711431135014e-06, + "loss": 0.288, + "step": 590 + }, + { + "epoch": 0.358997722095672, + "grad_norm": 1.375861607230768, + "learning_rate": 8.10440968331261e-06, + "loss": 0.2897, + "step": 591 + }, + { + "epoch": 0.3596051632498102, + "grad_norm": 6.874880808118071, + "learning_rate": 8.096093985233323e-06, + "loss": 0.3182, + "step": 592 + }, + { + "epoch": 0.36021260440394837, + "grad_norm": 1.0459697501354848, + "learning_rate": 8.087764374265325e-06, + "loss": 0.3171, + "step": 593 + }, + { + "epoch": 0.36082004555808656, + "grad_norm": 1.0424410896636964, + "learning_rate": 8.079420887839316e-06, + "loss": 0.2841, + "step": 594 + }, + { + "epoch": 0.36142748671222474, + "grad_norm": 0.9398782517670305, + "learning_rate": 8.071063563448341e-06, + "loss": 0.2975, + "step": 595 + }, + { + "epoch": 0.36203492786636293, + "grad_norm": 0.9740196516759212, + "learning_rate": 8.062692438647628e-06, + "loss": 0.3001, + "step": 596 + }, + { + "epoch": 0.3626423690205011, + "grad_norm": 1.1401753407703934, + "learning_rate": 8.054307551054427e-06, + "loss": 0.3006, + "step": 597 + }, + { + "epoch": 0.3632498101746393, + "grad_norm": 1.5471172495911225, + "learning_rate": 8.045908938347828e-06, + "loss": 0.2829, + "step": 598 + }, + { + "epoch": 0.3638572513287775, + "grad_norm": 1.024433089085474, + "learning_rate": 8.037496638268599e-06, + "loss": 0.3338, + "step": 599 + }, + { + "epoch": 0.36446469248291574, + "grad_norm": 1.0602623846991535, + "learning_rate": 8.029070688619013e-06, + "loss": 0.2817, + "step": 600 + }, + { + "epoch": 0.36507213363705393, + "grad_norm": 0.9832965892055614, + "learning_rate": 8.020631127262681e-06, + "loss": 0.2928, + "step": 601 + }, + { + "epoch": 0.3656795747911921, + "grad_norm": 1.0377272606893748, + "learning_rate": 8.012177992124385e-06, + "loss": 0.3163, + "step": 602 + }, + { + "epoch": 0.3662870159453303, + "grad_norm": 1.162756936534878, 
+ "learning_rate": 8.003711321189895e-06, + "loss": 0.3026, + "step": 603 + }, + { + "epoch": 0.3668944570994685, + "grad_norm": 0.9081590107485684, + "learning_rate": 7.995231152505815e-06, + "loss": 0.278, + "step": 604 + }, + { + "epoch": 0.3675018982536067, + "grad_norm": 1.0285104632076432, + "learning_rate": 7.986737524179398e-06, + "loss": 0.3198, + "step": 605 + }, + { + "epoch": 0.3681093394077449, + "grad_norm": 1.0089602950227836, + "learning_rate": 7.978230474378383e-06, + "loss": 0.2896, + "step": 606 + }, + { + "epoch": 0.36871678056188306, + "grad_norm": 0.9253892326788151, + "learning_rate": 7.96971004133082e-06, + "loss": 0.2973, + "step": 607 + }, + { + "epoch": 0.36932422171602125, + "grad_norm": 0.9373958997004501, + "learning_rate": 7.961176263324902e-06, + "loss": 0.2702, + "step": 608 + }, + { + "epoch": 0.36993166287015944, + "grad_norm": 1.0340689976239543, + "learning_rate": 7.952629178708783e-06, + "loss": 0.3086, + "step": 609 + }, + { + "epoch": 0.3705391040242976, + "grad_norm": 0.9434565199275107, + "learning_rate": 7.944068825890424e-06, + "loss": 0.2844, + "step": 610 + }, + { + "epoch": 0.3711465451784358, + "grad_norm": 2.0206497983922165, + "learning_rate": 7.935495243337397e-06, + "loss": 0.2996, + "step": 611 + }, + { + "epoch": 0.371753986332574, + "grad_norm": 1.220826721106363, + "learning_rate": 7.92690846957673e-06, + "loss": 0.2564, + "step": 612 + }, + { + "epoch": 0.37236142748671225, + "grad_norm": 0.9495640406098673, + "learning_rate": 7.918308543194735e-06, + "loss": 0.2789, + "step": 613 + }, + { + "epoch": 0.37296886864085044, + "grad_norm": 1.1839526557503743, + "learning_rate": 7.909695502836814e-06, + "loss": 0.3291, + "step": 614 + }, + { + "epoch": 0.3735763097949886, + "grad_norm": 0.9422030510378584, + "learning_rate": 7.90106938720731e-06, + "loss": 0.2658, + "step": 615 + }, + { + "epoch": 0.3741837509491268, + "grad_norm": 2.6158876200316943, + "learning_rate": 7.892430235069317e-06, + "loss": 0.3267, + 
"step": 616 + }, + { + "epoch": 0.374791192103265, + "grad_norm": 1.0569339252672818, + "learning_rate": 7.883778085244514e-06, + "loss": 0.3302, + "step": 617 + }, + { + "epoch": 0.3753986332574032, + "grad_norm": 0.9481970640626258, + "learning_rate": 7.875112976612984e-06, + "loss": 0.2825, + "step": 618 + }, + { + "epoch": 0.3760060744115414, + "grad_norm": 0.9971575781640729, + "learning_rate": 7.866434948113046e-06, + "loss": 0.2988, + "step": 619 + }, + { + "epoch": 0.37661351556567957, + "grad_norm": 0.9807473222056035, + "learning_rate": 7.857744038741076e-06, + "loss": 0.3192, + "step": 620 + }, + { + "epoch": 0.37722095671981776, + "grad_norm": 1.0243743769936524, + "learning_rate": 7.849040287551331e-06, + "loss": 0.3149, + "step": 621 + }, + { + "epoch": 0.37782839787395595, + "grad_norm": 0.9586280652231142, + "learning_rate": 7.84032373365578e-06, + "loss": 0.2815, + "step": 622 + }, + { + "epoch": 0.37843583902809413, + "grad_norm": 0.9369075735191363, + "learning_rate": 7.831594416223916e-06, + "loss": 0.3008, + "step": 623 + }, + { + "epoch": 0.3790432801822323, + "grad_norm": 3.308019410340958, + "learning_rate": 7.822852374482597e-06, + "loss": 0.3148, + "step": 624 + }, + { + "epoch": 0.37965072133637057, + "grad_norm": 1.164444173629437, + "learning_rate": 7.814097647715848e-06, + "loss": 0.3058, + "step": 625 + }, + { + "epoch": 0.38025816249050876, + "grad_norm": 1.4177016659749746, + "learning_rate": 7.805330275264707e-06, + "loss": 0.2889, + "step": 626 + }, + { + "epoch": 0.38086560364464694, + "grad_norm": 1.4357393277200918, + "learning_rate": 7.796550296527032e-06, + "loss": 0.2636, + "step": 627 + }, + { + "epoch": 0.38147304479878513, + "grad_norm": 1.0253772001951136, + "learning_rate": 7.787757750957335e-06, + "loss": 0.3026, + "step": 628 + }, + { + "epoch": 0.3820804859529233, + "grad_norm": 0.926193976032409, + "learning_rate": 7.778952678066591e-06, + "loss": 0.2613, + "step": 629 + }, + { + "epoch": 0.3826879271070615, + 
"grad_norm": 0.9522660951838047, + "learning_rate": 7.77013511742208e-06, + "loss": 0.3146, + "step": 630 + }, + { + "epoch": 0.3832953682611997, + "grad_norm": 1.0242396609306572, + "learning_rate": 7.761305108647188e-06, + "loss": 0.2957, + "step": 631 + }, + { + "epoch": 0.3839028094153379, + "grad_norm": 0.9802706086421458, + "learning_rate": 7.752462691421245e-06, + "loss": 0.2947, + "step": 632 + }, + { + "epoch": 0.3845102505694761, + "grad_norm": 0.9593635598943385, + "learning_rate": 7.743607905479338e-06, + "loss": 0.3063, + "step": 633 + }, + { + "epoch": 0.38511769172361426, + "grad_norm": 0.9149349282493551, + "learning_rate": 7.734740790612137e-06, + "loss": 0.2824, + "step": 634 + }, + { + "epoch": 0.38572513287775245, + "grad_norm": 0.8104078792665621, + "learning_rate": 7.72586138666571e-06, + "loss": 0.2546, + "step": 635 + }, + { + "epoch": 0.38633257403189064, + "grad_norm": 1.0286565476756544, + "learning_rate": 7.716969733541357e-06, + "loss": 0.2704, + "step": 636 + }, + { + "epoch": 0.38694001518602883, + "grad_norm": 0.8490863136577027, + "learning_rate": 7.708065871195413e-06, + "loss": 0.2606, + "step": 637 + }, + { + "epoch": 0.3875474563401671, + "grad_norm": 1.0903672375251494, + "learning_rate": 7.699149839639086e-06, + "loss": 0.3175, + "step": 638 + }, + { + "epoch": 0.38815489749430526, + "grad_norm": 1.4642223666112006, + "learning_rate": 7.690221678938258e-06, + "loss": 0.3641, + "step": 639 + }, + { + "epoch": 0.38876233864844345, + "grad_norm": 0.9534979437652094, + "learning_rate": 7.681281429213328e-06, + "loss": 0.2731, + "step": 640 + }, + { + "epoch": 0.38936977980258164, + "grad_norm": 1.1167627986990094, + "learning_rate": 7.672329130639007e-06, + "loss": 0.2791, + "step": 641 + }, + { + "epoch": 0.38997722095671983, + "grad_norm": 1.1634813414266154, + "learning_rate": 7.663364823444157e-06, + "loss": 0.3173, + "step": 642 + }, + { + "epoch": 0.390584662110858, + "grad_norm": 0.9674791602076483, + "learning_rate": 
7.654388547911605e-06, + "loss": 0.3198, + "step": 643 + }, + { + "epoch": 0.3911921032649962, + "grad_norm": 0.9326003350894531, + "learning_rate": 7.645400344377953e-06, + "loss": 0.2446, + "step": 644 + }, + { + "epoch": 0.3917995444191344, + "grad_norm": 1.0664043566671932, + "learning_rate": 7.63640025323341e-06, + "loss": 0.2897, + "step": 645 + }, + { + "epoch": 0.3924069855732726, + "grad_norm": 1.1042940465708586, + "learning_rate": 7.627388314921602e-06, + "loss": 0.2964, + "step": 646 + }, + { + "epoch": 0.39301442672741077, + "grad_norm": 0.9931594882104945, + "learning_rate": 7.61836456993939e-06, + "loss": 0.28, + "step": 647 + }, + { + "epoch": 0.39362186788154896, + "grad_norm": 1.471343399649596, + "learning_rate": 7.609329058836694e-06, + "loss": 0.3354, + "step": 648 + }, + { + "epoch": 0.39422930903568715, + "grad_norm": 1.368059808767781, + "learning_rate": 7.600281822216307e-06, + "loss": 0.312, + "step": 649 + }, + { + "epoch": 0.39483675018982534, + "grad_norm": 0.9867176052292568, + "learning_rate": 7.59122290073371e-06, + "loss": 0.2691, + "step": 650 + }, + { + "epoch": 0.3954441913439636, + "grad_norm": 1.0007304468451435, + "learning_rate": 7.582152335096896e-06, + "loss": 0.2817, + "step": 651 + }, + { + "epoch": 0.39605163249810177, + "grad_norm": 1.0529002222158175, + "learning_rate": 7.5730701660661795e-06, + "loss": 0.2804, + "step": 652 + }, + { + "epoch": 0.39665907365223996, + "grad_norm": 0.9111676203769294, + "learning_rate": 7.563976434454021e-06, + "loss": 0.2674, + "step": 653 + }, + { + "epoch": 0.39726651480637815, + "grad_norm": 0.928422868586772, + "learning_rate": 7.554871181124836e-06, + "loss": 0.2842, + "step": 654 + }, + { + "epoch": 0.39787395596051633, + "grad_norm": 0.9776630589483184, + "learning_rate": 7.5457544469948164e-06, + "loss": 0.2891, + "step": 655 + }, + { + "epoch": 0.3984813971146545, + "grad_norm": 0.9696610718502006, + "learning_rate": 7.536626273031747e-06, + "loss": 0.2815, + "step": 656 + }, + 
{ + "epoch": 0.3990888382687927, + "grad_norm": 0.9352484358853785, + "learning_rate": 7.5274867002548154e-06, + "loss": 0.2666, + "step": 657 + }, + { + "epoch": 0.3996962794229309, + "grad_norm": 0.9663340246117754, + "learning_rate": 7.5183357697344395e-06, + "loss": 0.2834, + "step": 658 + }, + { + "epoch": 0.4003037205770691, + "grad_norm": 1.0770927677521116, + "learning_rate": 7.509173522592066e-06, + "loss": 0.3175, + "step": 659 + }, + { + "epoch": 0.4009111617312073, + "grad_norm": 0.9005571117374582, + "learning_rate": 7.500000000000001e-06, + "loss": 0.2918, + "step": 660 + }, + { + "epoch": 0.40151860288534547, + "grad_norm": 1.053393785111227, + "learning_rate": 7.4908152431812175e-06, + "loss": 0.2636, + "step": 661 + }, + { + "epoch": 0.40212604403948365, + "grad_norm": 0.9355577937726377, + "learning_rate": 7.481619293409173e-06, + "loss": 0.2656, + "step": 662 + }, + { + "epoch": 0.40273348519362184, + "grad_norm": 1.067579553680483, + "learning_rate": 7.472412192007619e-06, + "loss": 0.2734, + "step": 663 + }, + { + "epoch": 0.4033409263477601, + "grad_norm": 0.8813079944231941, + "learning_rate": 7.4631939803504215e-06, + "loss": 0.2411, + "step": 664 + }, + { + "epoch": 0.4039483675018983, + "grad_norm": 1.008354055836425, + "learning_rate": 7.453964699861376e-06, + "loss": 0.2621, + "step": 665 + }, + { + "epoch": 0.40455580865603646, + "grad_norm": 1.3145794725051865, + "learning_rate": 7.44472439201401e-06, + "loss": 0.3353, + "step": 666 + }, + { + "epoch": 0.40516324981017465, + "grad_norm": 1.0499020867013482, + "learning_rate": 7.435473098331411e-06, + "loss": 0.309, + "step": 667 + }, + { + "epoch": 0.40577069096431284, + "grad_norm": 1.0498570111550733, + "learning_rate": 7.426210860386032e-06, + "loss": 0.2863, + "step": 668 + }, + { + "epoch": 0.40637813211845103, + "grad_norm": 0.9381854841212456, + "learning_rate": 7.416937719799502e-06, + "loss": 0.3162, + "step": 669 + }, + { + "epoch": 0.4069855732725892, + "grad_norm": 
1.0356224890807904, + "learning_rate": 7.407653718242449e-06, + "loss": 0.2835, + "step": 670 + }, + { + "epoch": 0.4075930144267274, + "grad_norm": 1.1471319493901884, + "learning_rate": 7.398358897434303e-06, + "loss": 0.2995, + "step": 671 + }, + { + "epoch": 0.4082004555808656, + "grad_norm": 0.9661407586663348, + "learning_rate": 7.3890532991431174e-06, + "loss": 0.2815, + "step": 672 + }, + { + "epoch": 0.4088078967350038, + "grad_norm": 1.236474905505436, + "learning_rate": 7.379736965185369e-06, + "loss": 0.3244, + "step": 673 + }, + { + "epoch": 0.409415337889142, + "grad_norm": 1.0600796964601655, + "learning_rate": 7.370409937425781e-06, + "loss": 0.2994, + "step": 674 + }, + { + "epoch": 0.41002277904328016, + "grad_norm": 0.9816189966958595, + "learning_rate": 7.361072257777132e-06, + "loss": 0.3046, + "step": 675 + }, + { + "epoch": 0.41063022019741835, + "grad_norm": 1.1164647876294425, + "learning_rate": 7.3517239682000675e-06, + "loss": 0.2676, + "step": 676 + }, + { + "epoch": 0.4112376613515566, + "grad_norm": 0.836806710584883, + "learning_rate": 7.342365110702907e-06, + "loss": 0.2233, + "step": 677 + }, + { + "epoch": 0.4118451025056948, + "grad_norm": 1.0083900885239343, + "learning_rate": 7.332995727341462e-06, + "loss": 0.3282, + "step": 678 + }, + { + "epoch": 0.41245254365983297, + "grad_norm": 0.8982701658241349, + "learning_rate": 7.323615860218844e-06, + "loss": 0.2366, + "step": 679 + }, + { + "epoch": 0.41305998481397116, + "grad_norm": 0.8535635256156215, + "learning_rate": 7.314225551485273e-06, + "loss": 0.2424, + "step": 680 + }, + { + "epoch": 0.41366742596810935, + "grad_norm": 1.122301845672972, + "learning_rate": 7.304824843337893e-06, + "loss": 0.3075, + "step": 681 + }, + { + "epoch": 0.41427486712224754, + "grad_norm": 0.9413071187408356, + "learning_rate": 7.295413778020579e-06, + "loss": 0.2862, + "step": 682 + }, + { + "epoch": 0.4148823082763857, + "grad_norm": 1.0778162018478468, + "learning_rate": 
7.285992397823747e-06, + "loss": 0.3039, + "step": 683 + }, + { + "epoch": 0.4154897494305239, + "grad_norm": 1.0058545387610198, + "learning_rate": 7.276560745084167e-06, + "loss": 0.2982, + "step": 684 + }, + { + "epoch": 0.4160971905846621, + "grad_norm": 1.247146796633824, + "learning_rate": 7.267118862184767e-06, + "loss": 0.2769, + "step": 685 + }, + { + "epoch": 0.4167046317388003, + "grad_norm": 0.9580540977436537, + "learning_rate": 7.257666791554448e-06, + "loss": 0.3057, + "step": 686 + }, + { + "epoch": 0.4173120728929385, + "grad_norm": 4.047797878525422, + "learning_rate": 7.248204575667893e-06, + "loss": 0.2686, + "step": 687 + }, + { + "epoch": 0.41791951404707667, + "grad_norm": 0.9842953093021652, + "learning_rate": 7.2387322570453724e-06, + "loss": 0.2819, + "step": 688 + }, + { + "epoch": 0.41852695520121486, + "grad_norm": 0.9798887505619451, + "learning_rate": 7.229249878252558e-06, + "loss": 0.2659, + "step": 689 + }, + { + "epoch": 0.4191343963553531, + "grad_norm": 1.0112521392212896, + "learning_rate": 7.219757481900325e-06, + "loss": 0.2878, + "step": 690 + }, + { + "epoch": 0.4197418375094913, + "grad_norm": 0.9383147067402705, + "learning_rate": 7.210255110644569e-06, + "loss": 0.3096, + "step": 691 + }, + { + "epoch": 0.4203492786636295, + "grad_norm": 0.9751290815844371, + "learning_rate": 7.2007428071860045e-06, + "loss": 0.3036, + "step": 692 + }, + { + "epoch": 0.42095671981776767, + "grad_norm": 0.954940864157478, + "learning_rate": 7.191220614269981e-06, + "loss": 0.2748, + "step": 693 + }, + { + "epoch": 0.42156416097190585, + "grad_norm": 0.955968898320391, + "learning_rate": 7.181688574686292e-06, + "loss": 0.294, + "step": 694 + }, + { + "epoch": 0.42217160212604404, + "grad_norm": 1.0024277031805326, + "learning_rate": 7.17214673126897e-06, + "loss": 0.2817, + "step": 695 + }, + { + "epoch": 0.42277904328018223, + "grad_norm": 1.1088991455813102, + "learning_rate": 7.162595126896111e-06, + "loss": 0.2699, + "step": 696 + }, 
+ { + "epoch": 0.4233864844343204, + "grad_norm": 0.9068874217571249, + "learning_rate": 7.15303380448967e-06, + "loss": 0.2666, + "step": 697 + }, + { + "epoch": 0.4239939255884586, + "grad_norm": 0.9846540050512566, + "learning_rate": 7.143462807015271e-06, + "loss": 0.2996, + "step": 698 + }, + { + "epoch": 0.4246013667425968, + "grad_norm": 1.2920144608956823, + "learning_rate": 7.133882177482019e-06, + "loss": 0.283, + "step": 699 + }, + { + "epoch": 0.425208807896735, + "grad_norm": 0.9844981703237418, + "learning_rate": 7.1242919589422974e-06, + "loss": 0.283, + "step": 700 + }, + { + "epoch": 0.4258162490508732, + "grad_norm": 1.0792092917236789, + "learning_rate": 7.114692194491583e-06, + "loss": 0.2771, + "step": 701 + }, + { + "epoch": 0.42642369020501136, + "grad_norm": 1.1366050037820674, + "learning_rate": 7.105082927268247e-06, + "loss": 0.3462, + "step": 702 + }, + { + "epoch": 0.4270311313591496, + "grad_norm": 1.9892569077431685, + "learning_rate": 7.095464200453366e-06, + "loss": 0.2657, + "step": 703 + }, + { + "epoch": 0.4276385725132878, + "grad_norm": 1.030861638233516, + "learning_rate": 7.085836057270521e-06, + "loss": 0.3113, + "step": 704 + }, + { + "epoch": 0.428246013667426, + "grad_norm": 1.070680183715172, + "learning_rate": 7.07619854098561e-06, + "loss": 0.2622, + "step": 705 + }, + { + "epoch": 0.4288534548215642, + "grad_norm": 1.0086658824371333, + "learning_rate": 7.066551694906651e-06, + "loss": 0.254, + "step": 706 + }, + { + "epoch": 0.42946089597570236, + "grad_norm": 1.2178606474849707, + "learning_rate": 7.056895562383585e-06, + "loss": 0.283, + "step": 707 + }, + { + "epoch": 0.43006833712984055, + "grad_norm": 1.0905334429821039, + "learning_rate": 7.047230186808085e-06, + "loss": 0.2979, + "step": 708 + }, + { + "epoch": 0.43067577828397874, + "grad_norm": 1.0513698337032553, + "learning_rate": 7.0375556116133605e-06, + "loss": 0.3149, + "step": 709 + }, + { + "epoch": 0.4312832194381169, + "grad_norm": 
0.905817609480293, + "learning_rate": 7.027871880273959e-06, + "loss": 0.267, + "step": 710 + }, + { + "epoch": 0.4318906605922551, + "grad_norm": 0.8779355405635809, + "learning_rate": 7.018179036305574e-06, + "loss": 0.2777, + "step": 711 + }, + { + "epoch": 0.4324981017463933, + "grad_norm": 0.9741991469020347, + "learning_rate": 7.008477123264849e-06, + "loss": 0.2881, + "step": 712 + }, + { + "epoch": 0.4331055429005315, + "grad_norm": 1.0958805822729734, + "learning_rate": 6.9987661847491786e-06, + "loss": 0.2688, + "step": 713 + }, + { + "epoch": 0.4337129840546697, + "grad_norm": 0.9812819976899622, + "learning_rate": 6.989046264396516e-06, + "loss": 0.3073, + "step": 714 + }, + { + "epoch": 0.43432042520880787, + "grad_norm": 0.9280917573243459, + "learning_rate": 6.9793174058851805e-06, + "loss": 0.2942, + "step": 715 + }, + { + "epoch": 0.4349278663629461, + "grad_norm": 0.9208122228856829, + "learning_rate": 6.96957965293365e-06, + "loss": 0.2603, + "step": 716 + }, + { + "epoch": 0.4355353075170843, + "grad_norm": 1.0481626035767557, + "learning_rate": 6.959833049300376e-06, + "loss": 0.2659, + "step": 717 + }, + { + "epoch": 0.4361427486712225, + "grad_norm": 0.8888068906740508, + "learning_rate": 6.9500776387835785e-06, + "loss": 0.2626, + "step": 718 + }, + { + "epoch": 0.4367501898253607, + "grad_norm": 0.9005255044425835, + "learning_rate": 6.940313465221057e-06, + "loss": 0.283, + "step": 719 + }, + { + "epoch": 0.43735763097949887, + "grad_norm": 0.9313053275341852, + "learning_rate": 6.9305405724899876e-06, + "loss": 0.2878, + "step": 720 + }, + { + "epoch": 0.43796507213363706, + "grad_norm": 1.0001487266577187, + "learning_rate": 6.920759004506723e-06, + "loss": 0.2381, + "step": 721 + }, + { + "epoch": 0.43857251328777525, + "grad_norm": 1.104902424962282, + "learning_rate": 6.91096880522661e-06, + "loss": 0.3125, + "step": 722 + }, + { + "epoch": 0.43917995444191343, + "grad_norm": 0.9710695360675384, + "learning_rate": 
6.90117001864377e-06, + "loss": 0.2778, + "step": 723 + }, + { + "epoch": 0.4397873955960516, + "grad_norm": 0.8790773492913548, + "learning_rate": 6.891362688790925e-06, + "loss": 0.2741, + "step": 724 + }, + { + "epoch": 0.4403948367501898, + "grad_norm": 1.27448461099361, + "learning_rate": 6.8815468597391785e-06, + "loss": 0.2961, + "step": 725 + }, + { + "epoch": 0.441002277904328, + "grad_norm": 1.8340194618418497, + "learning_rate": 6.871722575597829e-06, + "loss": 0.2806, + "step": 726 + }, + { + "epoch": 0.4416097190584662, + "grad_norm": 0.9529710613769301, + "learning_rate": 6.8618898805141744e-06, + "loss": 0.2788, + "step": 727 + }, + { + "epoch": 0.4422171602126044, + "grad_norm": 1.1180921584509682, + "learning_rate": 6.8520488186733e-06, + "loss": 0.2916, + "step": 728 + }, + { + "epoch": 0.4428246013667426, + "grad_norm": 0.9569903924698052, + "learning_rate": 6.8421994342979e-06, + "loss": 0.297, + "step": 729 + }, + { + "epoch": 0.4434320425208808, + "grad_norm": 0.9663109439234678, + "learning_rate": 6.832341771648057e-06, + "loss": 0.2934, + "step": 730 + }, + { + "epoch": 0.444039483675019, + "grad_norm": 0.9525035732233259, + "learning_rate": 6.822475875021057e-06, + "loss": 0.2924, + "step": 731 + }, + { + "epoch": 0.4446469248291572, + "grad_norm": 1.6736139762109075, + "learning_rate": 6.812601788751192e-06, + "loss": 0.2875, + "step": 732 + }, + { + "epoch": 0.4452543659832954, + "grad_norm": 1.3961577247630492, + "learning_rate": 6.802719557209547e-06, + "loss": 0.2723, + "step": 733 + }, + { + "epoch": 0.44586180713743356, + "grad_norm": 1.0192583283909253, + "learning_rate": 6.792829224803816e-06, + "loss": 0.2902, + "step": 734 + }, + { + "epoch": 0.44646924829157175, + "grad_norm": 1.403077195941947, + "learning_rate": 6.782930835978094e-06, + "loss": 0.3298, + "step": 735 + }, + { + "epoch": 0.44707668944570994, + "grad_norm": 0.912658050114097, + "learning_rate": 6.773024435212678e-06, + "loss": 0.2654, + "step": 736 + }, + { + 
"epoch": 0.44768413059984813, + "grad_norm": 1.0482352218310602, + "learning_rate": 6.76311006702387e-06, + "loss": 0.27, + "step": 737 + }, + { + "epoch": 0.4482915717539863, + "grad_norm": 0.8572502039817809, + "learning_rate": 6.753187775963773e-06, + "loss": 0.245, + "step": 738 + }, + { + "epoch": 0.4488990129081245, + "grad_norm": 1.0913308805109663, + "learning_rate": 6.743257606620094e-06, + "loss": 0.2551, + "step": 739 + }, + { + "epoch": 0.4495064540622627, + "grad_norm": 0.9439876776084938, + "learning_rate": 6.733319603615941e-06, + "loss": 0.274, + "step": 740 + }, + { + "epoch": 0.45011389521640094, + "grad_norm": 0.9593594109115189, + "learning_rate": 6.723373811609628e-06, + "loss": 0.2698, + "step": 741 + }, + { + "epoch": 0.4507213363705391, + "grad_norm": 1.065483676595223, + "learning_rate": 6.713420275294467e-06, + "loss": 0.3096, + "step": 742 + }, + { + "epoch": 0.4513287775246773, + "grad_norm": 1.0245095563483146, + "learning_rate": 6.703459039398571e-06, + "loss": 0.3101, + "step": 743 + }, + { + "epoch": 0.4519362186788155, + "grad_norm": 0.8544225145366473, + "learning_rate": 6.693490148684654e-06, + "loss": 0.2478, + "step": 744 + }, + { + "epoch": 0.4525436598329537, + "grad_norm": 1.1603691969888617, + "learning_rate": 6.683513647949826e-06, + "loss": 0.3075, + "step": 745 + }, + { + "epoch": 0.4531511009870919, + "grad_norm": 1.052592239520445, + "learning_rate": 6.673529582025398e-06, + "loss": 0.2737, + "step": 746 + }, + { + "epoch": 0.45375854214123007, + "grad_norm": 3.117949544460077, + "learning_rate": 6.66353799577667e-06, + "loss": 0.2791, + "step": 747 + }, + { + "epoch": 0.45436598329536826, + "grad_norm": 0.9613711534282726, + "learning_rate": 6.653538934102743e-06, + "loss": 0.3014, + "step": 748 + }, + { + "epoch": 0.45497342444950645, + "grad_norm": 0.9748105893226726, + "learning_rate": 6.643532441936307e-06, + "loss": 0.2749, + "step": 749 + }, + { + "epoch": 0.45558086560364464, + "grad_norm": 0.981233876336576, + 
"learning_rate": 6.633518564243442e-06, + "loss": 0.2971, + "step": 750 + }, + { + "epoch": 0.4561883067577828, + "grad_norm": 0.9457213271619488, + "learning_rate": 6.6234973460234184e-06, + "loss": 0.2868, + "step": 751 + }, + { + "epoch": 0.456795747911921, + "grad_norm": 2.686802516445107, + "learning_rate": 6.6134688323084884e-06, + "loss": 0.2731, + "step": 752 + }, + { + "epoch": 0.4574031890660592, + "grad_norm": 0.9223972209238886, + "learning_rate": 6.603433068163694e-06, + "loss": 0.2616, + "step": 753 + }, + { + "epoch": 0.45801063022019745, + "grad_norm": 1.025121154265423, + "learning_rate": 6.593390098686653e-06, + "loss": 0.2907, + "step": 754 + }, + { + "epoch": 0.45861807137433563, + "grad_norm": 0.9622665588756997, + "learning_rate": 6.583339969007364e-06, + "loss": 0.3044, + "step": 755 + }, + { + "epoch": 0.4592255125284738, + "grad_norm": 0.9057032918476721, + "learning_rate": 6.573282724288001e-06, + "loss": 0.2728, + "step": 756 + }, + { + "epoch": 0.459832953682612, + "grad_norm": 0.9234610842082313, + "learning_rate": 6.563218409722712e-06, + "loss": 0.276, + "step": 757 + }, + { + "epoch": 0.4604403948367502, + "grad_norm": 0.958933012871059, + "learning_rate": 6.553147070537413e-06, + "loss": 0.2777, + "step": 758 + }, + { + "epoch": 0.4610478359908884, + "grad_norm": 1.0680318113393608, + "learning_rate": 6.543068751989585e-06, + "loss": 0.2765, + "step": 759 + }, + { + "epoch": 0.4616552771450266, + "grad_norm": 0.9152338969224387, + "learning_rate": 6.532983499368078e-06, + "loss": 0.2931, + "step": 760 + }, + { + "epoch": 0.46226271829916477, + "grad_norm": 0.9026579775576341, + "learning_rate": 6.522891357992895e-06, + "loss": 0.2519, + "step": 761 + }, + { + "epoch": 0.46287015945330295, + "grad_norm": 0.9283253808655062, + "learning_rate": 6.512792373215e-06, + "loss": 0.2804, + "step": 762 + }, + { + "epoch": 0.46347760060744114, + "grad_norm": 0.9173677882111563, + "learning_rate": 6.502686590416105e-06, + "loss": 0.2734, + 
"step": 763 + }, + { + "epoch": 0.46408504176157933, + "grad_norm": 0.9717106384462386, + "learning_rate": 6.492574055008474e-06, + "loss": 0.2671, + "step": 764 + }, + { + "epoch": 0.4646924829157175, + "grad_norm": 1.0114399277783983, + "learning_rate": 6.482454812434711e-06, + "loss": 0.2843, + "step": 765 + }, + { + "epoch": 0.4652999240698557, + "grad_norm": 0.9171006340811538, + "learning_rate": 6.472328908167562e-06, + "loss": 0.2744, + "step": 766 + }, + { + "epoch": 0.46590736522399395, + "grad_norm": 1.0070946523066167, + "learning_rate": 6.4621963877097105e-06, + "loss": 0.2838, + "step": 767 + }, + { + "epoch": 0.46651480637813214, + "grad_norm": 0.9699071928177333, + "learning_rate": 6.452057296593568e-06, + "loss": 0.3075, + "step": 768 + }, + { + "epoch": 0.46712224753227033, + "grad_norm": 1.2714437302384922, + "learning_rate": 6.441911680381074e-06, + "loss": 0.2803, + "step": 769 + }, + { + "epoch": 0.4677296886864085, + "grad_norm": 0.8642721638499515, + "learning_rate": 6.431759584663492e-06, + "loss": 0.2594, + "step": 770 + }, + { + "epoch": 0.4683371298405467, + "grad_norm": 0.9642319769554307, + "learning_rate": 6.421601055061195e-06, + "loss": 0.294, + "step": 771 + }, + { + "epoch": 0.4689445709946849, + "grad_norm": 0.9718176611982676, + "learning_rate": 6.411436137223479e-06, + "loss": 0.276, + "step": 772 + }, + { + "epoch": 0.4695520121488231, + "grad_norm": 1.044555593128521, + "learning_rate": 6.401264876828335e-06, + "loss": 0.2739, + "step": 773 + }, + { + "epoch": 0.47015945330296127, + "grad_norm": 1.2599360050994348, + "learning_rate": 6.391087319582264e-06, + "loss": 0.2689, + "step": 774 + }, + { + "epoch": 0.47076689445709946, + "grad_norm": 1.0049522316698594, + "learning_rate": 6.38090351122006e-06, + "loss": 0.2523, + "step": 775 + }, + { + "epoch": 0.47137433561123765, + "grad_norm": 0.966627950881477, + "learning_rate": 6.370713497504607e-06, + "loss": 0.2443, + "step": 776 + }, + { + "epoch": 0.47198177676537584, + 
"grad_norm": 0.9792802121829564, + "learning_rate": 6.360517324226676e-06, + "loss": 0.2783, + "step": 777 + }, + { + "epoch": 0.472589217919514, + "grad_norm": 0.8715410479118867, + "learning_rate": 6.350315037204714e-06, + "loss": 0.272, + "step": 778 + }, + { + "epoch": 0.4731966590736522, + "grad_norm": 1.0001665140104699, + "learning_rate": 6.340106682284645e-06, + "loss": 0.2838, + "step": 779 + }, + { + "epoch": 0.47380410022779046, + "grad_norm": 0.934121158767379, + "learning_rate": 6.329892305339659e-06, + "loss": 0.2557, + "step": 780 + }, + { + "epoch": 0.47441154138192865, + "grad_norm": 1.00237931674153, + "learning_rate": 6.319671952270004e-06, + "loss": 0.2729, + "step": 781 + }, + { + "epoch": 0.47501898253606684, + "grad_norm": 1.222988126809238, + "learning_rate": 6.309445669002787e-06, + "loss": 0.2493, + "step": 782 + }, + { + "epoch": 0.475626423690205, + "grad_norm": 1.107165014150917, + "learning_rate": 6.299213501491761e-06, + "loss": 0.3008, + "step": 783 + }, + { + "epoch": 0.4762338648443432, + "grad_norm": 1.2066643126228012, + "learning_rate": 6.288975495717124e-06, + "loss": 0.2867, + "step": 784 + }, + { + "epoch": 0.4768413059984814, + "grad_norm": 0.9028058894006271, + "learning_rate": 6.2787316976853045e-06, + "loss": 0.2495, + "step": 785 + }, + { + "epoch": 0.4774487471526196, + "grad_norm": 0.8790390490726189, + "learning_rate": 6.268482153428763e-06, + "loss": 0.2348, + "step": 786 + }, + { + "epoch": 0.4780561883067578, + "grad_norm": 0.9822253223369787, + "learning_rate": 6.258226909005783e-06, + "loss": 0.2809, + "step": 787 + }, + { + "epoch": 0.47866362946089597, + "grad_norm": 0.9996010163514261, + "learning_rate": 6.247966010500258e-06, + "loss": 0.2794, + "step": 788 + }, + { + "epoch": 0.47927107061503416, + "grad_norm": 0.95106120425031, + "learning_rate": 6.237699504021495e-06, + "loss": 0.2892, + "step": 789 + }, + { + "epoch": 0.47987851176917234, + "grad_norm": 0.9498966653557624, + "learning_rate": 
6.227427435703997e-06, + "loss": 0.277, + "step": 790 + }, + { + "epoch": 0.48048595292331053, + "grad_norm": 0.9516961785802474, + "learning_rate": 6.217149851707261e-06, + "loss": 0.253, + "step": 791 + }, + { + "epoch": 0.4810933940774487, + "grad_norm": 0.948579051817591, + "learning_rate": 6.206866798215571e-06, + "loss": 0.2675, + "step": 792 + }, + { + "epoch": 0.48170083523158697, + "grad_norm": 0.8987027506170783, + "learning_rate": 6.1965783214377895e-06, + "loss": 0.2498, + "step": 793 + }, + { + "epoch": 0.48230827638572515, + "grad_norm": 0.8985403214008115, + "learning_rate": 6.186284467607149e-06, + "loss": 0.2594, + "step": 794 + }, + { + "epoch": 0.48291571753986334, + "grad_norm": 0.9277532386845908, + "learning_rate": 6.175985282981042e-06, + "loss": 0.2644, + "step": 795 + }, + { + "epoch": 0.48352315869400153, + "grad_norm": 1.1995838839316881, + "learning_rate": 6.165680813840822e-06, + "loss": 0.2546, + "step": 796 + }, + { + "epoch": 0.4841305998481397, + "grad_norm": 1.801894016231239, + "learning_rate": 6.155371106491584e-06, + "loss": 0.3234, + "step": 797 + }, + { + "epoch": 0.4847380410022779, + "grad_norm": 0.906615804627482, + "learning_rate": 6.1450562072619635e-06, + "loss": 0.245, + "step": 798 + }, + { + "epoch": 0.4853454821564161, + "grad_norm": 1.0302579157248837, + "learning_rate": 6.134736162503929e-06, + "loss": 0.2631, + "step": 799 + }, + { + "epoch": 0.4859529233105543, + "grad_norm": 0.9384658833738736, + "learning_rate": 6.124411018592568e-06, + "loss": 0.2632, + "step": 800 + }, + { + "epoch": 0.4865603644646925, + "grad_norm": 0.8947013802717417, + "learning_rate": 6.114080821925885e-06, + "loss": 0.272, + "step": 801 + }, + { + "epoch": 0.48716780561883066, + "grad_norm": 1.3316960693705406, + "learning_rate": 6.103745618924587e-06, + "loss": 0.2577, + "step": 802 + }, + { + "epoch": 0.48777524677296885, + "grad_norm": 1.0028338947085624, + "learning_rate": 6.09340545603188e-06, + "loss": 0.2925, + "step": 803 + }, + 
{ + "epoch": 0.48838268792710704, + "grad_norm": 1.0004427933236308, + "learning_rate": 6.0830603797132574e-06, + "loss": 0.2582, + "step": 804 + }, + { + "epoch": 0.48899012908124523, + "grad_norm": 0.9812737917561756, + "learning_rate": 6.072710436456293e-06, + "loss": 0.2832, + "step": 805 + }, + { + "epoch": 0.4895975702353835, + "grad_norm": 0.9548200362731749, + "learning_rate": 6.0623556727704306e-06, + "loss": 0.2676, + "step": 806 + }, + { + "epoch": 0.49020501138952166, + "grad_norm": 0.9929722264569331, + "learning_rate": 6.051996135186774e-06, + "loss": 0.289, + "step": 807 + }, + { + "epoch": 0.49081245254365985, + "grad_norm": 1.2854841849196001, + "learning_rate": 6.041631870257882e-06, + "loss": 0.2847, + "step": 808 + }, + { + "epoch": 0.49141989369779804, + "grad_norm": 1.0000863726047369, + "learning_rate": 6.0312629245575534e-06, + "loss": 0.3195, + "step": 809 + }, + { + "epoch": 0.4920273348519362, + "grad_norm": 0.9562458448178358, + "learning_rate": 6.020889344680627e-06, + "loss": 0.3136, + "step": 810 + }, + { + "epoch": 0.4926347760060744, + "grad_norm": 3.4103757955936653, + "learning_rate": 6.010511177242757e-06, + "loss": 0.285, + "step": 811 + }, + { + "epoch": 0.4932422171602126, + "grad_norm": 0.9236674911746163, + "learning_rate": 6.000128468880223e-06, + "loss": 0.2651, + "step": 812 + }, + { + "epoch": 0.4938496583143508, + "grad_norm": 1.50715645288713, + "learning_rate": 5.989741266249701e-06, + "loss": 0.2961, + "step": 813 + }, + { + "epoch": 0.494457099468489, + "grad_norm": 1.2283119233067459, + "learning_rate": 5.979349616028067e-06, + "loss": 0.3049, + "step": 814 + }, + { + "epoch": 0.49506454062262717, + "grad_norm": 1.0356567298104182, + "learning_rate": 5.9689535649121855e-06, + "loss": 0.2891, + "step": 815 + }, + { + "epoch": 0.49567198177676536, + "grad_norm": 0.9560994912289477, + "learning_rate": 5.958553159618693e-06, + "loss": 0.2619, + "step": 816 + }, + { + "epoch": 0.49627942293090355, + "grad_norm": 
0.8976018878445121, + "learning_rate": 5.948148446883794e-06, + "loss": 0.2753, + "step": 817 + }, + { + "epoch": 0.49688686408504174, + "grad_norm": 1.057276020274219, + "learning_rate": 5.937739473463047e-06, + "loss": 0.3255, + "step": 818 + }, + { + "epoch": 0.49749430523918, + "grad_norm": 0.9249628177128568, + "learning_rate": 5.927326286131162e-06, + "loss": 0.2774, + "step": 819 + }, + { + "epoch": 0.49810174639331817, + "grad_norm": 0.9503742934078119, + "learning_rate": 5.916908931681781e-06, + "loss": 0.2771, + "step": 820 + }, + { + "epoch": 0.49870918754745636, + "grad_norm": 1.2103474150033977, + "learning_rate": 5.906487456927273e-06, + "loss": 0.2949, + "step": 821 + }, + { + "epoch": 0.49931662870159454, + "grad_norm": 0.9494133739592079, + "learning_rate": 5.896061908698521e-06, + "loss": 0.2771, + "step": 822 + }, + { + "epoch": 0.49992406985573273, + "grad_norm": 0.9595078564844419, + "learning_rate": 5.885632333844714e-06, + "loss": 0.2746, + "step": 823 + }, + { + "epoch": 0.5005315110098709, + "grad_norm": 0.8889046754711648, + "learning_rate": 5.8751987792331365e-06, + "loss": 0.2728, + "step": 824 + }, + { + "epoch": 0.5011389521640092, + "grad_norm": 0.9984221504514925, + "learning_rate": 5.864761291748956e-06, + "loss": 0.2669, + "step": 825 + }, + { + "epoch": 0.5017463933181473, + "grad_norm": 1.0642385661388791, + "learning_rate": 5.854319918295012e-06, + "loss": 0.2711, + "step": 826 + }, + { + "epoch": 0.5023538344722855, + "grad_norm": 1.1473823060409172, + "learning_rate": 5.843874705791607e-06, + "loss": 0.2463, + "step": 827 + }, + { + "epoch": 0.5029612756264237, + "grad_norm": 1.0570678499864414, + "learning_rate": 5.833425701176294e-06, + "loss": 0.3234, + "step": 828 + }, + { + "epoch": 0.5035687167805619, + "grad_norm": 1.1140589963258467, + "learning_rate": 5.82297295140367e-06, + "loss": 0.2757, + "step": 829 + }, + { + "epoch": 0.5041761579347, + "grad_norm": 0.8357461454559573, + "learning_rate": 5.812516503445158e-06, + 
"loss": 0.2555, + "step": 830 + }, + { + "epoch": 0.5047835990888383, + "grad_norm": 0.9851929754213505, + "learning_rate": 5.8020564042888015e-06, + "loss": 0.2864, + "step": 831 + }, + { + "epoch": 0.5053910402429764, + "grad_norm": 0.9649129922182244, + "learning_rate": 5.79159270093905e-06, + "loss": 0.2871, + "step": 832 + }, + { + "epoch": 0.5059984813971147, + "grad_norm": 0.9056292022966355, + "learning_rate": 5.781125440416552e-06, + "loss": 0.2611, + "step": 833 + }, + { + "epoch": 0.5066059225512528, + "grad_norm": 1.2561528927314698, + "learning_rate": 5.770654669757935e-06, + "loss": 0.2938, + "step": 834 + }, + { + "epoch": 0.507213363705391, + "grad_norm": 1.429458180902475, + "learning_rate": 5.760180436015604e-06, + "loss": 0.2726, + "step": 835 + }, + { + "epoch": 0.5078208048595292, + "grad_norm": 0.9977501941214085, + "learning_rate": 5.749702786257529e-06, + "loss": 0.2808, + "step": 836 + }, + { + "epoch": 0.5084282460136674, + "grad_norm": 0.8542206729805362, + "learning_rate": 5.739221767567025e-06, + "loss": 0.2515, + "step": 837 + }, + { + "epoch": 0.5090356871678057, + "grad_norm": 1.1569449493080222, + "learning_rate": 5.7287374270425475e-06, + "loss": 0.2764, + "step": 838 + }, + { + "epoch": 0.5096431283219438, + "grad_norm": 0.9444050762828973, + "learning_rate": 5.718249811797482e-06, + "loss": 0.2895, + "step": 839 + }, + { + "epoch": 0.510250569476082, + "grad_norm": 0.9584586543782557, + "learning_rate": 5.707758968959923e-06, + "loss": 0.2746, + "step": 840 + }, + { + "epoch": 0.5108580106302202, + "grad_norm": 0.8938350182024917, + "learning_rate": 5.69726494567248e-06, + "loss": 0.2646, + "step": 841 + }, + { + "epoch": 0.5114654517843584, + "grad_norm": 0.9952500187835128, + "learning_rate": 5.686767789092041e-06, + "loss": 0.2927, + "step": 842 + }, + { + "epoch": 0.5120728929384966, + "grad_norm": 0.8980910835024705, + "learning_rate": 5.676267546389587e-06, + "loss": 0.255, + "step": 843 + }, + { + "epoch": 
0.5126803340926348, + "grad_norm": 0.9195976062449588, + "learning_rate": 5.6657642647499545e-06, + "loss": 0.2825, + "step": 844 + }, + { + "epoch": 0.5132877752467729, + "grad_norm": 1.0411762343769737, + "learning_rate": 5.655257991371646e-06, + "loss": 0.2614, + "step": 845 + }, + { + "epoch": 0.5138952164009112, + "grad_norm": 0.8901603514925267, + "learning_rate": 5.644748773466606e-06, + "loss": 0.2739, + "step": 846 + }, + { + "epoch": 0.5145026575550493, + "grad_norm": 0.995174149677262, + "learning_rate": 5.6342366582600035e-06, + "loss": 0.3136, + "step": 847 + }, + { + "epoch": 0.5151100987091876, + "grad_norm": 1.7141964167900545, + "learning_rate": 5.62372169299004e-06, + "loss": 0.2931, + "step": 848 + }, + { + "epoch": 0.5157175398633257, + "grad_norm": 0.9986737322206383, + "learning_rate": 5.613203924907711e-06, + "loss": 0.2635, + "step": 849 + }, + { + "epoch": 0.5163249810174639, + "grad_norm": 0.9184311409988306, + "learning_rate": 5.6026834012766155e-06, + "loss": 0.2523, + "step": 850 + }, + { + "epoch": 0.5169324221716022, + "grad_norm": 1.0089379913752443, + "learning_rate": 5.592160169372734e-06, + "loss": 0.2884, + "step": 851 + }, + { + "epoch": 0.5175398633257403, + "grad_norm": 0.9856271821475499, + "learning_rate": 5.581634276484211e-06, + "loss": 0.2701, + "step": 852 + }, + { + "epoch": 0.5181473044798786, + "grad_norm": 1.4723310483980634, + "learning_rate": 5.571105769911159e-06, + "loss": 0.3305, + "step": 853 + }, + { + "epoch": 0.5187547456340167, + "grad_norm": 0.972595176753459, + "learning_rate": 5.560574696965425e-06, + "loss": 0.2401, + "step": 854 + }, + { + "epoch": 0.5193621867881549, + "grad_norm": 0.9359549675099497, + "learning_rate": 5.550041104970398e-06, + "loss": 0.2647, + "step": 855 + }, + { + "epoch": 0.5199696279422931, + "grad_norm": 1.0700686033849647, + "learning_rate": 5.539505041260779e-06, + "loss": 0.2867, + "step": 856 + }, + { + "epoch": 0.5205770690964313, + "grad_norm": 1.8194028377094806, + 
"learning_rate": 5.528966553182379e-06, + "loss": 0.2492, + "step": 857 + }, + { + "epoch": 0.5211845102505694, + "grad_norm": 0.9354512112441699, + "learning_rate": 5.518425688091906e-06, + "loss": 0.2945, + "step": 858 + }, + { + "epoch": 0.5217919514047077, + "grad_norm": 0.8575059431791342, + "learning_rate": 5.507882493356745e-06, + "loss": 0.2579, + "step": 859 + }, + { + "epoch": 0.5223993925588458, + "grad_norm": 0.8801723522103503, + "learning_rate": 5.497337016354757e-06, + "loss": 0.2843, + "step": 860 + }, + { + "epoch": 0.5230068337129841, + "grad_norm": 1.055054705643407, + "learning_rate": 5.486789304474047e-06, + "loss": 0.2463, + "step": 861 + }, + { + "epoch": 0.5236142748671222, + "grad_norm": 0.9847077421418023, + "learning_rate": 5.476239405112775e-06, + "loss": 0.2961, + "step": 862 + }, + { + "epoch": 0.5242217160212604, + "grad_norm": 0.9528909479486961, + "learning_rate": 5.465687365678921e-06, + "loss": 0.2883, + "step": 863 + }, + { + "epoch": 0.5248291571753987, + "grad_norm": 0.9331008172836965, + "learning_rate": 5.45513323359009e-06, + "loss": 0.3138, + "step": 864 + }, + { + "epoch": 0.5254365983295368, + "grad_norm": 0.9666089596639151, + "learning_rate": 5.444577056273284e-06, + "loss": 0.2755, + "step": 865 + }, + { + "epoch": 0.5260440394836751, + "grad_norm": 0.9347956099649323, + "learning_rate": 5.434018881164702e-06, + "loss": 0.2701, + "step": 866 + }, + { + "epoch": 0.5266514806378132, + "grad_norm": 0.8850204653085014, + "learning_rate": 5.423458755709516e-06, + "loss": 0.284, + "step": 867 + }, + { + "epoch": 0.5272589217919514, + "grad_norm": 0.846363898916957, + "learning_rate": 5.412896727361663e-06, + "loss": 0.2381, + "step": 868 + }, + { + "epoch": 0.5278663629460896, + "grad_norm": 1.0043627806351294, + "learning_rate": 5.402332843583631e-06, + "loss": 0.2748, + "step": 869 + }, + { + "epoch": 0.5284738041002278, + "grad_norm": 1.0846231794328964, + "learning_rate": 5.391767151846247e-06, + "loss": 0.2717, + 
"step": 870 + }, + { + "epoch": 0.529081245254366, + "grad_norm": 1.046609605860445, + "learning_rate": 5.381199699628459e-06, + "loss": 0.2982, + "step": 871 + }, + { + "epoch": 0.5296886864085042, + "grad_norm": 0.912932730088245, + "learning_rate": 5.370630534417133e-06, + "loss": 0.2531, + "step": 872 + }, + { + "epoch": 0.5302961275626423, + "grad_norm": 1.1500626604620934, + "learning_rate": 5.360059703706823e-06, + "loss": 0.2995, + "step": 873 + }, + { + "epoch": 0.5309035687167806, + "grad_norm": 1.0117429899245785, + "learning_rate": 5.349487254999579e-06, + "loss": 0.2959, + "step": 874 + }, + { + "epoch": 0.5315110098709187, + "grad_norm": 1.2473392199840632, + "learning_rate": 5.3389132358047115e-06, + "loss": 0.28, + "step": 875 + }, + { + "epoch": 0.532118451025057, + "grad_norm": 0.9342083750338547, + "learning_rate": 5.328337693638591e-06, + "loss": 0.2856, + "step": 876 + }, + { + "epoch": 0.5327258921791952, + "grad_norm": 0.9455670305571291, + "learning_rate": 5.317760676024436e-06, + "loss": 0.2757, + "step": 877 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.7595753273077446, + "learning_rate": 5.307182230492089e-06, + "loss": 0.2757, + "step": 878 + }, + { + "epoch": 0.5339407744874716, + "grad_norm": 0.8967029314384648, + "learning_rate": 5.296602404577814e-06, + "loss": 0.2455, + "step": 879 + }, + { + "epoch": 0.5345482156416097, + "grad_norm": 1.3247032268401917, + "learning_rate": 5.286021245824075e-06, + "loss": 0.2947, + "step": 880 + }, + { + "epoch": 0.535155656795748, + "grad_norm": 0.9423673536300602, + "learning_rate": 5.275438801779328e-06, + "loss": 0.2687, + "step": 881 + }, + { + "epoch": 0.5357630979498861, + "grad_norm": 0.9635039934677604, + "learning_rate": 5.264855119997803e-06, + "loss": 0.283, + "step": 882 + }, + { + "epoch": 0.5363705391040243, + "grad_norm": 0.8537857300482113, + "learning_rate": 5.254270248039291e-06, + "loss": 0.2448, + "step": 883 + }, + { + "epoch": 0.5369779802581625, + "grad_norm": 
0.983767837657037, + "learning_rate": 5.243684233468933e-06, + "loss": 0.285, + "step": 884 + }, + { + "epoch": 0.5375854214123007, + "grad_norm": 0.8691004474077461, + "learning_rate": 5.233097123857004e-06, + "loss": 0.2556, + "step": 885 + }, + { + "epoch": 0.5381928625664388, + "grad_norm": 0.9137454862081088, + "learning_rate": 5.222508966778702e-06, + "loss": 0.2484, + "step": 886 + }, + { + "epoch": 0.5388003037205771, + "grad_norm": 0.9557993995814925, + "learning_rate": 5.211919809813927e-06, + "loss": 0.2568, + "step": 887 + }, + { + "epoch": 0.5394077448747152, + "grad_norm": 0.9810092195544174, + "learning_rate": 5.201329700547077e-06, + "loss": 0.296, + "step": 888 + }, + { + "epoch": 0.5400151860288535, + "grad_norm": 0.9230699355344197, + "learning_rate": 5.190738686566826e-06, + "loss": 0.2641, + "step": 889 + }, + { + "epoch": 0.5406226271829917, + "grad_norm": 1.6906836035620436, + "learning_rate": 5.180146815465915e-06, + "loss": 0.28, + "step": 890 + }, + { + "epoch": 0.5412300683371298, + "grad_norm": 0.9241916165908446, + "learning_rate": 5.169554134840937e-06, + "loss": 0.2646, + "step": 891 + }, + { + "epoch": 0.5418375094912681, + "grad_norm": 0.9358728519319159, + "learning_rate": 5.158960692292122e-06, + "loss": 0.267, + "step": 892 + }, + { + "epoch": 0.5424449506454062, + "grad_norm": 1.1048133481375857, + "learning_rate": 5.148366535423126e-06, + "loss": 0.2777, + "step": 893 + }, + { + "epoch": 0.5430523917995445, + "grad_norm": 0.9378744728399504, + "learning_rate": 5.137771711840811e-06, + "loss": 0.2678, + "step": 894 + }, + { + "epoch": 0.5436598329536826, + "grad_norm": 12.537370676924667, + "learning_rate": 5.1271762691550375e-06, + "loss": 0.2639, + "step": 895 + }, + { + "epoch": 0.5442672741078208, + "grad_norm": 0.9266142790273743, + "learning_rate": 5.116580254978447e-06, + "loss": 0.2659, + "step": 896 + }, + { + "epoch": 0.544874715261959, + "grad_norm": 0.8872145017815025, + "learning_rate": 5.1059837169262506e-06, + 
"loss": 0.2657, + "step": 897 + }, + { + "epoch": 0.5454821564160972, + "grad_norm": 0.9596765024185803, + "learning_rate": 5.095386702616012e-06, + "loss": 0.2737, + "step": 898 + }, + { + "epoch": 0.5460895975702353, + "grad_norm": 0.8505452588238741, + "learning_rate": 5.084789259667437e-06, + "loss": 0.2229, + "step": 899 + }, + { + "epoch": 0.5466970387243736, + "grad_norm": 0.9742318710472475, + "learning_rate": 5.074191435702155e-06, + "loss": 0.2621, + "step": 900 + }, + { + "epoch": 0.5473044798785117, + "grad_norm": 0.9174362261127138, + "learning_rate": 5.06359327834351e-06, + "loss": 0.2735, + "step": 901 + }, + { + "epoch": 0.54791192103265, + "grad_norm": 1.0480082974980471, + "learning_rate": 5.05299483521634e-06, + "loss": 0.2804, + "step": 902 + }, + { + "epoch": 0.5485193621867882, + "grad_norm": 0.9044085564794266, + "learning_rate": 5.0423961539467754e-06, + "loss": 0.251, + "step": 903 + }, + { + "epoch": 0.5491268033409263, + "grad_norm": 1.0846067776083739, + "learning_rate": 5.031797282162007e-06, + "loss": 0.275, + "step": 904 + }, + { + "epoch": 0.5497342444950646, + "grad_norm": 1.054126628964702, + "learning_rate": 5.021198267490088e-06, + "loss": 0.3109, + "step": 905 + }, + { + "epoch": 0.5503416856492027, + "grad_norm": 0.9731663440580473, + "learning_rate": 5.010599157559713e-06, + "loss": 0.2744, + "step": 906 + }, + { + "epoch": 0.550949126803341, + "grad_norm": 0.935385971022661, + "learning_rate": 5e-06, + "loss": 0.2833, + "step": 907 + }, + { + "epoch": 0.5515565679574791, + "grad_norm": 0.95732647622787, + "learning_rate": 4.98940084244029e-06, + "loss": 0.2738, + "step": 908 + }, + { + "epoch": 0.5521640091116173, + "grad_norm": 0.9778546392598653, + "learning_rate": 4.9788017325099134e-06, + "loss": 0.2902, + "step": 909 + }, + { + "epoch": 0.5527714502657555, + "grad_norm": 1.161637726781862, + "learning_rate": 4.968202717837996e-06, + "loss": 0.2448, + "step": 910 + }, + { + "epoch": 0.5533788914198937, + "grad_norm": 
1.0325179748583595, + "learning_rate": 4.957603846053225e-06, + "loss": 0.2777, + "step": 911 + }, + { + "epoch": 0.5539863325740318, + "grad_norm": 0.9385803768928881, + "learning_rate": 4.947005164783661e-06, + "loss": 0.252, + "step": 912 + }, + { + "epoch": 0.5545937737281701, + "grad_norm": 0.8813294108193872, + "learning_rate": 4.936406721656492e-06, + "loss": 0.262, + "step": 913 + }, + { + "epoch": 0.5552012148823082, + "grad_norm": 1.0450236387704728, + "learning_rate": 4.925808564297847e-06, + "loss": 0.2809, + "step": 914 + }, + { + "epoch": 0.5558086560364465, + "grad_norm": 0.9743579657662962, + "learning_rate": 4.915210740332564e-06, + "loss": 0.2727, + "step": 915 + }, + { + "epoch": 0.5564160971905847, + "grad_norm": 1.1211404996380052, + "learning_rate": 4.9046132973839895e-06, + "loss": 0.3337, + "step": 916 + }, + { + "epoch": 0.5570235383447228, + "grad_norm": 0.9501582701592088, + "learning_rate": 4.894016283073753e-06, + "loss": 0.2598, + "step": 917 + }, + { + "epoch": 0.5576309794988611, + "grad_norm": 1.023090140186835, + "learning_rate": 4.883419745021554e-06, + "loss": 0.2483, + "step": 918 + }, + { + "epoch": 0.5582384206529992, + "grad_norm": 0.9242815154848782, + "learning_rate": 4.872823730844966e-06, + "loss": 0.255, + "step": 919 + }, + { + "epoch": 0.5588458618071375, + "grad_norm": 1.0437928013824458, + "learning_rate": 4.862228288159191e-06, + "loss": 0.2804, + "step": 920 + }, + { + "epoch": 0.5594533029612756, + "grad_norm": 0.928662371565127, + "learning_rate": 4.851633464576876e-06, + "loss": 0.2487, + "step": 921 + }, + { + "epoch": 0.5600607441154138, + "grad_norm": 0.8860877007011291, + "learning_rate": 4.841039307707878e-06, + "loss": 0.2567, + "step": 922 + }, + { + "epoch": 0.560668185269552, + "grad_norm": 0.9520319656600132, + "learning_rate": 4.8304458651590645e-06, + "loss": 0.2736, + "step": 923 + }, + { + "epoch": 0.5612756264236902, + "grad_norm": 0.9185300392147626, + "learning_rate": 4.819853184534085e-06, + 
"loss": 0.2638, + "step": 924 + }, + { + "epoch": 0.5618830675778284, + "grad_norm": 0.8949825038070447, + "learning_rate": 4.809261313433176e-06, + "loss": 0.2582, + "step": 925 + }, + { + "epoch": 0.5624905087319666, + "grad_norm": 1.0105141831102669, + "learning_rate": 4.798670299452926e-06, + "loss": 0.2606, + "step": 926 + }, + { + "epoch": 0.5630979498861047, + "grad_norm": 0.8759995193174664, + "learning_rate": 4.788080190186075e-06, + "loss": 0.2725, + "step": 927 + }, + { + "epoch": 0.563705391040243, + "grad_norm": 0.9842928399442494, + "learning_rate": 4.7774910332213005e-06, + "loss": 0.2889, + "step": 928 + }, + { + "epoch": 0.5643128321943812, + "grad_norm": 0.9982390837595454, + "learning_rate": 4.766902876142996e-06, + "loss": 0.2536, + "step": 929 + }, + { + "epoch": 0.5649202733485194, + "grad_norm": 0.8982965289569348, + "learning_rate": 4.756315766531069e-06, + "loss": 0.2748, + "step": 930 + }, + { + "epoch": 0.5655277145026576, + "grad_norm": 0.9053592532295419, + "learning_rate": 4.74572975196071e-06, + "loss": 0.2453, + "step": 931 + }, + { + "epoch": 0.5661351556567957, + "grad_norm": 0.9728048806580342, + "learning_rate": 4.735144880002199e-06, + "loss": 0.2834, + "step": 932 + }, + { + "epoch": 0.566742596810934, + "grad_norm": 1.0237268565258881, + "learning_rate": 4.724561198220672e-06, + "loss": 0.2525, + "step": 933 + }, + { + "epoch": 0.5673500379650721, + "grad_norm": 0.8905565256751365, + "learning_rate": 4.713978754175926e-06, + "loss": 0.2698, + "step": 934 + }, + { + "epoch": 0.5679574791192104, + "grad_norm": 0.9139326984298836, + "learning_rate": 4.703397595422188e-06, + "loss": 0.2674, + "step": 935 + }, + { + "epoch": 0.5685649202733485, + "grad_norm": 0.8833013039618229, + "learning_rate": 4.692817769507912e-06, + "loss": 0.2684, + "step": 936 + }, + { + "epoch": 0.5691723614274867, + "grad_norm": 0.9238763026108181, + "learning_rate": 4.682239323975566e-06, + "loss": 0.2558, + "step": 937 + }, + { + "epoch": 
0.5697798025816249, + "grad_norm": 1.5115130854908239, + "learning_rate": 4.671662306361409e-06, + "loss": 0.2935, + "step": 938 + }, + { + "epoch": 0.5703872437357631, + "grad_norm": 0.9482923489278967, + "learning_rate": 4.66108676419529e-06, + "loss": 0.294, + "step": 939 + }, + { + "epoch": 0.5709946848899012, + "grad_norm": 0.9477355385020175, + "learning_rate": 4.6505127450004216e-06, + "loss": 0.2632, + "step": 940 + }, + { + "epoch": 0.5716021260440395, + "grad_norm": 0.9464692568004452, + "learning_rate": 4.6399402962931775e-06, + "loss": 0.2688, + "step": 941 + }, + { + "epoch": 0.5722095671981777, + "grad_norm": 1.5567830309937185, + "learning_rate": 4.62936946558287e-06, + "loss": 0.2712, + "step": 942 + }, + { + "epoch": 0.5728170083523159, + "grad_norm": 0.9424657036172921, + "learning_rate": 4.618800300371543e-06, + "loss": 0.2545, + "step": 943 + }, + { + "epoch": 0.5734244495064541, + "grad_norm": 0.8823928171248179, + "learning_rate": 4.608232848153757e-06, + "loss": 0.2412, + "step": 944 + }, + { + "epoch": 0.5740318906605922, + "grad_norm": 0.9999498692731567, + "learning_rate": 4.597667156416371e-06, + "loss": 0.2893, + "step": 945 + }, + { + "epoch": 0.5746393318147305, + "grad_norm": 1.3850490330933427, + "learning_rate": 4.587103272638339e-06, + "loss": 0.272, + "step": 946 + }, + { + "epoch": 0.5752467729688686, + "grad_norm": 0.9012990165306813, + "learning_rate": 4.576541244290484e-06, + "loss": 0.2735, + "step": 947 + }, + { + "epoch": 0.5758542141230069, + "grad_norm": 0.9444183793088788, + "learning_rate": 4.565981118835299e-06, + "loss": 0.2747, + "step": 948 + }, + { + "epoch": 0.576461655277145, + "grad_norm": 1.0837085618557278, + "learning_rate": 4.555422943726715e-06, + "loss": 0.318, + "step": 949 + }, + { + "epoch": 0.5770690964312832, + "grad_norm": 0.9744715635599894, + "learning_rate": 4.5448667664099125e-06, + "loss": 0.2991, + "step": 950 + }, + { + "epoch": 0.5776765375854214, + "grad_norm": 0.8967007719257041, + 
"learning_rate": 4.534312634321081e-06, + "loss": 0.2748, + "step": 951 + }, + { + "epoch": 0.5782839787395596, + "grad_norm": 0.9164171208837589, + "learning_rate": 4.523760594887228e-06, + "loss": 0.246, + "step": 952 + }, + { + "epoch": 0.5788914198936977, + "grad_norm": 0.8748749367095119, + "learning_rate": 4.513210695525954e-06, + "loss": 0.2521, + "step": 953 + }, + { + "epoch": 0.579498861047836, + "grad_norm": 0.9504021763832875, + "learning_rate": 4.5026629836452445e-06, + "loss": 0.2965, + "step": 954 + }, + { + "epoch": 0.5801063022019742, + "grad_norm": 1.0694432667746632, + "learning_rate": 4.492117506643256e-06, + "loss": 0.2487, + "step": 955 + }, + { + "epoch": 0.5807137433561124, + "grad_norm": 0.9126259590301874, + "learning_rate": 4.481574311908096e-06, + "loss": 0.2714, + "step": 956 + }, + { + "epoch": 0.5813211845102506, + "grad_norm": 0.8752331832756258, + "learning_rate": 4.471033446817623e-06, + "loss": 0.2645, + "step": 957 + }, + { + "epoch": 0.5819286256643887, + "grad_norm": 0.9292223873298213, + "learning_rate": 4.460494958739223e-06, + "loss": 0.2827, + "step": 958 + }, + { + "epoch": 0.582536066818527, + "grad_norm": 0.9249697636061491, + "learning_rate": 4.449958895029604e-06, + "loss": 0.2889, + "step": 959 + }, + { + "epoch": 0.5831435079726651, + "grad_norm": 0.896078007157414, + "learning_rate": 4.439425303034576e-06, + "loss": 0.2438, + "step": 960 + }, + { + "epoch": 0.5837509491268034, + "grad_norm": 0.994404825370418, + "learning_rate": 4.428894230088842e-06, + "loss": 0.2567, + "step": 961 + }, + { + "epoch": 0.5843583902809415, + "grad_norm": 1.003722332588181, + "learning_rate": 4.418365723515791e-06, + "loss": 0.2777, + "step": 962 + }, + { + "epoch": 0.5849658314350797, + "grad_norm": 0.9051722268130655, + "learning_rate": 4.407839830627269e-06, + "loss": 0.2723, + "step": 963 + }, + { + "epoch": 0.5855732725892179, + "grad_norm": 0.9993245696949893, + "learning_rate": 4.397316598723385e-06, + "loss": 0.261, + "step": 
964 + }, + { + "epoch": 0.5861807137433561, + "grad_norm": 1.129318363010175, + "learning_rate": 4.38679607509229e-06, + "loss": 0.2785, + "step": 965 + }, + { + "epoch": 0.5867881548974943, + "grad_norm": 0.9652429592155066, + "learning_rate": 4.376278307009962e-06, + "loss": 0.3026, + "step": 966 + }, + { + "epoch": 0.5873955960516325, + "grad_norm": 1.08394459651315, + "learning_rate": 4.365763341739996e-06, + "loss": 0.2583, + "step": 967 + }, + { + "epoch": 0.5880030372057707, + "grad_norm": 0.9919795857924707, + "learning_rate": 4.355251226533396e-06, + "loss": 0.2825, + "step": 968 + }, + { + "epoch": 0.5886104783599089, + "grad_norm": 1.3252968659585784, + "learning_rate": 4.344742008628356e-06, + "loss": 0.2525, + "step": 969 + }, + { + "epoch": 0.5892179195140471, + "grad_norm": 1.2622690111591564, + "learning_rate": 4.334235735250047e-06, + "loss": 0.2289, + "step": 970 + }, + { + "epoch": 0.5898253606681853, + "grad_norm": 1.051902837554305, + "learning_rate": 4.3237324536104165e-06, + "loss": 0.2478, + "step": 971 + }, + { + "epoch": 0.5904328018223235, + "grad_norm": 1.0527055472641202, + "learning_rate": 4.313232210907959e-06, + "loss": 0.2898, + "step": 972 + }, + { + "epoch": 0.5910402429764616, + "grad_norm": 1.0101095079376434, + "learning_rate": 4.302735054327523e-06, + "loss": 0.2682, + "step": 973 + }, + { + "epoch": 0.5916476841305999, + "grad_norm": 0.8621015161725616, + "learning_rate": 4.292241031040077e-06, + "loss": 0.2485, + "step": 974 + }, + { + "epoch": 0.592255125284738, + "grad_norm": 0.8726005630346255, + "learning_rate": 4.28175018820252e-06, + "loss": 0.249, + "step": 975 + }, + { + "epoch": 0.5928625664388762, + "grad_norm": 0.9751348393117523, + "learning_rate": 4.271262572957453e-06, + "loss": 0.2877, + "step": 976 + }, + { + "epoch": 0.5934700075930144, + "grad_norm": 3.66261461720758, + "learning_rate": 4.2607782324329776e-06, + "loss": 0.3063, + "step": 977 + }, + { + "epoch": 0.5940774487471526, + "grad_norm": 
0.8943406986539186, + "learning_rate": 4.250297213742473e-06, + "loss": 0.2405, + "step": 978 + }, + { + "epoch": 0.5946848899012908, + "grad_norm": 1.1389623298222618, + "learning_rate": 4.239819563984397e-06, + "loss": 0.304, + "step": 979 + }, + { + "epoch": 0.595292331055429, + "grad_norm": 0.9274441396848276, + "learning_rate": 4.229345330242067e-06, + "loss": 0.2592, + "step": 980 + }, + { + "epoch": 0.5958997722095672, + "grad_norm": 0.920248304426081, + "learning_rate": 4.21887455958345e-06, + "loss": 0.2656, + "step": 981 + }, + { + "epoch": 0.5965072133637054, + "grad_norm": 1.0045840102748669, + "learning_rate": 4.2084072990609505e-06, + "loss": 0.2793, + "step": 982 + }, + { + "epoch": 0.5971146545178436, + "grad_norm": 1.0001754794903, + "learning_rate": 4.1979435957111984e-06, + "loss": 0.2836, + "step": 983 + }, + { + "epoch": 0.5977220956719818, + "grad_norm": 0.9044442095717217, + "learning_rate": 4.187483496554844e-06, + "loss": 0.2583, + "step": 984 + }, + { + "epoch": 0.59832953682612, + "grad_norm": 0.9671571248544576, + "learning_rate": 4.17702704859633e-06, + "loss": 0.2675, + "step": 985 + }, + { + "epoch": 0.5989369779802581, + "grad_norm": 1.0757021127620503, + "learning_rate": 4.166574298823707e-06, + "loss": 0.2676, + "step": 986 + }, + { + "epoch": 0.5995444191343964, + "grad_norm": 0.9735187115759455, + "learning_rate": 4.156125294208396e-06, + "loss": 0.242, + "step": 987 + }, + { + "epoch": 0.6001518602885345, + "grad_norm": 0.9405044204067678, + "learning_rate": 4.145680081704989e-06, + "loss": 0.2516, + "step": 988 + }, + { + "epoch": 0.6007593014426728, + "grad_norm": 0.9651846089243521, + "learning_rate": 4.135238708251045e-06, + "loss": 0.2758, + "step": 989 + }, + { + "epoch": 0.6013667425968109, + "grad_norm": 0.9646286486818011, + "learning_rate": 4.1248012207668635e-06, + "loss": 0.2565, + "step": 990 + }, + { + "epoch": 0.6019741837509491, + "grad_norm": 1.003985298512368, + "learning_rate": 4.1143676661552876e-06, + 
"loss": 0.2828, + "step": 991 + }, + { + "epoch": 0.6025816249050873, + "grad_norm": 1.3272162976579898, + "learning_rate": 4.103938091301479e-06, + "loss": 0.2374, + "step": 992 + }, + { + "epoch": 0.6031890660592255, + "grad_norm": 1.029165825211411, + "learning_rate": 4.093512543072729e-06, + "loss": 0.256, + "step": 993 + }, + { + "epoch": 0.6037965072133638, + "grad_norm": 1.015855525425474, + "learning_rate": 4.08309106831822e-06, + "loss": 0.2732, + "step": 994 + }, + { + "epoch": 0.6044039483675019, + "grad_norm": 1.0983207807319615, + "learning_rate": 4.07267371386884e-06, + "loss": 0.2808, + "step": 995 + }, + { + "epoch": 0.6050113895216401, + "grad_norm": 1.0434667435702822, + "learning_rate": 4.062260526536955e-06, + "loss": 0.2936, + "step": 996 + }, + { + "epoch": 0.6056188306757783, + "grad_norm": 2.7705088228683183, + "learning_rate": 4.051851553116208e-06, + "loss": 0.2797, + "step": 997 + }, + { + "epoch": 0.6062262718299165, + "grad_norm": 1.0175524502413011, + "learning_rate": 4.041446840381309e-06, + "loss": 0.2847, + "step": 998 + }, + { + "epoch": 0.6068337129840546, + "grad_norm": 1.0263313633646869, + "learning_rate": 4.0310464350878145e-06, + "loss": 0.2803, + "step": 999 + }, + { + "epoch": 0.6074411541381929, + "grad_norm": 0.9619358823368712, + "learning_rate": 4.0206503839719335e-06, + "loss": 0.2762, + "step": 1000 + }, + { + "epoch": 0.608048595292331, + "grad_norm": 1.1203236915172694, + "learning_rate": 4.0102587337503e-06, + "loss": 0.2813, + "step": 1001 + }, + { + "epoch": 0.6086560364464693, + "grad_norm": 0.9232892656793803, + "learning_rate": 3.999871531119779e-06, + "loss": 0.2791, + "step": 1002 + }, + { + "epoch": 0.6092634776006074, + "grad_norm": 1.2408650869198945, + "learning_rate": 3.989488822757244e-06, + "loss": 0.2529, + "step": 1003 + }, + { + "epoch": 0.6098709187547456, + "grad_norm": 1.3861837200018423, + "learning_rate": 3.9791106553193746e-06, + "loss": 0.2681, + "step": 1004 + }, + { + "epoch": 
0.6104783599088838, + "grad_norm": 1.94554998345937, + "learning_rate": 3.968737075442449e-06, + "loss": 0.2774, + "step": 1005 + }, + { + "epoch": 0.611085801063022, + "grad_norm": 1.049688439654031, + "learning_rate": 3.9583681297421194e-06, + "loss": 0.2738, + "step": 1006 + }, + { + "epoch": 0.6116932422171603, + "grad_norm": 1.1130517893828742, + "learning_rate": 3.9480038648132285e-06, + "loss": 0.258, + "step": 1007 + }, + { + "epoch": 0.6123006833712984, + "grad_norm": 0.9091158603102817, + "learning_rate": 3.937644327229572e-06, + "loss": 0.256, + "step": 1008 + }, + { + "epoch": 0.6129081245254366, + "grad_norm": 0.8593163726216395, + "learning_rate": 3.927289563543709e-06, + "loss": 0.2139, + "step": 1009 + }, + { + "epoch": 0.6135155656795748, + "grad_norm": 2.2304132729314436, + "learning_rate": 3.916939620286743e-06, + "loss": 0.3042, + "step": 1010 + }, + { + "epoch": 0.614123006833713, + "grad_norm": 0.8846471003483667, + "learning_rate": 3.906594543968122e-06, + "loss": 0.2461, + "step": 1011 + }, + { + "epoch": 0.6147304479878511, + "grad_norm": 0.9578022421490531, + "learning_rate": 3.896254381075416e-06, + "loss": 0.2135, + "step": 1012 + }, + { + "epoch": 0.6153378891419894, + "grad_norm": 0.9488296987772777, + "learning_rate": 3.885919178074116e-06, + "loss": 0.2656, + "step": 1013 + }, + { + "epoch": 0.6159453302961275, + "grad_norm": 0.9095777905606226, + "learning_rate": 3.875588981407433e-06, + "loss": 0.2696, + "step": 1014 + }, + { + "epoch": 0.6165527714502658, + "grad_norm": 1.172457556031403, + "learning_rate": 3.865263837496072e-06, + "loss": 0.2807, + "step": 1015 + }, + { + "epoch": 0.6171602126044039, + "grad_norm": 0.880155690724818, + "learning_rate": 3.854943792738037e-06, + "loss": 0.2724, + "step": 1016 + }, + { + "epoch": 0.6177676537585421, + "grad_norm": 0.9726329271199242, + "learning_rate": 3.844628893508417e-06, + "loss": 0.2849, + "step": 1017 + }, + { + "epoch": 0.6183750949126803, + "grad_norm": 0.9428793397994366, + 
"learning_rate": 3.834319186159179e-06, + "loss": 0.2807, + "step": 1018 + }, + { + "epoch": 0.6189825360668185, + "grad_norm": 1.1315897196957132, + "learning_rate": 3.8240147170189575e-06, + "loss": 0.2674, + "step": 1019 + }, + { + "epoch": 0.6195899772209568, + "grad_norm": 0.9181683595405062, + "learning_rate": 3.8137155323928526e-06, + "loss": 0.2801, + "step": 1020 + }, + { + "epoch": 0.6201974183750949, + "grad_norm": 1.2695788430486319, + "learning_rate": 3.803421678562213e-06, + "loss": 0.2464, + "step": 1021 + }, + { + "epoch": 0.6208048595292331, + "grad_norm": 0.9830056869409215, + "learning_rate": 3.7931332017844302e-06, + "loss": 0.2219, + "step": 1022 + }, + { + "epoch": 0.6214123006833713, + "grad_norm": 0.9647382303431887, + "learning_rate": 3.7828501482927416e-06, + "loss": 0.2841, + "step": 1023 + }, + { + "epoch": 0.6220197418375095, + "grad_norm": 0.9717568307189096, + "learning_rate": 3.7725725642960047e-06, + "loss": 0.2977, + "step": 1024 + }, + { + "epoch": 0.6226271829916477, + "grad_norm": 0.9411703602062548, + "learning_rate": 3.7623004959785066e-06, + "loss": 0.2373, + "step": 1025 + }, + { + "epoch": 0.6232346241457859, + "grad_norm": 1.0071266637924543, + "learning_rate": 3.752033989499742e-06, + "loss": 0.2786, + "step": 1026 + }, + { + "epoch": 0.623842065299924, + "grad_norm": 0.9059441456779171, + "learning_rate": 3.7417730909942184e-06, + "loss": 0.231, + "step": 1027 + }, + { + "epoch": 0.6244495064540623, + "grad_norm": 1.012261264702401, + "learning_rate": 3.7315178465712364e-06, + "loss": 0.2623, + "step": 1028 + }, + { + "epoch": 0.6250569476082004, + "grad_norm": 0.9552757738267845, + "learning_rate": 3.721268302314698e-06, + "loss": 0.262, + "step": 1029 + }, + { + "epoch": 0.6256643887623387, + "grad_norm": 1.027708835953696, + "learning_rate": 3.7110245042828786e-06, + "loss": 0.2576, + "step": 1030 + }, + { + "epoch": 0.6262718299164769, + "grad_norm": 1.4132850004379782, + "learning_rate": 3.70078649850824e-06, + 
"loss": 0.2911, + "step": 1031 + }, + { + "epoch": 0.626879271070615, + "grad_norm": 1.299324195215338, + "learning_rate": 3.690554330997215e-06, + "loss": 0.2699, + "step": 1032 + }, + { + "epoch": 0.6274867122247533, + "grad_norm": 0.9195896367280948, + "learning_rate": 3.6803280477299975e-06, + "loss": 0.2449, + "step": 1033 + }, + { + "epoch": 0.6280941533788914, + "grad_norm": 1.2092094482741347, + "learning_rate": 3.670107694660343e-06, + "loss": 0.3003, + "step": 1034 + }, + { + "epoch": 0.6287015945330297, + "grad_norm": 0.9323263275420439, + "learning_rate": 3.659893317715355e-06, + "loss": 0.2712, + "step": 1035 + }, + { + "epoch": 0.6293090356871678, + "grad_norm": 0.9032020829812812, + "learning_rate": 3.6496849627952875e-06, + "loss": 0.2838, + "step": 1036 + }, + { + "epoch": 0.629916476841306, + "grad_norm": 0.9083525940287548, + "learning_rate": 3.639482675773324e-06, + "loss": 0.2729, + "step": 1037 + }, + { + "epoch": 0.6305239179954442, + "grad_norm": 0.9352716602967113, + "learning_rate": 3.6292865024953945e-06, + "loss": 0.2541, + "step": 1038 + }, + { + "epoch": 0.6311313591495824, + "grad_norm": 1.1060377092126286, + "learning_rate": 3.6190964887799418e-06, + "loss": 0.3177, + "step": 1039 + }, + { + "epoch": 0.6317388003037205, + "grad_norm": 0.9438466117610348, + "learning_rate": 3.6089126804177373e-06, + "loss": 0.2253, + "step": 1040 + }, + { + "epoch": 0.6323462414578588, + "grad_norm": 0.9463389610475652, + "learning_rate": 3.5987351231716665e-06, + "loss": 0.2484, + "step": 1041 + }, + { + "epoch": 0.6329536826119969, + "grad_norm": 1.0525074350281083, + "learning_rate": 3.5885638627765228e-06, + "loss": 0.2747, + "step": 1042 + }, + { + "epoch": 0.6335611237661352, + "grad_norm": 0.9145186747529931, + "learning_rate": 3.5783989449388063e-06, + "loss": 0.2631, + "step": 1043 + }, + { + "epoch": 0.6341685649202734, + "grad_norm": 0.8785607963190103, + "learning_rate": 3.568240415336509e-06, + "loss": 0.2438, + "step": 1044 + }, + { + 
"epoch": 0.6347760060744115, + "grad_norm": 0.9826184323548174, + "learning_rate": 3.5580883196189265e-06, + "loss": 0.2784, + "step": 1045 + }, + { + "epoch": 0.6353834472285498, + "grad_norm": 1.2684340321470944, + "learning_rate": 3.547942703406433e-06, + "loss": 0.2494, + "step": 1046 + }, + { + "epoch": 0.6359908883826879, + "grad_norm": 1.080171576140306, + "learning_rate": 3.5378036122902907e-06, + "loss": 0.2277, + "step": 1047 + }, + { + "epoch": 0.6365983295368262, + "grad_norm": 1.0670243919181694, + "learning_rate": 3.52767109183244e-06, + "loss": 0.2479, + "step": 1048 + }, + { + "epoch": 0.6372057706909643, + "grad_norm": 1.16999981733913, + "learning_rate": 3.5175451875652906e-06, + "loss": 0.3218, + "step": 1049 + }, + { + "epoch": 0.6378132118451025, + "grad_norm": 0.9540949950156941, + "learning_rate": 3.507425944991529e-06, + "loss": 0.2782, + "step": 1050 + }, + { + "epoch": 0.6384206529992407, + "grad_norm": 1.091171127154527, + "learning_rate": 3.4973134095838943e-06, + "loss": 0.2587, + "step": 1051 + }, + { + "epoch": 0.6390280941533789, + "grad_norm": 0.9498496318574927, + "learning_rate": 3.4872076267850015e-06, + "loss": 0.2541, + "step": 1052 + }, + { + "epoch": 0.639635535307517, + "grad_norm": 1.0314901642287697, + "learning_rate": 3.4771086420071053e-06, + "loss": 0.2664, + "step": 1053 + }, + { + "epoch": 0.6402429764616553, + "grad_norm": 0.9996172120430332, + "learning_rate": 3.4670165006319236e-06, + "loss": 0.2799, + "step": 1054 + }, + { + "epoch": 0.6408504176157934, + "grad_norm": 0.9350416152513497, + "learning_rate": 3.4569312480104157e-06, + "loss": 0.2829, + "step": 1055 + }, + { + "epoch": 0.6414578587699317, + "grad_norm": 1.1632073226641764, + "learning_rate": 3.4468529294625895e-06, + "loss": 0.2574, + "step": 1056 + }, + { + "epoch": 0.6420652999240699, + "grad_norm": 0.9447749084911037, + "learning_rate": 3.4367815902772917e-06, + "loss": 0.2562, + "step": 1057 + }, + { + "epoch": 0.642672741078208, + "grad_norm": 
1.2070485411268692, + "learning_rate": 3.4267172757120005e-06, + "loss": 0.2635, + "step": 1058 + }, + { + "epoch": 0.6432801822323463, + "grad_norm": 0.9408532287008231, + "learning_rate": 3.416660030992639e-06, + "loss": 0.2631, + "step": 1059 + }, + { + "epoch": 0.6438876233864844, + "grad_norm": 1.279419634233724, + "learning_rate": 3.406609901313349e-06, + "loss": 0.2716, + "step": 1060 + }, + { + "epoch": 0.6444950645406227, + "grad_norm": 1.0437711278438688, + "learning_rate": 3.396566931836308e-06, + "loss": 0.2633, + "step": 1061 + }, + { + "epoch": 0.6451025056947608, + "grad_norm": 0.9477234648384033, + "learning_rate": 3.386531167691512e-06, + "loss": 0.2551, + "step": 1062 + }, + { + "epoch": 0.645709946848899, + "grad_norm": 0.8821252495312435, + "learning_rate": 3.3765026539765832e-06, + "loss": 0.2484, + "step": 1063 + }, + { + "epoch": 0.6463173880030372, + "grad_norm": 1.0630208248660034, + "learning_rate": 3.36648143575656e-06, + "loss": 0.2724, + "step": 1064 + }, + { + "epoch": 0.6469248291571754, + "grad_norm": 0.8969008349261947, + "learning_rate": 3.3564675580636946e-06, + "loss": 0.2544, + "step": 1065 + }, + { + "epoch": 0.6475322703113136, + "grad_norm": 0.8758051487969973, + "learning_rate": 3.3464610658972584e-06, + "loss": 0.2518, + "step": 1066 + }, + { + "epoch": 0.6481397114654518, + "grad_norm": 0.9068783910815723, + "learning_rate": 3.3364620042233316e-06, + "loss": 0.2362, + "step": 1067 + }, + { + "epoch": 0.6487471526195899, + "grad_norm": 0.9711243894474835, + "learning_rate": 3.326470417974604e-06, + "loss": 0.2417, + "step": 1068 + }, + { + "epoch": 0.6493545937737282, + "grad_norm": 0.9040626259172609, + "learning_rate": 3.3164863520501744e-06, + "loss": 0.2289, + "step": 1069 + }, + { + "epoch": 0.6499620349278664, + "grad_norm": 4.042527322341976, + "learning_rate": 3.3065098513153473e-06, + "loss": 0.2839, + "step": 1070 + }, + { + "epoch": 0.6505694760820045, + "grad_norm": 0.8905148331919976, + "learning_rate": 
3.29654096060143e-06, + "loss": 0.2758, + "step": 1071 + }, + { + "epoch": 0.6511769172361428, + "grad_norm": 1.0792077197141197, + "learning_rate": 3.2865797247055354e-06, + "loss": 0.2662, + "step": 1072 + }, + { + "epoch": 0.6517843583902809, + "grad_norm": 0.9497844499158115, + "learning_rate": 3.2766261883903744e-06, + "loss": 0.2549, + "step": 1073 + }, + { + "epoch": 0.6523917995444192, + "grad_norm": 1.0939981168516681, + "learning_rate": 3.266680396384061e-06, + "loss": 0.293, + "step": 1074 + }, + { + "epoch": 0.6529992406985573, + "grad_norm": 2.703463930587898, + "learning_rate": 3.256742393379909e-06, + "loss": 0.225, + "step": 1075 + }, + { + "epoch": 0.6536066818526955, + "grad_norm": 0.8280179235274505, + "learning_rate": 3.2468122240362287e-06, + "loss": 0.224, + "step": 1076 + }, + { + "epoch": 0.6542141230068337, + "grad_norm": 1.439064209594826, + "learning_rate": 3.2368899329761316e-06, + "loss": 0.2607, + "step": 1077 + }, + { + "epoch": 0.6548215641609719, + "grad_norm": 0.8868653685170421, + "learning_rate": 3.226975564787322e-06, + "loss": 0.2276, + "step": 1078 + }, + { + "epoch": 0.6554290053151101, + "grad_norm": 1.0980898669737127, + "learning_rate": 3.2170691640219077e-06, + "loss": 0.2648, + "step": 1079 + }, + { + "epoch": 0.6560364464692483, + "grad_norm": 0.9994606567591712, + "learning_rate": 3.2071707751961838e-06, + "loss": 0.2785, + "step": 1080 + }, + { + "epoch": 0.6566438876233864, + "grad_norm": 1.0995716769114432, + "learning_rate": 3.197280442790455e-06, + "loss": 0.2503, + "step": 1081 + }, + { + "epoch": 0.6572513287775247, + "grad_norm": 0.9205922048045703, + "learning_rate": 3.187398211248811e-06, + "loss": 0.2367, + "step": 1082 + }, + { + "epoch": 0.6578587699316629, + "grad_norm": 0.9635183689443267, + "learning_rate": 3.1775241249789434e-06, + "loss": 0.254, + "step": 1083 + }, + { + "epoch": 0.6584662110858011, + "grad_norm": 0.924480671064993, + "learning_rate": 3.1676582283519454e-06, + "loss": 0.2279, + 
"step": 1084 + }, + { + "epoch": 0.6590736522399393, + "grad_norm": 0.906465381610937, + "learning_rate": 3.1578005657021004e-06, + "loss": 0.2285, + "step": 1085 + }, + { + "epoch": 0.6596810933940774, + "grad_norm": 0.92471932747414, + "learning_rate": 3.1479511813267006e-06, + "loss": 0.2655, + "step": 1086 + }, + { + "epoch": 0.6602885345482157, + "grad_norm": 1.0142140195825766, + "learning_rate": 3.1381101194858264e-06, + "loss": 0.2407, + "step": 1087 + }, + { + "epoch": 0.6608959757023538, + "grad_norm": 1.0686680921013536, + "learning_rate": 3.1282774244021717e-06, + "loss": 0.2604, + "step": 1088 + }, + { + "epoch": 0.661503416856492, + "grad_norm": 1.0244620367393154, + "learning_rate": 3.118453140260823e-06, + "loss": 0.284, + "step": 1089 + }, + { + "epoch": 0.6621108580106302, + "grad_norm": 1.0170876360692431, + "learning_rate": 3.1086373112090762e-06, + "loss": 0.2523, + "step": 1090 + }, + { + "epoch": 0.6627182991647684, + "grad_norm": 0.9802160451886665, + "learning_rate": 3.0988299813562304e-06, + "loss": 0.2783, + "step": 1091 + }, + { + "epoch": 0.6633257403189066, + "grad_norm": 0.9103056036600278, + "learning_rate": 3.089031194773392e-06, + "loss": 0.2502, + "step": 1092 + }, + { + "epoch": 0.6639331814730448, + "grad_norm": 1.359229570037281, + "learning_rate": 3.079240995493279e-06, + "loss": 0.2479, + "step": 1093 + }, + { + "epoch": 0.6645406226271829, + "grad_norm": 1.0565783940509939, + "learning_rate": 3.069459427510014e-06, + "loss": 0.2442, + "step": 1094 + }, + { + "epoch": 0.6651480637813212, + "grad_norm": 0.8822214330244847, + "learning_rate": 3.0596865347789444e-06, + "loss": 0.2722, + "step": 1095 + }, + { + "epoch": 0.6657555049354594, + "grad_norm": 0.9150480419339629, + "learning_rate": 3.049922361216422e-06, + "loss": 0.2425, + "step": 1096 + }, + { + "epoch": 0.6663629460895976, + "grad_norm": 0.9517674541707122, + "learning_rate": 3.040166950699626e-06, + "loss": 0.2496, + "step": 1097 + }, + { + "epoch": 
0.6669703872437358, + "grad_norm": 0.9599880059742387, + "learning_rate": 3.0304203470663507e-06, + "loss": 0.2619, + "step": 1098 + }, + { + "epoch": 0.6675778283978739, + "grad_norm": 0.9460242432498148, + "learning_rate": 3.0206825941148203e-06, + "loss": 0.3065, + "step": 1099 + }, + { + "epoch": 0.6681852695520122, + "grad_norm": 1.4106793360221765, + "learning_rate": 3.0109537356034856e-06, + "loss": 0.2737, + "step": 1100 + }, + { + "epoch": 0.6687927107061503, + "grad_norm": 1.2737238922916891, + "learning_rate": 3.001233815250823e-06, + "loss": 0.2899, + "step": 1101 + }, + { + "epoch": 0.6694001518602886, + "grad_norm": 0.9517912503819469, + "learning_rate": 2.991522876735154e-06, + "loss": 0.2624, + "step": 1102 + }, + { + "epoch": 0.6700075930144267, + "grad_norm": 1.015322451634877, + "learning_rate": 2.981820963694427e-06, + "loss": 0.2301, + "step": 1103 + }, + { + "epoch": 0.6706150341685649, + "grad_norm": 1.4417460163504778, + "learning_rate": 2.9721281197260427e-06, + "loss": 0.2864, + "step": 1104 + }, + { + "epoch": 0.6712224753227031, + "grad_norm": 1.3786447145331062, + "learning_rate": 2.9624443883866403e-06, + "loss": 0.2441, + "step": 1105 + }, + { + "epoch": 0.6718299164768413, + "grad_norm": 1.0007659923579586, + "learning_rate": 2.9527698131919156e-06, + "loss": 0.2891, + "step": 1106 + }, + { + "epoch": 0.6724373576309794, + "grad_norm": 1.1633207582868845, + "learning_rate": 2.9431044376164165e-06, + "loss": 0.2978, + "step": 1107 + }, + { + "epoch": 0.6730447987851177, + "grad_norm": 1.032656472550036, + "learning_rate": 2.9334483050933506e-06, + "loss": 0.2507, + "step": 1108 + }, + { + "epoch": 0.6736522399392559, + "grad_norm": 0.9333208809560491, + "learning_rate": 2.9238014590143925e-06, + "loss": 0.2376, + "step": 1109 + }, + { + "epoch": 0.6742596810933941, + "grad_norm": 1.1289827469541969, + "learning_rate": 2.91416394272948e-06, + "loss": 0.2582, + "step": 1110 + }, + { + "epoch": 0.6748671222475323, + "grad_norm": 
0.925760485434696, + "learning_rate": 2.904535799546636e-06, + "loss": 0.2177, + "step": 1111 + }, + { + "epoch": 0.6754745634016704, + "grad_norm": 0.9162175321455921, + "learning_rate": 2.894917072731753e-06, + "loss": 0.2607, + "step": 1112 + }, + { + "epoch": 0.6760820045558087, + "grad_norm": 1.0213577123976934, + "learning_rate": 2.8853078055084192e-06, + "loss": 0.2588, + "step": 1113 + }, + { + "epoch": 0.6766894457099468, + "grad_norm": 0.9140846537600611, + "learning_rate": 2.8757080410577042e-06, + "loss": 0.2701, + "step": 1114 + }, + { + "epoch": 0.6772968868640851, + "grad_norm": 0.9558677038661029, + "learning_rate": 2.866117822517982e-06, + "loss": 0.2078, + "step": 1115 + }, + { + "epoch": 0.6779043280182232, + "grad_norm": 1.6365480186656665, + "learning_rate": 2.8565371929847286e-06, + "loss": 0.2519, + "step": 1116 + }, + { + "epoch": 0.6785117691723614, + "grad_norm": 0.8999981416609766, + "learning_rate": 2.846966195510332e-06, + "loss": 0.2586, + "step": 1117 + }, + { + "epoch": 0.6791192103264996, + "grad_norm": 0.8986580797788825, + "learning_rate": 2.83740487310389e-06, + "loss": 0.2651, + "step": 1118 + }, + { + "epoch": 0.6797266514806378, + "grad_norm": 1.0174347095785217, + "learning_rate": 2.82785326873103e-06, + "loss": 0.2593, + "step": 1119 + }, + { + "epoch": 0.680334092634776, + "grad_norm": 0.922083211098202, + "learning_rate": 2.81831142531371e-06, + "loss": 0.2597, + "step": 1120 + }, + { + "epoch": 0.6809415337889142, + "grad_norm": 1.006983963737431, + "learning_rate": 2.8087793857300193e-06, + "loss": 0.2682, + "step": 1121 + }, + { + "epoch": 0.6815489749430524, + "grad_norm": 1.8659752629573085, + "learning_rate": 2.7992571928139984e-06, + "loss": 0.2481, + "step": 1122 + }, + { + "epoch": 0.6821564160971906, + "grad_norm": 0.9701288254765418, + "learning_rate": 2.7897448893554335e-06, + "loss": 0.2581, + "step": 1123 + }, + { + "epoch": 0.6827638572513288, + "grad_norm": 0.9122901540097156, + "learning_rate": 
2.780242518099675e-06, + "loss": 0.2503, + "step": 1124 + }, + { + "epoch": 0.683371298405467, + "grad_norm": 0.9807303601001196, + "learning_rate": 2.7707501217474443e-06, + "loss": 0.2744, + "step": 1125 + }, + { + "epoch": 0.6839787395596052, + "grad_norm": 6.645271440733989, + "learning_rate": 2.761267742954629e-06, + "loss": 0.2524, + "step": 1126 + }, + { + "epoch": 0.6845861807137433, + "grad_norm": 1.534147172507746, + "learning_rate": 2.7517954243321097e-06, + "loss": 0.2659, + "step": 1127 + }, + { + "epoch": 0.6851936218678816, + "grad_norm": 0.9373688303596897, + "learning_rate": 2.7423332084455543e-06, + "loss": 0.2851, + "step": 1128 + }, + { + "epoch": 0.6858010630220197, + "grad_norm": 0.8831368522306644, + "learning_rate": 2.7328811378152355e-06, + "loss": 0.2557, + "step": 1129 + }, + { + "epoch": 0.686408504176158, + "grad_norm": 0.9679109580287109, + "learning_rate": 2.723439254915834e-06, + "loss": 0.275, + "step": 1130 + }, + { + "epoch": 0.6870159453302961, + "grad_norm": 1.2218248931218192, + "learning_rate": 2.714007602176254e-06, + "loss": 0.2413, + "step": 1131 + }, + { + "epoch": 0.6876233864844343, + "grad_norm": 0.9708101090046806, + "learning_rate": 2.704586221979422e-06, + "loss": 0.2645, + "step": 1132 + }, + { + "epoch": 0.6882308276385725, + "grad_norm": 1.2522386234048026, + "learning_rate": 2.695175156662107e-06, + "loss": 0.2574, + "step": 1133 + }, + { + "epoch": 0.6888382687927107, + "grad_norm": 0.8419129242667286, + "learning_rate": 2.6857744485147286e-06, + "loss": 0.2383, + "step": 1134 + }, + { + "epoch": 0.689445709946849, + "grad_norm": 1.0193831420490371, + "learning_rate": 2.6763841397811576e-06, + "loss": 0.2735, + "step": 1135 + }, + { + "epoch": 0.6900531511009871, + "grad_norm": 0.9241296407909637, + "learning_rate": 2.667004272658541e-06, + "loss": 0.2768, + "step": 1136 + }, + { + "epoch": 0.6906605922551253, + "grad_norm": 1.1359832523999245, + "learning_rate": 2.6576348892970947e-06, + "loss": 0.2636, + 
"step": 1137 + }, + { + "epoch": 0.6912680334092635, + "grad_norm": 0.8941017093322563, + "learning_rate": 2.6482760317999338e-06, + "loss": 0.2559, + "step": 1138 + }, + { + "epoch": 0.6918754745634017, + "grad_norm": 0.8968310666010292, + "learning_rate": 2.638927742222868e-06, + "loss": 0.2537, + "step": 1139 + }, + { + "epoch": 0.6924829157175398, + "grad_norm": 0.959662408955417, + "learning_rate": 2.629590062574221e-06, + "loss": 0.2656, + "step": 1140 + }, + { + "epoch": 0.6930903568716781, + "grad_norm": 0.9000247977135409, + "learning_rate": 2.6202630348146323e-06, + "loss": 0.2899, + "step": 1141 + }, + { + "epoch": 0.6936977980258162, + "grad_norm": 1.0079650585712254, + "learning_rate": 2.610946700856885e-06, + "loss": 0.267, + "step": 1142 + }, + { + "epoch": 0.6943052391799545, + "grad_norm": 0.8890790658066724, + "learning_rate": 2.6016411025656973e-06, + "loss": 0.2535, + "step": 1143 + }, + { + "epoch": 0.6949126803340926, + "grad_norm": 0.9198656469536414, + "learning_rate": 2.592346281757552e-06, + "loss": 0.2509, + "step": 1144 + }, + { + "epoch": 0.6955201214882308, + "grad_norm": 0.9738974660627011, + "learning_rate": 2.583062280200501e-06, + "loss": 0.2593, + "step": 1145 + }, + { + "epoch": 0.696127562642369, + "grad_norm": 0.9837219510435016, + "learning_rate": 2.5737891396139713e-06, + "loss": 0.255, + "step": 1146 + }, + { + "epoch": 0.6967350037965072, + "grad_norm": 0.9076420454349192, + "learning_rate": 2.5645269016685905e-06, + "loss": 0.2704, + "step": 1147 + }, + { + "epoch": 0.6973424449506455, + "grad_norm": 0.8898433106915349, + "learning_rate": 2.5552756079859904e-06, + "loss": 0.2594, + "step": 1148 + }, + { + "epoch": 0.6979498861047836, + "grad_norm": 0.9063509680084296, + "learning_rate": 2.5460353001386263e-06, + "loss": 0.2529, + "step": 1149 + }, + { + "epoch": 0.6985573272589218, + "grad_norm": 0.8948107859742076, + "learning_rate": 2.5368060196495785e-06, + "loss": 0.2564, + "step": 1150 + }, + { + "epoch": 
0.69916476841306, + "grad_norm": 0.8945325429627021, + "learning_rate": 2.527587807992383e-06, + "loss": 0.2387, + "step": 1151 + }, + { + "epoch": 0.6997722095671982, + "grad_norm": 0.9769838924293288, + "learning_rate": 2.5183807065908296e-06, + "loss": 0.2542, + "step": 1152 + }, + { + "epoch": 0.7003796507213363, + "grad_norm": 0.9645249197834942, + "learning_rate": 2.5091847568187834e-06, + "loss": 0.2281, + "step": 1153 + }, + { + "epoch": 0.7009870918754746, + "grad_norm": 0.9496141254681564, + "learning_rate": 2.5000000000000015e-06, + "loss": 0.241, + "step": 1154 + }, + { + "epoch": 0.7015945330296127, + "grad_norm": 0.9380133404008738, + "learning_rate": 2.4908264774079355e-06, + "loss": 0.2605, + "step": 1155 + }, + { + "epoch": 0.702201974183751, + "grad_norm": 0.9074627496390306, + "learning_rate": 2.4816642302655634e-06, + "loss": 0.2541, + "step": 1156 + }, + { + "epoch": 0.7028094153378891, + "grad_norm": 1.0027152368026724, + "learning_rate": 2.4725132997451833e-06, + "loss": 0.2601, + "step": 1157 + }, + { + "epoch": 0.7034168564920273, + "grad_norm": 2.6289745813296284, + "learning_rate": 2.4633737269682546e-06, + "loss": 0.3022, + "step": 1158 + }, + { + "epoch": 0.7040242976461655, + "grad_norm": 0.9370322619588107, + "learning_rate": 2.454245553005184e-06, + "loss": 0.2643, + "step": 1159 + }, + { + "epoch": 0.7046317388003037, + "grad_norm": 1.468995991623161, + "learning_rate": 2.445128818875166e-06, + "loss": 0.2852, + "step": 1160 + }, + { + "epoch": 0.705239179954442, + "grad_norm": 0.9901499615769476, + "learning_rate": 2.4360235655459804e-06, + "loss": 0.3014, + "step": 1161 + }, + { + "epoch": 0.7058466211085801, + "grad_norm": 0.9762587690316699, + "learning_rate": 2.4269298339338205e-06, + "loss": 0.2464, + "step": 1162 + }, + { + "epoch": 0.7064540622627183, + "grad_norm": 0.9823369995064071, + "learning_rate": 2.4178476649031057e-06, + "loss": 0.2611, + "step": 1163 + }, + { + "epoch": 0.7070615034168565, + "grad_norm": 
1.040540505906759, + "learning_rate": 2.408777099266291e-06, + "loss": 0.2628, + "step": 1164 + }, + { + "epoch": 0.7076689445709947, + "grad_norm": 0.9626462256229749, + "learning_rate": 2.3997181777836955e-06, + "loss": 0.3069, + "step": 1165 + }, + { + "epoch": 0.7082763857251329, + "grad_norm": 1.2283451848928204, + "learning_rate": 2.3906709411633073e-06, + "loss": 0.2405, + "step": 1166 + }, + { + "epoch": 0.7088838268792711, + "grad_norm": 0.9137970515612295, + "learning_rate": 2.381635430060611e-06, + "loss": 0.28, + "step": 1167 + }, + { + "epoch": 0.7094912680334092, + "grad_norm": 0.9656636216601993, + "learning_rate": 2.3726116850783987e-06, + "loss": 0.2696, + "step": 1168 + }, + { + "epoch": 0.7100987091875475, + "grad_norm": 0.9461355671018838, + "learning_rate": 2.3635997467665905e-06, + "loss": 0.2527, + "step": 1169 + }, + { + "epoch": 0.7107061503416856, + "grad_norm": 0.8499788622610774, + "learning_rate": 2.354599655622049e-06, + "loss": 0.2425, + "step": 1170 + }, + { + "epoch": 0.7113135914958238, + "grad_norm": 0.9394600691367851, + "learning_rate": 2.3456114520883956e-06, + "loss": 0.2478, + "step": 1171 + }, + { + "epoch": 0.711921032649962, + "grad_norm": 0.9539174173321666, + "learning_rate": 2.3366351765558437e-06, + "loss": 0.2552, + "step": 1172 + }, + { + "epoch": 0.7125284738041002, + "grad_norm": 1.0414224878560323, + "learning_rate": 2.3276708693609947e-06, + "loss": 0.2798, + "step": 1173 + }, + { + "epoch": 0.7131359149582385, + "grad_norm": 0.9245170066700932, + "learning_rate": 2.318718570786675e-06, + "loss": 0.2463, + "step": 1174 + }, + { + "epoch": 0.7137433561123766, + "grad_norm": 0.9803347614971838, + "learning_rate": 2.309778321061742e-06, + "loss": 0.2416, + "step": 1175 + }, + { + "epoch": 0.7143507972665148, + "grad_norm": 0.9130379562604788, + "learning_rate": 2.3008501603609147e-06, + "loss": 0.275, + "step": 1176 + }, + { + "epoch": 0.714958238420653, + "grad_norm": 0.8761644255482913, + "learning_rate": 
2.2919341288045853e-06, + "loss": 0.2502, + "step": 1177 + }, + { + "epoch": 0.7155656795747912, + "grad_norm": 0.9584496523002601, + "learning_rate": 2.283030266458644e-06, + "loss": 0.2754, + "step": 1178 + }, + { + "epoch": 0.7161731207289294, + "grad_norm": 0.8665475599966695, + "learning_rate": 2.2741386133342923e-06, + "loss": 0.2505, + "step": 1179 + }, + { + "epoch": 0.7167805618830676, + "grad_norm": 0.8576308093825102, + "learning_rate": 2.265259209387867e-06, + "loss": 0.2304, + "step": 1180 + }, + { + "epoch": 0.7173880030372057, + "grad_norm": 0.9335114757524509, + "learning_rate": 2.256392094520664e-06, + "loss": 0.2697, + "step": 1181 + }, + { + "epoch": 0.717995444191344, + "grad_norm": 1.2455184951743299, + "learning_rate": 2.2475373085787568e-06, + "loss": 0.2644, + "step": 1182 + }, + { + "epoch": 0.7186028853454821, + "grad_norm": 1.8108379220736726, + "learning_rate": 2.238694891352814e-06, + "loss": 0.2637, + "step": 1183 + }, + { + "epoch": 0.7192103264996204, + "grad_norm": 0.884996855921727, + "learning_rate": 2.229864882577921e-06, + "loss": 0.2303, + "step": 1184 + }, + { + "epoch": 0.7198177676537585, + "grad_norm": 0.9318262199049523, + "learning_rate": 2.2210473219334083e-06, + "loss": 0.255, + "step": 1185 + }, + { + "epoch": 0.7204252088078967, + "grad_norm": 0.8164042083609618, + "learning_rate": 2.2122422490426676e-06, + "loss": 0.2384, + "step": 1186 + }, + { + "epoch": 0.721032649962035, + "grad_norm": 1.0115748704170144, + "learning_rate": 2.203449703472969e-06, + "loss": 0.268, + "step": 1187 + }, + { + "epoch": 0.7216400911161731, + "grad_norm": 1.1883752167190924, + "learning_rate": 2.194669724735296e-06, + "loss": 0.2755, + "step": 1188 + }, + { + "epoch": 0.7222475322703114, + "grad_norm": 1.0308833660996168, + "learning_rate": 2.1859023522841543e-06, + "loss": 0.2327, + "step": 1189 + }, + { + "epoch": 0.7228549734244495, + "grad_norm": 0.9120682340740801, + "learning_rate": 2.1771476255174056e-06, + "loss": 0.2735, + 
"step": 1190 + }, + { + "epoch": 0.7234624145785877, + "grad_norm": 1.013727297073577, + "learning_rate": 2.1684055837760837e-06, + "loss": 0.2757, + "step": 1191 + }, + { + "epoch": 0.7240698557327259, + "grad_norm": 0.9458973783887059, + "learning_rate": 2.159676266344222e-06, + "loss": 0.268, + "step": 1192 + }, + { + "epoch": 0.7246772968868641, + "grad_norm": 1.3758680862531418, + "learning_rate": 2.1509597124486693e-06, + "loss": 0.2367, + "step": 1193 + }, + { + "epoch": 0.7252847380410022, + "grad_norm": 1.2033412446646528, + "learning_rate": 2.1422559612589266e-06, + "loss": 0.2964, + "step": 1194 + }, + { + "epoch": 0.7258921791951405, + "grad_norm": 1.0020458048011924, + "learning_rate": 2.1335650518869555e-06, + "loss": 0.2625, + "step": 1195 + }, + { + "epoch": 0.7264996203492786, + "grad_norm": 0.956244480904364, + "learning_rate": 2.124887023387017e-06, + "loss": 0.2974, + "step": 1196 + }, + { + "epoch": 0.7271070615034169, + "grad_norm": 0.969906166075625, + "learning_rate": 2.1162219147554884e-06, + "loss": 0.2858, + "step": 1197 + }, + { + "epoch": 0.727714502657555, + "grad_norm": 1.242369815476469, + "learning_rate": 2.1075697649306838e-06, + "loss": 0.2651, + "step": 1198 + }, + { + "epoch": 0.7283219438116932, + "grad_norm": 1.0002620329734284, + "learning_rate": 2.09893061279269e-06, + "loss": 0.2611, + "step": 1199 + }, + { + "epoch": 0.7289293849658315, + "grad_norm": 1.2055397902089622, + "learning_rate": 2.0903044971631854e-06, + "loss": 0.2498, + "step": 1200 + }, + { + "epoch": 0.7295368261199696, + "grad_norm": 0.9189460422388009, + "learning_rate": 2.0816914568052664e-06, + "loss": 0.2549, + "step": 1201 + }, + { + "epoch": 0.7301442672741079, + "grad_norm": 0.9278551686313573, + "learning_rate": 2.0730915304232692e-06, + "loss": 0.2753, + "step": 1202 + }, + { + "epoch": 0.730751708428246, + "grad_norm": 0.9259931376538301, + "learning_rate": 2.0645047566626057e-06, + "loss": 0.2429, + "step": 1203 + }, + { + "epoch": 
0.7313591495823842, + "grad_norm": 1.0024167165948739, + "learning_rate": 2.055931174109579e-06, + "loss": 0.2923, + "step": 1204 + }, + { + "epoch": 0.7319665907365224, + "grad_norm": 0.8930847111145748, + "learning_rate": 2.0473708212912167e-06, + "loss": 0.2416, + "step": 1205 + }, + { + "epoch": 0.7325740318906606, + "grad_norm": 0.8864928181427251, + "learning_rate": 2.0388237366751005e-06, + "loss": 0.2538, + "step": 1206 + }, + { + "epoch": 0.7331814730447987, + "grad_norm": 0.8850193923988731, + "learning_rate": 2.030289958669181e-06, + "loss": 0.2649, + "step": 1207 + }, + { + "epoch": 0.733788914198937, + "grad_norm": 0.9739541510077735, + "learning_rate": 2.02176952562162e-06, + "loss": 0.2517, + "step": 1208 + }, + { + "epoch": 0.7343963553530751, + "grad_norm": 0.9799826210952297, + "learning_rate": 2.013262475820602e-06, + "loss": 0.2716, + "step": 1209 + }, + { + "epoch": 0.7350037965072134, + "grad_norm": 1.0600437940139478, + "learning_rate": 2.004768847494186e-06, + "loss": 0.2365, + "step": 1210 + }, + { + "epoch": 0.7356112376613515, + "grad_norm": 0.9207976884534176, + "learning_rate": 1.996288678810105e-06, + "loss": 0.2632, + "step": 1211 + }, + { + "epoch": 0.7362186788154897, + "grad_norm": 1.0260990612904581, + "learning_rate": 1.987822007875617e-06, + "loss": 0.2675, + "step": 1212 + }, + { + "epoch": 0.736826119969628, + "grad_norm": 1.0643126752862775, + "learning_rate": 1.979368872737319e-06, + "loss": 0.2282, + "step": 1213 + }, + { + "epoch": 0.7374335611237661, + "grad_norm": 1.002822192943713, + "learning_rate": 1.9709293113809876e-06, + "loss": 0.237, + "step": 1214 + }, + { + "epoch": 0.7380410022779044, + "grad_norm": 0.8146421380435214, + "learning_rate": 1.962503361731403e-06, + "loss": 0.2347, + "step": 1215 + }, + { + "epoch": 0.7386484434320425, + "grad_norm": 1.44451984462559, + "learning_rate": 1.954091061652172e-06, + "loss": 0.249, + "step": 1216 + }, + { + "epoch": 0.7392558845861807, + "grad_norm": 1.0048037676428558, 
+ "learning_rate": 1.945692448945574e-06, + "loss": 0.2684, + "step": 1217 + }, + { + "epoch": 0.7398633257403189, + "grad_norm": 0.9134185361949188, + "learning_rate": 1.9373075613523728e-06, + "loss": 0.269, + "step": 1218 + }, + { + "epoch": 0.7404707668944571, + "grad_norm": 1.3119909261331324, + "learning_rate": 1.928936436551661e-06, + "loss": 0.2422, + "step": 1219 + }, + { + "epoch": 0.7410782080485953, + "grad_norm": 0.8520218708347329, + "learning_rate": 1.920579112160685e-06, + "loss": 0.2199, + "step": 1220 + }, + { + "epoch": 0.7416856492027335, + "grad_norm": 2.6495798687566086, + "learning_rate": 1.912235625734676e-06, + "loss": 0.2854, + "step": 1221 + }, + { + "epoch": 0.7422930903568716, + "grad_norm": 1.6880210137763603, + "learning_rate": 1.903906014766681e-06, + "loss": 0.2761, + "step": 1222 + }, + { + "epoch": 0.7429005315110099, + "grad_norm": 0.9375069285048585, + "learning_rate": 1.8955903166873924e-06, + "loss": 0.25, + "step": 1223 + }, + { + "epoch": 0.743507972665148, + "grad_norm": 1.239196974591738, + "learning_rate": 1.8872885688649879e-06, + "loss": 0.2876, + "step": 1224 + }, + { + "epoch": 0.7441154138192863, + "grad_norm": 0.9780811220285901, + "learning_rate": 1.8790008086049534e-06, + "loss": 0.255, + "step": 1225 + }, + { + "epoch": 0.7447228549734245, + "grad_norm": 0.9109861670066071, + "learning_rate": 1.8707270731499223e-06, + "loss": 0.2401, + "step": 1226 + }, + { + "epoch": 0.7453302961275626, + "grad_norm": 0.9572237400053779, + "learning_rate": 1.862467399679499e-06, + "loss": 0.2855, + "step": 1227 + }, + { + "epoch": 0.7459377372817009, + "grad_norm": 0.9835745008775473, + "learning_rate": 1.854221825310103e-06, + "loss": 0.2376, + "step": 1228 + }, + { + "epoch": 0.746545178435839, + "grad_norm": 0.9473916357579606, + "learning_rate": 1.8459903870947954e-06, + "loss": 0.277, + "step": 1229 + }, + { + "epoch": 0.7471526195899773, + "grad_norm": 0.8905602752123002, + "learning_rate": 1.8377731220231144e-06, + 
"loss": 0.2506, + "step": 1230 + }, + { + "epoch": 0.7477600607441154, + "grad_norm": 0.9604834177033152, + "learning_rate": 1.829570067020906e-06, + "loss": 0.2448, + "step": 1231 + }, + { + "epoch": 0.7483675018982536, + "grad_norm": 0.8845596702356572, + "learning_rate": 1.8213812589501611e-06, + "loss": 0.2547, + "step": 1232 + }, + { + "epoch": 0.7489749430523918, + "grad_norm": 0.9925482849261942, + "learning_rate": 1.813206734608851e-06, + "loss": 0.2603, + "step": 1233 + }, + { + "epoch": 0.74958238420653, + "grad_norm": 1.0974123543068024, + "learning_rate": 1.8050465307307602e-06, + "loss": 0.2461, + "step": 1234 + }, + { + "epoch": 0.7501898253606681, + "grad_norm": 0.9382971676426907, + "learning_rate": 1.7969006839853227e-06, + "loss": 0.2226, + "step": 1235 + }, + { + "epoch": 0.7507972665148064, + "grad_norm": 1.6609571035124198, + "learning_rate": 1.78876923097745e-06, + "loss": 0.2553, + "step": 1236 + }, + { + "epoch": 0.7514047076689445, + "grad_norm": 1.0951347816044532, + "learning_rate": 1.7806522082473809e-06, + "loss": 0.2549, + "step": 1237 + }, + { + "epoch": 0.7520121488230828, + "grad_norm": 0.9563113436198466, + "learning_rate": 1.7725496522704998e-06, + "loss": 0.2582, + "step": 1238 + }, + { + "epoch": 0.752619589977221, + "grad_norm": 0.9531971950924529, + "learning_rate": 1.7644615994571934e-06, + "loss": 0.2509, + "step": 1239 + }, + { + "epoch": 0.7532270311313591, + "grad_norm": 1.0124203802056453, + "learning_rate": 1.7563880861526656e-06, + "loss": 0.2444, + "step": 1240 + }, + { + "epoch": 0.7538344722854974, + "grad_norm": 0.9577538726500576, + "learning_rate": 1.748329148636787e-06, + "loss": 0.2236, + "step": 1241 + }, + { + "epoch": 0.7544419134396355, + "grad_norm": 0.9178846740365786, + "learning_rate": 1.7402848231239317e-06, + "loss": 0.2544, + "step": 1242 + }, + { + "epoch": 0.7550493545937738, + "grad_norm": 0.9382975967726378, + "learning_rate": 1.73225514576281e-06, + "loss": 0.2665, + "step": 1243 + }, + { + 
"epoch": 0.7556567957479119, + "grad_norm": 1.3363746570955906, + "learning_rate": 1.7242401526363095e-06, + "loss": 0.2745, + "step": 1244 + }, + { + "epoch": 0.7562642369020501, + "grad_norm": 1.3353177833317331, + "learning_rate": 1.7162398797613284e-06, + "loss": 0.251, + "step": 1245 + }, + { + "epoch": 0.7568716780561883, + "grad_norm": 1.0235724081052306, + "learning_rate": 1.70825436308862e-06, + "loss": 0.2699, + "step": 1246 + }, + { + "epoch": 0.7574791192103265, + "grad_norm": 0.9858229899690142, + "learning_rate": 1.7002836385026234e-06, + "loss": 0.2429, + "step": 1247 + }, + { + "epoch": 0.7580865603644646, + "grad_norm": 1.1427480361449285, + "learning_rate": 1.692327741821312e-06, + "loss": 0.2733, + "step": 1248 + }, + { + "epoch": 0.7586940015186029, + "grad_norm": 0.9835216641075001, + "learning_rate": 1.6843867087960252e-06, + "loss": 0.2671, + "step": 1249 + }, + { + "epoch": 0.7593014426727411, + "grad_norm": 0.9784659480769877, + "learning_rate": 1.676460575111306e-06, + "loss": 0.2515, + "step": 1250 + }, + { + "epoch": 0.7599088838268793, + "grad_norm": 0.9481282295943736, + "learning_rate": 1.6685493763847515e-06, + "loss": 0.259, + "step": 1251 + }, + { + "epoch": 0.7605163249810175, + "grad_norm": 1.0452063539012255, + "learning_rate": 1.6606531481668364e-06, + "loss": 0.2633, + "step": 1252 + }, + { + "epoch": 0.7611237661351556, + "grad_norm": 0.9745231850326872, + "learning_rate": 1.6527719259407743e-06, + "loss": 0.249, + "step": 1253 + }, + { + "epoch": 0.7617312072892939, + "grad_norm": 1.3873562933934764, + "learning_rate": 1.6449057451223354e-06, + "loss": 0.253, + "step": 1254 + }, + { + "epoch": 0.762338648443432, + "grad_norm": 1.1512772469160202, + "learning_rate": 1.6370546410597066e-06, + "loss": 0.2799, + "step": 1255 + }, + { + "epoch": 0.7629460895975703, + "grad_norm": 1.0250344913669225, + "learning_rate": 1.6292186490333172e-06, + "loss": 0.265, + "step": 1256 + }, + { + "epoch": 0.7635535307517084, + "grad_norm": 
1.1545097210078017, + "learning_rate": 1.6213978042556938e-06, + "loss": 0.2319, + "step": 1257 + }, + { + "epoch": 0.7641609719058466, + "grad_norm": 0.9063231723472821, + "learning_rate": 1.6135921418712959e-06, + "loss": 0.2512, + "step": 1258 + }, + { + "epoch": 0.7647684130599848, + "grad_norm": 1.061377737003138, + "learning_rate": 1.6058016969563512e-06, + "loss": 0.2598, + "step": 1259 + }, + { + "epoch": 0.765375854214123, + "grad_norm": 0.9337491753620247, + "learning_rate": 1.5980265045187139e-06, + "loss": 0.2707, + "step": 1260 + }, + { + "epoch": 0.7659832953682612, + "grad_norm": 1.2607704019846233, + "learning_rate": 1.5902665994976896e-06, + "loss": 0.269, + "step": 1261 + }, + { + "epoch": 0.7665907365223994, + "grad_norm": 1.086835681982817, + "learning_rate": 1.5825220167638945e-06, + "loss": 0.2215, + "step": 1262 + }, + { + "epoch": 0.7671981776765376, + "grad_norm": 1.0948737291989328, + "learning_rate": 1.5747927911190858e-06, + "loss": 0.2713, + "step": 1263 + }, + { + "epoch": 0.7678056188306758, + "grad_norm": 1.102976831652949, + "learning_rate": 1.567078957296016e-06, + "loss": 0.266, + "step": 1264 + }, + { + "epoch": 0.768413059984814, + "grad_norm": 1.046644859411465, + "learning_rate": 1.5593805499582659e-06, + "loss": 0.2365, + "step": 1265 + }, + { + "epoch": 0.7690205011389522, + "grad_norm": 0.8613582389098838, + "learning_rate": 1.5516976037000941e-06, + "loss": 0.2188, + "step": 1266 + }, + { + "epoch": 0.7696279422930904, + "grad_norm": 1.7227164484553419, + "learning_rate": 1.544030153046291e-06, + "loss": 0.2567, + "step": 1267 + }, + { + "epoch": 0.7702353834472285, + "grad_norm": 2.92880493600265, + "learning_rate": 1.5363782324520033e-06, + "loss": 0.2803, + "step": 1268 + }, + { + "epoch": 0.7708428246013668, + "grad_norm": 1.0122911998142148, + "learning_rate": 1.528741876302598e-06, + "loss": 0.2772, + "step": 1269 + }, + { + "epoch": 0.7714502657555049, + "grad_norm": 1.3980473384689627, + "learning_rate": 
1.5211211189134955e-06, + "loss": 0.2478, + "step": 1270 + }, + { + "epoch": 0.7720577069096431, + "grad_norm": 0.8508504199116661, + "learning_rate": 1.5135159945300232e-06, + "loss": 0.2401, + "step": 1271 + }, + { + "epoch": 0.7726651480637813, + "grad_norm": 0.9969962158012547, + "learning_rate": 1.5059265373272574e-06, + "loss": 0.2617, + "step": 1272 + }, + { + "epoch": 0.7732725892179195, + "grad_norm": 0.964226927135635, + "learning_rate": 1.4983527814098736e-06, + "loss": 0.267, + "step": 1273 + }, + { + "epoch": 0.7738800303720577, + "grad_norm": 1.0115140691606623, + "learning_rate": 1.4907947608119866e-06, + "loss": 0.2421, + "step": 1274 + }, + { + "epoch": 0.7744874715261959, + "grad_norm": 1.2895393161155704, + "learning_rate": 1.4832525094970007e-06, + "loss": 0.2452, + "step": 1275 + }, + { + "epoch": 0.7750949126803341, + "grad_norm": 0.8408524834828659, + "learning_rate": 1.475726061357463e-06, + "loss": 0.2166, + "step": 1276 + }, + { + "epoch": 0.7757023538344723, + "grad_norm": 1.333514308934424, + "learning_rate": 1.4682154502149025e-06, + "loss": 0.2415, + "step": 1277 + }, + { + "epoch": 0.7763097949886105, + "grad_norm": 1.036857242677457, + "learning_rate": 1.4607207098196851e-06, + "loss": 0.2569, + "step": 1278 + }, + { + "epoch": 0.7769172361427487, + "grad_norm": 7.103965440866741, + "learning_rate": 1.4532418738508525e-06, + "loss": 0.2648, + "step": 1279 + }, + { + "epoch": 0.7775246772968869, + "grad_norm": 0.8400952986765654, + "learning_rate": 1.4457789759159813e-06, + "loss": 0.2018, + "step": 1280 + }, + { + "epoch": 0.778132118451025, + "grad_norm": 1.4757073564314478, + "learning_rate": 1.4383320495510267e-06, + "loss": 0.2616, + "step": 1281 + }, + { + "epoch": 0.7787395596051633, + "grad_norm": 1.004482929976758, + "learning_rate": 1.430901128220174e-06, + "loss": 0.2529, + "step": 1282 + }, + { + "epoch": 0.7793470007593014, + "grad_norm": 1.0139377829258103, + "learning_rate": 1.4234862453156839e-06, + "loss": 0.2756, + 
"step": 1283 + }, + { + "epoch": 0.7799544419134397, + "grad_norm": 1.0100199779353403, + "learning_rate": 1.4160874341577447e-06, + "loss": 0.2484, + "step": 1284 + }, + { + "epoch": 0.7805618830675778, + "grad_norm": 1.1168401047776593, + "learning_rate": 1.4087047279943267e-06, + "loss": 0.2687, + "step": 1285 + }, + { + "epoch": 0.781169324221716, + "grad_norm": 0.9503234909845282, + "learning_rate": 1.4013381600010278e-06, + "loss": 0.2563, + "step": 1286 + }, + { + "epoch": 0.7817767653758542, + "grad_norm": 1.0368290840258114, + "learning_rate": 1.3939877632809279e-06, + "loss": 0.2866, + "step": 1287 + }, + { + "epoch": 0.7823842065299924, + "grad_norm": 1.0086813795279805, + "learning_rate": 1.3866535708644335e-06, + "loss": 0.2418, + "step": 1288 + }, + { + "epoch": 0.7829916476841307, + "grad_norm": 1.1754106526081984, + "learning_rate": 1.3793356157091387e-06, + "loss": 0.2582, + "step": 1289 + }, + { + "epoch": 0.7835990888382688, + "grad_norm": 0.9640662064683282, + "learning_rate": 1.3720339306996666e-06, + "loss": 0.2834, + "step": 1290 + }, + { + "epoch": 0.784206529992407, + "grad_norm": 1.741496452010621, + "learning_rate": 1.3647485486475376e-06, + "loss": 0.2374, + "step": 1291 + }, + { + "epoch": 0.7848139711465452, + "grad_norm": 1.0182189209813342, + "learning_rate": 1.3574795022910014e-06, + "loss": 0.2531, + "step": 1292 + }, + { + "epoch": 0.7854214123006834, + "grad_norm": 0.9760934213660039, + "learning_rate": 1.3502268242949025e-06, + "loss": 0.2575, + "step": 1293 + }, + { + "epoch": 0.7860288534548215, + "grad_norm": 1.8383703679855188, + "learning_rate": 1.3429905472505344e-06, + "loss": 0.2383, + "step": 1294 + }, + { + "epoch": 0.7866362946089598, + "grad_norm": 0.9502317083781607, + "learning_rate": 1.3357707036754875e-06, + "loss": 0.2585, + "step": 1295 + }, + { + "epoch": 0.7872437357630979, + "grad_norm": 0.9297333282490423, + "learning_rate": 1.3285673260135073e-06, + "loss": 0.2452, + "step": 1296 + }, + { + "epoch": 
0.7878511769172362, + "grad_norm": 0.9116623980444865, + "learning_rate": 1.321380446634342e-06, + "loss": 0.2514, + "step": 1297 + }, + { + "epoch": 0.7884586180713743, + "grad_norm": 1.0165136386704785, + "learning_rate": 1.314210097833607e-06, + "loss": 0.2698, + "step": 1298 + }, + { + "epoch": 0.7890660592255125, + "grad_norm": 1.0097975118483586, + "learning_rate": 1.3070563118326295e-06, + "loss": 0.2623, + "step": 1299 + }, + { + "epoch": 0.7896735003796507, + "grad_norm": 0.9511823883591485, + "learning_rate": 1.2999191207783129e-06, + "loss": 0.2227, + "step": 1300 + }, + { + "epoch": 0.7902809415337889, + "grad_norm": 1.083790995951702, + "learning_rate": 1.2927985567429868e-06, + "loss": 0.2386, + "step": 1301 + }, + { + "epoch": 0.7908883826879272, + "grad_norm": 0.9289196009158714, + "learning_rate": 1.2856946517242608e-06, + "loss": 0.2299, + "step": 1302 + }, + { + "epoch": 0.7914958238420653, + "grad_norm": 0.9764047061340886, + "learning_rate": 1.27860743764489e-06, + "loss": 0.2536, + "step": 1303 + }, + { + "epoch": 0.7921032649962035, + "grad_norm": 1.066452400553731, + "learning_rate": 1.2715369463526173e-06, + "loss": 0.2485, + "step": 1304 + }, + { + "epoch": 0.7927107061503417, + "grad_norm": 0.8830887915707148, + "learning_rate": 1.2644832096200498e-06, + "loss": 0.241, + "step": 1305 + }, + { + "epoch": 0.7933181473044799, + "grad_norm": 0.8852127517971522, + "learning_rate": 1.257446259144494e-06, + "loss": 0.2236, + "step": 1306 + }, + { + "epoch": 0.793925588458618, + "grad_norm": 0.9428821852573767, + "learning_rate": 1.2504261265478324e-06, + "loss": 0.2552, + "step": 1307 + }, + { + "epoch": 0.7945330296127563, + "grad_norm": 1.0320142315688623, + "learning_rate": 1.2434228433763657e-06, + "loss": 0.2469, + "step": 1308 + }, + { + "epoch": 0.7951404707668944, + "grad_norm": 1.0170174001133827, + "learning_rate": 1.2364364411006841e-06, + "loss": 0.2437, + "step": 1309 + }, + { + "epoch": 0.7957479119210327, + "grad_norm": 
1.0560205729362242, + "learning_rate": 1.2294669511155193e-06, + "loss": 0.2327, + "step": 1310 + }, + { + "epoch": 0.7963553530751708, + "grad_norm": 1.239053820039574, + "learning_rate": 1.2225144047396015e-06, + "loss": 0.2627, + "step": 1311 + }, + { + "epoch": 0.796962794229309, + "grad_norm": 1.0988583237532765, + "learning_rate": 1.215578833215526e-06, + "loss": 0.262, + "step": 1312 + }, + { + "epoch": 0.7975702353834472, + "grad_norm": 0.867925390100386, + "learning_rate": 1.2086602677096033e-06, + "loss": 0.2416, + "step": 1313 + }, + { + "epoch": 0.7981776765375854, + "grad_norm": 1.2397806394064825, + "learning_rate": 1.201758739311728e-06, + "loss": 0.2478, + "step": 1314 + }, + { + "epoch": 0.7987851176917237, + "grad_norm": 1.7686159599994773, + "learning_rate": 1.1948742790352342e-06, + "loss": 0.2663, + "step": 1315 + }, + { + "epoch": 0.7993925588458618, + "grad_norm": 1.23788413067218, + "learning_rate": 1.1880069178167586e-06, + "loss": 0.2271, + "step": 1316 + }, + { + "epoch": 0.8, + "grad_norm": 0.899946412274731, + "learning_rate": 1.1811566865160961e-06, + "loss": 0.222, + "step": 1317 + }, + { + "epoch": 0.8006074411541382, + "grad_norm": 0.87270457464745, + "learning_rate": 1.1743236159160654e-06, + "loss": 0.2592, + "step": 1318 + }, + { + "epoch": 0.8012148823082764, + "grad_norm": 1.3219310442302556, + "learning_rate": 1.167507736722377e-06, + "loss": 0.266, + "step": 1319 + }, + { + "epoch": 0.8018223234624146, + "grad_norm": 0.9267745860014674, + "learning_rate": 1.1607090795634802e-06, + "loss": 0.249, + "step": 1320 + }, + { + "epoch": 0.8024297646165528, + "grad_norm": 1.145281359593592, + "learning_rate": 1.15392767499044e-06, + "loss": 0.273, + "step": 1321 + }, + { + "epoch": 0.8030372057706909, + "grad_norm": 0.9943238830649733, + "learning_rate": 1.1471635534767877e-06, + "loss": 0.2713, + "step": 1322 + }, + { + "epoch": 0.8036446469248292, + "grad_norm": 0.8925102247035651, + "learning_rate": 1.1404167454183957e-06, + 
"loss": 0.2509, + "step": 1323 + }, + { + "epoch": 0.8042520880789673, + "grad_norm": 2.8209341271672557, + "learning_rate": 1.133687281133331e-06, + "loss": 0.2414, + "step": 1324 + }, + { + "epoch": 0.8048595292331056, + "grad_norm": 0.8913737685748099, + "learning_rate": 1.1269751908617277e-06, + "loss": 0.2382, + "step": 1325 + }, + { + "epoch": 0.8054669703872437, + "grad_norm": 0.8955833156392411, + "learning_rate": 1.1202805047656406e-06, + "loss": 0.2336, + "step": 1326 + }, + { + "epoch": 0.8060744115413819, + "grad_norm": 1.1427272635049914, + "learning_rate": 1.113603252928917e-06, + "loss": 0.2576, + "step": 1327 + }, + { + "epoch": 0.8066818526955202, + "grad_norm": 0.9867069146988969, + "learning_rate": 1.1069434653570633e-06, + "loss": 0.2703, + "step": 1328 + }, + { + "epoch": 0.8072892938496583, + "grad_norm": 1.0341115309067341, + "learning_rate": 1.1003011719771046e-06, + "loss": 0.251, + "step": 1329 + }, + { + "epoch": 0.8078967350037966, + "grad_norm": 0.941968386464464, + "learning_rate": 1.0936764026374547e-06, + "loss": 0.2523, + "step": 1330 + }, + { + "epoch": 0.8085041761579347, + "grad_norm": 0.9109334482631996, + "learning_rate": 1.0870691871077738e-06, + "loss": 0.2573, + "step": 1331 + }, + { + "epoch": 0.8091116173120729, + "grad_norm": 1.1679903287794757, + "learning_rate": 1.0804795550788473e-06, + "loss": 0.2727, + "step": 1332 + }, + { + "epoch": 0.8097190584662111, + "grad_norm": 1.0262961821675425, + "learning_rate": 1.073907536162443e-06, + "loss": 0.2499, + "step": 1333 + }, + { + "epoch": 0.8103264996203493, + "grad_norm": 0.8638969582311489, + "learning_rate": 1.0673531598911824e-06, + "loss": 0.2077, + "step": 1334 + }, + { + "epoch": 0.8109339407744874, + "grad_norm": 1.065445241867707, + "learning_rate": 1.0608164557184042e-06, + "loss": 0.2733, + "step": 1335 + }, + { + "epoch": 0.8115413819286257, + "grad_norm": 1.615302331483808, + "learning_rate": 1.0542974530180327e-06, + "loss": 0.2712, + "step": 1336 + }, + { + 
"epoch": 0.8121488230827638, + "grad_norm": 0.9558232094508515, + "learning_rate": 1.0477961810844517e-06, + "loss": 0.281, + "step": 1337 + }, + { + "epoch": 0.8127562642369021, + "grad_norm": 1.0153735128244517, + "learning_rate": 1.0413126691323667e-06, + "loss": 0.2521, + "step": 1338 + }, + { + "epoch": 0.8133637053910402, + "grad_norm": 0.9566927051256368, + "learning_rate": 1.0348469462966753e-06, + "loss": 0.2869, + "step": 1339 + }, + { + "epoch": 0.8139711465451784, + "grad_norm": 1.2528160410336169, + "learning_rate": 1.0283990416323336e-06, + "loss": 0.2747, + "step": 1340 + }, + { + "epoch": 0.8145785876993167, + "grad_norm": 0.8811463059194917, + "learning_rate": 1.0219689841142343e-06, + "loss": 0.2071, + "step": 1341 + }, + { + "epoch": 0.8151860288534548, + "grad_norm": 1.9173571112957752, + "learning_rate": 1.0155568026370637e-06, + "loss": 0.2345, + "step": 1342 + }, + { + "epoch": 0.8157934700075931, + "grad_norm": 1.0402835209362526, + "learning_rate": 1.0091625260151827e-06, + "loss": 0.2435, + "step": 1343 + }, + { + "epoch": 0.8164009111617312, + "grad_norm": 1.6822710586636964, + "learning_rate": 1.0027861829824953e-06, + "loss": 0.287, + "step": 1344 + }, + { + "epoch": 0.8170083523158694, + "grad_norm": 1.397843369835043, + "learning_rate": 9.964278021923107e-07, + "loss": 0.2605, + "step": 1345 + }, + { + "epoch": 0.8176157934700076, + "grad_norm": 1.123716581875761, + "learning_rate": 9.900874122172294e-07, + "loss": 0.2647, + "step": 1346 + }, + { + "epoch": 0.8182232346241458, + "grad_norm": 1.0901754083400064, + "learning_rate": 9.83765041548998e-07, + "loss": 0.2707, + "step": 1347 + }, + { + "epoch": 0.818830675778284, + "grad_norm": 0.9517248980182025, + "learning_rate": 9.774607185984004e-07, + "loss": 0.2515, + "step": 1348 + }, + { + "epoch": 0.8194381169324222, + "grad_norm": 0.93504230650595, + "learning_rate": 9.711744716951093e-07, + "loss": 0.241, + "step": 1349 + }, + { + "epoch": 0.8200455580865603, + "grad_norm": 
1.0424392516819492, + "learning_rate": 9.649063290875771e-07, + "loss": 0.2197, + "step": 1350 + }, + { + "epoch": 0.8206529992406986, + "grad_norm": 2.171189768685288, + "learning_rate": 9.586563189428954e-07, + "loss": 0.2367, + "step": 1351 + }, + { + "epoch": 0.8212604403948367, + "grad_norm": 0.9333141948132236, + "learning_rate": 9.524244693466773e-07, + "loss": 0.2391, + "step": 1352 + }, + { + "epoch": 0.8218678815489749, + "grad_norm": 0.986280542191797, + "learning_rate": 9.462108083029287e-07, + "loss": 0.247, + "step": 1353 + }, + { + "epoch": 0.8224753227031132, + "grad_norm": 1.1077079150850047, + "learning_rate": 9.400153637339182e-07, + "loss": 0.2908, + "step": 1354 + }, + { + "epoch": 0.8230827638572513, + "grad_norm": 1.3469195622589663, + "learning_rate": 9.338381634800597e-07, + "loss": 0.2264, + "step": 1355 + }, + { + "epoch": 0.8236902050113896, + "grad_norm": 2.3223661832113476, + "learning_rate": 9.276792352997782e-07, + "loss": 0.2334, + "step": 1356 + }, + { + "epoch": 0.8242976461655277, + "grad_norm": 0.965162694370609, + "learning_rate": 9.215386068693927e-07, + "loss": 0.2554, + "step": 1357 + }, + { + "epoch": 0.8249050873196659, + "grad_norm": 1.1093572084109473, + "learning_rate": 9.154163057829879e-07, + "loss": 0.2328, + "step": 1358 + }, + { + "epoch": 0.8255125284738041, + "grad_norm": 0.9658006597774278, + "learning_rate": 9.093123595522929e-07, + "loss": 0.2641, + "step": 1359 + }, + { + "epoch": 0.8261199696279423, + "grad_norm": 1.1141024465330946, + "learning_rate": 9.032267956065516e-07, + "loss": 0.2168, + "step": 1360 + }, + { + "epoch": 0.8267274107820805, + "grad_norm": 1.085834176055846, + "learning_rate": 8.971596412924067e-07, + "loss": 0.2665, + "step": 1361 + }, + { + "epoch": 0.8273348519362187, + "grad_norm": 1.0277054005618411, + "learning_rate": 8.911109238737748e-07, + "loss": 0.2654, + "step": 1362 + }, + { + "epoch": 0.8279422930903568, + "grad_norm": 1.1286512062535323, + "learning_rate": 
8.850806705317183e-07, + "loss": 0.2572, + "step": 1363 + }, + { + "epoch": 0.8285497342444951, + "grad_norm": 0.9881387168493946, + "learning_rate": 8.790689083643328e-07, + "loss": 0.2762, + "step": 1364 + }, + { + "epoch": 0.8291571753986332, + "grad_norm": 1.7913780275956543, + "learning_rate": 8.730756643866157e-07, + "loss": 0.2728, + "step": 1365 + }, + { + "epoch": 0.8297646165527715, + "grad_norm": 1.0438179673664785, + "learning_rate": 8.671009655303531e-07, + "loss": 0.2876, + "step": 1366 + }, + { + "epoch": 0.8303720577069097, + "grad_norm": 1.1157108355581231, + "learning_rate": 8.611448386439936e-07, + "loss": 0.2582, + "step": 1367 + }, + { + "epoch": 0.8309794988610478, + "grad_norm": 1.5568021179946305, + "learning_rate": 8.552073104925296e-07, + "loss": 0.2428, + "step": 1368 + }, + { + "epoch": 0.8315869400151861, + "grad_norm": 1.0699739124750929, + "learning_rate": 8.492884077573749e-07, + "loss": 0.2568, + "step": 1369 + }, + { + "epoch": 0.8321943811693242, + "grad_norm": 0.7926743610930811, + "learning_rate": 8.433881570362484e-07, + "loss": 0.2176, + "step": 1370 + }, + { + "epoch": 0.8328018223234624, + "grad_norm": 1.225524035303792, + "learning_rate": 8.375065848430508e-07, + "loss": 0.274, + "step": 1371 + }, + { + "epoch": 0.8334092634776006, + "grad_norm": 0.9030010611168635, + "learning_rate": 8.316437176077491e-07, + "loss": 0.2649, + "step": 1372 + }, + { + "epoch": 0.8340167046317388, + "grad_norm": 0.8628776555924657, + "learning_rate": 8.257995816762559e-07, + "loss": 0.238, + "step": 1373 + }, + { + "epoch": 0.834624145785877, + "grad_norm": 0.9519929470187486, + "learning_rate": 8.199742033103091e-07, + "loss": 0.22, + "step": 1374 + }, + { + "epoch": 0.8352315869400152, + "grad_norm": 0.9316429752072123, + "learning_rate": 8.141676086873574e-07, + "loss": 0.2523, + "step": 1375 + }, + { + "epoch": 0.8358390280941533, + "grad_norm": 1.5230699643150925, + "learning_rate": 8.083798239004408e-07, + "loss": 0.2601, + "step": 1376 
+ }, + { + "epoch": 0.8364464692482916, + "grad_norm": 1.0766184599801747, + "learning_rate": 8.026108749580758e-07, + "loss": 0.2538, + "step": 1377 + }, + { + "epoch": 0.8370539104024297, + "grad_norm": 2.320789746346856, + "learning_rate": 7.968607877841333e-07, + "loss": 0.2844, + "step": 1378 + }, + { + "epoch": 0.837661351556568, + "grad_norm": 0.8792983215848907, + "learning_rate": 7.911295882177256e-07, + "loss": 0.236, + "step": 1379 + }, + { + "epoch": 0.8382687927107062, + "grad_norm": 1.6787572932160961, + "learning_rate": 7.854173020130906e-07, + "loss": 0.2403, + "step": 1380 + }, + { + "epoch": 0.8388762338648443, + "grad_norm": 0.9829023277192255, + "learning_rate": 7.79723954839477e-07, + "loss": 0.2287, + "step": 1381 + }, + { + "epoch": 0.8394836750189826, + "grad_norm": 1.7023786251385593, + "learning_rate": 7.740495722810271e-07, + "loss": 0.2435, + "step": 1382 + }, + { + "epoch": 0.8400911161731207, + "grad_norm": 0.9441864654332802, + "learning_rate": 7.683941798366578e-07, + "loss": 0.2906, + "step": 1383 + }, + { + "epoch": 0.840698557327259, + "grad_norm": 1.2877471436954095, + "learning_rate": 7.627578029199562e-07, + "loss": 0.2498, + "step": 1384 + }, + { + "epoch": 0.8413059984813971, + "grad_norm": 0.9842560572472016, + "learning_rate": 7.571404668590532e-07, + "loss": 0.2742, + "step": 1385 + }, + { + "epoch": 0.8419134396355353, + "grad_norm": 1.130499069526576, + "learning_rate": 7.515421968965242e-07, + "loss": 0.2285, + "step": 1386 + }, + { + "epoch": 0.8425208807896735, + "grad_norm": 0.9696805118746494, + "learning_rate": 7.459630181892608e-07, + "loss": 0.262, + "step": 1387 + }, + { + "epoch": 0.8431283219438117, + "grad_norm": 1.2144471408339115, + "learning_rate": 7.404029558083653e-07, + "loss": 0.2675, + "step": 1388 + }, + { + "epoch": 0.8437357630979498, + "grad_norm": 1.0391512842651585, + "learning_rate": 7.348620347390384e-07, + "loss": 0.2855, + "step": 1389 + }, + { + "epoch": 0.8443432042520881, + "grad_norm": 
0.9328634429354515, + "learning_rate": 7.293402798804667e-07, + "loss": 0.2345, + "step": 1390 + }, + { + "epoch": 0.8449506454062262, + "grad_norm": 0.9723351719934986, + "learning_rate": 7.238377160457094e-07, + "loss": 0.2645, + "step": 1391 + }, + { + "epoch": 0.8455580865603645, + "grad_norm": 1.4147751762924923, + "learning_rate": 7.183543679615834e-07, + "loss": 0.2626, + "step": 1392 + }, + { + "epoch": 0.8461655277145027, + "grad_norm": 1.177367440845403, + "learning_rate": 7.128902602685617e-07, + "loss": 0.2709, + "step": 1393 + }, + { + "epoch": 0.8467729688686408, + "grad_norm": 1.0052526002592344, + "learning_rate": 7.074454175206524e-07, + "loss": 0.2464, + "step": 1394 + }, + { + "epoch": 0.8473804100227791, + "grad_norm": 0.977663222241663, + "learning_rate": 7.020198641852949e-07, + "loss": 0.2447, + "step": 1395 + }, + { + "epoch": 0.8479878511769172, + "grad_norm": 2.675337956474618, + "learning_rate": 6.966136246432492e-07, + "loss": 0.2647, + "step": 1396 + }, + { + "epoch": 0.8485952923310555, + "grad_norm": 0.9873856637443686, + "learning_rate": 6.912267231884817e-07, + "loss": 0.266, + "step": 1397 + }, + { + "epoch": 0.8492027334851936, + "grad_norm": 1.003763602141105, + "learning_rate": 6.858591840280627e-07, + "loss": 0.2891, + "step": 1398 + }, + { + "epoch": 0.8498101746393318, + "grad_norm": 0.917577761815308, + "learning_rate": 6.805110312820501e-07, + "loss": 0.2545, + "step": 1399 + }, + { + "epoch": 0.85041761579347, + "grad_norm": 0.8727291361789857, + "learning_rate": 6.751822889833926e-07, + "loss": 0.2522, + "step": 1400 + }, + { + "epoch": 0.8510250569476082, + "grad_norm": 0.9567515585475891, + "learning_rate": 6.698729810778065e-07, + "loss": 0.2411, + "step": 1401 + }, + { + "epoch": 0.8516324981017463, + "grad_norm": 0.8896081239860772, + "learning_rate": 6.645831314236817e-07, + "loss": 0.2484, + "step": 1402 + }, + { + "epoch": 0.8522399392558846, + "grad_norm": 1.1157181737389836, + "learning_rate": 
6.593127637919633e-07, + "loss": 0.2852, + "step": 1403 + }, + { + "epoch": 0.8528473804100227, + "grad_norm": 0.9750689112401771, + "learning_rate": 6.540619018660555e-07, + "loss": 0.2512, + "step": 1404 + }, + { + "epoch": 0.853454821564161, + "grad_norm": 0.8887565653775829, + "learning_rate": 6.488305692417074e-07, + "loss": 0.2538, + "step": 1405 + }, + { + "epoch": 0.8540622627182992, + "grad_norm": 0.9449518854689749, + "learning_rate": 6.436187894269086e-07, + "loss": 0.2412, + "step": 1406 + }, + { + "epoch": 0.8546697038724373, + "grad_norm": 1.1922033805891283, + "learning_rate": 6.384265858417877e-07, + "loss": 0.2618, + "step": 1407 + }, + { + "epoch": 0.8552771450265756, + "grad_norm": 0.9056621667680124, + "learning_rate": 6.332539818184985e-07, + "loss": 0.2363, + "step": 1408 + }, + { + "epoch": 0.8558845861807137, + "grad_norm": 1.0040516816367477, + "learning_rate": 6.281010006011256e-07, + "loss": 0.2511, + "step": 1409 + }, + { + "epoch": 0.856492027334852, + "grad_norm": 0.9636326863778567, + "learning_rate": 6.229676653455719e-07, + "loss": 0.2861, + "step": 1410 + }, + { + "epoch": 0.8570994684889901, + "grad_norm": 1.267651631290491, + "learning_rate": 6.178539991194599e-07, + "loss": 0.2562, + "step": 1411 + }, + { + "epoch": 0.8577069096431283, + "grad_norm": 1.125217028031925, + "learning_rate": 6.127600249020216e-07, + "loss": 0.2394, + "step": 1412 + }, + { + "epoch": 0.8583143507972665, + "grad_norm": 1.16266685601348, + "learning_rate": 6.076857655840024e-07, + "loss": 0.2844, + "step": 1413 + }, + { + "epoch": 0.8589217919514047, + "grad_norm": 0.9618647822548747, + "learning_rate": 6.026312439675553e-07, + "loss": 0.2221, + "step": 1414 + }, + { + "epoch": 0.8595292331055429, + "grad_norm": 1.0967811765567483, + "learning_rate": 5.975964827661346e-07, + "loss": 0.2839, + "step": 1415 + }, + { + "epoch": 0.8601366742596811, + "grad_norm": 1.2542292575403695, + "learning_rate": 5.925815046044026e-07, + "loss": 0.283, + "step": 1416 
+ }, + { + "epoch": 0.8607441154138192, + "grad_norm": 0.9685662585305622, + "learning_rate": 5.875863320181175e-07, + "loss": 0.2386, + "step": 1417 + }, + { + "epoch": 0.8613515565679575, + "grad_norm": 1.2191630870814079, + "learning_rate": 5.826109874540409e-07, + "loss": 0.2672, + "step": 1418 + }, + { + "epoch": 0.8619589977220957, + "grad_norm": 0.9467425337398014, + "learning_rate": 5.776554932698325e-07, + "loss": 0.2645, + "step": 1419 + }, + { + "epoch": 0.8625664388762339, + "grad_norm": 3.223483400807002, + "learning_rate": 5.727198717339511e-07, + "loss": 0.2326, + "step": 1420 + }, + { + "epoch": 0.8631738800303721, + "grad_norm": 0.9512745639027146, + "learning_rate": 5.678041450255512e-07, + "loss": 0.2629, + "step": 1421 + }, + { + "epoch": 0.8637813211845102, + "grad_norm": 0.9320452148866075, + "learning_rate": 5.6290833523439e-07, + "loss": 0.2641, + "step": 1422 + }, + { + "epoch": 0.8643887623386485, + "grad_norm": 1.0278008843267301, + "learning_rate": 5.58032464360721e-07, + "loss": 0.2803, + "step": 1423 + }, + { + "epoch": 0.8649962034927866, + "grad_norm": 0.908323450955481, + "learning_rate": 5.531765543152002e-07, + "loss": 0.2356, + "step": 1424 + }, + { + "epoch": 0.8656036446469249, + "grad_norm": 0.8303574957373083, + "learning_rate": 5.483406269187869e-07, + "loss": 0.2189, + "step": 1425 + }, + { + "epoch": 0.866211085801063, + "grad_norm": 1.1970452420325983, + "learning_rate": 5.435247039026398e-07, + "loss": 0.2094, + "step": 1426 + }, + { + "epoch": 0.8668185269552012, + "grad_norm": 1.2257111130524938, + "learning_rate": 5.387288069080298e-07, + "loss": 0.231, + "step": 1427 + }, + { + "epoch": 0.8674259681093394, + "grad_norm": 0.9627292722754438, + "learning_rate": 5.33952957486234e-07, + "loss": 0.2333, + "step": 1428 + }, + { + "epoch": 0.8680334092634776, + "grad_norm": 1.032967615425608, + "learning_rate": 5.291971770984428e-07, + "loss": 0.2958, + "step": 1429 + }, + { + "epoch": 0.8686408504176157, + "grad_norm": 
1.139677124417918, + "learning_rate": 5.244614871156612e-07, + "loss": 0.2405, + "step": 1430 + }, + { + "epoch": 0.869248291571754, + "grad_norm": 1.2580527412823377, + "learning_rate": 5.197459088186163e-07, + "loss": 0.221, + "step": 1431 + }, + { + "epoch": 0.8698557327258922, + "grad_norm": 1.1944284727855752, + "learning_rate": 5.150504633976572e-07, + "loss": 0.2859, + "step": 1432 + }, + { + "epoch": 0.8704631738800304, + "grad_norm": 1.0957916763809294, + "learning_rate": 5.103751719526639e-07, + "loss": 0.2239, + "step": 1433 + }, + { + "epoch": 0.8710706150341686, + "grad_norm": 1.0470854505908578, + "learning_rate": 5.057200554929509e-07, + "loss": 0.2574, + "step": 1434 + }, + { + "epoch": 0.8716780561883067, + "grad_norm": 1.1296719722218975, + "learning_rate": 5.010851349371704e-07, + "loss": 0.2639, + "step": 1435 + }, + { + "epoch": 0.872285497342445, + "grad_norm": 2.711592251059139, + "learning_rate": 4.964704311132224e-07, + "loss": 0.2488, + "step": 1436 + }, + { + "epoch": 0.8728929384965831, + "grad_norm": 0.953048159062841, + "learning_rate": 4.918759647581578e-07, + "loss": 0.2581, + "step": 1437 + }, + { + "epoch": 0.8735003796507214, + "grad_norm": 1.0707876735381872, + "learning_rate": 4.873017565180871e-07, + "loss": 0.2578, + "step": 1438 + }, + { + "epoch": 0.8741078208048595, + "grad_norm": 0.9374479476013973, + "learning_rate": 4.827478269480895e-07, + "loss": 0.2405, + "step": 1439 + }, + { + "epoch": 0.8747152619589977, + "grad_norm": 1.0277390378554292, + "learning_rate": 4.782141965121129e-07, + "loss": 0.2701, + "step": 1440 + }, + { + "epoch": 0.8753227031131359, + "grad_norm": 1.4882213121058918, + "learning_rate": 4.7370088558289175e-07, + "loss": 0.2886, + "step": 1441 + }, + { + "epoch": 0.8759301442672741, + "grad_norm": 1.3548392131624356, + "learning_rate": 4.6920791444184934e-07, + "loss": 0.2471, + "step": 1442 + }, + { + "epoch": 0.8765375854214122, + "grad_norm": 1.0466228565297642, + "learning_rate": 
4.647353032790086e-07, + "loss": 0.2414, + "step": 1443 + }, + { + "epoch": 0.8771450265755505, + "grad_norm": 0.8743868205121337, + "learning_rate": 4.602830721928997e-07, + "loss": 0.2079, + "step": 1444 + }, + { + "epoch": 0.8777524677296887, + "grad_norm": 1.0599925032639006, + "learning_rate": 4.558512411904731e-07, + "loss": 0.2949, + "step": 1445 + }, + { + "epoch": 0.8783599088838269, + "grad_norm": 1.4979260055048251, + "learning_rate": 4.5143983018700485e-07, + "loss": 0.249, + "step": 1446 + }, + { + "epoch": 0.8789673500379651, + "grad_norm": 0.9462464867555567, + "learning_rate": 4.4704885900601236e-07, + "loss": 0.2422, + "step": 1447 + }, + { + "epoch": 0.8795747911921032, + "grad_norm": 1.0122245637859872, + "learning_rate": 4.4267834737916295e-07, + "loss": 0.2516, + "step": 1448 + }, + { + "epoch": 0.8801822323462415, + "grad_norm": 1.2775093153721113, + "learning_rate": 4.3832831494618255e-07, + "loss": 0.2585, + "step": 1449 + }, + { + "epoch": 0.8807896735003796, + "grad_norm": 0.9907820157094275, + "learning_rate": 4.33998781254773e-07, + "loss": 0.2508, + "step": 1450 + }, + { + "epoch": 0.8813971146545179, + "grad_norm": 0.9761789386358818, + "learning_rate": 4.2968976576051703e-07, + "loss": 0.2848, + "step": 1451 + }, + { + "epoch": 0.882004555808656, + "grad_norm": 0.8788756221973065, + "learning_rate": 4.2540128782679934e-07, + "loss": 0.2185, + "step": 1452 + }, + { + "epoch": 0.8826119969627942, + "grad_norm": 0.9350228380136899, + "learning_rate": 4.211333667247125e-07, + "loss": 0.2464, + "step": 1453 + }, + { + "epoch": 0.8832194381169324, + "grad_norm": 0.9813301382441217, + "learning_rate": 4.1688602163297564e-07, + "loss": 0.2666, + "step": 1454 + }, + { + "epoch": 0.8838268792710706, + "grad_norm": 0.9810743433744146, + "learning_rate": 4.126592716378408e-07, + "loss": 0.2296, + "step": 1455 + }, + { + "epoch": 0.8844343204252088, + "grad_norm": 1.2038878027224096, + "learning_rate": 4.0845313573301736e-07, + "loss": 0.2682, + 
"step": 1456 + }, + { + "epoch": 0.885041761579347, + "grad_norm": 0.9884971893945054, + "learning_rate": 4.042676328195788e-07, + "loss": 0.2643, + "step": 1457 + }, + { + "epoch": 0.8856492027334852, + "grad_norm": 5.323233068234899, + "learning_rate": 4.001027817058789e-07, + "loss": 0.238, + "step": 1458 + }, + { + "epoch": 0.8862566438876234, + "grad_norm": 0.9225650218982654, + "learning_rate": 3.959586011074729e-07, + "loss": 0.2155, + "step": 1459 + }, + { + "epoch": 0.8868640850417616, + "grad_norm": 0.9528679131681773, + "learning_rate": 3.9183510964702463e-07, + "loss": 0.2418, + "step": 1460 + }, + { + "epoch": 0.8874715261958998, + "grad_norm": 1.1303434388021751, + "learning_rate": 3.8773232585422924e-07, + "loss": 0.2297, + "step": 1461 + }, + { + "epoch": 0.888078967350038, + "grad_norm": 0.9900217922322905, + "learning_rate": 3.836502681657289e-07, + "loss": 0.2462, + "step": 1462 + }, + { + "epoch": 0.8886864085041761, + "grad_norm": 0.887907349960081, + "learning_rate": 3.795889549250292e-07, + "loss": 0.2171, + "step": 1463 + }, + { + "epoch": 0.8892938496583144, + "grad_norm": 0.9248553866957503, + "learning_rate": 3.755484043824131e-07, + "loss": 0.2243, + "step": 1464 + }, + { + "epoch": 0.8899012908124525, + "grad_norm": 0.8912025953132797, + "learning_rate": 3.715286346948671e-07, + "loss": 0.2149, + "step": 1465 + }, + { + "epoch": 0.8905087319665907, + "grad_norm": 0.8751189326641786, + "learning_rate": 3.675296639259912e-07, + "loss": 0.228, + "step": 1466 + }, + { + "epoch": 0.8911161731207289, + "grad_norm": 1.0691558945787711, + "learning_rate": 3.6355151004592414e-07, + "loss": 0.2233, + "step": 1467 + }, + { + "epoch": 0.8917236142748671, + "grad_norm": 1.3028028349470695, + "learning_rate": 3.595941909312595e-07, + "loss": 0.2603, + "step": 1468 + }, + { + "epoch": 0.8923310554290053, + "grad_norm": 0.8896967704695612, + "learning_rate": 3.5565772436496336e-07, + "loss": 0.2269, + "step": 1469 + }, + { + "epoch": 
0.8929384965831435, + "grad_norm": 0.907343561878061, + "learning_rate": 3.517421280363004e-07, + "loss": 0.2477, + "step": 1470 + }, + { + "epoch": 0.8935459377372817, + "grad_norm": 0.9527250045159997, + "learning_rate": 3.4784741954074884e-07, + "loss": 0.2645, + "step": 1471 + }, + { + "epoch": 0.8941533788914199, + "grad_norm": 0.9958249751109337, + "learning_rate": 3.439736163799251e-07, + "loss": 0.2331, + "step": 1472 + }, + { + "epoch": 0.8947608200455581, + "grad_norm": 1.2873604817141377, + "learning_rate": 3.4012073596150106e-07, + "loss": 0.235, + "step": 1473 + }, + { + "epoch": 0.8953682611996963, + "grad_norm": 0.8961669297469559, + "learning_rate": 3.362887955991301e-07, + "loss": 0.2408, + "step": 1474 + }, + { + "epoch": 0.8959757023538345, + "grad_norm": 1.051005440352167, + "learning_rate": 3.3247781251236623e-07, + "loss": 0.256, + "step": 1475 + }, + { + "epoch": 0.8965831435079726, + "grad_norm": 1.396804063195131, + "learning_rate": 3.2868780382658895e-07, + "loss": 0.2259, + "step": 1476 + }, + { + "epoch": 0.8971905846621109, + "grad_norm": 0.8551324108863454, + "learning_rate": 3.2491878657292643e-07, + "loss": 0.2552, + "step": 1477 + }, + { + "epoch": 0.897798025816249, + "grad_norm": 0.9274670411652475, + "learning_rate": 3.2117077768817395e-07, + "loss": 0.2271, + "step": 1478 + }, + { + "epoch": 0.8984054669703873, + "grad_norm": 1.2221587711171944, + "learning_rate": 3.174437940147268e-07, + "loss": 0.2447, + "step": 1479 + }, + { + "epoch": 0.8990129081245254, + "grad_norm": 6.662057062986324, + "learning_rate": 3.1373785230049356e-07, + "loss": 0.2718, + "step": 1480 + }, + { + "epoch": 0.8996203492786636, + "grad_norm": 2.162458231338751, + "learning_rate": 3.1005296919883354e-07, + "loss": 0.2563, + "step": 1481 + }, + { + "epoch": 0.9002277904328019, + "grad_norm": 1.1484595739033663, + "learning_rate": 3.0638916126846885e-07, + "loss": 0.2488, + "step": 1482 + }, + { + "epoch": 0.90083523158694, + "grad_norm": 
0.9027654629735503, + "learning_rate": 3.0274644497342133e-07, + "loss": 0.2304, + "step": 1483 + }, + { + "epoch": 0.9014426727410783, + "grad_norm": 0.950573848140933, + "learning_rate": 2.991248366829291e-07, + "loss": 0.2141, + "step": 1484 + }, + { + "epoch": 0.9020501138952164, + "grad_norm": 0.8792954327784936, + "learning_rate": 2.955243526713808e-07, + "loss": 0.2382, + "step": 1485 + }, + { + "epoch": 0.9026575550493546, + "grad_norm": 1.1214719838180265, + "learning_rate": 2.91945009118238e-07, + "loss": 0.2527, + "step": 1486 + }, + { + "epoch": 0.9032649962034928, + "grad_norm": 1.1337122451080186, + "learning_rate": 2.883868221079628e-07, + "loss": 0.3125, + "step": 1487 + }, + { + "epoch": 0.903872437357631, + "grad_norm": 1.149501989804444, + "learning_rate": 2.848498076299483e-07, + "loss": 0.2788, + "step": 1488 + }, + { + "epoch": 0.9044798785117691, + "grad_norm": 0.9593512296584564, + "learning_rate": 2.813339815784416e-07, + "loss": 0.2439, + "step": 1489 + }, + { + "epoch": 0.9050873196659074, + "grad_norm": 1.0889375186520727, + "learning_rate": 2.7783935975247867e-07, + "loss": 0.2679, + "step": 1490 + }, + { + "epoch": 0.9056947608200455, + "grad_norm": 1.0266400030570082, + "learning_rate": 2.743659578558089e-07, + "loss": 0.2375, + "step": 1491 + }, + { + "epoch": 0.9063022019741838, + "grad_norm": 0.9071334642534141, + "learning_rate": 2.7091379149682683e-07, + "loss": 0.2293, + "step": 1492 + }, + { + "epoch": 0.9069096431283219, + "grad_norm": 1.2936790169599448, + "learning_rate": 2.6748287618849957e-07, + "loss": 0.2409, + "step": 1493 + }, + { + "epoch": 0.9075170842824601, + "grad_norm": 1.0584178177194592, + "learning_rate": 2.6407322734829763e-07, + "loss": 0.236, + "step": 1494 + }, + { + "epoch": 0.9081245254365984, + "grad_norm": 1.3313437851181316, + "learning_rate": 2.6068486029813154e-07, + "loss": 0.2356, + "step": 1495 + }, + { + "epoch": 0.9087319665907365, + "grad_norm": 0.9005677622751922, + "learning_rate": 
2.573177902642726e-07, + "loss": 0.2245, + "step": 1496 + }, + { + "epoch": 0.9093394077448748, + "grad_norm": 0.9376710431209911, + "learning_rate": 2.539720323772926e-07, + "loss": 0.2416, + "step": 1497 + }, + { + "epoch": 0.9099468488990129, + "grad_norm": 0.9311283280877473, + "learning_rate": 2.506476016719922e-07, + "loss": 0.2341, + "step": 1498 + }, + { + "epoch": 0.9105542900531511, + "grad_norm": 1.1243831646626379, + "learning_rate": 2.473445130873353e-07, + "loss": 0.2628, + "step": 1499 + }, + { + "epoch": 0.9111617312072893, + "grad_norm": 0.9100015428632143, + "learning_rate": 2.440627814663804e-07, + "loss": 0.235, + "step": 1500 + }, + { + "epoch": 0.9117691723614275, + "grad_norm": 1.8437664997799614, + "learning_rate": 2.4080242155621327e-07, + "loss": 0.2469, + "step": 1501 + }, + { + "epoch": 0.9123766135155656, + "grad_norm": 1.0290473471963233, + "learning_rate": 2.3756344800788421e-07, + "loss": 0.2474, + "step": 1502 + }, + { + "epoch": 0.9129840546697039, + "grad_norm": 1.1477504753716588, + "learning_rate": 2.343458753763378e-07, + "loss": 0.2242, + "step": 1503 + }, + { + "epoch": 0.913591495823842, + "grad_norm": 1.1198944079500255, + "learning_rate": 2.3114971812034981e-07, + "loss": 0.2504, + "step": 1504 + }, + { + "epoch": 0.9141989369779803, + "grad_norm": 0.945076046858483, + "learning_rate": 2.2797499060246253e-07, + "loss": 0.2517, + "step": 1505 + }, + { + "epoch": 0.9148063781321184, + "grad_norm": 1.285167047982773, + "learning_rate": 2.2482170708892083e-07, + "loss": 0.2333, + "step": 1506 + }, + { + "epoch": 0.9154138192862566, + "grad_norm": 0.9188875249417393, + "learning_rate": 2.2168988174960382e-07, + "loss": 0.242, + "step": 1507 + }, + { + "epoch": 0.9160212604403949, + "grad_norm": 0.9392029956247224, + "learning_rate": 2.1857952865796616e-07, + "loss": 0.2494, + "step": 1508 + }, + { + "epoch": 0.916628701594533, + "grad_norm": 3.5960021463661223, + "learning_rate": 2.1549066179097355e-07, + "loss": 0.2581, + 
"step": 1509 + }, + { + "epoch": 0.9172361427486713, + "grad_norm": 1.4581742375551667, + "learning_rate": 2.124232950290367e-07, + "loss": 0.2536, + "step": 1510 + }, + { + "epoch": 0.9178435839028094, + "grad_norm": 1.0527162495034155, + "learning_rate": 2.0937744215595467e-07, + "loss": 0.2409, + "step": 1511 + }, + { + "epoch": 0.9184510250569476, + "grad_norm": 1.4110685445772864, + "learning_rate": 2.0635311685884675e-07, + "loss": 0.2095, + "step": 1512 + }, + { + "epoch": 0.9190584662110858, + "grad_norm": 1.213907408406235, + "learning_rate": 2.0335033272809612e-07, + "loss": 0.2757, + "step": 1513 + }, + { + "epoch": 0.919665907365224, + "grad_norm": 2.0561680152756114, + "learning_rate": 2.0036910325728521e-07, + "loss": 0.2397, + "step": 1514 + }, + { + "epoch": 0.9202733485193622, + "grad_norm": 1.0091633854522606, + "learning_rate": 1.9740944184313882e-07, + "loss": 0.256, + "step": 1515 + }, + { + "epoch": 0.9208807896735004, + "grad_norm": 1.374619992070271, + "learning_rate": 1.9447136178545766e-07, + "loss": 0.2351, + "step": 1516 + }, + { + "epoch": 0.9214882308276385, + "grad_norm": 1.0149607815681039, + "learning_rate": 1.9155487628706672e-07, + "loss": 0.2149, + "step": 1517 + }, + { + "epoch": 0.9220956719817768, + "grad_norm": 1.5286222741924442, + "learning_rate": 1.8865999845374794e-07, + "loss": 0.2401, + "step": 1518 + }, + { + "epoch": 0.9227031131359149, + "grad_norm": 1.1238029435165344, + "learning_rate": 1.857867412941883e-07, + "loss": 0.2259, + "step": 1519 + }, + { + "epoch": 0.9233105542900532, + "grad_norm": 1.047113679889672, + "learning_rate": 1.8293511771991624e-07, + "loss": 0.2562, + "step": 1520 + }, + { + "epoch": 0.9239179954441914, + "grad_norm": 1.1015346889794326, + "learning_rate": 1.8010514054524531e-07, + "loss": 0.2496, + "step": 1521 + }, + { + "epoch": 0.9245254365983295, + "grad_norm": 0.9348867843858392, + "learning_rate": 1.7729682248721848e-07, + "loss": 0.2193, + "step": 1522 + }, + { + "epoch": 
0.9251328777524678, + "grad_norm": 0.900662481617006, + "learning_rate": 1.7451017616554822e-07, + "loss": 0.2346, + "step": 1523 + }, + { + "epoch": 0.9257403189066059, + "grad_norm": 1.0079629512704111, + "learning_rate": 1.7174521410256162e-07, + "loss": 0.2739, + "step": 1524 + }, + { + "epoch": 0.9263477600607442, + "grad_norm": 1.0034552226211848, + "learning_rate": 1.69001948723142e-07, + "loss": 0.2709, + "step": 1525 + }, + { + "epoch": 0.9269552012148823, + "grad_norm": 1.1773428284591294, + "learning_rate": 1.6628039235467686e-07, + "loss": 0.2472, + "step": 1526 + }, + { + "epoch": 0.9275626423690205, + "grad_norm": 1.1087538756550075, + "learning_rate": 1.6358055722699662e-07, + "loss": 0.2376, + "step": 1527 + }, + { + "epoch": 0.9281700835231587, + "grad_norm": 3.5302378698596972, + "learning_rate": 1.6090245547232707e-07, + "loss": 0.2445, + "step": 1528 + }, + { + "epoch": 0.9287775246772969, + "grad_norm": 0.9318045452754465, + "learning_rate": 1.5824609912522825e-07, + "loss": 0.2495, + "step": 1529 + }, + { + "epoch": 0.929384965831435, + "grad_norm": 0.9925458437861561, + "learning_rate": 1.5561150012254446e-07, + "loss": 0.252, + "step": 1530 + }, + { + "epoch": 0.9299924069855733, + "grad_norm": 0.9204194360128435, + "learning_rate": 1.5299867030334815e-07, + "loss": 0.2544, + "step": 1531 + }, + { + "epoch": 0.9305998481397114, + "grad_norm": 1.4389165535169934, + "learning_rate": 1.5040762140888843e-07, + "loss": 0.2509, + "step": 1532 + }, + { + "epoch": 0.9312072892938497, + "grad_norm": 0.9464143937114549, + "learning_rate": 1.4783836508253823e-07, + "loss": 0.219, + "step": 1533 + }, + { + "epoch": 0.9318147304479879, + "grad_norm": 1.1584265502532431, + "learning_rate": 1.4529091286973994e-07, + "loss": 0.2584, + "step": 1534 + }, + { + "epoch": 0.932422171602126, + "grad_norm": 1.0767653117954572, + "learning_rate": 1.4276527621795655e-07, + "loss": 0.2477, + "step": 1535 + }, + { + "epoch": 0.9330296127562643, + "grad_norm": 
1.1174557743113676, + "learning_rate": 1.402614664766172e-07, + "loss": 0.2515, + "step": 1536 + }, + { + "epoch": 0.9336370539104024, + "grad_norm": 0.9313798735305144, + "learning_rate": 1.3777949489706898e-07, + "loss": 0.231, + "step": 1537 + }, + { + "epoch": 0.9342444950645407, + "grad_norm": 1.1379310451818712, + "learning_rate": 1.353193726325247e-07, + "loss": 0.2503, + "step": 1538 + }, + { + "epoch": 0.9348519362186788, + "grad_norm": 1.1585882440499968, + "learning_rate": 1.3288111073801235e-07, + "loss": 0.2784, + "step": 1539 + }, + { + "epoch": 0.935459377372817, + "grad_norm": 1.5930803179580344, + "learning_rate": 1.3046472017032685e-07, + "loss": 0.2418, + "step": 1540 + }, + { + "epoch": 0.9360668185269552, + "grad_norm": 0.9991292646052891, + "learning_rate": 1.280702117879795e-07, + "loss": 0.2397, + "step": 1541 + }, + { + "epoch": 0.9366742596810934, + "grad_norm": 1.212096857283085, + "learning_rate": 1.2569759635115086e-07, + "loss": 0.2582, + "step": 1542 + }, + { + "epoch": 0.9372817008352315, + "grad_norm": 1.1250193762265426, + "learning_rate": 1.2334688452164122e-07, + "loss": 0.2575, + "step": 1543 + }, + { + "epoch": 0.9378891419893698, + "grad_norm": 0.971115660382781, + "learning_rate": 1.210180868628219e-07, + "loss": 0.271, + "step": 1544 + }, + { + "epoch": 0.9384965831435079, + "grad_norm": 0.907500253470022, + "learning_rate": 1.1871121383958961e-07, + "loss": 0.2392, + "step": 1545 + }, + { + "epoch": 0.9391040242976462, + "grad_norm": 1.47219686578771, + "learning_rate": 1.1642627581831767e-07, + "loss": 0.2533, + "step": 1546 + }, + { + "epoch": 0.9397114654517844, + "grad_norm": 1.4887563913664645, + "learning_rate": 1.1416328306681046e-07, + "loss": 0.2665, + "step": 1547 + }, + { + "epoch": 0.9403189066059225, + "grad_norm": 0.9182957946443633, + "learning_rate": 1.1192224575425848e-07, + "loss": 0.2233, + "step": 1548 + }, + { + "epoch": 0.9409263477600608, + "grad_norm": 1.0661000364774975, + "learning_rate": 
1.0970317395119001e-07, + "loss": 0.2722, + "step": 1549 + }, + { + "epoch": 0.9415337889141989, + "grad_norm": 1.156185857708016, + "learning_rate": 1.0750607762942622e-07, + "loss": 0.2374, + "step": 1550 + }, + { + "epoch": 0.9421412300683372, + "grad_norm": 1.0021679642199284, + "learning_rate": 1.0533096666203946e-07, + "loss": 0.247, + "step": 1551 + }, + { + "epoch": 0.9427486712224753, + "grad_norm": 1.062302865690974, + "learning_rate": 1.0317785082330555e-07, + "loss": 0.2415, + "step": 1552 + }, + { + "epoch": 0.9433561123766135, + "grad_norm": 0.8324437312272753, + "learning_rate": 1.0104673978866164e-07, + "loss": 0.2131, + "step": 1553 + }, + { + "epoch": 0.9439635535307517, + "grad_norm": 1.2649813678192605, + "learning_rate": 9.89376431346606e-08, + "loss": 0.276, + "step": 1554 + }, + { + "epoch": 0.9445709946848899, + "grad_norm": 1.622757976163991, + "learning_rate": 9.685057033892998e-08, + "loss": 0.2582, + "step": 1555 + }, + { + "epoch": 0.945178435839028, + "grad_norm": 1.765578442579649, + "learning_rate": 9.478553078013042e-08, + "loss": 0.2553, + "step": 1556 + }, + { + "epoch": 0.9457858769931663, + "grad_norm": 1.4608500363168406, + "learning_rate": 9.274253373791064e-08, + "loss": 0.2555, + "step": 1557 + }, + { + "epoch": 0.9463933181473044, + "grad_norm": 0.8888287744971497, + "learning_rate": 9.072158839286748e-08, + "loss": 0.2405, + "step": 1558 + }, + { + "epoch": 0.9470007593014427, + "grad_norm": 1.269488158961429, + "learning_rate": 8.872270382650372e-08, + "loss": 0.2397, + "step": 1559 + }, + { + "epoch": 0.9476082004555809, + "grad_norm": 0.9140050155362377, + "learning_rate": 8.674588902118919e-08, + "loss": 0.2581, + "step": 1560 + }, + { + "epoch": 0.948215641609719, + "grad_norm": 0.9350253085292872, + "learning_rate": 8.479115286011752e-08, + "loss": 0.2578, + "step": 1561 + }, + { + "epoch": 0.9488230827638573, + "grad_norm": 1.294156199026534, + "learning_rate": 8.285850412726837e-08, + "loss": 0.2768, + "step": 1562 
+ }, + { + "epoch": 0.9494305239179954, + "grad_norm": 0.8781441810000316, + "learning_rate": 8.094795150736745e-08, + "loss": 0.2124, + "step": 1563 + }, + { + "epoch": 0.9500379650721337, + "grad_norm": 0.8821255230738889, + "learning_rate": 7.905950358584768e-08, + "loss": 0.2358, + "step": 1564 + }, + { + "epoch": 0.9506454062262718, + "grad_norm": 1.08985926239788, + "learning_rate": 7.719316884880922e-08, + "loss": 0.2615, + "step": 1565 + }, + { + "epoch": 0.95125284738041, + "grad_norm": 0.8755195395367136, + "learning_rate": 7.534895568298395e-08, + "loss": 0.2352, + "step": 1566 + }, + { + "epoch": 0.9518602885345482, + "grad_norm": 0.9952147011406434, + "learning_rate": 7.352687237569489e-08, + "loss": 0.2557, + "step": 1567 + }, + { + "epoch": 0.9524677296886864, + "grad_norm": 1.1520050119158871, + "learning_rate": 7.172692711482022e-08, + "loss": 0.2156, + "step": 1568 + }, + { + "epoch": 0.9530751708428246, + "grad_norm": 1.0057754256396354, + "learning_rate": 6.994912798875875e-08, + "loss": 0.2465, + "step": 1569 + }, + { + "epoch": 0.9536826119969628, + "grad_norm": 1.0040861530461729, + "learning_rate": 6.819348298638839e-08, + "loss": 0.2816, + "step": 1570 + }, + { + "epoch": 0.9542900531511009, + "grad_norm": 1.0700694831197364, + "learning_rate": 6.6459999997035e-08, + "loss": 0.2412, + "step": 1571 + }, + { + "epoch": 0.9548974943052392, + "grad_norm": 1.038954346833618, + "learning_rate": 6.474868681043578e-08, + "loss": 0.2782, + "step": 1572 + }, + { + "epoch": 0.9555049354593774, + "grad_norm": 0.9247657016066214, + "learning_rate": 6.305955111670204e-08, + "loss": 0.241, + "step": 1573 + }, + { + "epoch": 0.9561123766135156, + "grad_norm": 1.2125088257095862, + "learning_rate": 6.13926005062876e-08, + "loss": 0.2506, + "step": 1574 + }, + { + "epoch": 0.9567198177676538, + "grad_norm": 2.8028781405959005, + "learning_rate": 5.974784246995214e-08, + "loss": 0.2117, + "step": 1575 + }, + { + "epoch": 0.9573272589217919, + "grad_norm": 
1.2280962513812903, + "learning_rate": 5.8125284398730666e-08, + "loss": 0.2237, + "step": 1576 + }, + { + "epoch": 0.9579347000759302, + "grad_norm": 1.2300450579760327, + "learning_rate": 5.6524933583896326e-08, + "loss": 0.2126, + "step": 1577 + }, + { + "epoch": 0.9585421412300683, + "grad_norm": 2.4999481290325116, + "learning_rate": 5.4946797216931524e-08, + "loss": 0.2545, + "step": 1578 + }, + { + "epoch": 0.9591495823842066, + "grad_norm": 6.486220633951538, + "learning_rate": 5.339088238949186e-08, + "loss": 0.2354, + "step": 1579 + }, + { + "epoch": 0.9597570235383447, + "grad_norm": 0.9307011705790993, + "learning_rate": 5.185719609337836e-08, + "loss": 0.2342, + "step": 1580 + }, + { + "epoch": 0.9603644646924829, + "grad_norm": 0.9393824630850843, + "learning_rate": 5.034574522050251e-08, + "loss": 0.2467, + "step": 1581 + }, + { + "epoch": 0.9609719058466211, + "grad_norm": 1.4403224099920036, + "learning_rate": 4.885653656285627e-08, + "loss": 0.254, + "step": 1582 + }, + { + "epoch": 0.9615793470007593, + "grad_norm": 0.8733649961719668, + "learning_rate": 4.73895768124838e-08, + "loss": 0.2441, + "step": 1583 + }, + { + "epoch": 0.9621867881548974, + "grad_norm": 1.197435869062917, + "learning_rate": 4.5944872561448084e-08, + "loss": 0.2331, + "step": 1584 + }, + { + "epoch": 0.9627942293090357, + "grad_norm": 1.096594381356183, + "learning_rate": 4.45224303018027e-08, + "loss": 0.2402, + "step": 1585 + }, + { + "epoch": 0.9634016704631739, + "grad_norm": 1.0250873227147124, + "learning_rate": 4.3122256425563444e-08, + "loss": 0.266, + "step": 1586 + }, + { + "epoch": 0.9640091116173121, + "grad_norm": 1.0372168523415322, + "learning_rate": 4.174435722467951e-08, + "loss": 0.2625, + "step": 1587 + }, + { + "epoch": 0.9646165527714503, + "grad_norm": 0.9433606458038362, + "learning_rate": 4.038873889100237e-08, + "loss": 0.2642, + "step": 1588 + }, + { + "epoch": 0.9652239939255884, + "grad_norm": 0.9807120841293877, + "learning_rate": 
3.905540751626191e-08, + "loss": 0.2472, + "step": 1589 + }, + { + "epoch": 0.9658314350797267, + "grad_norm": 0.9083975877708611, + "learning_rate": 3.77443690920376e-08, + "loss": 0.2307, + "step": 1590 + }, + { + "epoch": 0.9664388762338648, + "grad_norm": 3.6391836768271273, + "learning_rate": 3.645562950973014e-08, + "loss": 0.2495, + "step": 1591 + }, + { + "epoch": 0.9670463173880031, + "grad_norm": 1.0037148121403654, + "learning_rate": 3.518919456053649e-08, + "loss": 0.2738, + "step": 1592 + }, + { + "epoch": 0.9676537585421412, + "grad_norm": 1.1951622922237168, + "learning_rate": 3.3945069935423234e-08, + "loss": 0.2449, + "step": 1593 + }, + { + "epoch": 0.9682611996962794, + "grad_norm": 1.5169082502067843, + "learning_rate": 3.2723261225102164e-08, + "loss": 0.2377, + "step": 1594 + }, + { + "epoch": 0.9688686408504176, + "grad_norm": 3.6717572091002997, + "learning_rate": 3.152377392000361e-08, + "loss": 0.2671, + "step": 1595 + }, + { + "epoch": 0.9694760820045558, + "grad_norm": 0.8724528062444226, + "learning_rate": 3.034661341025258e-08, + "loss": 0.231, + "step": 1596 + }, + { + "epoch": 0.970083523158694, + "grad_norm": 0.9374629987363168, + "learning_rate": 2.9191784985644345e-08, + "loss": 0.241, + "step": 1597 + }, + { + "epoch": 0.9706909643128322, + "grad_norm": 0.9282402240595091, + "learning_rate": 2.8059293835620006e-08, + "loss": 0.2349, + "step": 1598 + }, + { + "epoch": 0.9712984054669704, + "grad_norm": 1.2644746158155864, + "learning_rate": 2.6949145049245396e-08, + "loss": 0.249, + "step": 1599 + }, + { + "epoch": 0.9719058466211086, + "grad_norm": 0.9000862092444777, + "learning_rate": 2.5861343615184997e-08, + "loss": 0.245, + "step": 1600 + }, + { + "epoch": 0.9725132877752468, + "grad_norm": 1.368391294736083, + "learning_rate": 2.479589442168251e-08, + "loss": 0.2333, + "step": 1601 + }, + { + "epoch": 0.973120728929385, + "grad_norm": 1.29020328624742, + "learning_rate": 2.3752802256536423e-08, + "loss": 0.2683, + "step": 
1602 + }, + { + "epoch": 0.9737281700835232, + "grad_norm": 0.9410886564376265, + "learning_rate": 2.2732071807081147e-08, + "loss": 0.2285, + "step": 1603 + }, + { + "epoch": 0.9743356112376613, + "grad_norm": 1.4761677232747976, + "learning_rate": 2.173370766016314e-08, + "loss": 0.2315, + "step": 1604 + }, + { + "epoch": 0.9749430523917996, + "grad_norm": 0.9501609497089889, + "learning_rate": 2.0757714302122035e-08, + "loss": 0.241, + "step": 1605 + }, + { + "epoch": 0.9755504935459377, + "grad_norm": 1.2314833591182839, + "learning_rate": 1.98040961187701e-08, + "loss": 0.2703, + "step": 1606 + }, + { + "epoch": 0.976157934700076, + "grad_norm": 1.0158036848314118, + "learning_rate": 1.8872857395372812e-08, + "loss": 0.2597, + "step": 1607 + }, + { + "epoch": 0.9767653758542141, + "grad_norm": 1.1694648264181446, + "learning_rate": 1.7964002316628316e-08, + "loss": 0.2916, + "step": 1608 + }, + { + "epoch": 0.9773728170083523, + "grad_norm": 1.0688476934543394, + "learning_rate": 1.7077534966650767e-08, + "loss": 0.2558, + "step": 1609 + }, + { + "epoch": 0.9779802581624905, + "grad_norm": 0.9549062578238298, + "learning_rate": 1.6213459328950355e-08, + "loss": 0.244, + "step": 1610 + }, + { + "epoch": 0.9785876993166287, + "grad_norm": 2.412110087204276, + "learning_rate": 1.537177928641498e-08, + "loss": 0.2462, + "step": 1611 + }, + { + "epoch": 0.979195140470767, + "grad_norm": 0.941429067459168, + "learning_rate": 1.4552498621295264e-08, + "loss": 0.2535, + "step": 1612 + }, + { + "epoch": 0.9798025816249051, + "grad_norm": 0.9371186125393489, + "learning_rate": 1.3755621015184018e-08, + "loss": 0.2515, + "step": 1613 + }, + { + "epoch": 0.9804100227790433, + "grad_norm": 1.2909491242156825, + "learning_rate": 1.2981150049004021e-08, + "loss": 0.2327, + "step": 1614 + }, + { + "epoch": 0.9810174639331815, + "grad_norm": 0.9804844053080275, + "learning_rate": 1.2229089202987487e-08, + "loss": 0.2317, + "step": 1615 + }, + { + "epoch": 0.9816249050873197, + 
"grad_norm": 1.0876245667656317, + "learning_rate": 1.1499441856663296e-08, + "loss": 0.2516, + "step": 1616 + }, + { + "epoch": 0.9822323462414578, + "grad_norm": 0.8612954321226812, + "learning_rate": 1.0792211288841447e-08, + "loss": 0.2599, + "step": 1617 + }, + { + "epoch": 0.9828397873955961, + "grad_norm": 0.888353089870358, + "learning_rate": 1.0107400677596413e-08, + "loss": 0.2547, + "step": 1618 + }, + { + "epoch": 0.9834472285497342, + "grad_norm": 0.9827706274173068, + "learning_rate": 9.44501310025603e-09, + "loss": 0.2519, + "step": 1619 + }, + { + "epoch": 0.9840546697038725, + "grad_norm": 0.9827464321021941, + "learning_rate": 8.805051533384846e-09, + "loss": 0.2315, + "step": 1620 + }, + { + "epoch": 0.9846621108580106, + "grad_norm": 0.9339543239161487, + "learning_rate": 8.187518852771914e-09, + "loss": 0.2636, + "step": 1621 + }, + { + "epoch": 0.9852695520121488, + "grad_norm": 0.8991352561684951, + "learning_rate": 7.59241783341913e-09, + "loss": 0.2535, + "step": 1622 + }, + { + "epoch": 0.985876993166287, + "grad_norm": 0.8964567756827048, + "learning_rate": 7.019751149525133e-09, + "loss": 0.2098, + "step": 1623 + }, + { + "epoch": 0.9864844343204252, + "grad_norm": 1.0511236807122393, + "learning_rate": 6.469521374477539e-09, + "loss": 0.2739, + "step": 1624 + }, + { + "epoch": 0.9870918754745635, + "grad_norm": 0.8742334390126567, + "learning_rate": 5.941730980839056e-09, + "loss": 0.2423, + "step": 1625 + }, + { + "epoch": 0.9876993166287016, + "grad_norm": 0.8982580571091601, + "learning_rate": 5.436382340335833e-09, + "loss": 0.2275, + "step": 1626 + }, + { + "epoch": 0.9883067577828398, + "grad_norm": 0.9802151698563373, + "learning_rate": 4.9534777238485764e-09, + "loss": 0.2458, + "step": 1627 + }, + { + "epoch": 0.988914198936978, + "grad_norm": 1.021763994953802, + "learning_rate": 4.493019301401447e-09, + "loss": 0.2413, + "step": 1628 + }, + { + "epoch": 0.9895216400911162, + "grad_norm": 1.4457966401308637, + "learning_rate": 
4.055009142152066e-09, + "loss": 0.2717, + "step": 1629 + }, + { + "epoch": 0.9901290812452543, + "grad_norm": 0.9291088704888705, + "learning_rate": 3.6394492143820847e-09, + "loss": 0.2631, + "step": 1630 + }, + { + "epoch": 0.9907365223993926, + "grad_norm": 0.9333078223932707, + "learning_rate": 3.2463413854899594e-09, + "loss": 0.2356, + "step": 1631 + }, + { + "epoch": 0.9913439635535307, + "grad_norm": 0.8258221417650397, + "learning_rate": 2.875687421980966e-09, + "loss": 0.2435, + "step": 1632 + }, + { + "epoch": 0.991951404707669, + "grad_norm": 1.3664318253744514, + "learning_rate": 2.5274889894583156e-09, + "loss": 0.2455, + "step": 1633 + }, + { + "epoch": 0.9925588458618071, + "grad_norm": 1.152187566076446, + "learning_rate": 2.201747652618713e-09, + "loss": 0.2713, + "step": 1634 + }, + { + "epoch": 0.9931662870159453, + "grad_norm": 1.085926992818226, + "learning_rate": 1.8984648752429222e-09, + "loss": 0.2716, + "step": 1635 + }, + { + "epoch": 0.9937737281700835, + "grad_norm": 1.1962597289024564, + "learning_rate": 1.6176420201902132e-09, + "loss": 0.2427, + "step": 1636 + }, + { + "epoch": 0.9943811693242217, + "grad_norm": 0.9297404890182976, + "learning_rate": 1.3592803493905904e-09, + "loss": 0.2492, + "step": 1637 + }, + { + "epoch": 0.99498861047836, + "grad_norm": 1.4127019374082828, + "learning_rate": 1.1233810238425735e-09, + "loss": 0.2329, + "step": 1638 + }, + { + "epoch": 0.9955960516324981, + "grad_norm": 0.9889547616457451, + "learning_rate": 9.099451036048701e-10, + "loss": 0.2464, + "step": 1639 + }, + { + "epoch": 0.9962034927866363, + "grad_norm": 1.479970043099125, + "learning_rate": 7.189735477913795e-10, + "loss": 0.243, + "step": 1640 + }, + { + "epoch": 0.9968109339407745, + "grad_norm": 0.9559279964912788, + "learning_rate": 5.504672145700829e-10, + "loss": 0.294, + "step": 1641 + }, + { + "epoch": 0.9974183750949127, + "grad_norm": 1.013358124558348, + "learning_rate": 4.0442686115582665e-10, + "loss": 0.2607, + "step": 
1642 + }, + { + "epoch": 0.9980258162490508, + "grad_norm": 1.7596074983835073, + "learning_rate": 2.8085314380976725e-10, + "loss": 0.2626, + "step": 1643 + }, + { + "epoch": 0.9986332574031891, + "grad_norm": 1.0764621343908087, + "learning_rate": 1.797466178327101e-10, + "loss": 0.2622, + "step": 1644 + }, + { + "epoch": 0.9992406985573272, + "grad_norm": 1.1105069084316046, + "learning_rate": 1.011077375662195e-10, + "loss": 0.2386, + "step": 1645 + }, + { + "epoch": 0.9998481397114655, + "grad_norm": 1.0590187008021932, + "learning_rate": 4.4936856390398465e-11, + "loss": 0.2989, + "step": 1646 + }, + { + "epoch": 1.0, + "grad_norm": 1.0590187008021932, + "learning_rate": 1.1234226718337405e-11, + "loss": 0.0578, + "step": 1647 + }, + { + "epoch": 1.0, + "step": 1647, + "total_flos": 669099333058560.0, + "train_loss": 0.32129256987551813, + "train_runtime": 70988.8215, + "train_samples_per_second": 0.742, + "train_steps_per_second": 0.023 + } + ], + "logging_steps": 1, + "max_steps": 1647, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 669099333058560.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..a5bf940 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff55b2418c281fee7942a5b6a774d847784484ac73c2aea62d40af8d47db7f1 +size 7544 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..c99cc24 Binary files /dev/null and b/training_loss.png differ diff --git a/video_preprocessor_config.json b/video_preprocessor_config.json new file mode 100644 index 0000000..b64d80b --- /dev/null +++ 
b/video_preprocessor_config.json @@ -0,0 +1,86 @@ +{ + "_valid_kwargs_names": [ + "do_convert_rgb", + "do_resize", + "size", + "size_divisor", + "default_to_square", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_pad", + "do_center_crop", + "crop_size", + "data_format", + "input_data_format", + "device", + "min_pixels", + "max_pixels", + "patch_size", + "temporal_patch_size", + "merge_size" + ], + "crop_size": null, + "data_format": "channels_first", + "default_to_square": true, + "device": null, + "do_center_crop": null, + "do_convert_rgb": true, + "do_normalize": true, + "do_pad": null, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "input_data_format": null, + "max_pixels": 12845056, + "merge_size": 2, + "min_pixels": 3136, + "model_valid_processing_keys": [ + "do_convert_rgb", + "do_resize", + "size", + "size_divisor", + "default_to_square", + "resample", + "do_rescale", + "rescale_factor", + "do_normalize", + "image_mean", + "image_std", + "do_pad", + "do_center_crop", + "crop_size", + "data_format", + "input_data_format", + "device", + "min_pixels", + "max_pixels", + "patch_size", + "temporal_patch_size", + "merge_size" + ], + "patch_size": 14, + "processor_class": "Qwen2_5_VLProcessor", + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "size_divisor": null, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..6c49fc6 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833