初始化项目,由ModelHub XC社区提供模型

Model: DCAgent/a1-nemotron_rspec
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-21 18:25:31 +08:00
commit a8c265e99a
21 changed files with 153300 additions and 0 deletions

36
.gitattributes vendored Normal file
View File

@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

60
README.md Normal file
View File

@@ -0,0 +1,60 @@
---
library_name: transformers
license: other
base_model: Qwen/Qwen3-8B
tags:
- llama-factory
- full
- generated_from_trainer
model-index:
- name: sft_a1_nemotron_rspec__Qwen3-8B
results: []
---
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->
# sft_a1_nemotron_rspec__Qwen3-8B
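This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on a thinking-preprocessed copy of the DCAgent/exp_rpt_nemotron-ruby_10k_glm_4.7_traces_jupiter dataset (snapshot 5b15bfe3310d49b8689ea7df8b91f86aff654e50), loaded during training from the local path `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_nemotron-ruby_10k_glm_4.7_traces_jupiter/snapshots/5b15bfe3310d49b8689ea7df8b91f86aff654e50_thinking_preprocessed`.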
## Model description
More information needed
## Intended uses & limitations
More information needed
## Training and evaluation data
More information needed
## Training procedure
### Training hyperparameters
The following hyperparameters were used during training:
- learning_rate: 4e-05
- train_batch_size: 1
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- num_devices: 16
- total_train_batch_size: 16
- total_eval_batch_size: 128
- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9, 0.98) and epsilon=1e-08; no additional optimizer arguments
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 7.0
### Training results
### Framework versions
- Transformers 4.57.6
- Pytorch 2.9.1+cu130
- Datasets 4.7.0
- Tokenizers 0.22.2

28
added_tokens.json Normal file
View File

@@ -0,0 +1,28 @@
{
"</think>": 151668,
"</tool_call>": 151658,
"</tool_response>": 151666,
"<think>": 151667,
"<tool_call>": 151657,
"<tool_response>": 151665,
"<|box_end|>": 151649,
"<|box_start|>": 151648,
"<|endoftext|>": 151643,
"<|file_sep|>": 151664,
"<|fim_middle|>": 151660,
"<|fim_pad|>": 151662,
"<|fim_prefix|>": 151659,
"<|fim_suffix|>": 151661,
"<|im_end|>": 151645,
"<|im_start|>": 151644,
"<|image_pad|>": 151655,
"<|object_ref_end|>": 151647,
"<|object_ref_start|>": 151646,
"<|quad_end|>": 151651,
"<|quad_start|>": 151650,
"<|repo_name|>": 151663,
"<|video_pad|>": 151656,
"<|vision_end|>": 151653,
"<|vision_pad|>": 151654,
"<|vision_start|>": 151652
}

16
all_results.json Normal file
View File

@@ -0,0 +1,16 @@
{
"achieved_tflops_per_gpu": 0.003026226548476734,
"achieved_tflops_per_gpu_theoretical": 586.107489368408,
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17929485440254211,
"mfu_percent": 0.0002138676005990625,
"mfu_percent_theoretical": 41.42102398363307,
"total_flos": 1235167211487232.0,
"train_loss": 0.21478925156317163,
"train_runtime": 25509.6403,
"train_samples_per_second": 2.761,
"train_steps_per_second": 0.173,
"valid_targets_mean": 3512.4,
"valid_targets_min": 1353
}

89
chat_template.jinja Normal file
View File

@@ -0,0 +1,89 @@
{#- Chat template: renders `messages` (and optional `tools`) into <|im_start|>role ... <|im_end|> turns. #}
{#- Every tag uses left whitespace control, so these comment lines add nothing to the rendered text. #}
{#- With tools: open a system turn, include the first message's content when it is a system message, then list each tool as JSON inside <tools></tools> followed by the tool-call format instructions. #}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- Without tools: emit a system turn only when the first message is a system message. #}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Walk the messages in reverse to find the index of the last user message that is not a wrapped <tool_response>...</tool_response> payload. #}
{#- ns.last_query_index defaults to the last message index and is overwritten only by the first match found from the end. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{#- Convert the reversed loop position back to the index in the original list. #}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Main pass: render each message in order. #}
{%- for message in messages %}
{#- Non-string content is rendered as an empty string. #}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- User turns, and system messages other than the first, are emitted as-is under their own role. #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{%- elif message.role == "assistant" %}
{#- Reasoning comes from message.reasoning_content when it is a string; otherwise it is split out of content at the closing think tag, and content keeps only the text after that tag. #}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- The think block is rendered only for assistant messages after ns.last_query_index, and then only for the final message or when reasoning is non-empty. All other assistant messages render content alone. #}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{%- else %}
{{- '<|im_start|>' + message.role + '\n' + content }}
{%- endif %}
{#- Append each tool call as a <tool_call> JSON object inside the same assistant turn. #}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{#- Newline separator before every call except a first call that follows empty content. #}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Accept both the nested form (tool_call.function.name) and the flat form (tool_call.name). #}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted verbatim; any other value is serialized with tojson. #}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- elif message.role == "tool" %}
{#- Consecutive tool messages share one user turn: it opens before the first of a run and closes after the last, with each result wrapped in <tool_response> tags. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; when enable_thinking is defined and false, pre-fill an empty think block. #}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n\n' }}
{%- endif %}
{%- endif %}

68
config.json Normal file
View File

@@ -0,0 +1,68 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"dtype": "bfloat16",
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 12288,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 40960,
"max_window_layers": 36,
"model_type": "qwen3",
"num_attention_heads": 32,
"num_hidden_layers": 36,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": false,
"transformers_version": "4.57.6",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 151936
}

12
generation_config.json Normal file
View File

@@ -0,0 +1,12 @@
{
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "4.57.6"
}

151388
merges.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f756b694235c969d2f06f57a2fe1b3b0e1d4840f504abd202fa4238592ec09f
size 4902257696

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8cebd3cf1931cfb384e7988b688055e93b9f69c25a3cb41a0b9d0842e285acb9
size 4915960368

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:552646d1dfea7fe320e4de17968aa50be38dc1d52378088bc699390eae280e6f
size 4983068496

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dfcf78d30501407aaee5b513fa333d26a75a64f1810c1db97c4ac2cb2dbbee5c
size 1580230264

View File

@@ -0,0 +1,407 @@
{
"metadata": {
"total_parameters": 8190735360,
"total_size": 16381470720
},
"weight_map": {
"lm_head.weight": "model-00004-of-00004.safetensors",
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.norm.weight": "model-00004-of-00004.safetensors"
}
}

12
run_summary.json Normal file
View File

@@ -0,0 +1,12 @@
{
"agent_name": "5b15bfe3310d49b8689ea7df8b91f86aff654e50_thinking_preprocessed",
"training_start": null,
"training_end": null,
"created_by": "raoof1",
"base_model_name": "Qwen/Qwen3-8B",
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_nemotron-ruby_10k_glm_4.7_traces_jupiter/snapshots/5b15bfe3310d49b8689ea7df8b91f86aff654e50_thinking_preprocessed",
"training_type": "SFT",
"training_parameters": "https://huggingface.co/DCAgent/a1-nemotron_rspec/blob/main/config.json",
"wandb_link": null,
"traces_location_s3": null
}

31
special_tokens_map.json Normal file
View File

@@ -0,0 +1,31 @@
{
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
size 11422654

240
tokenizer_config.json Normal file
View File

@@ -0,0 +1,240 @@
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151646": {
"content": "<|object_ref_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|object_ref_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151648": {
"content": "<|box_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151649": {
"content": "<|box_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151665": {
"content": "<tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151666": {
"content": "</tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151667": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151668": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": {},
"model_max_length": 32768,
"pad_token": "<|endoftext|>",
"padding_side": "right",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null
}

16
train_results.json Normal file
View File

@@ -0,0 +1,16 @@
{
"achieved_tflops_per_gpu": 0.003026226548476734,
"achieved_tflops_per_gpu_theoretical": 586.107489368408,
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17929485440254211,
"mfu_percent": 0.0002138676005990625,
"mfu_percent_theoretical": 41.42102398363307,
"total_flos": 1235167211487232.0,
"train_loss": 0.21478925156317163,
"train_runtime": 25509.6403,
"train_samples_per_second": 2.761,
"train_steps_per_second": 0.173,
"valid_targets_mean": 3512.4,
"valid_targets_min": 1353
}

881
trainer_log.jsonl Normal file
View File

@@ -0,0 +1,881 @@
{"current_steps": 5, "total_steps": 4403, "loss": 0.7834, "lr": 3.6281179138322e-07, "epoch": 0.00794912559618442, "percentage": 0.11, "elapsed_time": "0:00:38", "remaining_time": "9:21:56"}
{"current_steps": 10, "total_steps": 4403, "loss": 0.7678, "lr": 8.163265306122449e-07, "epoch": 0.01589825119236884, "percentage": 0.23, "elapsed_time": "0:01:07", "remaining_time": "8:12:44"}
{"current_steps": 15, "total_steps": 4403, "loss": 0.7262, "lr": 1.26984126984127e-06, "epoch": 0.02384737678855326, "percentage": 0.34, "elapsed_time": "0:01:36", "remaining_time": "7:50:56"}
{"current_steps": 20, "total_steps": 4403, "loss": 0.7206, "lr": 1.723356009070295e-06, "epoch": 0.03179650238473768, "percentage": 0.45, "elapsed_time": "0:02:06", "remaining_time": "7:40:52"}
{"current_steps": 25, "total_steps": 4403, "loss": 0.6199, "lr": 2.17687074829932e-06, "epoch": 0.0397456279809221, "percentage": 0.57, "elapsed_time": "0:02:38", "remaining_time": "7:43:58"}
{"current_steps": 30, "total_steps": 4403, "loss": 0.6127, "lr": 2.6303854875283447e-06, "epoch": 0.04769475357710652, "percentage": 0.68, "elapsed_time": "0:03:13", "remaining_time": "7:49:10"}
{"current_steps": 35, "total_steps": 4403, "loss": 0.5628, "lr": 3.08390022675737e-06, "epoch": 0.05564387917329094, "percentage": 0.79, "elapsed_time": "0:03:39", "remaining_time": "7:36:06"}
{"current_steps": 40, "total_steps": 4403, "loss": 0.5103, "lr": 3.537414965986395e-06, "epoch": 0.06359300476947535, "percentage": 0.91, "elapsed_time": "0:04:07", "remaining_time": "7:29:39"}
{"current_steps": 45, "total_steps": 4403, "loss": 0.5023, "lr": 3.99092970521542e-06, "epoch": 0.07154213036565978, "percentage": 1.02, "elapsed_time": "0:04:39", "remaining_time": "7:30:35"}
{"current_steps": 50, "total_steps": 4403, "loss": 0.4644, "lr": 4.444444444444444e-06, "epoch": 0.0794912559618442, "percentage": 1.14, "elapsed_time": "0:05:08", "remaining_time": "7:27:54"}
{"current_steps": 55, "total_steps": 4403, "loss": 0.4919, "lr": 4.897959183673469e-06, "epoch": 0.08744038155802862, "percentage": 1.25, "elapsed_time": "0:05:40", "remaining_time": "7:28:22"}
{"current_steps": 60, "total_steps": 4403, "loss": 0.4566, "lr": 5.3514739229024945e-06, "epoch": 0.09538950715421304, "percentage": 1.36, "elapsed_time": "0:06:12", "remaining_time": "7:29:05"}
{"current_steps": 65, "total_steps": 4403, "loss": 0.4522, "lr": 5.80498866213152e-06, "epoch": 0.10333863275039745, "percentage": 1.48, "elapsed_time": "0:06:45", "remaining_time": "7:31:00"}
{"current_steps": 70, "total_steps": 4403, "loss": 0.4344, "lr": 6.258503401360545e-06, "epoch": 0.11128775834658187, "percentage": 1.59, "elapsed_time": "0:07:13", "remaining_time": "7:27:33"}
{"current_steps": 75, "total_steps": 4403, "loss": 0.4311, "lr": 6.71201814058957e-06, "epoch": 0.1192368839427663, "percentage": 1.7, "elapsed_time": "0:07:44", "remaining_time": "7:27:03"}
{"current_steps": 80, "total_steps": 4403, "loss": 0.4101, "lr": 7.165532879818595e-06, "epoch": 0.1271860095389507, "percentage": 1.82, "elapsed_time": "0:08:17", "remaining_time": "7:28:14"}
{"current_steps": 85, "total_steps": 4403, "loss": 0.3868, "lr": 7.61904761904762e-06, "epoch": 0.13513513513513514, "percentage": 1.93, "elapsed_time": "0:08:46", "remaining_time": "7:25:27"}
{"current_steps": 90, "total_steps": 4403, "loss": 0.3882, "lr": 8.072562358276645e-06, "epoch": 0.14308426073131955, "percentage": 2.04, "elapsed_time": "0:09:16", "remaining_time": "7:24:48"}
{"current_steps": 95, "total_steps": 4403, "loss": 0.3943, "lr": 8.52607709750567e-06, "epoch": 0.151033386327504, "percentage": 2.16, "elapsed_time": "0:09:48", "remaining_time": "7:24:31"}
{"current_steps": 100, "total_steps": 4403, "loss": 0.3924, "lr": 8.979591836734695e-06, "epoch": 0.1589825119236884, "percentage": 2.27, "elapsed_time": "0:10:19", "remaining_time": "7:24:02"}
{"current_steps": 105, "total_steps": 4403, "loss": 0.3926, "lr": 9.43310657596372e-06, "epoch": 0.1669316375198728, "percentage": 2.38, "elapsed_time": "0:10:49", "remaining_time": "7:23:22"}
{"current_steps": 110, "total_steps": 4403, "loss": 0.3535, "lr": 9.886621315192746e-06, "epoch": 0.17488076311605724, "percentage": 2.5, "elapsed_time": "0:11:14", "remaining_time": "7:18:32"}
{"current_steps": 115, "total_steps": 4403, "loss": 0.3797, "lr": 1.034013605442177e-05, "epoch": 0.18282988871224165, "percentage": 2.61, "elapsed_time": "0:11:49", "remaining_time": "7:20:43"}
{"current_steps": 120, "total_steps": 4403, "loss": 0.346, "lr": 1.0793650793650794e-05, "epoch": 0.1907790143084261, "percentage": 2.73, "elapsed_time": "0:12:13", "remaining_time": "7:16:29"}
{"current_steps": 125, "total_steps": 4403, "loss": 0.3799, "lr": 1.124716553287982e-05, "epoch": 0.1987281399046105, "percentage": 2.84, "elapsed_time": "0:12:42", "remaining_time": "7:14:48"}
{"current_steps": 130, "total_steps": 4403, "loss": 0.3447, "lr": 1.1700680272108845e-05, "epoch": 0.2066772655007949, "percentage": 2.95, "elapsed_time": "0:13:12", "remaining_time": "7:14:14"}
{"current_steps": 135, "total_steps": 4403, "loss": 0.3513, "lr": 1.215419501133787e-05, "epoch": 0.21462639109697934, "percentage": 3.07, "elapsed_time": "0:13:33", "remaining_time": "7:08:51"}
{"current_steps": 140, "total_steps": 4403, "loss": 0.3438, "lr": 1.2607709750566895e-05, "epoch": 0.22257551669316375, "percentage": 3.18, "elapsed_time": "0:14:05", "remaining_time": "7:09:04"}
{"current_steps": 145, "total_steps": 4403, "loss": 0.345, "lr": 1.3061224489795918e-05, "epoch": 0.23052464228934816, "percentage": 3.29, "elapsed_time": "0:14:34", "remaining_time": "7:07:52"}
{"current_steps": 150, "total_steps": 4403, "loss": 0.3382, "lr": 1.3514739229024945e-05, "epoch": 0.2384737678855326, "percentage": 3.41, "elapsed_time": "0:15:04", "remaining_time": "7:07:22"}
{"current_steps": 155, "total_steps": 4403, "loss": 0.3626, "lr": 1.3968253968253968e-05, "epoch": 0.246422893481717, "percentage": 3.52, "elapsed_time": "0:15:33", "remaining_time": "7:06:26"}
{"current_steps": 160, "total_steps": 4403, "loss": 0.3379, "lr": 1.4421768707482994e-05, "epoch": 0.2543720190779014, "percentage": 3.63, "elapsed_time": "0:16:06", "remaining_time": "7:07:16"}
{"current_steps": 165, "total_steps": 4403, "loss": 0.343, "lr": 1.4875283446712018e-05, "epoch": 0.26232114467408585, "percentage": 3.75, "elapsed_time": "0:16:37", "remaining_time": "7:07:11"}
{"current_steps": 170, "total_steps": 4403, "loss": 0.3339, "lr": 1.5328798185941044e-05, "epoch": 0.2702702702702703, "percentage": 3.86, "elapsed_time": "0:17:07", "remaining_time": "7:06:23"}
{"current_steps": 175, "total_steps": 4403, "loss": 0.3559, "lr": 1.578231292517007e-05, "epoch": 0.27821939586645467, "percentage": 3.97, "elapsed_time": "0:17:40", "remaining_time": "7:06:58"}
{"current_steps": 180, "total_steps": 4403, "loss": 0.3324, "lr": 1.6235827664399097e-05, "epoch": 0.2861685214626391, "percentage": 4.09, "elapsed_time": "0:18:12", "remaining_time": "7:07:22"}
{"current_steps": 185, "total_steps": 4403, "loss": 0.3369, "lr": 1.668934240362812e-05, "epoch": 0.29411764705882354, "percentage": 4.2, "elapsed_time": "0:18:44", "remaining_time": "7:07:13"}
{"current_steps": 190, "total_steps": 4403, "loss": 0.3206, "lr": 1.7142857142857142e-05, "epoch": 0.302066772655008, "percentage": 4.32, "elapsed_time": "0:19:16", "remaining_time": "7:07:20"}
{"current_steps": 195, "total_steps": 4403, "loss": 0.3221, "lr": 1.759637188208617e-05, "epoch": 0.31001589825119236, "percentage": 4.43, "elapsed_time": "0:19:42", "remaining_time": "7:05:27"}
{"current_steps": 200, "total_steps": 4403, "loss": 0.3356, "lr": 1.8049886621315194e-05, "epoch": 0.3179650238473768, "percentage": 4.54, "elapsed_time": "0:20:11", "remaining_time": "7:04:19"}
{"current_steps": 205, "total_steps": 4403, "loss": 0.3543, "lr": 1.8503401360544218e-05, "epoch": 0.32591414944356123, "percentage": 4.66, "elapsed_time": "0:20:42", "remaining_time": "7:04:13"}
{"current_steps": 210, "total_steps": 4403, "loss": 0.3249, "lr": 1.8956916099773243e-05, "epoch": 0.3338632750397456, "percentage": 4.77, "elapsed_time": "0:21:14", "remaining_time": "7:04:03"}
{"current_steps": 215, "total_steps": 4403, "loss": 0.3238, "lr": 1.941043083900227e-05, "epoch": 0.34181240063593005, "percentage": 4.88, "elapsed_time": "0:21:46", "remaining_time": "7:04:15"}
{"current_steps": 220, "total_steps": 4403, "loss": 0.3166, "lr": 1.9863945578231295e-05, "epoch": 0.3497615262321145, "percentage": 5.0, "elapsed_time": "0:22:16", "remaining_time": "7:03:27"}
{"current_steps": 225, "total_steps": 4403, "loss": 0.313, "lr": 2.031746031746032e-05, "epoch": 0.35771065182829886, "percentage": 5.11, "elapsed_time": "0:22:42", "remaining_time": "7:01:31"}
{"current_steps": 230, "total_steps": 4403, "loss": 0.2818, "lr": 2.0770975056689343e-05, "epoch": 0.3656597774244833, "percentage": 5.22, "elapsed_time": "0:23:11", "remaining_time": "7:00:50"}
{"current_steps": 235, "total_steps": 4403, "loss": 0.3189, "lr": 2.122448979591837e-05, "epoch": 0.37360890302066774, "percentage": 5.34, "elapsed_time": "0:23:44", "remaining_time": "7:01:10"}
{"current_steps": 240, "total_steps": 4403, "loss": 0.3114, "lr": 2.1678004535147395e-05, "epoch": 0.3815580286168522, "percentage": 5.45, "elapsed_time": "0:24:18", "remaining_time": "7:01:41"}
{"current_steps": 245, "total_steps": 4403, "loss": 0.3029, "lr": 2.213151927437642e-05, "epoch": 0.38950715421303655, "percentage": 5.56, "elapsed_time": "0:24:46", "remaining_time": "7:00:36"}
{"current_steps": 250, "total_steps": 4403, "loss": 0.3287, "lr": 2.2585034013605444e-05, "epoch": 0.397456279809221, "percentage": 5.68, "elapsed_time": "0:25:16", "remaining_time": "6:59:49"}
{"current_steps": 255, "total_steps": 4403, "loss": 0.2961, "lr": 2.3038548752834472e-05, "epoch": 0.40540540540540543, "percentage": 5.79, "elapsed_time": "0:25:47", "remaining_time": "6:59:32"}
{"current_steps": 260, "total_steps": 4403, "loss": 0.2936, "lr": 2.3492063492063496e-05, "epoch": 0.4133545310015898, "percentage": 5.91, "elapsed_time": "0:26:18", "remaining_time": "6:59:05"}
{"current_steps": 265, "total_steps": 4403, "loss": 0.2898, "lr": 2.394557823129252e-05, "epoch": 0.42130365659777425, "percentage": 6.02, "elapsed_time": "0:26:48", "remaining_time": "6:58:35"}
{"current_steps": 270, "total_steps": 4403, "loss": 0.3131, "lr": 2.439909297052154e-05, "epoch": 0.4292527821939587, "percentage": 6.13, "elapsed_time": "0:27:22", "remaining_time": "6:59:09"}
{"current_steps": 275, "total_steps": 4403, "loss": 0.3108, "lr": 2.4852607709750566e-05, "epoch": 0.43720190779014306, "percentage": 6.25, "elapsed_time": "0:27:54", "remaining_time": "6:58:57"}
{"current_steps": 280, "total_steps": 4403, "loss": 0.3034, "lr": 2.5306122448979597e-05, "epoch": 0.4451510333863275, "percentage": 6.36, "elapsed_time": "0:28:21", "remaining_time": "6:57:34"}
{"current_steps": 285, "total_steps": 4403, "loss": 0.2836, "lr": 2.5759637188208618e-05, "epoch": 0.45310015898251194, "percentage": 6.47, "elapsed_time": "0:28:53", "remaining_time": "6:57:21"}
{"current_steps": 290, "total_steps": 4403, "loss": 0.3048, "lr": 2.6213151927437642e-05, "epoch": 0.4610492845786963, "percentage": 6.59, "elapsed_time": "0:29:21", "remaining_time": "6:56:26"}
{"current_steps": 295, "total_steps": 4403, "loss": 0.2863, "lr": 2.6666666666666667e-05, "epoch": 0.46899841017488075, "percentage": 6.7, "elapsed_time": "0:29:53", "remaining_time": "6:56:12"}
{"current_steps": 300, "total_steps": 4403, "loss": 0.2791, "lr": 2.7120181405895694e-05, "epoch": 0.4769475357710652, "percentage": 6.81, "elapsed_time": "0:30:23", "remaining_time": "6:55:37"}
{"current_steps": 305, "total_steps": 4403, "loss": 0.2962, "lr": 2.757369614512472e-05, "epoch": 0.4848966613672496, "percentage": 6.93, "elapsed_time": "0:30:55", "remaining_time": "6:55:32"}
{"current_steps": 310, "total_steps": 4403, "loss": 0.3183, "lr": 2.8027210884353743e-05, "epoch": 0.492845786963434, "percentage": 7.04, "elapsed_time": "0:31:23", "remaining_time": "6:54:24"}
{"current_steps": 315, "total_steps": 4403, "loss": 0.3042, "lr": 2.8480725623582767e-05, "epoch": 0.5007949125596184, "percentage": 7.15, "elapsed_time": "0:31:53", "remaining_time": "6:53:56"}
{"current_steps": 320, "total_steps": 4403, "loss": 0.2877, "lr": 2.893424036281179e-05, "epoch": 0.5087440381558028, "percentage": 7.27, "elapsed_time": "0:32:24", "remaining_time": "6:53:29"}
{"current_steps": 325, "total_steps": 4403, "loss": 0.3105, "lr": 2.938775510204082e-05, "epoch": 0.5166931637519873, "percentage": 7.38, "elapsed_time": "0:32:52", "remaining_time": "6:52:30"}
{"current_steps": 330, "total_steps": 4403, "loss": 0.2774, "lr": 2.9841269841269844e-05, "epoch": 0.5246422893481717, "percentage": 7.49, "elapsed_time": "0:33:16", "remaining_time": "6:50:41"}
{"current_steps": 335, "total_steps": 4403, "loss": 0.3094, "lr": 3.0294784580498868e-05, "epoch": 0.5325914149443561, "percentage": 7.61, "elapsed_time": "0:33:46", "remaining_time": "6:50:11"}
{"current_steps": 340, "total_steps": 4403, "loss": 0.2684, "lr": 3.074829931972789e-05, "epoch": 0.5405405405405406, "percentage": 7.72, "elapsed_time": "0:34:15", "remaining_time": "6:49:26"}
{"current_steps": 345, "total_steps": 4403, "loss": 0.2878, "lr": 3.1201814058956924e-05, "epoch": 0.548489666136725, "percentage": 7.84, "elapsed_time": "0:34:44", "remaining_time": "6:48:34"}
{"current_steps": 350, "total_steps": 4403, "loss": 0.3028, "lr": 3.1655328798185945e-05, "epoch": 0.5564387917329093, "percentage": 7.95, "elapsed_time": "0:35:18", "remaining_time": "6:48:52"}
{"current_steps": 355, "total_steps": 4403, "loss": 0.3029, "lr": 3.2108843537414965e-05, "epoch": 0.5643879173290938, "percentage": 8.06, "elapsed_time": "0:35:48", "remaining_time": "6:48:20"}
{"current_steps": 360, "total_steps": 4403, "loss": 0.2768, "lr": 3.256235827664399e-05, "epoch": 0.5723370429252782, "percentage": 8.18, "elapsed_time": "0:36:19", "remaining_time": "6:47:59"}
{"current_steps": 365, "total_steps": 4403, "loss": 0.2938, "lr": 3.3015873015873014e-05, "epoch": 0.5802861685214626, "percentage": 8.29, "elapsed_time": "0:36:49", "remaining_time": "6:47:28"}
{"current_steps": 370, "total_steps": 4403, "loss": 0.2773, "lr": 3.346938775510204e-05, "epoch": 0.5882352941176471, "percentage": 8.4, "elapsed_time": "0:37:20", "remaining_time": "6:47:05"}
{"current_steps": 375, "total_steps": 4403, "loss": 0.2886, "lr": 3.392290249433107e-05, "epoch": 0.5961844197138315, "percentage": 8.52, "elapsed_time": "0:37:47", "remaining_time": "6:45:56"}
{"current_steps": 380, "total_steps": 4403, "loss": 0.2946, "lr": 3.437641723356009e-05, "epoch": 0.604133545310016, "percentage": 8.63, "elapsed_time": "0:38:21", "remaining_time": "6:46:08"}
{"current_steps": 385, "total_steps": 4403, "loss": 0.2831, "lr": 3.482993197278912e-05, "epoch": 0.6120826709062003, "percentage": 8.74, "elapsed_time": "0:38:49", "remaining_time": "6:45:07"}
{"current_steps": 390, "total_steps": 4403, "loss": 0.3016, "lr": 3.5283446712018146e-05, "epoch": 0.6200317965023847, "percentage": 8.86, "elapsed_time": "0:39:17", "remaining_time": "6:44:22"}
{"current_steps": 395, "total_steps": 4403, "loss": 0.2783, "lr": 3.573696145124717e-05, "epoch": 0.6279809220985691, "percentage": 8.97, "elapsed_time": "0:39:45", "remaining_time": "6:43:21"}
{"current_steps": 400, "total_steps": 4403, "loss": 0.2683, "lr": 3.6190476190476195e-05, "epoch": 0.6359300476947536, "percentage": 9.08, "elapsed_time": "0:40:15", "remaining_time": "6:42:55"}
{"current_steps": 405, "total_steps": 4403, "loss": 0.2793, "lr": 3.6643990929705216e-05, "epoch": 0.643879173290938, "percentage": 9.2, "elapsed_time": "0:40:40", "remaining_time": "6:41:29"}
{"current_steps": 410, "total_steps": 4403, "loss": 0.2866, "lr": 3.7097505668934243e-05, "epoch": 0.6518282988871225, "percentage": 9.31, "elapsed_time": "0:41:08", "remaining_time": "6:40:44"}
{"current_steps": 415, "total_steps": 4403, "loss": 0.2739, "lr": 3.755102040816327e-05, "epoch": 0.6597774244833068, "percentage": 9.43, "elapsed_time": "0:41:37", "remaining_time": "6:39:58"}
{"current_steps": 420, "total_steps": 4403, "loss": 0.2668, "lr": 3.800453514739229e-05, "epoch": 0.6677265500794912, "percentage": 9.54, "elapsed_time": "0:42:09", "remaining_time": "6:39:45"}
{"current_steps": 425, "total_steps": 4403, "loss": 0.2759, "lr": 3.845804988662132e-05, "epoch": 0.6756756756756757, "percentage": 9.65, "elapsed_time": "0:42:41", "remaining_time": "6:39:35"}
{"current_steps": 430, "total_steps": 4403, "loss": 0.2878, "lr": 3.891156462585034e-05, "epoch": 0.6836248012718601, "percentage": 9.77, "elapsed_time": "0:43:10", "remaining_time": "6:38:55"}
{"current_steps": 435, "total_steps": 4403, "loss": 0.2873, "lr": 3.936507936507937e-05, "epoch": 0.6915739268680445, "percentage": 9.88, "elapsed_time": "0:43:37", "remaining_time": "6:37:54"}
{"current_steps": 440, "total_steps": 4403, "loss": 0.2676, "lr": 3.9818594104308396e-05, "epoch": 0.699523052464229, "percentage": 9.99, "elapsed_time": "0:44:05", "remaining_time": "6:37:10"}
{"current_steps": 445, "total_steps": 4403, "loss": 0.2828, "lr": 3.999994341346418e-05, "epoch": 0.7074721780604134, "percentage": 10.11, "elapsed_time": "0:44:36", "remaining_time": "6:36:45"}
{"current_steps": 450, "total_steps": 4403, "loss": 0.2832, "lr": 3.999959760801596e-05, "epoch": 0.7154213036565977, "percentage": 10.22, "elapsed_time": "0:45:09", "remaining_time": "6:36:38"}
{"current_steps": 455, "total_steps": 4403, "loss": 0.2627, "lr": 3.999893743951281e-05, "epoch": 0.7233704292527822, "percentage": 10.33, "elapsed_time": "0:45:36", "remaining_time": "6:35:45"}
{"current_steps": 460, "total_steps": 4403, "loss": 0.2959, "lr": 3.9997962918331554e-05, "epoch": 0.7313195548489666, "percentage": 10.45, "elapsed_time": "0:46:03", "remaining_time": "6:34:44"}
{"current_steps": 465, "total_steps": 4403, "loss": 0.2829, "lr": 3.999667405979019e-05, "epoch": 0.739268680445151, "percentage": 10.56, "elapsed_time": "0:46:30", "remaining_time": "6:33:53"}
{"current_steps": 470, "total_steps": 4403, "loss": 0.2983, "lr": 3.9995070884147604e-05, "epoch": 0.7472178060413355, "percentage": 10.67, "elapsed_time": "0:47:02", "remaining_time": "6:33:37"}
{"current_steps": 475, "total_steps": 4403, "loss": 0.2772, "lr": 3.999315341660325e-05, "epoch": 0.7551669316375199, "percentage": 10.79, "elapsed_time": "0:47:32", "remaining_time": "6:33:09"}
{"current_steps": 480, "total_steps": 4403, "loss": 0.2732, "lr": 3.9990921687296785e-05, "epoch": 0.7631160572337043, "percentage": 10.9, "elapsed_time": "0:48:00", "remaining_time": "6:32:19"}
{"current_steps": 485, "total_steps": 4403, "loss": 0.2833, "lr": 3.998837573130758e-05, "epoch": 0.7710651828298887, "percentage": 11.02, "elapsed_time": "0:48:28", "remaining_time": "6:31:36"}
{"current_steps": 490, "total_steps": 4403, "loss": 0.2696, "lr": 3.9985515588654166e-05, "epoch": 0.7790143084260731, "percentage": 11.13, "elapsed_time": "0:48:56", "remaining_time": "6:30:51"}
{"current_steps": 495, "total_steps": 4403, "loss": 0.2792, "lr": 3.99823413042936e-05, "epoch": 0.7869634340222575, "percentage": 11.24, "elapsed_time": "0:49:24", "remaining_time": "6:30:05"}
{"current_steps": 500, "total_steps": 4403, "loss": 0.2836, "lr": 3.997885292812078e-05, "epoch": 0.794912559618442, "percentage": 11.36, "elapsed_time": "0:49:52", "remaining_time": "6:29:23"}
{"current_steps": 505, "total_steps": 4403, "loss": 0.2784, "lr": 3.997505051496764e-05, "epoch": 0.8028616852146264, "percentage": 11.47, "elapsed_time": "0:50:23", "remaining_time": "6:29:00"}
{"current_steps": 510, "total_steps": 4403, "loss": 0.2719, "lr": 3.997093412460229e-05, "epoch": 0.8108108108108109, "percentage": 11.58, "elapsed_time": "0:50:54", "remaining_time": "6:28:35"}
{"current_steps": 515, "total_steps": 4403, "loss": 0.262, "lr": 3.9966503821728074e-05, "epoch": 0.8187599364069952, "percentage": 11.7, "elapsed_time": "0:51:17", "remaining_time": "6:27:11"}
{"current_steps": 520, "total_steps": 4403, "loss": 0.2577, "lr": 3.996175967598258e-05, "epoch": 0.8267090620031796, "percentage": 11.81, "elapsed_time": "0:51:44", "remaining_time": "6:26:24"}
{"current_steps": 525, "total_steps": 4403, "loss": 0.2598, "lr": 3.995670176193651e-05, "epoch": 0.834658187599364, "percentage": 11.92, "elapsed_time": "0:52:13", "remaining_time": "6:25:44"}
{"current_steps": 530, "total_steps": 4403, "loss": 0.2692, "lr": 3.9951330159092554e-05, "epoch": 0.8426073131955485, "percentage": 12.04, "elapsed_time": "0:52:44", "remaining_time": "6:25:22"}
{"current_steps": 535, "total_steps": 4403, "loss": 0.2526, "lr": 3.994564495188405e-05, "epoch": 0.8505564387917329, "percentage": 12.15, "elapsed_time": "0:53:13", "remaining_time": "6:24:50"}
{"current_steps": 540, "total_steps": 4403, "loss": 0.2804, "lr": 3.9939646229673775e-05, "epoch": 0.8585055643879174, "percentage": 12.26, "elapsed_time": "0:53:43", "remaining_time": "6:24:21"}
{"current_steps": 545, "total_steps": 4403, "loss": 0.2735, "lr": 3.993333408675244e-05, "epoch": 0.8664546899841018, "percentage": 12.38, "elapsed_time": "0:54:12", "remaining_time": "6:23:44"}
{"current_steps": 550, "total_steps": 4403, "loss": 0.2851, "lr": 3.9926708622337285e-05, "epoch": 0.8744038155802861, "percentage": 12.49, "elapsed_time": "0:54:38", "remaining_time": "6:22:49"}
{"current_steps": 555, "total_steps": 4403, "loss": 0.2848, "lr": 3.991976994057046e-05, "epoch": 0.8823529411764706, "percentage": 12.61, "elapsed_time": "0:55:13", "remaining_time": "6:22:51"}
{"current_steps": 560, "total_steps": 4403, "loss": 0.2616, "lr": 3.991251815051741e-05, "epoch": 0.890302066772655, "percentage": 12.72, "elapsed_time": "0:55:42", "remaining_time": "6:22:20"}
{"current_steps": 565, "total_steps": 4403, "loss": 0.2738, "lr": 3.990495336616519e-05, "epoch": 0.8982511923688394, "percentage": 12.83, "elapsed_time": "0:56:13", "remaining_time": "6:21:54"}
{"current_steps": 570, "total_steps": 4403, "loss": 0.2784, "lr": 3.989707570642062e-05, "epoch": 0.9062003179650239, "percentage": 12.95, "elapsed_time": "0:56:37", "remaining_time": "6:20:44"}
{"current_steps": 575, "total_steps": 4403, "loss": 0.2529, "lr": 3.988888529510844e-05, "epoch": 0.9141494435612083, "percentage": 13.06, "elapsed_time": "0:57:07", "remaining_time": "6:20:15"}
{"current_steps": 580, "total_steps": 4403, "loss": 0.2671, "lr": 3.988038226096939e-05, "epoch": 0.9220985691573926, "percentage": 13.17, "elapsed_time": "0:57:34", "remaining_time": "6:19:32"}
{"current_steps": 585, "total_steps": 4403, "loss": 0.2692, "lr": 3.9871566737658144e-05, "epoch": 0.9300476947535771, "percentage": 13.29, "elapsed_time": "0:58:06", "remaining_time": "6:19:13"}
{"current_steps": 590, "total_steps": 4403, "loss": 0.2698, "lr": 3.986243886374124e-05, "epoch": 0.9379968203497615, "percentage": 13.4, "elapsed_time": "0:58:34", "remaining_time": "6:18:30"}
{"current_steps": 595, "total_steps": 4403, "loss": 0.2714, "lr": 3.985299878269486e-05, "epoch": 0.9459459459459459, "percentage": 13.51, "elapsed_time": "0:59:06", "remaining_time": "6:18:19"}
{"current_steps": 600, "total_steps": 4403, "loss": 0.278, "lr": 3.9843246642902646e-05, "epoch": 0.9538950715421304, "percentage": 13.63, "elapsed_time": "0:59:36", "remaining_time": "6:17:48"}
{"current_steps": 605, "total_steps": 4403, "loss": 0.2481, "lr": 3.98331825976533e-05, "epoch": 0.9618441971383148, "percentage": 13.74, "elapsed_time": "1:00:04", "remaining_time": "6:17:09"}
{"current_steps": 610, "total_steps": 4403, "loss": 0.2815, "lr": 3.98228068051382e-05, "epoch": 0.9697933227344993, "percentage": 13.85, "elapsed_time": "1:00:34", "remaining_time": "6:16:41"}
{"current_steps": 615, "total_steps": 4403, "loss": 0.2511, "lr": 3.9812119428448926e-05, "epoch": 0.9777424483306836, "percentage": 13.97, "elapsed_time": "1:01:06", "remaining_time": "6:16:24"}
{"current_steps": 620, "total_steps": 4403, "loss": 0.2772, "lr": 3.9801120635574664e-05, "epoch": 0.985691573926868, "percentage": 14.08, "elapsed_time": "1:01:33", "remaining_time": "6:15:35"}
{"current_steps": 625, "total_steps": 4403, "loss": 0.264, "lr": 3.978981059939961e-05, "epoch": 0.9936406995230525, "percentage": 14.19, "elapsed_time": "1:02:04", "remaining_time": "6:15:14"}
{"current_steps": 630, "total_steps": 4403, "loss": 0.2773, "lr": 3.977818949770022e-05, "epoch": 1.0015898251192368, "percentage": 14.31, "elapsed_time": "1:02:30", "remaining_time": "6:14:18"}
{"current_steps": 635, "total_steps": 4403, "loss": 0.2623, "lr": 3.976625751314241e-05, "epoch": 1.0095389507154213, "percentage": 14.42, "elapsed_time": "1:02:59", "remaining_time": "6:13:48"}
{"current_steps": 640, "total_steps": 4403, "loss": 0.2412, "lr": 3.975401483327871e-05, "epoch": 1.0174880763116056, "percentage": 14.54, "elapsed_time": "1:03:25", "remaining_time": "6:12:57"}
{"current_steps": 645, "total_steps": 4403, "loss": 0.2504, "lr": 3.974146165054532e-05, "epoch": 1.0254372019077902, "percentage": 14.65, "elapsed_time": "1:03:54", "remaining_time": "6:12:23"}
{"current_steps": 650, "total_steps": 4403, "loss": 0.2476, "lr": 3.972859816225904e-05, "epoch": 1.0333863275039745, "percentage": 14.76, "elapsed_time": "1:04:22", "remaining_time": "6:11:38"}
{"current_steps": 655, "total_steps": 4403, "loss": 0.2655, "lr": 3.97154245706142e-05, "epoch": 1.041335453100159, "percentage": 14.88, "elapsed_time": "1:04:50", "remaining_time": "6:10:59"}
{"current_steps": 660, "total_steps": 4403, "loss": 0.257, "lr": 3.970194108267952e-05, "epoch": 1.0492845786963434, "percentage": 14.99, "elapsed_time": "1:05:22", "remaining_time": "6:10:47"}
{"current_steps": 665, "total_steps": 4403, "loss": 0.263, "lr": 3.968814791039477e-05, "epoch": 1.0572337042925277, "percentage": 15.1, "elapsed_time": "1:05:55", "remaining_time": "6:10:31"}
{"current_steps": 670, "total_steps": 4403, "loss": 0.2442, "lr": 3.967404527056751e-05, "epoch": 1.0651828298887123, "percentage": 15.22, "elapsed_time": "1:06:23", "remaining_time": "6:09:56"}
{"current_steps": 675, "total_steps": 4403, "loss": 0.2678, "lr": 3.9659633384869626e-05, "epoch": 1.0731319554848966, "percentage": 15.33, "elapsed_time": "1:06:59", "remaining_time": "6:09:58"}
{"current_steps": 680, "total_steps": 4403, "loss": 0.2553, "lr": 3.964491247983392e-05, "epoch": 1.0810810810810811, "percentage": 15.44, "elapsed_time": "1:07:30", "remaining_time": "6:09:36"}
{"current_steps": 685, "total_steps": 4403, "loss": 0.2562, "lr": 3.962988278685047e-05, "epoch": 1.0890302066772655, "percentage": 15.56, "elapsed_time": "1:07:59", "remaining_time": "6:09:00"}
{"current_steps": 690, "total_steps": 4403, "loss": 0.2285, "lr": 3.961454454216305e-05, "epoch": 1.09697933227345, "percentage": 15.67, "elapsed_time": "1:08:25", "remaining_time": "6:08:13"}
{"current_steps": 695, "total_steps": 4403, "loss": 0.2548, "lr": 3.9598897986865364e-05, "epoch": 1.1049284578696343, "percentage": 15.78, "elapsed_time": "1:08:53", "remaining_time": "6:07:35"}
{"current_steps": 700, "total_steps": 4403, "loss": 0.2475, "lr": 3.9582943366897316e-05, "epoch": 1.1128775834658187, "percentage": 15.9, "elapsed_time": "1:09:22", "remaining_time": "6:06:59"}
{"current_steps": 705, "total_steps": 4403, "loss": 0.2591, "lr": 3.956668093304112e-05, "epoch": 1.1208267090620032, "percentage": 16.01, "elapsed_time": "1:09:52", "remaining_time": "6:06:31"}
{"current_steps": 710, "total_steps": 4403, "loss": 0.2428, "lr": 3.9550110940917313e-05, "epoch": 1.1287758346581875, "percentage": 16.13, "elapsed_time": "1:10:22", "remaining_time": "6:06:02"}
{"current_steps": 715, "total_steps": 4403, "loss": 0.2511, "lr": 3.953323365098082e-05, "epoch": 1.136724960254372, "percentage": 16.24, "elapsed_time": "1:10:50", "remaining_time": "6:05:26"}
{"current_steps": 720, "total_steps": 4403, "loss": 0.2423, "lr": 3.9516049328516795e-05, "epoch": 1.1446740858505564, "percentage": 16.35, "elapsed_time": "1:11:19", "remaining_time": "6:04:51"}
{"current_steps": 725, "total_steps": 4403, "loss": 0.239, "lr": 3.949855824363647e-05, "epoch": 1.1526232114467407, "percentage": 16.47, "elapsed_time": "1:11:49", "remaining_time": "6:04:24"}
{"current_steps": 730, "total_steps": 4403, "loss": 0.2408, "lr": 3.948076067127294e-05, "epoch": 1.1605723370429253, "percentage": 16.58, "elapsed_time": "1:12:19", "remaining_time": "6:03:55"}
{"current_steps": 735, "total_steps": 4403, "loss": 0.2467, "lr": 3.946265689117677e-05, "epoch": 1.1685214626391096, "percentage": 16.69, "elapsed_time": "1:12:45", "remaining_time": "6:03:06"}
{"current_steps": 740, "total_steps": 4403, "loss": 0.2642, "lr": 3.944424718791169e-05, "epoch": 1.1764705882352942, "percentage": 16.81, "elapsed_time": "1:13:16", "remaining_time": "6:02:40"}
{"current_steps": 745, "total_steps": 4403, "loss": 0.2347, "lr": 3.942553185085003e-05, "epoch": 1.1844197138314785, "percentage": 16.92, "elapsed_time": "1:13:45", "remaining_time": "6:02:07"}
{"current_steps": 750, "total_steps": 4403, "loss": 0.2415, "lr": 3.940651117416824e-05, "epoch": 1.192368839427663, "percentage": 17.03, "elapsed_time": "1:14:16", "remaining_time": "6:01:44"}
{"current_steps": 755, "total_steps": 4403, "loss": 0.2463, "lr": 3.9387185456842247e-05, "epoch": 1.2003179650238474, "percentage": 17.15, "elapsed_time": "1:14:43", "remaining_time": "6:01:04"}
{"current_steps": 760, "total_steps": 4403, "loss": 0.2653, "lr": 3.936755500264274e-05, "epoch": 1.2082670906200317, "percentage": 17.26, "elapsed_time": "1:15:12", "remaining_time": "6:00:31"}
{"current_steps": 765, "total_steps": 4403, "loss": 0.2491, "lr": 3.9347620120130384e-05, "epoch": 1.2162162162162162, "percentage": 17.37, "elapsed_time": "1:15:43", "remaining_time": "6:00:07"}
{"current_steps": 770, "total_steps": 4403, "loss": 0.2669, "lr": 3.932738112265103e-05, "epoch": 1.2241653418124006, "percentage": 17.49, "elapsed_time": "1:16:07", "remaining_time": "5:59:12"}
{"current_steps": 775, "total_steps": 4403, "loss": 0.2472, "lr": 3.930683832833073e-05, "epoch": 1.232114467408585, "percentage": 17.6, "elapsed_time": "1:16:37", "remaining_time": "5:58:40"}
{"current_steps": 780, "total_steps": 4403, "loss": 0.2317, "lr": 3.928599206007076e-05, "epoch": 1.2400635930047694, "percentage": 17.72, "elapsed_time": "1:16:59", "remaining_time": "5:57:38"}
{"current_steps": 785, "total_steps": 4403, "loss": 0.2347, "lr": 3.926484264554253e-05, "epoch": 1.248012718600954, "percentage": 17.83, "elapsed_time": "1:17:29", "remaining_time": "5:57:10"}
{"current_steps": 790, "total_steps": 4403, "loss": 0.2528, "lr": 3.924339041718247e-05, "epoch": 1.2559618441971383, "percentage": 17.94, "elapsed_time": "1:17:55", "remaining_time": "5:56:24"}
{"current_steps": 795, "total_steps": 4403, "loss": 0.2377, "lr": 3.922163571218676e-05, "epoch": 1.2639109697933226, "percentage": 18.06, "elapsed_time": "1:18:25", "remaining_time": "5:55:56"}
{"current_steps": 800, "total_steps": 4403, "loss": 0.2574, "lr": 3.919957887250606e-05, "epoch": 1.2718600953895072, "percentage": 18.17, "elapsed_time": "1:18:54", "remaining_time": "5:55:24"}
{"current_steps": 805, "total_steps": 4403, "loss": 0.2543, "lr": 3.917722024484011e-05, "epoch": 1.2798092209856915, "percentage": 18.28, "elapsed_time": "1:19:16", "remaining_time": "5:54:19"}
{"current_steps": 810, "total_steps": 4403, "loss": 0.241, "lr": 3.915456018063232e-05, "epoch": 1.287758346581876, "percentage": 18.4, "elapsed_time": "1:19:42", "remaining_time": "5:53:34"}
{"current_steps": 815, "total_steps": 4403, "loss": 0.2469, "lr": 3.9131599036064204e-05, "epoch": 1.2957074721780604, "percentage": 18.51, "elapsed_time": "1:20:09", "remaining_time": "5:52:55"}
{"current_steps": 820, "total_steps": 4403, "loss": 0.2492, "lr": 3.9108337172049794e-05, "epoch": 1.303656597774245, "percentage": 18.62, "elapsed_time": "1:20:41", "remaining_time": "5:52:34"}
{"current_steps": 825, "total_steps": 4403, "loss": 0.2865, "lr": 3.908477495422998e-05, "epoch": 1.3116057233704292, "percentage": 18.74, "elapsed_time": "1:21:09", "remaining_time": "5:51:57"}
{"current_steps": 830, "total_steps": 4403, "loss": 0.2417, "lr": 3.906091275296676e-05, "epoch": 1.3195548489666136, "percentage": 18.85, "elapsed_time": "1:21:38", "remaining_time": "5:51:28"}
{"current_steps": 835, "total_steps": 4403, "loss": 0.2326, "lr": 3.903675094333739e-05, "epoch": 1.3275039745627981, "percentage": 18.96, "elapsed_time": "1:22:09", "remaining_time": "5:51:02"}
{"current_steps": 840, "total_steps": 4403, "loss": 0.2293, "lr": 3.901228990512854e-05, "epoch": 1.3354531001589824, "percentage": 19.08, "elapsed_time": "1:22:34", "remaining_time": "5:50:15"}
{"current_steps": 845, "total_steps": 4403, "loss": 0.2672, "lr": 3.898753002283027e-05, "epoch": 1.343402225755167, "percentage": 19.19, "elapsed_time": "1:23:02", "remaining_time": "5:49:38"}
{"current_steps": 850, "total_steps": 4403, "loss": 0.2596, "lr": 3.896247168563004e-05, "epoch": 1.3513513513513513, "percentage": 19.31, "elapsed_time": "1:23:37", "remaining_time": "5:49:32"}
{"current_steps": 855, "total_steps": 4403, "loss": 0.2408, "lr": 3.8937115287406524e-05, "epoch": 1.3593004769475359, "percentage": 19.42, "elapsed_time": "1:24:04", "remaining_time": "5:48:54"}
{"current_steps": 860, "total_steps": 4403, "loss": 0.2563, "lr": 3.891146122672349e-05, "epoch": 1.3672496025437202, "percentage": 19.53, "elapsed_time": "1:24:37", "remaining_time": "5:48:39"}
{"current_steps": 865, "total_steps": 4403, "loss": 0.2321, "lr": 3.8885509906823496e-05, "epoch": 1.3751987281399045, "percentage": 19.65, "elapsed_time": "1:25:05", "remaining_time": "5:48:04"}
{"current_steps": 870, "total_steps": 4403, "loss": 0.2735, "lr": 3.885926173562157e-05, "epoch": 1.383147853736089, "percentage": 19.76, "elapsed_time": "1:25:36", "remaining_time": "5:47:40"}
{"current_steps": 875, "total_steps": 4403, "loss": 0.2223, "lr": 3.883271712569875e-05, "epoch": 1.3910969793322734, "percentage": 19.87, "elapsed_time": "1:26:09", "remaining_time": "5:47:23"}
{"current_steps": 880, "total_steps": 4403, "loss": 0.2665, "lr": 3.8805876494295694e-05, "epoch": 1.399046104928458, "percentage": 19.99, "elapsed_time": "1:26:37", "remaining_time": "5:46:49"}
{"current_steps": 885, "total_steps": 4403, "loss": 0.2351, "lr": 3.877874026330602e-05, "epoch": 1.4069952305246423, "percentage": 20.1, "elapsed_time": "1:27:07", "remaining_time": "5:46:19"}
{"current_steps": 890, "total_steps": 4403, "loss": 0.2224, "lr": 3.875130885926973e-05, "epoch": 1.4149443561208268, "percentage": 20.21, "elapsed_time": "1:27:37", "remaining_time": "5:45:52"}
{"current_steps": 895, "total_steps": 4403, "loss": 0.2541, "lr": 3.872358271336651e-05, "epoch": 1.4228934817170111, "percentage": 20.33, "elapsed_time": "1:28:06", "remaining_time": "5:45:20"}
{"current_steps": 900, "total_steps": 4403, "loss": 0.2221, "lr": 3.8695562261408915e-05, "epoch": 1.4308426073131955, "percentage": 20.44, "elapsed_time": "1:28:35", "remaining_time": "5:44:49"}
{"current_steps": 905, "total_steps": 4403, "loss": 0.2427, "lr": 3.8667247943835555e-05, "epoch": 1.43879173290938, "percentage": 20.55, "elapsed_time": "1:29:05", "remaining_time": "5:44:22"}
{"current_steps": 910, "total_steps": 4403, "loss": 0.2356, "lr": 3.863864020570414e-05, "epoch": 1.4467408585055643, "percentage": 20.67, "elapsed_time": "1:29:35", "remaining_time": "5:43:52"}
{"current_steps": 915, "total_steps": 4403, "loss": 0.2353, "lr": 3.860973949668454e-05, "epoch": 1.4546899841017489, "percentage": 20.78, "elapsed_time": "1:30:02", "remaining_time": "5:43:14"}
{"current_steps": 920, "total_steps": 4403, "loss": 0.2221, "lr": 3.8580546271051634e-05, "epoch": 1.4626391096979332, "percentage": 20.89, "elapsed_time": "1:30:34", "remaining_time": "5:42:54"}
{"current_steps": 925, "total_steps": 4403, "loss": 0.2444, "lr": 3.8551060987678236e-05, "epoch": 1.4705882352941178, "percentage": 21.01, "elapsed_time": "1:31:04", "remaining_time": "5:42:25"}
{"current_steps": 930, "total_steps": 4403, "loss": 0.2453, "lr": 3.852128411002787e-05, "epoch": 1.478537360890302, "percentage": 21.12, "elapsed_time": "1:31:33", "remaining_time": "5:41:53"}
{"current_steps": 935, "total_steps": 4403, "loss": 0.2453, "lr": 3.849121610614745e-05, "epoch": 1.4864864864864864, "percentage": 21.24, "elapsed_time": "1:31:57", "remaining_time": "5:41:04"}
{"current_steps": 940, "total_steps": 4403, "loss": 0.2494, "lr": 3.8460857448659975e-05, "epoch": 1.494435612082671, "percentage": 21.35, "elapsed_time": "1:32:25", "remaining_time": "5:40:29"}
{"current_steps": 945, "total_steps": 4403, "loss": 0.2531, "lr": 3.8430208614757044e-05, "epoch": 1.5023847376788553, "percentage": 21.46, "elapsed_time": "1:32:54", "remaining_time": "5:40:00"}
{"current_steps": 950, "total_steps": 4403, "loss": 0.2543, "lr": 3.8399270086191425e-05, "epoch": 1.5103338632750396, "percentage": 21.58, "elapsed_time": "1:33:26", "remaining_time": "5:39:37"}
{"current_steps": 955, "total_steps": 4403, "loss": 0.2558, "lr": 3.8368042349269405e-05, "epoch": 1.5182829888712241, "percentage": 21.69, "elapsed_time": "1:33:52", "remaining_time": "5:38:57"}
{"current_steps": 960, "total_steps": 4403, "loss": 0.2423, "lr": 3.83365258948432e-05, "epoch": 1.5262321144674087, "percentage": 21.8, "elapsed_time": "1:34:21", "remaining_time": "5:38:23"}
{"current_steps": 965, "total_steps": 4403, "loss": 0.2313, "lr": 3.830472121830323e-05, "epoch": 1.534181240063593, "percentage": 21.92, "elapsed_time": "1:34:48", "remaining_time": "5:37:45"}
{"current_steps": 970, "total_steps": 4403, "loss": 0.2442, "lr": 3.82726288195703e-05, "epoch": 1.5421303656597773, "percentage": 22.03, "elapsed_time": "1:35:22", "remaining_time": "5:37:32"}
{"current_steps": 975, "total_steps": 4403, "loss": 0.2356, "lr": 3.824024920308781e-05, "epoch": 1.550079491255962, "percentage": 22.14, "elapsed_time": "1:35:52", "remaining_time": "5:37:05"}
{"current_steps": 980, "total_steps": 4403, "loss": 0.2538, "lr": 3.820758287781374e-05, "epoch": 1.5580286168521462, "percentage": 22.26, "elapsed_time": "1:36:21", "remaining_time": "5:36:34"}
{"current_steps": 985, "total_steps": 4403, "loss": 0.2408, "lr": 3.8174630357212714e-05, "epoch": 1.5659777424483305, "percentage": 22.37, "elapsed_time": "1:36:50", "remaining_time": "5:36:02"}
{"current_steps": 990, "total_steps": 4403, "loss": 0.227, "lr": 3.8141392159247905e-05, "epoch": 1.573926868044515, "percentage": 22.48, "elapsed_time": "1:37:19", "remaining_time": "5:35:30"}
{"current_steps": 995, "total_steps": 4403, "loss": 0.2461, "lr": 3.81078688063729e-05, "epoch": 1.5818759936406996, "percentage": 22.6, "elapsed_time": "1:37:46", "remaining_time": "5:34:54"}
{"current_steps": 1000, "total_steps": 4403, "loss": 0.2406, "lr": 3.807406082552348e-05, "epoch": 1.589825119236884, "percentage": 22.71, "elapsed_time": "1:38:16", "remaining_time": "5:34:26"}
{"current_steps": 1005, "total_steps": 4403, "loss": 0.2438, "lr": 3.803996874810934e-05, "epoch": 1.5977742448330683, "percentage": 22.83, "elapsed_time": "1:38:48", "remaining_time": "5:34:04"}
{"current_steps": 1010, "total_steps": 4403, "loss": 0.2533, "lr": 3.800559311000575e-05, "epoch": 1.6057233704292528, "percentage": 22.94, "elapsed_time": "1:39:15", "remaining_time": "5:33:25"}
{"current_steps": 1015, "total_steps": 4403, "loss": 0.2231, "lr": 3.7970934451545104e-05, "epoch": 1.6136724960254372, "percentage": 23.05, "elapsed_time": "1:39:46", "remaining_time": "5:33:03"}
{"current_steps": 1020, "total_steps": 4403, "loss": 0.2418, "lr": 3.7935993317508455e-05, "epoch": 1.6216216216216215, "percentage": 23.17, "elapsed_time": "1:40:14", "remaining_time": "5:32:26"}
{"current_steps": 1025, "total_steps": 4403, "loss": 0.2457, "lr": 3.790077025711694e-05, "epoch": 1.629570747217806, "percentage": 23.28, "elapsed_time": "1:40:40", "remaining_time": "5:31:47"}
{"current_steps": 1030, "total_steps": 4403, "loss": 0.2448, "lr": 3.786526582402313e-05, "epoch": 1.6375198728139906, "percentage": 23.39, "elapsed_time": "1:41:13", "remaining_time": "5:31:28"}
{"current_steps": 1035, "total_steps": 4403, "loss": 0.2558, "lr": 3.782948057630236e-05, "epoch": 1.645468998410175, "percentage": 23.51, "elapsed_time": "1:41:40", "remaining_time": "5:30:50"}
{"current_steps": 1040, "total_steps": 4403, "loss": 0.2636, "lr": 3.779341507644394e-05, "epoch": 1.6534181240063592, "percentage": 23.62, "elapsed_time": "1:42:12", "remaining_time": "5:30:30"}
{"current_steps": 1045, "total_steps": 4403, "loss": 0.2614, "lr": 3.775706989134231e-05, "epoch": 1.6613672496025438, "percentage": 23.73, "elapsed_time": "1:42:48", "remaining_time": "5:30:21"}
{"current_steps": 1050, "total_steps": 4403, "loss": 0.2392, "lr": 3.772044559228813e-05, "epoch": 1.669316375198728, "percentage": 23.85, "elapsed_time": "1:43:14", "remaining_time": "5:29:42"}
{"current_steps": 1055, "total_steps": 4403, "loss": 0.2353, "lr": 3.768354275495933e-05, "epoch": 1.6772655007949124, "percentage": 23.96, "elapsed_time": "1:43:41", "remaining_time": "5:29:05"}
{"current_steps": 1060, "total_steps": 4403, "loss": 0.2417, "lr": 3.764636195941198e-05, "epoch": 1.685214626391097, "percentage": 24.07, "elapsed_time": "1:44:13", "remaining_time": "5:28:42"}
{"current_steps": 1065, "total_steps": 4403, "loss": 0.2349, "lr": 3.760890379007129e-05, "epoch": 1.6931637519872815, "percentage": 24.19, "elapsed_time": "1:44:43", "remaining_time": "5:28:14"}
{"current_steps": 1070, "total_steps": 4403, "loss": 0.2372, "lr": 3.757116883572232e-05, "epoch": 1.7011128775834659, "percentage": 24.3, "elapsed_time": "1:45:15", "remaining_time": "5:27:53"}
{"current_steps": 1075, "total_steps": 4403, "loss": 0.2584, "lr": 3.753315768950079e-05, "epoch": 1.7090620031796502, "percentage": 24.42, "elapsed_time": "1:45:42", "remaining_time": "5:27:15"}
{"current_steps": 1080, "total_steps": 4403, "loss": 0.2386, "lr": 3.74948709488837e-05, "epoch": 1.7170111287758347, "percentage": 24.53, "elapsed_time": "1:46:11", "remaining_time": "5:26:44"}
{"current_steps": 1085, "total_steps": 4403, "loss": 0.2448, "lr": 3.745630921568004e-05, "epoch": 1.724960254372019, "percentage": 24.64, "elapsed_time": "1:46:41", "remaining_time": "5:26:14"}
{"current_steps": 1090, "total_steps": 4403, "loss": 0.2444, "lr": 3.741747309602117e-05, "epoch": 1.7329093799682034, "percentage": 24.76, "elapsed_time": "1:47:10", "remaining_time": "5:25:45"}
{"current_steps": 1095, "total_steps": 4403, "loss": 0.2674, "lr": 3.737836320035146e-05, "epoch": 1.740858505564388, "percentage": 24.87, "elapsed_time": "1:47:34", "remaining_time": "5:25:00"}
{"current_steps": 1100, "total_steps": 4403, "loss": 0.239, "lr": 3.733898014341858e-05, "epoch": 1.7488076311605725, "percentage": 24.98, "elapsed_time": "1:48:02", "remaining_time": "5:24:26"}
{"current_steps": 1105, "total_steps": 4403, "loss": 0.2351, "lr": 3.729932454426391e-05, "epoch": 1.7567567567567568, "percentage": 25.1, "elapsed_time": "1:48:32", "remaining_time": "5:23:56"}
{"current_steps": 1110, "total_steps": 4403, "loss": 0.241, "lr": 3.725939702621273e-05, "epoch": 1.7647058823529411, "percentage": 25.21, "elapsed_time": "1:49:00", "remaining_time": "5:23:22"}
{"current_steps": 1115, "total_steps": 4403, "loss": 0.2263, "lr": 3.72191982168645e-05, "epoch": 1.7726550079491257, "percentage": 25.32, "elapsed_time": "1:49:28", "remaining_time": "5:22:48"}
{"current_steps": 1120, "total_steps": 4403, "loss": 0.2287, "lr": 3.717872874808298e-05, "epoch": 1.78060413354531, "percentage": 25.44, "elapsed_time": "1:49:57", "remaining_time": "5:22:18"}
{"current_steps": 1125, "total_steps": 4403, "loss": 0.2306, "lr": 3.713798925598623e-05, "epoch": 1.7885532591414943, "percentage": 25.55, "elapsed_time": "1:50:27", "remaining_time": "5:21:51"}
{"current_steps": 1130, "total_steps": 4403, "loss": 0.2299, "lr": 3.709698038093671e-05, "epoch": 1.7965023847376789, "percentage": 25.66, "elapsed_time": "1:50:51", "remaining_time": "5:21:06"}
{"current_steps": 1135, "total_steps": 4403, "loss": 0.2326, "lr": 3.705570276753116e-05, "epoch": 1.8044515103338634, "percentage": 25.78, "elapsed_time": "1:51:19", "remaining_time": "5:20:33"}
{"current_steps": 1140, "total_steps": 4403, "loss": 0.2581, "lr": 3.701415706459044e-05, "epoch": 1.8124006359300477, "percentage": 25.89, "elapsed_time": "1:51:49", "remaining_time": "5:20:04"}
{"current_steps": 1145, "total_steps": 4403, "loss": 0.2344, "lr": 3.697234392514942e-05, "epoch": 1.820349761526232, "percentage": 26.0, "elapsed_time": "1:52:20", "remaining_time": "5:19:38"}
{"current_steps": 1150, "total_steps": 4403, "loss": 0.2341, "lr": 3.693026400644662e-05, "epoch": 1.8282988871224166, "percentage": 26.12, "elapsed_time": "1:52:47", "remaining_time": "5:19:03"}
{"current_steps": 1155, "total_steps": 4403, "loss": 0.2352, "lr": 3.6887917969913944e-05, "epoch": 1.836248012718601, "percentage": 26.23, "elapsed_time": "1:53:20", "remaining_time": "5:18:43"}
{"current_steps": 1160, "total_steps": 4403, "loss": 0.2451, "lr": 3.684530648116625e-05, "epoch": 1.8441971383147853, "percentage": 26.35, "elapsed_time": "1:53:55", "remaining_time": "5:18:29"}
{"current_steps": 1165, "total_steps": 4403, "loss": 0.2429, "lr": 3.68024302099909e-05, "epoch": 1.8521462639109698, "percentage": 26.46, "elapsed_time": "1:54:25", "remaining_time": "5:18:01"}
{"current_steps": 1170, "total_steps": 4403, "loss": 0.2663, "lr": 3.6759289830337246e-05, "epoch": 1.8600953895071544, "percentage": 26.57, "elapsed_time": "1:54:56", "remaining_time": "5:17:36"}
{"current_steps": 1175, "total_steps": 4403, "loss": 0.2586, "lr": 3.6715886020306e-05, "epoch": 1.8680445151033387, "percentage": 26.69, "elapsed_time": "1:55:23", "remaining_time": "5:17:01"}
{"current_steps": 1180, "total_steps": 4403, "loss": 0.2302, "lr": 3.6672219462138604e-05, "epoch": 1.875993640699523, "percentage": 26.8, "elapsed_time": "1:55:54", "remaining_time": "5:16:35"}
{"current_steps": 1185, "total_steps": 4403, "loss": 0.2273, "lr": 3.6628290842206495e-05, "epoch": 1.8839427662957076, "percentage": 26.91, "elapsed_time": "1:56:23", "remaining_time": "5:16:05"}
{"current_steps": 1190, "total_steps": 4403, "loss": 0.2362, "lr": 3.658410085100034e-05, "epoch": 1.8918918918918919, "percentage": 27.03, "elapsed_time": "1:56:54", "remaining_time": "5:15:37"}
{"current_steps": 1195, "total_steps": 4403, "loss": 0.2305, "lr": 3.6539650183119126e-05, "epoch": 1.8998410174880762, "percentage": 27.14, "elapsed_time": "1:57:24", "remaining_time": "5:15:10"}
{"current_steps": 1200, "total_steps": 4403, "loss": 0.2482, "lr": 3.64949395372593e-05, "epoch": 1.9077901430842608, "percentage": 27.25, "elapsed_time": "1:57:52", "remaining_time": "5:14:36"}
{"current_steps": 1205, "total_steps": 4403, "loss": 0.2512, "lr": 3.644996961620378e-05, "epoch": 1.9157392686804453, "percentage": 27.37, "elapsed_time": "1:58:18", "remaining_time": "5:13:59"}
{"current_steps": 1210, "total_steps": 4403, "loss": 0.2466, "lr": 3.6404741126810854e-05, "epoch": 1.9236883942766294, "percentage": 27.48, "elapsed_time": "1:58:46", "remaining_time": "5:13:24"}
{"current_steps": 1215, "total_steps": 4403, "loss": 0.2421, "lr": 3.635925478000315e-05, "epoch": 1.931637519872814, "percentage": 27.59, "elapsed_time": "1:59:16", "remaining_time": "5:12:57"}
{"current_steps": 1220, "total_steps": 4403, "loss": 0.2343, "lr": 3.631351129075638e-05, "epoch": 1.9395866454689985, "percentage": 27.71, "elapsed_time": "1:59:46", "remaining_time": "5:12:29"}
{"current_steps": 1225, "total_steps": 4403, "loss": 0.2276, "lr": 3.6267511378088174e-05, "epoch": 1.9475357710651828, "percentage": 27.82, "elapsed_time": "2:00:12", "remaining_time": "5:11:51"}
{"current_steps": 1230, "total_steps": 4403, "loss": 0.2317, "lr": 3.622125576504674e-05, "epoch": 1.9554848966613672, "percentage": 27.94, "elapsed_time": "2:00:39", "remaining_time": "5:11:16"}
{"current_steps": 1235, "total_steps": 4403, "loss": 0.2345, "lr": 3.6174745178699484e-05, "epoch": 1.9634340222575517, "percentage": 28.05, "elapsed_time": "2:01:04", "remaining_time": "5:10:35"}
{"current_steps": 1240, "total_steps": 4403, "loss": 0.2672, "lr": 3.612798035012161e-05, "epoch": 1.9713831478537363, "percentage": 28.16, "elapsed_time": "2:01:30", "remaining_time": "5:09:56"}
{"current_steps": 1245, "total_steps": 4403, "loss": 0.2469, "lr": 3.608096201438465e-05, "epoch": 1.9793322734499204, "percentage": 28.28, "elapsed_time": "2:01:57", "remaining_time": "5:09:20"}
{"current_steps": 1250, "total_steps": 4403, "loss": 0.2495, "lr": 3.603369091054484e-05, "epoch": 1.987281399046105, "percentage": 28.39, "elapsed_time": "2:02:31", "remaining_time": "5:09:02"}
{"current_steps": 1255, "total_steps": 4403, "loss": 0.2158, "lr": 3.5986167781631556e-05, "epoch": 1.9952305246422894, "percentage": 28.5, "elapsed_time": "2:03:01", "remaining_time": "5:08:34"}
{"current_steps": 1260, "total_steps": 4403, "loss": 0.2326, "lr": 3.5938393374635634e-05, "epoch": 2.0031796502384736, "percentage": 28.62, "elapsed_time": "2:03:24", "remaining_time": "5:07:50"}
{"current_steps": 1265, "total_steps": 4403, "loss": 0.2126, "lr": 3.589036844049762e-05, "epoch": 2.011128775834658, "percentage": 28.73, "elapsed_time": "2:03:53", "remaining_time": "5:07:19"}
{"current_steps": 1270, "total_steps": 4403, "loss": 0.2042, "lr": 3.584209373409593e-05, "epoch": 2.0190779014308426, "percentage": 28.84, "elapsed_time": "2:04:24", "remaining_time": "5:06:54"}
{"current_steps": 1275, "total_steps": 4403, "loss": 0.2097, "lr": 3.579357001423505e-05, "epoch": 2.027027027027027, "percentage": 28.96, "elapsed_time": "2:04:56", "remaining_time": "5:06:30"}
{"current_steps": 1280, "total_steps": 4403, "loss": 0.2158, "lr": 3.5744798043633566e-05, "epoch": 2.0349761526232113, "percentage": 29.07, "elapsed_time": "2:05:28", "remaining_time": "5:06:08"}
{"current_steps": 1285, "total_steps": 4403, "loss": 0.2329, "lr": 3.569577858891219e-05, "epoch": 2.042925278219396, "percentage": 29.18, "elapsed_time": "2:05:54", "remaining_time": "5:05:31"}
{"current_steps": 1290, "total_steps": 4403, "loss": 0.2388, "lr": 3.56465124205817e-05, "epoch": 2.0508744038155804, "percentage": 29.3, "elapsed_time": "2:06:25", "remaining_time": "5:05:04"}
{"current_steps": 1295, "total_steps": 4403, "loss": 0.2085, "lr": 3.559700031303082e-05, "epoch": 2.0588235294117645, "percentage": 29.41, "elapsed_time": "2:06:52", "remaining_time": "5:04:29"}
{"current_steps": 1300, "total_steps": 4403, "loss": 0.2361, "lr": 3.554724304451411e-05, "epoch": 2.066772655007949, "percentage": 29.53, "elapsed_time": "2:07:21", "remaining_time": "5:03:59"}
{"current_steps": 1305, "total_steps": 4403, "loss": 0.214, "lr": 3.549724139713962e-05, "epoch": 2.0747217806041336, "percentage": 29.64, "elapsed_time": "2:07:51", "remaining_time": "5:03:32"}
{"current_steps": 1310, "total_steps": 4403, "loss": 0.2256, "lr": 3.544699615685671e-05, "epoch": 2.082670906200318, "percentage": 29.75, "elapsed_time": "2:08:21", "remaining_time": "5:03:03"}
{"current_steps": 1315, "total_steps": 4403, "loss": 0.2266, "lr": 3.539650811344363e-05, "epoch": 2.0906200317965022, "percentage": 29.87, "elapsed_time": "2:08:52", "remaining_time": "5:02:39"}
{"current_steps": 1320, "total_steps": 4403, "loss": 0.2241, "lr": 3.534577806049512e-05, "epoch": 2.098569157392687, "percentage": 29.98, "elapsed_time": "2:09:17", "remaining_time": "5:01:57"}
{"current_steps": 1325, "total_steps": 4403, "loss": 0.206, "lr": 3.529480679540996e-05, "epoch": 2.1065182829888713, "percentage": 30.09, "elapsed_time": "2:09:42", "remaining_time": "5:01:18"}
{"current_steps": 1330, "total_steps": 4403, "loss": 0.2008, "lr": 3.524359511937838e-05, "epoch": 2.1144674085850554, "percentage": 30.21, "elapsed_time": "2:10:10", "remaining_time": "5:00:45"}
{"current_steps": 1335, "total_steps": 4403, "loss": 0.2077, "lr": 3.5192143837369523e-05, "epoch": 2.12241653418124, "percentage": 30.32, "elapsed_time": "2:10:42", "remaining_time": "5:00:23"}
{"current_steps": 1340, "total_steps": 4403, "loss": 0.2159, "lr": 3.514045375811878e-05, "epoch": 2.1303656597774245, "percentage": 30.43, "elapsed_time": "2:11:10", "remaining_time": "4:59:49"}
{"current_steps": 1345, "total_steps": 4403, "loss": 0.228, "lr": 3.508852569411506e-05, "epoch": 2.138314785373609, "percentage": 30.55, "elapsed_time": "2:11:38", "remaining_time": "4:59:18"}
{"current_steps": 1350, "total_steps": 4403, "loss": 0.2251, "lr": 3.503636046158803e-05, "epoch": 2.146263910969793, "percentage": 30.66, "elapsed_time": "2:12:08", "remaining_time": "4:58:49"}
{"current_steps": 1355, "total_steps": 4403, "loss": 0.2087, "lr": 3.498395888049526e-05, "epoch": 2.1542130365659777, "percentage": 30.77, "elapsed_time": "2:12:32", "remaining_time": "4:58:08"}
{"current_steps": 1360, "total_steps": 4403, "loss": 0.2224, "lr": 3.4931321774509396e-05, "epoch": 2.1621621621621623, "percentage": 30.89, "elapsed_time": "2:13:04", "remaining_time": "4:57:45"}
{"current_steps": 1365, "total_steps": 4403, "loss": 0.214, "lr": 3.487844997100515e-05, "epoch": 2.1701112877583464, "percentage": 31.0, "elapsed_time": "2:13:34", "remaining_time": "4:57:17"}
{"current_steps": 1370, "total_steps": 4403, "loss": 0.2082, "lr": 3.482534430104633e-05, "epoch": 2.178060413354531, "percentage": 31.12, "elapsed_time": "2:14:01", "remaining_time": "4:56:43"}
{"current_steps": 1375, "total_steps": 4403, "loss": 0.2017, "lr": 3.4772005599372764e-05, "epoch": 2.1860095389507155, "percentage": 31.23, "elapsed_time": "2:14:28", "remaining_time": "4:56:08"}
{"current_steps": 1380, "total_steps": 4403, "loss": 0.2442, "lr": 3.4718434704387174e-05, "epoch": 2.1939586645469, "percentage": 31.34, "elapsed_time": "2:14:57", "remaining_time": "4:55:39"}
{"current_steps": 1385, "total_steps": 4403, "loss": 0.2138, "lr": 3.4664632458142016e-05, "epoch": 2.201907790143084, "percentage": 31.46, "elapsed_time": "2:15:32", "remaining_time": "4:55:20"}
{"current_steps": 1390, "total_steps": 4403, "loss": 0.2186, "lr": 3.461059970632622e-05, "epoch": 2.2098569157392687, "percentage": 31.57, "elapsed_time": "2:16:04", "remaining_time": "4:54:56"}
{"current_steps": 1395, "total_steps": 4403, "loss": 0.196, "lr": 3.4556337298251943e-05, "epoch": 2.2178060413354532, "percentage": 31.68, "elapsed_time": "2:16:32", "remaining_time": "4:54:24"}
{"current_steps": 1400, "total_steps": 4403, "loss": 0.2336, "lr": 3.450184608684114e-05, "epoch": 2.2257551669316373, "percentage": 31.8, "elapsed_time": "2:17:02", "remaining_time": "4:53:56"}
{"current_steps": 1405, "total_steps": 4403, "loss": 0.219, "lr": 3.444712692861224e-05, "epoch": 2.233704292527822, "percentage": 31.91, "elapsed_time": "2:17:31", "remaining_time": "4:53:26"}
{"current_steps": 1410, "total_steps": 4403, "loss": 0.202, "lr": 3.439218068366663e-05, "epoch": 2.2416534181240064, "percentage": 32.02, "elapsed_time": "2:18:01", "remaining_time": "4:52:59"}
{"current_steps": 1415, "total_steps": 4403, "loss": 0.2451, "lr": 3.433700821567516e-05, "epoch": 2.249602543720191, "percentage": 32.14, "elapsed_time": "2:18:33", "remaining_time": "4:52:35"}
{"current_steps": 1420, "total_steps": 4403, "loss": 0.2046, "lr": 3.428161039186456e-05, "epoch": 2.257551669316375, "percentage": 32.25, "elapsed_time": "2:19:02", "remaining_time": "4:52:05"}
{"current_steps": 1425, "total_steps": 4403, "loss": 0.2218, "lr": 3.42259880830038e-05, "epoch": 2.2655007949125596, "percentage": 32.36, "elapsed_time": "2:19:34", "remaining_time": "4:51:41"}
{"current_steps": 1430, "total_steps": 4403, "loss": 0.2243, "lr": 3.417014216339043e-05, "epoch": 2.273449920508744, "percentage": 32.48, "elapsed_time": "2:20:06", "remaining_time": "4:51:16"}
{"current_steps": 1435, "total_steps": 4403, "loss": 0.2174, "lr": 3.4114073510836794e-05, "epoch": 2.2813990461049283, "percentage": 32.59, "elapsed_time": "2:20:31", "remaining_time": "4:50:39"}
{"current_steps": 1440, "total_steps": 4403, "loss": 0.2131, "lr": 3.4057783006656274e-05, "epoch": 2.289348171701113, "percentage": 32.7, "elapsed_time": "2:20:59", "remaining_time": "4:50:07"}
{"current_steps": 1445, "total_steps": 4403, "loss": 0.2198, "lr": 3.400127153564941e-05, "epoch": 2.2972972972972974, "percentage": 32.82, "elapsed_time": "2:21:25", "remaining_time": "4:49:30"}
{"current_steps": 1450, "total_steps": 4403, "loss": 0.2084, "lr": 3.394453998609001e-05, "epoch": 2.3052464228934815, "percentage": 32.93, "elapsed_time": "2:21:53", "remaining_time": "4:48:58"}
{"current_steps": 1455, "total_steps": 4403, "loss": 0.2053, "lr": 3.388758924971117e-05, "epoch": 2.313195548489666, "percentage": 33.05, "elapsed_time": "2:22:19", "remaining_time": "4:48:22"}
{"current_steps": 1460, "total_steps": 4403, "loss": 0.2116, "lr": 3.3830420221691286e-05, "epoch": 2.3211446740858506, "percentage": 33.16, "elapsed_time": "2:22:47", "remaining_time": "4:47:50"}
{"current_steps": 1465, "total_steps": 4403, "loss": 0.216, "lr": 3.377303380063995e-05, "epoch": 2.329093799682035, "percentage": 33.27, "elapsed_time": "2:23:15", "remaining_time": "4:47:18"}
{"current_steps": 1470, "total_steps": 4403, "loss": 0.2126, "lr": 3.371543088858384e-05, "epoch": 2.337042925278219, "percentage": 33.39, "elapsed_time": "2:23:43", "remaining_time": "4:46:45"}
{"current_steps": 1475, "total_steps": 4403, "loss": 0.2412, "lr": 3.365761239095253e-05, "epoch": 2.3449920508744038, "percentage": 33.5, "elapsed_time": "2:24:11", "remaining_time": "4:46:13"}
{"current_steps": 1480, "total_steps": 4403, "loss": 0.2073, "lr": 3.3599579216564314e-05, "epoch": 2.3529411764705883, "percentage": 33.61, "elapsed_time": "2:24:36", "remaining_time": "4:45:36"}
{"current_steps": 1485, "total_steps": 4403, "loss": 0.2292, "lr": 3.354133227761181e-05, "epoch": 2.360890302066773, "percentage": 33.73, "elapsed_time": "2:25:07", "remaining_time": "4:45:09"}
{"current_steps": 1490, "total_steps": 4403, "loss": 0.2278, "lr": 3.3482872489647745e-05, "epoch": 2.368839427662957, "percentage": 33.84, "elapsed_time": "2:25:30", "remaining_time": "4:44:28"}
{"current_steps": 1495, "total_steps": 4403, "loss": 0.2229, "lr": 3.342420077157047e-05, "epoch": 2.3767885532591415, "percentage": 33.95, "elapsed_time": "2:25:54", "remaining_time": "4:43:48"}
{"current_steps": 1500, "total_steps": 4403, "loss": 0.2068, "lr": 3.336531804560957e-05, "epoch": 2.384737678855326, "percentage": 34.07, "elapsed_time": "2:26:18", "remaining_time": "4:43:10"}
{"current_steps": 1505, "total_steps": 4403, "loss": 0.218, "lr": 3.330622523731136e-05, "epoch": 2.39268680445151, "percentage": 34.18, "elapsed_time": "2:26:56", "remaining_time": "4:42:57"}
{"current_steps": 1510, "total_steps": 4403, "loss": 0.2346, "lr": 3.32469232755243e-05, "epoch": 2.4006359300476947, "percentage": 34.29, "elapsed_time": "2:27:27", "remaining_time": "4:42:30"}
{"current_steps": 1515, "total_steps": 4403, "loss": 0.2328, "lr": 3.318741309238444e-05, "epoch": 2.4085850556438793, "percentage": 34.41, "elapsed_time": "2:27:56", "remaining_time": "4:42:01"}
{"current_steps": 1520, "total_steps": 4403, "loss": 0.2278, "lr": 3.312769562330075e-05, "epoch": 2.4165341812400634, "percentage": 34.52, "elapsed_time": "2:28:24", "remaining_time": "4:41:29"}
{"current_steps": 1525, "total_steps": 4403, "loss": 0.2271, "lr": 3.306777180694042e-05, "epoch": 2.424483306836248, "percentage": 34.64, "elapsed_time": "2:28:55", "remaining_time": "4:41:02"}
{"current_steps": 1530, "total_steps": 4403, "loss": 0.2287, "lr": 3.30076425852141e-05, "epoch": 2.4324324324324325, "percentage": 34.75, "elapsed_time": "2:29:21", "remaining_time": "4:40:27"}
{"current_steps": 1535, "total_steps": 4403, "loss": 0.2405, "lr": 3.294730890326109e-05, "epoch": 2.440381558028617, "percentage": 34.86, "elapsed_time": "2:29:51", "remaining_time": "4:39:59"}
{"current_steps": 1540, "total_steps": 4403, "loss": 0.2042, "lr": 3.2886771709434504e-05, "epoch": 2.448330683624801, "percentage": 34.98, "elapsed_time": "2:30:22", "remaining_time": "4:39:33"}
{"current_steps": 1545, "total_steps": 4403, "loss": 0.2068, "lr": 3.282603195528635e-05, "epoch": 2.4562798092209857, "percentage": 35.09, "elapsed_time": "2:30:49", "remaining_time": "4:38:59"}
{"current_steps": 1550, "total_steps": 4403, "loss": 0.22, "lr": 3.276509059555257e-05, "epoch": 2.46422893481717, "percentage": 35.2, "elapsed_time": "2:31:16", "remaining_time": "4:38:26"}
{"current_steps": 1555, "total_steps": 4403, "loss": 0.2112, "lr": 3.270394858813802e-05, "epoch": 2.4721780604133547, "percentage": 35.32, "elapsed_time": "2:31:46", "remaining_time": "4:37:57"}
{"current_steps": 1560, "total_steps": 4403, "loss": 0.2035, "lr": 3.264260689410147e-05, "epoch": 2.480127186009539, "percentage": 35.43, "elapsed_time": "2:32:14", "remaining_time": "4:37:26"}
{"current_steps": 1565, "total_steps": 4403, "loss": 0.2236, "lr": 3.2581066477640435e-05, "epoch": 2.4880763116057234, "percentage": 35.54, "elapsed_time": "2:32:41", "remaining_time": "4:36:53"}
{"current_steps": 1570, "total_steps": 4403, "loss": 0.222, "lr": 3.251932830607603e-05, "epoch": 2.496025437201908, "percentage": 35.66, "elapsed_time": "2:33:11", "remaining_time": "4:36:25"}
{"current_steps": 1575, "total_steps": 4403, "loss": 0.2139, "lr": 3.245739334983779e-05, "epoch": 2.503974562798092, "percentage": 35.77, "elapsed_time": "2:33:42", "remaining_time": "4:35:58"}
{"current_steps": 1580, "total_steps": 4403, "loss": 0.2029, "lr": 3.239526258244842e-05, "epoch": 2.5119236883942766, "percentage": 35.88, "elapsed_time": "2:34:15", "remaining_time": "4:35:36"}
{"current_steps": 1585, "total_steps": 4403, "loss": 0.226, "lr": 3.233293698050845e-05, "epoch": 2.519872813990461, "percentage": 36.0, "elapsed_time": "2:34:44", "remaining_time": "4:35:07"}
{"current_steps": 1590, "total_steps": 4403, "loss": 0.2165, "lr": 3.227041752368091e-05, "epoch": 2.5278219395866453, "percentage": 36.11, "elapsed_time": "2:35:17", "remaining_time": "4:34:44"}
{"current_steps": 1595, "total_steps": 4403, "loss": 0.2205, "lr": 3.220770519467597e-05, "epoch": 2.53577106518283, "percentage": 36.23, "elapsed_time": "2:35:45", "remaining_time": "4:34:12"}
{"current_steps": 1600, "total_steps": 4403, "loss": 0.2094, "lr": 3.214480097923542e-05, "epoch": 2.5437201907790143, "percentage": 36.34, "elapsed_time": "2:36:16", "remaining_time": "4:33:45"}
{"current_steps": 1605, "total_steps": 4403, "loss": 0.2175, "lr": 3.208170586611721e-05, "epoch": 2.551669316375199, "percentage": 36.45, "elapsed_time": "2:36:48", "remaining_time": "4:33:21"}
{"current_steps": 1610, "total_steps": 4403, "loss": 0.2143, "lr": 3.201842084707993e-05, "epoch": 2.559618441971383, "percentage": 36.57, "elapsed_time": "2:37:20", "remaining_time": "4:32:57"}
{"current_steps": 1615, "total_steps": 4403, "loss": 0.2011, "lr": 3.195494691686718e-05, "epoch": 2.5675675675675675, "percentage": 36.68, "elapsed_time": "2:37:48", "remaining_time": "4:32:25"}
{"current_steps": 1620, "total_steps": 4403, "loss": 0.2132, "lr": 3.189128507319197e-05, "epoch": 2.575516693163752, "percentage": 36.79, "elapsed_time": "2:38:10", "remaining_time": "4:31:44"}
{"current_steps": 1625, "total_steps": 4403, "loss": 0.2276, "lr": 3.182743631672102e-05, "epoch": 2.5834658187599366, "percentage": 36.91, "elapsed_time": "2:38:44", "remaining_time": "4:31:21"}
{"current_steps": 1630, "total_steps": 4403, "loss": 0.2239, "lr": 3.1763401651059025e-05, "epoch": 2.5914149443561207, "percentage": 37.02, "elapsed_time": "2:39:12", "remaining_time": "4:30:50"}
{"current_steps": 1635, "total_steps": 4403, "loss": 0.2306, "lr": 3.1699182082732886e-05, "epoch": 2.5993640699523053, "percentage": 37.13, "elapsed_time": "2:39:42", "remaining_time": "4:30:22"}
{"current_steps": 1640, "total_steps": 4403, "loss": 0.1989, "lr": 3.1634778621175905e-05, "epoch": 2.60731319554849, "percentage": 37.25, "elapsed_time": "2:40:10", "remaining_time": "4:29:51"}
{"current_steps": 1645, "total_steps": 4403, "loss": 0.2259, "lr": 3.157019227871189e-05, "epoch": 2.615262321144674, "percentage": 37.36, "elapsed_time": "2:40:43", "remaining_time": "4:29:28"}
{"current_steps": 1650, "total_steps": 4403, "loss": 0.2251, "lr": 3.150542407053927e-05, "epoch": 2.6232114467408585, "percentage": 37.47, "elapsed_time": "2:41:12", "remaining_time": "4:28:57"}
{"current_steps": 1655, "total_steps": 4403, "loss": 0.2075, "lr": 3.144047501471511e-05, "epoch": 2.631160572337043, "percentage": 37.59, "elapsed_time": "2:41:42", "remaining_time": "4:28:29"}
{"current_steps": 1660, "total_steps": 4403, "loss": 0.2206, "lr": 3.1375346132139135e-05, "epoch": 2.639109697933227, "percentage": 37.7, "elapsed_time": "2:42:12", "remaining_time": "4:28:02"}
{"current_steps": 1665, "total_steps": 4403, "loss": 0.2167, "lr": 3.131003844653766e-05, "epoch": 2.6470588235294117, "percentage": 37.82, "elapsed_time": "2:42:39", "remaining_time": "4:27:28"}
{"current_steps": 1670, "total_steps": 4403, "loss": 0.2174, "lr": 3.124455298444752e-05, "epoch": 2.6550079491255962, "percentage": 37.93, "elapsed_time": "2:43:09", "remaining_time": "4:27:01"}
{"current_steps": 1675, "total_steps": 4403, "loss": 0.2123, "lr": 3.1178890775199925e-05, "epoch": 2.6629570747217803, "percentage": 38.04, "elapsed_time": "2:43:41", "remaining_time": "4:26:36"}
{"current_steps": 1680, "total_steps": 4403, "loss": 0.2256, "lr": 3.1113052850904275e-05, "epoch": 2.670906200317965, "percentage": 38.16, "elapsed_time": "2:44:14", "remaining_time": "4:26:12"}
{"current_steps": 1685, "total_steps": 4403, "loss": 0.2395, "lr": 3.1047040246431936e-05, "epoch": 2.6788553259141494, "percentage": 38.27, "elapsed_time": "2:44:45", "remaining_time": "4:25:46"}
{"current_steps": 1690, "total_steps": 4403, "loss": 0.2186, "lr": 3.098085399939998e-05, "epoch": 2.686804451510334, "percentage": 38.38, "elapsed_time": "2:45:11", "remaining_time": "4:25:11"}
{"current_steps": 1695, "total_steps": 4403, "loss": 0.2203, "lr": 3.091449515015489e-05, "epoch": 2.6947535771065185, "percentage": 38.5, "elapsed_time": "2:45:43", "remaining_time": "4:24:45"}
{"current_steps": 1700, "total_steps": 4403, "loss": 0.2163, "lr": 3.084796474175618e-05, "epoch": 2.7027027027027026, "percentage": 38.61, "elapsed_time": "2:46:06", "remaining_time": "4:24:06"}
{"current_steps": 1705, "total_steps": 4403, "loss": 0.2285, "lr": 3.078126381996001e-05, "epoch": 2.710651828298887, "percentage": 38.72, "elapsed_time": "2:46:39", "remaining_time": "4:23:43"}
{"current_steps": 1710, "total_steps": 4403, "loss": 0.2114, "lr": 3.071439343320274e-05, "epoch": 2.7186009538950717, "percentage": 38.84, "elapsed_time": "2:47:11", "remaining_time": "4:23:17"}
{"current_steps": 1715, "total_steps": 4403, "loss": 0.2169, "lr": 3.064735463258449e-05, "epoch": 2.726550079491256, "percentage": 38.95, "elapsed_time": "2:47:42", "remaining_time": "4:22:50"}
{"current_steps": 1720, "total_steps": 4403, "loss": 0.2135, "lr": 3.0580148471852544e-05, "epoch": 2.7344992050874404, "percentage": 39.06, "elapsed_time": "2:48:14", "remaining_time": "4:22:26"}
{"current_steps": 1725, "total_steps": 4403, "loss": 0.2028, "lr": 3.0512776007384882e-05, "epoch": 2.742448330683625, "percentage": 39.18, "elapsed_time": "2:48:44", "remaining_time": "4:21:58"}
{"current_steps": 1730, "total_steps": 4403, "loss": 0.2209, "lr": 3.0445238298173492e-05, "epoch": 2.750397456279809, "percentage": 39.29, "elapsed_time": "2:49:16", "remaining_time": "4:21:32"}
{"current_steps": 1735, "total_steps": 4403, "loss": 0.2145, "lr": 3.0377536405807753e-05, "epoch": 2.7583465818759936, "percentage": 39.4, "elapsed_time": "2:49:45", "remaining_time": "4:21:03"}
{"current_steps": 1740, "total_steps": 4403, "loss": 0.2074, "lr": 3.030967139445776e-05, "epoch": 2.766295707472178, "percentage": 39.52, "elapsed_time": "2:50:10", "remaining_time": "4:20:26"}
{"current_steps": 1745, "total_steps": 4403, "loss": 0.217, "lr": 3.0241644330857604e-05, "epoch": 2.7742448330683622, "percentage": 39.63, "elapsed_time": "2:50:38", "remaining_time": "4:19:55"}
{"current_steps": 1750, "total_steps": 4403, "loss": 0.2211, "lr": 3.0173456284288565e-05, "epoch": 2.7821939586645468, "percentage": 39.75, "elapsed_time": "2:51:07", "remaining_time": "4:19:25"}
{"current_steps": 1755, "total_steps": 4403, "loss": 0.2377, "lr": 3.010510832656233e-05, "epoch": 2.7901430842607313, "percentage": 39.86, "elapsed_time": "2:51:36", "remaining_time": "4:18:55"}
{"current_steps": 1760, "total_steps": 4403, "loss": 0.2146, "lr": 3.0036601532004175e-05, "epoch": 2.798092209856916, "percentage": 39.97, "elapsed_time": "2:52:05", "remaining_time": "4:18:25"}
{"current_steps": 1765, "total_steps": 4403, "loss": 0.205, "lr": 2.996793697743601e-05, "epoch": 2.8060413354531004, "percentage": 40.09, "elapsed_time": "2:52:31", "remaining_time": "4:17:51"}
{"current_steps": 1770, "total_steps": 4403, "loss": 0.1954, "lr": 2.9899115742159512e-05, "epoch": 2.8139904610492845, "percentage": 40.2, "elapsed_time": "2:53:02", "remaining_time": "4:17:25"}
{"current_steps": 1775, "total_steps": 4403, "loss": 0.2284, "lr": 2.9830138907939137e-05, "epoch": 2.821939586645469, "percentage": 40.31, "elapsed_time": "2:53:34", "remaining_time": "4:16:59"}
{"current_steps": 1780, "total_steps": 4403, "loss": 0.2366, "lr": 2.976100755898511e-05, "epoch": 2.8298887122416536, "percentage": 40.43, "elapsed_time": "2:54:03", "remaining_time": "4:16:29"}
{"current_steps": 1785, "total_steps": 4403, "loss": 0.2152, "lr": 2.9691722781936398e-05, "epoch": 2.8378378378378377, "percentage": 40.54, "elapsed_time": "2:54:30", "remaining_time": "4:15:56"}
{"current_steps": 1790, "total_steps": 4403, "loss": 0.1938, "lr": 2.962228566584362e-05, "epoch": 2.8457869634340223, "percentage": 40.65, "elapsed_time": "2:54:56", "remaining_time": "4:15:22"}
{"current_steps": 1795, "total_steps": 4403, "loss": 0.2293, "lr": 2.9552697302151937e-05, "epoch": 2.853736089030207, "percentage": 40.77, "elapsed_time": "2:55:20", "remaining_time": "4:14:45"}
{"current_steps": 1800, "total_steps": 4403, "loss": 0.1969, "lr": 2.9482958784683883e-05, "epoch": 2.861685214626391, "percentage": 40.88, "elapsed_time": "2:55:45", "remaining_time": "4:14:09"}
{"current_steps": 1805, "total_steps": 4403, "loss": 0.2056, "lr": 2.9413071209622174e-05, "epoch": 2.8696343402225755, "percentage": 40.99, "elapsed_time": "2:56:12", "remaining_time": "4:13:37"}
{"current_steps": 1810, "total_steps": 4403, "loss": 0.2102, "lr": 2.934303567549251e-05, "epoch": 2.87758346581876, "percentage": 41.11, "elapsed_time": "2:56:41", "remaining_time": "4:13:07"}
{"current_steps": 1815, "total_steps": 4403, "loss": 0.2209, "lr": 2.9272853283146255e-05, "epoch": 2.885532591414944, "percentage": 41.22, "elapsed_time": "2:57:12", "remaining_time": "4:12:40"}
{"current_steps": 1820, "total_steps": 4403, "loss": 0.2281, "lr": 2.9202525135743158e-05, "epoch": 2.8934817170111287, "percentage": 41.34, "elapsed_time": "2:57:39", "remaining_time": "4:12:08"}
{"current_steps": 1825, "total_steps": 4403, "loss": 0.2025, "lr": 2.9132052338734033e-05, "epoch": 2.901430842607313, "percentage": 41.45, "elapsed_time": "2:58:07", "remaining_time": "4:11:37"}
{"current_steps": 1830, "total_steps": 4403, "loss": 0.2095, "lr": 2.9061435999843354e-05, "epoch": 2.9093799682034978, "percentage": 41.56, "elapsed_time": "2:58:34", "remaining_time": "4:11:04"}
{"current_steps": 1835, "total_steps": 4403, "loss": 0.217, "lr": 2.8990677229051855e-05, "epoch": 2.9173290937996823, "percentage": 41.68, "elapsed_time": "2:59:04", "remaining_time": "4:10:36"}
{"current_steps": 1840, "total_steps": 4403, "loss": 0.2152, "lr": 2.8919777138579074e-05, "epoch": 2.9252782193958664, "percentage": 41.79, "elapsed_time": "2:59:34", "remaining_time": "4:10:08"}
{"current_steps": 1845, "total_steps": 4403, "loss": 0.2209, "lr": 2.8848736842865893e-05, "epoch": 2.933227344992051, "percentage": 41.9, "elapsed_time": "3:00:01", "remaining_time": "4:09:35"}
{"current_steps": 1850, "total_steps": 4403, "loss": 0.2082, "lr": 2.8777557458556993e-05, "epoch": 2.9411764705882355, "percentage": 42.02, "elapsed_time": "3:00:28", "remaining_time": "4:09:02"}
{"current_steps": 1855, "total_steps": 4403, "loss": 0.2169, "lr": 2.870624010448332e-05, "epoch": 2.9491255961844196, "percentage": 42.13, "elapsed_time": "3:00:55", "remaining_time": "4:08:30"}
{"current_steps": 1860, "total_steps": 4403, "loss": 0.2132, "lr": 2.8634785901644497e-05, "epoch": 2.957074721780604, "percentage": 42.24, "elapsed_time": "3:01:27", "remaining_time": "4:08:06"}
{"current_steps": 1865, "total_steps": 4403, "loss": 0.2181, "lr": 2.856319597319119e-05, "epoch": 2.9650238473767887, "percentage": 42.36, "elapsed_time": "3:01:56", "remaining_time": "4:07:35"}
{"current_steps": 1870, "total_steps": 4403, "loss": 0.2135, "lr": 2.849147144440747e-05, "epoch": 2.972972972972973, "percentage": 42.47, "elapsed_time": "3:02:20", "remaining_time": "4:06:59"}
{"current_steps": 1875, "total_steps": 4403, "loss": 0.22, "lr": 2.8419613442693127e-05, "epoch": 2.9809220985691574, "percentage": 42.58, "elapsed_time": "3:02:47", "remaining_time": "4:06:26"}
{"current_steps": 1880, "total_steps": 4403, "loss": 0.2111, "lr": 2.834762309754593e-05, "epoch": 2.988871224165342, "percentage": 42.7, "elapsed_time": "3:03:16", "remaining_time": "4:05:58"}
{"current_steps": 1885, "total_steps": 4403, "loss": 0.234, "lr": 2.8275501540543877e-05, "epoch": 2.996820349761526, "percentage": 42.81, "elapsed_time": "3:03:39", "remaining_time": "4:05:19"}
{"current_steps": 1890, "total_steps": 4403, "loss": 0.2201, "lr": 2.8203249905327434e-05, "epoch": 3.0047694753577106, "percentage": 42.93, "elapsed_time": "3:04:06", "remaining_time": "4:04:48"}
{"current_steps": 1895, "total_steps": 4403, "loss": 0.1789, "lr": 2.81308693275817e-05, "epoch": 3.012718600953895, "percentage": 43.04, "elapsed_time": "3:04:32", "remaining_time": "4:04:14"}
{"current_steps": 1900, "total_steps": 4403, "loss": 0.1829, "lr": 2.8058360945018518e-05, "epoch": 3.0206677265500796, "percentage": 43.15, "elapsed_time": "3:04:57", "remaining_time": "4:03:38"}
{"current_steps": 1905, "total_steps": 4403, "loss": 0.1825, "lr": 2.7985725897358665e-05, "epoch": 3.0286168521462637, "percentage": 43.27, "elapsed_time": "3:05:24", "remaining_time": "4:03:06"}
{"current_steps": 1910, "total_steps": 4403, "loss": 0.2102, "lr": 2.791296532631389e-05, "epoch": 3.0365659777424483, "percentage": 43.38, "elapsed_time": "3:05:53", "remaining_time": "4:02:37"}
{"current_steps": 1915, "total_steps": 4403, "loss": 0.1957, "lr": 2.7840080375568964e-05, "epoch": 3.044515103338633, "percentage": 43.49, "elapsed_time": "3:06:21", "remaining_time": "4:02:06"}
{"current_steps": 1920, "total_steps": 4403, "loss": 0.1956, "lr": 2.7767072190763733e-05, "epoch": 3.0524642289348174, "percentage": 43.61, "elapsed_time": "3:06:50", "remaining_time": "4:01:38"}
{"current_steps": 1925, "total_steps": 4403, "loss": 0.2066, "lr": 2.7693941919475076e-05, "epoch": 3.0604133545310015, "percentage": 43.72, "elapsed_time": "3:07:21", "remaining_time": "4:01:11"}
{"current_steps": 1930, "total_steps": 4403, "loss": 0.2122, "lr": 2.7620690711198906e-05, "epoch": 3.068362480127186, "percentage": 43.83, "elapsed_time": "3:07:51", "remaining_time": "4:00:43"}
{"current_steps": 1935, "total_steps": 4403, "loss": 0.1903, "lr": 2.7547319717332066e-05, "epoch": 3.0763116057233706, "percentage": 43.95, "elapsed_time": "3:08:15", "remaining_time": "4:00:07"}
{"current_steps": 1940, "total_steps": 4403, "loss": 0.1874, "lr": 2.7473830091154243e-05, "epoch": 3.0842607313195547, "percentage": 44.06, "elapsed_time": "3:08:43", "remaining_time": "3:59:36"}
{"current_steps": 1945, "total_steps": 4403, "loss": 0.2077, "lr": 2.7400222987809856e-05, "epoch": 3.0922098569157392, "percentage": 44.17, "elapsed_time": "3:09:14", "remaining_time": "3:59:08"}
{"current_steps": 1950, "total_steps": 4403, "loss": 0.2051, "lr": 2.7326499564289867e-05, "epoch": 3.100158982511924, "percentage": 44.29, "elapsed_time": "3:09:42", "remaining_time": "3:58:38"}
{"current_steps": 1955, "total_steps": 4403, "loss": 0.1963, "lr": 2.725266097941363e-05, "epoch": 3.108108108108108, "percentage": 44.4, "elapsed_time": "3:10:07", "remaining_time": "3:58:04"}
{"current_steps": 1960, "total_steps": 4403, "loss": 0.185, "lr": 2.717870839381066e-05, "epoch": 3.1160572337042924, "percentage": 44.52, "elapsed_time": "3:10:39", "remaining_time": "3:57:38"}
{"current_steps": 1965, "total_steps": 4403, "loss": 0.1837, "lr": 2.7104642969902357e-05, "epoch": 3.124006359300477, "percentage": 44.63, "elapsed_time": "3:11:08", "remaining_time": "3:57:08"}
{"current_steps": 1970, "total_steps": 4403, "loss": 0.1978, "lr": 2.7030465871883812e-05, "epoch": 3.1319554848966615, "percentage": 44.74, "elapsed_time": "3:11:37", "remaining_time": "3:56:39"}
{"current_steps": 1975, "total_steps": 4403, "loss": 0.1905, "lr": 2.6956178265705434e-05, "epoch": 3.1399046104928456, "percentage": 44.86, "elapsed_time": "3:12:05", "remaining_time": "3:56:08"}
{"current_steps": 1980, "total_steps": 4403, "loss": 0.1966, "lr": 2.688178131905465e-05, "epoch": 3.14785373608903, "percentage": 44.97, "elapsed_time": "3:12:37", "remaining_time": "3:55:43"}
{"current_steps": 1985, "total_steps": 4403, "loss": 0.1977, "lr": 2.680727620133757e-05, "epoch": 3.1558028616852147, "percentage": 45.08, "elapsed_time": "3:13:04", "remaining_time": "3:55:11"}
{"current_steps": 1990, "total_steps": 4403, "loss": 0.2029, "lr": 2.673266408366057e-05, "epoch": 3.1637519872813993, "percentage": 45.2, "elapsed_time": "3:13:36", "remaining_time": "3:54:46"}
{"current_steps": 1995, "total_steps": 4403, "loss": 0.2023, "lr": 2.6657946138811915e-05, "epoch": 3.1717011128775834, "percentage": 45.31, "elapsed_time": "3:14:04", "remaining_time": "3:54:14"}
{"current_steps": 2000, "total_steps": 4403, "loss": 0.2112, "lr": 2.6583123541243302e-05, "epoch": 3.179650238473768, "percentage": 45.42, "elapsed_time": "3:14:29", "remaining_time": "3:53:41"}
{"current_steps": 2005, "total_steps": 4403, "loss": 0.2078, "lr": 2.6508197467051406e-05, "epoch": 3.1875993640699525, "percentage": 45.54, "elapsed_time": "3:14:56", "remaining_time": "3:53:09"}
{"current_steps": 2010, "total_steps": 4403, "loss": 0.2065, "lr": 2.6433169093959405e-05, "epoch": 3.1955484896661366, "percentage": 45.65, "elapsed_time": "3:15:25", "remaining_time": "3:52:39"}
{"current_steps": 2015, "total_steps": 4403, "loss": 0.2054, "lr": 2.6358039601298454e-05, "epoch": 3.203497615262321, "percentage": 45.76, "elapsed_time": "3:15:56", "remaining_time": "3:52:13"}
{"current_steps": 2020, "total_steps": 4403, "loss": 0.1916, "lr": 2.6282810169989158e-05, "epoch": 3.2114467408585057, "percentage": 45.88, "elapsed_time": "3:16:22", "remaining_time": "3:51:39"}
{"current_steps": 2025, "total_steps": 4403, "loss": 0.1904, "lr": 2.6207481982523e-05, "epoch": 3.21939586645469, "percentage": 45.99, "elapsed_time": "3:16:49", "remaining_time": "3:51:08"}
{"current_steps": 2030, "total_steps": 4403, "loss": 0.1992, "lr": 2.6132056222943757e-05, "epoch": 3.2273449920508743, "percentage": 46.1, "elapsed_time": "3:17:16", "remaining_time": "3:50:36"}
{"current_steps": 2035, "total_steps": 4403, "loss": 0.1986, "lr": 2.6056534076828883e-05, "epoch": 3.235294117647059, "percentage": 46.22, "elapsed_time": "3:17:43", "remaining_time": "3:50:04"}
{"current_steps": 2040, "total_steps": 4403, "loss": 0.1772, "lr": 2.598091673127091e-05, "epoch": 3.2432432432432434, "percentage": 46.33, "elapsed_time": "3:18:11", "remaining_time": "3:49:33"}
{"current_steps": 2045, "total_steps": 4403, "loss": 0.1987, "lr": 2.5905205374858728e-05, "epoch": 3.2511923688394275, "percentage": 46.45, "elapsed_time": "3:18:42", "remaining_time": "3:49:07"}
{"current_steps": 2050, "total_steps": 4403, "loss": 0.1768, "lr": 2.5829401197658946e-05, "epoch": 3.259141494435612, "percentage": 46.56, "elapsed_time": "3:19:14", "remaining_time": "3:48:41"}
{"current_steps": 2055, "total_steps": 4403, "loss": 0.181, "lr": 2.5753505391197173e-05, "epoch": 3.2670906200317966, "percentage": 46.67, "elapsed_time": "3:19:45", "remaining_time": "3:48:14"}
{"current_steps": 2060, "total_steps": 4403, "loss": 0.1928, "lr": 2.5677519148439286e-05, "epoch": 3.275039745627981, "percentage": 46.79, "elapsed_time": "3:20:10", "remaining_time": "3:47:40"}
{"current_steps": 2065, "total_steps": 4403, "loss": 0.1967, "lr": 2.56014436637727e-05, "epoch": 3.2829888712241653, "percentage": 46.9, "elapsed_time": "3:20:44", "remaining_time": "3:47:16"}
{"current_steps": 2070, "total_steps": 4403, "loss": 0.1992, "lr": 2.5525280132987544e-05, "epoch": 3.29093799682035, "percentage": 47.01, "elapsed_time": "3:21:10", "remaining_time": "3:46:43"}
{"current_steps": 2075, "total_steps": 4403, "loss": 0.218, "lr": 2.544902975325793e-05, "epoch": 3.2988871224165344, "percentage": 47.13, "elapsed_time": "3:21:40", "remaining_time": "3:46:15"}
{"current_steps": 2080, "total_steps": 4403, "loss": 0.2137, "lr": 2.5372693723123075e-05, "epoch": 3.3068362480127185, "percentage": 47.24, "elapsed_time": "3:22:09", "remaining_time": "3:45:46"}
{"current_steps": 2085, "total_steps": 4403, "loss": 0.2067, "lr": 2.5296273242468514e-05, "epoch": 3.314785373608903, "percentage": 47.35, "elapsed_time": "3:22:38", "remaining_time": "3:45:17"}
{"current_steps": 2090, "total_steps": 4403, "loss": 0.2145, "lr": 2.5219769512507202e-05, "epoch": 3.3227344992050876, "percentage": 47.47, "elapsed_time": "3:23:07", "remaining_time": "3:44:47"}
{"current_steps": 2095, "total_steps": 4403, "loss": 0.1937, "lr": 2.5143183735760638e-05, "epoch": 3.3306836248012717, "percentage": 47.58, "elapsed_time": "3:23:34", "remaining_time": "3:44:16"}
{"current_steps": 2100, "total_steps": 4403, "loss": 0.1894, "lr": 2.5066517116039978e-05, "epoch": 3.338632750397456, "percentage": 47.69, "elapsed_time": "3:24:02", "remaining_time": "3:43:45"}
{"current_steps": 2105, "total_steps": 4403, "loss": 0.1808, "lr": 2.4989770858427113e-05, "epoch": 3.3465818759936408, "percentage": 47.81, "elapsed_time": "3:24:26", "remaining_time": "3:43:11"}
{"current_steps": 2110, "total_steps": 4403, "loss": 0.1975, "lr": 2.4912946169255722e-05, "epoch": 3.3545310015898253, "percentage": 47.92, "elapsed_time": "3:24:55", "remaining_time": "3:42:41"}
{"current_steps": 2115, "total_steps": 4403, "loss": 0.1929, "lr": 2.4836044256092288e-05, "epoch": 3.3624801271860094, "percentage": 48.04, "elapsed_time": "3:25:27", "remaining_time": "3:42:15"}
{"current_steps": 2120, "total_steps": 4403, "loss": 0.1997, "lr": 2.475906632771714e-05, "epoch": 3.370429252782194, "percentage": 48.15, "elapsed_time": "3:25:56", "remaining_time": "3:41:46"}
{"current_steps": 2125, "total_steps": 4403, "loss": 0.1848, "lr": 2.468201359410548e-05, "epoch": 3.3783783783783785, "percentage": 48.26, "elapsed_time": "3:26:26", "remaining_time": "3:41:18"}
{"current_steps": 2130, "total_steps": 4403, "loss": 0.2151, "lr": 2.4604887266408304e-05, "epoch": 3.3863275039745626, "percentage": 48.38, "elapsed_time": "3:26:54", "remaining_time": "3:40:48"}
{"current_steps": 2135, "total_steps": 4403, "loss": 0.1917, "lr": 2.4527688556933402e-05, "epoch": 3.394276629570747, "percentage": 48.49, "elapsed_time": "3:27:26", "remaining_time": "3:40:22"}
{"current_steps": 2140, "total_steps": 4403, "loss": 0.2116, "lr": 2.445041867912629e-05, "epoch": 3.4022257551669317, "percentage": 48.6, "elapsed_time": "3:27:53", "remaining_time": "3:39:50"}
{"current_steps": 2145, "total_steps": 4403, "loss": 0.1781, "lr": 2.4373078847551154e-05, "epoch": 3.4101748807631163, "percentage": 48.72, "elapsed_time": "3:28:20", "remaining_time": "3:39:19"}
{"current_steps": 2150, "total_steps": 4403, "loss": 0.1908, "lr": 2.4295670277871736e-05, "epoch": 3.4181240063593004, "percentage": 48.83, "elapsed_time": "3:28:49", "remaining_time": "3:38:49"}
{"current_steps": 2155, "total_steps": 4403, "loss": 0.1824, "lr": 2.4218194186832237e-05, "epoch": 3.426073131955485, "percentage": 48.94, "elapsed_time": "3:29:14", "remaining_time": "3:38:16"}
{"current_steps": 2160, "total_steps": 4403, "loss": 0.2182, "lr": 2.4140651792238193e-05, "epoch": 3.4340222575516695, "percentage": 49.06, "elapsed_time": "3:29:46", "remaining_time": "3:37:50"}
{"current_steps": 2165, "total_steps": 4403, "loss": 0.2022, "lr": 2.4063044312937332e-05, "epoch": 3.4419713831478536, "percentage": 49.17, "elapsed_time": "3:30:14", "remaining_time": "3:37:20"}
{"current_steps": 2170, "total_steps": 4403, "loss": 0.19, "lr": 2.3985372968800407e-05, "epoch": 3.449920508744038, "percentage": 49.28, "elapsed_time": "3:30:47", "remaining_time": "3:36:54"}
{"current_steps": 2175, "total_steps": 4403, "loss": 0.2018, "lr": 2.3907638980702043e-05, "epoch": 3.4578696343402227, "percentage": 49.4, "elapsed_time": "3:31:23", "remaining_time": "3:36:32"}
{"current_steps": 2180, "total_steps": 4403, "loss": 0.2043, "lr": 2.382984357050151e-05, "epoch": 3.4658187599364068, "percentage": 49.51, "elapsed_time": "3:31:52", "remaining_time": "3:36:02"}
{"current_steps": 2185, "total_steps": 4403, "loss": 0.2044, "lr": 2.3751987961023545e-05, "epoch": 3.4737678855325913, "percentage": 49.63, "elapsed_time": "3:32:20", "remaining_time": "3:35:33"}
{"current_steps": 2190, "total_steps": 4403, "loss": 0.2004, "lr": 2.3674073376039152e-05, "epoch": 3.481717011128776, "percentage": 49.74, "elapsed_time": "3:32:51", "remaining_time": "3:35:05"}
{"current_steps": 2195, "total_steps": 4403, "loss": 0.2211, "lr": 2.359610104024631e-05, "epoch": 3.4896661367249604, "percentage": 49.85, "elapsed_time": "3:33:21", "remaining_time": "3:34:37"}
{"current_steps": 2200, "total_steps": 4403, "loss": 0.211, "lr": 2.3518072179250753e-05, "epoch": 3.4976152623211445, "percentage": 49.97, "elapsed_time": "3:33:54", "remaining_time": "3:34:11"}
{"current_steps": 2205, "total_steps": 4403, "loss": 0.1866, "lr": 2.343998801954673e-05, "epoch": 3.505564387917329, "percentage": 50.08, "elapsed_time": "3:34:20", "remaining_time": "3:33:39"}
{"current_steps": 2210, "total_steps": 4403, "loss": 0.1949, "lr": 2.3361849788497666e-05, "epoch": 3.5135135135135136, "percentage": 50.19, "elapsed_time": "3:34:49", "remaining_time": "3:33:10"}
{"current_steps": 2215, "total_steps": 4403, "loss": 0.1879, "lr": 2.3283658714316935e-05, "epoch": 3.521462639109698, "percentage": 50.31, "elapsed_time": "3:35:14", "remaining_time": "3:32:37"}
{"current_steps": 2220, "total_steps": 4403, "loss": 0.1985, "lr": 2.320541602604851e-05, "epoch": 3.5294117647058822, "percentage": 50.42, "elapsed_time": "3:35:46", "remaining_time": "3:32:10"}
{"current_steps": 2225, "total_steps": 4403, "loss": 0.1935, "lr": 2.3127122953547663e-05, "epoch": 3.537360890302067, "percentage": 50.53, "elapsed_time": "3:36:15", "remaining_time": "3:31:41"}
{"current_steps": 2230, "total_steps": 4403, "loss": 0.1894, "lr": 2.3048780727461627e-05, "epoch": 3.5453100158982513, "percentage": 50.65, "elapsed_time": "3:36:44", "remaining_time": "3:31:11"}
{"current_steps": 2235, "total_steps": 4403, "loss": 0.1824, "lr": 2.2970390579210246e-05, "epoch": 3.5532591414944354, "percentage": 50.76, "elapsed_time": "3:37:11", "remaining_time": "3:30:40"}
{"current_steps": 2240, "total_steps": 4403, "loss": 0.1975, "lr": 2.2891953740966643e-05, "epoch": 3.56120826709062, "percentage": 50.87, "elapsed_time": "3:37:41", "remaining_time": "3:30:12"}
{"current_steps": 2245, "total_steps": 4403, "loss": 0.1802, "lr": 2.281347144563782e-05, "epoch": 3.5691573926868045, "percentage": 50.99, "elapsed_time": "3:38:03", "remaining_time": "3:29:36"}
{"current_steps": 2250, "total_steps": 4403, "loss": 0.2073, "lr": 2.273494492684531e-05, "epoch": 3.5771065182829886, "percentage": 51.1, "elapsed_time": "3:38:34", "remaining_time": "3:29:08"}
{"current_steps": 2255, "total_steps": 4403, "loss": 0.2094, "lr": 2.265637541890577e-05, "epoch": 3.585055643879173, "percentage": 51.22, "elapsed_time": "3:39:01", "remaining_time": "3:28:37"}
{"current_steps": 2260, "total_steps": 4403, "loss": 0.1939, "lr": 2.2577764156811563e-05, "epoch": 3.5930047694753577, "percentage": 51.33, "elapsed_time": "3:39:27", "remaining_time": "3:28:05"}
{"current_steps": 2265, "total_steps": 4403, "loss": 0.2008, "lr": 2.2499112376211373e-05, "epoch": 3.6009538950715423, "percentage": 51.44, "elapsed_time": "3:39:59", "remaining_time": "3:27:39"}
{"current_steps": 2270, "total_steps": 4403, "loss": 0.193, "lr": 2.2420421313390776e-05, "epoch": 3.6089030206677264, "percentage": 51.56, "elapsed_time": "3:40:30", "remaining_time": "3:27:11"}
{"current_steps": 2275, "total_steps": 4403, "loss": 0.2092, "lr": 2.234169220525282e-05, "epoch": 3.616852146263911, "percentage": 51.67, "elapsed_time": "3:40:56", "remaining_time": "3:26:39"}
{"current_steps": 2280, "total_steps": 4403, "loss": 0.1922, "lr": 2.226292628929853e-05, "epoch": 3.6248012718600955, "percentage": 51.78, "elapsed_time": "3:41:28", "remaining_time": "3:26:13"}
{"current_steps": 2285, "total_steps": 4403, "loss": 0.188, "lr": 2.2184124803607525e-05, "epoch": 3.63275039745628, "percentage": 51.9, "elapsed_time": "3:41:56", "remaining_time": "3:25:43"}
{"current_steps": 2290, "total_steps": 4403, "loss": 0.204, "lr": 2.210528898681851e-05, "epoch": 3.640699523052464, "percentage": 52.01, "elapsed_time": "3:42:23", "remaining_time": "3:25:11"}
{"current_steps": 2295, "total_steps": 4403, "loss": 0.1913, "lr": 2.2026420078109825e-05, "epoch": 3.6486486486486487, "percentage": 52.12, "elapsed_time": "3:42:50", "remaining_time": "3:24:40"}
{"current_steps": 2300, "total_steps": 4403, "loss": 0.1971, "lr": 2.1947519317179972e-05, "epoch": 3.6565977742448332, "percentage": 52.24, "elapsed_time": "3:43:18", "remaining_time": "3:24:10"}
{"current_steps": 2305, "total_steps": 4403, "loss": 0.2054, "lr": 2.1868587944228118e-05, "epoch": 3.6645468998410173, "percentage": 52.35, "elapsed_time": "3:43:45", "remaining_time": "3:23:40"}
{"current_steps": 2310, "total_steps": 4403, "loss": 0.1861, "lr": 2.1789627199934588e-05, "epoch": 3.672496025437202, "percentage": 52.46, "elapsed_time": "3:44:19", "remaining_time": "3:23:15"}
{"current_steps": 2315, "total_steps": 4403, "loss": 0.1781, "lr": 2.1710638325441408e-05, "epoch": 3.6804451510333864, "percentage": 52.58, "elapsed_time": "3:44:52", "remaining_time": "3:22:49"}
{"current_steps": 2320, "total_steps": 4403, "loss": 0.205, "lr": 2.1631622562332744e-05, "epoch": 3.6883942766295705, "percentage": 52.69, "elapsed_time": "3:45:22", "remaining_time": "3:22:21"}
{"current_steps": 2325, "total_steps": 4403, "loss": 0.198, "lr": 2.155258115261542e-05, "epoch": 3.696343402225755, "percentage": 52.8, "elapsed_time": "3:45:53", "remaining_time": "3:21:53"}
{"current_steps": 2330, "total_steps": 4403, "loss": 0.1954, "lr": 2.1473515338699383e-05, "epoch": 3.7042925278219396, "percentage": 52.92, "elapsed_time": "3:46:23", "remaining_time": "3:21:24"}
{"current_steps": 2335, "total_steps": 4403, "loss": 0.1994, "lr": 2.1394426363378186e-05, "epoch": 3.7122416534181237, "percentage": 53.03, "elapsed_time": "3:46:58", "remaining_time": "3:21:01"}
{"current_steps": 2340, "total_steps": 4403, "loss": 0.1973, "lr": 2.1315315469809426e-05, "epoch": 3.7201907790143083, "percentage": 53.15, "elapsed_time": "3:47:27", "remaining_time": "3:20:32"}
{"current_steps": 2345, "total_steps": 4403, "loss": 0.2018, "lr": 2.1236183901495236e-05, "epoch": 3.728139904610493, "percentage": 53.26, "elapsed_time": "3:47:50", "remaining_time": "3:19:57"}
{"current_steps": 2350, "total_steps": 4403, "loss": 0.2054, "lr": 2.1157032902262716e-05, "epoch": 3.7360890302066774, "percentage": 53.37, "elapsed_time": "3:48:17", "remaining_time": "3:19:26"}
{"current_steps": 2355, "total_steps": 4403, "loss": 0.1878, "lr": 2.1077863716244388e-05, "epoch": 3.744038155802862, "percentage": 53.49, "elapsed_time": "3:48:46", "remaining_time": "3:18:56"}
{"current_steps": 2360, "total_steps": 4403, "loss": 0.2086, "lr": 2.099867758785866e-05, "epoch": 3.751987281399046, "percentage": 53.6, "elapsed_time": "3:49:18", "remaining_time": "3:18:30"}
{"current_steps": 2365, "total_steps": 4403, "loss": 0.2087, "lr": 2.091947576179023e-05, "epoch": 3.7599364069952306, "percentage": 53.71, "elapsed_time": "3:49:44", "remaining_time": "3:17:58"}
{"current_steps": 2370, "total_steps": 4403, "loss": 0.1952, "lr": 2.084025948297055e-05, "epoch": 3.767885532591415, "percentage": 53.83, "elapsed_time": "3:50:15", "remaining_time": "3:17:31"}
{"current_steps": 2375, "total_steps": 4403, "loss": 0.1858, "lr": 2.0761029996558233e-05, "epoch": 3.7758346581875992, "percentage": 53.94, "elapsed_time": "3:50:40", "remaining_time": "3:16:58"}
{"current_steps": 2380, "total_steps": 4403, "loss": 0.2044, "lr": 2.068178854791951e-05, "epoch": 3.7837837837837838, "percentage": 54.05, "elapsed_time": "3:51:07", "remaining_time": "3:16:27"}
{"current_steps": 2385, "total_steps": 4403, "loss": 0.2019, "lr": 2.0602536382608638e-05, "epoch": 3.7917329093799683, "percentage": 54.17, "elapsed_time": "3:51:33", "remaining_time": "3:15:55"}
{"current_steps": 2390, "total_steps": 4403, "loss": 0.201, "lr": 2.0523274746348315e-05, "epoch": 3.7996820349761524, "percentage": 54.28, "elapsed_time": "3:52:04", "remaining_time": "3:15:28"}
{"current_steps": 2395, "total_steps": 4403, "loss": 0.2044, "lr": 2.0444004885010114e-05, "epoch": 3.807631160572337, "percentage": 54.39, "elapsed_time": "3:52:29", "remaining_time": "3:14:55"}
{"current_steps": 2400, "total_steps": 4403, "loss": 0.2049, "lr": 2.0364728044594897e-05, "epoch": 3.8155802861685215, "percentage": 54.51, "elapsed_time": "3:52:59", "remaining_time": "3:14:27"}
{"current_steps": 2405, "total_steps": 4403, "loss": 0.2048, "lr": 2.0285445471213218e-05, "epoch": 3.8235294117647056, "percentage": 54.62, "elapsed_time": "3:53:24", "remaining_time": "3:13:54"}
{"current_steps": 2410, "total_steps": 4403, "loss": 0.1823, "lr": 2.020615841106575e-05, "epoch": 3.83147853736089, "percentage": 54.74, "elapsed_time": "3:53:54", "remaining_time": "3:13:25"}
{"current_steps": 2415, "total_steps": 4403, "loss": 0.1911, "lr": 2.0126868110423685e-05, "epoch": 3.8394276629570747, "percentage": 54.85, "elapsed_time": "3:54:24", "remaining_time": "3:12:58"}
{"current_steps": 2420, "total_steps": 4403, "loss": 0.1852, "lr": 2.0047575815609166e-05, "epoch": 3.8473767885532593, "percentage": 54.96, "elapsed_time": "3:54:52", "remaining_time": "3:12:27"}
{"current_steps": 2425, "total_steps": 4403, "loss": 0.1792, "lr": 1.996828277297566e-05, "epoch": 3.855325914149444, "percentage": 55.08, "elapsed_time": "3:55:19", "remaining_time": "3:11:56"}
{"current_steps": 2430, "total_steps": 4403, "loss": 0.2009, "lr": 1.988899022888841e-05, "epoch": 3.863275039745628, "percentage": 55.19, "elapsed_time": "3:55:49", "remaining_time": "3:11:28"}
{"current_steps": 2435, "total_steps": 4403, "loss": 0.1927, "lr": 1.98096994297048e-05, "epoch": 3.8712241653418125, "percentage": 55.3, "elapsed_time": "3:56:17", "remaining_time": "3:10:58"}
{"current_steps": 2440, "total_steps": 4403, "loss": 0.1976, "lr": 1.9730411621754798e-05, "epoch": 3.879173290937997, "percentage": 55.42, "elapsed_time": "3:56:43", "remaining_time": "3:10:26"}
{"current_steps": 2445, "total_steps": 4403, "loss": 0.186, "lr": 1.9651128051321376e-05, "epoch": 3.887122416534181, "percentage": 55.53, "elapsed_time": "3:57:10", "remaining_time": "3:09:56"}
{"current_steps": 2450, "total_steps": 4403, "loss": 0.2025, "lr": 1.9571849964620858e-05, "epoch": 3.8950715421303657, "percentage": 55.64, "elapsed_time": "3:57:41", "remaining_time": "3:09:28"}
{"current_steps": 2455, "total_steps": 4403, "loss": 0.2011, "lr": 1.949257860778339e-05, "epoch": 3.90302066772655, "percentage": 55.76, "elapsed_time": "3:58:11", "remaining_time": "3:08:59"}
{"current_steps": 2460, "total_steps": 4403, "loss": 0.2003, "lr": 1.9413315226833343e-05, "epoch": 3.9109697933227343, "percentage": 55.87, "elapsed_time": "3:58:40", "remaining_time": "3:08:30"}
{"current_steps": 2465, "total_steps": 4403, "loss": 0.2058, "lr": 1.9334061067669725e-05, "epoch": 3.918918918918919, "percentage": 55.98, "elapsed_time": "3:59:05", "remaining_time": "3:07:58"}
{"current_steps": 2470, "total_steps": 4403, "loss": 0.1994, "lr": 1.9254817376046556e-05, "epoch": 3.9268680445151034, "percentage": 56.1, "elapsed_time": "3:59:39", "remaining_time": "3:07:33"}
{"current_steps": 2475, "total_steps": 4403, "loss": 0.186, "lr": 1.9175585397553368e-05, "epoch": 3.9348171701112875, "percentage": 56.21, "elapsed_time": "4:00:08", "remaining_time": "3:07:03"}
{"current_steps": 2480, "total_steps": 4403, "loss": 0.1989, "lr": 1.909636637759554e-05, "epoch": 3.942766295707472, "percentage": 56.33, "elapsed_time": "4:00:38", "remaining_time": "3:06:35"}
{"current_steps": 2485, "total_steps": 4403, "loss": 0.1973, "lr": 1.9017161561374787e-05, "epoch": 3.9507154213036566, "percentage": 56.44, "elapsed_time": "4:01:01", "remaining_time": "3:06:01"}
{"current_steps": 2490, "total_steps": 4403, "loss": 0.1918, "lr": 1.893797219386957e-05, "epoch": 3.958664546899841, "percentage": 56.55, "elapsed_time": "4:01:27", "remaining_time": "3:05:30"}
{"current_steps": 2495, "total_steps": 4403, "loss": 0.1838, "lr": 1.885879951981549e-05, "epoch": 3.9666136724960257, "percentage": 56.67, "elapsed_time": "4:01:55", "remaining_time": "3:05:00"}
{"current_steps": 2500, "total_steps": 4403, "loss": 0.1928, "lr": 1.877964478368577e-05, "epoch": 3.97456279809221, "percentage": 56.78, "elapsed_time": "4:02:22", "remaining_time": "3:04:30"}
{"current_steps": 2505, "total_steps": 4403, "loss": 0.193, "lr": 1.8700509229671696e-05, "epoch": 3.9825119236883944, "percentage": 56.89, "elapsed_time": "4:02:52", "remaining_time": "3:04:01"}
{"current_steps": 2510, "total_steps": 4403, "loss": 0.1764, "lr": 1.8621394101663003e-05, "epoch": 3.990461049284579, "percentage": 57.01, "elapsed_time": "4:03:20", "remaining_time": "3:03:31"}
{"current_steps": 2515, "total_steps": 4403, "loss": 0.1949, "lr": 1.854230064322837e-05, "epoch": 3.998410174880763, "percentage": 57.12, "elapsed_time": "4:03:52", "remaining_time": "3:03:04"}
{"current_steps": 2520, "total_steps": 4403, "loss": 0.1865, "lr": 1.8463230097595887e-05, "epoch": 4.006359300476947, "percentage": 57.23, "elapsed_time": "4:04:22", "remaining_time": "3:02:35"}
{"current_steps": 2525, "total_steps": 4403, "loss": 0.2, "lr": 1.8384183707633475e-05, "epoch": 4.014308426073132, "percentage": 57.35, "elapsed_time": "4:04:48", "remaining_time": "3:02:04"}
{"current_steps": 2530, "total_steps": 4403, "loss": 0.1708, "lr": 1.8305162715829348e-05, "epoch": 4.022257551669316, "percentage": 57.46, "elapsed_time": "4:05:18", "remaining_time": "3:01:36"}
{"current_steps": 2535, "total_steps": 4403, "loss": 0.1941, "lr": 1.8226168364272534e-05, "epoch": 4.030206677265501, "percentage": 57.57, "elapsed_time": "4:05:49", "remaining_time": "3:01:08"}
{"current_steps": 2540, "total_steps": 4403, "loss": 0.1877, "lr": 1.8147201894633282e-05, "epoch": 4.038155802861685, "percentage": 57.69, "elapsed_time": "4:06:21", "remaining_time": "3:00:41"}
{"current_steps": 2545, "total_steps": 4403, "loss": 0.1671, "lr": 1.8068264548143605e-05, "epoch": 4.046104928457869, "percentage": 57.8, "elapsed_time": "4:06:46", "remaining_time": "3:00:09"}
{"current_steps": 2550, "total_steps": 4403, "loss": 0.1882, "lr": 1.7989357565577746e-05, "epoch": 4.054054054054054, "percentage": 57.92, "elapsed_time": "4:07:17", "remaining_time": "2:59:42"}
{"current_steps": 2555, "total_steps": 4403, "loss": 0.1781, "lr": 1.7910482187232643e-05, "epoch": 4.0620031796502385, "percentage": 58.03, "elapsed_time": "4:07:45", "remaining_time": "2:59:12"}
{"current_steps": 2560, "total_steps": 4403, "loss": 0.1854, "lr": 1.7831639652908507e-05, "epoch": 4.069952305246423, "percentage": 58.14, "elapsed_time": "4:08:11", "remaining_time": "2:58:41"}
{"current_steps": 2565, "total_steps": 4403, "loss": 0.2041, "lr": 1.775283120188925e-05, "epoch": 4.077901430842608, "percentage": 58.26, "elapsed_time": "4:08:44", "remaining_time": "2:58:14"}
{"current_steps": 2570, "total_steps": 4403, "loss": 0.1718, "lr": 1.7674058072923075e-05, "epoch": 4.085850556438792, "percentage": 58.37, "elapsed_time": "4:09:15", "remaining_time": "2:57:46"}
{"current_steps": 2575, "total_steps": 4403, "loss": 0.1805, "lr": 1.7595321504202977e-05, "epoch": 4.093799682034976, "percentage": 58.48, "elapsed_time": "4:09:42", "remaining_time": "2:57:16"}
{"current_steps": 2580, "total_steps": 4403, "loss": 0.1806, "lr": 1.751662273334725e-05, "epoch": 4.101748807631161, "percentage": 58.6, "elapsed_time": "4:10:11", "remaining_time": "2:56:47"}
{"current_steps": 2585, "total_steps": 4403, "loss": 0.1738, "lr": 1.7437962997380093e-05, "epoch": 4.109697933227345, "percentage": 58.71, "elapsed_time": "4:10:39", "remaining_time": "2:56:17"}
{"current_steps": 2590, "total_steps": 4403, "loss": 0.1658, "lr": 1.7359343532712135e-05, "epoch": 4.117647058823529, "percentage": 58.82, "elapsed_time": "4:11:06", "remaining_time": "2:55:46"}
{"current_steps": 2595, "total_steps": 4403, "loss": 0.1881, "lr": 1.7280765575120992e-05, "epoch": 4.125596184419714, "percentage": 58.94, "elapsed_time": "4:11:33", "remaining_time": "2:55:15"}
{"current_steps": 2600, "total_steps": 4403, "loss": 0.1957, "lr": 1.7202230359731835e-05, "epoch": 4.133545310015898, "percentage": 59.05, "elapsed_time": "4:12:04", "remaining_time": "2:54:48"}
{"current_steps": 2605, "total_steps": 4403, "loss": 0.1847, "lr": 1.7123739120998033e-05, "epoch": 4.141494435612083, "percentage": 59.16, "elapsed_time": "4:12:34", "remaining_time": "2:54:19"}
{"current_steps": 2610, "total_steps": 4403, "loss": 0.183, "lr": 1.7045293092681686e-05, "epoch": 4.149443561208267, "percentage": 59.28, "elapsed_time": "4:13:00", "remaining_time": "2:53:48"}
{"current_steps": 2615, "total_steps": 4403, "loss": 0.1775, "lr": 1.6966893507834242e-05, "epoch": 4.157392686804451, "percentage": 59.39, "elapsed_time": "4:13:24", "remaining_time": "2:53:15"}
{"current_steps": 2620, "total_steps": 4403, "loss": 0.1913, "lr": 1.6888541598777167e-05, "epoch": 4.165341812400636, "percentage": 59.5, "elapsed_time": "4:13:53", "remaining_time": "2:52:47"}
{"current_steps": 2625, "total_steps": 4403, "loss": 0.1895, "lr": 1.68102385970825e-05, "epoch": 4.17329093799682, "percentage": 59.62, "elapsed_time": "4:14:23", "remaining_time": "2:52:18"}
{"current_steps": 2630, "total_steps": 4403, "loss": 0.1608, "lr": 1.6731985733553545e-05, "epoch": 4.1812400635930045, "percentage": 59.73, "elapsed_time": "4:14:53", "remaining_time": "2:51:49"}
{"current_steps": 2635, "total_steps": 4403, "loss": 0.1718, "lr": 1.6653784238205525e-05, "epoch": 4.1891891891891895, "percentage": 59.85, "elapsed_time": "4:15:18", "remaining_time": "2:51:18"}
{"current_steps": 2640, "total_steps": 4403, "loss": 0.1794, "lr": 1.6575635340246203e-05, "epoch": 4.197138314785374, "percentage": 59.96, "elapsed_time": "4:15:44", "remaining_time": "2:50:47"}
{"current_steps": 2645, "total_steps": 4403, "loss": 0.1851, "lr": 1.649754026805662e-05, "epoch": 4.205087440381558, "percentage": 60.07, "elapsed_time": "4:16:12", "remaining_time": "2:50:17"}
{"current_steps": 2650, "total_steps": 4403, "loss": 0.188, "lr": 1.6419500249171737e-05, "epoch": 4.213036565977743, "percentage": 60.19, "elapsed_time": "4:16:39", "remaining_time": "2:49:47"}
{"current_steps": 2655, "total_steps": 4403, "loss": 0.1704, "lr": 1.634151651026118e-05, "epoch": 4.220985691573927, "percentage": 60.3, "elapsed_time": "4:17:07", "remaining_time": "2:49:16"}
{"current_steps": 2660, "total_steps": 4403, "loss": 0.1744, "lr": 1.626359027710993e-05, "epoch": 4.228934817170111, "percentage": 60.41, "elapsed_time": "4:17:30", "remaining_time": "2:48:44"}
{"current_steps": 2665, "total_steps": 4403, "loss": 0.1741, "lr": 1.6185722774599064e-05, "epoch": 4.236883942766296, "percentage": 60.53, "elapsed_time": "4:17:56", "remaining_time": "2:48:13"}
{"current_steps": 2670, "total_steps": 4403, "loss": 0.184, "lr": 1.6107915226686504e-05, "epoch": 4.24483306836248, "percentage": 60.64, "elapsed_time": "4:18:24", "remaining_time": "2:47:43"}
{"current_steps": 2675, "total_steps": 4403, "loss": 0.1833, "lr": 1.603016885638779e-05, "epoch": 4.252782193958664, "percentage": 60.75, "elapsed_time": "4:18:54", "remaining_time": "2:47:14"}
{"current_steps": 2680, "total_steps": 4403, "loss": 0.1692, "lr": 1.5952484885756827e-05, "epoch": 4.260731319554849, "percentage": 60.87, "elapsed_time": "4:19:23", "remaining_time": "2:46:46"}
{"current_steps": 2685, "total_steps": 4403, "loss": 0.1801, "lr": 1.587486453586669e-05, "epoch": 4.268680445151033, "percentage": 60.98, "elapsed_time": "4:19:47", "remaining_time": "2:46:13"}
{"current_steps": 2690, "total_steps": 4403, "loss": 0.1826, "lr": 1.579730902679045e-05, "epoch": 4.276629570747218, "percentage": 61.09, "elapsed_time": "4:20:14", "remaining_time": "2:45:43"}
{"current_steps": 2695, "total_steps": 4403, "loss": 0.1955, "lr": 1.5719819577581982e-05, "epoch": 4.284578696343402, "percentage": 61.21, "elapsed_time": "4:20:45", "remaining_time": "2:45:15"}
{"current_steps": 2700, "total_steps": 4403, "loss": 0.1633, "lr": 1.5642397406256768e-05, "epoch": 4.292527821939586, "percentage": 61.32, "elapsed_time": "4:21:14", "remaining_time": "2:44:46"}
{"current_steps": 2705, "total_steps": 4403, "loss": 0.1641, "lr": 1.556504372977283e-05, "epoch": 4.300476947535771, "percentage": 61.44, "elapsed_time": "4:21:44", "remaining_time": "2:44:18"}
{"current_steps": 2710, "total_steps": 4403, "loss": 0.1715, "lr": 1.548775976401152e-05, "epoch": 4.3084260731319555, "percentage": 61.55, "elapsed_time": "4:22:16", "remaining_time": "2:43:50"}
{"current_steps": 2715, "total_steps": 4403, "loss": 0.1666, "lr": 1.5410546723758452e-05, "epoch": 4.31637519872814, "percentage": 61.66, "elapsed_time": "4:22:40", "remaining_time": "2:43:19"}
{"current_steps": 2720, "total_steps": 4403, "loss": 0.1818, "lr": 1.5333405822684428e-05, "epoch": 4.324324324324325, "percentage": 61.78, "elapsed_time": "4:23:12", "remaining_time": "2:42:51"}
{"current_steps": 2725, "total_steps": 4403, "loss": 0.1756, "lr": 1.5256338273326293e-05, "epoch": 4.332273449920509, "percentage": 61.89, "elapsed_time": "4:23:36", "remaining_time": "2:42:19"}
{"current_steps": 2730, "total_steps": 4403, "loss": 0.1808, "lr": 1.5179345287067935e-05, "epoch": 4.340222575516693, "percentage": 62.0, "elapsed_time": "4:24:04", "remaining_time": "2:41:49"}
{"current_steps": 2735, "total_steps": 4403, "loss": 0.1804, "lr": 1.5102428074121222e-05, "epoch": 4.348171701112878, "percentage": 62.12, "elapsed_time": "4:24:31", "remaining_time": "2:41:19"}
{"current_steps": 2740, "total_steps": 4403, "loss": 0.1815, "lr": 1.5025587843506986e-05, "epoch": 4.356120826709062, "percentage": 62.23, "elapsed_time": "4:25:01", "remaining_time": "2:40:51"}
{"current_steps": 2745, "total_steps": 4403, "loss": 0.1939, "lr": 1.4948825803035996e-05, "epoch": 4.364069952305247, "percentage": 62.34, "elapsed_time": "4:25:34", "remaining_time": "2:40:24"}
{"current_steps": 2750, "total_steps": 4403, "loss": 0.1603, "lr": 1.4872143159290016e-05, "epoch": 4.372019077901431, "percentage": 62.46, "elapsed_time": "4:26:04", "remaining_time": "2:39:56"}
{"current_steps": 2755, "total_steps": 4403, "loss": 0.1684, "lr": 1.4795541117602808e-05, "epoch": 4.379968203497615, "percentage": 62.57, "elapsed_time": "4:26:34", "remaining_time": "2:39:27"}
{"current_steps": 2760, "total_steps": 4403, "loss": 0.1957, "lr": 1.4719020882041175e-05, "epoch": 4.3879173290938, "percentage": 62.68, "elapsed_time": "4:27:03", "remaining_time": "2:38:58"}
{"current_steps": 2765, "total_steps": 4403, "loss": 0.1722, "lr": 1.4642583655386084e-05, "epoch": 4.395866454689984, "percentage": 62.8, "elapsed_time": "4:27:33", "remaining_time": "2:38:30"}
{"current_steps": 2770, "total_steps": 4403, "loss": 0.1792, "lr": 1.4566230639113696e-05, "epoch": 4.403815580286168, "percentage": 62.91, "elapsed_time": "4:28:02", "remaining_time": "2:38:01"}
{"current_steps": 2775, "total_steps": 4403, "loss": 0.1797, "lr": 1.448996303337654e-05, "epoch": 4.411764705882353, "percentage": 63.03, "elapsed_time": "4:28:34", "remaining_time": "2:37:33"}
{"current_steps": 2780, "total_steps": 4403, "loss": 0.176, "lr": 1.4413782036984616e-05, "epoch": 4.419713831478537, "percentage": 63.14, "elapsed_time": "4:29:01", "remaining_time": "2:37:03"}
{"current_steps": 2785, "total_steps": 4403, "loss": 0.1786, "lr": 1.4337688847386542e-05, "epoch": 4.4276629570747215, "percentage": 63.25, "elapsed_time": "4:29:26", "remaining_time": "2:36:32"}
{"current_steps": 2790, "total_steps": 4403, "loss": 0.1866, "lr": 1.426168466065077e-05, "epoch": 4.4356120826709065, "percentage": 63.37, "elapsed_time": "4:29:54", "remaining_time": "2:36:02"}
{"current_steps": 2795, "total_steps": 4403, "loss": 0.185, "lr": 1.4185770671446743e-05, "epoch": 4.443561208267091, "percentage": 63.48, "elapsed_time": "4:30:27", "remaining_time": "2:35:35"}
{"current_steps": 2800, "total_steps": 4403, "loss": 0.1739, "lr": 1.4109948073026153e-05, "epoch": 4.451510333863275, "percentage": 63.59, "elapsed_time": "4:30:58", "remaining_time": "2:35:07"}
{"current_steps": 2805, "total_steps": 4403, "loss": 0.1887, "lr": 1.4034218057204165e-05, "epoch": 4.45945945945946, "percentage": 63.71, "elapsed_time": "4:31:30", "remaining_time": "2:34:40"}
{"current_steps": 2810, "total_steps": 4403, "loss": 0.1879, "lr": 1.3958581814340679e-05, "epoch": 4.467408585055644, "percentage": 63.82, "elapsed_time": "4:32:02", "remaining_time": "2:34:13"}
{"current_steps": 2815, "total_steps": 4403, "loss": 0.2001, "lr": 1.3883040533321637e-05, "epoch": 4.475357710651828, "percentage": 63.93, "elapsed_time": "4:32:27", "remaining_time": "2:33:41"}
{"current_steps": 2820, "total_steps": 4403, "loss": 0.2001, "lr": 1.3807595401540322e-05, "epoch": 4.483306836248013, "percentage": 64.05, "elapsed_time": "4:32:57", "remaining_time": "2:33:13"}
{"current_steps": 2825, "total_steps": 4403, "loss": 0.1734, "lr": 1.3732247604878697e-05, "epoch": 4.491255961844197, "percentage": 64.16, "elapsed_time": "4:33:24", "remaining_time": "2:32:43"}
{"current_steps": 2830, "total_steps": 4403, "loss": 0.1799, "lr": 1.3656998327688764e-05, "epoch": 4.499205087440382, "percentage": 64.27, "elapsed_time": "4:33:54", "remaining_time": "2:32:14"}
{"current_steps": 2835, "total_steps": 4403, "loss": 0.1982, "lr": 1.3581848752773961e-05, "epoch": 4.507154213036566, "percentage": 64.39, "elapsed_time": "4:34:18", "remaining_time": "2:31:42"}
{"current_steps": 2840, "total_steps": 4403, "loss": 0.1748, "lr": 1.3506800061370555e-05, "epoch": 4.51510333863275, "percentage": 64.5, "elapsed_time": "4:34:49", "remaining_time": "2:31:15"}
{"current_steps": 2845, "total_steps": 4403, "loss": 0.1843, "lr": 1.3431853433129058e-05, "epoch": 4.523052464228935, "percentage": 64.62, "elapsed_time": "4:35:18", "remaining_time": "2:30:46"}
{"current_steps": 2850, "total_steps": 4403, "loss": 0.1697, "lr": 1.3357010046095741e-05, "epoch": 4.531001589825119, "percentage": 64.73, "elapsed_time": "4:35:47", "remaining_time": "2:30:16"}
{"current_steps": 2855, "total_steps": 4403, "loss": 0.1639, "lr": 1.3282271076694052e-05, "epoch": 4.538950715421303, "percentage": 64.84, "elapsed_time": "4:36:16", "remaining_time": "2:29:47"}
{"current_steps": 2860, "total_steps": 4403, "loss": 0.1703, "lr": 1.3207637699706162e-05, "epoch": 4.546899841017488, "percentage": 64.96, "elapsed_time": "4:36:44", "remaining_time": "2:29:18"}
{"current_steps": 2865, "total_steps": 4403, "loss": 0.2036, "lr": 1.3133111088254507e-05, "epoch": 4.5548489666136724, "percentage": 65.07, "elapsed_time": "4:37:17", "remaining_time": "2:28:51"}
{"current_steps": 2870, "total_steps": 4403, "loss": 0.1818, "lr": 1.3058692413783307e-05, "epoch": 4.5627980922098565, "percentage": 65.18, "elapsed_time": "4:37:48", "remaining_time": "2:28:23"}
{"current_steps": 2875, "total_steps": 4403, "loss": 0.1654, "lr": 1.2984382846040187e-05, "epoch": 4.5707472178060415, "percentage": 65.3, "elapsed_time": "4:38:15", "remaining_time": "2:27:53"}
{"current_steps": 2880, "total_steps": 4403, "loss": 0.1779, "lr": 1.2910183553057788e-05, "epoch": 4.578696343402226, "percentage": 65.41, "elapsed_time": "4:38:38", "remaining_time": "2:27:21"}
{"current_steps": 2885, "total_steps": 4403, "loss": 0.1915, "lr": 1.2836095701135398e-05, "epoch": 4.586645468998411, "percentage": 65.52, "elapsed_time": "4:39:08", "remaining_time": "2:26:52"}
{"current_steps": 2890, "total_steps": 4403, "loss": 0.1783, "lr": 1.2762120454820628e-05, "epoch": 4.594594594594595, "percentage": 65.64, "elapsed_time": "4:39:39", "remaining_time": "2:26:24"}
{"current_steps": 2895, "total_steps": 4403, "loss": 0.1863, "lr": 1.268825897689108e-05, "epoch": 4.602543720190779, "percentage": 65.75, "elapsed_time": "4:40:05", "remaining_time": "2:25:53"}
{"current_steps": 2900, "total_steps": 4403, "loss": 0.1929, "lr": 1.2614512428336105e-05, "epoch": 4.610492845786963, "percentage": 65.86, "elapsed_time": "4:40:37", "remaining_time": "2:25:26"}
{"current_steps": 2905, "total_steps": 4403, "loss": 0.2023, "lr": 1.254088196833855e-05, "epoch": 4.618441971383148, "percentage": 65.98, "elapsed_time": "4:41:07", "remaining_time": "2:24:57"}
{"current_steps": 2910, "total_steps": 4403, "loss": 0.1787, "lr": 1.2467368754256513e-05, "epoch": 4.626391096979332, "percentage": 66.09, "elapsed_time": "4:41:37", "remaining_time": "2:24:29"}
{"current_steps": 2915, "total_steps": 4403, "loss": 0.1756, "lr": 1.2393973941605161e-05, "epoch": 4.634340222575517, "percentage": 66.2, "elapsed_time": "4:42:09", "remaining_time": "2:24:01"}
{"current_steps": 2920, "total_steps": 4403, "loss": 0.1737, "lr": 1.2320698684038599e-05, "epoch": 4.642289348171701, "percentage": 66.32, "elapsed_time": "4:42:35", "remaining_time": "2:23:31"}
{"current_steps": 2925, "total_steps": 4403, "loss": 0.1723, "lr": 1.2247544133331681e-05, "epoch": 4.650238473767885, "percentage": 66.43, "elapsed_time": "4:43:06", "remaining_time": "2:23:03"}
{"current_steps": 2930, "total_steps": 4403, "loss": 0.1725, "lr": 1.2174511439361943e-05, "epoch": 4.65818759936407, "percentage": 66.55, "elapsed_time": "4:43:32", "remaining_time": "2:22:32"}
{"current_steps": 2935, "total_steps": 4403, "loss": 0.1784, "lr": 1.2101601750091528e-05, "epoch": 4.666136724960254, "percentage": 66.66, "elapsed_time": "4:43:59", "remaining_time": "2:22:02"}
{"current_steps": 2940, "total_steps": 4403, "loss": 0.1837, "lr": 1.2028816211549117e-05, "epoch": 4.674085850556438, "percentage": 66.77, "elapsed_time": "4:44:29", "remaining_time": "2:21:34"}
{"current_steps": 2945, "total_steps": 4403, "loss": 0.1741, "lr": 1.195615596781194e-05, "epoch": 4.682034976152623, "percentage": 66.89, "elapsed_time": "4:44:54", "remaining_time": "2:21:03"}
{"current_steps": 2950, "total_steps": 4403, "loss": 0.1847, "lr": 1.18836221609878e-05, "epoch": 4.6899841017488075, "percentage": 67.0, "elapsed_time": "4:45:27", "remaining_time": "2:20:35"}
{"current_steps": 2955, "total_steps": 4403, "loss": 0.1791, "lr": 1.1811215931197084e-05, "epoch": 4.697933227344992, "percentage": 67.11, "elapsed_time": "4:45:54", "remaining_time": "2:20:06"}
{"current_steps": 2960, "total_steps": 4403, "loss": 0.1775, "lr": 1.1738938416554857e-05, "epoch": 4.705882352941177, "percentage": 67.23, "elapsed_time": "4:46:23", "remaining_time": "2:19:36"}
{"current_steps": 2965, "total_steps": 4403, "loss": 0.1807, "lr": 1.1666790753153009e-05, "epoch": 4.713831478537361, "percentage": 67.34, "elapsed_time": "4:46:50", "remaining_time": "2:19:07"}
{"current_steps": 2970, "total_steps": 4403, "loss": 0.1694, "lr": 1.1594774075042345e-05, "epoch": 4.721780604133546, "percentage": 67.45, "elapsed_time": "4:47:22", "remaining_time": "2:18:39"}
{"current_steps": 2975, "total_steps": 4403, "loss": 0.1855, "lr": 1.152288951421478e-05, "epoch": 4.72972972972973, "percentage": 67.57, "elapsed_time": "4:47:53", "remaining_time": "2:18:11"}
{"current_steps": 2980, "total_steps": 4403, "loss": 0.1671, "lr": 1.1451138200585567e-05, "epoch": 4.737678855325914, "percentage": 67.68, "elapsed_time": "4:48:24", "remaining_time": "2:17:43"}
{"current_steps": 2985, "total_steps": 4403, "loss": 0.1905, "lr": 1.13795212619755e-05, "epoch": 4.745627980922099, "percentage": 67.79, "elapsed_time": "4:48:52", "remaining_time": "2:17:13"}
{"current_steps": 2990, "total_steps": 4403, "loss": 0.1832, "lr": 1.1308039824093197e-05, "epoch": 4.753577106518283, "percentage": 67.91, "elapsed_time": "4:49:21", "remaining_time": "2:16:44"}
{"current_steps": 2995, "total_steps": 4403, "loss": 0.189, "lr": 1.1236695010517434e-05, "epoch": 4.761526232114467, "percentage": 68.02, "elapsed_time": "4:49:46", "remaining_time": "2:16:13"}
{"current_steps": 3000, "total_steps": 4403, "loss": 0.1912, "lr": 1.116548794267945e-05, "epoch": 4.769475357710652, "percentage": 68.14, "elapsed_time": "4:50:12", "remaining_time": "2:15:43"}
{"current_steps": 3005, "total_steps": 4403, "loss": 0.1789, "lr": 1.109441973984534e-05, "epoch": 4.777424483306836, "percentage": 68.25, "elapsed_time": "4:50:53", "remaining_time": "2:15:19"}
{"current_steps": 3010, "total_steps": 4403, "loss": 0.1722, "lr": 1.1023491519098439e-05, "epoch": 4.78537360890302, "percentage": 68.36, "elapsed_time": "4:51:19", "remaining_time": "2:14:49"}
{"current_steps": 3015, "total_steps": 4403, "loss": 0.1787, "lr": 1.0952704395321781e-05, "epoch": 4.793322734499205, "percentage": 68.48, "elapsed_time": "4:51:45", "remaining_time": "2:14:18"}
{"current_steps": 3020, "total_steps": 4403, "loss": 0.1722, "lr": 1.0882059481180588e-05, "epoch": 4.801271860095389, "percentage": 68.59, "elapsed_time": "4:52:14", "remaining_time": "2:13:49"}
{"current_steps": 3025, "total_steps": 4403, "loss": 0.1688, "lr": 1.0811557887104747e-05, "epoch": 4.809220985691574, "percentage": 68.7, "elapsed_time": "4:52:42", "remaining_time": "2:13:20"}
{"current_steps": 3030, "total_steps": 4403, "loss": 0.1863, "lr": 1.074120072127137e-05, "epoch": 4.8171701112877585, "percentage": 68.82, "elapsed_time": "4:53:10", "remaining_time": "2:12:50"}
{"current_steps": 3035, "total_steps": 4403, "loss": 0.1833, "lr": 1.0670989089587395e-05, "epoch": 4.825119236883943, "percentage": 68.93, "elapsed_time": "4:53:38", "remaining_time": "2:12:21"}
{"current_steps": 3040, "total_steps": 4403, "loss": 0.1767, "lr": 1.0600924095672184e-05, "epoch": 4.833068362480127, "percentage": 69.04, "elapsed_time": "4:54:09", "remaining_time": "2:11:53"}
{"current_steps": 3045, "total_steps": 4403, "loss": 0.1862, "lr": 1.0531006840840162e-05, "epoch": 4.841017488076312, "percentage": 69.16, "elapsed_time": "4:54:38", "remaining_time": "2:11:24"}
{"current_steps": 3050, "total_steps": 4403, "loss": 0.1665, "lr": 1.046123842408354e-05, "epoch": 4.848966613672496, "percentage": 69.27, "elapsed_time": "4:55:05", "remaining_time": "2:10:54"}
{"current_steps": 3055, "total_steps": 4403, "loss": 0.1743, "lr": 1.0391619942055007e-05, "epoch": 4.856915739268681, "percentage": 69.38, "elapsed_time": "4:55:36", "remaining_time": "2:10:25"}
{"current_steps": 3060, "total_steps": 4403, "loss": 0.1888, "lr": 1.0322152489050508e-05, "epoch": 4.864864864864865, "percentage": 69.5, "elapsed_time": "4:56:00", "remaining_time": "2:09:55"}
{"current_steps": 3065, "total_steps": 4403, "loss": 0.1742, "lr": 1.0252837156992065e-05, "epoch": 4.872813990461049, "percentage": 69.61, "elapsed_time": "4:56:31", "remaining_time": "2:09:26"}
{"current_steps": 3070, "total_steps": 4403, "loss": 0.1817, "lr": 1.018367503541057e-05, "epoch": 4.880763116057234, "percentage": 69.73, "elapsed_time": "4:57:01", "remaining_time": "2:08:58"}
{"current_steps": 3075, "total_steps": 4403, "loss": 0.1947, "lr": 1.0114667211428675e-05, "epoch": 4.888712241653418, "percentage": 69.84, "elapsed_time": "4:57:33", "remaining_time": "2:08:30"}
{"current_steps": 3080, "total_steps": 4403, "loss": 0.169, "lr": 1.0045814769743731e-05, "epoch": 4.896661367249602, "percentage": 69.95, "elapsed_time": "4:58:06", "remaining_time": "2:08:03"}
{"current_steps": 3085, "total_steps": 4403, "loss": 0.1716, "lr": 9.977118792610719e-06, "epoch": 4.904610492845787, "percentage": 70.07, "elapsed_time": "4:58:38", "remaining_time": "2:07:35"}
{"current_steps": 3090, "total_steps": 4403, "loss": 0.1896, "lr": 9.908580359825204e-06, "epoch": 4.912559618441971, "percentage": 70.18, "elapsed_time": "4:59:03", "remaining_time": "2:07:04"}
{"current_steps": 3095, "total_steps": 4403, "loss": 0.1775, "lr": 9.840200548706435e-06, "epoch": 4.920508744038155, "percentage": 70.29, "elapsed_time": "4:59:27", "remaining_time": "2:06:33"}
{"current_steps": 3100, "total_steps": 4403, "loss": 0.1778, "lr": 9.771980434080348e-06, "epoch": 4.92845786963434, "percentage": 70.41, "elapsed_time": "4:59:54", "remaining_time": "2:06:03"}
{"current_steps": 3105, "total_steps": 4403, "loss": 0.1822, "lr": 9.70392108826269e-06, "epoch": 4.9364069952305245, "percentage": 70.52, "elapsed_time": "5:00:24", "remaining_time": "2:05:34"}
{"current_steps": 3110, "total_steps": 4403, "loss": 0.1709, "lr": 9.636023581042191e-06, "epoch": 4.9443561208267095, "percentage": 70.63, "elapsed_time": "5:00:53", "remaining_time": "2:05:05"}
{"current_steps": 3115, "total_steps": 4403, "loss": 0.1994, "lr": 9.5682889796637e-06, "epoch": 4.952305246422894, "percentage": 70.75, "elapsed_time": "5:01:24", "remaining_time": "2:04:37"}
{"current_steps": 3120, "total_steps": 4403, "loss": 0.1806, "lr": 9.500718348811457e-06, "epoch": 4.960254372019078, "percentage": 70.86, "elapsed_time": "5:01:55", "remaining_time": "2:04:09"}
{"current_steps": 3125, "total_steps": 4403, "loss": 0.1919, "lr": 9.433312750592337e-06, "epoch": 4.968203497615263, "percentage": 70.97, "elapsed_time": "5:02:22", "remaining_time": "2:03:39"}
{"current_steps": 3130, "total_steps": 4403, "loss": 0.1734, "lr": 9.366073244519124e-06, "epoch": 4.976152623211447, "percentage": 71.09, "elapsed_time": "5:02:51", "remaining_time": "2:03:10"}
{"current_steps": 3135, "total_steps": 4403, "loss": 0.1722, "lr": 9.299000887493934e-06, "epoch": 4.984101748807631, "percentage": 71.2, "elapsed_time": "5:03:14", "remaining_time": "2:02:39"}
{"current_steps": 3140, "total_steps": 4403, "loss": 0.1794, "lr": 9.232096733791518e-06, "epoch": 4.992050874403816, "percentage": 71.32, "elapsed_time": "5:03:45", "remaining_time": "2:02:10"}
{"current_steps": 3145, "total_steps": 4403, "loss": 0.1693, "lr": 9.165361835042734e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "5:04:11", "remaining_time": "2:01:40"}
{"current_steps": 3150, "total_steps": 4403, "loss": 0.1648, "lr": 9.098797240218036e-06, "epoch": 5.007949125596184, "percentage": 71.54, "elapsed_time": "5:04:43", "remaining_time": "2:01:12"}
{"current_steps": 3155, "total_steps": 4403, "loss": 0.1519, "lr": 9.032403995610937e-06, "epoch": 5.015898251192369, "percentage": 71.66, "elapsed_time": "5:05:16", "remaining_time": "2:00:45"}
{"current_steps": 3160, "total_steps": 4403, "loss": 0.1665, "lr": 8.966183144821583e-06, "epoch": 5.023847376788553, "percentage": 71.77, "elapsed_time": "5:05:40", "remaining_time": "2:00:14"}
{"current_steps": 3165, "total_steps": 4403, "loss": 0.1689, "lr": 8.900135728740373e-06, "epoch": 5.031796502384737, "percentage": 71.88, "elapsed_time": "5:06:11", "remaining_time": "1:59:46"}
{"current_steps": 3170, "total_steps": 4403, "loss": 0.159, "lr": 8.83426278553158e-06, "epoch": 5.039745627980922, "percentage": 72.0, "elapsed_time": "5:06:42", "remaining_time": "1:59:17"}
{"current_steps": 3175, "total_steps": 4403, "loss": 0.1687, "lr": 8.768565350616998e-06, "epoch": 5.047694753577106, "percentage": 72.11, "elapsed_time": "5:07:12", "remaining_time": "1:58:49"}
{"current_steps": 3180, "total_steps": 4403, "loss": 0.1677, "lr": 8.703044456659741e-06, "epoch": 5.0556438791732905, "percentage": 72.22, "elapsed_time": "5:07:41", "remaining_time": "1:58:19"}
{"current_steps": 3185, "total_steps": 4403, "loss": 0.1551, "lr": 8.63770113354794e-06, "epoch": 5.0635930047694755, "percentage": 72.34, "elapsed_time": "5:08:09", "remaining_time": "1:57:50"}
{"current_steps": 3190, "total_steps": 4403, "loss": 0.1675, "lr": 8.572536408378587e-06, "epoch": 5.07154213036566, "percentage": 72.45, "elapsed_time": "5:08:39", "remaining_time": "1:57:21"}
{"current_steps": 3195, "total_steps": 4403, "loss": 0.1632, "lr": 8.507551305441408e-06, "epoch": 5.079491255961845, "percentage": 72.56, "elapsed_time": "5:09:06", "remaining_time": "1:56:52"}
{"current_steps": 3200, "total_steps": 4403, "loss": 0.1796, "lr": 8.442746846202711e-06, "epoch": 5.087440381558029, "percentage": 72.68, "elapsed_time": "5:09:33", "remaining_time": "1:56:22"}
{"current_steps": 3205, "total_steps": 4403, "loss": 0.1771, "lr": 8.378124049289394e-06, "epoch": 5.095389507154213, "percentage": 72.79, "elapsed_time": "5:10:02", "remaining_time": "1:55:53"}
{"current_steps": 3210, "total_steps": 4403, "loss": 0.1653, "lr": 8.313683930472889e-06, "epoch": 5.103338632750398, "percentage": 72.9, "elapsed_time": "5:10:34", "remaining_time": "1:55:25"}
{"current_steps": 3215, "total_steps": 4403, "loss": 0.1585, "lr": 8.249427502653198e-06, "epoch": 5.111287758346582, "percentage": 73.02, "elapsed_time": "5:10:58", "remaining_time": "1:54:54"}
{"current_steps": 3220, "total_steps": 4403, "loss": 0.1863, "lr": 8.185355775842982e-06, "epoch": 5.119236883942766, "percentage": 73.13, "elapsed_time": "5:11:32", "remaining_time": "1:54:27"}
{"current_steps": 3225, "total_steps": 4403, "loss": 0.1484, "lr": 8.12146975715171e-06, "epoch": 5.127186009538951, "percentage": 73.25, "elapsed_time": "5:12:00", "remaining_time": "1:53:58"}
{"current_steps": 3230, "total_steps": 4403, "loss": 0.1631, "lr": 8.057770450769771e-06, "epoch": 5.135135135135135, "percentage": 73.36, "elapsed_time": "5:12:34", "remaining_time": "1:53:30"}
{"current_steps": 3235, "total_steps": 4403, "loss": 0.1631, "lr": 7.994258857952748e-06, "epoch": 5.143084260731319, "percentage": 73.47, "elapsed_time": "5:13:04", "remaining_time": "1:53:02"}
{"current_steps": 3240, "total_steps": 4403, "loss": 0.1627, "lr": 7.93093597700564e-06, "epoch": 5.151033386327504, "percentage": 73.59, "elapsed_time": "5:13:34", "remaining_time": "1:52:33"}
{"current_steps": 3245, "total_steps": 4403, "loss": 0.1687, "lr": 7.867802803267182e-06, "epoch": 5.158982511923688, "percentage": 73.7, "elapsed_time": "5:14:00", "remaining_time": "1:52:03"}
{"current_steps": 3250, "total_steps": 4403, "loss": 0.1647, "lr": 7.80486032909421e-06, "epoch": 5.166931637519872, "percentage": 73.81, "elapsed_time": "5:14:25", "remaining_time": "1:51:33"}
{"current_steps": 3255, "total_steps": 4403, "loss": 0.1625, "lr": 7.742109543846063e-06, "epoch": 5.174880763116057, "percentage": 73.93, "elapsed_time": "5:14:55", "remaining_time": "1:51:04"}
{"current_steps": 3260, "total_steps": 4403, "loss": 0.1879, "lr": 7.679551433869001e-06, "epoch": 5.1828298887122415, "percentage": 74.04, "elapsed_time": "5:15:22", "remaining_time": "1:50:34"}
{"current_steps": 3265, "total_steps": 4403, "loss": 0.1876, "lr": 7.617186982480749e-06, "epoch": 5.1907790143084265, "percentage": 74.15, "elapsed_time": "5:15:47", "remaining_time": "1:50:04"}
{"current_steps": 3270, "total_steps": 4403, "loss": 0.1612, "lr": 7.5550171699549945e-06, "epoch": 5.198728139904611, "percentage": 74.27, "elapsed_time": "5:16:12", "remaining_time": "1:49:33"}
{"current_steps": 3275, "total_steps": 4403, "loss": 0.1724, "lr": 7.493042973506e-06, "epoch": 5.206677265500795, "percentage": 74.38, "elapsed_time": "5:16:41", "remaining_time": "1:49:04"}
{"current_steps": 3280, "total_steps": 4403, "loss": 0.16, "lr": 7.431265367273268e-06, "epoch": 5.21462639109698, "percentage": 74.49, "elapsed_time": "5:17:11", "remaining_time": "1:48:36"}
{"current_steps": 3285, "total_steps": 4403, "loss": 0.1823, "lr": 7.36968532230617e-06, "epoch": 5.222575516693164, "percentage": 74.61, "elapsed_time": "5:17:47", "remaining_time": "1:48:09"}
{"current_steps": 3290, "total_steps": 4403, "loss": 0.1914, "lr": 7.308303806548742e-06, "epoch": 5.230524642289348, "percentage": 74.72, "elapsed_time": "5:18:13", "remaining_time": "1:47:39"}
{"current_steps": 3295, "total_steps": 4403, "loss": 0.1761, "lr": 7.247121784824445e-06, "epoch": 5.238473767885533, "percentage": 74.84, "elapsed_time": "5:18:42", "remaining_time": "1:47:10"}
{"current_steps": 3300, "total_steps": 4403, "loss": 0.1649, "lr": 7.186140218820979e-06, "epoch": 5.246422893481717, "percentage": 74.95, "elapsed_time": "5:19:11", "remaining_time": "1:46:41"}
{"current_steps": 3305, "total_steps": 4403, "loss": 0.1624, "lr": 7.125360067075196e-06, "epoch": 5.254372019077901, "percentage": 75.06, "elapsed_time": "5:19:38", "remaining_time": "1:46:11"}
{"current_steps": 3310, "total_steps": 4403, "loss": 0.1737, "lr": 7.0647822849580385e-06, "epoch": 5.262321144674086, "percentage": 75.18, "elapsed_time": "5:20:07", "remaining_time": "1:45:42"}
{"current_steps": 3315, "total_steps": 4403, "loss": 0.1684, "lr": 7.004407824659491e-06, "epoch": 5.27027027027027, "percentage": 75.29, "elapsed_time": "5:20:35", "remaining_time": "1:45:13"}
{"current_steps": 3320, "total_steps": 4403, "loss": 0.1754, "lr": 6.944237635173627e-06, "epoch": 5.278219395866454, "percentage": 75.4, "elapsed_time": "5:21:02", "remaining_time": "1:44:43"}
{"current_steps": 3325, "total_steps": 4403, "loss": 0.1731, "lr": 6.88427266228372e-06, "epoch": 5.286168521462639, "percentage": 75.52, "elapsed_time": "5:21:28", "remaining_time": "1:44:13"}
{"current_steps": 3330, "total_steps": 4403, "loss": 0.1745, "lr": 6.824513848547323e-06, "epoch": 5.294117647058823, "percentage": 75.63, "elapsed_time": "5:21:53", "remaining_time": "1:43:43"}
{"current_steps": 3335, "total_steps": 4403, "loss": 0.1893, "lr": 6.764962133281503e-06, "epoch": 5.302066772655008, "percentage": 75.74, "elapsed_time": "5:22:26", "remaining_time": "1:43:15"}
{"current_steps": 3340, "total_steps": 4403, "loss": 0.183, "lr": 6.705618452548057e-06, "epoch": 5.3100158982511925, "percentage": 75.86, "elapsed_time": "5:22:57", "remaining_time": "1:42:47"}
{"current_steps": 3345, "total_steps": 4403, "loss": 0.1724, "lr": 6.646483739138778e-06, "epoch": 5.317965023847377, "percentage": 75.97, "elapsed_time": "5:23:24", "remaining_time": "1:42:17"}
{"current_steps": 3350, "total_steps": 4403, "loss": 0.1632, "lr": 6.5875589225608376e-06, "epoch": 5.325914149443562, "percentage": 76.08, "elapsed_time": "5:23:48", "remaining_time": "1:41:46"}
{"current_steps": 3355, "total_steps": 4403, "loss": 0.1831, "lr": 6.528844929022134e-06, "epoch": 5.333863275039746, "percentage": 76.2, "elapsed_time": "5:24:17", "remaining_time": "1:41:17"}
{"current_steps": 3360, "total_steps": 4403, "loss": 0.1659, "lr": 6.4703426814167434e-06, "epoch": 5.34181240063593, "percentage": 76.31, "elapsed_time": "5:24:46", "remaining_time": "1:40:48"}
{"current_steps": 3365, "total_steps": 4403, "loss": 0.1866, "lr": 6.412053099310449e-06, "epoch": 5.349761526232115, "percentage": 76.43, "elapsed_time": "5:25:17", "remaining_time": "1:40:20"}
{"current_steps": 3370, "total_steps": 4403, "loss": 0.1661, "lr": 6.353977098926225e-06, "epoch": 5.357710651828299, "percentage": 76.54, "elapsed_time": "5:25:39", "remaining_time": "1:39:49"}
{"current_steps": 3375, "total_steps": 4403, "loss": 0.1746, "lr": 6.296115593129888e-06, "epoch": 5.365659777424483, "percentage": 76.65, "elapsed_time": "5:26:09", "remaining_time": "1:39:20"}
{"current_steps": 3380, "total_steps": 4403, "loss": 0.155, "lr": 6.238469491415728e-06, "epoch": 5.373608903020668, "percentage": 76.77, "elapsed_time": "5:26:39", "remaining_time": "1:38:51"}
{"current_steps": 3385, "total_steps": 4403, "loss": 0.1474, "lr": 6.181039699892206e-06, "epoch": 5.381558028616852, "percentage": 76.88, "elapsed_time": "5:27:08", "remaining_time": "1:38:23"}
{"current_steps": 3390, "total_steps": 4403, "loss": 0.154, "lr": 6.123827121267709e-06, "epoch": 5.389507154213036, "percentage": 76.99, "elapsed_time": "5:27:38", "remaining_time": "1:37:54"}
{"current_steps": 3395, "total_steps": 4403, "loss": 0.186, "lr": 6.066832654836396e-06, "epoch": 5.397456279809221, "percentage": 77.11, "elapsed_time": "5:28:05", "remaining_time": "1:37:24"}
{"current_steps": 3400, "total_steps": 4403, "loss": 0.1651, "lr": 6.010057196464012e-06, "epoch": 5.405405405405405, "percentage": 77.22, "elapsed_time": "5:28:31", "remaining_time": "1:36:55"}
{"current_steps": 3405, "total_steps": 4403, "loss": 0.1721, "lr": 5.9535016385738335e-06, "epoch": 5.413354531001589, "percentage": 77.33, "elapsed_time": "5:28:56", "remaining_time": "1:36:24"}
{"current_steps": 3410, "total_steps": 4403, "loss": 0.1687, "lr": 5.897166870132658e-06, "epoch": 5.421303656597774, "percentage": 77.45, "elapsed_time": "5:29:28", "remaining_time": "1:35:56"}
{"current_steps": 3415, "total_steps": 4403, "loss": 0.1831, "lr": 5.841053776636781e-06, "epoch": 5.4292527821939585, "percentage": 77.56, "elapsed_time": "5:29:56", "remaining_time": "1:35:27"}
{"current_steps": 3420, "total_steps": 4403, "loss": 0.1626, "lr": 5.7851632400981285e-06, "epoch": 5.4372019077901435, "percentage": 77.67, "elapsed_time": "5:30:21", "remaining_time": "1:34:57"}
{"current_steps": 3425, "total_steps": 4403, "loss": 0.1682, "lr": 5.729496139030377e-06, "epoch": 5.4451510333863276, "percentage": 77.79, "elapsed_time": "5:30:48", "remaining_time": "1:34:27"}
{"current_steps": 3430, "total_steps": 4403, "loss": 0.1518, "lr": 5.67405334843512e-06, "epoch": 5.453100158982512, "percentage": 77.9, "elapsed_time": "5:31:14", "remaining_time": "1:33:57"}
{"current_steps": 3435, "total_steps": 4403, "loss": 0.1584, "lr": 5.618835739788136e-06, "epoch": 5.461049284578697, "percentage": 78.01, "elapsed_time": "5:31:41", "remaining_time": "1:33:28"}
{"current_steps": 3440, "total_steps": 4403, "loss": 0.1616, "lr": 5.563844181025706e-06, "epoch": 5.468998410174881, "percentage": 78.13, "elapsed_time": "5:32:11", "remaining_time": "1:32:59"}
{"current_steps": 3445, "total_steps": 4403, "loss": 0.1673, "lr": 5.509079536530939e-06, "epoch": 5.476947535771065, "percentage": 78.24, "elapsed_time": "5:32:38", "remaining_time": "1:32:30"}
{"current_steps": 3450, "total_steps": 4403, "loss": 0.1596, "lr": 5.4545426671201905e-06, "epoch": 5.48489666136725, "percentage": 78.36, "elapsed_time": "5:33:09", "remaining_time": "1:32:01"}
{"current_steps": 3455, "total_steps": 4403, "loss": 0.1793, "lr": 5.400234430029561e-06, "epoch": 5.492845786963434, "percentage": 78.47, "elapsed_time": "5:33:42", "remaining_time": "1:31:34"}
{"current_steps": 3460, "total_steps": 4403, "loss": 0.1765, "lr": 5.346155678901392e-06, "epoch": 5.500794912559618, "percentage": 78.58, "elapsed_time": "5:34:16", "remaining_time": "1:31:06"}
{"current_steps": 3465, "total_steps": 4403, "loss": 0.1684, "lr": 5.292307263770859e-06, "epoch": 5.508744038155803, "percentage": 78.7, "elapsed_time": "5:34:46", "remaining_time": "1:30:37"}
{"current_steps": 3470, "total_steps": 4403, "loss": 0.1796, "lr": 5.238690031052603e-06, "epoch": 5.516693163751987, "percentage": 78.81, "elapsed_time": "5:35:11", "remaining_time": "1:30:07"}
{"current_steps": 3475, "total_steps": 4403, "loss": 0.1721, "lr": 5.185304823527426e-06, "epoch": 5.524642289348172, "percentage": 78.92, "elapsed_time": "5:35:34", "remaining_time": "1:29:36"}
{"current_steps": 3480, "total_steps": 4403, "loss": 0.1701, "lr": 5.132152480329072e-06, "epoch": 5.532591414944356, "percentage": 79.04, "elapsed_time": "5:36:03", "remaining_time": "1:29:08"}
{"current_steps": 3485, "total_steps": 4403, "loss": 0.1553, "lr": 5.07923383693099e-06, "epoch": 5.54054054054054, "percentage": 79.15, "elapsed_time": "5:36:31", "remaining_time": "1:28:38"}
{"current_steps": 3490, "total_steps": 4403, "loss": 0.157, "lr": 5.0265497251332314e-06, "epoch": 5.548489666136725, "percentage": 79.26, "elapsed_time": "5:36:55", "remaining_time": "1:28:08"}
{"current_steps": 3495, "total_steps": 4403, "loss": 0.1608, "lr": 4.974100973049385e-06, "epoch": 5.556438791732909, "percentage": 79.38, "elapsed_time": "5:37:21", "remaining_time": "1:27:38"}
{"current_steps": 3500, "total_steps": 4403, "loss": 0.1614, "lr": 4.921888405093525e-06, "epoch": 5.5643879173290935, "percentage": 79.49, "elapsed_time": "5:37:48", "remaining_time": "1:27:09"}
{"current_steps": 3505, "total_steps": 4403, "loss": 0.1689, "lr": 4.869912841967286e-06, "epoch": 5.5723370429252785, "percentage": 79.6, "elapsed_time": "5:38:17", "remaining_time": "1:26:40"}
{"current_steps": 3510, "total_steps": 4403, "loss": 0.1623, "lr": 4.818175100646952e-06, "epoch": 5.580286168521463, "percentage": 79.72, "elapsed_time": "5:38:45", "remaining_time": "1:26:11"}
{"current_steps": 3515, "total_steps": 4403, "loss": 0.1759, "lr": 4.766675994370598e-06, "epoch": 5.588235294117647, "percentage": 79.83, "elapsed_time": "5:39:16", "remaining_time": "1:25:42"}
{"current_steps": 3520, "total_steps": 4403, "loss": 0.1819, "lr": 4.7154163326253265e-06, "epoch": 5.596184419713832, "percentage": 79.95, "elapsed_time": "5:39:43", "remaining_time": "1:25:13"}
{"current_steps": 3525, "total_steps": 4403, "loss": 0.1665, "lr": 4.664396921134551e-06, "epoch": 5.604133545310016, "percentage": 80.06, "elapsed_time": "5:40:14", "remaining_time": "1:24:44"}
{"current_steps": 3530, "total_steps": 4403, "loss": 0.1866, "lr": 4.613618561845306e-06, "epoch": 5.6120826709062, "percentage": 80.17, "elapsed_time": "5:40:43", "remaining_time": "1:24:15"}
{"current_steps": 3535, "total_steps": 4403, "loss": 0.1645, "lr": 4.563082052915649e-06, "epoch": 5.620031796502385, "percentage": 80.29, "elapsed_time": "5:41:10", "remaining_time": "1:23:46"}
{"current_steps": 3540, "total_steps": 4403, "loss": 0.1576, "lr": 4.512788188702135e-06, "epoch": 5.627980922098569, "percentage": 80.4, "elapsed_time": "5:41:39", "remaining_time": "1:23:17"}
{"current_steps": 3545, "total_steps": 4403, "loss": 0.1535, "lr": 4.462737759747315e-06, "epoch": 5.635930047694753, "percentage": 80.51, "elapsed_time": "5:42:08", "remaining_time": "1:22:48"}
{"current_steps": 3550, "total_steps": 4403, "loss": 0.1754, "lr": 4.412931552767295e-06, "epoch": 5.643879173290938, "percentage": 80.63, "elapsed_time": "5:42:38", "remaining_time": "1:22:19"}
{"current_steps": 3555, "total_steps": 4403, "loss": 0.1724, "lr": 4.363370350639405e-06, "epoch": 5.651828298887122, "percentage": 80.74, "elapsed_time": "5:43:05", "remaining_time": "1:21:50"}
{"current_steps": 3560, "total_steps": 4403, "loss": 0.1767, "lr": 4.314054932389859e-06, "epoch": 5.659777424483307, "percentage": 80.85, "elapsed_time": "5:43:32", "remaining_time": "1:21:21"}
{"current_steps": 3565, "total_steps": 4403, "loss": 0.1673, "lr": 4.2649860731815255e-06, "epoch": 5.667726550079491, "percentage": 80.97, "elapsed_time": "5:44:01", "remaining_time": "1:20:52"}
{"current_steps": 3570, "total_steps": 4403, "loss": 0.168, "lr": 4.216164544301755e-06, "epoch": 5.675675675675675, "percentage": 81.08, "elapsed_time": "5:44:32", "remaining_time": "1:20:23"}
{"current_steps": 3575, "total_steps": 4403, "loss": 0.1764, "lr": 4.167591113150225e-06, "epoch": 5.68362480127186, "percentage": 81.19, "elapsed_time": "5:45:04", "remaining_time": "1:19:55"}
{"current_steps": 3580, "total_steps": 4403, "loss": 0.1708, "lr": 4.119266543226921e-06, "epoch": 5.6915739268680445, "percentage": 81.31, "elapsed_time": "5:45:32", "remaining_time": "1:19:26"}
{"current_steps": 3585, "total_steps": 4403, "loss": 0.1605, "lr": 4.071191594120081e-06, "epoch": 5.699523052464229, "percentage": 81.42, "elapsed_time": "5:45:58", "remaining_time": "1:18:56"}
{"current_steps": 3590, "total_steps": 4403, "loss": 0.174, "lr": 4.023367021494313e-06, "epoch": 5.707472178060414, "percentage": 81.54, "elapsed_time": "5:46:28", "remaining_time": "1:18:27"}
{"current_steps": 3595, "total_steps": 4403, "loss": 0.1755, "lr": 3.975793577078682e-06, "epoch": 5.715421303656598, "percentage": 81.65, "elapsed_time": "5:46:56", "remaining_time": "1:17:58"}
{"current_steps": 3600, "total_steps": 4403, "loss": 0.1587, "lr": 3.928472008654891e-06, "epoch": 5.723370429252782, "percentage": 81.76, "elapsed_time": "5:47:29", "remaining_time": "1:17:30"}
{"current_steps": 3605, "total_steps": 4403, "loss": 0.1605, "lr": 3.881403060045545e-06, "epoch": 5.731319554848967, "percentage": 81.88, "elapsed_time": "5:47:55", "remaining_time": "1:17:00"}
{"current_steps": 3610, "total_steps": 4403, "loss": 0.1667, "lr": 3.834587471102464e-06, "epoch": 5.739268680445151, "percentage": 81.99, "elapsed_time": "5:48:27", "remaining_time": "1:16:32"}
{"current_steps": 3615, "total_steps": 4403, "loss": 0.1634, "lr": 3.7880259776950224e-06, "epoch": 5.747217806041336, "percentage": 82.1, "elapsed_time": "5:48:58", "remaining_time": "1:16:04"}
{"current_steps": 3620, "total_steps": 4403, "loss": 0.154, "lr": 3.741719311698608e-06, "epoch": 5.75516693163752, "percentage": 82.22, "elapsed_time": "5:49:26", "remaining_time": "1:15:34"}
{"current_steps": 3625, "total_steps": 4403, "loss": 0.1521, "lr": 3.69566820098312e-06, "epoch": 5.763116057233704, "percentage": 82.33, "elapsed_time": "5:49:53", "remaining_time": "1:15:05"}
{"current_steps": 3630, "total_steps": 4403, "loss": 0.1729, "lr": 3.6498733694015197e-06, "epoch": 5.771065182829888, "percentage": 82.44, "elapsed_time": "5:50:25", "remaining_time": "1:14:37"}
{"current_steps": 3635, "total_steps": 4403, "loss": 0.1761, "lr": 3.604335536778434e-06, "epoch": 5.779014308426073, "percentage": 82.56, "elapsed_time": "5:50:53", "remaining_time": "1:14:08"}
{"current_steps": 3640, "total_steps": 4403, "loss": 0.1614, "lr": 3.559055418898887e-06, "epoch": 5.786963434022257, "percentage": 82.67, "elapsed_time": "5:51:22", "remaining_time": "1:13:39"}
{"current_steps": 3645, "total_steps": 4403, "loss": 0.1696, "lr": 3.5140337274970014e-06, "epoch": 5.794912559618442, "percentage": 82.78, "elapsed_time": "5:51:55", "remaining_time": "1:13:11"}
{"current_steps": 3650, "total_steps": 4403, "loss": 0.1653, "lr": 3.469271170244832e-06, "epoch": 5.802861685214626, "percentage": 82.9, "elapsed_time": "5:52:24", "remaining_time": "1:12:42"}
{"current_steps": 3655, "total_steps": 4403, "loss": 0.1677, "lr": 3.4247684507412605e-06, "epoch": 5.8108108108108105, "percentage": 83.01, "elapsed_time": "5:52:52", "remaining_time": "1:12:12"}
{"current_steps": 3660, "total_steps": 4403, "loss": 0.1674, "lr": 3.380526268500892e-06, "epoch": 5.8187599364069955, "percentage": 83.13, "elapsed_time": "5:53:18", "remaining_time": "1:11:43"}
{"current_steps": 3665, "total_steps": 4403, "loss": 0.1616, "lr": 3.3365453189430984e-06, "epoch": 5.82670906200318, "percentage": 83.24, "elapsed_time": "5:53:42", "remaining_time": "1:11:13"}
{"current_steps": 3670, "total_steps": 4403, "loss": 0.1817, "lr": 3.292826293381071e-06, "epoch": 5.834658187599364, "percentage": 83.35, "elapsed_time": "5:54:15", "remaining_time": "1:10:45"}
{"current_steps": 3675, "total_steps": 4403, "loss": 0.1626, "lr": 3.2493698790109664e-06, "epoch": 5.842607313195549, "percentage": 83.47, "elapsed_time": "5:54:43", "remaining_time": "1:10:16"}
{"current_steps": 3680, "total_steps": 4403, "loss": 0.1685, "lr": 3.2061767589010763e-06, "epoch": 5.850556438791733, "percentage": 83.58, "elapsed_time": "5:55:13", "remaining_time": "1:09:47"}
{"current_steps": 3685, "total_steps": 4403, "loss": 0.166, "lr": 3.1632476119811285e-06, "epoch": 5.858505564387917, "percentage": 83.69, "elapsed_time": "5:55:46", "remaining_time": "1:09:19"}
{"current_steps": 3690, "total_steps": 4403, "loss": 0.1566, "lr": 3.120583113031579e-06, "epoch": 5.866454689984102, "percentage": 83.81, "elapsed_time": "5:56:13", "remaining_time": "1:08:49"}
{"current_steps": 3695, "total_steps": 4403, "loss": 0.1683, "lr": 3.07818393267304e-06, "epoch": 5.874403815580286, "percentage": 83.92, "elapsed_time": "5:56:47", "remaining_time": "1:08:21"}
{"current_steps": 3700, "total_steps": 4403, "loss": 0.1737, "lr": 3.036050737355709e-06, "epoch": 5.882352941176471, "percentage": 84.03, "elapsed_time": "5:57:17", "remaining_time": "1:07:53"}
{"current_steps": 3705, "total_steps": 4403, "loss": 0.1727, "lr": 2.9941841893489075e-06, "epoch": 5.890302066772655, "percentage": 84.15, "elapsed_time": "5:57:46", "remaining_time": "1:07:24"}
{"current_steps": 3710, "total_steps": 4403, "loss": 0.1648, "lr": 2.9525849467306766e-06, "epoch": 5.898251192368839, "percentage": 84.26, "elapsed_time": "5:58:13", "remaining_time": "1:06:54"}
{"current_steps": 3715, "total_steps": 4403, "loss": 0.1803, "lr": 2.9112536633774245e-06, "epoch": 5.906200317965024, "percentage": 84.37, "elapsed_time": "5:58:41", "remaining_time": "1:06:25"}
{"current_steps": 3720, "total_steps": 4403, "loss": 0.1613, "lr": 2.8701909889536384e-06, "epoch": 5.914149443561208, "percentage": 84.49, "elapsed_time": "5:59:08", "remaining_time": "1:05:56"}
{"current_steps": 3725, "total_steps": 4403, "loss": 0.1628, "lr": 2.8293975689017018e-06, "epoch": 5.922098569157392, "percentage": 84.6, "elapsed_time": "5:59:35", "remaining_time": "1:05:27"}
{"current_steps": 3730, "total_steps": 4403, "loss": 0.1705, "lr": 2.788874044431722e-06, "epoch": 5.930047694753577, "percentage": 84.71, "elapsed_time": "6:00:08", "remaining_time": "1:04:58"}
{"current_steps": 3735, "total_steps": 4403, "loss": 0.1711, "lr": 2.7486210525114533e-06, "epoch": 5.9379968203497615, "percentage": 84.83, "elapsed_time": "6:00:36", "remaining_time": "1:04:29"}
{"current_steps": 3740, "total_steps": 4403, "loss": 0.1764, "lr": 2.708639225856311e-06, "epoch": 5.945945945945946, "percentage": 84.94, "elapsed_time": "6:01:07", "remaining_time": "1:04:01"}
{"current_steps": 3745, "total_steps": 4403, "loss": 0.1525, "lr": 2.6689291929193962e-06, "epoch": 5.953895071542131, "percentage": 85.06, "elapsed_time": "6:01:30", "remaining_time": "1:03:31"}
{"current_steps": 3750, "total_steps": 4403, "loss": 0.1654, "lr": 2.629491577881622e-06, "epoch": 5.961844197138315, "percentage": 85.17, "elapsed_time": "6:01:56", "remaining_time": "1:03:01"}
{"current_steps": 3755, "total_steps": 4403, "loss": 0.1736, "lr": 2.5903270006419236e-06, "epoch": 5.9697933227345, "percentage": 85.28, "elapsed_time": "6:02:25", "remaining_time": "1:02:32"}
{"current_steps": 3760, "total_steps": 4403, "loss": 0.1667, "lr": 2.551436076807501e-06, "epoch": 5.977742448330684, "percentage": 85.4, "elapsed_time": "6:02:52", "remaining_time": "1:02:03"}
{"current_steps": 3765, "total_steps": 4403, "loss": 0.1717, "lr": 2.5128194176841226e-06, "epoch": 5.985691573926868, "percentage": 85.51, "elapsed_time": "6:03:23", "remaining_time": "1:01:34"}
{"current_steps": 3770, "total_steps": 4403, "loss": 0.1747, "lr": 2.4744776302665563e-06, "epoch": 5.993640699523052, "percentage": 85.62, "elapsed_time": "6:03:58", "remaining_time": "1:01:06"}
{"current_steps": 3775, "total_steps": 4403, "loss": 0.1631, "lr": 2.436411317228997e-06, "epoch": 6.001589825119237, "percentage": 85.74, "elapsed_time": "6:04:24", "remaining_time": "1:00:37"}
{"current_steps": 3780, "total_steps": 4403, "loss": 0.1655, "lr": 2.3986210769155994e-06, "epoch": 6.009538950715421, "percentage": 85.85, "elapsed_time": "6:04:53", "remaining_time": "1:00:08"}
{"current_steps": 3785, "total_steps": 4403, "loss": 0.1759, "lr": 2.361107503331095e-06, "epoch": 6.017488076311606, "percentage": 85.96, "elapsed_time": "6:05:25", "remaining_time": "0:59:39"}
{"current_steps": 3790, "total_steps": 4403, "loss": 0.1692, "lr": 2.3238711861314165e-06, "epoch": 6.02543720190779, "percentage": 86.08, "elapsed_time": "6:05:55", "remaining_time": "0:59:11"}
{"current_steps": 3795, "total_steps": 4403, "loss": 0.1557, "lr": 2.2869127106144663e-06, "epoch": 6.033386327503974, "percentage": 86.19, "elapsed_time": "6:06:25", "remaining_time": "0:58:42"}
{"current_steps": 3800, "total_steps": 4403, "loss": 0.1652, "lr": 2.2502326577109e-06, "epoch": 6.041335453100159, "percentage": 86.3, "elapsed_time": "6:06:51", "remaining_time": "0:58:12"}
{"current_steps": 3805, "total_steps": 4403, "loss": 0.1497, "lr": 2.213831603974985e-06, "epoch": 6.049284578696343, "percentage": 86.42, "elapsed_time": "6:07:15", "remaining_time": "0:57:43"}
{"current_steps": 3810, "total_steps": 4403, "loss": 0.1606, "lr": 2.1777101215755624e-06, "epoch": 6.0572337042925275, "percentage": 86.53, "elapsed_time": "6:07:41", "remaining_time": "0:57:13"}
{"current_steps": 3815, "total_steps": 4403, "loss": 0.1554, "lr": 2.1418687782870284e-06, "epoch": 6.0651828298887125, "percentage": 86.65, "elapsed_time": "6:08:10", "remaining_time": "0:56:44"}
{"current_steps": 3820, "total_steps": 4403, "loss": 0.1551, "lr": 2.1063081374804263e-06, "epoch": 6.073131955484897, "percentage": 86.76, "elapsed_time": "6:08:40", "remaining_time": "0:56:15"}
{"current_steps": 3825, "total_steps": 4403, "loss": 0.1608, "lr": 2.0710287581145884e-06, "epoch": 6.081081081081081, "percentage": 86.87, "elapsed_time": "6:09:07", "remaining_time": "0:55:46"}
{"current_steps": 3830, "total_steps": 4403, "loss": 0.1653, "lr": 2.036031194727346e-06, "epoch": 6.089030206677266, "percentage": 86.99, "elapsed_time": "6:09:38", "remaining_time": "0:55:18"}
{"current_steps": 3835, "total_steps": 4403, "loss": 0.1393, "lr": 2.0013159974268094e-06, "epoch": 6.09697933227345, "percentage": 87.1, "elapsed_time": "6:10:08", "remaining_time": "0:54:49"}
{"current_steps": 3840, "total_steps": 4403, "loss": 0.1647, "lr": 1.9668837118827346e-06, "epoch": 6.104928457869635, "percentage": 87.21, "elapsed_time": "6:10:37", "remaining_time": "0:54:20"}
{"current_steps": 3845, "total_steps": 4403, "loss": 0.1539, "lr": 1.932734879317937e-06, "epoch": 6.112877583465819, "percentage": 87.33, "elapsed_time": "6:11:01", "remaining_time": "0:53:50"}
{"current_steps": 3850, "total_steps": 4403, "loss": 0.1639, "lr": 1.8988700364997758e-06, "epoch": 6.120826709062003, "percentage": 87.44, "elapsed_time": "6:11:31", "remaining_time": "0:53:21"}
{"current_steps": 3855, "total_steps": 4403, "loss": 0.1603, "lr": 1.8652897157317395e-06, "epoch": 6.128775834658188, "percentage": 87.55, "elapsed_time": "6:11:58", "remaining_time": "0:52:52"}
{"current_steps": 3860, "total_steps": 4403, "loss": 0.1598, "lr": 1.8319944448450578e-06, "epoch": 6.136724960254372, "percentage": 87.67, "elapsed_time": "6:12:27", "remaining_time": "0:52:23"}
{"current_steps": 3865, "total_steps": 4403, "loss": 0.1561, "lr": 1.7989847471904065e-06, "epoch": 6.144674085850556, "percentage": 87.78, "elapsed_time": "6:12:56", "remaining_time": "0:51:54"}
{"current_steps": 3870, "total_steps": 4403, "loss": 0.1585, "lr": 1.766261141629706e-06, "epoch": 6.152623211446741, "percentage": 87.89, "elapsed_time": "6:13:21", "remaining_time": "0:51:25"}
{"current_steps": 3875, "total_steps": 4403, "loss": 0.1618, "lr": 1.7338241425279244e-06, "epoch": 6.160572337042925, "percentage": 88.01, "elapsed_time": "6:13:50", "remaining_time": "0:50:56"}
{"current_steps": 3880, "total_steps": 4403, "loss": 0.1496, "lr": 1.7016742597450341e-06, "epoch": 6.168521462639109, "percentage": 88.12, "elapsed_time": "6:14:19", "remaining_time": "0:50:27"}
{"current_steps": 3885, "total_steps": 4403, "loss": 0.1553, "lr": 1.6698119986279726e-06, "epoch": 6.176470588235294, "percentage": 88.24, "elapsed_time": "6:14:47", "remaining_time": "0:49:58"}
{"current_steps": 3890, "total_steps": 4403, "loss": 0.1721, "lr": 1.6382378600026982e-06, "epoch": 6.1844197138314785, "percentage": 88.35, "elapsed_time": "6:15:16", "remaining_time": "0:49:29"}
{"current_steps": 3895, "total_steps": 4403, "loss": 0.1813, "lr": 1.60695234016633e-06, "epoch": 6.192368839427663, "percentage": 88.46, "elapsed_time": "6:15:43", "remaining_time": "0:49:00"}
{"current_steps": 3900, "total_steps": 4403, "loss": 0.1542, "lr": 1.5759559308793448e-06, "epoch": 6.200317965023848, "percentage": 88.58, "elapsed_time": "6:16:17", "remaining_time": "0:48:31"}
{"current_steps": 3905, "total_steps": 4403, "loss": 0.171, "lr": 1.5452491193578412e-06, "epoch": 6.208267090620032, "percentage": 88.69, "elapsed_time": "6:16:45", "remaining_time": "0:48:02"}
{"current_steps": 3910, "total_steps": 4403, "loss": 0.1512, "lr": 1.5148323882658767e-06, "epoch": 6.216216216216216, "percentage": 88.8, "elapsed_time": "6:17:18", "remaining_time": "0:47:34"}
{"current_steps": 3915, "total_steps": 4403, "loss": 0.1483, "lr": 1.484706215707905e-06, "epoch": 6.224165341812401, "percentage": 88.92, "elapsed_time": "6:17:46", "remaining_time": "0:47:05"}
{"current_steps": 3920, "total_steps": 4403, "loss": 0.1616, "lr": 1.4548710752212292e-06, "epoch": 6.232114467408585, "percentage": 89.03, "elapsed_time": "6:18:12", "remaining_time": "0:46:36"}
{"current_steps": 3925, "total_steps": 4403, "loss": 0.17, "lr": 1.425327435768582e-06, "epoch": 6.24006359300477, "percentage": 89.14, "elapsed_time": "6:18:42", "remaining_time": "0:46:07"}
{"current_steps": 3930, "total_steps": 4403, "loss": 0.1627, "lr": 1.3960757617307486e-06, "epoch": 6.248012718600954, "percentage": 89.26, "elapsed_time": "6:19:09", "remaining_time": "0:45:38"}
{"current_steps": 3935, "total_steps": 4403, "loss": 0.1422, "lr": 1.3671165128992514e-06, "epoch": 6.255961844197138, "percentage": 89.37, "elapsed_time": "6:19:36", "remaining_time": "0:45:08"}
{"current_steps": 3940, "total_steps": 4403, "loss": 0.1512, "lr": 1.3384501444691544e-06, "epoch": 6.263910969793323, "percentage": 89.48, "elapsed_time": "6:20:04", "remaining_time": "0:44:39"}
{"current_steps": 3945, "total_steps": 4403, "loss": 0.1642, "lr": 1.3100771070318796e-06, "epoch": 6.271860095389507, "percentage": 89.6, "elapsed_time": "6:20:33", "remaining_time": "0:44:10"}
{"current_steps": 3950, "total_steps": 4403, "loss": 0.1688, "lr": 1.2819978465681283e-06, "epoch": 6.279809220985691, "percentage": 89.71, "elapsed_time": "6:21:02", "remaining_time": "0:43:41"}
{"current_steps": 3955, "total_steps": 4403, "loss": 0.1699, "lr": 1.254212804440893e-06, "epoch": 6.287758346581876, "percentage": 89.83, "elapsed_time": "6:21:29", "remaining_time": "0:43:12"}
{"current_steps": 3960, "total_steps": 4403, "loss": 0.1821, "lr": 1.2267224173884929e-06, "epoch": 6.29570747217806, "percentage": 89.94, "elapsed_time": "6:22:04", "remaining_time": "0:42:44"}
{"current_steps": 3965, "total_steps": 4403, "loss": 0.1573, "lr": 1.199527117517727e-06, "epoch": 6.3036565977742445, "percentage": 90.05, "elapsed_time": "6:22:33", "remaining_time": "0:42:15"}
{"current_steps": 3970, "total_steps": 4403, "loss": 0.1583, "lr": 1.172627332297076e-06, "epoch": 6.3116057233704295, "percentage": 90.17, "elapsed_time": "6:22:57", "remaining_time": "0:41:46"}
{"current_steps": 3975, "total_steps": 4403, "loss": 0.1514, "lr": 1.1460234845499763e-06, "epoch": 6.319554848966614, "percentage": 90.28, "elapsed_time": "6:23:23", "remaining_time": "0:41:16"}
{"current_steps": 3980, "total_steps": 4403, "loss": 0.1651, "lr": 1.1197159924481804e-06, "epoch": 6.327503974562799, "percentage": 90.39, "elapsed_time": "6:23:52", "remaining_time": "0:40:47"}
{"current_steps": 3985, "total_steps": 4403, "loss": 0.1703, "lr": 1.0937052695051965e-06, "epoch": 6.335453100158983, "percentage": 90.51, "elapsed_time": "6:24:15", "remaining_time": "0:40:18"}
{"current_steps": 3990, "total_steps": 4403, "loss": 0.1574, "lr": 1.067991724569759e-06, "epoch": 6.343402225755167, "percentage": 90.62, "elapsed_time": "6:24:43", "remaining_time": "0:39:49"}
{"current_steps": 3995, "total_steps": 4403, "loss": 0.1484, "lr": 1.0425757618194265e-06, "epoch": 6.351351351351352, "percentage": 90.73, "elapsed_time": "6:25:15", "remaining_time": "0:39:20"}
{"current_steps": 4000, "total_steps": 4403, "loss": 0.1781, "lr": 1.0174577807542273e-06, "epoch": 6.359300476947536, "percentage": 90.85, "elapsed_time": "6:25:47", "remaining_time": "0:38:52"}
{"current_steps": 4005, "total_steps": 4403, "loss": 0.1687, "lr": 9.926381761903614e-07, "epoch": 6.36724960254372, "percentage": 90.96, "elapsed_time": "6:26:15", "remaining_time": "0:38:23"}
{"current_steps": 4010, "total_steps": 4403, "loss": 0.1583, "lr": 9.681173382540177e-07, "epoch": 6.375198728139905, "percentage": 91.07, "elapsed_time": "6:26:44", "remaining_time": "0:37:54"}
{"current_steps": 4015, "total_steps": 4403, "loss": 0.1523, "lr": 9.438956523752263e-07, "epoch": 6.383147853736089, "percentage": 91.19, "elapsed_time": "6:27:12", "remaining_time": "0:37:25"}
{"current_steps": 4020, "total_steps": 4403, "loss": 0.1796, "lr": 9.199734992818099e-07, "epoch": 6.391096979332273, "percentage": 91.3, "elapsed_time": "6:27:43", "remaining_time": "0:36:56"}
{"current_steps": 4025, "total_steps": 4403, "loss": 0.1676, "lr": 8.963512549933795e-07, "epoch": 6.399046104928458, "percentage": 91.41, "elapsed_time": "6:28:11", "remaining_time": "0:36:27"}
{"current_steps": 4030, "total_steps": 4403, "loss": 0.1628, "lr": 8.730292908154614e-07, "epoch": 6.406995230524642, "percentage": 91.53, "elapsed_time": "6:28:39", "remaining_time": "0:35:58"}
{"current_steps": 4035, "total_steps": 4403, "loss": 0.1647, "lr": 8.500079733336175e-07, "epoch": 6.414944356120826, "percentage": 91.64, "elapsed_time": "6:29:09", "remaining_time": "0:35:29"}
{"current_steps": 4040, "total_steps": 4403, "loss": 0.1537, "lr": 8.272876644077188e-07, "epoch": 6.422893481717011, "percentage": 91.76, "elapsed_time": "6:29:36", "remaining_time": "0:35:00"}
{"current_steps": 4045, "total_steps": 4403, "loss": 0.1647, "lr": 8.048687211662343e-07, "epoch": 6.4308426073131955, "percentage": 91.87, "elapsed_time": "6:30:00", "remaining_time": "0:34:31"}
{"current_steps": 4050, "total_steps": 4403, "loss": 0.1652, "lr": 7.827514960006266e-07, "epoch": 6.43879173290938, "percentage": 91.98, "elapsed_time": "6:30:33", "remaining_time": "0:34:02"}
{"current_steps": 4055, "total_steps": 4403, "loss": 0.1586, "lr": 7.609363365598165e-07, "epoch": 6.4467408585055646, "percentage": 92.1, "elapsed_time": "6:31:00", "remaining_time": "0:33:33"}
{"current_steps": 4060, "total_steps": 4403, "loss": 0.1642, "lr": 7.394235857447119e-07, "epoch": 6.454689984101749, "percentage": 92.21, "elapsed_time": "6:31:22", "remaining_time": "0:33:03"}
{"current_steps": 4065, "total_steps": 4403, "loss": 0.1483, "lr": 7.182135817028157e-07, "epoch": 6.462639109697934, "percentage": 92.32, "elapsed_time": "6:31:51", "remaining_time": "0:32:34"}
{"current_steps": 4070, "total_steps": 4403, "loss": 0.1596, "lr": 6.973066578229248e-07, "epoch": 6.470588235294118, "percentage": 92.44, "elapsed_time": "6:32:16", "remaining_time": "0:32:05"}
{"current_steps": 4075, "total_steps": 4403, "loss": 0.1491, "lr": 6.767031427298687e-07, "epoch": 6.478537360890302, "percentage": 92.55, "elapsed_time": "6:32:45", "remaining_time": "0:31:36"}
{"current_steps": 4080, "total_steps": 4403, "loss": 0.1656, "lr": 6.564033602793584e-07, "epoch": 6.486486486486487, "percentage": 92.66, "elapsed_time": "6:33:13", "remaining_time": "0:31:07"}
{"current_steps": 4085, "total_steps": 4403, "loss": 0.1714, "lr": 6.364076295529042e-07, "epoch": 6.494435612082671, "percentage": 92.78, "elapsed_time": "6:33:45", "remaining_time": "0:30:39"}
{"current_steps": 4090, "total_steps": 4403, "loss": 0.1821, "lr": 6.167162648527703e-07, "epoch": 6.502384737678855, "percentage": 92.89, "elapsed_time": "6:34:14", "remaining_time": "0:30:10"}
{"current_steps": 4095, "total_steps": 4403, "loss": 0.1641, "lr": 5.973295756970653e-07, "epoch": 6.51033386327504, "percentage": 93.0, "elapsed_time": "6:34:39", "remaining_time": "0:29:41"}
{"current_steps": 4100, "total_steps": 4403, "loss": 0.1553, "lr": 5.782478668148672e-07, "epoch": 6.518282988871224, "percentage": 93.12, "elapsed_time": "6:35:11", "remaining_time": "0:29:12"}
{"current_steps": 4105, "total_steps": 4403, "loss": 0.1581, "lr": 5.59471438141419e-07, "epoch": 6.526232114467408, "percentage": 93.23, "elapsed_time": "6:35:39", "remaining_time": "0:28:43"}
{"current_steps": 4110, "total_steps": 4403, "loss": 0.1709, "lr": 5.410005848134315e-07, "epoch": 6.534181240063593, "percentage": 93.35, "elapsed_time": "6:36:11", "remaining_time": "0:28:14"}
{"current_steps": 4115, "total_steps": 4403, "loss": 0.1657, "lr": 5.228355971644461e-07, "epoch": 6.542130365659777, "percentage": 93.46, "elapsed_time": "6:36:43", "remaining_time": "0:27:45"}
{"current_steps": 4120, "total_steps": 4403, "loss": 0.162, "lr": 5.049767607202549e-07, "epoch": 6.550079491255962, "percentage": 93.57, "elapsed_time": "6:37:07", "remaining_time": "0:27:16"}
{"current_steps": 4125, "total_steps": 4403, "loss": 0.169, "lr": 4.874243561944214e-07, "epoch": 6.558028616852146, "percentage": 93.69, "elapsed_time": "6:37:39", "remaining_time": "0:26:47"}
{"current_steps": 4130, "total_steps": 4403, "loss": 0.1689, "lr": 4.701786594838753e-07, "epoch": 6.5659777424483305, "percentage": 93.8, "elapsed_time": "6:38:08", "remaining_time": "0:26:19"}
{"current_steps": 4135, "total_steps": 4403, "loss": 0.1669, "lr": 4.532399416645694e-07, "epoch": 6.573926868044515, "percentage": 93.91, "elapsed_time": "6:38:32", "remaining_time": "0:25:49"}
{"current_steps": 4140, "total_steps": 4403, "loss": 0.1587, "lr": 4.366084689872074e-07, "epoch": 6.5818759936407, "percentage": 94.03, "elapsed_time": "6:39:03", "remaining_time": "0:25:21"}
{"current_steps": 4145, "total_steps": 4403, "loss": 0.1542, "lr": 4.202845028730829e-07, "epoch": 6.589825119236884, "percentage": 94.14, "elapsed_time": "6:39:33", "remaining_time": "0:24:52"}
{"current_steps": 4150, "total_steps": 4403, "loss": 0.161, "lr": 4.0426829990994677e-07, "epoch": 6.597774244833069, "percentage": 94.25, "elapsed_time": "6:40:04", "remaining_time": "0:24:23"}
{"current_steps": 4155, "total_steps": 4403, "loss": 0.1558, "lr": 3.885601118479909e-07, "epoch": 6.605723370429253, "percentage": 94.37, "elapsed_time": "6:40:34", "remaining_time": "0:23:54"}
{"current_steps": 4160, "total_steps": 4403, "loss": 0.1725, "lr": 3.731601855958844e-07, "epoch": 6.613672496025437, "percentage": 94.48, "elapsed_time": "6:41:00", "remaining_time": "0:23:25"}
{"current_steps": 4165, "total_steps": 4403, "loss": 0.1637, "lr": 3.5806876321688553e-07, "epoch": 6.621621621621622, "percentage": 94.59, "elapsed_time": "6:41:33", "remaining_time": "0:22:56"}
{"current_steps": 4170, "total_steps": 4403, "loss": 0.1659, "lr": 3.4328608192505164e-07, "epoch": 6.629570747217806, "percentage": 94.71, "elapsed_time": "6:42:02", "remaining_time": "0:22:27"}
{"current_steps": 4175, "total_steps": 4403, "loss": 0.1617, "lr": 3.288123740814997e-07, "epoch": 6.63751987281399, "percentage": 94.82, "elapsed_time": "6:42:33", "remaining_time": "0:21:59"}
{"current_steps": 4180, "total_steps": 4403, "loss": 0.1707, "lr": 3.1464786719075825e-07, "epoch": 6.645468998410175, "percentage": 94.94, "elapsed_time": "6:43:05", "remaining_time": "0:21:30"}
{"current_steps": 4185, "total_steps": 4403, "loss": 0.1542, "lr": 3.0079278389719246e-07, "epoch": 6.653418124006359, "percentage": 95.05, "elapsed_time": "6:43:37", "remaining_time": "0:21:01"}
{"current_steps": 4190, "total_steps": 4403, "loss": 0.1736, "lr": 2.8724734198149585e-07, "epoch": 6.661367249602543, "percentage": 95.16, "elapsed_time": "6:44:05", "remaining_time": "0:20:32"}
{"current_steps": 4195, "total_steps": 4403, "loss": 0.1595, "lr": 2.7401175435727735e-07, "epoch": 6.669316375198728, "percentage": 95.28, "elapsed_time": "6:44:37", "remaining_time": "0:20:03"}
{"current_steps": 4200, "total_steps": 4403, "loss": 0.1498, "lr": 2.61086229067713e-07, "epoch": 6.677265500794912, "percentage": 95.39, "elapsed_time": "6:45:07", "remaining_time": "0:19:34"}
{"current_steps": 4205, "total_steps": 4403, "loss": 0.1558, "lr": 2.4847096928226846e-07, "epoch": 6.685214626391097, "percentage": 95.5, "elapsed_time": "6:45:36", "remaining_time": "0:19:05"}
{"current_steps": 4210, "total_steps": 4403, "loss": 0.1534, "lr": 2.3616617329351499e-07, "epoch": 6.6931637519872815, "percentage": 95.62, "elapsed_time": "6:46:02", "remaining_time": "0:18:36"}
{"current_steps": 4215, "total_steps": 4403, "loss": 0.1685, "lr": 2.2417203451400749e-07, "epoch": 6.701112877583466, "percentage": 95.73, "elapsed_time": "6:46:28", "remaining_time": "0:18:07"}
{"current_steps": 4220, "total_steps": 4403, "loss": 0.1609, "lr": 2.124887414732424e-07, "epoch": 6.709062003179651, "percentage": 95.84, "elapsed_time": "6:46:57", "remaining_time": "0:17:38"}
{"current_steps": 4225, "total_steps": 4403, "loss": 0.1744, "lr": 2.0111647781470233e-07, "epoch": 6.717011128775835, "percentage": 95.96, "elapsed_time": "6:47:25", "remaining_time": "0:17:09"}
{"current_steps": 4230, "total_steps": 4403, "loss": 0.1655, "lr": 1.9005542229295848e-07, "epoch": 6.724960254372019, "percentage": 96.07, "elapsed_time": "6:47:51", "remaining_time": "0:16:40"}
{"current_steps": 4235, "total_steps": 4403, "loss": 0.1809, "lr": 1.793057487708705e-07, "epoch": 6.732909379968204, "percentage": 96.18, "elapsed_time": "6:48:24", "remaining_time": "0:16:12"}
{"current_steps": 4240, "total_steps": 4403, "loss": 0.1658, "lr": 1.688676262168465e-07, "epoch": 6.740858505564388, "percentage": 96.3, "elapsed_time": "6:48:56", "remaining_time": "0:15:43"}
{"current_steps": 4245, "total_steps": 4403, "loss": 0.1816, "lr": 1.5874121870219415e-07, "epoch": 6.748807631160572, "percentage": 96.41, "elapsed_time": "6:49:25", "remaining_time": "0:15:14"}
{"current_steps": 4250, "total_steps": 4403, "loss": 0.1434, "lr": 1.4892668539853606e-07, "epoch": 6.756756756756757, "percentage": 96.53, "elapsed_time": "6:49:55", "remaining_time": "0:14:45"}
{"current_steps": 4255, "total_steps": 4403, "loss": 0.166, "lr": 1.3942418057530714e-07, "epoch": 6.764705882352941, "percentage": 96.64, "elapsed_time": "6:50:24", "remaining_time": "0:14:16"}
{"current_steps": 4260, "total_steps": 4403, "loss": 0.1631, "lr": 1.3023385359733687e-07, "epoch": 6.772655007949125, "percentage": 96.75, "elapsed_time": "6:50:51", "remaining_time": "0:13:47"}
{"current_steps": 4265, "total_steps": 4403, "loss": 0.1783, "lr": 1.213558489224953e-07, "epoch": 6.78060413354531, "percentage": 96.87, "elapsed_time": "6:51:26", "remaining_time": "0:13:18"}
{"current_steps": 4270, "total_steps": 4403, "loss": 0.1613, "lr": 1.1279030609942177e-07, "epoch": 6.788553259141494, "percentage": 96.98, "elapsed_time": "6:51:54", "remaining_time": "0:12:49"}
{"current_steps": 4275, "total_steps": 4403, "loss": 0.1502, "lr": 1.0453735976533985e-07, "epoch": 6.796502384737678, "percentage": 97.09, "elapsed_time": "6:52:21", "remaining_time": "0:12:20"}
{"current_steps": 4280, "total_steps": 4403, "loss": 0.1498, "lr": 9.659713964392358e-08, "epoch": 6.804451510333863, "percentage": 97.21, "elapsed_time": "6:52:55", "remaining_time": "0:11:51"}
{"current_steps": 4285, "total_steps": 4403, "loss": 0.1513, "lr": 8.896977054328349e-08, "epoch": 6.8124006359300475, "percentage": 97.32, "elapsed_time": "6:53:22", "remaining_time": "0:11:23"}
{"current_steps": 4290, "total_steps": 4403, "loss": 0.1756, "lr": 8.165537235398146e-08, "epoch": 6.8203497615262325, "percentage": 97.43, "elapsed_time": "6:53:54", "remaining_time": "0:10:54"}
{"current_steps": 4295, "total_steps": 4403, "loss": 0.158, "lr": 7.465406004715903e-08, "epoch": 6.828298887122417, "percentage": 97.55, "elapsed_time": "6:54:21", "remaining_time": "0:10:25"}
{"current_steps": 4300, "total_steps": 4403, "loss": 0.1573, "lr": 6.796594367272535e-08, "epoch": 6.836248012718601, "percentage": 97.66, "elapsed_time": "6:54:50", "remaining_time": "0:09:56"}
{"current_steps": 4305, "total_steps": 4403, "loss": 0.1565, "lr": 6.159112835763204e-08, "epoch": 6.844197138314786, "percentage": 97.77, "elapsed_time": "6:55:21", "remaining_time": "0:09:27"}
{"current_steps": 4310, "total_steps": 4403, "loss": 0.1572, "lr": 5.552971430421439e-08, "epoch": 6.85214626391097, "percentage": 97.89, "elapsed_time": "6:55:43", "remaining_time": "0:08:58"}
{"current_steps": 4315, "total_steps": 4403, "loss": 0.1936, "lr": 4.9781796788621605e-08, "epoch": 6.860095389507154, "percentage": 98.0, "elapsed_time": "6:56:14", "remaining_time": "0:08:29"}
{"current_steps": 4320, "total_steps": 4403, "loss": 0.1627, "lr": 4.434746615932018e-08, "epoch": 6.868044515103339, "percentage": 98.11, "elapsed_time": "6:56:46", "remaining_time": "0:08:00"}
{"current_steps": 4325, "total_steps": 4403, "loss": 0.1683, "lr": 3.922680783566168e-08, "epoch": 6.875993640699523, "percentage": 98.23, "elapsed_time": "6:57:15", "remaining_time": "0:07:31"}
{"current_steps": 4330, "total_steps": 4403, "loss": 0.1691, "lr": 3.441990230656167e-08, "epoch": 6.883942766295707, "percentage": 98.34, "elapsed_time": "6:57:43", "remaining_time": "0:07:02"}
{"current_steps": 4335, "total_steps": 4403, "loss": 0.1505, "lr": 2.992682512921175e-08, "epoch": 6.891891891891892, "percentage": 98.46, "elapsed_time": "6:58:15", "remaining_time": "0:06:33"}
{"current_steps": 4340, "total_steps": 4403, "loss": 0.1622, "lr": 2.574764692790499e-08, "epoch": 6.899841017488076, "percentage": 98.57, "elapsed_time": "6:58:41", "remaining_time": "0:06:04"}
{"current_steps": 4345, "total_steps": 4403, "loss": 0.161, "lr": 2.188243339292795e-08, "epoch": 6.907790143084261, "percentage": 98.68, "elapsed_time": "6:59:10", "remaining_time": "0:05:35"}
{"current_steps": 4350, "total_steps": 4403, "loss": 0.1592, "lr": 1.8331245279517017e-08, "epoch": 6.915739268680445, "percentage": 98.8, "elapsed_time": "6:59:41", "remaining_time": "0:05:06"}
{"current_steps": 4355, "total_steps": 4403, "loss": 0.1498, "lr": 1.509413840691476e-08, "epoch": 6.923688394276629, "percentage": 98.91, "elapsed_time": "7:00:05", "remaining_time": "0:04:37"}
{"current_steps": 4360, "total_steps": 4403, "loss": 0.1601, "lr": 1.2171163657481722e-08, "epoch": 6.9316375198728135, "percentage": 99.02, "elapsed_time": "7:00:32", "remaining_time": "0:04:08"}
{"current_steps": 4365, "total_steps": 4403, "loss": 0.173, "lr": 9.562366975910397e-09, "epoch": 6.9395866454689985, "percentage": 99.14, "elapsed_time": "7:00:58", "remaining_time": "0:03:39"}
{"current_steps": 4370, "total_steps": 4403, "loss": 0.1555, "lr": 7.2677893684880425e-09, "epoch": 6.947535771065183, "percentage": 99.25, "elapsed_time": "7:01:24", "remaining_time": "0:03:10"}
{"current_steps": 4375, "total_steps": 4403, "loss": 0.1669, "lr": 5.2874669024616246e-09, "epoch": 6.955484896661368, "percentage": 99.36, "elapsed_time": "7:01:53", "remaining_time": "0:02:42"}
{"current_steps": 4380, "total_steps": 4403, "loss": 0.1614, "lr": 3.621430705467166e-09, "epoch": 6.963434022257552, "percentage": 99.48, "elapsed_time": "7:02:24", "remaining_time": "0:02:13"}
{"current_steps": 4385, "total_steps": 4403, "loss": 0.1638, "lr": 2.2697069650456927e-09, "epoch": 6.971383147853736, "percentage": 99.59, "elapsed_time": "7:03:12", "remaining_time": "0:01:44"}
{"current_steps": 4390, "total_steps": 4403, "loss": 0.1598, "lr": 1.2323169282257852e-09, "epoch": 6.979332273449921, "percentage": 99.7, "elapsed_time": "7:03:39", "remaining_time": "0:01:15"}
{"current_steps": 4395, "total_steps": 4403, "loss": 0.1641, "lr": 5.092769011860732e-10, "epoch": 6.987281399046105, "percentage": 99.82, "elapsed_time": "7:04:09", "remaining_time": "0:00:46"}
{"current_steps": 4400, "total_steps": 4403, "loss": 0.1507, "lr": 1.0059824901098581e-10, "epoch": 6.995230524642289, "percentage": 99.93, "elapsed_time": "7:04:35", "remaining_time": "0:00:17"}
{"current_steps": 4403, "total_steps": 4403, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "7:05:03", "remaining_time": "0:00:00"}

BIN
training_loss.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

1
vocab.json Normal file

File diff suppressed because one or more lines are too long