初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-nemotron_bash_withtests_gpt5mini Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_nemotron_bash_withtests_gpt5mini__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_nemotron_bash_withtests_gpt5mini__Qwen3-8B
|
||||
|
||||
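This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on a thinking-preprocessed copy of the `DCAgent/exp_rpt_nemotron-bash-withtests-gpt5mini_glm_4.7_traces_jupiter` dataset (snapshot `2b382d4f2b58dcd58a2a90c31203ccf2063bf064`). The local path used during training was `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_nemotron-bash-withtests-gpt5mini_glm_4.7_traces_jupiter/snapshots/2b382d4f2b58dcd58a2a90c31203ccf2063bf064_thinking_preprocessed`.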
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9, 0.98) and epsilon=1e-08; no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.002924690961750274,
|
||||
"achieved_tflops_per_gpu_theoretical": 538.6768254888838,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.25932279229164124,
|
||||
"mfu_percent": 0.00020669194075973666,
|
||||
"mfu_percent_theoretical": 38.069033603454685,
|
||||
"total_flos": 1114110277320704.0,
|
||||
"train_loss": 0.27588593444180864,
|
||||
"train_runtime": 23808.2906,
|
||||
"train_samples_per_second": 2.54,
|
||||
"train_steps_per_second": 0.159,
|
||||
"valid_targets_mean": 5070.4,
|
||||
"valid_targets_min": 1372
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{#- System preamble. When `tools` is truthy, emit one system turn holding the optional leading system message followed by the tool-calling instructions and the tool list. Otherwise emit only the leading system message, if there is one. #}
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{#- A leading system message is placed ahead of the tool instructions inside the same system turn. #}
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{#- Each tool is serialized with `tojson` on its own line between <tools> and </tools>. #}
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{#- No tools: the leading system message, if present, becomes a plain system turn. #}
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{#- Locate the most recent genuine user query. The messages are scanned from last to first, and `ns.last_query_index` is set to the index of the first user message found whose content is a string that is not wrapped in <tool_response>...</tool_response>. If none is found, it keeps its default of `messages|length - 1`. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{#- Convert the position in the reversed iteration back into an index into `messages`. #}
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{#- `multi_step_tool` doubles as a "not found yet" flag: clearing it prevents earlier messages from overwriting the result. #}
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{#- Main rendering loop: emit every message in order as an <|im_start|>role ... <|im_end|> turn. Three role groups are handled: user/system, assistant, and tool. #}
{%- for message in messages %}
|
||||
{#- Only string content is rendered; any other content type is treated as an empty string. #}
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{#- A system message in first position is skipped here (`not loop.first`); the preamble at the top of the template already handles `messages[0]` when it is a system message. #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{#- Reasoning comes from `message.reasoning_content` when it is a string. Otherwise, if the content contains '</think>', the reasoning is split out of the content and the content is reduced to the text after the last '</think>'. #}
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{#- A <think> block is emitted only for assistant turns after `ns.last_query_index`: always for the final message, and for earlier ones only when reasoning is non-empty. All other assistant turns render their content without reasoning. #}
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{#- Tool calls are appended to the assistant turn, each wrapped in <tool_call>...</tool_call>. #}
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{#- Separate calls with a newline; the first call gets one only when there is preceding content. #}
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{#- Accept both a bare call object and one that nests its fields under `.function`. #}
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{#- String arguments are emitted verbatim; anything else is serialized with `tojson`. #}
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{#- Tool results are rendered inside a user turn. A run of consecutive tool messages shares one turn: it is opened before the first message of the run and closed after the last. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{#- Generation prompt: when `add_generation_prompt` is truthy, open a new assistant turn for the model to complete. #}
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{#- Only when `enable_thinking` is explicitly passed as false is an empty <think></think> block pre-filled; if the variable is undefined, nothing is added. #}
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d38bdc2a09da89877d1b75db07fbc77e04962e8f09a0d0ccd129d9dd99c50b0b
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6fc8a3c5531b95d705497ff74817f97005ed3507eb263ba6a0467969d17c4737
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f560276f3f84e678e151b04700c776f8a9ffc8eeb7994c0b086605602e314ded
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:91bb883e76dff734acc5251be0841b17b8993b613444e2717a85bc1d14bdd46b
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "2b382d4f2b58dcd58a2a90c31203ccf2063bf064_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_nemotron-bash-withtests-gpt5mini_glm_4.7_traces_jupiter/snapshots/2b382d4f2b58dcd58a2a90c31203ccf2063bf064_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-nemotron_bash_withtests_gpt5mini/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.002924690961750274,
|
||||
"achieved_tflops_per_gpu_theoretical": 538.6768254888838,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.25932279229164124,
|
||||
"mfu_percent": 0.00020669194075973666,
|
||||
"mfu_percent_theoretical": 38.069033603454685,
|
||||
"total_flos": 1114110277320704.0,
|
||||
"train_loss": 0.27588593444180864,
|
||||
"train_runtime": 23808.2906,
|
||||
"train_samples_per_second": 2.54,
|
||||
"train_steps_per_second": 0.159,
|
||||
"valid_targets_mean": 5070.4,
|
||||
"valid_targets_min": 1372
|
||||
}
|
||||
757
trainer_log.jsonl
Normal file
757
trainer_log.jsonl
Normal file
@@ -0,0 +1,757 @@
|
||||
{"current_steps": 5, "total_steps": 3780, "loss": 0.7137, "lr": 4.232804232804233e-07, "epoch": 0.009259259259259259, "percentage": 0.13, "elapsed_time": "0:00:43", "remaining_time": "9:05:43"}
|
||||
{"current_steps": 10, "total_steps": 3780, "loss": 0.7478, "lr": 9.523809523809525e-07, "epoch": 0.018518518518518517, "percentage": 0.26, "elapsed_time": "0:01:27", "remaining_time": "9:09:03"}
|
||||
{"current_steps": 15, "total_steps": 3780, "loss": 0.7248, "lr": 1.4814814814814815e-06, "epoch": 0.027777777777777776, "percentage": 0.4, "elapsed_time": "0:02:00", "remaining_time": "8:23:01"}
|
||||
{"current_steps": 20, "total_steps": 3780, "loss": 0.6861, "lr": 2.0105820105820108e-06, "epoch": 0.037037037037037035, "percentage": 0.53, "elapsed_time": "0:02:29", "remaining_time": "7:49:03"}
|
||||
{"current_steps": 25, "total_steps": 3780, "loss": 0.6491, "lr": 2.53968253968254e-06, "epoch": 0.046296296296296294, "percentage": 0.66, "elapsed_time": "0:03:03", "remaining_time": "7:39:11"}
|
||||
{"current_steps": 30, "total_steps": 3780, "loss": 0.605, "lr": 3.068783068783069e-06, "epoch": 0.05555555555555555, "percentage": 0.79, "elapsed_time": "0:03:36", "remaining_time": "7:30:59"}
|
||||
{"current_steps": 35, "total_steps": 3780, "loss": 0.5525, "lr": 3.597883597883598e-06, "epoch": 0.06481481481481481, "percentage": 0.93, "elapsed_time": "0:04:11", "remaining_time": "7:28:14"}
|
||||
{"current_steps": 40, "total_steps": 3780, "loss": 0.5372, "lr": 4.126984126984127e-06, "epoch": 0.07407407407407407, "percentage": 1.06, "elapsed_time": "0:04:52", "remaining_time": "7:36:03"}
|
||||
{"current_steps": 45, "total_steps": 3780, "loss": 0.5424, "lr": 4.656084656084656e-06, "epoch": 0.08333333333333333, "percentage": 1.19, "elapsed_time": "0:05:24", "remaining_time": "7:29:01"}
|
||||
{"current_steps": 50, "total_steps": 3780, "loss": 0.5095, "lr": 5.185185185185185e-06, "epoch": 0.09259259259259259, "percentage": 1.32, "elapsed_time": "0:05:52", "remaining_time": "7:18:44"}
|
||||
{"current_steps": 55, "total_steps": 3780, "loss": 0.489, "lr": 5.7142857142857145e-06, "epoch": 0.10185185185185185, "percentage": 1.46, "elapsed_time": "0:06:22", "remaining_time": "7:11:18"}
|
||||
{"current_steps": 60, "total_steps": 3780, "loss": 0.4944, "lr": 6.243386243386243e-06, "epoch": 0.1111111111111111, "percentage": 1.59, "elapsed_time": "0:06:58", "remaining_time": "7:11:57"}
|
||||
{"current_steps": 65, "total_steps": 3780, "loss": 0.4908, "lr": 6.772486772486773e-06, "epoch": 0.12037037037037036, "percentage": 1.72, "elapsed_time": "0:07:27", "remaining_time": "7:06:37"}
|
||||
{"current_steps": 70, "total_steps": 3780, "loss": 0.4823, "lr": 7.301587301587301e-06, "epoch": 0.12962962962962962, "percentage": 1.85, "elapsed_time": "0:08:04", "remaining_time": "7:07:48"}
|
||||
{"current_steps": 75, "total_steps": 3780, "loss": 0.4865, "lr": 7.830687830687831e-06, "epoch": 0.1388888888888889, "percentage": 1.98, "elapsed_time": "0:08:34", "remaining_time": "7:03:48"}
|
||||
{"current_steps": 80, "total_steps": 3780, "loss": 0.4568, "lr": 8.35978835978836e-06, "epoch": 0.14814814814814814, "percentage": 2.12, "elapsed_time": "0:08:59", "remaining_time": "6:55:33"}
|
||||
{"current_steps": 85, "total_steps": 3780, "loss": 0.4572, "lr": 8.888888888888888e-06, "epoch": 0.1574074074074074, "percentage": 2.25, "elapsed_time": "0:09:31", "remaining_time": "6:54:24"}
|
||||
{"current_steps": 90, "total_steps": 3780, "loss": 0.4703, "lr": 9.417989417989418e-06, "epoch": 0.16666666666666666, "percentage": 2.38, "elapsed_time": "0:10:03", "remaining_time": "6:52:36"}
|
||||
{"current_steps": 95, "total_steps": 3780, "loss": 0.4622, "lr": 9.947089947089947e-06, "epoch": 0.17592592592592593, "percentage": 2.51, "elapsed_time": "0:10:39", "remaining_time": "6:53:35"}
|
||||
{"current_steps": 100, "total_steps": 3780, "loss": 0.4053, "lr": 1.0476190476190477e-05, "epoch": 0.18518518518518517, "percentage": 2.65, "elapsed_time": "0:11:08", "remaining_time": "6:50:11"}
|
||||
{"current_steps": 105, "total_steps": 3780, "loss": 0.4844, "lr": 1.1005291005291006e-05, "epoch": 0.19444444444444445, "percentage": 2.78, "elapsed_time": "0:11:41", "remaining_time": "6:49:27"}
|
||||
{"current_steps": 110, "total_steps": 3780, "loss": 0.4127, "lr": 1.1534391534391536e-05, "epoch": 0.2037037037037037, "percentage": 2.91, "elapsed_time": "0:12:10", "remaining_time": "6:46:20"}
|
||||
{"current_steps": 115, "total_steps": 3780, "loss": 0.4565, "lr": 1.2063492063492064e-05, "epoch": 0.21296296296296297, "percentage": 3.04, "elapsed_time": "0:12:41", "remaining_time": "6:44:21"}
|
||||
{"current_steps": 120, "total_steps": 3780, "loss": 0.4195, "lr": 1.2592592592592593e-05, "epoch": 0.2222222222222222, "percentage": 3.17, "elapsed_time": "0:13:16", "remaining_time": "6:44:49"}
|
||||
{"current_steps": 125, "total_steps": 3780, "loss": 0.3917, "lr": 1.3121693121693123e-05, "epoch": 0.23148148148148148, "percentage": 3.31, "elapsed_time": "0:13:39", "remaining_time": "6:39:21"}
|
||||
{"current_steps": 130, "total_steps": 3780, "loss": 0.3883, "lr": 1.3650793650793652e-05, "epoch": 0.24074074074074073, "percentage": 3.44, "elapsed_time": "0:14:08", "remaining_time": "6:37:10"}
|
||||
{"current_steps": 135, "total_steps": 3780, "loss": 0.4364, "lr": 1.417989417989418e-05, "epoch": 0.25, "percentage": 3.57, "elapsed_time": "0:14:36", "remaining_time": "6:34:35"}
|
||||
{"current_steps": 140, "total_steps": 3780, "loss": 0.4167, "lr": 1.470899470899471e-05, "epoch": 0.25925925925925924, "percentage": 3.7, "elapsed_time": "0:15:02", "remaining_time": "6:31:13"}
|
||||
{"current_steps": 145, "total_steps": 3780, "loss": 0.4128, "lr": 1.523809523809524e-05, "epoch": 0.26851851851851855, "percentage": 3.84, "elapsed_time": "0:15:35", "remaining_time": "6:30:59"}
|
||||
{"current_steps": 150, "total_steps": 3780, "loss": 0.358, "lr": 1.576719576719577e-05, "epoch": 0.2777777777777778, "percentage": 3.97, "elapsed_time": "0:16:05", "remaining_time": "6:29:34"}
|
||||
{"current_steps": 155, "total_steps": 3780, "loss": 0.4209, "lr": 1.6296296296296297e-05, "epoch": 0.28703703703703703, "percentage": 4.1, "elapsed_time": "0:16:34", "remaining_time": "6:27:34"}
|
||||
{"current_steps": 160, "total_steps": 3780, "loss": 0.4165, "lr": 1.6825396825396828e-05, "epoch": 0.2962962962962963, "percentage": 4.23, "elapsed_time": "0:17:04", "remaining_time": "6:26:28"}
|
||||
{"current_steps": 165, "total_steps": 3780, "loss": 0.3873, "lr": 1.7354497354497356e-05, "epoch": 0.3055555555555556, "percentage": 4.37, "elapsed_time": "0:17:35", "remaining_time": "6:25:30"}
|
||||
{"current_steps": 170, "total_steps": 3780, "loss": 0.3664, "lr": 1.7883597883597884e-05, "epoch": 0.3148148148148148, "percentage": 4.5, "elapsed_time": "0:18:07", "remaining_time": "6:24:45"}
|
||||
{"current_steps": 175, "total_steps": 3780, "loss": 0.4059, "lr": 1.8412698412698415e-05, "epoch": 0.32407407407407407, "percentage": 4.63, "elapsed_time": "0:18:39", "remaining_time": "6:24:13"}
|
||||
{"current_steps": 180, "total_steps": 3780, "loss": 0.4079, "lr": 1.8941798941798943e-05, "epoch": 0.3333333333333333, "percentage": 4.76, "elapsed_time": "0:19:13", "remaining_time": "6:24:35"}
|
||||
{"current_steps": 185, "total_steps": 3780, "loss": 0.3756, "lr": 1.947089947089947e-05, "epoch": 0.3425925925925926, "percentage": 4.89, "elapsed_time": "0:19:48", "remaining_time": "6:24:46"}
|
||||
{"current_steps": 190, "total_steps": 3780, "loss": 0.3672, "lr": 2e-05, "epoch": 0.35185185185185186, "percentage": 5.03, "elapsed_time": "0:20:19", "remaining_time": "6:24:09"}
|
||||
{"current_steps": 195, "total_steps": 3780, "loss": 0.3556, "lr": 2.0529100529100533e-05, "epoch": 0.3611111111111111, "percentage": 5.16, "elapsed_time": "0:20:47", "remaining_time": "6:22:11"}
|
||||
{"current_steps": 200, "total_steps": 3780, "loss": 0.3901, "lr": 2.105820105820106e-05, "epoch": 0.37037037037037035, "percentage": 5.29, "elapsed_time": "0:21:28", "remaining_time": "6:24:29"}
|
||||
{"current_steps": 205, "total_steps": 3780, "loss": 0.3355, "lr": 2.158730158730159e-05, "epoch": 0.37962962962962965, "percentage": 5.42, "elapsed_time": "0:21:58", "remaining_time": "6:23:06"}
|
||||
{"current_steps": 210, "total_steps": 3780, "loss": 0.3628, "lr": 2.211640211640212e-05, "epoch": 0.3888888888888889, "percentage": 5.56, "elapsed_time": "0:22:29", "remaining_time": "6:22:14"}
|
||||
{"current_steps": 215, "total_steps": 3780, "loss": 0.3548, "lr": 2.2645502645502648e-05, "epoch": 0.39814814814814814, "percentage": 5.69, "elapsed_time": "0:23:11", "remaining_time": "6:24:31"}
|
||||
{"current_steps": 220, "total_steps": 3780, "loss": 0.3853, "lr": 2.317460317460318e-05, "epoch": 0.4074074074074074, "percentage": 5.82, "elapsed_time": "0:23:46", "remaining_time": "6:24:38"}
|
||||
{"current_steps": 225, "total_steps": 3780, "loss": 0.3995, "lr": 2.3703703703703703e-05, "epoch": 0.4166666666666667, "percentage": 5.95, "elapsed_time": "0:24:19", "remaining_time": "6:24:12"}
|
||||
{"current_steps": 230, "total_steps": 3780, "loss": 0.4098, "lr": 2.4232804232804234e-05, "epoch": 0.42592592592592593, "percentage": 6.08, "elapsed_time": "0:24:56", "remaining_time": "6:24:55"}
|
||||
{"current_steps": 235, "total_steps": 3780, "loss": 0.3682, "lr": 2.4761904761904766e-05, "epoch": 0.4351851851851852, "percentage": 6.22, "elapsed_time": "0:25:29", "remaining_time": "6:24:37"}
|
||||
{"current_steps": 240, "total_steps": 3780, "loss": 0.3622, "lr": 2.5291005291005294e-05, "epoch": 0.4444444444444444, "percentage": 6.35, "elapsed_time": "0:25:52", "remaining_time": "6:21:39"}
|
||||
{"current_steps": 245, "total_steps": 3780, "loss": 0.3695, "lr": 2.582010582010582e-05, "epoch": 0.4537037037037037, "percentage": 6.48, "elapsed_time": "0:26:26", "remaining_time": "6:21:27"}
|
||||
{"current_steps": 250, "total_steps": 3780, "loss": 0.3469, "lr": 2.6349206349206353e-05, "epoch": 0.46296296296296297, "percentage": 6.61, "elapsed_time": "0:26:56", "remaining_time": "6:20:25"}
|
||||
{"current_steps": 255, "total_steps": 3780, "loss": 0.3413, "lr": 2.687830687830688e-05, "epoch": 0.4722222222222222, "percentage": 6.75, "elapsed_time": "0:27:23", "remaining_time": "6:18:39"}
|
||||
{"current_steps": 260, "total_steps": 3780, "loss": 0.3628, "lr": 2.740740740740741e-05, "epoch": 0.48148148148148145, "percentage": 6.88, "elapsed_time": "0:27:54", "remaining_time": "6:17:44"}
|
||||
{"current_steps": 265, "total_steps": 3780, "loss": 0.389, "lr": 2.7936507936507936e-05, "epoch": 0.49074074074074076, "percentage": 7.01, "elapsed_time": "0:28:26", "remaining_time": "6:17:16"}
|
||||
{"current_steps": 270, "total_steps": 3780, "loss": 0.3642, "lr": 2.8465608465608467e-05, "epoch": 0.5, "percentage": 7.14, "elapsed_time": "0:29:04", "remaining_time": "6:17:52"}
|
||||
{"current_steps": 275, "total_steps": 3780, "loss": 0.3695, "lr": 2.8994708994709e-05, "epoch": 0.5092592592592593, "percentage": 7.28, "elapsed_time": "0:29:42", "remaining_time": "6:18:33"}
|
||||
{"current_steps": 280, "total_steps": 3780, "loss": 0.3325, "lr": 2.9523809523809526e-05, "epoch": 0.5185185185185185, "percentage": 7.41, "elapsed_time": "0:30:12", "remaining_time": "6:17:30"}
|
||||
{"current_steps": 285, "total_steps": 3780, "loss": 0.3729, "lr": 3.0052910052910054e-05, "epoch": 0.5277777777777778, "percentage": 7.54, "elapsed_time": "0:30:48", "remaining_time": "6:17:45"}
|
||||
{"current_steps": 290, "total_steps": 3780, "loss": 0.3421, "lr": 3.058201058201058e-05, "epoch": 0.5370370370370371, "percentage": 7.67, "elapsed_time": "0:31:20", "remaining_time": "6:17:13"}
|
||||
{"current_steps": 295, "total_steps": 3780, "loss": 0.3765, "lr": 3.111111111111112e-05, "epoch": 0.5462962962962963, "percentage": 7.8, "elapsed_time": "0:31:49", "remaining_time": "6:15:58"}
|
||||
{"current_steps": 300, "total_steps": 3780, "loss": 0.3484, "lr": 3.1640211640211645e-05, "epoch": 0.5555555555555556, "percentage": 7.94, "elapsed_time": "0:32:20", "remaining_time": "6:15:12"}
|
||||
{"current_steps": 305, "total_steps": 3780, "loss": 0.349, "lr": 3.216931216931217e-05, "epoch": 0.5648148148148148, "percentage": 8.07, "elapsed_time": "0:32:49", "remaining_time": "6:13:57"}
|
||||
{"current_steps": 310, "total_steps": 3780, "loss": 0.3554, "lr": 3.26984126984127e-05, "epoch": 0.5740740740740741, "percentage": 8.2, "elapsed_time": "0:33:22", "remaining_time": "6:13:31"}
|
||||
{"current_steps": 315, "total_steps": 3780, "loss": 0.3638, "lr": 3.322751322751323e-05, "epoch": 0.5833333333333334, "percentage": 8.33, "elapsed_time": "0:33:58", "remaining_time": "6:13:48"}
|
||||
{"current_steps": 320, "total_steps": 3780, "loss": 0.3499, "lr": 3.375661375661376e-05, "epoch": 0.5925925925925926, "percentage": 8.47, "elapsed_time": "0:34:27", "remaining_time": "6:12:36"}
|
||||
{"current_steps": 325, "total_steps": 3780, "loss": 0.3427, "lr": 3.4285714285714284e-05, "epoch": 0.6018518518518519, "percentage": 8.6, "elapsed_time": "0:34:57", "remaining_time": "6:11:33"}
|
||||
{"current_steps": 330, "total_steps": 3780, "loss": 0.3081, "lr": 3.481481481481482e-05, "epoch": 0.6111111111111112, "percentage": 8.73, "elapsed_time": "0:35:25", "remaining_time": "6:10:19"}
|
||||
{"current_steps": 335, "total_steps": 3780, "loss": 0.3891, "lr": 3.5343915343915346e-05, "epoch": 0.6203703703703703, "percentage": 8.86, "elapsed_time": "0:35:56", "remaining_time": "6:09:34"}
|
||||
{"current_steps": 340, "total_steps": 3780, "loss": 0.3278, "lr": 3.5873015873015874e-05, "epoch": 0.6296296296296297, "percentage": 8.99, "elapsed_time": "0:36:34", "remaining_time": "6:10:06"}
|
||||
{"current_steps": 345, "total_steps": 3780, "loss": 0.3429, "lr": 3.64021164021164e-05, "epoch": 0.6388888888888888, "percentage": 9.13, "elapsed_time": "0:37:12", "remaining_time": "6:10:23"}
|
||||
{"current_steps": 350, "total_steps": 3780, "loss": 0.3149, "lr": 3.6931216931216936e-05, "epoch": 0.6481481481481481, "percentage": 9.26, "elapsed_time": "0:37:40", "remaining_time": "6:09:09"}
|
||||
{"current_steps": 355, "total_steps": 3780, "loss": 0.3418, "lr": 3.7460317460317464e-05, "epoch": 0.6574074074074074, "percentage": 9.39, "elapsed_time": "0:38:09", "remaining_time": "6:08:08"}
|
||||
{"current_steps": 360, "total_steps": 3780, "loss": 0.3606, "lr": 3.798941798941799e-05, "epoch": 0.6666666666666666, "percentage": 9.52, "elapsed_time": "0:38:48", "remaining_time": "6:08:39"}
|
||||
{"current_steps": 365, "total_steps": 3780, "loss": 0.3768, "lr": 3.851851851851852e-05, "epoch": 0.6759259259259259, "percentage": 9.66, "elapsed_time": "0:39:23", "remaining_time": "6:08:35"}
|
||||
{"current_steps": 370, "total_steps": 3780, "loss": 0.3578, "lr": 3.904761904761905e-05, "epoch": 0.6851851851851852, "percentage": 9.79, "elapsed_time": "0:39:58", "remaining_time": "6:08:22"}
|
||||
{"current_steps": 375, "total_steps": 3780, "loss": 0.3343, "lr": 3.957671957671958e-05, "epoch": 0.6944444444444444, "percentage": 9.92, "elapsed_time": "0:40:30", "remaining_time": "6:07:46"}
|
||||
{"current_steps": 380, "total_steps": 3780, "loss": 0.3651, "lr": 3.999999147231606e-05, "epoch": 0.7037037037037037, "percentage": 10.05, "elapsed_time": "0:41:04", "remaining_time": "6:07:31"}
|
||||
{"current_steps": 385, "total_steps": 3780, "loss": 0.3322, "lr": 3.9999693004141615e-05, "epoch": 0.7129629629629629, "percentage": 10.19, "elapsed_time": "0:41:42", "remaining_time": "6:07:43"}
|
||||
{"current_steps": 390, "total_steps": 3780, "loss": 0.335, "lr": 3.999896815904212e-05, "epoch": 0.7222222222222222, "percentage": 10.32, "elapsed_time": "0:42:17", "remaining_time": "6:07:36"}
|
||||
{"current_steps": 395, "total_steps": 3780, "loss": 0.3451, "lr": 3.999781695247067e-05, "epoch": 0.7314814814814815, "percentage": 10.45, "elapsed_time": "0:42:53", "remaining_time": "6:07:35"}
|
||||
{"current_steps": 400, "total_steps": 3780, "loss": 0.3475, "lr": 3.999623940897003e-05, "epoch": 0.7407407407407407, "percentage": 10.58, "elapsed_time": "0:43:27", "remaining_time": "6:07:10"}
|
||||
{"current_steps": 405, "total_steps": 3780, "loss": 0.352, "lr": 3.9994235562172135e-05, "epoch": 0.75, "percentage": 10.71, "elapsed_time": "0:44:01", "remaining_time": "6:06:55"}
|
||||
{"current_steps": 410, "total_steps": 3780, "loss": 0.3489, "lr": 3.999180545479734e-05, "epoch": 0.7592592592592593, "percentage": 10.85, "elapsed_time": "0:44:35", "remaining_time": "6:06:34"}
|
||||
{"current_steps": 415, "total_steps": 3780, "loss": 0.3215, "lr": 3.998894913865352e-05, "epoch": 0.7685185185185185, "percentage": 10.98, "elapsed_time": "0:45:06", "remaining_time": "6:05:44"}
|
||||
{"current_steps": 420, "total_steps": 3780, "loss": 0.34, "lr": 3.9985666674634976e-05, "epoch": 0.7777777777777778, "percentage": 11.11, "elapsed_time": "0:45:28", "remaining_time": "6:03:49"}
|
||||
{"current_steps": 425, "total_steps": 3780, "loss": 0.3309, "lr": 3.998195813272113e-05, "epoch": 0.7870370370370371, "percentage": 11.24, "elapsed_time": "0:45:55", "remaining_time": "6:02:30"}
|
||||
{"current_steps": 430, "total_steps": 3780, "loss": 0.3947, "lr": 3.997782359197503e-05, "epoch": 0.7962962962962963, "percentage": 11.38, "elapsed_time": "0:46:33", "remaining_time": "6:02:39"}
|
||||
{"current_steps": 435, "total_steps": 3780, "loss": 0.33, "lr": 3.997326314054167e-05, "epoch": 0.8055555555555556, "percentage": 11.51, "elapsed_time": "0:47:01", "remaining_time": "6:01:36"}
|
||||
{"current_steps": 440, "total_steps": 3780, "loss": 0.3256, "lr": 3.9968276875646095e-05, "epoch": 0.8148148148148148, "percentage": 11.64, "elapsed_time": "0:47:30", "remaining_time": "6:00:40"}
|
||||
{"current_steps": 445, "total_steps": 3780, "loss": 0.3235, "lr": 3.9962864903591375e-05, "epoch": 0.8240740740740741, "percentage": 11.77, "elapsed_time": "0:47:56", "remaining_time": "5:59:14"}
|
||||
{"current_steps": 450, "total_steps": 3780, "loss": 0.3853, "lr": 3.995702733975625e-05, "epoch": 0.8333333333333334, "percentage": 11.9, "elapsed_time": "0:48:30", "remaining_time": "5:58:56"}
|
||||
{"current_steps": 455, "total_steps": 3780, "loss": 0.3476, "lr": 3.9950764308592783e-05, "epoch": 0.8425925925925926, "percentage": 12.04, "elapsed_time": "0:48:59", "remaining_time": "5:58:04"}
|
||||
{"current_steps": 460, "total_steps": 3780, "loss": 0.333, "lr": 3.9944075943623605e-05, "epoch": 0.8518518518518519, "percentage": 12.17, "elapsed_time": "0:49:30", "remaining_time": "5:57:19"}
|
||||
{"current_steps": 465, "total_steps": 3780, "loss": 0.339, "lr": 3.9936962387439135e-05, "epoch": 0.8611111111111112, "percentage": 12.3, "elapsed_time": "0:50:04", "remaining_time": "5:57:01"}
|
||||
{"current_steps": 470, "total_steps": 3780, "loss": 0.3426, "lr": 3.992942379169452e-05, "epoch": 0.8703703703703703, "percentage": 12.43, "elapsed_time": "0:50:32", "remaining_time": "5:55:53"}
|
||||
{"current_steps": 475, "total_steps": 3780, "loss": 0.3298, "lr": 3.992146031710637e-05, "epoch": 0.8796296296296297, "percentage": 12.57, "elapsed_time": "0:51:08", "remaining_time": "5:55:48"}
|
||||
{"current_steps": 480, "total_steps": 3780, "loss": 0.3507, "lr": 3.99130721334494e-05, "epoch": 0.8888888888888888, "percentage": 12.7, "elapsed_time": "0:51:46", "remaining_time": "5:55:56"}
|
||||
{"current_steps": 485, "total_steps": 3780, "loss": 0.3014, "lr": 3.9904259419552744e-05, "epoch": 0.8981481481481481, "percentage": 12.83, "elapsed_time": "0:52:20", "remaining_time": "5:55:39"}
|
||||
{"current_steps": 490, "total_steps": 3780, "loss": 0.3342, "lr": 3.989502236329618e-05, "epoch": 0.9074074074074074, "percentage": 12.96, "elapsed_time": "0:52:54", "remaining_time": "5:55:15"}
|
||||
{"current_steps": 495, "total_steps": 3780, "loss": 0.3126, "lr": 3.988536116160612e-05, "epoch": 0.9166666666666666, "percentage": 13.1, "elapsed_time": "0:53:27", "remaining_time": "5:54:45"}
|
||||
{"current_steps": 500, "total_steps": 3780, "loss": 0.322, "lr": 3.987527602045139e-05, "epoch": 0.9259259259259259, "percentage": 13.23, "elapsed_time": "0:53:55", "remaining_time": "5:53:45"}
|
||||
{"current_steps": 505, "total_steps": 3780, "loss": 0.3175, "lr": 3.9864767154838864e-05, "epoch": 0.9351851851851852, "percentage": 13.36, "elapsed_time": "0:54:22", "remaining_time": "5:52:34"}
|
||||
{"current_steps": 510, "total_steps": 3780, "loss": 0.3024, "lr": 3.985383478880887e-05, "epoch": 0.9444444444444444, "percentage": 13.49, "elapsed_time": "0:54:51", "remaining_time": "5:51:45"}
|
||||
{"current_steps": 515, "total_steps": 3780, "loss": 0.2932, "lr": 3.984247915543043e-05, "epoch": 0.9537037037037037, "percentage": 13.62, "elapsed_time": "0:55:20", "remaining_time": "5:50:50"}
|
||||
{"current_steps": 520, "total_steps": 3780, "loss": 0.3278, "lr": 3.9830700496796246e-05, "epoch": 0.9629629629629629, "percentage": 13.76, "elapsed_time": "0:55:49", "remaining_time": "5:50:00"}
|
||||
{"current_steps": 525, "total_steps": 3780, "loss": 0.3282, "lr": 3.98184990640176e-05, "epoch": 0.9722222222222222, "percentage": 13.89, "elapsed_time": "0:56:28", "remaining_time": "5:50:06"}
|
||||
{"current_steps": 530, "total_steps": 3780, "loss": 0.3633, "lr": 3.9805875117218934e-05, "epoch": 0.9814814814814815, "percentage": 14.02, "elapsed_time": "0:57:02", "remaining_time": "5:49:46"}
|
||||
{"current_steps": 535, "total_steps": 3780, "loss": 0.3386, "lr": 3.9792828925532376e-05, "epoch": 0.9907407407407407, "percentage": 14.15, "elapsed_time": "0:57:30", "remaining_time": "5:48:48"}
|
||||
{"current_steps": 540, "total_steps": 3780, "loss": 0.3092, "lr": 3.977936076709195e-05, "epoch": 1.0, "percentage": 14.29, "elapsed_time": "0:58:01", "remaining_time": "5:48:08"}
|
||||
{"current_steps": 545, "total_steps": 3780, "loss": 0.3073, "lr": 3.976547092902765e-05, "epoch": 1.0092592592592593, "percentage": 14.42, "elapsed_time": "0:58:29", "remaining_time": "5:47:13"}
|
||||
{"current_steps": 550, "total_steps": 3780, "loss": 0.3138, "lr": 3.9751159707459354e-05, "epoch": 1.0185185185185186, "percentage": 14.55, "elapsed_time": "0:58:56", "remaining_time": "5:46:11"}
|
||||
{"current_steps": 555, "total_steps": 3780, "loss": 0.2885, "lr": 3.973642740749048e-05, "epoch": 1.0277777777777777, "percentage": 14.68, "elapsed_time": "0:59:31", "remaining_time": "5:45:50"}
|
||||
{"current_steps": 560, "total_steps": 3780, "loss": 0.3245, "lr": 3.972127434320148e-05, "epoch": 1.037037037037037, "percentage": 14.81, "elapsed_time": "0:59:59", "remaining_time": "5:44:59"}
|
||||
{"current_steps": 565, "total_steps": 3780, "loss": 0.3301, "lr": 3.970570083764316e-05, "epoch": 1.0462962962962963, "percentage": 14.95, "elapsed_time": "1:00:37", "remaining_time": "5:45:00"}
|
||||
{"current_steps": 570, "total_steps": 3780, "loss": 0.2943, "lr": 3.968970722282979e-05, "epoch": 1.0555555555555556, "percentage": 15.08, "elapsed_time": "1:01:11", "remaining_time": "5:44:36"}
|
||||
{"current_steps": 575, "total_steps": 3780, "loss": 0.3161, "lr": 3.9673293839732024e-05, "epoch": 1.0648148148148149, "percentage": 15.21, "elapsed_time": "1:01:41", "remaining_time": "5:43:49"}
|
||||
{"current_steps": 580, "total_steps": 3780, "loss": 0.3053, "lr": 3.965646103826962e-05, "epoch": 1.074074074074074, "percentage": 15.34, "elapsed_time": "1:02:18", "remaining_time": "5:43:48"}
|
||||
{"current_steps": 585, "total_steps": 3780, "loss": 0.3154, "lr": 3.963920917730399e-05, "epoch": 1.0833333333333333, "percentage": 15.48, "elapsed_time": "1:02:41", "remaining_time": "5:42:21"}
|
||||
{"current_steps": 590, "total_steps": 3780, "loss": 0.2808, "lr": 3.9621538624630546e-05, "epoch": 1.0925925925925926, "percentage": 15.61, "elapsed_time": "1:03:08", "remaining_time": "5:41:22"}
|
||||
{"current_steps": 595, "total_steps": 3780, "loss": 0.2979, "lr": 3.9603449756970877e-05, "epoch": 1.1018518518518519, "percentage": 15.74, "elapsed_time": "1:03:38", "remaining_time": "5:40:39"}
|
||||
{"current_steps": 600, "total_steps": 3780, "loss": 0.3379, "lr": 3.9584942959964695e-05, "epoch": 1.1111111111111112, "percentage": 15.87, "elapsed_time": "1:04:16", "remaining_time": "5:40:39"}
|
||||
{"current_steps": 605, "total_steps": 3780, "loss": 0.3057, "lr": 3.9566018628161595e-05, "epoch": 1.1203703703703705, "percentage": 16.01, "elapsed_time": "1:04:44", "remaining_time": "5:39:43"}
|
||||
{"current_steps": 610, "total_steps": 3780, "loss": 0.3267, "lr": 3.9546677165012714e-05, "epoch": 1.1296296296296295, "percentage": 16.14, "elapsed_time": "1:05:13", "remaining_time": "5:38:59"}
|
||||
{"current_steps": 615, "total_steps": 3780, "loss": 0.2848, "lr": 3.9526918982862045e-05, "epoch": 1.1388888888888888, "percentage": 16.27, "elapsed_time": "1:05:42", "remaining_time": "5:38:11"}
|
||||
{"current_steps": 620, "total_steps": 3780, "loss": 0.3377, "lr": 3.950674450293771e-05, "epoch": 1.1481481481481481, "percentage": 16.4, "elapsed_time": "1:06:11", "remaining_time": "5:37:22"}
|
||||
{"current_steps": 625, "total_steps": 3780, "loss": 0.3019, "lr": 3.948615415534294e-05, "epoch": 1.1574074074074074, "percentage": 16.53, "elapsed_time": "1:06:43", "remaining_time": "5:36:48"}
|
||||
{"current_steps": 630, "total_steps": 3780, "loss": 0.3512, "lr": 3.946514837904693e-05, "epoch": 1.1666666666666667, "percentage": 16.67, "elapsed_time": "1:07:20", "remaining_time": "5:36:40"}
|
||||
{"current_steps": 635, "total_steps": 3780, "loss": 0.3348, "lr": 3.944372762187547e-05, "epoch": 1.175925925925926, "percentage": 16.8, "elapsed_time": "1:07:55", "remaining_time": "5:36:26"}
|
||||
{"current_steps": 640, "total_steps": 3780, "loss": 0.3201, "lr": 3.9421892340501405e-05, "epoch": 1.1851851851851851, "percentage": 16.93, "elapsed_time": "1:08:26", "remaining_time": "5:35:47"}
|
||||
{"current_steps": 645, "total_steps": 3780, "loss": 0.2968, "lr": 3.939964300043487e-05, "epoch": 1.1944444444444444, "percentage": 17.06, "elapsed_time": "1:08:52", "remaining_time": "5:34:47"}
|
||||
{"current_steps": 650, "total_steps": 3780, "loss": 0.3188, "lr": 3.9376980076013426e-05, "epoch": 1.2037037037037037, "percentage": 17.2, "elapsed_time": "1:09:20", "remaining_time": "5:33:52"}
|
||||
{"current_steps": 655, "total_steps": 3780, "loss": 0.3043, "lr": 3.9353904050391874e-05, "epoch": 1.212962962962963, "percentage": 17.33, "elapsed_time": "1:09:48", "remaining_time": "5:33:04"}
|
||||
{"current_steps": 660, "total_steps": 3780, "loss": 0.3059, "lr": 3.933041541553202e-05, "epoch": 1.2222222222222223, "percentage": 17.46, "elapsed_time": "1:10:24", "remaining_time": "5:32:49"}
|
||||
{"current_steps": 665, "total_steps": 3780, "loss": 0.3286, "lr": 3.930651467219214e-05, "epoch": 1.2314814814814814, "percentage": 17.59, "elapsed_time": "1:10:56", "remaining_time": "5:32:17"}
|
||||
{"current_steps": 670, "total_steps": 3780, "loss": 0.2983, "lr": 3.928220232991633e-05, "epoch": 1.2407407407407407, "percentage": 17.72, "elapsed_time": "1:11:26", "remaining_time": "5:31:35"}
|
||||
{"current_steps": 675, "total_steps": 3780, "loss": 0.3506, "lr": 3.925747890702363e-05, "epoch": 1.25, "percentage": 17.86, "elapsed_time": "1:12:00", "remaining_time": "5:31:16"}
|
||||
{"current_steps": 680, "total_steps": 3780, "loss": 0.2906, "lr": 3.9232344930596983e-05, "epoch": 1.2592592592592593, "percentage": 17.99, "elapsed_time": "1:12:37", "remaining_time": "5:31:07"}
|
||||
{"current_steps": 685, "total_steps": 3780, "loss": 0.3374, "lr": 3.9206800936472e-05, "epoch": 1.2685185185185186, "percentage": 18.12, "elapsed_time": "1:13:10", "remaining_time": "5:30:36"}
|
||||
{"current_steps": 690, "total_steps": 3780, "loss": 0.2861, "lr": 3.9180847469225514e-05, "epoch": 1.2777777777777777, "percentage": 18.25, "elapsed_time": "1:13:41", "remaining_time": "5:29:59"}
|
||||
{"current_steps": 695, "total_steps": 3780, "loss": 0.3001, "lr": 3.9154485082164e-05, "epoch": 1.287037037037037, "percentage": 18.39, "elapsed_time": "1:14:13", "remaining_time": "5:29:28"}
|
||||
{"current_steps": 700, "total_steps": 3780, "loss": 0.3211, "lr": 3.912771433731176e-05, "epoch": 1.2962962962962963, "percentage": 18.52, "elapsed_time": "1:14:45", "remaining_time": "5:28:56"}
|
||||
{"current_steps": 705, "total_steps": 3780, "loss": 0.3009, "lr": 3.910053580539896e-05, "epoch": 1.3055555555555556, "percentage": 18.65, "elapsed_time": "1:15:22", "remaining_time": "5:28:45"}
|
||||
{"current_steps": 710, "total_steps": 3780, "loss": 0.3373, "lr": 3.907295006584941e-05, "epoch": 1.3148148148148149, "percentage": 18.78, "elapsed_time": "1:15:52", "remaining_time": "5:28:04"}
|
||||
{"current_steps": 715, "total_steps": 3780, "loss": 0.3085, "lr": 3.904495770676831e-05, "epoch": 1.324074074074074, "percentage": 18.92, "elapsed_time": "1:16:25", "remaining_time": "5:27:35"}
|
||||
{"current_steps": 720, "total_steps": 3780, "loss": 0.3044, "lr": 3.9016559324929594e-05, "epoch": 1.3333333333333333, "percentage": 19.05, "elapsed_time": "1:16:57", "remaining_time": "5:27:04"}
|
||||
{"current_steps": 725, "total_steps": 3780, "loss": 0.2739, "lr": 3.8987755525763315e-05, "epoch": 1.3425925925925926, "percentage": 19.18, "elapsed_time": "1:17:28", "remaining_time": "5:26:26"}
|
||||
{"current_steps": 730, "total_steps": 3780, "loss": 0.3196, "lr": 3.895854692334264e-05, "epoch": 1.3518518518518519, "percentage": 19.31, "elapsed_time": "1:17:55", "remaining_time": "5:25:32"}
|
||||
{"current_steps": 735, "total_steps": 3780, "loss": 0.2971, "lr": 3.892893414037084e-05, "epoch": 1.3611111111111112, "percentage": 19.44, "elapsed_time": "1:18:25", "remaining_time": "5:24:55"}
|
||||
{"current_steps": 740, "total_steps": 3780, "loss": 0.3124, "lr": 3.889891780816799e-05, "epoch": 1.3703703703703702, "percentage": 19.58, "elapsed_time": "1:18:55", "remaining_time": "5:24:15"}
|
||||
{"current_steps": 745, "total_steps": 3780, "loss": 0.3289, "lr": 3.886849856665746e-05, "epoch": 1.3796296296296298, "percentage": 19.71, "elapsed_time": "1:19:30", "remaining_time": "5:23:53"}
|
||||
{"current_steps": 750, "total_steps": 3780, "loss": 0.3118, "lr": 3.8837677064352345e-05, "epoch": 1.3888888888888888, "percentage": 19.84, "elapsed_time": "1:20:03", "remaining_time": "5:23:27"}
|
||||
{"current_steps": 755, "total_steps": 3780, "loss": 0.2869, "lr": 3.8806453958341615e-05, "epoch": 1.3981481481481481, "percentage": 19.97, "elapsed_time": "1:20:42", "remaining_time": "5:23:21"}
|
||||
{"current_steps": 760, "total_steps": 3780, "loss": 0.2967, "lr": 3.877482991427607e-05, "epoch": 1.4074074074074074, "percentage": 20.11, "elapsed_time": "1:21:17", "remaining_time": "5:23:00"}
|
||||
{"current_steps": 765, "total_steps": 3780, "loss": 0.2884, "lr": 3.874280560635418e-05, "epoch": 1.4166666666666667, "percentage": 20.24, "elapsed_time": "1:21:51", "remaining_time": "5:22:36"}
|
||||
{"current_steps": 770, "total_steps": 3780, "loss": 0.3134, "lr": 3.871038171730775e-05, "epoch": 1.425925925925926, "percentage": 20.37, "elapsed_time": "1:22:23", "remaining_time": "5:22:05"}
|
||||
{"current_steps": 775, "total_steps": 3780, "loss": 0.3092, "lr": 3.8677558938387276e-05, "epoch": 1.4351851851851851, "percentage": 20.5, "elapsed_time": "1:22:50", "remaining_time": "5:21:14"}
|
||||
{"current_steps": 780, "total_steps": 3780, "loss": 0.3123, "lr": 3.864433796934728e-05, "epoch": 1.4444444444444444, "percentage": 20.63, "elapsed_time": "1:23:26", "remaining_time": "5:20:57"}
|
||||
{"current_steps": 785, "total_steps": 3780, "loss": 0.2985, "lr": 3.861071951843137e-05, "epoch": 1.4537037037037037, "percentage": 20.77, "elapsed_time": "1:23:56", "remaining_time": "5:20:14"}
|
||||
{"current_steps": 790, "total_steps": 3780, "loss": 0.2845, "lr": 3.8576704302357135e-05, "epoch": 1.462962962962963, "percentage": 20.9, "elapsed_time": "1:24:19", "remaining_time": "5:19:08"}
|
||||
{"current_steps": 795, "total_steps": 3780, "loss": 0.3451, "lr": 3.854229304630086e-05, "epoch": 1.4722222222222223, "percentage": 21.03, "elapsed_time": "1:24:54", "remaining_time": "5:18:49"}
|
||||
{"current_steps": 800, "total_steps": 3780, "loss": 0.3119, "lr": 3.8507486483882084e-05, "epoch": 1.4814814814814814, "percentage": 21.16, "elapsed_time": "1:25:34", "remaining_time": "5:18:44"}
|
||||
{"current_steps": 805, "total_steps": 3780, "loss": 0.3258, "lr": 3.8472285357147966e-05, "epoch": 1.4907407407407407, "percentage": 21.3, "elapsed_time": "1:26:11", "remaining_time": "5:18:30"}
|
||||
{"current_steps": 810, "total_steps": 3780, "loss": 0.2876, "lr": 3.843669041655741e-05, "epoch": 1.5, "percentage": 21.43, "elapsed_time": "1:26:42", "remaining_time": "5:17:56"}
|
||||
{"current_steps": 815, "total_steps": 3780, "loss": 0.2794, "lr": 3.840070242096514e-05, "epoch": 1.5092592592592593, "percentage": 21.56, "elapsed_time": "1:27:12", "remaining_time": "5:17:15"}
|
||||
{"current_steps": 820, "total_steps": 3780, "loss": 0.2976, "lr": 3.8364322137605484e-05, "epoch": 1.5185185185185186, "percentage": 21.69, "elapsed_time": "1:27:44", "remaining_time": "5:16:44"}
|
||||
{"current_steps": 825, "total_steps": 3780, "loss": 0.3098, "lr": 3.832755034207601e-05, "epoch": 1.5277777777777777, "percentage": 21.83, "elapsed_time": "1:28:16", "remaining_time": "5:16:11"}
|
||||
{"current_steps": 830, "total_steps": 3780, "loss": 0.3361, "lr": 3.8290387818321e-05, "epoch": 1.5370370370370372, "percentage": 21.96, "elapsed_time": "1:28:43", "remaining_time": "5:15:20"}
|
||||
{"current_steps": 835, "total_steps": 3780, "loss": 0.2845, "lr": 3.825283535861476e-05, "epoch": 1.5462962962962963, "percentage": 22.09, "elapsed_time": "1:29:07", "remaining_time": "5:14:21"}
|
||||
{"current_steps": 840, "total_steps": 3780, "loss": 0.3161, "lr": 3.8214893763544684e-05, "epoch": 1.5555555555555556, "percentage": 22.22, "elapsed_time": "1:29:34", "remaining_time": "5:13:32"}
|
||||
{"current_steps": 845, "total_steps": 3780, "loss": 0.2897, "lr": 3.817656384199422e-05, "epoch": 1.5648148148148149, "percentage": 22.35, "elapsed_time": "1:30:00", "remaining_time": "5:12:38"}
|
||||
{"current_steps": 850, "total_steps": 3780, "loss": 0.3087, "lr": 3.813784641112563e-05, "epoch": 1.574074074074074, "percentage": 22.49, "elapsed_time": "1:30:31", "remaining_time": "5:12:03"}
|
||||
{"current_steps": 855, "total_steps": 3780, "loss": 0.3276, "lr": 3.8098742296362506e-05, "epoch": 1.5833333333333335, "percentage": 22.62, "elapsed_time": "1:31:09", "remaining_time": "5:11:52"}
|
||||
{"current_steps": 860, "total_steps": 3780, "loss": 0.286, "lr": 3.805925233137229e-05, "epoch": 1.5925925925925926, "percentage": 22.75, "elapsed_time": "1:31:40", "remaining_time": "5:11:17"}
|
||||
{"current_steps": 865, "total_steps": 3780, "loss": 0.3016, "lr": 3.801937735804838e-05, "epoch": 1.6018518518518519, "percentage": 22.88, "elapsed_time": "1:32:08", "remaining_time": "5:10:32"}
|
||||
{"current_steps": 870, "total_steps": 3780, "loss": 0.3373, "lr": 3.7979118226492266e-05, "epoch": 1.6111111111111112, "percentage": 23.02, "elapsed_time": "1:32:42", "remaining_time": "5:10:06"}
|
||||
{"current_steps": 875, "total_steps": 3780, "loss": 0.2853, "lr": 3.793847579499534e-05, "epoch": 1.6203703703703702, "percentage": 23.15, "elapsed_time": "1:33:09", "remaining_time": "5:09:17"}
|
||||
{"current_steps": 880, "total_steps": 3780, "loss": 0.3037, "lr": 3.789745093002065e-05, "epoch": 1.6296296296296298, "percentage": 23.28, "elapsed_time": "1:33:42", "remaining_time": "5:08:48"}
|
||||
{"current_steps": 885, "total_steps": 3780, "loss": 0.3489, "lr": 3.785604450618443e-05, "epoch": 1.6388888888888888, "percentage": 23.41, "elapsed_time": "1:34:16", "remaining_time": "5:08:23"}
|
||||
{"current_steps": 890, "total_steps": 3780, "loss": 0.302, "lr": 3.781425740623739e-05, "epoch": 1.6481481481481481, "percentage": 23.54, "elapsed_time": "1:34:46", "remaining_time": "5:07:46"}
|
||||
{"current_steps": 895, "total_steps": 3780, "loss": 0.2812, "lr": 3.777209052104598e-05, "epoch": 1.6574074074074074, "percentage": 23.68, "elapsed_time": "1:35:13", "remaining_time": "5:06:55"}
|
||||
{"current_steps": 900, "total_steps": 3780, "loss": 0.3162, "lr": 3.7729544749573335e-05, "epoch": 1.6666666666666665, "percentage": 23.81, "elapsed_time": "1:35:45", "remaining_time": "5:06:24"}
|
||||
{"current_steps": 905, "total_steps": 3780, "loss": 0.2879, "lr": 3.768662099886014e-05, "epoch": 1.675925925925926, "percentage": 23.94, "elapsed_time": "1:36:14", "remaining_time": "5:05:45"}
|
||||
{"current_steps": 910, "total_steps": 3780, "loss": 0.2844, "lr": 3.7643320184005284e-05, "epoch": 1.6851851851851851, "percentage": 24.07, "elapsed_time": "1:36:42", "remaining_time": "5:04:59"}
|
||||
{"current_steps": 915, "total_steps": 3780, "loss": 0.3083, "lr": 3.7599643228146355e-05, "epoch": 1.6944444444444444, "percentage": 24.21, "elapsed_time": "1:37:11", "remaining_time": "5:04:19"}
|
||||
{"current_steps": 920, "total_steps": 3780, "loss": 0.2949, "lr": 3.755559106243994e-05, "epoch": 1.7037037037037037, "percentage": 24.34, "elapsed_time": "1:37:45", "remaining_time": "5:03:53"}
|
||||
{"current_steps": 925, "total_steps": 3780, "loss": 0.3055, "lr": 3.7511164626041823e-05, "epoch": 1.7129629629629628, "percentage": 24.47, "elapsed_time": "1:38:20", "remaining_time": "5:03:32"}
|
||||
{"current_steps": 930, "total_steps": 3780, "loss": 0.2983, "lr": 3.746636486608689e-05, "epoch": 1.7222222222222223, "percentage": 24.6, "elapsed_time": "1:38:54", "remaining_time": "5:03:05"}
|
||||
{"current_steps": 935, "total_steps": 3780, "loss": 0.2909, "lr": 3.7421192737669005e-05, "epoch": 1.7314814814814814, "percentage": 24.74, "elapsed_time": "1:39:13", "remaining_time": "5:01:55"}
|
||||
{"current_steps": 940, "total_steps": 3780, "loss": 0.2824, "lr": 3.737564920382061e-05, "epoch": 1.7407407407407407, "percentage": 24.87, "elapsed_time": "1:39:40", "remaining_time": "5:01:07"}
|
||||
{"current_steps": 945, "total_steps": 3780, "loss": 0.3079, "lr": 3.732973523549221e-05, "epoch": 1.75, "percentage": 25.0, "elapsed_time": "1:40:09", "remaining_time": "5:00:28"}
|
||||
{"current_steps": 950, "total_steps": 3780, "loss": 0.2833, "lr": 3.728345181153165e-05, "epoch": 1.7592592592592593, "percentage": 25.13, "elapsed_time": "1:40:34", "remaining_time": "4:59:35"}
|
||||
{"current_steps": 955, "total_steps": 3780, "loss": 0.2943, "lr": 3.7236799918663284e-05, "epoch": 1.7685185185185186, "percentage": 25.26, "elapsed_time": "1:40:59", "remaining_time": "4:58:43"}
|
||||
{"current_steps": 960, "total_steps": 3780, "loss": 0.2797, "lr": 3.7189780551466905e-05, "epoch": 1.7777777777777777, "percentage": 25.4, "elapsed_time": "1:41:26", "remaining_time": "4:57:59"}
|
||||
{"current_steps": 965, "total_steps": 3780, "loss": 0.2975, "lr": 3.714239471235657e-05, "epoch": 1.7870370370370372, "percentage": 25.53, "elapsed_time": "1:42:01", "remaining_time": "4:57:36"}
|
||||
{"current_steps": 970, "total_steps": 3780, "loss": 0.2955, "lr": 3.7094643411559194e-05, "epoch": 1.7962962962962963, "percentage": 25.66, "elapsed_time": "1:42:26", "remaining_time": "4:56:46"}
|
||||
{"current_steps": 975, "total_steps": 3780, "loss": 0.3247, "lr": 3.704652766709305e-05, "epoch": 1.8055555555555556, "percentage": 25.79, "elapsed_time": "1:43:05", "remaining_time": "4:56:36"}
|
||||
{"current_steps": 980, "total_steps": 3780, "loss": 0.2864, "lr": 3.699804850474603e-05, "epoch": 1.8148148148148149, "percentage": 25.93, "elapsed_time": "1:43:34", "remaining_time": "4:55:54"}
|
||||
{"current_steps": 985, "total_steps": 3780, "loss": 0.2728, "lr": 3.6949206958053825e-05, "epoch": 1.824074074074074, "percentage": 26.06, "elapsed_time": "1:44:01", "remaining_time": "4:55:11"}
|
||||
{"current_steps": 990, "total_steps": 3780, "loss": 0.3028, "lr": 3.690000406827783e-05, "epoch": 1.8333333333333335, "percentage": 26.19, "elapsed_time": "1:44:40", "remaining_time": "4:55:00"}
|
||||
{"current_steps": 995, "total_steps": 3780, "loss": 0.3255, "lr": 3.685044088438299e-05, "epoch": 1.8425925925925926, "percentage": 26.32, "elapsed_time": "1:45:14", "remaining_time": "4:54:34"}
|
||||
{"current_steps": 1000, "total_steps": 3780, "loss": 0.2913, "lr": 3.680051846301543e-05, "epoch": 1.8518518518518519, "percentage": 26.46, "elapsed_time": "1:45:42", "remaining_time": "4:53:50"}
|
||||
{"current_steps": 1005, "total_steps": 3780, "loss": 0.3253, "lr": 3.675023786847991e-05, "epoch": 1.8611111111111112, "percentage": 26.59, "elapsed_time": "1:46:25", "remaining_time": "4:53:51"}
|
||||
{"current_steps": 1010, "total_steps": 3780, "loss": 0.2855, "lr": 3.6699600172717137e-05, "epoch": 1.8703703703703702, "percentage": 26.72, "elapsed_time": "1:46:51", "remaining_time": "4:53:04"}
|
||||
{"current_steps": 1015, "total_steps": 3780, "loss": 0.3389, "lr": 3.6648606455280944e-05, "epoch": 1.8796296296296298, "percentage": 26.85, "elapsed_time": "1:47:22", "remaining_time": "4:52:30"}
|
||||
{"current_steps": 1020, "total_steps": 3780, "loss": 0.3175, "lr": 3.659725780331524e-05, "epoch": 1.8888888888888888, "percentage": 26.98, "elapsed_time": "1:47:54", "remaining_time": "4:51:59"}
|
||||
{"current_steps": 1025, "total_steps": 3780, "loss": 0.2937, "lr": 3.654555531153084e-05, "epoch": 1.8981481481481481, "percentage": 27.12, "elapsed_time": "1:48:14", "remaining_time": "4:50:57"}
|
||||
{"current_steps": 1030, "total_steps": 3780, "loss": 0.3193, "lr": 3.649350008218214e-05, "epoch": 1.9074074074074074, "percentage": 27.25, "elapsed_time": "1:48:48", "remaining_time": "4:50:31"}
|
||||
{"current_steps": 1035, "total_steps": 3780, "loss": 0.3097, "lr": 3.64410932250436e-05, "epoch": 1.9166666666666665, "percentage": 27.38, "elapsed_time": "1:49:25", "remaining_time": "4:50:12"}
|
||||
{"current_steps": 1040, "total_steps": 3780, "loss": 0.3063, "lr": 3.638833585738611e-05, "epoch": 1.925925925925926, "percentage": 27.51, "elapsed_time": "1:49:51", "remaining_time": "4:49:26"}
|
||||
{"current_steps": 1045, "total_steps": 3780, "loss": 0.2944, "lr": 3.633522910395314e-05, "epoch": 1.9351851851851851, "percentage": 27.65, "elapsed_time": "1:50:23", "remaining_time": "4:48:55"}
|
||||
{"current_steps": 1050, "total_steps": 3780, "loss": 0.2792, "lr": 3.628177409693677e-05, "epoch": 1.9444444444444444, "percentage": 27.78, "elapsed_time": "1:51:02", "remaining_time": "4:48:43"}
|
||||
{"current_steps": 1055, "total_steps": 3780, "loss": 0.3049, "lr": 3.622797197595359e-05, "epoch": 1.9537037037037037, "percentage": 27.91, "elapsed_time": "1:51:37", "remaining_time": "4:48:19"}
|
||||
{"current_steps": 1060, "total_steps": 3780, "loss": 0.2843, "lr": 3.6173823888020335e-05, "epoch": 1.9629629629629628, "percentage": 28.04, "elapsed_time": "1:52:03", "remaining_time": "4:47:32"}
|
||||
{"current_steps": 1065, "total_steps": 3780, "loss": 0.2933, "lr": 3.611933098752949e-05, "epoch": 1.9722222222222223, "percentage": 28.17, "elapsed_time": "1:52:45", "remaining_time": "4:47:26"}
|
||||
{"current_steps": 1070, "total_steps": 3780, "loss": 0.2992, "lr": 3.6064494436224655e-05, "epoch": 1.9814814814814814, "percentage": 28.31, "elapsed_time": "1:53:14", "remaining_time": "4:46:49"}
|
||||
{"current_steps": 1075, "total_steps": 3780, "loss": 0.3318, "lr": 3.6009315403175786e-05, "epoch": 1.9907407407407407, "percentage": 28.44, "elapsed_time": "1:53:48", "remaining_time": "4:46:21"}
|
||||
{"current_steps": 1080, "total_steps": 3780, "loss": 0.314, "lr": 3.595379506475426e-05, "epoch": 2.0, "percentage": 28.57, "elapsed_time": "1:54:24", "remaining_time": "4:46:00"}
|
||||
{"current_steps": 1085, "total_steps": 3780, "loss": 0.2771, "lr": 3.5897934604607795e-05, "epoch": 2.009259259259259, "percentage": 28.7, "elapsed_time": "1:54:49", "remaining_time": "4:45:11"}
|
||||
{"current_steps": 1090, "total_steps": 3780, "loss": 0.2703, "lr": 3.584173521363525e-05, "epoch": 2.0185185185185186, "percentage": 28.84, "elapsed_time": "1:55:10", "remaining_time": "4:44:13"}
|
||||
{"current_steps": 1095, "total_steps": 3780, "loss": 0.2652, "lr": 3.578519808996117e-05, "epoch": 2.0277777777777777, "percentage": 28.97, "elapsed_time": "1:55:39", "remaining_time": "4:43:36"}
|
||||
{"current_steps": 1100, "total_steps": 3780, "loss": 0.2893, "lr": 3.572832443891033e-05, "epoch": 2.037037037037037, "percentage": 29.1, "elapsed_time": "1:56:06", "remaining_time": "4:42:53"}
|
||||
{"current_steps": 1105, "total_steps": 3780, "loss": 0.2755, "lr": 3.567111547298194e-05, "epoch": 2.0462962962962963, "percentage": 29.23, "elapsed_time": "1:56:41", "remaining_time": "4:42:29"}
|
||||
{"current_steps": 1110, "total_steps": 3780, "loss": 0.2831, "lr": 3.561357241182388e-05, "epoch": 2.0555555555555554, "percentage": 29.37, "elapsed_time": "1:57:15", "remaining_time": "4:42:03"}
|
||||
{"current_steps": 1115, "total_steps": 3780, "loss": 0.2801, "lr": 3.555569648220666e-05, "epoch": 2.064814814814815, "percentage": 29.5, "elapsed_time": "1:57:48", "remaining_time": "4:41:35"}
|
||||
{"current_steps": 1120, "total_steps": 3780, "loss": 0.2801, "lr": 3.549748891799726e-05, "epoch": 2.074074074074074, "percentage": 29.63, "elapsed_time": "1:58:21", "remaining_time": "4:41:06"}
|
||||
{"current_steps": 1125, "total_steps": 3780, "loss": 0.273, "lr": 3.543895096013284e-05, "epoch": 2.0833333333333335, "percentage": 29.76, "elapsed_time": "1:58:49", "remaining_time": "4:40:26"}
|
||||
{"current_steps": 1130, "total_steps": 3780, "loss": 0.2746, "lr": 3.538008385659427e-05, "epoch": 2.0925925925925926, "percentage": 29.89, "elapsed_time": "1:59:24", "remaining_time": "4:40:00"}
|
||||
{"current_steps": 1135, "total_steps": 3780, "loss": 0.2888, "lr": 3.532088886237956e-05, "epoch": 2.1018518518518516, "percentage": 30.03, "elapsed_time": "1:59:59", "remaining_time": "4:39:37"}
|
||||
{"current_steps": 1140, "total_steps": 3780, "loss": 0.2689, "lr": 3.5261367239477055e-05, "epoch": 2.111111111111111, "percentage": 30.16, "elapsed_time": "2:00:33", "remaining_time": "4:39:11"}
|
||||
{"current_steps": 1145, "total_steps": 3780, "loss": 0.2872, "lr": 3.520152025683856e-05, "epoch": 2.1203703703703702, "percentage": 30.29, "elapsed_time": "2:01:08", "remaining_time": "4:38:47"}
|
||||
{"current_steps": 1150, "total_steps": 3780, "loss": 0.2957, "lr": 3.514134919035229e-05, "epoch": 2.1296296296296298, "percentage": 30.42, "elapsed_time": "2:01:48", "remaining_time": "4:38:33"}
|
||||
{"current_steps": 1155, "total_steps": 3780, "loss": 0.2852, "lr": 3.5080855322815635e-05, "epoch": 2.138888888888889, "percentage": 30.56, "elapsed_time": "2:02:16", "remaining_time": "4:37:54"}
|
||||
{"current_steps": 1160, "total_steps": 3780, "loss": 0.2989, "lr": 3.5020039943907855e-05, "epoch": 2.148148148148148, "percentage": 30.69, "elapsed_time": "2:02:53", "remaining_time": "4:37:34"}
|
||||
{"current_steps": 1165, "total_steps": 3780, "loss": 0.2764, "lr": 3.495890435016258e-05, "epoch": 2.1574074074074074, "percentage": 30.82, "elapsed_time": "2:03:32", "remaining_time": "4:37:18"}
|
||||
{"current_steps": 1170, "total_steps": 3780, "loss": 0.2789, "lr": 3.489744984494012e-05, "epoch": 2.1666666666666665, "percentage": 30.95, "elapsed_time": "2:04:08", "remaining_time": "4:36:56"}
|
||||
{"current_steps": 1175, "total_steps": 3780, "loss": 0.2715, "lr": 3.4835677738399745e-05, "epoch": 2.175925925925926, "percentage": 31.08, "elapsed_time": "2:04:42", "remaining_time": "4:36:29"}
|
||||
{"current_steps": 1180, "total_steps": 3780, "loss": 0.3211, "lr": 3.477358934747172e-05, "epoch": 2.185185185185185, "percentage": 31.22, "elapsed_time": "2:05:20", "remaining_time": "4:36:10"}
|
||||
{"current_steps": 1185, "total_steps": 3780, "loss": 0.2838, "lr": 3.47111859958292e-05, "epoch": 2.1944444444444446, "percentage": 31.35, "elapsed_time": "2:05:53", "remaining_time": "4:35:40"}
|
||||
{"current_steps": 1190, "total_steps": 3780, "loss": 0.2656, "lr": 3.464846901386008e-05, "epoch": 2.2037037037037037, "percentage": 31.48, "elapsed_time": "2:06:23", "remaining_time": "4:35:05"}
|
||||
{"current_steps": 1195, "total_steps": 3780, "loss": 0.275, "lr": 3.458543973863859e-05, "epoch": 2.212962962962963, "percentage": 31.61, "elapsed_time": "2:06:51", "remaining_time": "4:34:24"}
|
||||
{"current_steps": 1200, "total_steps": 3780, "loss": 0.2785, "lr": 3.452209951389677e-05, "epoch": 2.2222222222222223, "percentage": 31.75, "elapsed_time": "2:07:32", "remaining_time": "4:34:12"}
|
||||
{"current_steps": 1205, "total_steps": 3780, "loss": 0.2952, "lr": 3.445844968999586e-05, "epoch": 2.2314814814814814, "percentage": 31.88, "elapsed_time": "2:08:02", "remaining_time": "4:33:37"}
|
||||
{"current_steps": 1210, "total_steps": 3780, "loss": 0.2492, "lr": 3.4394491623897506e-05, "epoch": 2.240740740740741, "percentage": 32.01, "elapsed_time": "2:08:27", "remaining_time": "4:32:50"}
|
||||
{"current_steps": 1215, "total_steps": 3780, "loss": 0.2776, "lr": 3.4330226679134805e-05, "epoch": 2.25, "percentage": 32.14, "elapsed_time": "2:08:59", "remaining_time": "4:32:18"}
|
||||
{"current_steps": 1220, "total_steps": 3780, "loss": 0.257, "lr": 3.426565622578327e-05, "epoch": 2.259259259259259, "percentage": 32.28, "elapsed_time": "2:09:30", "remaining_time": "4:31:44"}
|
||||
{"current_steps": 1225, "total_steps": 3780, "loss": 0.278, "lr": 3.420078164043161e-05, "epoch": 2.2685185185185186, "percentage": 32.41, "elapsed_time": "2:10:01", "remaining_time": "4:31:11"}
|
||||
{"current_steps": 1230, "total_steps": 3780, "loss": 0.3378, "lr": 3.413560430615235e-05, "epoch": 2.2777777777777777, "percentage": 32.54, "elapsed_time": "2:10:43", "remaining_time": "4:31:01"}
|
||||
{"current_steps": 1235, "total_steps": 3780, "loss": 0.2656, "lr": 3.407012561247239e-05, "epoch": 2.287037037037037, "percentage": 32.67, "elapsed_time": "2:11:19", "remaining_time": "4:30:37"}
|
||||
{"current_steps": 1240, "total_steps": 3780, "loss": 0.2342, "lr": 3.400434695534337e-05, "epoch": 2.2962962962962963, "percentage": 32.8, "elapsed_time": "2:11:54", "remaining_time": "4:30:12"}
|
||||
{"current_steps": 1245, "total_steps": 3780, "loss": 0.3009, "lr": 3.393826973711189e-05, "epoch": 2.3055555555555554, "percentage": 32.94, "elapsed_time": "2:12:20", "remaining_time": "4:29:28"}
|
||||
{"current_steps": 1250, "total_steps": 3780, "loss": 0.2828, "lr": 3.3871895366489624e-05, "epoch": 2.314814814814815, "percentage": 33.07, "elapsed_time": "2:12:52", "remaining_time": "4:28:56"}
|
||||
{"current_steps": 1255, "total_steps": 3780, "loss": 0.2723, "lr": 3.38052252585233e-05, "epoch": 2.324074074074074, "percentage": 33.2, "elapsed_time": "2:13:26", "remaining_time": "4:28:28"}
|
||||
{"current_steps": 1260, "total_steps": 3780, "loss": 0.2729, "lr": 3.373826083456451e-05, "epoch": 2.3333333333333335, "percentage": 33.33, "elapsed_time": "2:14:01", "remaining_time": "4:28:03"}
|
||||
{"current_steps": 1265, "total_steps": 3780, "loss": 0.2815, "lr": 3.367100352223944e-05, "epoch": 2.3425925925925926, "percentage": 33.47, "elapsed_time": "2:14:29", "remaining_time": "4:27:24"}
|
||||
{"current_steps": 1270, "total_steps": 3780, "loss": 0.2379, "lr": 3.360345475541839e-05, "epoch": 2.351851851851852, "percentage": 33.6, "elapsed_time": "2:14:58", "remaining_time": "4:26:44"}
|
||||
{"current_steps": 1275, "total_steps": 3780, "loss": 0.2858, "lr": 3.353561597418524e-05, "epoch": 2.361111111111111, "percentage": 33.73, "elapsed_time": "2:15:28", "remaining_time": "4:26:10"}
|
||||
{"current_steps": 1280, "total_steps": 3780, "loss": 0.2584, "lr": 3.346748862480674e-05, "epoch": 2.3703703703703702, "percentage": 33.86, "elapsed_time": "2:16:01", "remaining_time": "4:25:41"}
|
||||
{"current_steps": 1285, "total_steps": 3780, "loss": 0.2829, "lr": 3.339907415970168e-05, "epoch": 2.3796296296296298, "percentage": 33.99, "elapsed_time": "2:16:39", "remaining_time": "4:25:19"}
|
||||
{"current_steps": 1290, "total_steps": 3780, "loss": 0.2844, "lr": 3.333037403740989e-05, "epoch": 2.388888888888889, "percentage": 34.13, "elapsed_time": "2:17:08", "remaining_time": "4:24:42"}
|
||||
{"current_steps": 1295, "total_steps": 3780, "loss": 0.2962, "lr": 3.326138972256121e-05, "epoch": 2.398148148148148, "percentage": 34.26, "elapsed_time": "2:17:43", "remaining_time": "4:24:16"}
|
||||
{"current_steps": 1300, "total_steps": 3780, "loss": 0.2645, "lr": 3.3192122685844214e-05, "epoch": 2.4074074074074074, "percentage": 34.39, "elapsed_time": "2:18:10", "remaining_time": "4:23:35"}
|
||||
{"current_steps": 1305, "total_steps": 3780, "loss": 0.2771, "lr": 3.312257440397488e-05, "epoch": 2.4166666666666665, "percentage": 34.52, "elapsed_time": "2:18:39", "remaining_time": "4:22:58"}
|
||||
{"current_steps": 1310, "total_steps": 3780, "loss": 0.2697, "lr": 3.305274635966509e-05, "epoch": 2.425925925925926, "percentage": 34.66, "elapsed_time": "2:19:13", "remaining_time": "4:22:31"}
|
||||
{"current_steps": 1315, "total_steps": 3780, "loss": 0.3072, "lr": 3.298264004159104e-05, "epoch": 2.435185185185185, "percentage": 34.79, "elapsed_time": "2:19:45", "remaining_time": "4:21:58"}
|
||||
{"current_steps": 1320, "total_steps": 3780, "loss": 0.2794, "lr": 3.2912256944361484e-05, "epoch": 2.4444444444444446, "percentage": 34.92, "elapsed_time": "2:20:13", "remaining_time": "4:21:20"}
|
||||
{"current_steps": 1325, "total_steps": 3780, "loss": 0.2775, "lr": 3.284159856848589e-05, "epoch": 2.4537037037037037, "percentage": 35.05, "elapsed_time": "2:20:48", "remaining_time": "4:20:52"}
|
||||
{"current_steps": 1330, "total_steps": 3780, "loss": 0.2757, "lr": 3.2770666420342426e-05, "epoch": 2.462962962962963, "percentage": 35.19, "elapsed_time": "2:21:16", "remaining_time": "4:20:14"}
|
||||
{"current_steps": 1335, "total_steps": 3780, "loss": 0.2614, "lr": 3.269946201214586e-05, "epoch": 2.4722222222222223, "percentage": 35.32, "elapsed_time": "2:21:45", "remaining_time": "4:19:38"}
|
||||
{"current_steps": 1340, "total_steps": 3780, "loss": 0.3151, "lr": 3.262798686191533e-05, "epoch": 2.4814814814814814, "percentage": 35.45, "elapsed_time": "2:22:18", "remaining_time": "4:19:08"}
|
||||
{"current_steps": 1345, "total_steps": 3780, "loss": 0.2747, "lr": 3.255624249344198e-05, "epoch": 2.490740740740741, "percentage": 35.58, "elapsed_time": "2:22:43", "remaining_time": "4:18:24"}
|
||||
{"current_steps": 1350, "total_steps": 3780, "loss": 0.2737, "lr": 3.248423043625642e-05, "epoch": 2.5, "percentage": 35.71, "elapsed_time": "2:23:13", "remaining_time": "4:17:47"}
|
||||
{"current_steps": 1355, "total_steps": 3780, "loss": 0.2813, "lr": 3.241195222559621e-05, "epoch": 2.5092592592592595, "percentage": 35.85, "elapsed_time": "2:23:46", "remaining_time": "4:17:17"}
|
||||
{"current_steps": 1360, "total_steps": 3780, "loss": 0.2862, "lr": 3.2339409402373056e-05, "epoch": 2.5185185185185186, "percentage": 35.98, "elapsed_time": "2:24:13", "remaining_time": "4:16:38"}
|
||||
{"current_steps": 1365, "total_steps": 3780, "loss": 0.2842, "lr": 3.2266603513139995e-05, "epoch": 2.5277777777777777, "percentage": 36.11, "elapsed_time": "2:24:47", "remaining_time": "4:16:10"}
|
||||
{"current_steps": 1370, "total_steps": 3780, "loss": 0.2767, "lr": 3.2193536110058414e-05, "epoch": 2.537037037037037, "percentage": 36.24, "elapsed_time": "2:25:19", "remaining_time": "4:15:38"}
|
||||
{"current_steps": 1375, "total_steps": 3780, "loss": 0.2592, "lr": 3.212020875086495e-05, "epoch": 2.5462962962962963, "percentage": 36.38, "elapsed_time": "2:25:43", "remaining_time": "4:14:53"}
|
||||
{"current_steps": 1380, "total_steps": 3780, "loss": 0.2603, "lr": 3.20466229988383e-05, "epoch": 2.5555555555555554, "percentage": 36.51, "elapsed_time": "2:26:11", "remaining_time": "4:14:14"}
|
||||
{"current_steps": 1385, "total_steps": 3780, "loss": 0.2618, "lr": 3.197278042276587e-05, "epoch": 2.564814814814815, "percentage": 36.64, "elapsed_time": "2:26:38", "remaining_time": "4:13:35"}
|
||||
{"current_steps": 1390, "total_steps": 3780, "loss": 0.2479, "lr": 3.189868259691036e-05, "epoch": 2.574074074074074, "percentage": 36.77, "elapsed_time": "2:27:10", "remaining_time": "4:13:02"}
|
||||
{"current_steps": 1395, "total_steps": 3780, "loss": 0.2828, "lr": 3.182433110097618e-05, "epoch": 2.5833333333333335, "percentage": 36.9, "elapsed_time": "2:27:43", "remaining_time": "4:12:33"}
|
||||
{"current_steps": 1400, "total_steps": 3780, "loss": 0.3069, "lr": 3.174972752007577e-05, "epoch": 2.5925925925925926, "percentage": 37.04, "elapsed_time": "2:28:17", "remaining_time": "4:12:05"}
|
||||
{"current_steps": 1405, "total_steps": 3780, "loss": 0.2703, "lr": 3.1674873444695804e-05, "epoch": 2.601851851851852, "percentage": 37.17, "elapsed_time": "2:28:48", "remaining_time": "4:11:33"}
|
||||
{"current_steps": 1410, "total_steps": 3780, "loss": 0.269, "lr": 3.15997704706633e-05, "epoch": 2.611111111111111, "percentage": 37.3, "elapsed_time": "2:29:19", "remaining_time": "4:10:59"}
|
||||
{"current_steps": 1415, "total_steps": 3780, "loss": 0.2669, "lr": 3.152442019911161e-05, "epoch": 2.6203703703703702, "percentage": 37.43, "elapsed_time": "2:29:42", "remaining_time": "4:10:13"}
|
||||
{"current_steps": 1420, "total_steps": 3780, "loss": 0.2824, "lr": 3.144882423644623e-05, "epoch": 2.6296296296296298, "percentage": 37.57, "elapsed_time": "2:30:13", "remaining_time": "4:09:39"}
|
||||
{"current_steps": 1425, "total_steps": 3780, "loss": 0.2909, "lr": 3.1372984194310614e-05, "epoch": 2.638888888888889, "percentage": 37.7, "elapsed_time": "2:30:36", "remaining_time": "4:08:53"}
|
||||
{"current_steps": 1430, "total_steps": 3780, "loss": 0.2572, "lr": 3.1296901689551766e-05, "epoch": 2.648148148148148, "percentage": 37.83, "elapsed_time": "2:31:01", "remaining_time": "4:08:10"}
|
||||
{"current_steps": 1435, "total_steps": 3780, "loss": 0.2965, "lr": 3.122057834418582e-05, "epoch": 2.6574074074074074, "percentage": 37.96, "elapsed_time": "2:31:31", "remaining_time": "4:07:36"}
|
||||
{"current_steps": 1440, "total_steps": 3780, "loss": 0.3095, "lr": 3.1144015785363405e-05, "epoch": 2.6666666666666665, "percentage": 38.1, "elapsed_time": "2:32:08", "remaining_time": "4:07:14"}
|
||||
{"current_steps": 1445, "total_steps": 3780, "loss": 0.2989, "lr": 3.1067215645335e-05, "epoch": 2.675925925925926, "percentage": 38.23, "elapsed_time": "2:32:52", "remaining_time": "4:07:01"}
|
||||
{"current_steps": 1450, "total_steps": 3780, "loss": 0.2843, "lr": 3.0990179561416124e-05, "epoch": 2.685185185185185, "percentage": 38.36, "elapsed_time": "2:33:24", "remaining_time": "4:06:30"}
|
||||
{"current_steps": 1455, "total_steps": 3780, "loss": 0.2937, "lr": 3.0912909175952404e-05, "epoch": 2.6944444444444446, "percentage": 38.49, "elapsed_time": "2:33:57", "remaining_time": "4:06:01"}
|
||||
{"current_steps": 1460, "total_steps": 3780, "loss": 0.2675, "lr": 3.08354061362846e-05, "epoch": 2.7037037037037037, "percentage": 38.62, "elapsed_time": "2:34:20", "remaining_time": "4:05:15"}
|
||||
{"current_steps": 1465, "total_steps": 3780, "loss": 0.2677, "lr": 3.075767209471345e-05, "epoch": 2.712962962962963, "percentage": 38.76, "elapsed_time": "2:34:52", "remaining_time": "4:04:43"}
|
||||
{"current_steps": 1470, "total_steps": 3780, "loss": 0.2633, "lr": 3.06797087084645e-05, "epoch": 2.7222222222222223, "percentage": 38.89, "elapsed_time": "2:35:30", "remaining_time": "4:04:21"}
|
||||
{"current_steps": 1475, "total_steps": 3780, "loss": 0.334, "lr": 3.060151763965267e-05, "epoch": 2.7314814814814814, "percentage": 39.02, "elapsed_time": "2:36:06", "remaining_time": "4:03:57"}
|
||||
{"current_steps": 1480, "total_steps": 3780, "loss": 0.2816, "lr": 3.052310055524696e-05, "epoch": 2.7407407407407405, "percentage": 39.15, "elapsed_time": "2:36:37", "remaining_time": "4:03:23"}
|
||||
{"current_steps": 1485, "total_steps": 3780, "loss": 0.2749, "lr": 3.044445912703477e-05, "epoch": 2.75, "percentage": 39.29, "elapsed_time": "2:37:12", "remaining_time": "4:02:57"}
|
||||
{"current_steps": 1490, "total_steps": 3780, "loss": 0.2834, "lr": 3.036559503158637e-05, "epoch": 2.7592592592592595, "percentage": 39.42, "elapsed_time": "2:37:42", "remaining_time": "4:02:23"}
|
||||
{"current_steps": 1495, "total_steps": 3780, "loss": 0.2685, "lr": 3.0286509950219077e-05, "epoch": 2.7685185185185186, "percentage": 39.55, "elapsed_time": "2:38:13", "remaining_time": "4:01:49"}
|
||||
{"current_steps": 1500, "total_steps": 3780, "loss": 0.3213, "lr": 3.020720556896147e-05, "epoch": 2.7777777777777777, "percentage": 39.68, "elapsed_time": "2:38:43", "remaining_time": "4:01:16"}
|
||||
{"current_steps": 1505, "total_steps": 3780, "loss": 0.2866, "lr": 3.0127683578517418e-05, "epoch": 2.787037037037037, "percentage": 39.81, "elapsed_time": "2:39:25", "remaining_time": "4:00:59"}
|
||||
{"current_steps": 1510, "total_steps": 3780, "loss": 0.2733, "lr": 3.004794567423002e-05, "epoch": 2.7962962962962963, "percentage": 39.95, "elapsed_time": "2:40:01", "remaining_time": "4:00:33"}
|
||||
{"current_steps": 1515, "total_steps": 3780, "loss": 0.2822, "lr": 2.9967993556045504e-05, "epoch": 2.8055555555555554, "percentage": 40.08, "elapsed_time": "2:40:36", "remaining_time": "4:00:06"}
|
||||
{"current_steps": 1520, "total_steps": 3780, "loss": 0.2769, "lr": 2.988782892847694e-05, "epoch": 2.814814814814815, "percentage": 40.21, "elapsed_time": "2:41:08", "remaining_time": "3:59:34"}
|
||||
{"current_steps": 1525, "total_steps": 3780, "loss": 0.2823, "lr": 2.9807453500567937e-05, "epoch": 2.824074074074074, "percentage": 40.34, "elapsed_time": "2:41:42", "remaining_time": "3:59:07"}
|
||||
{"current_steps": 1530, "total_steps": 3780, "loss": 0.2886, "lr": 2.9726868985856186e-05, "epoch": 2.8333333333333335, "percentage": 40.48, "elapsed_time": "2:42:14", "remaining_time": "3:58:35"}
|
||||
{"current_steps": 1535, "total_steps": 3780, "loss": 0.2523, "lr": 2.9646077102336933e-05, "epoch": 2.8425925925925926, "percentage": 40.61, "elapsed_time": "2:42:41", "remaining_time": "3:57:57"}
|
||||
{"current_steps": 1540, "total_steps": 3780, "loss": 0.2735, "lr": 2.956507957242637e-05, "epoch": 2.851851851851852, "percentage": 40.74, "elapsed_time": "2:43:08", "remaining_time": "3:57:17"}
|
||||
{"current_steps": 1545, "total_steps": 3780, "loss": 0.2732, "lr": 2.9483878122924874e-05, "epoch": 2.861111111111111, "percentage": 40.87, "elapsed_time": "2:43:35", "remaining_time": "3:56:38"}
|
||||
{"current_steps": 1550, "total_steps": 3780, "loss": 0.2878, "lr": 2.940247448498025e-05, "epoch": 2.8703703703703702, "percentage": 41.01, "elapsed_time": "2:44:06", "remaining_time": "3:56:05"}
|
||||
{"current_steps": 1555, "total_steps": 3780, "loss": 0.2621, "lr": 2.9320870394050783e-05, "epoch": 2.8796296296296298, "percentage": 41.14, "elapsed_time": "2:44:40", "remaining_time": "3:55:37"}
|
||||
{"current_steps": 1560, "total_steps": 3780, "loss": 0.2698, "lr": 2.9239067589868228e-05, "epoch": 2.888888888888889, "percentage": 41.27, "elapsed_time": "2:45:12", "remaining_time": "3:55:05"}
|
||||
{"current_steps": 1565, "total_steps": 3780, "loss": 0.2724, "lr": 2.9157067816400765e-05, "epoch": 2.898148148148148, "percentage": 41.4, "elapsed_time": "2:45:45", "remaining_time": "3:54:36"}
|
||||
{"current_steps": 1570, "total_steps": 3780, "loss": 0.2578, "lr": 2.90748728218158e-05, "epoch": 2.9074074074074074, "percentage": 41.53, "elapsed_time": "2:46:15", "remaining_time": "3:54:02"}
|
||||
{"current_steps": 1575, "total_steps": 3780, "loss": 0.2808, "lr": 2.8992484358442673e-05, "epoch": 2.9166666666666665, "percentage": 41.67, "elapsed_time": "2:46:48", "remaining_time": "3:53:32"}
|
||||
{"current_steps": 1580, "total_steps": 3780, "loss": 0.2887, "lr": 2.8909904182735337e-05, "epoch": 2.925925925925926, "percentage": 41.8, "elapsed_time": "2:47:16", "remaining_time": "3:52:54"}
|
||||
{"current_steps": 1585, "total_steps": 3780, "loss": 0.2686, "lr": 2.8827134055234883e-05, "epoch": 2.935185185185185, "percentage": 41.93, "elapsed_time": "2:47:49", "remaining_time": "3:52:24"}
|
||||
{"current_steps": 1590, "total_steps": 3780, "loss": 0.266, "lr": 2.874417574053202e-05, "epoch": 2.9444444444444446, "percentage": 42.06, "elapsed_time": "2:48:12", "remaining_time": "3:51:41"}
|
||||
{"current_steps": 1595, "total_steps": 3780, "loss": 0.2668, "lr": 2.8661031007229443e-05, "epoch": 2.9537037037037037, "percentage": 42.2, "elapsed_time": "2:48:48", "remaining_time": "3:51:15"}
|
||||
{"current_steps": 1600, "total_steps": 3780, "loss": 0.2815, "lr": 2.857770162790416e-05, "epoch": 2.962962962962963, "percentage": 42.33, "elapsed_time": "2:49:20", "remaining_time": "3:50:43"}
|
||||
{"current_steps": 1605, "total_steps": 3780, "loss": 0.237, "lr": 2.8494189379069662e-05, "epoch": 2.9722222222222223, "percentage": 42.46, "elapsed_time": "2:49:51", "remaining_time": "3:50:10"}
|
||||
{"current_steps": 1610, "total_steps": 3780, "loss": 0.2898, "lr": 2.8410496041138067e-05, "epoch": 2.9814814814814814, "percentage": 42.59, "elapsed_time": "2:50:18", "remaining_time": "3:49:33"}
|
||||
{"current_steps": 1615, "total_steps": 3780, "loss": 0.287, "lr": 2.8326623398382174e-05, "epoch": 2.9907407407407405, "percentage": 42.72, "elapsed_time": "2:50:45", "remaining_time": "3:48:54"}
|
||||
{"current_steps": 1620, "total_steps": 3780, "loss": 0.2701, "lr": 2.8242573238897395e-05, "epoch": 3.0, "percentage": 42.86, "elapsed_time": "2:51:14", "remaining_time": "3:48:18"}
|
||||
{"current_steps": 1625, "total_steps": 3780, "loss": 0.2137, "lr": 2.815834735456367e-05, "epoch": 3.009259259259259, "percentage": 42.99, "elapsed_time": "2:51:47", "remaining_time": "3:47:49"}
|
||||
{"current_steps": 1630, "total_steps": 3780, "loss": 0.2835, "lr": 2.8073947541007246e-05, "epoch": 3.0185185185185186, "percentage": 43.12, "elapsed_time": "2:52:24", "remaining_time": "3:47:24"}
|
||||
{"current_steps": 1635, "total_steps": 3780, "loss": 0.2607, "lr": 2.7989375597562386e-05, "epoch": 3.0277777777777777, "percentage": 43.25, "elapsed_time": "2:53:01", "remaining_time": "3:46:59"}
|
||||
{"current_steps": 1640, "total_steps": 3780, "loss": 0.2346, "lr": 2.7904633327233016e-05, "epoch": 3.037037037037037, "percentage": 43.39, "elapsed_time": "2:53:25", "remaining_time": "3:46:17"}
|
||||
{"current_steps": 1645, "total_steps": 3780, "loss": 0.2557, "lr": 2.781972253665431e-05, "epoch": 3.0462962962962963, "percentage": 43.52, "elapsed_time": "2:53:57", "remaining_time": "3:45:46"}
|
||||
{"current_steps": 1650, "total_steps": 3780, "loss": 0.2561, "lr": 2.773464503605414e-05, "epoch": 3.0555555555555554, "percentage": 43.65, "elapsed_time": "2:54:29", "remaining_time": "3:45:14"}
|
||||
{"current_steps": 1655, "total_steps": 3780, "loss": 0.2548, "lr": 2.764940263921451e-05, "epoch": 3.064814814814815, "percentage": 43.78, "elapsed_time": "2:55:04", "remaining_time": "3:44:47"}
|
||||
{"current_steps": 1660, "total_steps": 3780, "loss": 0.2916, "lr": 2.7563997163432853e-05, "epoch": 3.074074074074074, "percentage": 43.92, "elapsed_time": "2:55:39", "remaining_time": "3:44:20"}
|
||||
{"current_steps": 1665, "total_steps": 3780, "loss": 0.2479, "lr": 2.7478430429483336e-05, "epoch": 3.0833333333333335, "percentage": 44.05, "elapsed_time": "2:56:18", "remaining_time": "3:43:57"}
|
||||
{"current_steps": 1670, "total_steps": 3780, "loss": 0.281, "lr": 2.7392704261578e-05, "epoch": 3.0925925925925926, "percentage": 44.18, "elapsed_time": "2:56:53", "remaining_time": "3:43:29"}
|
||||
{"current_steps": 1675, "total_steps": 3780, "loss": 0.251, "lr": 2.7306820487327906e-05, "epoch": 3.1018518518518516, "percentage": 44.31, "elapsed_time": "2:57:19", "remaining_time": "3:42:50"}
|
||||
{"current_steps": 1680, "total_steps": 3780, "loss": 0.2638, "lr": 2.7220780937704118e-05, "epoch": 3.111111111111111, "percentage": 44.44, "elapsed_time": "2:57:46", "remaining_time": "3:42:13"}
|
||||
{"current_steps": 1685, "total_steps": 3780, "loss": 0.2468, "lr": 2.713458744699873e-05, "epoch": 3.1203703703703702, "percentage": 44.58, "elapsed_time": "2:58:13", "remaining_time": "3:41:35"}
|
||||
{"current_steps": 1690, "total_steps": 3780, "loss": 0.2513, "lr": 2.704824185278573e-05, "epoch": 3.1296296296296298, "percentage": 44.71, "elapsed_time": "2:58:43", "remaining_time": "3:41:01"}
|
||||
{"current_steps": 1695, "total_steps": 3780, "loss": 0.2602, "lr": 2.6961745995881813e-05, "epoch": 3.138888888888889, "percentage": 44.84, "elapsed_time": "2:59:16", "remaining_time": "3:40:31"}
|
||||
{"current_steps": 1700, "total_steps": 3780, "loss": 0.2469, "lr": 2.6875101720307168e-05, "epoch": 3.148148148148148, "percentage": 44.97, "elapsed_time": "2:59:40", "remaining_time": "3:39:50"}
|
||||
{"current_steps": 1705, "total_steps": 3780, "loss": 0.268, "lr": 2.6788310873246133e-05, "epoch": 3.1574074074074074, "percentage": 45.11, "elapsed_time": "3:00:08", "remaining_time": "3:39:13"}
|
||||
{"current_steps": 1710, "total_steps": 3780, "loss": 0.2842, "lr": 2.670137530500783e-05, "epoch": 3.1666666666666665, "percentage": 45.24, "elapsed_time": "3:00:38", "remaining_time": "3:38:39"}
|
||||
{"current_steps": 1715, "total_steps": 3780, "loss": 0.2678, "lr": 2.661429686898673e-05, "epoch": 3.175925925925926, "percentage": 45.37, "elapsed_time": "3:01:02", "remaining_time": "3:37:59"}
|
||||
{"current_steps": 1720, "total_steps": 3780, "loss": 0.2429, "lr": 2.6527077421623117e-05, "epoch": 3.185185185185185, "percentage": 45.5, "elapsed_time": "3:01:32", "remaining_time": "3:37:25"}
|
||||
{"current_steps": 1725, "total_steps": 3780, "loss": 0.2537, "lr": 2.6439718822363515e-05, "epoch": 3.1944444444444446, "percentage": 45.63, "elapsed_time": "3:01:58", "remaining_time": "3:36:47"}
|
||||
{"current_steps": 1730, "total_steps": 3780, "loss": 0.249, "lr": 2.6352222933621065e-05, "epoch": 3.2037037037037037, "percentage": 45.77, "elapsed_time": "3:02:26", "remaining_time": "3:36:11"}
|
||||
{"current_steps": 1735, "total_steps": 3780, "loss": 0.2541, "lr": 2.62645916207358e-05, "epoch": 3.212962962962963, "percentage": 45.9, "elapsed_time": "3:02:54", "remaining_time": "3:35:35"}
|
||||
{"current_steps": 1740, "total_steps": 3780, "loss": 0.2645, "lr": 2.6176826751934882e-05, "epoch": 3.2222222222222223, "percentage": 46.03, "elapsed_time": "3:03:27", "remaining_time": "3:35:05"}
|
||||
{"current_steps": 1745, "total_steps": 3780, "loss": 0.2573, "lr": 2.6088930198292773e-05, "epoch": 3.2314814814814814, "percentage": 46.16, "elapsed_time": "3:03:53", "remaining_time": "3:34:26"}
|
||||
{"current_steps": 1750, "total_steps": 3780, "loss": 0.2659, "lr": 2.600090383369135e-05, "epoch": 3.240740740740741, "percentage": 46.3, "elapsed_time": "3:04:19", "remaining_time": "3:33:48"}
|
||||
{"current_steps": 1755, "total_steps": 3780, "loss": 0.2577, "lr": 2.5912749534779958e-05, "epoch": 3.25, "percentage": 46.43, "elapsed_time": "3:04:44", "remaining_time": "3:33:09"}
|
||||
{"current_steps": 1760, "total_steps": 3780, "loss": 0.2683, "lr": 2.5824469180935377e-05, "epoch": 3.259259259259259, "percentage": 46.56, "elapsed_time": "3:05:16", "remaining_time": "3:32:38"}
|
||||
{"current_steps": 1765, "total_steps": 3780, "loss": 0.2951, "lr": 2.5736064654221808e-05, "epoch": 3.2685185185185186, "percentage": 46.69, "elapsed_time": "3:05:46", "remaining_time": "3:32:05"}
|
||||
{"current_steps": 1770, "total_steps": 3780, "loss": 0.2498, "lr": 2.564753783935068e-05, "epoch": 3.2777777777777777, "percentage": 46.83, "elapsed_time": "3:06:23", "remaining_time": "3:31:39"}
|
||||
{"current_steps": 1775, "total_steps": 3780, "loss": 0.2397, "lr": 2.5558890623640513e-05, "epoch": 3.287037037037037, "percentage": 46.96, "elapsed_time": "3:06:56", "remaining_time": "3:31:10"}
|
||||
{"current_steps": 1780, "total_steps": 3780, "loss": 0.2445, "lr": 2.5470124896976687e-05, "epoch": 3.2962962962962963, "percentage": 47.09, "elapsed_time": "3:07:34", "remaining_time": "3:30:46"}
|
||||
{"current_steps": 1785, "total_steps": 3780, "loss": 0.2569, "lr": 2.538124255177113e-05, "epoch": 3.3055555555555554, "percentage": 47.22, "elapsed_time": "3:08:09", "remaining_time": "3:30:17"}
|
||||
{"current_steps": 1790, "total_steps": 3780, "loss": 0.2788, "lr": 2.5292245482921982e-05, "epoch": 3.314814814814815, "percentage": 47.35, "elapsed_time": "3:08:47", "remaining_time": "3:29:52"}
|
||||
{"current_steps": 1795, "total_steps": 3780, "loss": 0.2301, "lr": 2.5203135587773196e-05, "epoch": 3.324074074074074, "percentage": 47.49, "elapsed_time": "3:09:10", "remaining_time": "3:29:12"}
|
||||
{"current_steps": 1800, "total_steps": 3780, "loss": 0.2507, "lr": 2.5113914766074075e-05, "epoch": 3.3333333333333335, "percentage": 47.62, "elapsed_time": "3:09:42", "remaining_time": "3:28:40"}
|
||||
{"current_steps": 1805, "total_steps": 3780, "loss": 0.2375, "lr": 2.5024584919938805e-05, "epoch": 3.3425925925925926, "percentage": 47.75, "elapsed_time": "3:10:14", "remaining_time": "3:28:09"}
|
||||
{"current_steps": 1810, "total_steps": 3780, "loss": 0.2755, "lr": 2.493514795380587e-05, "epoch": 3.351851851851852, "percentage": 47.88, "elapsed_time": "3:10:45", "remaining_time": "3:27:36"}
|
||||
{"current_steps": 1815, "total_steps": 3780, "loss": 0.2444, "lr": 2.4845605774397482e-05, "epoch": 3.361111111111111, "percentage": 48.02, "elapsed_time": "3:11:15", "remaining_time": "3:27:04"}
|
||||
{"current_steps": 1820, "total_steps": 3780, "loss": 0.242, "lr": 2.4755960290678884e-05, "epoch": 3.3703703703703702, "percentage": 48.15, "elapsed_time": "3:11:43", "remaining_time": "3:26:28"}
|
||||
{"current_steps": 1825, "total_steps": 3780, "loss": 0.2221, "lr": 2.4666213413817696e-05, "epoch": 3.3796296296296298, "percentage": 48.28, "elapsed_time": "3:12:18", "remaining_time": "3:26:00"}
|
||||
{"current_steps": 1830, "total_steps": 3780, "loss": 0.2518, "lr": 2.4576367057143167e-05, "epoch": 3.388888888888889, "percentage": 48.41, "elapsed_time": "3:12:56", "remaining_time": "3:25:35"}
|
||||
{"current_steps": 1835, "total_steps": 3780, "loss": 0.2463, "lr": 2.4486423136105356e-05, "epoch": 3.398148148148148, "percentage": 48.54, "elapsed_time": "3:13:31", "remaining_time": "3:25:07"}
|
||||
{"current_steps": 1840, "total_steps": 3780, "loss": 0.2518, "lr": 2.4396383568234322e-05, "epoch": 3.4074074074074074, "percentage": 48.68, "elapsed_time": "3:14:02", "remaining_time": "3:24:35"}
|
||||
{"current_steps": 1845, "total_steps": 3780, "loss": 0.2561, "lr": 2.4306250273099236e-05, "epoch": 3.4166666666666665, "percentage": 48.81, "elapsed_time": "3:14:34", "remaining_time": "3:24:03"}
|
||||
{"current_steps": 1850, "total_steps": 3780, "loss": 0.273, "lr": 2.421602517226745e-05, "epoch": 3.425925925925926, "percentage": 48.94, "elapsed_time": "3:15:10", "remaining_time": "3:23:37"}
|
||||
{"current_steps": 1855, "total_steps": 3780, "loss": 0.2727, "lr": 2.4125710189263555e-05, "epoch": 3.435185185185185, "percentage": 49.07, "elapsed_time": "3:15:46", "remaining_time": "3:23:09"}
|
||||
{"current_steps": 1860, "total_steps": 3780, "loss": 0.2847, "lr": 2.4035307249528326e-05, "epoch": 3.4444444444444446, "percentage": 49.21, "elapsed_time": "3:16:23", "remaining_time": "3:22:43"}
|
||||
{"current_steps": 1865, "total_steps": 3780, "loss": 0.2481, "lr": 2.3944818280377732e-05, "epoch": 3.4537037037037037, "percentage": 49.34, "elapsed_time": "3:16:54", "remaining_time": "3:22:11"}
|
||||
{"current_steps": 1870, "total_steps": 3780, "loss": 0.2702, "lr": 2.3854245210961798e-05, "epoch": 3.462962962962963, "percentage": 49.47, "elapsed_time": "3:17:27", "remaining_time": "3:21:41"}
|
||||
{"current_steps": 1875, "total_steps": 3780, "loss": 0.2525, "lr": 2.376358997222351e-05, "epoch": 3.4722222222222223, "percentage": 49.6, "elapsed_time": "3:17:57", "remaining_time": "3:21:07"}
|
||||
{"current_steps": 1880, "total_steps": 3780, "loss": 0.2489, "lr": 2.367285449685763e-05, "epoch": 3.4814814814814814, "percentage": 49.74, "elapsed_time": "3:18:26", "remaining_time": "3:20:32"}
|
||||
{"current_steps": 1885, "total_steps": 3780, "loss": 0.2679, "lr": 2.3582040719269504e-05, "epoch": 3.490740740740741, "percentage": 49.87, "elapsed_time": "3:18:56", "remaining_time": "3:19:59"}
|
||||
{"current_steps": 1890, "total_steps": 3780, "loss": 0.2381, "lr": 2.3491150575533808e-05, "epoch": 3.5, "percentage": 50.0, "elapsed_time": "3:19:31", "remaining_time": "3:19:31"}
|
||||
{"current_steps": 1895, "total_steps": 3780, "loss": 0.2638, "lr": 2.340018600335328e-05, "epoch": 3.5092592592592595, "percentage": 50.13, "elapsed_time": "3:19:58", "remaining_time": "3:18:55"}
|
||||
{"current_steps": 1900, "total_steps": 3780, "loss": 0.2837, "lr": 2.3309148942017424e-05, "epoch": 3.5185185185185186, "percentage": 50.26, "elapsed_time": "3:20:29", "remaining_time": "3:18:22"}
|
||||
{"current_steps": 1905, "total_steps": 3780, "loss": 0.267, "lr": 2.321804133236115e-05, "epoch": 3.5277777777777777, "percentage": 50.4, "elapsed_time": "3:21:02", "remaining_time": "3:17:52"}
|
||||
{"current_steps": 1910, "total_steps": 3780, "loss": 0.2714, "lr": 2.312686511672338e-05, "epoch": 3.537037037037037, "percentage": 50.53, "elapsed_time": "3:21:31", "remaining_time": "3:17:18"}
|
||||
{"current_steps": 1915, "total_steps": 3780, "loss": 0.2772, "lr": 2.3035622238905694e-05, "epoch": 3.5462962962962963, "percentage": 50.66, "elapsed_time": "3:22:01", "remaining_time": "3:16:45"}
|
||||
{"current_steps": 1920, "total_steps": 3780, "loss": 0.2309, "lr": 2.2944314644130814e-05, "epoch": 3.5555555555555554, "percentage": 50.79, "elapsed_time": "3:22:31", "remaining_time": "3:16:11"}
|
||||
{"current_steps": 1925, "total_steps": 3780, "loss": 0.2617, "lr": 2.2852944279001207e-05, "epoch": 3.564814814814815, "percentage": 50.93, "elapsed_time": "3:22:58", "remaining_time": "3:15:36"}
|
||||
{"current_steps": 1930, "total_steps": 3780, "loss": 0.2679, "lr": 2.2761513091457537e-05, "epoch": 3.574074074074074, "percentage": 51.06, "elapsed_time": "3:23:35", "remaining_time": "3:15:09"}
|
||||
{"current_steps": 1935, "total_steps": 3780, "loss": 0.2195, "lr": 2.2670023030737153e-05, "epoch": 3.5833333333333335, "percentage": 51.19, "elapsed_time": "3:24:02", "remaining_time": "3:14:33"}
|
||||
{"current_steps": 1940, "total_steps": 3780, "loss": 0.2866, "lr": 2.2578476047332535e-05, "epoch": 3.5925925925925926, "percentage": 51.32, "elapsed_time": "3:24:38", "remaining_time": "3:14:05"}
|
||||
{"current_steps": 1945, "total_steps": 3780, "loss": 0.2522, "lr": 2.2486874092949708e-05, "epoch": 3.601851851851852, "percentage": 51.46, "elapsed_time": "3:25:10", "remaining_time": "3:13:34"}
|
||||
{"current_steps": 1950, "total_steps": 3780, "loss": 0.2736, "lr": 2.2395219120466622e-05, "epoch": 3.611111111111111, "percentage": 51.59, "elapsed_time": "3:25:46", "remaining_time": "3:13:06"}
|
||||
{"current_steps": 1955, "total_steps": 3780, "loss": 0.2511, "lr": 2.2303513083891542e-05, "epoch": 3.6203703703703702, "percentage": 51.72, "elapsed_time": "3:26:19", "remaining_time": "3:12:36"}
|
||||
{"current_steps": 1960, "total_steps": 3780, "loss": 0.2587, "lr": 2.2211757938321373e-05, "epoch": 3.6296296296296298, "percentage": 51.85, "elapsed_time": "3:26:47", "remaining_time": "3:12:01"}
|
||||
{"current_steps": 1965, "total_steps": 3780, "loss": 0.2524, "lr": 2.2119955639899983e-05, "epoch": 3.638888888888889, "percentage": 51.98, "elapsed_time": "3:27:22", "remaining_time": "3:11:32"}
|
||||
{"current_steps": 1970, "total_steps": 3780, "loss": 0.262, "lr": 2.20281081457765e-05, "epoch": 3.648148148148148, "percentage": 52.12, "elapsed_time": "3:27:53", "remaining_time": "3:11:00"}
|
||||
{"current_steps": 1975, "total_steps": 3780, "loss": 0.2377, "lr": 2.1936217414063584e-05, "epoch": 3.6574074074074074, "percentage": 52.25, "elapsed_time": "3:28:21", "remaining_time": "3:10:25"}
|
||||
{"current_steps": 1980, "total_steps": 3780, "loss": 0.2764, "lr": 2.184428540379569e-05, "epoch": 3.6666666666666665, "percentage": 52.38, "elapsed_time": "3:28:59", "remaining_time": "3:09:59"}
|
||||
{"current_steps": 1985, "total_steps": 3780, "loss": 0.2612, "lr": 2.1752314074887287e-05, "epoch": 3.675925925925926, "percentage": 52.51, "elapsed_time": "3:29:32", "remaining_time": "3:09:29"}
|
||||
{"current_steps": 1990, "total_steps": 3780, "loss": 0.2451, "lr": 2.1660305388091106e-05, "epoch": 3.685185185185185, "percentage": 52.65, "elapsed_time": "3:30:01", "remaining_time": "3:08:55"}
|
||||
{"current_steps": 1995, "total_steps": 3780, "loss": 0.2683, "lr": 2.1568261304956298e-05, "epoch": 3.6944444444444446, "percentage": 52.78, "elapsed_time": "3:30:31", "remaining_time": "3:08:21"}
|
||||
{"current_steps": 2000, "total_steps": 3780, "loss": 0.2588, "lr": 2.1476183787786638e-05, "epoch": 3.7037037037037037, "percentage": 52.91, "elapsed_time": "3:31:03", "remaining_time": "3:07:50"}
|
||||
{"current_steps": 2005, "total_steps": 3780, "loss": 0.2648, "lr": 2.138407479959869e-05, "epoch": 3.712962962962963, "percentage": 53.04, "elapsed_time": "3:31:40", "remaining_time": "3:07:23"}
|
||||
{"current_steps": 2010, "total_steps": 3780, "loss": 0.2559, "lr": 2.129193630407996e-05, "epoch": 3.7222222222222223, "percentage": 53.17, "elapsed_time": "3:32:13", "remaining_time": "3:06:53"}
|
||||
{"current_steps": 2015, "total_steps": 3780, "loss": 0.2653, "lr": 2.119977026554701e-05, "epoch": 3.7314814814814814, "percentage": 53.31, "elapsed_time": "3:32:42", "remaining_time": "3:06:18"}
|
||||
{"current_steps": 2020, "total_steps": 3780, "loss": 0.2572, "lr": 2.1107578648903614e-05, "epoch": 3.7407407407407405, "percentage": 53.44, "elapsed_time": "3:33:10", "remaining_time": "3:05:44"}
|
||||
{"current_steps": 2025, "total_steps": 3780, "loss": 0.2623, "lr": 2.1015363419598835e-05, "epoch": 3.75, "percentage": 53.57, "elapsed_time": "3:33:45", "remaining_time": "3:05:15"}
|
||||
{"current_steps": 2030, "total_steps": 3780, "loss": 0.2403, "lr": 2.0923126543585156e-05, "epoch": 3.7592592592592595, "percentage": 53.7, "elapsed_time": "3:34:14", "remaining_time": "3:04:41"}
|
||||
{"current_steps": 2035, "total_steps": 3780, "loss": 0.2441, "lr": 2.0830869987276537e-05, "epoch": 3.7685185185185186, "percentage": 53.84, "elapsed_time": "3:34:43", "remaining_time": "3:04:07"}
|
||||
{"current_steps": 2040, "total_steps": 3780, "loss": 0.2408, "lr": 2.0738595717506496e-05, "epoch": 3.7777777777777777, "percentage": 53.97, "elapsed_time": "3:35:15", "remaining_time": "3:03:35"}
|
||||
{"current_steps": 2045, "total_steps": 3780, "loss": 0.2324, "lr": 2.0646305701486215e-05, "epoch": 3.787037037037037, "percentage": 54.1, "elapsed_time": "3:35:44", "remaining_time": "3:03:02"}
|
||||
{"current_steps": 2050, "total_steps": 3780, "loss": 0.2639, "lr": 2.055400190676255e-05, "epoch": 3.7962962962962963, "percentage": 54.23, "elapsed_time": "3:36:14", "remaining_time": "3:02:28"}
|
||||
{"current_steps": 2055, "total_steps": 3780, "loss": 0.2766, "lr": 2.046168630117612e-05, "epoch": 3.8055555555555554, "percentage": 54.37, "elapsed_time": "3:36:47", "remaining_time": "3:01:58"}
|
||||
{"current_steps": 2060, "total_steps": 3780, "loss": 0.2252, "lr": 2.0369360852819327e-05, "epoch": 3.814814814814815, "percentage": 54.5, "elapsed_time": "3:37:14", "remaining_time": "3:01:23"}
|
||||
{"current_steps": 2065, "total_steps": 3780, "loss": 0.2657, "lr": 2.027702752999444e-05, "epoch": 3.824074074074074, "percentage": 54.63, "elapsed_time": "3:37:45", "remaining_time": "3:00:51"}
|
||||
{"current_steps": 2070, "total_steps": 3780, "loss": 0.2577, "lr": 2.0184688301171567e-05, "epoch": 3.8333333333333335, "percentage": 54.76, "elapsed_time": "3:38:18", "remaining_time": "3:00:20"}
|
||||
{"current_steps": 2075, "total_steps": 3780, "loss": 0.302, "lr": 2.009234513494676e-05, "epoch": 3.8425925925925926, "percentage": 54.89, "elapsed_time": "3:38:57", "remaining_time": "2:59:54"}
|
||||
{"current_steps": 2080, "total_steps": 3780, "loss": 0.262, "lr": 2e-05, "epoch": 3.851851851851852, "percentage": 55.03, "elapsed_time": "3:39:29", "remaining_time": "2:59:23"}
|
||||
{"current_steps": 2085, "total_steps": 3780, "loss": 0.262, "lr": 1.9907654865053248e-05, "epoch": 3.861111111111111, "percentage": 55.16, "elapsed_time": "3:40:01", "remaining_time": "2:58:52"}
|
||||
{"current_steps": 2090, "total_steps": 3780, "loss": 0.2444, "lr": 1.981531169882844e-05, "epoch": 3.8703703703703702, "percentage": 55.29, "elapsed_time": "3:40:35", "remaining_time": "2:58:22"}
|
||||
{"current_steps": 2095, "total_steps": 3780, "loss": 0.2574, "lr": 1.9722972470005573e-05, "epoch": 3.8796296296296298, "percentage": 55.42, "elapsed_time": "3:41:10", "remaining_time": "2:57:53"}
|
||||
{"current_steps": 2100, "total_steps": 3780, "loss": 0.2692, "lr": 1.9630639147180673e-05, "epoch": 3.888888888888889, "percentage": 55.56, "elapsed_time": "3:41:44", "remaining_time": "2:57:23"}
|
||||
{"current_steps": 2105, "total_steps": 3780, "loss": 0.2767, "lr": 1.9538313698823887e-05, "epoch": 3.898148148148148, "percentage": 55.69, "elapsed_time": "3:42:27", "remaining_time": "2:57:00"}
|
||||
{"current_steps": 2110, "total_steps": 3780, "loss": 0.2352, "lr": 1.944599809323745e-05, "epoch": 3.9074074074074074, "percentage": 55.82, "elapsed_time": "3:43:02", "remaining_time": "2:56:31"}
|
||||
{"current_steps": 2115, "total_steps": 3780, "loss": 0.2711, "lr": 1.935369429851379e-05, "epoch": 3.9166666666666665, "percentage": 55.95, "elapsed_time": "3:43:36", "remaining_time": "2:56:01"}
|
||||
{"current_steps": 2120, "total_steps": 3780, "loss": 0.2347, "lr": 1.926140428249351e-05, "epoch": 3.925925925925926, "percentage": 56.08, "elapsed_time": "3:44:04", "remaining_time": "2:55:27"}
|
||||
{"current_steps": 2125, "total_steps": 3780, "loss": 0.2425, "lr": 1.916913001272347e-05, "epoch": 3.935185185185185, "percentage": 56.22, "elapsed_time": "3:44:36", "remaining_time": "2:54:55"}
|
||||
{"current_steps": 2130, "total_steps": 3780, "loss": 0.2523, "lr": 1.907687345641485e-05, "epoch": 3.9444444444444446, "percentage": 56.35, "elapsed_time": "3:45:00", "remaining_time": "2:54:17"}
|
||||
{"current_steps": 2135, "total_steps": 3780, "loss": 0.2477, "lr": 1.8984636580401165e-05, "epoch": 3.9537037037037037, "percentage": 56.48, "elapsed_time": "3:45:31", "remaining_time": "2:53:45"}
|
||||
{"current_steps": 2140, "total_steps": 3780, "loss": 0.2713, "lr": 1.8892421351096393e-05, "epoch": 3.962962962962963, "percentage": 56.61, "elapsed_time": "3:46:05", "remaining_time": "2:53:15"}
|
||||
{"current_steps": 2145, "total_steps": 3780, "loss": 0.2525, "lr": 1.8800229734452998e-05, "epoch": 3.9722222222222223, "percentage": 56.75, "elapsed_time": "3:46:33", "remaining_time": "2:52:41"}
|
||||
{"current_steps": 2150, "total_steps": 3780, "loss": 0.2692, "lr": 1.8708063695920047e-05, "epoch": 3.9814814814814814, "percentage": 56.88, "elapsed_time": "3:46:58", "remaining_time": "2:52:04"}
|
||||
{"current_steps": 2155, "total_steps": 3780, "loss": 0.2203, "lr": 1.8615925200401318e-05, "epoch": 3.9907407407407405, "percentage": 57.01, "elapsed_time": "3:47:26", "remaining_time": "2:51:30"}
|
||||
{"current_steps": 2160, "total_steps": 3780, "loss": 0.2415, "lr": 1.852381621221337e-05, "epoch": 4.0, "percentage": 57.14, "elapsed_time": "3:47:51", "remaining_time": "2:50:53"}
|
||||
{"current_steps": 2165, "total_steps": 3780, "loss": 0.2296, "lr": 1.843173869504371e-05, "epoch": 4.0092592592592595, "percentage": 57.28, "elapsed_time": "3:48:19", "remaining_time": "2:50:19"}
|
||||
{"current_steps": 2170, "total_steps": 3780, "loss": 0.2373, "lr": 1.8339694611908897e-05, "epoch": 4.018518518518518, "percentage": 57.41, "elapsed_time": "3:48:54", "remaining_time": "2:49:50"}
|
||||
{"current_steps": 2175, "total_steps": 3780, "loss": 0.2353, "lr": 1.8247685925112716e-05, "epoch": 4.027777777777778, "percentage": 57.54, "elapsed_time": "3:49:28", "remaining_time": "2:49:19"}
|
||||
{"current_steps": 2180, "total_steps": 3780, "loss": 0.2158, "lr": 1.8155714596204318e-05, "epoch": 4.037037037037037, "percentage": 57.67, "elapsed_time": "3:49:55", "remaining_time": "2:48:45"}
|
||||
{"current_steps": 2185, "total_steps": 3780, "loss": 0.246, "lr": 1.806378258593642e-05, "epoch": 4.046296296296297, "percentage": 57.8, "elapsed_time": "3:50:20", "remaining_time": "2:48:08"}
|
||||
{"current_steps": 2190, "total_steps": 3780, "loss": 0.2215, "lr": 1.797189185422351e-05, "epoch": 4.055555555555555, "percentage": 57.94, "elapsed_time": "3:50:48", "remaining_time": "2:47:34"}
|
||||
{"current_steps": 2195, "total_steps": 3780, "loss": 0.2116, "lr": 1.788004436010002e-05, "epoch": 4.064814814814815, "percentage": 58.07, "elapsed_time": "3:51:15", "remaining_time": "2:46:59"}
|
||||
{"current_steps": 2200, "total_steps": 3780, "loss": 0.2692, "lr": 1.778824206167863e-05, "epoch": 4.074074074074074, "percentage": 58.2, "elapsed_time": "3:51:42", "remaining_time": "2:46:24"}
|
||||
{"current_steps": 2205, "total_steps": 3780, "loss": 0.22, "lr": 1.7696486916108468e-05, "epoch": 4.083333333333333, "percentage": 58.33, "elapsed_time": "3:52:11", "remaining_time": "2:45:51"}
|
||||
{"current_steps": 2210, "total_steps": 3780, "loss": 0.2254, "lr": 1.7604780879533384e-05, "epoch": 4.092592592592593, "percentage": 58.47, "elapsed_time": "3:52:44", "remaining_time": "2:45:20"}
|
||||
{"current_steps": 2215, "total_steps": 3780, "loss": 0.2312, "lr": 1.7513125907050302e-05, "epoch": 4.101851851851852, "percentage": 58.6, "elapsed_time": "3:53:22", "remaining_time": "2:44:53"}
|
||||
{"current_steps": 2220, "total_steps": 3780, "loss": 0.2259, "lr": 1.742152395266747e-05, "epoch": 4.111111111111111, "percentage": 58.73, "elapsed_time": "3:53:55", "remaining_time": "2:44:23"}
|
||||
{"current_steps": 2225, "total_steps": 3780, "loss": 0.2585, "lr": 1.7329976969262854e-05, "epoch": 4.12037037037037, "percentage": 58.86, "elapsed_time": "3:54:31", "remaining_time": "2:43:54"}
|
||||
{"current_steps": 2230, "total_steps": 3780, "loss": 0.2367, "lr": 1.7238486908542463e-05, "epoch": 4.12962962962963, "percentage": 58.99, "elapsed_time": "3:55:04", "remaining_time": "2:43:23"}
|
||||
{"current_steps": 2235, "total_steps": 3780, "loss": 0.2649, "lr": 1.71470557209988e-05, "epoch": 4.138888888888889, "percentage": 59.13, "elapsed_time": "3:55:43", "remaining_time": "2:42:57"}
|
||||
{"current_steps": 2240, "total_steps": 3780, "loss": 0.2392, "lr": 1.7055685355869196e-05, "epoch": 4.148148148148148, "percentage": 59.26, "elapsed_time": "3:56:13", "remaining_time": "2:42:24"}
|
||||
{"current_steps": 2245, "total_steps": 3780, "loss": 0.2281, "lr": 1.6964377761094313e-05, "epoch": 4.157407407407407, "percentage": 59.39, "elapsed_time": "3:56:37", "remaining_time": "2:41:47"}
|
||||
{"current_steps": 2250, "total_steps": 3780, "loss": 0.2329, "lr": 1.6873134883276626e-05, "epoch": 4.166666666666667, "percentage": 59.52, "elapsed_time": "3:57:10", "remaining_time": "2:41:16"}
|
||||
{"current_steps": 2255, "total_steps": 3780, "loss": 0.2792, "lr": 1.6781958667638855e-05, "epoch": 4.175925925925926, "percentage": 59.66, "elapsed_time": "3:57:37", "remaining_time": "2:40:41"}
|
||||
{"current_steps": 2260, "total_steps": 3780, "loss": 0.2256, "lr": 1.669085105798258e-05, "epoch": 4.185185185185185, "percentage": 59.79, "elapsed_time": "3:58:07", "remaining_time": "2:40:09"}
|
||||
{"current_steps": 2265, "total_steps": 3780, "loss": 0.2078, "lr": 1.6599813996646727e-05, "epoch": 4.194444444444445, "percentage": 59.92, "elapsed_time": "3:58:37", "remaining_time": "2:39:36"}
|
||||
{"current_steps": 2270, "total_steps": 3780, "loss": 0.224, "lr": 1.65088494244662e-05, "epoch": 4.203703703703703, "percentage": 60.05, "elapsed_time": "3:59:07", "remaining_time": "2:39:03"}
|
||||
{"current_steps": 2275, "total_steps": 3780, "loss": 0.2518, "lr": 1.6417959280730506e-05, "epoch": 4.212962962962963, "percentage": 60.19, "elapsed_time": "3:59:42", "remaining_time": "2:38:34"}
|
||||
{"current_steps": 2280, "total_steps": 3780, "loss": 0.2449, "lr": 1.632714550314237e-05, "epoch": 4.222222222222222, "percentage": 60.32, "elapsed_time": "4:00:06", "remaining_time": "2:37:57"}
|
||||
{"current_steps": 2285, "total_steps": 3780, "loss": 0.2521, "lr": 1.6236410027776494e-05, "epoch": 4.231481481481482, "percentage": 60.45, "elapsed_time": "4:00:38", "remaining_time": "2:37:26"}
|
||||
{"current_steps": 2290, "total_steps": 3780, "loss": 0.2445, "lr": 1.6145754789038205e-05, "epoch": 4.2407407407407405, "percentage": 60.58, "elapsed_time": "4:01:09", "remaining_time": "2:36:54"}
|
||||
{"current_steps": 2295, "total_steps": 3780, "loss": 0.2342, "lr": 1.6055181719622278e-05, "epoch": 4.25, "percentage": 60.71, "elapsed_time": "4:01:41", "remaining_time": "2:36:23"}
|
||||
{"current_steps": 2300, "total_steps": 3780, "loss": 0.2591, "lr": 1.5964692750471684e-05, "epoch": 4.2592592592592595, "percentage": 60.85, "elapsed_time": "4:02:21", "remaining_time": "2:35:56"}
|
||||
{"current_steps": 2305, "total_steps": 3780, "loss": 0.2329, "lr": 1.5874289810736452e-05, "epoch": 4.268518518518518, "percentage": 60.98, "elapsed_time": "4:02:52", "remaining_time": "2:35:25"}
|
||||
{"current_steps": 2310, "total_steps": 3780, "loss": 0.2352, "lr": 1.5783974827732555e-05, "epoch": 4.277777777777778, "percentage": 61.11, "elapsed_time": "4:03:27", "remaining_time": "2:34:55"}
|
||||
{"current_steps": 2315, "total_steps": 3780, "loss": 0.2206, "lr": 1.5693749726900767e-05, "epoch": 4.287037037037037, "percentage": 61.24, "elapsed_time": "4:03:48", "remaining_time": "2:34:17"}
|
||||
{"current_steps": 2320, "total_steps": 3780, "loss": 0.2417, "lr": 1.560361643176568e-05, "epoch": 4.296296296296296, "percentage": 61.38, "elapsed_time": "4:04:20", "remaining_time": "2:33:46"}
|
||||
{"current_steps": 2325, "total_steps": 3780, "loss": 0.2377, "lr": 1.5513576863894654e-05, "epoch": 4.305555555555555, "percentage": 61.51, "elapsed_time": "4:04:52", "remaining_time": "2:33:14"}
|
||||
{"current_steps": 2330, "total_steps": 3780, "loss": 0.2749, "lr": 1.5423632942856836e-05, "epoch": 4.314814814814815, "percentage": 61.64, "elapsed_time": "4:05:29", "remaining_time": "2:32:46"}
|
||||
{"current_steps": 2335, "total_steps": 3780, "loss": 0.2419, "lr": 1.5333786586182308e-05, "epoch": 4.324074074074074, "percentage": 61.77, "elapsed_time": "4:06:02", "remaining_time": "2:32:15"}
|
||||
{"current_steps": 2340, "total_steps": 3780, "loss": 0.2325, "lr": 1.5244039709321123e-05, "epoch": 4.333333333333333, "percentage": 61.9, "elapsed_time": "4:06:34", "remaining_time": "2:31:44"}
|
||||
{"current_steps": 2345, "total_steps": 3780, "loss": 0.2308, "lr": 1.5154394225602525e-05, "epoch": 4.342592592592593, "percentage": 62.04, "elapsed_time": "4:07:01", "remaining_time": "2:31:09"}
|
||||
{"current_steps": 2350, "total_steps": 3780, "loss": 0.2112, "lr": 1.5064852046194127e-05, "epoch": 4.351851851851852, "percentage": 62.17, "elapsed_time": "4:07:30", "remaining_time": "2:30:36"}
|
||||
{"current_steps": 2355, "total_steps": 3780, "loss": 0.2443, "lr": 1.49754150800612e-05, "epoch": 4.361111111111111, "percentage": 62.3, "elapsed_time": "4:08:01", "remaining_time": "2:30:04"}
|
||||
{"current_steps": 2360, "total_steps": 3780, "loss": 0.2234, "lr": 1.4886085233925931e-05, "epoch": 4.37037037037037, "percentage": 62.43, "elapsed_time": "4:08:35", "remaining_time": "2:29:34"}
|
||||
{"current_steps": 2365, "total_steps": 3780, "loss": 0.2178, "lr": 1.4796864412226812e-05, "epoch": 4.37962962962963, "percentage": 62.57, "elapsed_time": "4:09:09", "remaining_time": "2:29:04"}
|
||||
{"current_steps": 2370, "total_steps": 3780, "loss": 0.238, "lr": 1.4707754517078021e-05, "epoch": 4.388888888888889, "percentage": 62.7, "elapsed_time": "4:09:38", "remaining_time": "2:28:31"}
|
||||
{"current_steps": 2375, "total_steps": 3780, "loss": 0.2481, "lr": 1.4618757448228869e-05, "epoch": 4.398148148148148, "percentage": 62.83, "elapsed_time": "4:10:09", "remaining_time": "2:27:59"}
|
||||
{"current_steps": 2380, "total_steps": 3780, "loss": 0.2349, "lr": 1.4529875103023316e-05, "epoch": 4.407407407407407, "percentage": 62.96, "elapsed_time": "4:10:38", "remaining_time": "2:27:26"}
|
||||
{"current_steps": 2385, "total_steps": 3780, "loss": 0.2319, "lr": 1.4441109376359498e-05, "epoch": 4.416666666666667, "percentage": 63.1, "elapsed_time": "4:11:12", "remaining_time": "2:26:56"}
|
||||
{"current_steps": 2390, "total_steps": 3780, "loss": 0.2539, "lr": 1.435246216064933e-05, "epoch": 4.425925925925926, "percentage": 63.23, "elapsed_time": "4:11:40", "remaining_time": "2:26:22"}
|
||||
{"current_steps": 2395, "total_steps": 3780, "loss": 0.2505, "lr": 1.4263935345778202e-05, "epoch": 4.435185185185185, "percentage": 63.36, "elapsed_time": "4:12:17", "remaining_time": "2:25:53"}
|
||||
{"current_steps": 2400, "total_steps": 3780, "loss": 0.2129, "lr": 1.417553081906462e-05, "epoch": 4.444444444444445, "percentage": 63.49, "elapsed_time": "4:12:42", "remaining_time": "2:25:18"}
|
||||
{"current_steps": 2405, "total_steps": 3780, "loss": 0.2408, "lr": 1.408725046522005e-05, "epoch": 4.453703703703704, "percentage": 63.62, "elapsed_time": "4:13:14", "remaining_time": "2:24:46"}
|
||||
{"current_steps": 2410, "total_steps": 3780, "loss": 0.2903, "lr": 1.399909616630865e-05, "epoch": 4.462962962962963, "percentage": 63.76, "elapsed_time": "4:13:46", "remaining_time": "2:24:15"}
|
||||
{"current_steps": 2415, "total_steps": 3780, "loss": 0.2179, "lr": 1.3911069801707232e-05, "epoch": 4.472222222222222, "percentage": 63.89, "elapsed_time": "4:14:07", "remaining_time": "2:23:37"}
|
||||
{"current_steps": 2420, "total_steps": 3780, "loss": 0.2435, "lr": 1.3823173248065125e-05, "epoch": 4.481481481481482, "percentage": 64.02, "elapsed_time": "4:14:42", "remaining_time": "2:23:08"}
|
||||
{"current_steps": 2425, "total_steps": 3780, "loss": 0.2547, "lr": 1.37354083792642e-05, "epoch": 4.4907407407407405, "percentage": 64.15, "elapsed_time": "4:15:18", "remaining_time": "2:22:39"}
|
||||
{"current_steps": 2430, "total_steps": 3780, "loss": 0.2534, "lr": 1.3647777066378938e-05, "epoch": 4.5, "percentage": 64.29, "elapsed_time": "4:16:02", "remaining_time": "2:22:14"}
|
||||
{"current_steps": 2435, "total_steps": 3780, "loss": 0.2395, "lr": 1.3560281177636484e-05, "epoch": 4.5092592592592595, "percentage": 64.42, "elapsed_time": "4:16:29", "remaining_time": "2:21:40"}
|
||||
{"current_steps": 2440, "total_steps": 3780, "loss": 0.255, "lr": 1.347292257837689e-05, "epoch": 4.518518518518518, "percentage": 64.55, "elapsed_time": "4:17:04", "remaining_time": "2:21:11"}
|
||||
{"current_steps": 2445, "total_steps": 3780, "loss": 0.2429, "lr": 1.3385703131013279e-05, "epoch": 4.527777777777778, "percentage": 64.68, "elapsed_time": "4:17:30", "remaining_time": "2:20:36"}
|
||||
{"current_steps": 2450, "total_steps": 3780, "loss": 0.2318, "lr": 1.3298624694992175e-05, "epoch": 4.537037037037037, "percentage": 64.81, "elapsed_time": "4:18:01", "remaining_time": "2:20:04"}
|
||||
{"current_steps": 2455, "total_steps": 3780, "loss": 0.2478, "lr": 1.3211689126753879e-05, "epoch": 4.546296296296296, "percentage": 64.95, "elapsed_time": "4:18:27", "remaining_time": "2:19:29"}
|
||||
{"current_steps": 2460, "total_steps": 3780, "loss": 0.2252, "lr": 1.3124898279692837e-05, "epoch": 4.555555555555555, "percentage": 65.08, "elapsed_time": "4:18:54", "remaining_time": "2:18:55"}
|
||||
{"current_steps": 2465, "total_steps": 3780, "loss": 0.2563, "lr": 1.3038254004118192e-05, "epoch": 4.564814814814815, "percentage": 65.21, "elapsed_time": "4:19:20", "remaining_time": "2:18:21"}
|
||||
{"current_steps": 2470, "total_steps": 3780, "loss": 0.2488, "lr": 1.2951758147214272e-05, "epoch": 4.574074074074074, "percentage": 65.34, "elapsed_time": "4:19:48", "remaining_time": "2:17:47"}
|
||||
{"current_steps": 2475, "total_steps": 3780, "loss": 0.237, "lr": 1.2865412553001274e-05, "epoch": 4.583333333333333, "percentage": 65.48, "elapsed_time": "4:20:25", "remaining_time": "2:17:19"}
|
||||
{"current_steps": 2480, "total_steps": 3780, "loss": 0.2441, "lr": 1.2779219062295892e-05, "epoch": 4.592592592592593, "percentage": 65.61, "elapsed_time": "4:20:56", "remaining_time": "2:16:46"}
|
||||
{"current_steps": 2485, "total_steps": 3780, "loss": 0.2634, "lr": 1.26931795126721e-05, "epoch": 4.601851851851852, "percentage": 65.74, "elapsed_time": "4:21:29", "remaining_time": "2:16:16"}
|
||||
{"current_steps": 2490, "total_steps": 3780, "loss": 0.2519, "lr": 1.2607295738422e-05, "epoch": 4.611111111111111, "percentage": 65.87, "elapsed_time": "4:22:04", "remaining_time": "2:15:46"}
|
||||
{"current_steps": 2495, "total_steps": 3780, "loss": 0.241, "lr": 1.2521569570516666e-05, "epoch": 4.62037037037037, "percentage": 66.01, "elapsed_time": "4:22:29", "remaining_time": "2:15:11"}
|
||||
{"current_steps": 2500, "total_steps": 3780, "loss": 0.2566, "lr": 1.2436002836567154e-05, "epoch": 4.62962962962963, "percentage": 66.14, "elapsed_time": "4:22:55", "remaining_time": "2:14:37"}
|
||||
{"current_steps": 2505, "total_steps": 3780, "loss": 0.2537, "lr": 1.2350597360785503e-05, "epoch": 4.638888888888889, "percentage": 66.27, "elapsed_time": "4:23:37", "remaining_time": "2:14:10"}
|
||||
{"current_steps": 2510, "total_steps": 3780, "loss": 0.213, "lr": 1.2265354963945861e-05, "epoch": 4.648148148148148, "percentage": 66.4, "elapsed_time": "4:24:04", "remaining_time": "2:13:36"}
|
||||
{"current_steps": 2515, "total_steps": 3780, "loss": 0.2492, "lr": 1.2180277463345697e-05, "epoch": 4.657407407407407, "percentage": 66.53, "elapsed_time": "4:24:31", "remaining_time": "2:13:03"}
|
||||
{"current_steps": 2520, "total_steps": 3780, "loss": 0.2533, "lr": 1.209536667276699e-05, "epoch": 4.666666666666667, "percentage": 66.67, "elapsed_time": "4:25:11", "remaining_time": "2:12:35"}
|
||||
{"current_steps": 2525, "total_steps": 3780, "loss": 0.2255, "lr": 1.2010624402437622e-05, "epoch": 4.675925925925926, "percentage": 66.8, "elapsed_time": "4:25:44", "remaining_time": "2:12:04"}
|
||||
{"current_steps": 2530, "total_steps": 3780, "loss": 0.2475, "lr": 1.1926052458992756e-05, "epoch": 4.685185185185185, "percentage": 66.93, "elapsed_time": "4:26:16", "remaining_time": "2:11:33"}
|
||||
{"current_steps": 2535, "total_steps": 3780, "loss": 0.2493, "lr": 1.184165264543633e-05, "epoch": 4.694444444444445, "percentage": 67.06, "elapsed_time": "4:26:50", "remaining_time": "2:11:03"}
|
||||
{"current_steps": 2540, "total_steps": 3780, "loss": 0.2407, "lr": 1.1757426761102608e-05, "epoch": 4.703703703703704, "percentage": 67.2, "elapsed_time": "4:27:26", "remaining_time": "2:10:33"}
|
||||
{"current_steps": 2545, "total_steps": 3780, "loss": 0.2298, "lr": 1.167337660161783e-05, "epoch": 4.712962962962963, "percentage": 67.33, "elapsed_time": "4:27:53", "remaining_time": "2:10:00"}
|
||||
{"current_steps": 2550, "total_steps": 3780, "loss": 0.2492, "lr": 1.1589503958861936e-05, "epoch": 4.722222222222222, "percentage": 67.46, "elapsed_time": "4:28:32", "remaining_time": "2:09:31"}
|
||||
{"current_steps": 2555, "total_steps": 3780, "loss": 0.2458, "lr": 1.1505810620930338e-05, "epoch": 4.731481481481482, "percentage": 67.59, "elapsed_time": "4:29:07", "remaining_time": "2:09:01"}
|
||||
{"current_steps": 2560, "total_steps": 3780, "loss": 0.2261, "lr": 1.1422298372095841e-05, "epoch": 4.7407407407407405, "percentage": 67.72, "elapsed_time": "4:29:37", "remaining_time": "2:08:29"}
|
||||
{"current_steps": 2565, "total_steps": 3780, "loss": 0.2474, "lr": 1.133896899277056e-05, "epoch": 4.75, "percentage": 67.86, "elapsed_time": "4:30:03", "remaining_time": "2:07:55"}
|
||||
{"current_steps": 2570, "total_steps": 3780, "loss": 0.2589, "lr": 1.1255824259467985e-05, "epoch": 4.7592592592592595, "percentage": 67.99, "elapsed_time": "4:30:37", "remaining_time": "2:07:24"}
|
||||
{"current_steps": 2575, "total_steps": 3780, "loss": 0.2656, "lr": 1.1172865944765122e-05, "epoch": 4.768518518518518, "percentage": 68.12, "elapsed_time": "4:31:07", "remaining_time": "2:06:52"}
|
||||
{"current_steps": 2580, "total_steps": 3780, "loss": 0.2702, "lr": 1.109009581726466e-05, "epoch": 4.777777777777778, "percentage": 68.25, "elapsed_time": "4:31:39", "remaining_time": "2:06:21"}
|
||||
{"current_steps": 2585, "total_steps": 3780, "loss": 0.2322, "lr": 1.1007515641557329e-05, "epoch": 4.787037037037037, "percentage": 68.39, "elapsed_time": "4:32:09", "remaining_time": "2:05:48"}
|
||||
{"current_steps": 2590, "total_steps": 3780, "loss": 0.272, "lr": 1.092512717818421e-05, "epoch": 4.796296296296296, "percentage": 68.52, "elapsed_time": "4:32:41", "remaining_time": "2:05:17"}
|
||||
{"current_steps": 2595, "total_steps": 3780, "loss": 0.2492, "lr": 1.0842932183599238e-05, "epoch": 4.805555555555555, "percentage": 68.65, "elapsed_time": "4:33:07", "remaining_time": "2:04:43"}
|
||||
{"current_steps": 2600, "total_steps": 3780, "loss": 0.2253, "lr": 1.076093241013178e-05, "epoch": 4.814814814814815, "percentage": 68.78, "elapsed_time": "4:33:33", "remaining_time": "2:04:09"}
|
||||
{"current_steps": 2605, "total_steps": 3780, "loss": 0.2419, "lr": 1.067912960594923e-05, "epoch": 4.824074074074074, "percentage": 68.92, "elapsed_time": "4:34:04", "remaining_time": "2:03:37"}
|
||||
{"current_steps": 2610, "total_steps": 3780, "loss": 0.2321, "lr": 1.0597525515019749e-05, "epoch": 4.833333333333333, "percentage": 69.05, "elapsed_time": "4:34:36", "remaining_time": "2:03:06"}
|
||||
{"current_steps": 2615, "total_steps": 3780, "loss": 0.2209, "lr": 1.0516121877075129e-05, "epoch": 4.842592592592593, "percentage": 69.18, "elapsed_time": "4:35:09", "remaining_time": "2:02:35"}
|
||||
{"current_steps": 2620, "total_steps": 3780, "loss": 0.2595, "lr": 1.0434920427573643e-05, "epoch": 4.851851851851852, "percentage": 69.31, "elapsed_time": "4:35:47", "remaining_time": "2:02:06"}
|
||||
{"current_steps": 2625, "total_steps": 3780, "loss": 0.2376, "lr": 1.035392289766307e-05, "epoch": 4.861111111111111, "percentage": 69.44, "elapsed_time": "4:36:17", "remaining_time": "2:01:34"}
|
||||
{"current_steps": 2630, "total_steps": 3780, "loss": 0.256, "lr": 1.0273131014143822e-05, "epoch": 4.87037037037037, "percentage": 69.58, "elapsed_time": "4:36:45", "remaining_time": "2:01:01"}
|
||||
{"current_steps": 2635, "total_steps": 3780, "loss": 0.2299, "lr": 1.0192546499432066e-05, "epoch": 4.87962962962963, "percentage": 69.71, "elapsed_time": "4:37:12", "remaining_time": "2:00:27"}
|
||||
{"current_steps": 2640, "total_steps": 3780, "loss": 0.2302, "lr": 1.0112171071523064e-05, "epoch": 4.888888888888889, "percentage": 69.84, "elapsed_time": "4:37:40", "remaining_time": "1:59:54"}
|
||||
{"current_steps": 2645, "total_steps": 3780, "loss": 0.2318, "lr": 1.0032006443954506e-05, "epoch": 4.898148148148148, "percentage": 69.97, "elapsed_time": "4:38:13", "remaining_time": "1:59:23"}
|
||||
{"current_steps": 2650, "total_steps": 3780, "loss": 0.2653, "lr": 9.952054325769984e-06, "epoch": 4.907407407407407, "percentage": 70.11, "elapsed_time": "4:38:45", "remaining_time": "1:58:52"}
|
||||
{"current_steps": 2655, "total_steps": 3780, "loss": 0.2143, "lr": 9.872316421482592e-06, "epoch": 4.916666666666667, "percentage": 70.24, "elapsed_time": "4:39:13", "remaining_time": "1:58:19"}
|
||||
{"current_steps": 2660, "total_steps": 3780, "loss": 0.263, "lr": 9.792794431038542e-06, "epoch": 4.925925925925926, "percentage": 70.37, "elapsed_time": "4:39:42", "remaining_time": "1:57:46"}
|
||||
{"current_steps": 2665, "total_steps": 3780, "loss": 0.2469, "lr": 9.713490049780931e-06, "epoch": 4.935185185185185, "percentage": 70.5, "elapsed_time": "4:40:12", "remaining_time": "1:57:14"}
|
||||
{"current_steps": 2670, "total_steps": 3780, "loss": 0.242, "lr": 9.634404968413644e-06, "epoch": 4.944444444444445, "percentage": 70.63, "elapsed_time": "4:40:44", "remaining_time": "1:56:42"}
|
||||
{"current_steps": 2675, "total_steps": 3780, "loss": 0.2383, "lr": 9.555540872965235e-06, "epoch": 4.953703703703704, "percentage": 70.77, "elapsed_time": "4:41:19", "remaining_time": "1:56:12"}
|
||||
{"current_steps": 2680, "total_steps": 3780, "loss": 0.2343, "lr": 9.47689944475305e-06, "epoch": 4.962962962962963, "percentage": 70.9, "elapsed_time": "4:41:50", "remaining_time": "1:55:41"}
|
||||
{"current_steps": 2685, "total_steps": 3780, "loss": 0.2361, "lr": 9.398482360347327e-06, "epoch": 4.972222222222222, "percentage": 71.03, "elapsed_time": "4:42:19", "remaining_time": "1:55:08"}
|
||||
{"current_steps": 2690, "total_steps": 3780, "loss": 0.2275, "lr": 9.32029129153551e-06, "epoch": 4.981481481481482, "percentage": 71.16, "elapsed_time": "4:43:02", "remaining_time": "1:54:41"}
|
||||
{"current_steps": 2695, "total_steps": 3780, "loss": 0.2306, "lr": 9.242327905286552e-06, "epoch": 4.9907407407407405, "percentage": 71.3, "elapsed_time": "4:43:33", "remaining_time": "1:54:09"}
|
||||
{"current_steps": 2700, "total_steps": 3780, "loss": 0.2428, "lr": 9.164593863715405e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "4:44:12", "remaining_time": "1:53:41"}
|
||||
{"current_steps": 2705, "total_steps": 3780, "loss": 0.2279, "lr": 9.087090824047604e-06, "epoch": 5.0092592592592595, "percentage": 71.56, "elapsed_time": "4:44:49", "remaining_time": "1:53:11"}
|
||||
{"current_steps": 2710, "total_steps": 3780, "loss": 0.233, "lr": 9.009820438583881e-06, "epoch": 5.018518518518518, "percentage": 71.69, "elapsed_time": "4:45:24", "remaining_time": "1:52:41"}
|
||||
{"current_steps": 2715, "total_steps": 3780, "loss": 0.227, "lr": 8.932784354665002e-06, "epoch": 5.027777777777778, "percentage": 71.83, "elapsed_time": "4:45:50", "remaining_time": "1:52:07"}
|
||||
{"current_steps": 2720, "total_steps": 3780, "loss": 0.2015, "lr": 8.855984214636606e-06, "epoch": 5.037037037037037, "percentage": 71.96, "elapsed_time": "4:46:16", "remaining_time": "1:51:33"}
|
||||
{"current_steps": 2725, "total_steps": 3780, "loss": 0.2666, "lr": 8.779421655814189e-06, "epoch": 5.046296296296297, "percentage": 72.09, "elapsed_time": "4:46:53", "remaining_time": "1:51:04"}
|
||||
{"current_steps": 2730, "total_steps": 3780, "loss": 0.2521, "lr": 8.703098310448244e-06, "epoch": 5.055555555555555, "percentage": 72.22, "elapsed_time": "4:47:34", "remaining_time": "1:50:36"}
|
||||
{"current_steps": 2735, "total_steps": 3780, "loss": 0.2588, "lr": 8.627015805689394e-06, "epoch": 5.064814814814815, "percentage": 72.35, "elapsed_time": "4:48:08", "remaining_time": "1:50:05"}
|
||||
{"current_steps": 2740, "total_steps": 3780, "loss": 0.264, "lr": 8.551175763553778e-06, "epoch": 5.074074074074074, "percentage": 72.49, "elapsed_time": "4:48:47", "remaining_time": "1:49:37"}
|
||||
{"current_steps": 2745, "total_steps": 3780, "loss": 0.2147, "lr": 8.475579800888395e-06, "epoch": 5.083333333333333, "percentage": 72.62, "elapsed_time": "4:49:19", "remaining_time": "1:49:05"}
|
||||
{"current_steps": 2750, "total_steps": 3780, "loss": 0.2322, "lr": 8.400229529336704e-06, "epoch": 5.092592592592593, "percentage": 72.75, "elapsed_time": "4:49:49", "remaining_time": "1:48:33"}
|
||||
{"current_steps": 2755, "total_steps": 3780, "loss": 0.2115, "lr": 8.325126555304208e-06, "epoch": 5.101851851851852, "percentage": 72.88, "elapsed_time": "4:50:11", "remaining_time": "1:47:58"}
|
||||
{"current_steps": 2760, "total_steps": 3780, "loss": 0.2274, "lr": 8.250272479924234e-06, "epoch": 5.111111111111111, "percentage": 73.02, "elapsed_time": "4:50:50", "remaining_time": "1:47:29"}
|
||||
{"current_steps": 2765, "total_steps": 3780, "loss": 0.2658, "lr": 8.17566889902382e-06, "epoch": 5.12037037037037, "percentage": 73.15, "elapsed_time": "4:51:21", "remaining_time": "1:46:57"}
|
||||
{"current_steps": 2770, "total_steps": 3780, "loss": 0.2234, "lr": 8.101317403089635e-06, "epoch": 5.12962962962963, "percentage": 73.28, "elapsed_time": "4:51:48", "remaining_time": "1:46:23"}
|
||||
{"current_steps": 2775, "total_steps": 3780, "loss": 0.2518, "lr": 8.027219577234133e-06, "epoch": 5.138888888888889, "percentage": 73.41, "elapsed_time": "4:52:22", "remaining_time": "1:45:53"}
|
||||
{"current_steps": 2780, "total_steps": 3780, "loss": 0.2269, "lr": 7.953377001161714e-06, "epoch": 5.148148148148148, "percentage": 73.54, "elapsed_time": "4:52:58", "remaining_time": "1:45:23"}
|
||||
{"current_steps": 2785, "total_steps": 3780, "loss": 0.2341, "lr": 7.879791249135059e-06, "epoch": 5.157407407407407, "percentage": 73.68, "elapsed_time": "4:53:26", "remaining_time": "1:44:50"}
|
||||
{"current_steps": 2790, "total_steps": 3780, "loss": 0.2543, "lr": 7.806463889941598e-06, "epoch": 5.166666666666667, "percentage": 73.81, "elapsed_time": "4:54:00", "remaining_time": "1:44:19"}
|
||||
{"current_steps": 2795, "total_steps": 3780, "loss": 0.2092, "lr": 7.73339648686001e-06, "epoch": 5.175925925925926, "percentage": 73.94, "elapsed_time": "4:54:26", "remaining_time": "1:43:45"}
|
||||
{"current_steps": 2800, "total_steps": 3780, "loss": 0.2404, "lr": 7.66059059762695e-06, "epoch": 5.185185185185185, "percentage": 74.07, "elapsed_time": "4:54:58", "remaining_time": "1:43:14"}
|
||||
{"current_steps": 2805, "total_steps": 3780, "loss": 0.2234, "lr": 7.588047774403795e-06, "epoch": 5.194444444444445, "percentage": 74.21, "elapsed_time": "4:55:29", "remaining_time": "1:42:42"}
|
||||
{"current_steps": 2810, "total_steps": 3780, "loss": 0.2231, "lr": 7.5157695637435864e-06, "epoch": 5.203703703703703, "percentage": 74.34, "elapsed_time": "4:55:58", "remaining_time": "1:42:10"}
|
||||
{"current_steps": 2815, "total_steps": 3780, "loss": 0.228, "lr": 7.443757506558033e-06, "epoch": 5.212962962962963, "percentage": 74.47, "elapsed_time": "4:56:35", "remaining_time": "1:41:40"}
|
||||
{"current_steps": 2820, "total_steps": 3780, "loss": 0.2218, "lr": 7.3720131380846685e-06, "epoch": 5.222222222222222, "percentage": 74.6, "elapsed_time": "4:57:04", "remaining_time": "1:41:07"}
|
||||
{"current_steps": 2825, "total_steps": 3780, "loss": 0.2277, "lr": 7.300537987854146e-06, "epoch": 5.231481481481482, "percentage": 74.74, "elapsed_time": "4:57:33", "remaining_time": "1:40:35"}
|
||||
{"current_steps": 2830, "total_steps": 3780, "loss": 0.2151, "lr": 7.22933357965758e-06, "epoch": 5.2407407407407405, "percentage": 74.87, "elapsed_time": "4:58:04", "remaining_time": "1:40:03"}
|
||||
{"current_steps": 2835, "total_steps": 3780, "loss": 0.2185, "lr": 7.158401431514117e-06, "epoch": 5.25, "percentage": 75.0, "elapsed_time": "4:58:29", "remaining_time": "1:39:29"}
|
||||
{"current_steps": 2840, "total_steps": 3780, "loss": 0.2053, "lr": 7.0877430556385205e-06, "epoch": 5.2592592592592595, "percentage": 75.13, "elapsed_time": "4:58:57", "remaining_time": "1:38:57"}
|
||||
{"current_steps": 2845, "total_steps": 3780, "loss": 0.2197, "lr": 7.0173599584089625e-06, "epoch": 5.268518518518518, "percentage": 75.26, "elapsed_time": "4:59:29", "remaining_time": "1:38:25"}
|
||||
{"current_steps": 2850, "total_steps": 3780, "loss": 0.2454, "lr": 6.947253640334914e-06, "epoch": 5.277777777777778, "percentage": 75.4, "elapsed_time": "5:00:07", "remaining_time": "1:37:56"}
|
||||
{"current_steps": 2855, "total_steps": 3780, "loss": 0.213, "lr": 6.87742559602512e-06, "epoch": 5.287037037037037, "percentage": 75.53, "elapsed_time": "5:00:39", "remaining_time": "1:37:24"}
|
||||
{"current_steps": 2860, "total_steps": 3780, "loss": 0.2131, "lr": 6.807877314155788e-06, "epoch": 5.296296296296296, "percentage": 75.66, "elapsed_time": "5:01:08", "remaining_time": "1:36:52"}
|
||||
{"current_steps": 2865, "total_steps": 3780, "loss": 0.26, "lr": 6.738610277438791e-06, "epoch": 5.305555555555555, "percentage": 75.79, "elapsed_time": "5:01:44", "remaining_time": "1:36:21"}
|
||||
{"current_steps": 2870, "total_steps": 3780, "loss": 0.2461, "lr": 6.669625962590114e-06, "epoch": 5.314814814814815, "percentage": 75.93, "elapsed_time": "5:02:13", "remaining_time": "1:35:49"}
|
||||
{"current_steps": 2875, "total_steps": 3780, "loss": 0.2276, "lr": 6.600925840298331e-06, "epoch": 5.324074074074074, "percentage": 76.06, "elapsed_time": "5:02:44", "remaining_time": "1:35:17"}
|
||||
{"current_steps": 2880, "total_steps": 3780, "loss": 0.2235, "lr": 6.532511375193258e-06, "epoch": 5.333333333333333, "percentage": 76.19, "elapsed_time": "5:03:22", "remaining_time": "1:34:48"}
|
||||
{"current_steps": 2885, "total_steps": 3780, "loss": 0.2254, "lr": 6.464384025814763e-06, "epoch": 5.342592592592593, "percentage": 76.32, "elapsed_time": "5:03:58", "remaining_time": "1:34:18"}
|
||||
{"current_steps": 2890, "total_steps": 3780, "loss": 0.2305, "lr": 6.396545244581609e-06, "epoch": 5.351851851851852, "percentage": 76.46, "elapsed_time": "5:04:24", "remaining_time": "1:33:44"}
|
||||
{"current_steps": 2895, "total_steps": 3780, "loss": 0.2582, "lr": 6.3289964777605624e-06, "epoch": 5.361111111111111, "percentage": 76.59, "elapsed_time": "5:05:00", "remaining_time": "1:33:14"}
|
||||
{"current_steps": 2900, "total_steps": 3780, "loss": 0.2185, "lr": 6.261739165435492e-06, "epoch": 5.37037037037037, "percentage": 76.72, "elapsed_time": "5:05:21", "remaining_time": "1:32:39"}
|
||||
{"current_steps": 2905, "total_steps": 3780, "loss": 0.2258, "lr": 6.1947747414767035e-06, "epoch": 5.37962962962963, "percentage": 76.85, "elapsed_time": "5:05:50", "remaining_time": "1:32:07"}
|
||||
{"current_steps": 2910, "total_steps": 3780, "loss": 0.2262, "lr": 6.128104633510381e-06, "epoch": 5.388888888888889, "percentage": 76.98, "elapsed_time": "5:06:19", "remaining_time": "1:31:34"}
|
||||
{"current_steps": 2915, "total_steps": 3780, "loss": 0.2406, "lr": 6.0617302628881104e-06, "epoch": 5.398148148148148, "percentage": 77.12, "elapsed_time": "5:06:57", "remaining_time": "1:31:05"}
|
||||
{"current_steps": 2920, "total_steps": 3780, "loss": 0.2456, "lr": 5.9956530446566305e-06, "epoch": 5.407407407407407, "percentage": 77.25, "elapsed_time": "5:07:25", "remaining_time": "1:30:32"}
|
||||
{"current_steps": 2925, "total_steps": 3780, "loss": 0.2293, "lr": 5.929874387527605e-06, "epoch": 5.416666666666667, "percentage": 77.38, "elapsed_time": "5:07:55", "remaining_time": "1:30:00"}
|
||||
{"current_steps": 2930, "total_steps": 3780, "loss": 0.2441, "lr": 5.864395693847651e-06, "epoch": 5.425925925925926, "percentage": 77.51, "elapsed_time": "5:08:29", "remaining_time": "1:29:29"}
|
||||
{"current_steps": 2935, "total_steps": 3780, "loss": 0.224, "lr": 5.799218359568395e-06, "epoch": 5.435185185185185, "percentage": 77.65, "elapsed_time": "5:09:02", "remaining_time": "1:28:58"}
|
||||
{"current_steps": 2940, "total_steps": 3780, "loss": 0.235, "lr": 5.734343774216726e-06, "epoch": 5.444444444444445, "percentage": 77.78, "elapsed_time": "5:09:33", "remaining_time": "1:28:26"}
|
||||
{"current_steps": 2945, "total_steps": 3780, "loss": 0.2425, "lr": 5.669773320865198e-06, "epoch": 5.453703703703704, "percentage": 77.91, "elapsed_time": "5:10:01", "remaining_time": "1:27:54"}
|
||||
{"current_steps": 2950, "total_steps": 3780, "loss": 0.2294, "lr": 5.605508376102504e-06, "epoch": 5.462962962962963, "percentage": 78.04, "elapsed_time": "5:10:32", "remaining_time": "1:27:22"}
|
||||
{"current_steps": 2955, "total_steps": 3780, "loss": 0.2096, "lr": 5.541550310004142e-06, "epoch": 5.472222222222222, "percentage": 78.17, "elapsed_time": "5:11:03", "remaining_time": "1:26:50"}
|
||||
{"current_steps": 2960, "total_steps": 3780, "loss": 0.2275, "lr": 5.4779004861032355e-06, "epoch": 5.481481481481482, "percentage": 78.31, "elapsed_time": "5:11:34", "remaining_time": "1:26:18"}
|
||||
{"current_steps": 2965, "total_steps": 3780, "loss": 0.2166, "lr": 5.414560261361415e-06, "epoch": 5.4907407407407405, "percentage": 78.44, "elapsed_time": "5:12:08", "remaining_time": "1:25:48"}
|
||||
{"current_steps": 2970, "total_steps": 3780, "loss": 0.2557, "lr": 5.351530986139917e-06, "epoch": 5.5, "percentage": 78.57, "elapsed_time": "5:12:49", "remaining_time": "1:25:18"}
|
||||
{"current_steps": 2975, "total_steps": 3780, "loss": 0.184, "lr": 5.288814004170804e-06, "epoch": 5.5092592592592595, "percentage": 78.7, "elapsed_time": "5:13:14", "remaining_time": "1:24:45"}
|
||||
{"current_steps": 2980, "total_steps": 3780, "loss": 0.2168, "lr": 5.226410652528293e-06, "epoch": 5.518518518518518, "percentage": 78.84, "elapsed_time": "5:13:45", "remaining_time": "1:24:13"}
|
||||
{"current_steps": 2985, "total_steps": 3780, "loss": 0.2345, "lr": 5.164322261600257e-06, "epoch": 5.527777777777778, "percentage": 78.97, "elapsed_time": "5:14:14", "remaining_time": "1:23:41"}
|
||||
{"current_steps": 2990, "total_steps": 3780, "loss": 0.2451, "lr": 5.102550155059887e-06, "epoch": 5.537037037037037, "percentage": 79.1, "elapsed_time": "5:14:49", "remaining_time": "1:23:10"}
|
||||
{"current_steps": 2995, "total_steps": 3780, "loss": 0.2251, "lr": 5.041095649837429e-06, "epoch": 5.546296296296296, "percentage": 79.23, "elapsed_time": "5:15:20", "remaining_time": "1:22:39"}
|
||||
{"current_steps": 3000, "total_steps": 3780, "loss": 0.2085, "lr": 4.97996005609215e-06, "epoch": 5.555555555555555, "percentage": 79.37, "elapsed_time": "5:15:53", "remaining_time": "1:22:07"}
|
||||
{"current_steps": 3005, "total_steps": 3780, "loss": 0.2469, "lr": 4.919144677184377e-06, "epoch": 5.564814814814815, "percentage": 79.5, "elapsed_time": "5:16:36", "remaining_time": "1:21:39"}
|
||||
{"current_steps": 3010, "total_steps": 3780, "loss": 0.2381, "lr": 4.858650809647718e-06, "epoch": 5.574074074074074, "percentage": 79.63, "elapsed_time": "5:17:15", "remaining_time": "1:21:09"}
|
||||
{"current_steps": 3015, "total_steps": 3780, "loss": 0.2218, "lr": 4.798479743161443e-06, "epoch": 5.583333333333333, "percentage": 79.76, "elapsed_time": "5:17:44", "remaining_time": "1:20:37"}
|
||||
{"current_steps": 3020, "total_steps": 3780, "loss": 0.2512, "lr": 4.73863276052295e-06, "epoch": 5.592592592592593, "percentage": 79.89, "elapsed_time": "5:18:18", "remaining_time": "1:20:06"}
|
||||
{"current_steps": 3025, "total_steps": 3780, "loss": 0.2318, "lr": 4.679111137620442e-06, "epoch": 5.601851851851852, "percentage": 80.03, "elapsed_time": "5:18:49", "remaining_time": "1:19:34"}
|
||||
{"current_steps": 3030, "total_steps": 3780, "loss": 0.2177, "lr": 4.619916143405734e-06, "epoch": 5.611111111111111, "percentage": 80.16, "elapsed_time": "5:19:23", "remaining_time": "1:19:03"}
|
||||
{"current_steps": 3035, "total_steps": 3780, "loss": 0.1926, "lr": 4.561049039867167e-06, "epoch": 5.62037037037037, "percentage": 80.29, "elapsed_time": "5:19:50", "remaining_time": "1:18:30"}
|
||||
{"current_steps": 3040, "total_steps": 3780, "loss": 0.2243, "lr": 4.502511082002748e-06, "epoch": 5.62962962962963, "percentage": 80.42, "elapsed_time": "5:20:15", "remaining_time": "1:17:57"}
|
||||
{"current_steps": 3045, "total_steps": 3780, "loss": 0.2423, "lr": 4.44430351779334e-06, "epoch": 5.638888888888889, "percentage": 80.56, "elapsed_time": "5:20:51", "remaining_time": "1:17:27"}
|
||||
{"current_steps": 3050, "total_steps": 3780, "loss": 0.226, "lr": 4.386427588176121e-06, "epoch": 5.648148148148148, "percentage": 80.69, "elapsed_time": "5:21:26", "remaining_time": "1:16:56"}
|
||||
{"current_steps": 3055, "total_steps": 3780, "loss": 0.2337, "lr": 4.328884527018067e-06, "epoch": 5.657407407407407, "percentage": 80.82, "elapsed_time": "5:22:02", "remaining_time": "1:16:25"}
|
||||
{"current_steps": 3060, "total_steps": 3780, "loss": 0.2347, "lr": 4.271675561089676e-06, "epoch": 5.666666666666667, "percentage": 80.95, "elapsed_time": "5:22:31", "remaining_time": "1:15:53"}
|
||||
{"current_steps": 3065, "total_steps": 3780, "loss": 0.2435, "lr": 4.214801910038831e-06, "epoch": 5.675925925925926, "percentage": 81.08, "elapsed_time": "5:23:00", "remaining_time": "1:15:20"}
|
||||
{"current_steps": 3070, "total_steps": 3780, "loss": 0.2401, "lr": 4.1582647863647565e-06, "epoch": 5.685185185185185, "percentage": 81.22, "elapsed_time": "5:23:38", "remaining_time": "1:14:50"}
|
||||
{"current_steps": 3075, "total_steps": 3780, "loss": 0.2376, "lr": 4.102065395392208e-06, "epoch": 5.694444444444445, "percentage": 81.35, "elapsed_time": "5:24:15", "remaining_time": "1:14:20"}
|
||||
{"current_steps": 3080, "total_steps": 3780, "loss": 0.2578, "lr": 4.04620493524575e-06, "epoch": 5.703703703703704, "percentage": 81.48, "elapsed_time": "5:24:41", "remaining_time": "1:13:47"}
|
||||
{"current_steps": 3085, "total_steps": 3780, "loss": 0.2128, "lr": 3.990684596824219e-06, "epoch": 5.712962962962963, "percentage": 81.61, "elapsed_time": "5:25:07", "remaining_time": "1:13:14"}
|
||||
{"current_steps": 3090, "total_steps": 3780, "loss": 0.2199, "lr": 3.93550556377535e-06, "epoch": 5.722222222222222, "percentage": 81.75, "elapsed_time": "5:25:34", "remaining_time": "1:12:42"}
|
||||
{"current_steps": 3095, "total_steps": 3780, "loss": 0.2339, "lr": 3.880669012470515e-06, "epoch": 5.731481481481482, "percentage": 81.88, "elapsed_time": "5:26:07", "remaining_time": "1:12:10"}
|
||||
{"current_steps": 3100, "total_steps": 3780, "loss": 0.2401, "lr": 3.826176111979673e-06, "epoch": 5.7407407407407405, "percentage": 82.01, "elapsed_time": "5:26:42", "remaining_time": "1:11:39"}
|
||||
{"current_steps": 3105, "total_steps": 3780, "loss": 0.2471, "lr": 3.7720280240464145e-06, "epoch": 5.75, "percentage": 82.14, "elapsed_time": "5:27:07", "remaining_time": "1:11:06"}
|
||||
{"current_steps": 3110, "total_steps": 3780, "loss": 0.2393, "lr": 3.7182259030632305e-06, "epoch": 5.7592592592592595, "percentage": 82.28, "elapsed_time": "5:27:35", "remaining_time": "1:10:34"}
|
||||
{"current_steps": 3115, "total_steps": 3780, "loss": 0.2176, "lr": 3.6647708960468696e-06, "epoch": 5.768518518518518, "percentage": 82.41, "elapsed_time": "5:28:05", "remaining_time": "1:10:02"}
|
||||
{"current_steps": 3120, "total_steps": 3780, "loss": 0.2306, "lr": 3.6116641426138933e-06, "epoch": 5.777777777777778, "percentage": 82.54, "elapsed_time": "5:28:40", "remaining_time": "1:09:31"}
|
||||
{"current_steps": 3125, "total_steps": 3780, "loss": 0.2301, "lr": 3.5589067749564054e-06, "epoch": 5.787037037037037, "percentage": 82.67, "elapsed_time": "5:29:17", "remaining_time": "1:09:01"}
|
||||
{"current_steps": 3130, "total_steps": 3780, "loss": 0.2149, "lr": 3.5064999178178648e-06, "epoch": 5.796296296296296, "percentage": 82.8, "elapsed_time": "5:29:47", "remaining_time": "1:08:29"}
|
||||
{"current_steps": 3135, "total_steps": 3780, "loss": 0.234, "lr": 3.454444688469165e-06, "epoch": 5.805555555555555, "percentage": 82.94, "elapsed_time": "5:30:11", "remaining_time": "1:07:56"}
|
||||
{"current_steps": 3140, "total_steps": 3780, "loss": 0.2275, "lr": 3.4027421966847675e-06, "epoch": 5.814814814814815, "percentage": 83.07, "elapsed_time": "5:30:44", "remaining_time": "1:07:24"}
|
||||
{"current_steps": 3145, "total_steps": 3780, "loss": 0.2155, "lr": 3.3513935447190595e-06, "epoch": 5.824074074074074, "percentage": 83.2, "elapsed_time": "5:31:09", "remaining_time": "1:06:51"}
|
||||
{"current_steps": 3150, "total_steps": 3780, "loss": 0.2603, "lr": 3.3003998272828676e-06, "epoch": 5.833333333333333, "percentage": 83.33, "elapsed_time": "5:31:43", "remaining_time": "1:06:20"}
|
||||
{"current_steps": 3155, "total_steps": 3780, "loss": 0.2349, "lr": 3.2497621315200958e-06, "epoch": 5.842592592592593, "percentage": 83.47, "elapsed_time": "5:32:15", "remaining_time": "1:05:49"}
|
||||
{"current_steps": 3160, "total_steps": 3780, "loss": 0.2078, "lr": 3.199481536984572e-06, "epoch": 5.851851851851852, "percentage": 83.6, "elapsed_time": "5:32:42", "remaining_time": "1:05:16"}
|
||||
{"current_steps": 3165, "total_steps": 3780, "loss": 0.2004, "lr": 3.149559115617009e-06, "epoch": 5.861111111111111, "percentage": 83.73, "elapsed_time": "5:33:11", "remaining_time": "1:04:44"}
|
||||
{"current_steps": 3170, "total_steps": 3780, "loss": 0.2272, "lr": 3.099995931722175e-06, "epoch": 5.87037037037037, "percentage": 83.86, "elapsed_time": "5:33:44", "remaining_time": "1:04:13"}
|
||||
{"current_steps": 3175, "total_steps": 3780, "loss": 0.218, "lr": 3.050793041946183e-06, "epoch": 5.87962962962963, "percentage": 83.99, "elapsed_time": "5:34:10", "remaining_time": "1:03:40"}
|
||||
{"current_steps": 3180, "total_steps": 3780, "loss": 0.2158, "lr": 3.001951495253972e-06, "epoch": 5.888888888888889, "percentage": 84.13, "elapsed_time": "5:34:45", "remaining_time": "1:03:09"}
|
||||
{"current_steps": 3185, "total_steps": 3780, "loss": 0.2123, "lr": 2.953472332906959e-06, "epoch": 5.898148148148148, "percentage": 84.26, "elapsed_time": "5:35:18", "remaining_time": "1:02:38"}
|
||||
{"current_steps": 3190, "total_steps": 3780, "loss": 0.2207, "lr": 2.905356588440811e-06, "epoch": 5.907407407407407, "percentage": 84.39, "elapsed_time": "5:35:45", "remaining_time": "1:02:05"}
|
||||
{"current_steps": 3195, "total_steps": 3780, "loss": 0.2117, "lr": 2.857605287643437e-06, "epoch": 5.916666666666667, "percentage": 84.52, "elapsed_time": "5:36:11", "remaining_time": "1:01:33"}
|
||||
{"current_steps": 3200, "total_steps": 3780, "loss": 0.2286, "lr": 2.8102194485331e-06, "epoch": 5.925925925925926, "percentage": 84.66, "elapsed_time": "5:36:45", "remaining_time": "1:01:02"}
|
||||
{"current_steps": 3205, "total_steps": 3780, "loss": 0.2345, "lr": 2.763200081336721e-06, "epoch": 5.935185185185185, "percentage": 84.79, "elapsed_time": "5:37:17", "remaining_time": "1:00:30"}
|
||||
{"current_steps": 3210, "total_steps": 3780, "loss": 0.2184, "lr": 2.7165481884683576e-06, "epoch": 5.944444444444445, "percentage": 84.92, "elapsed_time": "5:37:52", "remaining_time": "0:59:59"}
|
||||
{"current_steps": 3215, "total_steps": 3780, "loss": 0.2346, "lr": 2.6702647645077973e-06, "epoch": 5.953703703703704, "percentage": 85.05, "elapsed_time": "5:38:23", "remaining_time": "0:59:28"}
|
||||
{"current_steps": 3220, "total_steps": 3780, "loss": 0.2472, "lr": 2.6243507961793936e-06, "epoch": 5.962962962962963, "percentage": 85.19, "elapsed_time": "5:38:52", "remaining_time": "0:58:56"}
|
||||
{"current_steps": 3225, "total_steps": 3780, "loss": 0.2146, "lr": 2.5788072623309977e-06, "epoch": 5.972222222222222, "percentage": 85.32, "elapsed_time": "5:39:19", "remaining_time": "0:58:23"}
|
||||
{"current_steps": 3230, "total_steps": 3780, "loss": 0.2365, "lr": 2.5336351339131147e-06, "epoch": 5.981481481481482, "percentage": 85.45, "elapsed_time": "5:39:52", "remaining_time": "0:57:52"}
|
||||
{"current_steps": 3235, "total_steps": 3780, "loss": 0.2391, "lr": 2.488835373958185e-06, "epoch": 5.9907407407407405, "percentage": 85.58, "elapsed_time": "5:40:20", "remaining_time": "0:57:20"}
|
||||
{"current_steps": 3240, "total_steps": 3780, "loss": 0.2503, "lr": 2.444408937560059e-06, "epoch": 6.0, "percentage": 85.71, "elapsed_time": "5:40:50", "remaining_time": "0:56:48"}
|
||||
{"current_steps": 3245, "total_steps": 3780, "loss": 0.2234, "lr": 2.400356771853651e-06, "epoch": 6.0092592592592595, "percentage": 85.85, "elapsed_time": "5:41:20", "remaining_time": "0:56:16"}
|
||||
{"current_steps": 3250, "total_steps": 3780, "loss": 0.2167, "lr": 2.3566798159947157e-06, "epoch": 6.018518518518518, "percentage": 85.98, "elapsed_time": "5:41:55", "remaining_time": "0:55:45"}
|
||||
{"current_steps": 3255, "total_steps": 3780, "loss": 0.214, "lr": 2.3133790011398618e-06, "epoch": 6.027777777777778, "percentage": 86.11, "elapsed_time": "5:42:27", "remaining_time": "0:55:14"}
|
||||
{"current_steps": 3260, "total_steps": 3780, "loss": 0.2459, "lr": 2.2704552504266664e-06, "epoch": 6.037037037037037, "percentage": 86.24, "elapsed_time": "5:42:57", "remaining_time": "0:54:42"}
|
||||
{"current_steps": 3265, "total_steps": 3780, "loss": 0.2193, "lr": 2.2279094789540244e-06, "epoch": 6.046296296296297, "percentage": 86.38, "elapsed_time": "5:43:29", "remaining_time": "0:54:10"}
|
||||
{"current_steps": 3270, "total_steps": 3780, "loss": 0.2346, "lr": 2.185742593762614e-06, "epoch": 6.055555555555555, "percentage": 86.51, "elapsed_time": "5:44:01", "remaining_time": "0:53:39"}
|
||||
{"current_steps": 3275, "total_steps": 3780, "loss": 0.2106, "lr": 2.143955493815577e-06, "epoch": 6.064814814814815, "percentage": 86.64, "elapsed_time": "5:44:30", "remaining_time": "0:53:07"}
|
||||
{"current_steps": 3280, "total_steps": 3780, "loss": 0.2374, "lr": 2.1025490699793516e-06, "epoch": 6.074074074074074, "percentage": 86.77, "elapsed_time": "5:45:05", "remaining_time": "0:52:36"}
|
||||
{"current_steps": 3285, "total_steps": 3780, "loss": 0.2285, "lr": 2.0615242050046656e-06, "epoch": 6.083333333333333, "percentage": 86.9, "elapsed_time": "5:45:27", "remaining_time": "0:52:03"}
|
||||
{"current_steps": 3290, "total_steps": 3780, "loss": 0.2118, "lr": 2.020881773507739e-06, "epoch": 6.092592592592593, "percentage": 87.04, "elapsed_time": "5:45:58", "remaining_time": "0:51:31"}
|
||||
{"current_steps": 3295, "total_steps": 3780, "loss": 0.219, "lr": 1.9806226419516195e-06, "epoch": 6.101851851851852, "percentage": 87.17, "elapsed_time": "5:46:30", "remaining_time": "0:51:00"}
|
||||
{"current_steps": 3300, "total_steps": 3780, "loss": 0.2487, "lr": 1.9407476686277095e-06, "epoch": 6.111111111111111, "percentage": 87.3, "elapsed_time": "5:47:05", "remaining_time": "0:50:29"}
|
||||
{"current_steps": 3305, "total_steps": 3780, "loss": 0.2215, "lr": 1.9012577036374936e-06, "epoch": 6.12037037037037, "percentage": 87.43, "elapsed_time": "5:47:36", "remaining_time": "0:49:57"}
|
||||
{"current_steps": 3310, "total_steps": 3780, "loss": 0.2292, "lr": 1.8621535888743825e-06, "epoch": 6.12962962962963, "percentage": 87.57, "elapsed_time": "5:48:12", "remaining_time": "0:49:26"}
|
||||
{"current_steps": 3315, "total_steps": 3780, "loss": 0.228, "lr": 1.8234361580057802e-06, "epoch": 6.138888888888889, "percentage": 87.7, "elapsed_time": "5:48:47", "remaining_time": "0:48:55"}
|
||||
{"current_steps": 3320, "total_steps": 3780, "loss": 0.2415, "lr": 1.7851062364553184e-06, "epoch": 6.148148148148148, "percentage": 87.83, "elapsed_time": "5:49:25", "remaining_time": "0:48:24"}
|
||||
{"current_steps": 3325, "total_steps": 3780, "loss": 0.2387, "lr": 1.7471646413852439e-06, "epoch": 6.157407407407407, "percentage": 87.96, "elapsed_time": "5:49:56", "remaining_time": "0:47:53"}
|
||||
{"current_steps": 3330, "total_steps": 3780, "loss": 0.2165, "lr": 1.709612181678999e-06, "epoch": 6.166666666666667, "percentage": 88.1, "elapsed_time": "5:50:17", "remaining_time": "0:47:20"}
|
||||
{"current_steps": 3335, "total_steps": 3780, "loss": 0.2263, "lr": 1.6724496579239979e-06, "epoch": 6.175925925925926, "percentage": 88.23, "elapsed_time": "5:50:53", "remaining_time": "0:46:49"}
|
||||
{"current_steps": 3340, "total_steps": 3780, "loss": 0.2477, "lr": 1.6356778623945223e-06, "epoch": 6.185185185185185, "percentage": 88.36, "elapsed_time": "5:51:23", "remaining_time": "0:46:17"}
|
||||
{"current_steps": 3345, "total_steps": 3780, "loss": 0.2209, "lr": 1.5992975790348642e-06, "epoch": 6.194444444444445, "percentage": 88.49, "elapsed_time": "5:52:04", "remaining_time": "0:45:47"}
|
||||
{"current_steps": 3350, "total_steps": 3780, "loss": 0.216, "lr": 1.5633095834425983e-06, "epoch": 6.203703703703703, "percentage": 88.62, "elapsed_time": "5:52:30", "remaining_time": "0:45:14"}
|
||||
{"current_steps": 3355, "total_steps": 3780, "loss": 0.2231, "lr": 1.527714642852045e-06, "epoch": 6.212962962962963, "percentage": 88.76, "elapsed_time": "5:52:54", "remaining_time": "0:44:42"}
|
||||
{"current_steps": 3360, "total_steps": 3780, "loss": 0.2134, "lr": 1.492513516117915e-06, "epoch": 6.222222222222222, "percentage": 88.89, "elapsed_time": "5:53:28", "remaining_time": "0:44:11"}
|
||||
{"current_steps": 3365, "total_steps": 3780, "loss": 0.2151, "lr": 1.457706953699145e-06, "epoch": 6.231481481481482, "percentage": 89.02, "elapsed_time": "5:53:57", "remaining_time": "0:43:39"}
|
||||
{"current_steps": 3370, "total_steps": 3780, "loss": 0.1995, "lr": 1.423295697642868e-06, "epoch": 6.2407407407407405, "percentage": 89.15, "elapsed_time": "5:54:33", "remaining_time": "0:43:08"}
|
||||
{"current_steps": 3375, "total_steps": 3780, "loss": 0.2084, "lr": 1.3892804815686312e-06, "epoch": 6.25, "percentage": 89.29, "elapsed_time": "5:55:01", "remaining_time": "0:42:36"}
|
||||
{"current_steps": 3380, "total_steps": 3780, "loss": 0.2366, "lr": 1.35566203065272e-06, "epoch": 6.2592592592592595, "percentage": 89.42, "elapsed_time": "5:55:34", "remaining_time": "0:42:04"}
|
||||
{"current_steps": 3385, "total_steps": 3780, "loss": 0.2255, "lr": 1.3224410616127292e-06, "epoch": 6.268518518518518, "percentage": 89.55, "elapsed_time": "5:56:06", "remaining_time": "0:41:33"}
|
||||
{"current_steps": 3390, "total_steps": 3780, "loss": 0.225, "lr": 1.2896182826922577e-06, "epoch": 6.277777777777778, "percentage": 89.68, "elapsed_time": "5:56:33", "remaining_time": "0:41:01"}
|
||||
{"current_steps": 3395, "total_steps": 3780, "loss": 0.2108, "lr": 1.2571943936458197e-06, "epoch": 6.287037037037037, "percentage": 89.81, "elapsed_time": "5:57:12", "remaining_time": "0:40:30"}
|
||||
{"current_steps": 3400, "total_steps": 3780, "loss": 0.1997, "lr": 1.2251700857239412e-06, "epoch": 6.296296296296296, "percentage": 89.95, "elapsed_time": "5:57:41", "remaining_time": "0:39:58"}
|
||||
{"current_steps": 3405, "total_steps": 3780, "loss": 0.25, "lr": 1.1935460416583889e-06, "epoch": 6.305555555555555, "percentage": 90.08, "elapsed_time": "5:58:09", "remaining_time": "0:39:26"}
|
||||
{"current_steps": 3410, "total_steps": 3780, "loss": 0.2051, "lr": 1.162322935647655e-06, "epoch": 6.314814814814815, "percentage": 90.21, "elapsed_time": "5:58:34", "remaining_time": "0:38:54"}
|
||||
{"current_steps": 3415, "total_steps": 3780, "loss": 0.2374, "lr": 1.1315014333425455e-06, "epoch": 6.324074074074074, "percentage": 90.34, "elapsed_time": "5:59:06", "remaining_time": "0:38:22"}
|
||||
{"current_steps": 3420, "total_steps": 3780, "loss": 0.2205, "lr": 1.101082191832017e-06, "epoch": 6.333333333333333, "percentage": 90.48, "elapsed_time": "5:59:35", "remaining_time": "0:37:51"}
|
||||
{"current_steps": 3425, "total_steps": 3780, "loss": 0.2277, "lr": 1.0710658596291612e-06, "epoch": 6.342592592592593, "percentage": 90.61, "elapsed_time": "6:00:04", "remaining_time": "0:37:19"}
|
||||
{"current_steps": 3430, "total_steps": 3780, "loss": 0.2592, "lr": 1.0414530766573661e-06, "epoch": 6.351851851851852, "percentage": 90.74, "elapsed_time": "6:00:39", "remaining_time": "0:36:48"}
|
||||
{"current_steps": 3435, "total_steps": 3780, "loss": 0.2175, "lr": 1.0122444742366945e-06, "epoch": 6.361111111111111, "percentage": 90.87, "elapsed_time": "6:01:09", "remaining_time": "0:36:16"}
|
||||
{"current_steps": 3440, "total_steps": 3780, "loss": 0.228, "lr": 9.83440675070404e-07, "epoch": 6.37037037037037, "percentage": 91.01, "elapsed_time": "6:01:49", "remaining_time": "0:35:45"}
|
||||
{"current_steps": 3445, "total_steps": 3780, "loss": 0.2085, "lr": 9.550422932316938e-07, "epoch": 6.37962962962963, "percentage": 91.14, "elapsed_time": "6:02:19", "remaining_time": "0:35:14"}
|
||||
{"current_steps": 3450, "total_steps": 3780, "loss": 0.2299, "lr": 9.270499341505901e-07, "epoch": 6.388888888888889, "percentage": 91.27, "elapsed_time": "6:02:48", "remaining_time": "0:34:42"}
|
||||
{"current_steps": 3455, "total_steps": 3780, "loss": 0.2255, "lr": 8.994641946010474e-07, "epoch": 6.398148148148148, "percentage": 91.4, "elapsed_time": "6:03:23", "remaining_time": "0:34:10"}
|
||||
{"current_steps": 3460, "total_steps": 3780, "loss": 0.2151, "lr": 8.722856626882415e-07, "epoch": 6.407407407407407, "percentage": 91.53, "elapsed_time": "6:03:55", "remaining_time": "0:33:39"}
|
||||
{"current_steps": 3465, "total_steps": 3780, "loss": 0.2341, "lr": 8.455149178360012e-07, "epoch": 6.416666666666667, "percentage": 91.67, "elapsed_time": "6:04:26", "remaining_time": "0:33:07"}
|
||||
{"current_steps": 3470, "total_steps": 3780, "loss": 0.2231, "lr": 8.191525307744896e-07, "epoch": 6.425925925925926, "percentage": 91.8, "elapsed_time": "6:04:50", "remaining_time": "0:32:35"}
|
||||
{"current_steps": 3475, "total_steps": 3780, "loss": 0.2323, "lr": 7.931990635280052e-07, "epoch": 6.435185185185185, "percentage": 91.93, "elapsed_time": "6:05:17", "remaining_time": "0:32:03"}
|
||||
{"current_steps": 3480, "total_steps": 3780, "loss": 0.2077, "lr": 7.676550694030172e-07, "epoch": 6.444444444444445, "percentage": 92.06, "elapsed_time": "6:05:45", "remaining_time": "0:31:31"}
|
||||
{"current_steps": 3485, "total_steps": 3780, "loss": 0.2265, "lr": 7.425210929763738e-07, "epoch": 6.453703703703704, "percentage": 92.2, "elapsed_time": "6:06:19", "remaining_time": "0:31:00"}
|
||||
{"current_steps": 3490, "total_steps": 3780, "loss": 0.2288, "lr": 7.17797670083673e-07, "epoch": 6.462962962962963, "percentage": 92.33, "elapsed_time": "6:06:48", "remaining_time": "0:30:28"}
|
||||
{"current_steps": 3495, "total_steps": 3780, "loss": 0.2442, "lr": 6.934853278078635e-07, "epoch": 6.472222222222222, "percentage": 92.46, "elapsed_time": "6:07:19", "remaining_time": "0:29:57"}
|
||||
{"current_steps": 3500, "total_steps": 3780, "loss": 0.2404, "lr": 6.695845844679816e-07, "epoch": 6.481481481481482, "percentage": 92.59, "elapsed_time": "6:07:53", "remaining_time": "0:29:25"}
|
||||
{"current_steps": 3505, "total_steps": 3780, "loss": 0.2194, "lr": 6.460959496081276e-07, "epoch": 6.4907407407407405, "percentage": 92.72, "elapsed_time": "6:08:19", "remaining_time": "0:28:53"}
|
||||
{"current_steps": 3510, "total_steps": 3780, "loss": 0.2537, "lr": 6.230199239865808e-07, "epoch": 6.5, "percentage": 92.86, "elapsed_time": "6:08:56", "remaining_time": "0:28:22"}
|
||||
{"current_steps": 3515, "total_steps": 3780, "loss": 0.2325, "lr": 6.003569995651304e-07, "epoch": 6.5092592592592595, "percentage": 92.99, "elapsed_time": "6:09:30", "remaining_time": "0:27:51"}
|
||||
{"current_steps": 3520, "total_steps": 3780, "loss": 0.2151, "lr": 5.781076594986035e-07, "epoch": 6.518518518518518, "percentage": 93.12, "elapsed_time": "6:09:57", "remaining_time": "0:27:19"}
|
||||
{"current_steps": 3525, "total_steps": 3780, "loss": 0.2125, "lr": 5.562723781245316e-07, "epoch": 6.527777777777778, "percentage": 93.25, "elapsed_time": "6:10:24", "remaining_time": "0:26:47"}
|
||||
{"current_steps": 3530, "total_steps": 3780, "loss": 0.2385, "lr": 5.348516209530741e-07, "epoch": 6.537037037037037, "percentage": 93.39, "elapsed_time": "6:10:49", "remaining_time": "0:26:15"}
|
||||
{"current_steps": 3535, "total_steps": 3780, "loss": 0.2104, "lr": 5.13845844657066e-07, "epoch": 6.546296296296296, "percentage": 93.52, "elapsed_time": "6:11:20", "remaining_time": "0:25:44"}
|
||||
{"current_steps": 3540, "total_steps": 3780, "loss": 0.218, "lr": 4.93255497062295e-07, "epoch": 6.555555555555555, "percentage": 93.65, "elapsed_time": "6:11:52", "remaining_time": "0:25:12"}
|
||||
{"current_steps": 3545, "total_steps": 3780, "loss": 0.2464, "lr": 4.730810171379574e-07, "epoch": 6.564814814814815, "percentage": 93.78, "elapsed_time": "6:12:32", "remaining_time": "0:24:41"}
|
||||
{"current_steps": 3550, "total_steps": 3780, "loss": 0.2461, "lr": 4.533228349872887e-07, "epoch": 6.574074074074074, "percentage": 93.92, "elapsed_time": "6:13:06", "remaining_time": "0:24:10"}
|
||||
{"current_steps": 3555, "total_steps": 3780, "loss": 0.21, "lr": 4.339813718384056e-07, "epoch": 6.583333333333333, "percentage": 94.05, "elapsed_time": "6:13:34", "remaining_time": "0:23:38"}
|
||||
{"current_steps": 3560, "total_steps": 3780, "loss": 0.2236, "lr": 4.1505704003531155e-07, "epoch": 6.592592592592593, "percentage": 94.18, "elapsed_time": "6:14:08", "remaining_time": "0:23:07"}
|
||||
{"current_steps": 3565, "total_steps": 3780, "loss": 0.2038, "lr": 3.965502430291235e-07, "epoch": 6.601851851851852, "percentage": 94.31, "elapsed_time": "6:14:36", "remaining_time": "0:22:35"}
|
||||
{"current_steps": 3570, "total_steps": 3780, "loss": 0.2127, "lr": 3.784613753694566e-07, "epoch": 6.611111111111111, "percentage": 94.44, "elapsed_time": "6:15:00", "remaining_time": "0:22:03"}
|
||||
{"current_steps": 3575, "total_steps": 3780, "loss": 0.2395, "lr": 3.607908226960155e-07, "epoch": 6.62037037037037, "percentage": 94.58, "elapsed_time": "6:15:32", "remaining_time": "0:21:32"}
|
||||
{"current_steps": 3580, "total_steps": 3780, "loss": 0.234, "lr": 3.4353896173038524e-07, "epoch": 6.62962962962963, "percentage": 94.71, "elapsed_time": "6:16:09", "remaining_time": "0:21:00"}
|
||||
{"current_steps": 3585, "total_steps": 3780, "loss": 0.2069, "lr": 3.2670616026797776e-07, "epoch": 6.638888888888889, "percentage": 94.84, "elapsed_time": "6:16:45", "remaining_time": "0:20:29"}
|
||||
{"current_steps": 3590, "total_steps": 3780, "loss": 0.1989, "lr": 3.102927771702091e-07, "epoch": 6.648148148148148, "percentage": 94.97, "elapsed_time": "6:17:15", "remaining_time": "0:19:57"}
|
||||
{"current_steps": 3595, "total_steps": 3780, "loss": 0.212, "lr": 2.942991623568436e-07, "epoch": 6.657407407407407, "percentage": 95.11, "elapsed_time": "6:17:43", "remaining_time": "0:19:26"}
|
||||
{"current_steps": 3600, "total_steps": 3780, "loss": 0.2308, "lr": 2.7872565679852414e-07, "epoch": 6.666666666666667, "percentage": 95.24, "elapsed_time": "6:18:11", "remaining_time": "0:18:54"}
|
||||
{"current_steps": 3605, "total_steps": 3780, "loss": 0.243, "lr": 2.635725925095245e-07, "epoch": 6.675925925925926, "percentage": 95.37, "elapsed_time": "6:18:35", "remaining_time": "0:18:22"}
|
||||
{"current_steps": 3610, "total_steps": 3780, "loss": 0.2013, "lr": 2.4884029254064636e-07, "epoch": 6.685185185185185, "percentage": 95.5, "elapsed_time": "6:19:03", "remaining_time": "0:17:51"}
|
||||
{"current_steps": 3615, "total_steps": 3780, "loss": 0.2199, "lr": 2.3452907097235355e-07, "epoch": 6.694444444444445, "percentage": 95.63, "elapsed_time": "6:19:35", "remaining_time": "0:17:19"}
|
||||
{"current_steps": 3620, "total_steps": 3780, "loss": 0.2168, "lr": 2.2063923290805756e-07, "epoch": 6.703703703703704, "percentage": 95.77, "elapsed_time": "6:20:07", "remaining_time": "0:16:48"}
|
||||
{"current_steps": 3625, "total_steps": 3780, "loss": 0.1968, "lr": 2.0717107446762696e-07, "epoch": 6.712962962962963, "percentage": 95.9, "elapsed_time": "6:20:31", "remaining_time": "0:16:16"}
|
||||
{"current_steps": 3630, "total_steps": 3780, "loss": 0.2174, "lr": 1.9412488278107044e-07, "epoch": 6.722222222222222, "percentage": 96.03, "elapsed_time": "6:21:01", "remaining_time": "0:15:44"}
|
||||
{"current_steps": 3635, "total_steps": 3780, "loss": 0.2248, "lr": 1.8150093598240825e-07, "epoch": 6.731481481481482, "percentage": 96.16, "elapsed_time": "6:21:36", "remaining_time": "0:15:13"}
|
||||
{"current_steps": 3640, "total_steps": 3780, "loss": 0.2314, "lr": 1.69299503203757e-07, "epoch": 6.7407407407407405, "percentage": 96.3, "elapsed_time": "6:22:06", "remaining_time": "0:14:41"}
|
||||
{"current_steps": 3645, "total_steps": 3780, "loss": 0.2078, "lr": 1.5752084456957416e-07, "epoch": 6.75, "percentage": 96.43, "elapsed_time": "6:22:33", "remaining_time": "0:14:10"}
|
||||
{"current_steps": 3650, "total_steps": 3780, "loss": 0.2194, "lr": 1.4616521119112937e-07, "epoch": 6.7592592592592595, "percentage": 96.56, "elapsed_time": "6:23:02", "remaining_time": "0:13:38"}
|
||||
{"current_steps": 3655, "total_steps": 3780, "loss": 0.2109, "lr": 1.3523284516113955e-07, "epoch": 6.768518518518518, "percentage": 96.69, "elapsed_time": "6:23:30", "remaining_time": "0:13:06"}
|
||||
{"current_steps": 3660, "total_steps": 3780, "loss": 0.2034, "lr": 1.2472397954861549e-07, "epoch": 6.777777777777778, "percentage": 96.83, "elapsed_time": "6:23:57", "remaining_time": "0:12:35"}
|
||||
{"current_steps": 3665, "total_steps": 3780, "loss": 0.2482, "lr": 1.1463883839388346e-07, "epoch": 6.787037037037037, "percentage": 96.96, "elapsed_time": "6:24:30", "remaining_time": "0:12:03"}
|
||||
{"current_steps": 3670, "total_steps": 3780, "loss": 0.2226, "lr": 1.0497763670382022e-07, "epoch": 6.796296296296296, "percentage": 97.09, "elapsed_time": "6:24:59", "remaining_time": "0:11:32"}
|
||||
{"current_steps": 3675, "total_steps": 3780, "loss": 0.246, "lr": 9.574058044725665e-08, "epoch": 6.805555555555555, "percentage": 97.22, "elapsed_time": "6:25:29", "remaining_time": "0:11:00"}
|
||||
{"current_steps": 3680, "total_steps": 3780, "loss": 0.2129, "lr": 8.692786655060348e-08, "epoch": 6.814814814814815, "percentage": 97.35, "elapsed_time": "6:26:02", "remaining_time": "0:10:29"}
|
||||
{"current_steps": 3685, "total_steps": 3780, "loss": 0.2151, "lr": 7.853968289363245e-08, "epoch": 6.824074074074074, "percentage": 97.49, "elapsed_time": "6:26:27", "remaining_time": "0:09:57"}
|
||||
{"current_steps": 3690, "total_steps": 3780, "loss": 0.1961, "lr": 7.057620830548617e-08, "epoch": 6.833333333333333, "percentage": 97.62, "elapsed_time": "6:26:54", "remaining_time": "0:09:26"}
|
||||
{"current_steps": 3695, "total_steps": 3780, "loss": 0.2292, "lr": 6.30376125608656e-08, "epoch": 6.842592592592593, "percentage": 97.75, "elapsed_time": "6:27:23", "remaining_time": "0:08:54"}
|
||||
{"current_steps": 3700, "total_steps": 3780, "loss": 0.2155, "lr": 5.592405637639742e-08, "epoch": 6.851851851851852, "percentage": 97.88, "elapsed_time": "6:27:49", "remaining_time": "0:08:23"}
|
||||
{"current_steps": 3705, "total_steps": 3780, "loss": 0.2649, "lr": 4.923569140722118e-08, "epoch": 6.861111111111111, "percentage": 98.02, "elapsed_time": "6:28:23", "remaining_time": "0:07:51"}
|
||||
{"current_steps": 3710, "total_steps": 3780, "loss": 0.2319, "lr": 4.2972660243749686e-08, "epoch": 6.87037037037037, "percentage": 98.15, "elapsed_time": "6:28:54", "remaining_time": "0:07:20"}
|
||||
{"current_steps": 3715, "total_steps": 3780, "loss": 0.2415, "lr": 3.7135096408631443e-08, "epoch": 6.87962962962963, "percentage": 98.28, "elapsed_time": "6:29:36", "remaining_time": "0:06:49"}
|
||||
{"current_steps": 3720, "total_steps": 3780, "loss": 0.2267, "lr": 3.172312435390401e-08, "epoch": 6.888888888888889, "percentage": 98.41, "elapsed_time": "6:30:09", "remaining_time": "0:06:17"}
|
||||
{"current_steps": 3725, "total_steps": 3780, "loss": 0.2181, "lr": 2.673685945833615e-08, "epoch": 6.898148148148148, "percentage": 98.54, "elapsed_time": "6:30:40", "remaining_time": "0:05:46"}
|
||||
{"current_steps": 3730, "total_steps": 3780, "loss": 0.2125, "lr": 2.2176408024974228e-08, "epoch": 6.907407407407407, "percentage": 98.68, "elapsed_time": "6:31:12", "remaining_time": "0:05:14"}
|
||||
{"current_steps": 3735, "total_steps": 3780, "loss": 0.2379, "lr": 1.8041867278875137e-08, "epoch": 6.916666666666667, "percentage": 98.81, "elapsed_time": "6:31:44", "remaining_time": "0:04:43"}
|
||||
{"current_steps": 3740, "total_steps": 3780, "loss": 0.2166, "lr": 1.4333325365030181e-08, "epoch": 6.925925925925926, "percentage": 98.94, "elapsed_time": "6:32:17", "remaining_time": "0:04:11"}
|
||||
{"current_steps": 3745, "total_steps": 3780, "loss": 0.238, "lr": 1.1050861346488806e-08, "epoch": 6.935185185185185, "percentage": 99.07, "elapsed_time": "6:32:49", "remaining_time": "0:03:40"}
|
||||
{"current_steps": 3750, "total_steps": 3780, "loss": 0.2428, "lr": 8.194545202666604e-09, "epoch": 6.944444444444445, "percentage": 99.21, "elapsed_time": "6:33:21", "remaining_time": "0:03:08"}
|
||||
{"current_steps": 3755, "total_steps": 3780, "loss": 0.1914, "lr": 5.76443782786873e-09, "epoch": 6.953703703703704, "percentage": 99.34, "elapsed_time": "6:33:48", "remaining_time": "0:02:37"}
|
||||
{"current_steps": 3760, "total_steps": 3780, "loss": 0.2195, "lr": 3.760591029973171e-09, "epoch": 6.962962962962963, "percentage": 99.47, "elapsed_time": "6:34:21", "remaining_time": "0:02:05"}
|
||||
{"current_steps": 3765, "total_steps": 3780, "loss": 0.2229, "lr": 2.1830475293360686e-09, "epoch": 6.972222222222222, "percentage": 99.6, "elapsed_time": "6:34:48", "remaining_time": "0:01:34"}
|
||||
{"current_steps": 3770, "total_steps": 3780, "loss": 0.2245, "lr": 1.0318409578835564e-09, "epoch": 6.981481481481482, "percentage": 99.74, "elapsed_time": "6:35:27", "remaining_time": "0:01:02"}
|
||||
{"current_steps": 3775, "total_steps": 3780, "loss": 0.2235, "lr": 3.069958583856725e-10, "epoch": 6.9907407407407405, "percentage": 99.87, "elapsed_time": "6:36:03", "remaining_time": "0:00:31"}
|
||||
{"current_steps": 3780, "total_steps": 3780, "loss": 0.2239, "lr": 8.527683943437837e-12, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "6:36:34", "remaining_time": "0:00:00"}
|
||||
{"current_steps": 3780, "total_steps": 3780, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "6:36:45", "remaining_time": "0:00:00"}
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user