初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-agenttuning_kg Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_agenttuning_kg__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_agenttuning_kg__Qwen3-8B
|
||||
|
||||
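This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the `DCAgent/neulab-agenttuning-kg-sandboxes_glm_4.7_traces_jupiter` dataset (snapshot `c80a285cabf3716d58e581d3a513181f0413d543`), loaded during training from the local path `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--neulab-agenttuning-kg-sandboxes_glm_4.7_traces_jupiter/snapshots/c80a285cabf3716d58e581d3a513181f0413d543_thinking_preprocessed`.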
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: fused PyTorch AdamW (OptimizerNames.ADAMW_TORCH_FUSED) with betas=(0.9,0.98), epsilon=1e-08, and no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0035238641201377078,
|
||||
"achieved_tflops_per_gpu_theoretical": 443.815497729835,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.26912549138069153,
|
||||
"mfu_percent": 0.0002490363335786366,
|
||||
"mfu_percent_theoretical": 31.365052843097878,
|
||||
"total_flos": 1898150523305984.0,
|
||||
"train_loss": 0.2966247021476157,
|
||||
"train_runtime": 33665.9995,
|
||||
"train_samples_per_second": 2.094,
|
||||
"train_steps_per_second": 0.131,
|
||||
"valid_targets_mean": 6379.4,
|
||||
"valid_targets_min": 827
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{#- System turn.
    With tools: emit one system block containing the leading system message
    (only when messages[0] has role 'system'), the tool-calling instructions,
    and one JSON-serialized line per entry in tools.
    Without tools: emit only the leading system message, if there is one. #}
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{#- Find the last "real" user query.
    Walk the messages from last to first and record, in ns.last_query_index,
    the index of the first user message encountered whose content is a string
    and is not wrapped in tool_response tags. ns.multi_step_tool is cleared on
    that match so earlier user messages are ignored. If nothing matches,
    last_query_index keeps its initial value: the index of the final message. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{#- loop.index0 counts from the end here; convert it back to a forward index. #}
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{#- Render every message in order.
    - user, and system messages other than the first: emitted as-is inside
      an im_start / im_end pair.
    - assistant: optional think block, then content, then any tool calls.
    - tool: wrapped in tool_response tags inside a user turn.
    Messages with any other role produce no output. #}
{%- for message in messages %}
|
||||
{#- Content that is not a string is rendered as an empty string. #}
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{#- Reasoning comes from message.reasoning_content when that is a string;
    otherwise, if content contains a closing think tag, the reasoning is split
    out of content and content is reduced to the text after that tag. #}
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{#- A think block is emitted only for assistant turns after the last real
    user query, and then only for the final message or when the reasoning is
    non-empty. All other assistant turns emit content alone. #}
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{#- Newline separator: before the first call only when content is non-empty,
    and before every later call. loop refers to the tool_calls loop here. #}
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{#- Accept both the wrapped form (fields under tool_call.function) and the
    flat form (name / arguments directly on tool_call). #}
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{#- String arguments are emitted verbatim; anything else is JSON-serialized. #}
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{#- Consecutive tool messages share one user turn: the turn is opened when the
    previous message is not a tool message and closed when the next one is not. #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{#- Generation prompt: when add_generation_prompt is set, open a new assistant
    turn. If enable_thinking is defined and is false, an empty think block is
    pre-filled after the turn header; when it is undefined or true, nothing
    extra is emitted. #}
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b69d98dcfa63737d882b6291bcca6b7b0f68a502bccc0aa65fc657560509564f
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bd0bbee28559e0404e6cfbbee3fb709ca4b5de79b4a1fba8d6b8af6ae588c639
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4d71ca964cdea2f29c087a7ffdd762c209eabf44f00ef3afac76aa1b67302abd
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61a900209fda4bb3280d095e06290849b200ff990e02293f74d49de4d3ab2f87
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "c80a285cabf3716d58e581d3a513181f0413d543_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--neulab-agenttuning-kg-sandboxes_glm_4.7_traces_jupiter/snapshots/c80a285cabf3716d58e581d3a513181f0413d543_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-agenttuning_kg/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0035238641201377078,
|
||||
"achieved_tflops_per_gpu_theoretical": 443.815497729835,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.26912549138069153,
|
||||
"mfu_percent": 0.0002490363335786366,
|
||||
"mfu_percent_theoretical": 31.365052843097878,
|
||||
"total_flos": 1898150523305984.0,
|
||||
"train_loss": 0.2966247021476157,
|
||||
"train_runtime": 33665.9995,
|
||||
"train_samples_per_second": 2.094,
|
||||
"train_steps_per_second": 0.131,
|
||||
"valid_targets_mean": 6379.4,
|
||||
"valid_targets_min": 827
|
||||
}
|
||||
883
trainer_log.jsonl
Normal file
883
trainer_log.jsonl
Normal file
@@ -0,0 +1,883 @@
|
||||
{"current_steps": 5, "total_steps": 4410, "loss": 0.9521, "lr": 3.6281179138322e-07, "epoch": 0.007936507936507936, "percentage": 0.11, "elapsed_time": "0:00:39", "remaining_time": "9:36:11"}
|
||||
{"current_steps": 10, "total_steps": 4410, "loss": 0.9331, "lr": 8.163265306122449e-07, "epoch": 0.015873015873015872, "percentage": 0.23, "elapsed_time": "0:01:19", "remaining_time": "9:42:58"}
|
||||
{"current_steps": 15, "total_steps": 4410, "loss": 0.9074, "lr": 1.26984126984127e-06, "epoch": 0.023809523809523808, "percentage": 0.34, "elapsed_time": "0:02:01", "remaining_time": "9:50:55"}
|
||||
{"current_steps": 20, "total_steps": 4410, "loss": 0.8451, "lr": 1.723356009070295e-06, "epoch": 0.031746031746031744, "percentage": 0.45, "elapsed_time": "0:02:35", "remaining_time": "9:28:47"}
|
||||
{"current_steps": 25, "total_steps": 4410, "loss": 0.7665, "lr": 2.17687074829932e-06, "epoch": 0.03968253968253968, "percentage": 0.57, "elapsed_time": "0:03:09", "remaining_time": "9:15:00"}
|
||||
{"current_steps": 30, "total_steps": 4410, "loss": 0.6856, "lr": 2.6303854875283447e-06, "epoch": 0.047619047619047616, "percentage": 0.68, "elapsed_time": "0:03:45", "remaining_time": "9:07:51"}
|
||||
{"current_steps": 35, "total_steps": 4410, "loss": 0.6668, "lr": 3.08390022675737e-06, "epoch": 0.05555555555555555, "percentage": 0.79, "elapsed_time": "0:04:24", "remaining_time": "9:11:58"}
|
||||
{"current_steps": 40, "total_steps": 4410, "loss": 0.6301, "lr": 3.537414965986395e-06, "epoch": 0.06349206349206349, "percentage": 0.91, "elapsed_time": "0:05:05", "remaining_time": "9:16:01"}
|
||||
{"current_steps": 45, "total_steps": 4410, "loss": 0.5935, "lr": 3.99092970521542e-06, "epoch": 0.07142857142857142, "percentage": 1.02, "elapsed_time": "0:05:42", "remaining_time": "9:13:01"}
|
||||
{"current_steps": 50, "total_steps": 4410, "loss": 0.5962, "lr": 4.444444444444444e-06, "epoch": 0.07936507936507936, "percentage": 1.13, "elapsed_time": "0:06:23", "remaining_time": "9:17:42"}
|
||||
{"current_steps": 55, "total_steps": 4410, "loss": 0.572, "lr": 4.897959183673469e-06, "epoch": 0.0873015873015873, "percentage": 1.25, "elapsed_time": "0:07:00", "remaining_time": "9:14:29"}
|
||||
{"current_steps": 60, "total_steps": 4410, "loss": 0.5371, "lr": 5.3514739229024945e-06, "epoch": 0.09523809523809523, "percentage": 1.36, "elapsed_time": "0:07:38", "remaining_time": "9:13:57"}
|
||||
{"current_steps": 65, "total_steps": 4410, "loss": 0.5277, "lr": 5.80498866213152e-06, "epoch": 0.10317460317460317, "percentage": 1.47, "elapsed_time": "0:08:17", "remaining_time": "9:14:47"}
|
||||
{"current_steps": 70, "total_steps": 4410, "loss": 0.5102, "lr": 6.258503401360545e-06, "epoch": 0.1111111111111111, "percentage": 1.59, "elapsed_time": "0:08:53", "remaining_time": "9:11:20"}
|
||||
{"current_steps": 75, "total_steps": 4410, "loss": 0.5007, "lr": 6.71201814058957e-06, "epoch": 0.11904761904761904, "percentage": 1.7, "elapsed_time": "0:09:30", "remaining_time": "9:09:54"}
|
||||
{"current_steps": 80, "total_steps": 4410, "loss": 0.4853, "lr": 7.165532879818595e-06, "epoch": 0.12698412698412698, "percentage": 1.81, "elapsed_time": "0:10:06", "remaining_time": "9:06:56"}
|
||||
{"current_steps": 85, "total_steps": 4410, "loss": 0.4609, "lr": 7.61904761904762e-06, "epoch": 0.1349206349206349, "percentage": 1.93, "elapsed_time": "0:10:49", "remaining_time": "9:11:09"}
|
||||
{"current_steps": 90, "total_steps": 4410, "loss": 0.4583, "lr": 8.072562358276645e-06, "epoch": 0.14285714285714285, "percentage": 2.04, "elapsed_time": "0:11:29", "remaining_time": "9:11:47"}
|
||||
{"current_steps": 95, "total_steps": 4410, "loss": 0.445, "lr": 8.52607709750567e-06, "epoch": 0.15079365079365079, "percentage": 2.15, "elapsed_time": "0:12:16", "remaining_time": "9:17:12"}
|
||||
{"current_steps": 100, "total_steps": 4410, "loss": 0.4215, "lr": 8.979591836734695e-06, "epoch": 0.15873015873015872, "percentage": 2.27, "elapsed_time": "0:12:52", "remaining_time": "9:15:14"}
|
||||
{"current_steps": 105, "total_steps": 4410, "loss": 0.4209, "lr": 9.43310657596372e-06, "epoch": 0.16666666666666666, "percentage": 2.38, "elapsed_time": "0:13:30", "remaining_time": "9:13:36"}
|
||||
{"current_steps": 110, "total_steps": 4410, "loss": 0.4035, "lr": 9.886621315192746e-06, "epoch": 0.1746031746031746, "percentage": 2.49, "elapsed_time": "0:14:10", "remaining_time": "9:13:51"}
|
||||
{"current_steps": 115, "total_steps": 4410, "loss": 0.406, "lr": 1.034013605442177e-05, "epoch": 0.18253968253968253, "percentage": 2.61, "elapsed_time": "0:14:44", "remaining_time": "9:10:41"}
|
||||
{"current_steps": 120, "total_steps": 4410, "loss": 0.399, "lr": 1.0793650793650794e-05, "epoch": 0.19047619047619047, "percentage": 2.72, "elapsed_time": "0:15:24", "remaining_time": "9:11:02"}
|
||||
{"current_steps": 125, "total_steps": 4410, "loss": 0.3777, "lr": 1.124716553287982e-05, "epoch": 0.1984126984126984, "percentage": 2.83, "elapsed_time": "0:16:02", "remaining_time": "9:10:09"}
|
||||
{"current_steps": 130, "total_steps": 4410, "loss": 0.3897, "lr": 1.1700680272108845e-05, "epoch": 0.20634920634920634, "percentage": 2.95, "elapsed_time": "0:16:40", "remaining_time": "9:09:03"}
|
||||
{"current_steps": 135, "total_steps": 4410, "loss": 0.389, "lr": 1.215419501133787e-05, "epoch": 0.21428571428571427, "percentage": 3.06, "elapsed_time": "0:17:18", "remaining_time": "9:08:11"}
|
||||
{"current_steps": 140, "total_steps": 4410, "loss": 0.3841, "lr": 1.2607709750566895e-05, "epoch": 0.2222222222222222, "percentage": 3.17, "elapsed_time": "0:17:56", "remaining_time": "9:07:02"}
|
||||
{"current_steps": 145, "total_steps": 4410, "loss": 0.3702, "lr": 1.3061224489795918e-05, "epoch": 0.23015873015873015, "percentage": 3.29, "elapsed_time": "0:18:30", "remaining_time": "9:04:23"}
|
||||
{"current_steps": 150, "total_steps": 4410, "loss": 0.3688, "lr": 1.3514739229024945e-05, "epoch": 0.23809523809523808, "percentage": 3.4, "elapsed_time": "0:19:12", "remaining_time": "9:05:30"}
|
||||
{"current_steps": 155, "total_steps": 4410, "loss": 0.3703, "lr": 1.3968253968253968e-05, "epoch": 0.24603174603174602, "percentage": 3.51, "elapsed_time": "0:19:47", "remaining_time": "9:03:26"}
|
||||
{"current_steps": 160, "total_steps": 4410, "loss": 0.3697, "lr": 1.4421768707482994e-05, "epoch": 0.25396825396825395, "percentage": 3.63, "elapsed_time": "0:20:29", "remaining_time": "9:04:09"}
|
||||
{"current_steps": 165, "total_steps": 4410, "loss": 0.3666, "lr": 1.4875283446712018e-05, "epoch": 0.2619047619047619, "percentage": 3.74, "elapsed_time": "0:21:12", "remaining_time": "9:05:28"}
|
||||
{"current_steps": 170, "total_steps": 4410, "loss": 0.3536, "lr": 1.5328798185941044e-05, "epoch": 0.2698412698412698, "percentage": 3.85, "elapsed_time": "0:21:53", "remaining_time": "9:05:57"}
|
||||
{"current_steps": 175, "total_steps": 4410, "loss": 0.3685, "lr": 1.578231292517007e-05, "epoch": 0.2777777777777778, "percentage": 3.97, "elapsed_time": "0:22:26", "remaining_time": "9:03:11"}
|
||||
{"current_steps": 180, "total_steps": 4410, "loss": 0.3587, "lr": 1.6235827664399097e-05, "epoch": 0.2857142857142857, "percentage": 4.08, "elapsed_time": "0:23:09", "remaining_time": "9:04:03"}
|
||||
{"current_steps": 185, "total_steps": 4410, "loss": 0.3563, "lr": 1.668934240362812e-05, "epoch": 0.29365079365079366, "percentage": 4.2, "elapsed_time": "0:23:45", "remaining_time": "9:02:34"}
|
||||
{"current_steps": 190, "total_steps": 4410, "loss": 0.3623, "lr": 1.7142857142857142e-05, "epoch": 0.30158730158730157, "percentage": 4.31, "elapsed_time": "0:24:27", "remaining_time": "9:03:17"}
|
||||
{"current_steps": 195, "total_steps": 4410, "loss": 0.348, "lr": 1.759637188208617e-05, "epoch": 0.30952380952380953, "percentage": 4.42, "elapsed_time": "0:25:04", "remaining_time": "9:01:51"}
|
||||
{"current_steps": 200, "total_steps": 4410, "loss": 0.3581, "lr": 1.8049886621315194e-05, "epoch": 0.31746031746031744, "percentage": 4.54, "elapsed_time": "0:25:41", "remaining_time": "9:00:43"}
|
||||
{"current_steps": 205, "total_steps": 4410, "loss": 0.3324, "lr": 1.8503401360544218e-05, "epoch": 0.3253968253968254, "percentage": 4.65, "elapsed_time": "0:26:20", "remaining_time": "9:00:11"}
|
||||
{"current_steps": 210, "total_steps": 4410, "loss": 0.3588, "lr": 1.8956916099773243e-05, "epoch": 0.3333333333333333, "percentage": 4.76, "elapsed_time": "0:26:58", "remaining_time": "8:59:23"}
|
||||
{"current_steps": 215, "total_steps": 4410, "loss": 0.3488, "lr": 1.941043083900227e-05, "epoch": 0.3412698412698413, "percentage": 4.88, "elapsed_time": "0:27:37", "remaining_time": "8:59:01"}
|
||||
{"current_steps": 220, "total_steps": 4410, "loss": 0.3472, "lr": 1.9863945578231295e-05, "epoch": 0.3492063492063492, "percentage": 4.99, "elapsed_time": "0:28:16", "remaining_time": "8:58:23"}
|
||||
{"current_steps": 225, "total_steps": 4410, "loss": 0.3371, "lr": 2.031746031746032e-05, "epoch": 0.35714285714285715, "percentage": 5.1, "elapsed_time": "0:28:57", "remaining_time": "8:58:31"}
|
||||
{"current_steps": 230, "total_steps": 4410, "loss": 0.354, "lr": 2.0770975056689343e-05, "epoch": 0.36507936507936506, "percentage": 5.22, "elapsed_time": "0:29:35", "remaining_time": "8:57:46"}
|
||||
{"current_steps": 235, "total_steps": 4410, "loss": 0.3442, "lr": 2.122448979591837e-05, "epoch": 0.373015873015873, "percentage": 5.33, "elapsed_time": "0:30:17", "remaining_time": "8:58:02"}
|
||||
{"current_steps": 240, "total_steps": 4410, "loss": 0.3397, "lr": 2.1678004535147395e-05, "epoch": 0.38095238095238093, "percentage": 5.44, "elapsed_time": "0:30:55", "remaining_time": "8:57:17"}
|
||||
{"current_steps": 245, "total_steps": 4410, "loss": 0.3471, "lr": 2.213151927437642e-05, "epoch": 0.3888888888888889, "percentage": 5.56, "elapsed_time": "0:31:38", "remaining_time": "8:57:51"}
|
||||
{"current_steps": 250, "total_steps": 4410, "loss": 0.3376, "lr": 2.2585034013605444e-05, "epoch": 0.3968253968253968, "percentage": 5.67, "elapsed_time": "0:32:19", "remaining_time": "8:57:46"}
|
||||
{"current_steps": 255, "total_steps": 4410, "loss": 0.3408, "lr": 2.3038548752834472e-05, "epoch": 0.40476190476190477, "percentage": 5.78, "elapsed_time": "0:32:54", "remaining_time": "8:56:10"}
|
||||
{"current_steps": 260, "total_steps": 4410, "loss": 0.3403, "lr": 2.3492063492063496e-05, "epoch": 0.4126984126984127, "percentage": 5.9, "elapsed_time": "0:33:29", "remaining_time": "8:54:41"}
|
||||
{"current_steps": 265, "total_steps": 4410, "loss": 0.3277, "lr": 2.394557823129252e-05, "epoch": 0.42063492063492064, "percentage": 6.01, "elapsed_time": "0:34:08", "remaining_time": "8:54:04"}
|
||||
{"current_steps": 270, "total_steps": 4410, "loss": 0.3379, "lr": 2.439909297052154e-05, "epoch": 0.42857142857142855, "percentage": 6.12, "elapsed_time": "0:34:50", "remaining_time": "8:54:09"}
|
||||
{"current_steps": 275, "total_steps": 4410, "loss": 0.344, "lr": 2.4852607709750566e-05, "epoch": 0.4365079365079365, "percentage": 6.24, "elapsed_time": "0:35:33", "remaining_time": "8:54:35"}
|
||||
{"current_steps": 280, "total_steps": 4410, "loss": 0.3283, "lr": 2.5306122448979597e-05, "epoch": 0.4444444444444444, "percentage": 6.35, "elapsed_time": "0:36:12", "remaining_time": "8:53:58"}
|
||||
{"current_steps": 285, "total_steps": 4410, "loss": 0.3427, "lr": 2.5759637188208618e-05, "epoch": 0.4523809523809524, "percentage": 6.46, "elapsed_time": "0:36:47", "remaining_time": "8:52:36"}
|
||||
{"current_steps": 290, "total_steps": 4410, "loss": 0.3395, "lr": 2.6213151927437642e-05, "epoch": 0.4603174603174603, "percentage": 6.58, "elapsed_time": "0:37:28", "remaining_time": "8:52:24"}
|
||||
{"current_steps": 295, "total_steps": 4410, "loss": 0.3357, "lr": 2.6666666666666667e-05, "epoch": 0.46825396825396826, "percentage": 6.69, "elapsed_time": "0:38:06", "remaining_time": "8:51:34"}
|
||||
{"current_steps": 300, "total_steps": 4410, "loss": 0.3254, "lr": 2.7120181405895694e-05, "epoch": 0.47619047619047616, "percentage": 6.8, "elapsed_time": "0:38:46", "remaining_time": "8:51:15"}
|
||||
{"current_steps": 305, "total_steps": 4410, "loss": 0.3126, "lr": 2.757369614512472e-05, "epoch": 0.48412698412698413, "percentage": 6.92, "elapsed_time": "0:39:20", "remaining_time": "8:49:36"}
|
||||
{"current_steps": 310, "total_steps": 4410, "loss": 0.314, "lr": 2.8027210884353743e-05, "epoch": 0.49206349206349204, "percentage": 7.03, "elapsed_time": "0:39:57", "remaining_time": "8:48:30"}
|
||||
{"current_steps": 315, "total_steps": 4410, "loss": 0.3331, "lr": 2.8480725623582767e-05, "epoch": 0.5, "percentage": 7.14, "elapsed_time": "0:40:41", "remaining_time": "8:48:54"}
|
||||
{"current_steps": 320, "total_steps": 4410, "loss": 0.3354, "lr": 2.893424036281179e-05, "epoch": 0.5079365079365079, "percentage": 7.26, "elapsed_time": "0:41:21", "remaining_time": "8:48:34"}
|
||||
{"current_steps": 325, "total_steps": 4410, "loss": 0.3285, "lr": 2.938775510204082e-05, "epoch": 0.5158730158730159, "percentage": 7.37, "elapsed_time": "0:41:57", "remaining_time": "8:47:25"}
|
||||
{"current_steps": 330, "total_steps": 4410, "loss": 0.316, "lr": 2.9841269841269844e-05, "epoch": 0.5238095238095238, "percentage": 7.48, "elapsed_time": "0:42:40", "remaining_time": "8:47:31"}
|
||||
{"current_steps": 335, "total_steps": 4410, "loss": 0.3291, "lr": 3.0294784580498868e-05, "epoch": 0.5317460317460317, "percentage": 7.6, "elapsed_time": "0:43:22", "remaining_time": "8:47:41"}
|
||||
{"current_steps": 340, "total_steps": 4410, "loss": 0.3351, "lr": 3.074829931972789e-05, "epoch": 0.5396825396825397, "percentage": 7.71, "elapsed_time": "0:44:00", "remaining_time": "8:46:52"}
|
||||
{"current_steps": 345, "total_steps": 4410, "loss": 0.338, "lr": 3.1201814058956924e-05, "epoch": 0.5476190476190477, "percentage": 7.82, "elapsed_time": "0:44:36", "remaining_time": "8:45:39"}
|
||||
{"current_steps": 350, "total_steps": 4410, "loss": 0.3296, "lr": 3.1655328798185945e-05, "epoch": 0.5555555555555556, "percentage": 7.94, "elapsed_time": "0:45:13", "remaining_time": "8:44:38"}
|
||||
{"current_steps": 355, "total_steps": 4410, "loss": 0.3199, "lr": 3.2108843537414965e-05, "epoch": 0.5634920634920635, "percentage": 8.05, "elapsed_time": "0:45:44", "remaining_time": "8:42:33"}
|
||||
{"current_steps": 360, "total_steps": 4410, "loss": 0.3263, "lr": 3.256235827664399e-05, "epoch": 0.5714285714285714, "percentage": 8.16, "elapsed_time": "0:46:23", "remaining_time": "8:41:56"}
|
||||
{"current_steps": 365, "total_steps": 4410, "loss": 0.3099, "lr": 3.3015873015873014e-05, "epoch": 0.5793650793650794, "percentage": 8.28, "elapsed_time": "0:46:55", "remaining_time": "8:40:03"}
|
||||
{"current_steps": 370, "total_steps": 4410, "loss": 0.3355, "lr": 3.346938775510204e-05, "epoch": 0.5873015873015873, "percentage": 8.39, "elapsed_time": "0:47:36", "remaining_time": "8:39:45"}
|
||||
{"current_steps": 375, "total_steps": 4410, "loss": 0.3134, "lr": 3.392290249433107e-05, "epoch": 0.5952380952380952, "percentage": 8.5, "elapsed_time": "0:48:14", "remaining_time": "8:39:07"}
|
||||
{"current_steps": 380, "total_steps": 4410, "loss": 0.318, "lr": 3.437641723356009e-05, "epoch": 0.6031746031746031, "percentage": 8.62, "elapsed_time": "0:48:52", "remaining_time": "8:38:17"}
|
||||
{"current_steps": 385, "total_steps": 4410, "loss": 0.3295, "lr": 3.482993197278912e-05, "epoch": 0.6111111111111112, "percentage": 8.73, "elapsed_time": "0:49:28", "remaining_time": "8:37:15"}
|
||||
{"current_steps": 390, "total_steps": 4410, "loss": 0.3283, "lr": 3.5283446712018146e-05, "epoch": 0.6190476190476191, "percentage": 8.84, "elapsed_time": "0:50:10", "remaining_time": "8:37:07"}
|
||||
{"current_steps": 395, "total_steps": 4410, "loss": 0.3272, "lr": 3.573696145124717e-05, "epoch": 0.626984126984127, "percentage": 8.96, "elapsed_time": "0:50:49", "remaining_time": "8:36:40"}
|
||||
{"current_steps": 400, "total_steps": 4410, "loss": 0.3279, "lr": 3.6190476190476195e-05, "epoch": 0.6349206349206349, "percentage": 9.07, "elapsed_time": "0:51:30", "remaining_time": "8:36:17"}
|
||||
{"current_steps": 405, "total_steps": 4410, "loss": 0.3124, "lr": 3.6643990929705216e-05, "epoch": 0.6428571428571429, "percentage": 9.18, "elapsed_time": "0:52:05", "remaining_time": "8:35:06"}
|
||||
{"current_steps": 410, "total_steps": 4410, "loss": 0.3223, "lr": 3.7097505668934243e-05, "epoch": 0.6507936507936508, "percentage": 9.3, "elapsed_time": "0:52:51", "remaining_time": "8:35:41"}
|
||||
{"current_steps": 415, "total_steps": 4410, "loss": 0.3254, "lr": 3.755102040816327e-05, "epoch": 0.6587301587301587, "percentage": 9.41, "elapsed_time": "0:53:32", "remaining_time": "8:35:27"}
|
||||
{"current_steps": 420, "total_steps": 4410, "loss": 0.3217, "lr": 3.800453514739229e-05, "epoch": 0.6666666666666666, "percentage": 9.52, "elapsed_time": "0:54:11", "remaining_time": "8:34:53"}
|
||||
{"current_steps": 425, "total_steps": 4410, "loss": 0.3274, "lr": 3.845804988662132e-05, "epoch": 0.6746031746031746, "percentage": 9.64, "elapsed_time": "0:54:53", "remaining_time": "8:34:38"}
|
||||
{"current_steps": 430, "total_steps": 4410, "loss": 0.3304, "lr": 3.891156462585034e-05, "epoch": 0.6825396825396826, "percentage": 9.75, "elapsed_time": "0:55:26", "remaining_time": "8:33:06"}
|
||||
{"current_steps": 435, "total_steps": 4410, "loss": 0.3172, "lr": 3.936507936507937e-05, "epoch": 0.6904761904761905, "percentage": 9.86, "elapsed_time": "0:55:56", "remaining_time": "8:31:14"}
|
||||
{"current_steps": 440, "total_steps": 4410, "loss": 0.3148, "lr": 3.9818594104308396e-05, "epoch": 0.6984126984126984, "percentage": 9.98, "elapsed_time": "0:56:34", "remaining_time": "8:30:25"}
|
||||
{"current_steps": 445, "total_steps": 4410, "loss": 0.3262, "lr": 3.999994361288785e-05, "epoch": 0.7063492063492064, "percentage": 10.09, "elapsed_time": "0:57:13", "remaining_time": "8:29:48"}
|
||||
{"current_steps": 450, "total_steps": 4410, "loss": 0.3216, "lr": 3.9999599026131644e-05, "epoch": 0.7142857142857143, "percentage": 10.2, "elapsed_time": "0:57:52", "remaining_time": "8:29:16"}
|
||||
{"current_steps": 455, "total_steps": 4410, "loss": 0.3143, "lr": 3.999894118418342e-05, "epoch": 0.7222222222222222, "percentage": 10.32, "elapsed_time": "0:58:31", "remaining_time": "8:28:44"}
|
||||
{"current_steps": 460, "total_steps": 4410, "loss": 0.328, "lr": 3.999797009734697e-05, "epoch": 0.7301587301587301, "percentage": 10.43, "elapsed_time": "0:59:09", "remaining_time": "8:28:00"}
|
||||
{"current_steps": 465, "total_steps": 4410, "loss": 0.3233, "lr": 3.999668578083253e-05, "epoch": 0.7380952380952381, "percentage": 10.54, "elapsed_time": "0:59:45", "remaining_time": "8:26:55"}
|
||||
{"current_steps": 470, "total_steps": 4410, "loss": 0.3025, "lr": 3.9995088254756434e-05, "epoch": 0.746031746031746, "percentage": 10.66, "elapsed_time": "1:00:20", "remaining_time": "8:25:53"}
|
||||
{"current_steps": 475, "total_steps": 4410, "loss": 0.3169, "lr": 3.999317754414084e-05, "epoch": 0.753968253968254, "percentage": 10.77, "elapsed_time": "1:00:58", "remaining_time": "8:25:08"}
|
||||
{"current_steps": 480, "total_steps": 4410, "loss": 0.3218, "lr": 3.999095367891337e-05, "epoch": 0.7619047619047619, "percentage": 10.88, "elapsed_time": "1:01:39", "remaining_time": "8:24:47"}
|
||||
{"current_steps": 485, "total_steps": 4410, "loss": 0.3133, "lr": 3.9988416693906563e-05, "epoch": 0.7698412698412699, "percentage": 11.0, "elapsed_time": "1:02:13", "remaining_time": "8:23:37"}
|
||||
{"current_steps": 490, "total_steps": 4410, "loss": 0.3277, "lr": 3.9985566628857425e-05, "epoch": 0.7777777777777778, "percentage": 11.11, "elapsed_time": "1:02:54", "remaining_time": "8:23:15"}
|
||||
{"current_steps": 495, "total_steps": 4410, "loss": 0.3239, "lr": 3.998240352840672e-05, "epoch": 0.7857142857142857, "percentage": 11.22, "elapsed_time": "1:03:34", "remaining_time": "8:22:51"}
|
||||
{"current_steps": 500, "total_steps": 4410, "loss": 0.3196, "lr": 3.997892744209833e-05, "epoch": 0.7936507936507936, "percentage": 11.34, "elapsed_time": "1:04:16", "remaining_time": "8:22:36"}
|
||||
{"current_steps": 505, "total_steps": 4410, "loss": 0.3244, "lr": 3.997513842437845e-05, "epoch": 0.8015873015873016, "percentage": 11.45, "elapsed_time": "1:04:55", "remaining_time": "8:21:59"}
|
||||
{"current_steps": 510, "total_steps": 4410, "loss": 0.317, "lr": 3.997103653459475e-05, "epoch": 0.8095238095238095, "percentage": 11.56, "elapsed_time": "1:05:34", "remaining_time": "8:21:29"}
|
||||
{"current_steps": 515, "total_steps": 4410, "loss": 0.3061, "lr": 3.996662183699541e-05, "epoch": 0.8174603174603174, "percentage": 11.68, "elapsed_time": "1:06:13", "remaining_time": "8:20:54"}
|
||||
{"current_steps": 520, "total_steps": 4410, "loss": 0.3042, "lr": 3.996189440072818e-05, "epoch": 0.8253968253968254, "percentage": 11.79, "elapsed_time": "1:06:53", "remaining_time": "8:20:24"}
|
||||
{"current_steps": 525, "total_steps": 4410, "loss": 0.3106, "lr": 3.9956854299839246e-05, "epoch": 0.8333333333333334, "percentage": 11.9, "elapsed_time": "1:07:39", "remaining_time": "8:20:41"}
|
||||
{"current_steps": 530, "total_steps": 4410, "loss": 0.3064, "lr": 3.9951501613272076e-05, "epoch": 0.8412698412698413, "percentage": 12.02, "elapsed_time": "1:08:17", "remaining_time": "8:19:53"}
|
||||
{"current_steps": 535, "total_steps": 4410, "loss": 0.3218, "lr": 3.994583642486618e-05, "epoch": 0.8492063492063492, "percentage": 12.13, "elapsed_time": "1:08:52", "remaining_time": "8:18:53"}
|
||||
{"current_steps": 540, "total_steps": 4410, "loss": 0.3118, "lr": 3.993985882335584e-05, "epoch": 0.8571428571428571, "percentage": 12.24, "elapsed_time": "1:09:35", "remaining_time": "8:18:44"}
|
||||
{"current_steps": 545, "total_steps": 4410, "loss": 0.3099, "lr": 3.993356890236866e-05, "epoch": 0.8650793650793651, "percentage": 12.36, "elapsed_time": "1:10:19", "remaining_time": "8:18:42"}
|
||||
{"current_steps": 550, "total_steps": 4410, "loss": 0.3248, "lr": 3.992696676042414e-05, "epoch": 0.873015873015873, "percentage": 12.47, "elapsed_time": "1:10:54", "remaining_time": "8:17:40"}
|
||||
{"current_steps": 555, "total_steps": 4410, "loss": 0.3125, "lr": 3.992005250093211e-05, "epoch": 0.8809523809523809, "percentage": 12.59, "elapsed_time": "1:11:32", "remaining_time": "8:16:53"}
|
||||
{"current_steps": 560, "total_steps": 4410, "loss": 0.3126, "lr": 3.991282623219113e-05, "epoch": 0.8888888888888888, "percentage": 12.7, "elapsed_time": "1:12:08", "remaining_time": "8:15:55"}
|
||||
{"current_steps": 565, "total_steps": 4410, "loss": 0.3086, "lr": 3.9905288067386776e-05, "epoch": 0.8968253968253969, "percentage": 12.81, "elapsed_time": "1:12:43", "remaining_time": "8:14:52"}
|
||||
{"current_steps": 570, "total_steps": 4410, "loss": 0.3184, "lr": 3.989743812458987e-05, "epoch": 0.9047619047619048, "percentage": 12.93, "elapsed_time": "1:13:15", "remaining_time": "8:13:34"}
|
||||
{"current_steps": 575, "total_steps": 4410, "loss": 0.3115, "lr": 3.9889276526754664e-05, "epoch": 0.9126984126984127, "percentage": 13.04, "elapsed_time": "1:13:54", "remaining_time": "8:12:57"}
|
||||
{"current_steps": 580, "total_steps": 4410, "loss": 0.3051, "lr": 3.988080340171685e-05, "epoch": 0.9206349206349206, "percentage": 13.15, "elapsed_time": "1:14:36", "remaining_time": "8:12:42"}
|
||||
{"current_steps": 585, "total_steps": 4410, "loss": 0.3096, "lr": 3.987201888219161e-05, "epoch": 0.9285714285714286, "percentage": 13.27, "elapsed_time": "1:15:10", "remaining_time": "8:11:32"}
|
||||
{"current_steps": 590, "total_steps": 4410, "loss": 0.3104, "lr": 3.986292310577153e-05, "epoch": 0.9365079365079365, "percentage": 13.38, "elapsed_time": "1:15:48", "remaining_time": "8:10:50"}
|
||||
{"current_steps": 595, "total_steps": 4410, "loss": 0.3121, "lr": 3.9853516214924416e-05, "epoch": 0.9444444444444444, "percentage": 13.49, "elapsed_time": "1:16:28", "remaining_time": "8:10:19"}
|
||||
{"current_steps": 600, "total_steps": 4410, "loss": 0.3149, "lr": 3.9843798356991096e-05, "epoch": 0.9523809523809523, "percentage": 13.61, "elapsed_time": "1:17:10", "remaining_time": "8:10:03"}
|
||||
{"current_steps": 605, "total_steps": 4410, "loss": 0.3057, "lr": 3.9833769684183104e-05, "epoch": 0.9603174603174603, "percentage": 13.72, "elapsed_time": "1:17:43", "remaining_time": "8:08:49"}
|
||||
{"current_steps": 610, "total_steps": 4410, "loss": 0.3099, "lr": 3.982343035358026e-05, "epoch": 0.9682539682539683, "percentage": 13.83, "elapsed_time": "1:18:09", "remaining_time": "8:06:52"}
|
||||
{"current_steps": 615, "total_steps": 4410, "loss": 0.3148, "lr": 3.981278052712827e-05, "epoch": 0.9761904761904762, "percentage": 13.95, "elapsed_time": "1:18:47", "remaining_time": "8:06:14"}
|
||||
{"current_steps": 620, "total_steps": 4410, "loss": 0.31, "lr": 3.9801820371636157e-05, "epoch": 0.9841269841269841, "percentage": 14.06, "elapsed_time": "1:19:30", "remaining_time": "8:05:58"}
|
||||
{"current_steps": 625, "total_steps": 4410, "loss": 0.3095, "lr": 3.979055005877364e-05, "epoch": 0.9920634920634921, "percentage": 14.17, "elapsed_time": "1:20:05", "remaining_time": "8:05:00"}
|
||||
{"current_steps": 630, "total_steps": 4410, "loss": 0.308, "lr": 3.977896976506845e-05, "epoch": 1.0, "percentage": 14.29, "elapsed_time": "1:20:41", "remaining_time": "8:04:09"}
|
||||
{"current_steps": 635, "total_steps": 4410, "loss": 0.312, "lr": 3.976707967190358e-05, "epoch": 1.007936507936508, "percentage": 14.4, "elapsed_time": "1:21:17", "remaining_time": "8:03:18"}
|
||||
{"current_steps": 640, "total_steps": 4410, "loss": 0.3078, "lr": 3.9754879965514456e-05, "epoch": 1.0158730158730158, "percentage": 14.51, "elapsed_time": "1:21:58", "remaining_time": "8:02:51"}
|
||||
{"current_steps": 645, "total_steps": 4410, "loss": 0.3094, "lr": 3.9742370836985956e-05, "epoch": 1.0238095238095237, "percentage": 14.63, "elapsed_time": "1:22:37", "remaining_time": "8:02:16"}
|
||||
{"current_steps": 650, "total_steps": 4410, "loss": 0.3065, "lr": 3.972955248224949e-05, "epoch": 1.0317460317460316, "percentage": 14.74, "elapsed_time": "1:23:13", "remaining_time": "8:01:25"}
|
||||
{"current_steps": 655, "total_steps": 4410, "loss": 0.3005, "lr": 3.971642510207989e-05, "epoch": 1.0396825396825398, "percentage": 14.85, "elapsed_time": "1:23:55", "remaining_time": "8:01:08"}
|
||||
{"current_steps": 660, "total_steps": 4410, "loss": 0.299, "lr": 3.9702988902092274e-05, "epoch": 1.0476190476190477, "percentage": 14.97, "elapsed_time": "1:24:40", "remaining_time": "8:01:06"}
|
||||
{"current_steps": 665, "total_steps": 4410, "loss": 0.3, "lr": 3.968924409273884e-05, "epoch": 1.0555555555555556, "percentage": 15.08, "elapsed_time": "1:25:16", "remaining_time": "8:00:12"}
|
||||
{"current_steps": 670, "total_steps": 4410, "loss": 0.3073, "lr": 3.9675190889305545e-05, "epoch": 1.0634920634920635, "percentage": 15.19, "elapsed_time": "1:25:49", "remaining_time": "7:59:06"}
|
||||
{"current_steps": 675, "total_steps": 4410, "loss": 0.309, "lr": 3.966082951190874e-05, "epoch": 1.0714285714285714, "percentage": 15.31, "elapsed_time": "1:26:23", "remaining_time": "7:58:03"}
|
||||
{"current_steps": 680, "total_steps": 4410, "loss": 0.3138, "lr": 3.9646160185491756e-05, "epoch": 1.0793650793650793, "percentage": 15.42, "elapsed_time": "1:27:03", "remaining_time": "7:57:29"}
|
||||
{"current_steps": 685, "total_steps": 4410, "loss": 0.3115, "lr": 3.963118313982131e-05, "epoch": 1.0873015873015872, "percentage": 15.53, "elapsed_time": "1:27:30", "remaining_time": "7:55:52"}
|
||||
{"current_steps": 690, "total_steps": 4410, "loss": 0.31, "lr": 3.961589860948399e-05, "epoch": 1.0952380952380953, "percentage": 15.65, "elapsed_time": "1:28:12", "remaining_time": "7:55:30"}
|
||||
{"current_steps": 695, "total_steps": 4410, "loss": 0.3022, "lr": 3.960030683388251e-05, "epoch": 1.1031746031746033, "percentage": 15.76, "elapsed_time": "1:28:45", "remaining_time": "7:54:27"}
|
||||
{"current_steps": 700, "total_steps": 4410, "loss": 0.3083, "lr": 3.9584408057232e-05, "epoch": 1.1111111111111112, "percentage": 15.87, "elapsed_time": "1:29:29", "remaining_time": "7:54:16"}
|
||||
{"current_steps": 705, "total_steps": 4410, "loss": 0.3141, "lr": 3.956820252855618e-05, "epoch": 1.119047619047619, "percentage": 15.99, "elapsed_time": "1:30:09", "remaining_time": "7:53:48"}
|
||||
{"current_steps": 710, "total_steps": 4410, "loss": 0.3043, "lr": 3.955169050168343e-05, "epoch": 1.126984126984127, "percentage": 16.1, "elapsed_time": "1:30:43", "remaining_time": "7:52:48"}
|
||||
{"current_steps": 715, "total_steps": 4410, "loss": 0.3054, "lr": 3.953487223524283e-05, "epoch": 1.1349206349206349, "percentage": 16.21, "elapsed_time": "1:31:22", "remaining_time": "7:52:14"}
|
||||
{"current_steps": 720, "total_steps": 4410, "loss": 0.3087, "lr": 3.951774799266014e-05, "epoch": 1.1428571428571428, "percentage": 16.33, "elapsed_time": "1:32:01", "remaining_time": "7:51:35"}
|
||||
{"current_steps": 725, "total_steps": 4410, "loss": 0.3115, "lr": 3.950031804215364e-05, "epoch": 1.1507936507936507, "percentage": 16.44, "elapsed_time": "1:32:34", "remaining_time": "7:50:34"}
|
||||
{"current_steps": 730, "total_steps": 4410, "loss": 0.2937, "lr": 3.948258265672991e-05, "epoch": 1.1587301587301586, "percentage": 16.55, "elapsed_time": "1:33:14", "remaining_time": "7:50:01"}
|
||||
{"current_steps": 735, "total_steps": 4410, "loss": 0.3058, "lr": 3.946454211417961e-05, "epoch": 1.1666666666666667, "percentage": 16.67, "elapsed_time": "1:33:44", "remaining_time": "7:48:43"}
|
||||
{"current_steps": 740, "total_steps": 4410, "loss": 0.3146, "lr": 3.944619669707309e-05, "epoch": 1.1746031746031746, "percentage": 16.78, "elapsed_time": "1:34:23", "remaining_time": "7:48:07"}
|
||||
{"current_steps": 745, "total_steps": 4410, "loss": 0.3098, "lr": 3.9427546692755946e-05, "epoch": 1.1825396825396826, "percentage": 16.89, "elapsed_time": "1:34:58", "remaining_time": "7:47:12"}
|
||||
{"current_steps": 750, "total_steps": 4410, "loss": 0.2976, "lr": 3.9408592393344596e-05, "epoch": 1.1904761904761905, "percentage": 17.01, "elapsed_time": "1:35:38", "remaining_time": "7:46:45"}
|
||||
{"current_steps": 755, "total_steps": 4410, "loss": 0.2985, "lr": 3.9389334095721606e-05, "epoch": 1.1984126984126984, "percentage": 17.12, "elapsed_time": "1:36:16", "remaining_time": "7:46:04"}
|
||||
{"current_steps": 760, "total_steps": 4410, "loss": 0.2992, "lr": 3.936977210153113e-05, "epoch": 1.2063492063492063, "percentage": 17.23, "elapsed_time": "1:36:57", "remaining_time": "7:45:39"}
|
||||
{"current_steps": 765, "total_steps": 4410, "loss": 0.2954, "lr": 3.93499067171741e-05, "epoch": 1.2142857142857142, "percentage": 17.35, "elapsed_time": "1:37:42", "remaining_time": "7:45:31"}
|
||||
{"current_steps": 770, "total_steps": 4410, "loss": 0.3123, "lr": 3.932973825380351e-05, "epoch": 1.2222222222222223, "percentage": 17.46, "elapsed_time": "1:38:22", "remaining_time": "7:45:00"}
|
||||
{"current_steps": 775, "total_steps": 4410, "loss": 0.3154, "lr": 3.9309267027319485e-05, "epoch": 1.2301587301587302, "percentage": 17.57, "elapsed_time": "1:38:56", "remaining_time": "7:44:03"}
|
||||
{"current_steps": 780, "total_steps": 4410, "loss": 0.2961, "lr": 3.928849335836435e-05, "epoch": 1.2380952380952381, "percentage": 17.69, "elapsed_time": "1:39:37", "remaining_time": "7:43:39"}
|
||||
{"current_steps": 785, "total_steps": 4410, "loss": 0.2944, "lr": 3.926741757231761e-05, "epoch": 1.246031746031746, "percentage": 17.8, "elapsed_time": "1:40:13", "remaining_time": "7:42:49"}
|
||||
{"current_steps": 790, "total_steps": 4410, "loss": 0.3007, "lr": 3.924603999929086e-05, "epoch": 1.253968253968254, "percentage": 17.91, "elapsed_time": "1:40:52", "remaining_time": "7:42:14"}
|
||||
{"current_steps": 795, "total_steps": 4410, "loss": 0.3001, "lr": 3.9224360974122584e-05, "epoch": 1.2619047619047619, "percentage": 18.03, "elapsed_time": "1:41:34", "remaining_time": "7:41:53"}
|
||||
{"current_steps": 800, "total_steps": 4410, "loss": 0.3107, "lr": 3.920238083637297e-05, "epoch": 1.2698412698412698, "percentage": 18.14, "elapsed_time": "1:42:11", "remaining_time": "7:41:09"}
|
||||
{"current_steps": 805, "total_steps": 4410, "loss": 0.3049, "lr": 3.9180099930318524e-05, "epoch": 1.2777777777777777, "percentage": 18.25, "elapsed_time": "1:42:53", "remaining_time": "7:40:47"}
|
||||
{"current_steps": 810, "total_steps": 4410, "loss": 0.3022, "lr": 3.915751860494672e-05, "epoch": 1.2857142857142856, "percentage": 18.37, "elapsed_time": "1:43:35", "remaining_time": "7:40:22"}
|
||||
{"current_steps": 815, "total_steps": 4410, "loss": 0.2996, "lr": 3.913463721395051e-05, "epoch": 1.2936507936507937, "percentage": 18.48, "elapsed_time": "1:44:16", "remaining_time": "7:39:59"}
|
||||
{"current_steps": 820, "total_steps": 4410, "loss": 0.3033, "lr": 3.911145611572282e-05, "epoch": 1.3015873015873016, "percentage": 18.59, "elapsed_time": "1:44:51", "remaining_time": "7:39:03"}
|
||||
{"current_steps": 825, "total_steps": 4410, "loss": 0.2948, "lr": 3.908797567335089e-05, "epoch": 1.3095238095238095, "percentage": 18.71, "elapsed_time": "1:45:29", "remaining_time": "7:38:24"}
|
||||
{"current_steps": 830, "total_steps": 4410, "loss": 0.3029, "lr": 3.906419625461062e-05, "epoch": 1.3174603174603174, "percentage": 18.82, "elapsed_time": "1:46:13", "remaining_time": "7:38:08"}
|
||||
{"current_steps": 835, "total_steps": 4410, "loss": 0.2979, "lr": 3.90401182319608e-05, "epoch": 1.3253968253968254, "percentage": 18.93, "elapsed_time": "1:46:50", "remaining_time": "7:37:27"}
|
||||
{"current_steps": 840, "total_steps": 4410, "loss": 0.2962, "lr": 3.9015741982537265e-05, "epoch": 1.3333333333333333, "percentage": 19.05, "elapsed_time": "1:47:30", "remaining_time": "7:36:54"}
|
||||
{"current_steps": 845, "total_steps": 4410, "loss": 0.309, "lr": 3.899106788814701e-05, "epoch": 1.3412698412698414, "percentage": 19.16, "elapsed_time": "1:48:12", "remaining_time": "7:36:29"}
|
||||
{"current_steps": 850, "total_steps": 4410, "loss": 0.2996, "lr": 3.896609633526219e-05, "epoch": 1.3492063492063493, "percentage": 19.27, "elapsed_time": "1:48:51", "remaining_time": "7:35:57"}
|
||||
{"current_steps": 855, "total_steps": 4410, "loss": 0.3027, "lr": 3.894082771501407e-05, "epoch": 1.3571428571428572, "percentage": 19.39, "elapsed_time": "1:49:26", "remaining_time": "7:35:04"}
|
||||
{"current_steps": 860, "total_steps": 4410, "loss": 0.304, "lr": 3.891526242318692e-05, "epoch": 1.3650793650793651, "percentage": 19.5, "elapsed_time": "1:50:01", "remaining_time": "7:34:10"}
|
||||
{"current_steps": 865, "total_steps": 4410, "loss": 0.3007, "lr": 3.8889400860211785e-05, "epoch": 1.373015873015873, "percentage": 19.61, "elapsed_time": "1:50:37", "remaining_time": "7:33:20"}
|
||||
{"current_steps": 870, "total_steps": 4410, "loss": 0.3089, "lr": 3.886324343116023e-05, "epoch": 1.380952380952381, "percentage": 19.73, "elapsed_time": "1:51:15", "remaining_time": "7:32:42"}
|
||||
{"current_steps": 875, "total_steps": 4410, "loss": 0.2964, "lr": 3.883679054573799e-05, "epoch": 1.3888888888888888, "percentage": 19.84, "elapsed_time": "1:51:51", "remaining_time": "7:31:56"}
|
||||
{"current_steps": 880, "total_steps": 4410, "loss": 0.3075, "lr": 3.881004261827856e-05, "epoch": 1.3968253968253967, "percentage": 19.95, "elapsed_time": "1:52:27", "remaining_time": "7:31:05"}
|
||||
{"current_steps": 885, "total_steps": 4410, "loss": 0.2966, "lr": 3.878300006773669e-05, "epoch": 1.4047619047619047, "percentage": 20.07, "elapsed_time": "1:53:02", "remaining_time": "7:30:16"}
|
||||
{"current_steps": 890, "total_steps": 4410, "loss": 0.2976, "lr": 3.875566331768184e-05, "epoch": 1.4126984126984126, "percentage": 20.18, "elapsed_time": "1:53:44", "remaining_time": "7:29:49"}
|
||||
{"current_steps": 895, "total_steps": 4410, "loss": 0.29, "lr": 3.872803279629155e-05, "epoch": 1.4206349206349207, "percentage": 20.29, "elapsed_time": "1:54:17", "remaining_time": "7:28:52"}
|
||||
{"current_steps": 900, "total_steps": 4410, "loss": 0.3026, "lr": 3.8700108936344705e-05, "epoch": 1.4285714285714286, "percentage": 20.41, "elapsed_time": "1:54:58", "remaining_time": "7:28:23"}
|
||||
{"current_steps": 905, "total_steps": 4410, "loss": 0.3073, "lr": 3.867189217521477e-05, "epoch": 1.4365079365079365, "percentage": 20.52, "elapsed_time": "1:55:34", "remaining_time": "7:27:38"}
|
||||
{"current_steps": 910, "total_steps": 4410, "loss": 0.2995, "lr": 3.864338295486297e-05, "epoch": 1.4444444444444444, "percentage": 20.63, "elapsed_time": "1:56:06", "remaining_time": "7:26:34"}
|
||||
{"current_steps": 915, "total_steps": 4410, "loss": 0.2962, "lr": 3.8614581721831316e-05, "epoch": 1.4523809523809523, "percentage": 20.75, "elapsed_time": "1:56:47", "remaining_time": "7:26:05"}
|
||||
{"current_steps": 920, "total_steps": 4410, "loss": 0.3019, "lr": 3.858548892723563e-05, "epoch": 1.4603174603174602, "percentage": 20.86, "elapsed_time": "1:57:29", "remaining_time": "7:25:42"}
|
||||
{"current_steps": 925, "total_steps": 4410, "loss": 0.3024, "lr": 3.855610502675851e-05, "epoch": 1.4682539682539684, "percentage": 20.98, "elapsed_time": "1:58:12", "remaining_time": "7:25:22"}
|
||||
{"current_steps": 930, "total_steps": 4410, "loss": 0.3118, "lr": 3.852643048064215e-05, "epoch": 1.4761904761904763, "percentage": 21.09, "elapsed_time": "1:58:49", "remaining_time": "7:24:39"}
|
||||
{"current_steps": 935, "total_steps": 4410, "loss": 0.2965, "lr": 3.8496465753681145e-05, "epoch": 1.4841269841269842, "percentage": 21.2, "elapsed_time": "1:59:32", "remaining_time": "7:24:17"}
|
||||
{"current_steps": 940, "total_steps": 4410, "loss": 0.3082, "lr": 3.846621131521522e-05, "epoch": 1.492063492063492, "percentage": 21.32, "elapsed_time": "2:00:13", "remaining_time": "7:23:49"}
|
||||
{"current_steps": 945, "total_steps": 4410, "loss": 0.2989, "lr": 3.843566763912187e-05, "epoch": 1.5, "percentage": 21.43, "elapsed_time": "2:00:55", "remaining_time": "7:23:24"}
|
||||
{"current_steps": 950, "total_steps": 4410, "loss": 0.2962, "lr": 3.840483520380896e-05, "epoch": 1.507936507936508, "percentage": 21.54, "elapsed_time": "2:01:35", "remaining_time": "7:22:49"}
|
||||
{"current_steps": 955, "total_steps": 4410, "loss": 0.3046, "lr": 3.837371449220717e-05, "epoch": 1.5158730158730158, "percentage": 21.66, "elapsed_time": "2:02:15", "remaining_time": "7:22:18"}
|
||||
{"current_steps": 960, "total_steps": 4410, "loss": 0.2967, "lr": 3.834230599176251e-05, "epoch": 1.5238095238095237, "percentage": 21.77, "elapsed_time": "2:02:53", "remaining_time": "7:21:37"}
|
||||
{"current_steps": 965, "total_steps": 4410, "loss": 0.3093, "lr": 3.831061019442864e-05, "epoch": 1.5317460317460316, "percentage": 21.88, "elapsed_time": "2:03:33", "remaining_time": "7:21:06"}
|
||||
{"current_steps": 970, "total_steps": 4410, "loss": 0.3094, "lr": 3.827862759665916e-05, "epoch": 1.5396825396825395, "percentage": 22.0, "elapsed_time": "2:04:10", "remaining_time": "7:20:23"}
|
||||
{"current_steps": 975, "total_steps": 4410, "loss": 0.3067, "lr": 3.8246358699399853e-05, "epoch": 1.5476190476190477, "percentage": 22.11, "elapsed_time": "2:04:45", "remaining_time": "7:19:30"}
|
||||
{"current_steps": 980, "total_steps": 4410, "loss": 0.3004, "lr": 3.8213804008080824e-05, "epoch": 1.5555555555555556, "percentage": 22.22, "elapsed_time": "2:05:22", "remaining_time": "7:18:50"}
|
||||
{"current_steps": 985, "total_steps": 4410, "loss": 0.2947, "lr": 3.818096403260862e-05, "epoch": 1.5634920634920635, "percentage": 22.34, "elapsed_time": "2:06:02", "remaining_time": "7:18:16"}
|
||||
{"current_steps": 990, "total_steps": 4410, "loss": 0.2959, "lr": 3.8147839287358185e-05, "epoch": 1.5714285714285714, "percentage": 22.45, "elapsed_time": "2:06:43", "remaining_time": "7:17:48"}
|
||||
{"current_steps": 995, "total_steps": 4410, "loss": 0.2886, "lr": 3.8114430291164836e-05, "epoch": 1.5793650793650795, "percentage": 22.56, "elapsed_time": "2:07:24", "remaining_time": "7:17:17"}
|
||||
{"current_steps": 1000, "total_steps": 4410, "loss": 0.2988, "lr": 3.808073756731615e-05, "epoch": 1.5873015873015874, "percentage": 22.68, "elapsed_time": "2:07:58", "remaining_time": "7:16:23"}
|
||||
{"current_steps": 1005, "total_steps": 4410, "loss": 0.2994, "lr": 3.8046761643543734e-05, "epoch": 1.5952380952380953, "percentage": 22.79, "elapsed_time": "2:08:43", "remaining_time": "7:16:08"}
|
||||
{"current_steps": 1010, "total_steps": 4410, "loss": 0.3035, "lr": 3.8012503052014996e-05, "epoch": 1.6031746031746033, "percentage": 22.9, "elapsed_time": "2:09:24", "remaining_time": "7:15:36"}
|
||||
{"current_steps": 1015, "total_steps": 4410, "loss": 0.3114, "lr": 3.797796232932476e-05, "epoch": 1.6111111111111112, "percentage": 23.02, "elapsed_time": "2:09:55", "remaining_time": "7:14:35"}
|
||||
{"current_steps": 1020, "total_steps": 4410, "loss": 0.2971, "lr": 3.794314001648692e-05, "epoch": 1.619047619047619, "percentage": 23.13, "elapsed_time": "2:10:32", "remaining_time": "7:13:50"}
|
||||
{"current_steps": 1025, "total_steps": 4410, "loss": 0.2952, "lr": 3.7908036658925926e-05, "epoch": 1.626984126984127, "percentage": 23.24, "elapsed_time": "2:11:10", "remaining_time": "7:13:10"}
|
||||
{"current_steps": 1030, "total_steps": 4410, "loss": 0.2928, "lr": 3.787265280646825e-05, "epoch": 1.6349206349206349, "percentage": 23.36, "elapsed_time": "2:11:44", "remaining_time": "7:12:19"}
|
||||
{"current_steps": 1035, "total_steps": 4410, "loss": 0.2994, "lr": 3.7836989013333776e-05, "epoch": 1.6428571428571428, "percentage": 23.47, "elapsed_time": "2:12:22", "remaining_time": "7:11:40"}
|
||||
{"current_steps": 1040, "total_steps": 4410, "loss": 0.3089, "lr": 3.780104583812712e-05, "epoch": 1.6507936507936507, "percentage": 23.58, "elapsed_time": "2:12:58", "remaining_time": "7:10:52"}
|
||||
{"current_steps": 1045, "total_steps": 4410, "loss": 0.3009, "lr": 3.7764823843828883e-05, "epoch": 1.6587301587301586, "percentage": 23.7, "elapsed_time": "2:13:35", "remaining_time": "7:10:11"}
|
||||
{"current_steps": 1050, "total_steps": 4410, "loss": 0.2951, "lr": 3.7728323597786834e-05, "epoch": 1.6666666666666665, "percentage": 23.81, "elapsed_time": "2:14:12", "remaining_time": "7:09:27"}
|
||||
{"current_steps": 1055, "total_steps": 4410, "loss": 0.3034, "lr": 3.7691545671707007e-05, "epoch": 1.6746031746031746, "percentage": 23.92, "elapsed_time": "2:14:52", "remaining_time": "7:08:53"}
|
||||
{"current_steps": 1060, "total_steps": 4410, "loss": 0.2934, "lr": 3.765449064164477e-05, "epoch": 1.6825396825396826, "percentage": 24.04, "elapsed_time": "2:15:30", "remaining_time": "7:08:16"}
|
||||
{"current_steps": 1065, "total_steps": 4410, "loss": 0.298, "lr": 3.7617159087995784e-05, "epoch": 1.6904761904761905, "percentage": 24.15, "elapsed_time": "2:16:16", "remaining_time": "7:07:59"}
|
||||
{"current_steps": 1070, "total_steps": 4410, "loss": 0.293, "lr": 3.757955159548693e-05, "epoch": 1.6984126984126984, "percentage": 24.26, "elapsed_time": "2:16:53", "remaining_time": "7:07:19"}
|
||||
{"current_steps": 1075, "total_steps": 4410, "loss": 0.2997, "lr": 3.754166875316713e-05, "epoch": 1.7063492063492065, "percentage": 24.38, "elapsed_time": "2:17:35", "remaining_time": "7:06:52"}
|
||||
{"current_steps": 1080, "total_steps": 4410, "loss": 0.2927, "lr": 3.750351115439812e-05, "epoch": 1.7142857142857144, "percentage": 24.49, "elapsed_time": "2:18:18", "remaining_time": "7:06:26"}
|
||||
{"current_steps": 1085, "total_steps": 4410, "loss": 0.289, "lr": 3.746507939684519e-05, "epoch": 1.7222222222222223, "percentage": 24.6, "elapsed_time": "2:18:56", "remaining_time": "7:05:47"}
|
||||
{"current_steps": 1090, "total_steps": 4410, "loss": 0.2977, "lr": 3.742637408246779e-05, "epoch": 1.7301587301587302, "percentage": 24.72, "elapsed_time": "2:19:33", "remaining_time": "7:05:05"}
|
||||
{"current_steps": 1095, "total_steps": 4410, "loss": 0.2951, "lr": 3.73873958175101e-05, "epoch": 1.7380952380952381, "percentage": 24.83, "elapsed_time": "2:20:13", "remaining_time": "7:04:30"}
|
||||
{"current_steps": 1100, "total_steps": 4410, "loss": 0.2921, "lr": 3.734814521249156e-05, "epoch": 1.746031746031746, "percentage": 24.94, "elapsed_time": "2:20:46", "remaining_time": "7:03:37"}
|
||||
{"current_steps": 1105, "total_steps": 4410, "loss": 0.2968, "lr": 3.7308622882197294e-05, "epoch": 1.753968253968254, "percentage": 25.06, "elapsed_time": "2:21:26", "remaining_time": "7:03:01"}
|
||||
{"current_steps": 1110, "total_steps": 4410, "loss": 0.3008, "lr": 3.7268829445668456e-05, "epoch": 1.7619047619047619, "percentage": 25.17, "elapsed_time": "2:22:03", "remaining_time": "7:02:20"}
|
||||
{"current_steps": 1115, "total_steps": 4410, "loss": 0.2962, "lr": 3.722876552619257e-05, "epoch": 1.7698412698412698, "percentage": 25.28, "elapsed_time": "2:22:39", "remaining_time": "7:01:33"}
|
||||
{"current_steps": 1120, "total_steps": 4410, "loss": 0.2928, "lr": 3.718843175129378e-05, "epoch": 1.7777777777777777, "percentage": 25.4, "elapsed_time": "2:23:18", "remaining_time": "7:00:57"}
|
||||
{"current_steps": 1125, "total_steps": 4410, "loss": 0.2942, "lr": 3.7147828752722944e-05, "epoch": 1.7857142857142856, "percentage": 25.51, "elapsed_time": "2:23:53", "remaining_time": "7:00:09"}
|
||||
{"current_steps": 1130, "total_steps": 4410, "loss": 0.2986, "lr": 3.7106957166447834e-05, "epoch": 1.7936507936507935, "percentage": 25.62, "elapsed_time": "2:24:35", "remaining_time": "6:59:42"}
|
||||
{"current_steps": 1135, "total_steps": 4410, "loss": 0.2937, "lr": 3.7065817632643115e-05, "epoch": 1.8015873015873016, "percentage": 25.74, "elapsed_time": "2:25:17", "remaining_time": "6:59:14"}
|
||||
{"current_steps": 1140, "total_steps": 4410, "loss": 0.2901, "lr": 3.7024410795680326e-05, "epoch": 1.8095238095238095, "percentage": 25.85, "elapsed_time": "2:25:57", "remaining_time": "6:58:39"}
|
||||
{"current_steps": 1145, "total_steps": 4410, "loss": 0.3018, "lr": 3.698273730411782e-05, "epoch": 1.8174603174603174, "percentage": 25.96, "elapsed_time": "2:26:31", "remaining_time": "6:57:48"}
|
||||
{"current_steps": 1150, "total_steps": 4410, "loss": 0.2877, "lr": 3.694079781069053e-05, "epoch": 1.8253968253968254, "percentage": 26.08, "elapsed_time": "2:27:10", "remaining_time": "6:57:11"}
|
||||
{"current_steps": 1155, "total_steps": 4410, "loss": 0.2915, "lr": 3.6898592972299875e-05, "epoch": 1.8333333333333335, "percentage": 26.19, "elapsed_time": "2:27:43", "remaining_time": "6:56:20"}
|
||||
{"current_steps": 1160, "total_steps": 4410, "loss": 0.2994, "lr": 3.6856123450003306e-05, "epoch": 1.8412698412698414, "percentage": 26.3, "elapsed_time": "2:28:23", "remaining_time": "6:55:45"}
|
||||
{"current_steps": 1165, "total_steps": 4410, "loss": 0.2955, "lr": 3.68133899090041e-05, "epoch": 1.8492063492063493, "percentage": 26.42, "elapsed_time": "2:29:01", "remaining_time": "6:55:06"}
|
||||
{"current_steps": 1170, "total_steps": 4410, "loss": 0.2918, "lr": 3.677039301864085e-05, "epoch": 1.8571428571428572, "percentage": 26.53, "elapsed_time": "2:29:40", "remaining_time": "6:54:29"}
|
||||
{"current_steps": 1175, "total_steps": 4410, "loss": 0.295, "lr": 3.672713345237701e-05, "epoch": 1.8650793650793651, "percentage": 26.64, "elapsed_time": "2:30:16", "remaining_time": "6:53:44"}
|
||||
{"current_steps": 1180, "total_steps": 4410, "loss": 0.2902, "lr": 3.6683611887790356e-05, "epoch": 1.873015873015873, "percentage": 26.76, "elapsed_time": "2:30:49", "remaining_time": "6:52:51"}
|
||||
{"current_steps": 1185, "total_steps": 4410, "loss": 0.2996, "lr": 3.663982900656236e-05, "epoch": 1.880952380952381, "percentage": 26.87, "elapsed_time": "2:31:30", "remaining_time": "6:52:18"}
|
||||
{"current_steps": 1190, "total_steps": 4410, "loss": 0.293, "lr": 3.6595785494467516e-05, "epoch": 1.8888888888888888, "percentage": 26.98, "elapsed_time": "2:32:08", "remaining_time": "6:51:41"}
|
||||
{"current_steps": 1195, "total_steps": 4410, "loss": 0.2923, "lr": 3.655148204136259e-05, "epoch": 1.8968253968253967, "percentage": 27.1, "elapsed_time": "2:32:46", "remaining_time": "6:51:00"}
|
||||
{"current_steps": 1200, "total_steps": 4410, "loss": 0.2926, "lr": 3.650691934117584e-05, "epoch": 1.9047619047619047, "percentage": 27.21, "elapsed_time": "2:33:28", "remaining_time": "6:50:32"}
|
||||
{"current_steps": 1205, "total_steps": 4410, "loss": 0.2889, "lr": 3.646209809189611e-05, "epoch": 1.9126984126984126, "percentage": 27.32, "elapsed_time": "2:34:00", "remaining_time": "6:49:37"}
|
||||
{"current_steps": 1210, "total_steps": 4410, "loss": 0.2912, "lr": 3.641701899556192e-05, "epoch": 1.9206349206349205, "percentage": 27.44, "elapsed_time": "2:34:44", "remaining_time": "6:49:15"}
|
||||
{"current_steps": 1215, "total_steps": 4410, "loss": 0.3064, "lr": 3.63716827582505e-05, "epoch": 1.9285714285714286, "percentage": 27.55, "elapsed_time": "2:35:21", "remaining_time": "6:48:32"}
|
||||
{"current_steps": 1220, "total_steps": 4410, "loss": 0.3005, "lr": 3.632609009006665e-05, "epoch": 1.9365079365079365, "percentage": 27.66, "elapsed_time": "2:35:59", "remaining_time": "6:47:51"}
|
||||
{"current_steps": 1225, "total_steps": 4410, "loss": 0.2968, "lr": 3.62802417051317e-05, "epoch": 1.9444444444444444, "percentage": 27.78, "elapsed_time": "2:36:36", "remaining_time": "6:47:10"}
|
||||
{"current_steps": 1230, "total_steps": 4410, "loss": 0.2905, "lr": 3.6234138321572274e-05, "epoch": 1.9523809523809523, "percentage": 27.89, "elapsed_time": "2:37:16", "remaining_time": "6:46:36"}
|
||||
{"current_steps": 1235, "total_steps": 4410, "loss": 0.2962, "lr": 3.6187780661509074e-05, "epoch": 1.9603174603174605, "percentage": 28.0, "elapsed_time": "2:37:51", "remaining_time": "6:45:49"}
|
||||
{"current_steps": 1240, "total_steps": 4410, "loss": 0.3089, "lr": 3.6141169451045526e-05, "epoch": 1.9682539682539684, "percentage": 28.12, "elapsed_time": "2:38:32", "remaining_time": "6:45:18"}
|
||||
{"current_steps": 1245, "total_steps": 4410, "loss": 0.2909, "lr": 3.609430542025646e-05, "epoch": 1.9761904761904763, "percentage": 28.23, "elapsed_time": "2:39:13", "remaining_time": "6:44:47"}
|
||||
{"current_steps": 1250, "total_steps": 4410, "loss": 0.2933, "lr": 3.604718930317664e-05, "epoch": 1.9841269841269842, "percentage": 28.34, "elapsed_time": "2:39:49", "remaining_time": "6:44:01"}
|
||||
{"current_steps": 1255, "total_steps": 4410, "loss": 0.2965, "lr": 3.5999821837789275e-05, "epoch": 1.992063492063492, "percentage": 28.46, "elapsed_time": "2:40:27", "remaining_time": "6:43:24"}
|
||||
{"current_steps": 1260, "total_steps": 4410, "loss": 0.2893, "lr": 3.595220376601447e-05, "epoch": 2.0, "percentage": 28.57, "elapsed_time": "2:41:04", "remaining_time": "6:42:41"}
|
||||
{"current_steps": 1265, "total_steps": 4410, "loss": 0.2915, "lr": 3.590433583369758e-05, "epoch": 2.007936507936508, "percentage": 28.68, "elapsed_time": "2:41:36", "remaining_time": "6:41:47"}
|
||||
{"current_steps": 1270, "total_steps": 4410, "loss": 0.29, "lr": 3.5856218790597554e-05, "epoch": 2.015873015873016, "percentage": 28.8, "elapsed_time": "2:42:16", "remaining_time": "6:41:11"}
|
||||
{"current_steps": 1275, "total_steps": 4410, "loss": 0.2857, "lr": 3.580785339037519e-05, "epoch": 2.0238095238095237, "percentage": 28.91, "elapsed_time": "2:42:49", "remaining_time": "6:40:22"}
|
||||
{"current_steps": 1280, "total_steps": 4410, "loss": 0.2893, "lr": 3.57592403905813e-05, "epoch": 2.0317460317460316, "percentage": 29.02, "elapsed_time": "2:43:24", "remaining_time": "6:39:34"}
|
||||
{"current_steps": 1285, "total_steps": 4410, "loss": 0.2933, "lr": 3.571038055264489e-05, "epoch": 2.0396825396825395, "percentage": 29.14, "elapsed_time": "2:43:57", "remaining_time": "6:38:44"}
|
||||
{"current_steps": 1290, "total_steps": 4410, "loss": 0.2959, "lr": 3.566127464186119e-05, "epoch": 2.0476190476190474, "percentage": 29.25, "elapsed_time": "2:44:36", "remaining_time": "6:38:06"}
|
||||
{"current_steps": 1295, "total_steps": 4410, "loss": 0.2958, "lr": 3.56119234273797e-05, "epoch": 2.0555555555555554, "percentage": 29.37, "elapsed_time": "2:45:15", "remaining_time": "6:37:31"}
|
||||
{"current_steps": 1300, "total_steps": 4410, "loss": 0.2876, "lr": 3.5562327682192134e-05, "epoch": 2.0634920634920633, "percentage": 29.48, "elapsed_time": "2:45:58", "remaining_time": "6:37:04"}
|
||||
{"current_steps": 1305, "total_steps": 4410, "loss": 0.2838, "lr": 3.5512488183120286e-05, "epoch": 2.0714285714285716, "percentage": 29.59, "elapsed_time": "2:46:38", "remaining_time": "6:36:30"}
|
||||
{"current_steps": 1310, "total_steps": 4410, "loss": 0.2988, "lr": 3.54624057108039e-05, "epoch": 2.0793650793650795, "percentage": 29.71, "elapsed_time": "2:47:15", "remaining_time": "6:35:48"}
|
||||
{"current_steps": 1315, "total_steps": 4410, "loss": 0.2935, "lr": 3.5412081049688444e-05, "epoch": 2.0873015873015874, "percentage": 29.82, "elapsed_time": "2:47:56", "remaining_time": "6:35:15"}
|
||||
{"current_steps": 1320, "total_steps": 4410, "loss": 0.2835, "lr": 3.5361514988012774e-05, "epoch": 2.0952380952380953, "percentage": 29.93, "elapsed_time": "2:48:33", "remaining_time": "6:34:33"}
|
||||
{"current_steps": 1325, "total_steps": 4410, "loss": 0.2861, "lr": 3.5310708317796844e-05, "epoch": 2.1031746031746033, "percentage": 30.05, "elapsed_time": "2:49:13", "remaining_time": "6:34:00"}
|
||||
{"current_steps": 1330, "total_steps": 4410, "loss": 0.2972, "lr": 3.5259661834829266e-05, "epoch": 2.111111111111111, "percentage": 30.16, "elapsed_time": "2:49:54", "remaining_time": "6:33:27"}
|
||||
{"current_steps": 1335, "total_steps": 4410, "loss": 0.2889, "lr": 3.5208376338654866e-05, "epoch": 2.119047619047619, "percentage": 30.27, "elapsed_time": "2:50:27", "remaining_time": "6:32:37"}
|
||||
{"current_steps": 1340, "total_steps": 4410, "loss": 0.2964, "lr": 3.515685263256214e-05, "epoch": 2.126984126984127, "percentage": 30.39, "elapsed_time": "2:51:00", "remaining_time": "6:31:48"}
|
||||
{"current_steps": 1345, "total_steps": 4410, "loss": 0.2884, "lr": 3.51050915235707e-05, "epoch": 2.134920634920635, "percentage": 30.5, "elapsed_time": "2:51:32", "remaining_time": "6:30:53"}
|
||||
{"current_steps": 1350, "total_steps": 4410, "loss": 0.2938, "lr": 3.5053093822418596e-05, "epoch": 2.142857142857143, "percentage": 30.61, "elapsed_time": "2:52:13", "remaining_time": "6:30:21"}
|
||||
{"current_steps": 1355, "total_steps": 4410, "loss": 0.2949, "lr": 3.500086034354966e-05, "epoch": 2.1507936507936507, "percentage": 30.73, "elapsed_time": "2:52:53", "remaining_time": "6:29:48"}
|
||||
{"current_steps": 1360, "total_steps": 4410, "loss": 0.2926, "lr": 3.494839190510071e-05, "epoch": 2.1587301587301586, "percentage": 30.84, "elapsed_time": "2:53:27", "remaining_time": "6:28:59"}
|
||||
{"current_steps": 1365, "total_steps": 4410, "loss": 0.2934, "lr": 3.489568932888877e-05, "epoch": 2.1666666666666665, "percentage": 30.95, "elapsed_time": "2:54:10", "remaining_time": "6:28:33"}
|
||||
{"current_steps": 1370, "total_steps": 4410, "loss": 0.2918, "lr": 3.484275344039815e-05, "epoch": 2.1746031746031744, "percentage": 31.07, "elapsed_time": "2:54:43", "remaining_time": "6:27:42"}
|
||||
{"current_steps": 1375, "total_steps": 4410, "loss": 0.2967, "lr": 3.478958506876759e-05, "epoch": 2.1825396825396823, "percentage": 31.18, "elapsed_time": "2:55:29", "remaining_time": "6:27:21"}
|
||||
{"current_steps": 1380, "total_steps": 4410, "loss": 0.2885, "lr": 3.47361850467772e-05, "epoch": 2.1904761904761907, "percentage": 31.29, "elapsed_time": "2:56:11", "remaining_time": "6:26:51"}
|
||||
{"current_steps": 1385, "total_steps": 4410, "loss": 0.2918, "lr": 3.468255421083546e-05, "epoch": 2.1984126984126986, "percentage": 31.41, "elapsed_time": "2:56:49", "remaining_time": "6:26:13"}
|
||||
{"current_steps": 1390, "total_steps": 4410, "loss": 0.294, "lr": 3.46286934009661e-05, "epoch": 2.2063492063492065, "percentage": 31.52, "elapsed_time": "2:57:30", "remaining_time": "6:25:38"}
|
||||
{"current_steps": 1395, "total_steps": 4410, "loss": 0.294, "lr": 3.457460346079495e-05, "epoch": 2.2142857142857144, "percentage": 31.63, "elapsed_time": "2:58:11", "remaining_time": "6:25:08"}
|
||||
{"current_steps": 1400, "total_steps": 4410, "loss": 0.293, "lr": 3.452028523753673e-05, "epoch": 2.2222222222222223, "percentage": 31.75, "elapsed_time": "2:58:53", "remaining_time": "6:24:37"}
|
||||
{"current_steps": 1405, "total_steps": 4410, "loss": 0.2872, "lr": 3.446573958198176e-05, "epoch": 2.2301587301587302, "percentage": 31.86, "elapsed_time": "2:59:33", "remaining_time": "6:24:01"}
|
||||
{"current_steps": 1410, "total_steps": 4410, "loss": 0.2929, "lr": 3.4410967348482666e-05, "epoch": 2.238095238095238, "percentage": 31.97, "elapsed_time": "3:00:15", "remaining_time": "6:23:31"}
|
||||
{"current_steps": 1415, "total_steps": 4410, "loss": 0.2862, "lr": 3.435596939494098e-05, "epoch": 2.246031746031746, "percentage": 32.09, "elapsed_time": "3:00:54", "remaining_time": "6:22:55"}
|
||||
{"current_steps": 1420, "total_steps": 4410, "loss": 0.2886, "lr": 3.430074658279369e-05, "epoch": 2.253968253968254, "percentage": 32.2, "elapsed_time": "3:01:35", "remaining_time": "6:22:21"}
|
||||
{"current_steps": 1425, "total_steps": 4410, "loss": 0.2975, "lr": 3.424529977699977e-05, "epoch": 2.261904761904762, "percentage": 32.31, "elapsed_time": "3:02:13", "remaining_time": "6:21:42"}
|
||||
{"current_steps": 1430, "total_steps": 4410, "loss": 0.2934, "lr": 3.418962984602661e-05, "epoch": 2.2698412698412698, "percentage": 32.43, "elapsed_time": "3:02:50", "remaining_time": "6:21:00"}
|
||||
{"current_steps": 1435, "total_steps": 4410, "loss": 0.2872, "lr": 3.413373766183646e-05, "epoch": 2.2777777777777777, "percentage": 32.54, "elapsed_time": "3:03:24", "remaining_time": "6:20:15"}
|
||||
{"current_steps": 1440, "total_steps": 4410, "loss": 0.2876, "lr": 3.40776240998727e-05, "epoch": 2.2857142857142856, "percentage": 32.65, "elapsed_time": "3:03:59", "remaining_time": "6:19:29"}
|
||||
{"current_steps": 1445, "total_steps": 4410, "loss": 0.2924, "lr": 3.4021290039046184e-05, "epoch": 2.2936507936507935, "percentage": 32.77, "elapsed_time": "3:04:36", "remaining_time": "6:18:48"}
|
||||
{"current_steps": 1450, "total_steps": 4410, "loss": 0.2855, "lr": 3.396473636172146e-05, "epoch": 2.3015873015873014, "percentage": 32.88, "elapsed_time": "3:05:12", "remaining_time": "6:18:03"}
|
||||
{"current_steps": 1455, "total_steps": 4410, "loss": 0.2875, "lr": 3.390796395370294e-05, "epoch": 2.3095238095238093, "percentage": 32.99, "elapsed_time": "3:05:48", "remaining_time": "6:17:22"}
|
||||
{"current_steps": 1460, "total_steps": 4410, "loss": 0.2914, "lr": 3.385097370422102e-05, "epoch": 2.317460317460317, "percentage": 33.11, "elapsed_time": "3:06:24", "remaining_time": "6:16:39"}
|
||||
{"current_steps": 1465, "total_steps": 4410, "loss": 0.288, "lr": 3.3793766505918185e-05, "epoch": 2.3253968253968256, "percentage": 33.22, "elapsed_time": "3:06:59", "remaining_time": "6:15:53"}
|
||||
{"current_steps": 1470, "total_steps": 4410, "loss": 0.2889, "lr": 3.3736343254834994e-05, "epoch": 2.3333333333333335, "percentage": 33.33, "elapsed_time": "3:07:33", "remaining_time": "6:15:07"}
|
||||
{"current_steps": 1475, "total_steps": 4410, "loss": 0.2932, "lr": 3.3678704850396045e-05, "epoch": 2.3412698412698414, "percentage": 33.45, "elapsed_time": "3:08:10", "remaining_time": "6:14:25"}
|
||||
{"current_steps": 1480, "total_steps": 4410, "loss": 0.2927, "lr": 3.362085219539592e-05, "epoch": 2.3492063492063493, "percentage": 33.56, "elapsed_time": "3:08:47", "remaining_time": "6:13:45"}
|
||||
{"current_steps": 1485, "total_steps": 4410, "loss": 0.2947, "lr": 3.3562786195985025e-05, "epoch": 2.357142857142857, "percentage": 33.67, "elapsed_time": "3:09:26", "remaining_time": "6:13:08"}
|
||||
{"current_steps": 1490, "total_steps": 4410, "loss": 0.291, "lr": 3.350450776165535e-05, "epoch": 2.365079365079365, "percentage": 33.79, "elapsed_time": "3:10:09", "remaining_time": "6:12:38"}
|
||||
{"current_steps": 1495, "total_steps": 4410, "loss": 0.2945, "lr": 3.344601780522634e-05, "epoch": 2.373015873015873, "percentage": 33.9, "elapsed_time": "3:10:45", "remaining_time": "6:11:57"}
|
||||
{"current_steps": 1500, "total_steps": 4410, "loss": 0.2937, "lr": 3.3387317242830466e-05, "epoch": 2.380952380952381, "percentage": 34.01, "elapsed_time": "3:11:22", "remaining_time": "6:11:16"}
|
||||
{"current_steps": 1505, "total_steps": 4410, "loss": 0.2935, "lr": 3.332840699389897e-05, "epoch": 2.388888888888889, "percentage": 34.13, "elapsed_time": "3:12:01", "remaining_time": "6:10:39"}
|
||||
{"current_steps": 1510, "total_steps": 4410, "loss": 0.289, "lr": 3.32692879811474e-05, "epoch": 2.3968253968253967, "percentage": 34.24, "elapsed_time": "3:12:41", "remaining_time": "6:10:04"}
|
||||
{"current_steps": 1515, "total_steps": 4410, "loss": 0.2934, "lr": 3.320996113056123e-05, "epoch": 2.4047619047619047, "percentage": 34.35, "elapsed_time": "3:13:21", "remaining_time": "6:09:28"}
|
||||
{"current_steps": 1520, "total_steps": 4410, "loss": 0.2904, "lr": 3.315042737138128e-05, "epoch": 2.4126984126984126, "percentage": 34.47, "elapsed_time": "3:13:58", "remaining_time": "6:08:49"}
|
||||
{"current_steps": 1525, "total_steps": 4410, "loss": 0.283, "lr": 3.309068763608919e-05, "epoch": 2.4206349206349205, "percentage": 34.58, "elapsed_time": "3:14:40", "remaining_time": "6:08:16"}
|
||||
{"current_steps": 1530, "total_steps": 4410, "loss": 0.2949, "lr": 3.303074286039285e-05, "epoch": 2.4285714285714284, "percentage": 34.69, "elapsed_time": "3:15:18", "remaining_time": "6:07:37"}
|
||||
{"current_steps": 1535, "total_steps": 4410, "loss": 0.2916, "lr": 3.2970593983211694e-05, "epoch": 2.4365079365079367, "percentage": 34.81, "elapsed_time": "3:15:57", "remaining_time": "6:07:01"}
|
||||
{"current_steps": 1540, "total_steps": 4410, "loss": 0.2936, "lr": 3.2910241946661993e-05, "epoch": 2.4444444444444446, "percentage": 34.92, "elapsed_time": "3:16:37", "remaining_time": "6:06:25"}
|
||||
{"current_steps": 1545, "total_steps": 4410, "loss": 0.2845, "lr": 3.2849687696042165e-05, "epoch": 2.4523809523809526, "percentage": 35.03, "elapsed_time": "3:17:13", "remaining_time": "6:05:44"}
|
||||
{"current_steps": 1550, "total_steps": 4410, "loss": 0.2828, "lr": 3.2788932179817886e-05, "epoch": 2.4603174603174605, "percentage": 35.15, "elapsed_time": "3:17:55", "remaining_time": "6:05:12"}
|
||||
{"current_steps": 1555, "total_steps": 4410, "loss": 0.2927, "lr": 3.2727976349607276e-05, "epoch": 2.4682539682539684, "percentage": 35.26, "elapsed_time": "3:18:34", "remaining_time": "6:04:35"}
|
||||
{"current_steps": 1560, "total_steps": 4410, "loss": 0.2912, "lr": 3.266682116016599e-05, "epoch": 2.4761904761904763, "percentage": 35.37, "elapsed_time": "3:19:16", "remaining_time": "6:04:02"}
|
||||
{"current_steps": 1565, "total_steps": 4410, "loss": 0.288, "lr": 3.260546756937227e-05, "epoch": 2.484126984126984, "percentage": 35.49, "elapsed_time": "3:19:52", "remaining_time": "6:03:21"}
|
||||
{"current_steps": 1570, "total_steps": 4410, "loss": 0.2954, "lr": 3.254391653821192e-05, "epoch": 2.492063492063492, "percentage": 35.6, "elapsed_time": "3:20:31", "remaining_time": "6:02:44"}
|
||||
{"current_steps": 1575, "total_steps": 4410, "loss": 0.2869, "lr": 3.248216903076328e-05, "epoch": 2.5, "percentage": 35.71, "elapsed_time": "3:21:08", "remaining_time": "6:02:03"}
|
||||
{"current_steps": 1580, "total_steps": 4410, "loss": 0.2792, "lr": 3.24202260141821e-05, "epoch": 2.507936507936508, "percentage": 35.83, "elapsed_time": "3:21:50", "remaining_time": "6:01:31"}
|
||||
{"current_steps": 1585, "total_steps": 4410, "loss": 0.2981, "lr": 3.235808845868641e-05, "epoch": 2.515873015873016, "percentage": 35.94, "elapsed_time": "3:22:25", "remaining_time": "6:00:46"}
|
||||
{"current_steps": 1590, "total_steps": 4410, "loss": 0.2905, "lr": 3.229575733754132e-05, "epoch": 2.5238095238095237, "percentage": 36.05, "elapsed_time": "3:23:03", "remaining_time": "6:00:08"}
|
||||
{"current_steps": 1595, "total_steps": 4410, "loss": 0.2994, "lr": 3.2233233627043765e-05, "epoch": 2.5317460317460316, "percentage": 36.17, "elapsed_time": "3:23:46", "remaining_time": "5:59:39"}
|
||||
{"current_steps": 1600, "total_steps": 4410, "loss": 0.2959, "lr": 3.217051830650722e-05, "epoch": 2.5396825396825395, "percentage": 36.28, "elapsed_time": "3:24:26", "remaining_time": "5:59:03"}
|
||||
{"current_steps": 1605, "total_steps": 4410, "loss": 0.2934, "lr": 3.210761235824639e-05, "epoch": 2.5476190476190474, "percentage": 36.39, "elapsed_time": "3:25:04", "remaining_time": "5:58:23"}
|
||||
{"current_steps": 1610, "total_steps": 4410, "loss": 0.2883, "lr": 3.204451676756175e-05, "epoch": 2.5555555555555554, "percentage": 36.51, "elapsed_time": "3:25:46", "remaining_time": "5:57:52"}
|
||||
{"current_steps": 1615, "total_steps": 4410, "loss": 0.2839, "lr": 3.198123252272419e-05, "epoch": 2.5634920634920633, "percentage": 36.62, "elapsed_time": "3:26:23", "remaining_time": "5:57:11"}
|
||||
{"current_steps": 1620, "total_steps": 4410, "loss": 0.2876, "lr": 3.1917760614959505e-05, "epoch": 2.571428571428571, "percentage": 36.73, "elapsed_time": "3:27:00", "remaining_time": "5:56:30"}
|
||||
{"current_steps": 1625, "total_steps": 4410, "loss": 0.2911, "lr": 3.1854102038432856e-05, "epoch": 2.5793650793650795, "percentage": 36.85, "elapsed_time": "3:27:41", "remaining_time": "5:55:56"}
|
||||
{"current_steps": 1630, "total_steps": 4410, "loss": 0.2923, "lr": 3.17902577902332e-05, "epoch": 2.5873015873015874, "percentage": 36.96, "elapsed_time": "3:28:21", "remaining_time": "5:55:21"}
|
||||
{"current_steps": 1635, "total_steps": 4410, "loss": 0.2899, "lr": 3.172622887035771e-05, "epoch": 2.5952380952380953, "percentage": 37.07, "elapsed_time": "3:28:56", "remaining_time": "5:54:37"}
|
||||
{"current_steps": 1640, "total_steps": 4410, "loss": 0.2897, "lr": 3.1662016281696073e-05, "epoch": 2.6031746031746033, "percentage": 37.19, "elapsed_time": "3:29:31", "remaining_time": "5:53:53"}
|
||||
{"current_steps": 1645, "total_steps": 4410, "loss": 0.29, "lr": 3.15976210300148e-05, "epoch": 2.611111111111111, "percentage": 37.3, "elapsed_time": "3:30:13", "remaining_time": "5:53:21"}
|
||||
{"current_steps": 1650, "total_steps": 4410, "loss": 0.2869, "lr": 3.153304412394143e-05, "epoch": 2.619047619047619, "percentage": 37.41, "elapsed_time": "3:30:48", "remaining_time": "5:52:38"}
|
||||
{"current_steps": 1655, "total_steps": 4410, "loss": 0.2964, "lr": 3.146828657494883e-05, "epoch": 2.626984126984127, "percentage": 37.53, "elapsed_time": "3:31:31", "remaining_time": "5:52:06"}
|
||||
{"current_steps": 1660, "total_steps": 4410, "loss": 0.2924, "lr": 3.140334939733924e-05, "epoch": 2.634920634920635, "percentage": 37.64, "elapsed_time": "3:32:07", "remaining_time": "5:51:25"}
|
||||
{"current_steps": 1665, "total_steps": 4410, "loss": 0.2843, "lr": 3.1338233608228455e-05, "epoch": 2.642857142857143, "percentage": 37.76, "elapsed_time": "3:32:34", "remaining_time": "5:50:27"}
|
||||
{"current_steps": 1670, "total_steps": 4410, "loss": 0.2792, "lr": 3.127294022752988e-05, "epoch": 2.6507936507936507, "percentage": 37.87, "elapsed_time": "3:33:11", "remaining_time": "5:49:46"}
|
||||
{"current_steps": 1675, "total_steps": 4410, "loss": 0.2886, "lr": 3.120747027793854e-05, "epoch": 2.6587301587301586, "percentage": 37.98, "elapsed_time": "3:33:48", "remaining_time": "5:49:07"}
|
||||
{"current_steps": 1680, "total_steps": 4410, "loss": 0.2901, "lr": 3.114182478491509e-05, "epoch": 2.6666666666666665, "percentage": 38.1, "elapsed_time": "3:34:25", "remaining_time": "5:48:26"}
|
||||
{"current_steps": 1685, "total_steps": 4410, "loss": 0.2867, "lr": 3.107600477666969e-05, "epoch": 2.674603174603175, "percentage": 38.21, "elapsed_time": "3:34:59", "remaining_time": "5:47:41"}
|
||||
{"current_steps": 1690, "total_steps": 4410, "loss": 0.288, "lr": 3.1010011284146004e-05, "epoch": 2.682539682539683, "percentage": 38.32, "elapsed_time": "3:35:38", "remaining_time": "5:47:04"}
|
||||
{"current_steps": 1695, "total_steps": 4410, "loss": 0.2867, "lr": 3.0943845341004944e-05, "epoch": 2.6904761904761907, "percentage": 38.44, "elapsed_time": "3:36:18", "remaining_time": "5:46:29"}
|
||||
{"current_steps": 1700, "total_steps": 4410, "loss": 0.29, "lr": 3.087750798360856e-05, "epoch": 2.6984126984126986, "percentage": 38.55, "elapsed_time": "3:36:58", "remaining_time": "5:45:53"}
|
||||
{"current_steps": 1705, "total_steps": 4410, "loss": 0.2831, "lr": 3.0811000251003774e-05, "epoch": 2.7063492063492065, "percentage": 38.66, "elapsed_time": "3:37:38", "remaining_time": "5:45:18"}
|
||||
{"current_steps": 1710, "total_steps": 4410, "loss": 0.2922, "lr": 3.074432318490608e-05, "epoch": 2.7142857142857144, "percentage": 38.78, "elapsed_time": "3:38:16", "remaining_time": "5:44:38"}
|
||||
{"current_steps": 1715, "total_steps": 4410, "loss": 0.2863, "lr": 3.067747782968328e-05, "epoch": 2.7222222222222223, "percentage": 38.89, "elapsed_time": "3:38:50", "remaining_time": "5:43:53"}
|
||||
{"current_steps": 1720, "total_steps": 4410, "loss": 0.2832, "lr": 3.0610465232339096e-05, "epoch": 2.7301587301587302, "percentage": 39.0, "elapsed_time": "3:39:31", "remaining_time": "5:43:20"}
|
||||
{"current_steps": 1725, "total_steps": 4410, "loss": 0.2866, "lr": 3.054328644249677e-05, "epoch": 2.738095238095238, "percentage": 39.12, "elapsed_time": "3:40:09", "remaining_time": "5:42:40"}
|
||||
{"current_steps": 1730, "total_steps": 4410, "loss": 0.2926, "lr": 3.047594251238265e-05, "epoch": 2.746031746031746, "percentage": 39.23, "elapsed_time": "3:40:48", "remaining_time": "5:42:03"}
|
||||
{"current_steps": 1735, "total_steps": 4410, "loss": 0.2844, "lr": 3.0408434496809643e-05, "epoch": 2.753968253968254, "percentage": 39.34, "elapsed_time": "3:41:29", "remaining_time": "5:41:29"}
|
||||
{"current_steps": 1740, "total_steps": 4410, "loss": 0.2855, "lr": 3.034076345316079e-05, "epoch": 2.761904761904762, "percentage": 39.46, "elapsed_time": "3:42:03", "remaining_time": "5:40:44"}
|
||||
{"current_steps": 1745, "total_steps": 4410, "loss": 0.2861, "lr": 3.0272930441372628e-05, "epoch": 2.7698412698412698, "percentage": 39.57, "elapsed_time": "3:42:39", "remaining_time": "5:40:02"}
|
||||
{"current_steps": 1750, "total_steps": 4410, "loss": 0.2843, "lr": 3.02049365239186e-05, "epoch": 2.7777777777777777, "percentage": 39.68, "elapsed_time": "3:43:22", "remaining_time": "5:39:31"}
|
||||
{"current_steps": 1755, "total_steps": 4410, "loss": 0.2811, "lr": 3.0136782765792455e-05, "epoch": 2.7857142857142856, "percentage": 39.8, "elapsed_time": "3:44:01", "remaining_time": "5:38:55"}
|
||||
{"current_steps": 1760, "total_steps": 4410, "loss": 0.2936, "lr": 3.0068470234491517e-05, "epoch": 2.7936507936507935, "percentage": 39.91, "elapsed_time": "3:44:41", "remaining_time": "5:38:18"}
|
||||
{"current_steps": 1765, "total_steps": 4410, "loss": 0.2844, "lr": 3.0000000000000004e-05, "epoch": 2.8015873015873014, "percentage": 40.02, "elapsed_time": "3:45:23", "remaining_time": "5:37:45"}
|
||||
{"current_steps": 1770, "total_steps": 4410, "loss": 0.2843, "lr": 2.993137313477223e-05, "epoch": 2.8095238095238093, "percentage": 40.14, "elapsed_time": "3:45:50", "remaining_time": "5:36:51"}
|
||||
{"current_steps": 1775, "total_steps": 4410, "loss": 0.2884, "lr": 2.9862590713715837e-05, "epoch": 2.817460317460317, "percentage": 40.25, "elapsed_time": "3:46:33", "remaining_time": "5:36:19"}
|
||||
{"current_steps": 1780, "total_steps": 4410, "loss": 0.2919, "lr": 2.9793653814174957e-05, "epoch": 2.825396825396825, "percentage": 40.36, "elapsed_time": "3:47:09", "remaining_time": "5:35:38"}
|
||||
{"current_steps": 1785, "total_steps": 4410, "loss": 0.2935, "lr": 2.9724563515913317e-05, "epoch": 2.8333333333333335, "percentage": 40.48, "elapsed_time": "3:47:51", "remaining_time": "5:35:05"}
|
||||
{"current_steps": 1790, "total_steps": 4410, "loss": 0.2899, "lr": 2.9655320901097348e-05, "epoch": 2.8412698412698414, "percentage": 40.59, "elapsed_time": "3:48:29", "remaining_time": "5:34:26"}
|
||||
{"current_steps": 1795, "total_steps": 4410, "loss": 0.2841, "lr": 2.9585927054279224e-05, "epoch": 2.8492063492063493, "percentage": 40.7, "elapsed_time": "3:49:06", "remaining_time": "5:33:45"}
|
||||
{"current_steps": 1800, "total_steps": 4410, "loss": 0.2861, "lr": 2.951638306237988e-05, "epoch": 2.857142857142857, "percentage": 40.82, "elapsed_time": "3:49:47", "remaining_time": "5:33:12"}
|
||||
{"current_steps": 1805, "total_steps": 4410, "loss": 0.2896, "lr": 2.9446690014671976e-05, "epoch": 2.865079365079365, "percentage": 40.93, "elapsed_time": "3:50:26", "remaining_time": "5:32:35"}
|
||||
{"current_steps": 1810, "total_steps": 4410, "loss": 0.2944, "lr": 2.937684900276285e-05, "epoch": 2.873015873015873, "percentage": 41.04, "elapsed_time": "3:51:02", "remaining_time": "5:31:53"}
|
||||
{"current_steps": 1815, "total_steps": 4410, "loss": 0.2829, "lr": 2.9306861120577416e-05, "epoch": 2.880952380952381, "percentage": 41.16, "elapsed_time": "3:51:42", "remaining_time": "5:31:16"}
|
||||
{"current_steps": 1820, "total_steps": 4410, "loss": 0.283, "lr": 2.923672746434103e-05, "epoch": 2.888888888888889, "percentage": 41.27, "elapsed_time": "3:52:19", "remaining_time": "5:30:37"}
|
||||
{"current_steps": 1825, "total_steps": 4410, "loss": 0.2863, "lr": 2.9166449132562303e-05, "epoch": 2.8968253968253967, "percentage": 41.38, "elapsed_time": "3:52:51", "remaining_time": "5:29:49"}
|
||||
{"current_steps": 1830, "total_steps": 4410, "loss": 0.2837, "lr": 2.9096027226015927e-05, "epoch": 2.9047619047619047, "percentage": 41.5, "elapsed_time": "3:53:29", "remaining_time": "5:29:11"}
|
||||
{"current_steps": 1835, "total_steps": 4410, "loss": 0.2887, "lr": 2.9025462847725405e-05, "epoch": 2.9126984126984126, "percentage": 41.61, "elapsed_time": "3:54:04", "remaining_time": "5:28:28"}
|
||||
{"current_steps": 1840, "total_steps": 4410, "loss": 0.2992, "lr": 2.8954757102945798e-05, "epoch": 2.9206349206349205, "percentage": 41.72, "elapsed_time": "3:54:37", "remaining_time": "5:27:42"}
|
||||
{"current_steps": 1845, "total_steps": 4410, "loss": 0.2891, "lr": 2.888391109914638e-05, "epoch": 2.928571428571429, "percentage": 41.84, "elapsed_time": "3:55:17", "remaining_time": "5:27:06"}
|
||||
{"current_steps": 1850, "total_steps": 4410, "loss": 0.2984, "lr": 2.8812925945993333e-05, "epoch": 2.9365079365079367, "percentage": 41.95, "elapsed_time": "3:56:00", "remaining_time": "5:26:35"}
|
||||
{"current_steps": 1855, "total_steps": 4410, "loss": 0.279, "lr": 2.8741802755332332e-05, "epoch": 2.9444444444444446, "percentage": 42.06, "elapsed_time": "3:56:34", "remaining_time": "5:25:51"}
|
||||
{"current_steps": 1860, "total_steps": 4410, "loss": 0.2913, "lr": 2.8670542641171155e-05, "epoch": 2.9523809523809526, "percentage": 42.18, "elapsed_time": "3:57:16", "remaining_time": "5:25:17"}
|
||||
{"current_steps": 1865, "total_steps": 4410, "loss": 0.2835, "lr": 2.859914671966221e-05, "epoch": 2.9603174603174605, "percentage": 42.29, "elapsed_time": "3:57:55", "remaining_time": "5:24:40"}
|
||||
{"current_steps": 1870, "total_steps": 4410, "loss": 0.2834, "lr": 2.8527616109085082e-05, "epoch": 2.9682539682539684, "percentage": 42.4, "elapsed_time": "3:58:32", "remaining_time": "5:24:01"}
|
||||
{"current_steps": 1875, "total_steps": 4410, "loss": 0.282, "lr": 2.8455951929828977e-05, "epoch": 2.9761904761904763, "percentage": 42.52, "elapsed_time": "3:59:05", "remaining_time": "5:23:15"}
|
||||
{"current_steps": 1880, "total_steps": 4410, "loss": 0.2849, "lr": 2.8384155304375223e-05, "epoch": 2.984126984126984, "percentage": 42.63, "elapsed_time": "3:59:39", "remaining_time": "5:22:31"}
|
||||
{"current_steps": 1885, "total_steps": 4410, "loss": 0.2818, "lr": 2.8312227357279646e-05, "epoch": 2.992063492063492, "percentage": 42.74, "elapsed_time": "4:00:18", "remaining_time": "5:21:54"}
|
||||
{"current_steps": 1890, "total_steps": 4410, "loss": 0.2905, "lr": 2.8240169215154977e-05, "epoch": 3.0, "percentage": 42.86, "elapsed_time": "4:00:54", "remaining_time": "5:21:13"}
|
||||
{"current_steps": 1895, "total_steps": 4410, "loss": 0.2737, "lr": 2.8167982006653196e-05, "epoch": 3.007936507936508, "percentage": 42.97, "elapsed_time": "4:01:36", "remaining_time": "5:20:39"}
|
||||
{"current_steps": 1900, "total_steps": 4410, "loss": 0.2895, "lr": 2.8095666862447876e-05, "epoch": 3.015873015873016, "percentage": 43.08, "elapsed_time": "4:02:13", "remaining_time": "5:19:59"}
|
||||
{"current_steps": 1905, "total_steps": 4410, "loss": 0.2882, "lr": 2.8023224915216442e-05, "epoch": 3.0238095238095237, "percentage": 43.2, "elapsed_time": "4:02:52", "remaining_time": "5:19:22"}
|
||||
{"current_steps": 1910, "total_steps": 4410, "loss": 0.2865, "lr": 2.795065729962244e-05, "epoch": 3.0317460317460316, "percentage": 43.31, "elapsed_time": "4:03:30", "remaining_time": "5:18:43"}
|
||||
{"current_steps": 1915, "total_steps": 4410, "loss": 0.2809, "lr": 2.7877965152297785e-05, "epoch": 3.0396825396825395, "percentage": 43.42, "elapsed_time": "4:04:01", "remaining_time": "5:17:55"}
|
||||
{"current_steps": 1920, "total_steps": 4410, "loss": 0.2856, "lr": 2.780514961182492e-05, "epoch": 3.0476190476190474, "percentage": 43.54, "elapsed_time": "4:04:41", "remaining_time": "5:17:20"}
|
||||
{"current_steps": 1925, "total_steps": 4410, "loss": 0.2839, "lr": 2.773221181871903e-05, "epoch": 3.0555555555555554, "percentage": 43.65, "elapsed_time": "4:05:22", "remaining_time": "5:16:45"}
|
||||
{"current_steps": 1930, "total_steps": 4410, "loss": 0.287, "lr": 2.765915291541013e-05, "epoch": 3.0634920634920633, "percentage": 43.76, "elapsed_time": "4:06:02", "remaining_time": "5:16:09"}
|
||||
{"current_steps": 1935, "total_steps": 4410, "loss": 0.2862, "lr": 2.7585974046225206e-05, "epoch": 3.0714285714285716, "percentage": 43.88, "elapsed_time": "4:06:44", "remaining_time": "5:15:36"}
|
||||
{"current_steps": 1940, "total_steps": 4410, "loss": 0.2807, "lr": 2.751267635737027e-05, "epoch": 3.0793650793650795, "percentage": 43.99, "elapsed_time": "4:07:15", "remaining_time": "5:14:48"}
|
||||
{"current_steps": 1945, "total_steps": 4410, "loss": 0.2811, "lr": 2.7439260996912423e-05, "epoch": 3.0873015873015874, "percentage": 44.1, "elapsed_time": "4:07:53", "remaining_time": "5:14:10"}
|
||||
{"current_steps": 1950, "total_steps": 4410, "loss": 0.2865, "lr": 2.7365729114761862e-05, "epoch": 3.0952380952380953, "percentage": 44.22, "elapsed_time": "4:08:33", "remaining_time": "5:13:33"}
|
||||
{"current_steps": 1955, "total_steps": 4410, "loss": 0.2779, "lr": 2.729208186265386e-05, "epoch": 3.1031746031746033, "percentage": 44.33, "elapsed_time": "4:09:13", "remaining_time": "5:12:57"}
|
||||
{"current_steps": 1960, "total_steps": 4410, "loss": 0.292, "lr": 2.721832039413077e-05, "epoch": 3.111111111111111, "percentage": 44.44, "elapsed_time": "4:09:47", "remaining_time": "5:12:14"}
|
||||
{"current_steps": 1965, "total_steps": 4410, "loss": 0.2911, "lr": 2.7144445864523887e-05, "epoch": 3.119047619047619, "percentage": 44.56, "elapsed_time": "4:10:19", "remaining_time": "5:11:28"}
|
||||
{"current_steps": 1970, "total_steps": 4410, "loss": 0.2858, "lr": 2.7070459430935407e-05, "epoch": 3.126984126984127, "percentage": 44.67, "elapsed_time": "4:10:55", "remaining_time": "5:10:47"}
|
||||
{"current_steps": 1975, "total_steps": 4410, "loss": 0.2894, "lr": 2.69963622522203e-05, "epoch": 3.134920634920635, "percentage": 44.78, "elapsed_time": "4:11:34", "remaining_time": "5:10:09"}
|
||||
{"current_steps": 1980, "total_steps": 4410, "loss": 0.2872, "lr": 2.6922155488968117e-05, "epoch": 3.142857142857143, "percentage": 44.9, "elapsed_time": "4:12:17", "remaining_time": "5:09:37"}
|
||||
{"current_steps": 1985, "total_steps": 4410, "loss": 0.2795, "lr": 2.684784030348486e-05, "epoch": 3.1507936507936507, "percentage": 45.01, "elapsed_time": "4:12:58", "remaining_time": "5:09:02"}
|
||||
{"current_steps": 1990, "total_steps": 4410, "loss": 0.2825, "lr": 2.6773417859774755e-05, "epoch": 3.1587301587301586, "percentage": 45.12, "elapsed_time": "4:13:33", "remaining_time": "5:08:20"}
|
||||
{"current_steps": 1995, "total_steps": 4410, "loss": 0.2718, "lr": 2.669888932352201e-05, "epoch": 3.1666666666666665, "percentage": 45.24, "elapsed_time": "4:14:04", "remaining_time": "5:07:33"}
|
||||
{"current_steps": 2000, "total_steps": 4410, "loss": 0.2827, "lr": 2.662425586207259e-05, "epoch": 3.1746031746031744, "percentage": 45.35, "elapsed_time": "4:14:39", "remaining_time": "5:06:51"}
|
||||
{"current_steps": 2005, "total_steps": 4410, "loss": 0.2773, "lr": 2.6549518644415876e-05, "epoch": 3.1825396825396823, "percentage": 45.46, "elapsed_time": "4:15:18", "remaining_time": "5:06:14"}
|
||||
{"current_steps": 2010, "total_steps": 4410, "loss": 0.2847, "lr": 2.6474678841166426e-05, "epoch": 3.1904761904761907, "percentage": 45.58, "elapsed_time": "4:15:55", "remaining_time": "5:05:35"}
|
||||
{"current_steps": 2015, "total_steps": 4410, "loss": 0.2863, "lr": 2.639973762454558e-05, "epoch": 3.1984126984126986, "percentage": 45.69, "elapsed_time": "4:16:31", "remaining_time": "5:04:53"}
|
||||
{"current_steps": 2020, "total_steps": 4410, "loss": 0.2794, "lr": 2.6324696168363134e-05, "epoch": 3.2063492063492065, "percentage": 45.8, "elapsed_time": "4:16:59", "remaining_time": "5:04:03"}
|
||||
{"current_steps": 2025, "total_steps": 4410, "loss": 0.2862, "lr": 2.624955564799894e-05, "epoch": 3.2142857142857144, "percentage": 45.92, "elapsed_time": "4:17:38", "remaining_time": "5:03:26"}
|
||||
{"current_steps": 2030, "total_steps": 4410, "loss": 0.2752, "lr": 2.617431724038451e-05, "epoch": 3.2222222222222223, "percentage": 46.03, "elapsed_time": "4:18:18", "remaining_time": "5:02:50"}
|
||||
{"current_steps": 2035, "total_steps": 4410, "loss": 0.2829, "lr": 2.609898212398455e-05, "epoch": 3.2301587301587302, "percentage": 46.15, "elapsed_time": "4:18:52", "remaining_time": "5:02:07"}
|
||||
{"current_steps": 2040, "total_steps": 4410, "loss": 0.283, "lr": 2.6023551478778535e-05, "epoch": 3.238095238095238, "percentage": 46.26, "elapsed_time": "4:19:30", "remaining_time": "5:01:29"}
|
||||
{"current_steps": 2045, "total_steps": 4410, "loss": 0.2724, "lr": 2.5948026486242225e-05, "epoch": 3.246031746031746, "percentage": 46.37, "elapsed_time": "4:20:09", "remaining_time": "5:00:52"}
|
||||
{"current_steps": 2050, "total_steps": 4410, "loss": 0.2892, "lr": 2.5872408329329136e-05, "epoch": 3.253968253968254, "percentage": 46.49, "elapsed_time": "4:20:43", "remaining_time": "5:00:08"}
|
||||
{"current_steps": 2055, "total_steps": 4410, "loss": 0.2846, "lr": 2.5796698192452016e-05, "epoch": 3.261904761904762, "percentage": 46.6, "elapsed_time": "4:21:24", "remaining_time": "4:59:34"}
|
||||
{"current_steps": 2060, "total_steps": 4410, "loss": 0.2927, "lr": 2.572089726146432e-05, "epoch": 3.2698412698412698, "percentage": 46.71, "elapsed_time": "4:22:07", "remaining_time": "4:59:02"}
|
||||
{"current_steps": 2065, "total_steps": 4410, "loss": 0.2856, "lr": 2.564500672364162e-05, "epoch": 3.2777777777777777, "percentage": 46.83, "elapsed_time": "4:22:39", "remaining_time": "4:58:16"}
|
||||
{"current_steps": 2070, "total_steps": 4410, "loss": 0.2833, "lr": 2.556902776766298e-05, "epoch": 3.2857142857142856, "percentage": 46.94, "elapsed_time": "4:23:20", "remaining_time": "4:57:41"}
|
||||
{"current_steps": 2075, "total_steps": 4410, "loss": 0.2751, "lr": 2.5492961583592397e-05, "epoch": 3.2936507936507935, "percentage": 47.05, "elapsed_time": "4:23:59", "remaining_time": "4:57:04"}
|
||||
{"current_steps": 2080, "total_steps": 4410, "loss": 0.2787, "lr": 2.5416809362860107e-05, "epoch": 3.3015873015873014, "percentage": 47.17, "elapsed_time": "4:24:40", "remaining_time": "4:56:29"}
|
||||
{"current_steps": 2085, "total_steps": 4410, "loss": 0.28, "lr": 2.5340572298243946e-05, "epoch": 3.3095238095238093, "percentage": 47.28, "elapsed_time": "4:25:26", "remaining_time": "4:55:59"}
|
||||
{"current_steps": 2090, "total_steps": 4410, "loss": 0.2852, "lr": 2.5264251583850677e-05, "epoch": 3.317460317460317, "percentage": 47.39, "elapsed_time": "4:25:58", "remaining_time": "4:55:14"}
|
||||
{"current_steps": 2095, "total_steps": 4410, "loss": 0.275, "lr": 2.518784841509726e-05, "epoch": 3.3253968253968256, "percentage": 47.51, "elapsed_time": "4:26:37", "remaining_time": "4:54:36"}
|
||||
{"current_steps": 2100, "total_steps": 4410, "loss": 0.2831, "lr": 2.511136398869216e-05, "epoch": 3.3333333333333335, "percentage": 47.62, "elapsed_time": "4:27:16", "remaining_time": "4:53:59"}
|
||||
{"current_steps": 2105, "total_steps": 4410, "loss": 0.2885, "lr": 2.503479950261658e-05, "epoch": 3.3412698412698414, "percentage": 47.73, "elapsed_time": "4:27:59", "remaining_time": "4:53:27"}
|
||||
{"current_steps": 2110, "total_steps": 4410, "loss": 0.2814, "lr": 2.4958156156105693e-05, "epoch": 3.3492063492063493, "percentage": 47.85, "elapsed_time": "4:28:42", "remaining_time": "4:52:54"}
|
||||
{"current_steps": 2115, "total_steps": 4410, "loss": 0.283, "lr": 2.4881435149629892e-05, "epoch": 3.357142857142857, "percentage": 47.96, "elapsed_time": "4:29:18", "remaining_time": "4:52:14"}
|
||||
{"current_steps": 2120, "total_steps": 4410, "loss": 0.2813, "lr": 2.4804637684875937e-05, "epoch": 3.365079365079365, "percentage": 48.07, "elapsed_time": "4:29:57", "remaining_time": "4:51:36"}
|
||||
{"current_steps": 2125, "total_steps": 4410, "loss": 0.289, "lr": 2.4727764964728177e-05, "epoch": 3.373015873015873, "percentage": 48.19, "elapsed_time": "4:30:38", "remaining_time": "4:51:01"}
|
||||
{"current_steps": 2130, "total_steps": 4410, "loss": 0.2859, "lr": 2.4650818193249693e-05, "epoch": 3.380952380952381, "percentage": 48.3, "elapsed_time": "4:31:13", "remaining_time": "4:50:18"}
|
||||
{"current_steps": 2135, "total_steps": 4410, "loss": 0.2894, "lr": 2.4573798575663425e-05, "epoch": 3.388888888888889, "percentage": 48.41, "elapsed_time": "4:31:52", "remaining_time": "4:49:42"}
|
||||
{"current_steps": 2140, "total_steps": 4410, "loss": 0.2871, "lr": 2.4496707318333323e-05, "epoch": 3.3968253968253967, "percentage": 48.53, "elapsed_time": "4:32:31", "remaining_time": "4:49:04"}
|
||||
{"current_steps": 2145, "total_steps": 4410, "loss": 0.2783, "lr": 2.441954562874541e-05, "epoch": 3.4047619047619047, "percentage": 48.64, "elapsed_time": "4:33:01", "remaining_time": "4:48:18"}
|
||||
{"current_steps": 2150, "total_steps": 4410, "loss": 0.2828, "lr": 2.434231471548893e-05, "epoch": 3.4126984126984126, "percentage": 48.75, "elapsed_time": "4:33:31", "remaining_time": "4:47:30"}
|
||||
{"current_steps": 2155, "total_steps": 4410, "loss": 0.29, "lr": 2.4265015788237348e-05, "epoch": 3.4206349206349205, "percentage": 48.87, "elapsed_time": "4:34:10", "remaining_time": "4:46:54"}
|
||||
{"current_steps": 2160, "total_steps": 4410, "loss": 0.29, "lr": 2.4187650057729465e-05, "epoch": 3.4285714285714284, "percentage": 48.98, "elapsed_time": "4:34:50", "remaining_time": "4:46:17"}
|
||||
{"current_steps": 2165, "total_steps": 4410, "loss": 0.279, "lr": 2.4110218735750403e-05, "epoch": 3.4365079365079367, "percentage": 49.09, "elapsed_time": "4:35:22", "remaining_time": "4:45:33"}
|
||||
{"current_steps": 2170, "total_steps": 4410, "loss": 0.2742, "lr": 2.4032723035112667e-05, "epoch": 3.4444444444444446, "percentage": 49.21, "elapsed_time": "4:36:01", "remaining_time": "4:44:56"}
|
||||
{"current_steps": 2175, "total_steps": 4410, "loss": 0.2799, "lr": 2.3955164169637124e-05, "epoch": 3.4523809523809526, "percentage": 49.32, "elapsed_time": "4:36:37", "remaining_time": "4:44:15"}
|
||||
{"current_steps": 2180, "total_steps": 4410, "loss": 0.2821, "lr": 2.387754335413398e-05, "epoch": 3.4603174603174605, "percentage": 49.43, "elapsed_time": "4:37:11", "remaining_time": "4:43:33"}
|
||||
{"current_steps": 2185, "total_steps": 4410, "loss": 0.2863, "lr": 2.3799861804383807e-05, "epoch": 3.4682539682539684, "percentage": 49.55, "elapsed_time": "4:37:50", "remaining_time": "4:42:55"}
|
||||
{"current_steps": 2190, "total_steps": 4410, "loss": 0.2824, "lr": 2.3722120737118414e-05, "epoch": 3.4761904761904763, "percentage": 49.66, "elapsed_time": "4:38:25", "remaining_time": "4:42:14"}
|
||||
{"current_steps": 2195, "total_steps": 4410, "loss": 0.2821, "lr": 2.3644321370001868e-05, "epoch": 3.484126984126984, "percentage": 49.77, "elapsed_time": "4:39:03", "remaining_time": "4:41:36"}
|
||||
{"current_steps": 2200, "total_steps": 4410, "loss": 0.2806, "lr": 2.3566464921611393e-05, "epoch": 3.492063492063492, "percentage": 49.89, "elapsed_time": "4:39:40", "remaining_time": "4:40:56"}
|
||||
{"current_steps": 2205, "total_steps": 4410, "loss": 0.2868, "lr": 2.348855261141827e-05, "epoch": 3.5, "percentage": 50.0, "elapsed_time": "4:40:11", "remaining_time": "4:40:11"}
|
||||
{"current_steps": 2210, "total_steps": 4410, "loss": 0.2858, "lr": 2.341058565976874e-05, "epoch": 3.507936507936508, "percentage": 50.11, "elapsed_time": "4:40:55", "remaining_time": "4:39:39"}
|
||||
{"current_steps": 2215, "total_steps": 4410, "loss": 0.2856, "lr": 2.3332565287864918e-05, "epoch": 3.515873015873016, "percentage": 50.23, "elapsed_time": "4:41:33", "remaining_time": "4:39:00"}
|
||||
{"current_steps": 2220, "total_steps": 4410, "loss": 0.2801, "lr": 2.325449271774563e-05, "epoch": 3.5238095238095237, "percentage": 50.34, "elapsed_time": "4:42:13", "remaining_time": "4:38:25"}
|
||||
{"current_steps": 2225, "total_steps": 4410, "loss": 0.2841, "lr": 2.3176369172267286e-05, "epoch": 3.5317460317460316, "percentage": 50.45, "elapsed_time": "4:42:51", "remaining_time": "4:37:46"}
|
||||
{"current_steps": 2230, "total_steps": 4410, "loss": 0.2841, "lr": 2.3098195875084732e-05, "epoch": 3.5396825396825395, "percentage": 50.57, "elapsed_time": "4:43:27", "remaining_time": "4:37:06"}
|
||||
{"current_steps": 2235, "total_steps": 4410, "loss": 0.2808, "lr": 2.301997405063208e-05, "epoch": 3.5476190476190474, "percentage": 50.68, "elapsed_time": "4:43:59", "remaining_time": "4:36:22"}
|
||||
{"current_steps": 2240, "total_steps": 4410, "loss": 0.2826, "lr": 2.2941704924103535e-05, "epoch": 3.5555555555555554, "percentage": 50.79, "elapsed_time": "4:44:44", "remaining_time": "4:35:50"}
|
||||
{"current_steps": 2245, "total_steps": 4410, "loss": 0.2838, "lr": 2.2863389721434165e-05, "epoch": 3.5634920634920633, "percentage": 50.91, "elapsed_time": "4:45:25", "remaining_time": "4:35:15"}
|
||||
{"current_steps": 2250, "total_steps": 4410, "loss": 0.2857, "lr": 2.2785029669280775e-05, "epoch": 3.571428571428571, "percentage": 51.02, "elapsed_time": "4:46:05", "remaining_time": "4:34:38"}
|
||||
{"current_steps": 2255, "total_steps": 4410, "loss": 0.2779, "lr": 2.2706625995002626e-05, "epoch": 3.5793650793650795, "percentage": 51.13, "elapsed_time": "4:46:42", "remaining_time": "4:33:59"}
|
||||
{"current_steps": 2260, "total_steps": 4410, "loss": 0.2869, "lr": 2.262817992664224e-05, "epoch": 3.5873015873015874, "percentage": 51.25, "elapsed_time": "4:47:20", "remaining_time": "4:33:21"}
|
||||
{"current_steps": 2265, "total_steps": 4410, "loss": 0.2819, "lr": 2.2549692692906158e-05, "epoch": 3.5952380952380953, "percentage": 51.36, "elapsed_time": "4:47:59", "remaining_time": "4:32:44"}
|
||||
{"current_steps": 2270, "total_steps": 4410, "loss": 0.2867, "lr": 2.24711655231457e-05, "epoch": 3.6031746031746033, "percentage": 51.47, "elapsed_time": "4:48:41", "remaining_time": "4:32:09"}
|
||||
{"current_steps": 2275, "total_steps": 4410, "loss": 0.28, "lr": 2.2392599647337724e-05, "epoch": 3.611111111111111, "percentage": 51.59, "elapsed_time": "4:49:12", "remaining_time": "4:31:24"}
|
||||
{"current_steps": 2280, "total_steps": 4410, "loss": 0.2818, "lr": 2.23139962960653e-05, "epoch": 3.619047619047619, "percentage": 51.7, "elapsed_time": "4:49:52", "remaining_time": "4:30:47"}
|
||||
{"current_steps": 2285, "total_steps": 4410, "loss": 0.2747, "lr": 2.2235356700498528e-05, "epoch": 3.626984126984127, "percentage": 51.81, "elapsed_time": "4:50:31", "remaining_time": "4:30:10"}
|
||||
{"current_steps": 2290, "total_steps": 4410, "loss": 0.2811, "lr": 2.2156682092375175e-05, "epoch": 3.634920634920635, "percentage": 51.93, "elapsed_time": "4:51:09", "remaining_time": "4:29:32"}
|
||||
{"current_steps": 2295, "total_steps": 4410, "loss": 0.2827, "lr": 2.2077973703981423e-05, "epoch": 3.642857142857143, "percentage": 52.04, "elapsed_time": "4:51:44", "remaining_time": "4:28:51"}
|
||||
{"current_steps": 2300, "total_steps": 4410, "loss": 0.2663, "lr": 2.1999232768132552e-05, "epoch": 3.6507936507936507, "percentage": 52.15, "elapsed_time": "4:52:16", "remaining_time": "4:28:07"}
|
||||
{"current_steps": 2305, "total_steps": 4410, "loss": 0.2861, "lr": 2.1920460518153637e-05, "epoch": 3.6587301587301586, "percentage": 52.27, "elapsed_time": "4:52:55", "remaining_time": "4:27:30"}
|
||||
{"current_steps": 2310, "total_steps": 4410, "loss": 0.2905, "lr": 2.1841658187860232e-05, "epoch": 3.6666666666666665, "percentage": 52.38, "elapsed_time": "4:53:34", "remaining_time": "4:26:53"}
|
||||
{"current_steps": 2315, "total_steps": 4410, "loss": 0.2809, "lr": 2.176282701153904e-05, "epoch": 3.674603174603175, "percentage": 52.49, "elapsed_time": "4:54:16", "remaining_time": "4:26:18"}
|
||||
{"current_steps": 2320, "total_steps": 4410, "loss": 0.288, "lr": 2.1683968223928572e-05, "epoch": 3.682539682539683, "percentage": 52.61, "elapsed_time": "4:54:58", "remaining_time": "4:25:44"}
|
||||
{"current_steps": 2325, "total_steps": 4410, "loss": 0.2875, "lr": 2.1605083060199835e-05, "epoch": 3.6904761904761907, "percentage": 52.72, "elapsed_time": "4:55:40", "remaining_time": "4:25:08"}
|
||||
{"current_steps": 2330, "total_steps": 4410, "loss": 0.2824, "lr": 2.152617275593694e-05, "epoch": 3.6984126984126986, "percentage": 52.83, "elapsed_time": "4:56:16", "remaining_time": "4:24:29"}
|
||||
{"current_steps": 2335, "total_steps": 4410, "loss": 0.2734, "lr": 2.144723854711781e-05, "epoch": 3.7063492063492065, "percentage": 52.95, "elapsed_time": "4:56:51", "remaining_time": "4:23:48"}
|
||||
{"current_steps": 2340, "total_steps": 4410, "loss": 0.2789, "lr": 2.1368281670094766e-05, "epoch": 3.7142857142857144, "percentage": 53.06, "elapsed_time": "4:57:29", "remaining_time": "4:23:10"}
|
||||
{"current_steps": 2345, "total_steps": 4410, "loss": 0.276, "lr": 2.1289303361575175e-05, "epoch": 3.7222222222222223, "percentage": 53.17, "elapsed_time": "4:58:08", "remaining_time": "4:22:32"}
|
||||
{"current_steps": 2350, "total_steps": 4410, "loss": 0.2844, "lr": 2.121030485860211e-05, "epoch": 3.7301587301587302, "percentage": 53.29, "elapsed_time": "4:58:50", "remaining_time": "4:21:57"}
|
||||
{"current_steps": 2355, "total_steps": 4410, "loss": 0.2827, "lr": 2.113128739853493e-05, "epoch": 3.738095238095238, "percentage": 53.4, "elapsed_time": "4:59:28", "remaining_time": "4:21:19"}
|
||||
{"current_steps": 2360, "total_steps": 4410, "loss": 0.2828, "lr": 2.1052252219029944e-05, "epoch": 3.746031746031746, "percentage": 53.51, "elapsed_time": "5:00:01", "remaining_time": "4:20:36"}
|
||||
{"current_steps": 2365, "total_steps": 4410, "loss": 0.2854, "lr": 2.0973200558020967e-05, "epoch": 3.753968253968254, "percentage": 53.63, "elapsed_time": "5:00:35", "remaining_time": "4:19:54"}
|
||||
{"current_steps": 2370, "total_steps": 4410, "loss": 0.286, "lr": 2.0894133653700005e-05, "epoch": 3.761904761904762, "percentage": 53.74, "elapsed_time": "5:01:13", "remaining_time": "4:19:16"}
|
||||
{"current_steps": 2375, "total_steps": 4410, "loss": 0.2877, "lr": 2.0815052744497795e-05, "epoch": 3.7698412698412698, "percentage": 53.85, "elapsed_time": "5:01:54", "remaining_time": "4:18:40"}
|
||||
{"current_steps": 2380, "total_steps": 4410, "loss": 0.2896, "lr": 2.0735959069064434e-05, "epoch": 3.7777777777777777, "percentage": 53.97, "elapsed_time": "5:02:36", "remaining_time": "4:18:06"}
|
||||
{"current_steps": 2385, "total_steps": 4410, "loss": 0.2804, "lr": 2.065685386624999e-05, "epoch": 3.7857142857142856, "percentage": 54.08, "elapsed_time": "5:03:15", "remaining_time": "4:17:29"}
|
||||
{"current_steps": 2390, "total_steps": 4410, "loss": 0.2852, "lr": 2.0577738375085076e-05, "epoch": 3.7936507936507935, "percentage": 54.2, "elapsed_time": "5:03:51", "remaining_time": "4:16:48"}
|
||||
{"current_steps": 2395, "total_steps": 4410, "loss": 0.2815, "lr": 2.0498613834761462e-05, "epoch": 3.8015873015873014, "percentage": 54.31, "elapsed_time": "5:04:33", "remaining_time": "4:16:13"}
|
||||
{"current_steps": 2400, "total_steps": 4410, "loss": 0.2742, "lr": 2.041948148461264e-05, "epoch": 3.8095238095238093, "percentage": 54.42, "elapsed_time": "5:05:12", "remaining_time": "4:15:36"}
|
||||
{"current_steps": 2405, "total_steps": 4410, "loss": 0.2811, "lr": 2.0340342564094436e-05, "epoch": 3.817460317460317, "percentage": 54.54, "elapsed_time": "5:05:48", "remaining_time": "4:14:57"}
|
||||
{"current_steps": 2410, "total_steps": 4410, "loss": 0.2791, "lr": 2.0261198312765597e-05, "epoch": 3.825396825396825, "percentage": 54.65, "elapsed_time": "5:06:31", "remaining_time": "4:14:22"}
|
||||
{"current_steps": 2415, "total_steps": 4410, "loss": 0.2798, "lr": 2.0182049970268355e-05, "epoch": 3.8333333333333335, "percentage": 54.76, "elapsed_time": "5:07:04", "remaining_time": "4:13:39"}
|
||||
{"current_steps": 2420, "total_steps": 4410, "loss": 0.2839, "lr": 2.010289877630902e-05, "epoch": 3.8412698412698414, "percentage": 54.88, "elapsed_time": "5:07:47", "remaining_time": "4:13:06"}
|
||||
{"current_steps": 2425, "total_steps": 4410, "loss": 0.2883, "lr": 2.002374597063858e-05, "epoch": 3.8492063492063493, "percentage": 54.99, "elapsed_time": "5:08:30", "remaining_time": "4:12:31"}
|
||||
{"current_steps": 2430, "total_steps": 4410, "loss": 0.2822, "lr": 1.9944592793033255e-05, "epoch": 3.857142857142857, "percentage": 55.1, "elapsed_time": "5:09:10", "remaining_time": "4:11:55"}
|
||||
{"current_steps": 2435, "total_steps": 4410, "loss": 0.2884, "lr": 1.9865440483275086e-05, "epoch": 3.865079365079365, "percentage": 55.22, "elapsed_time": "5:09:56", "remaining_time": "4:11:23"}
|
||||
{"current_steps": 2440, "total_steps": 4410, "loss": 0.2752, "lr": 1.978629028113254e-05, "epoch": 3.873015873015873, "percentage": 55.33, "elapsed_time": "5:10:35", "remaining_time": "4:10:45"}
|
||||
{"current_steps": 2445, "total_steps": 4410, "loss": 0.2823, "lr": 1.9707143426341058e-05, "epoch": 3.880952380952381, "percentage": 55.44, "elapsed_time": "5:11:17", "remaining_time": "4:10:10"}
|
||||
{"current_steps": 2450, "total_steps": 4410, "loss": 0.2869, "lr": 1.962800115858364e-05, "epoch": 3.888888888888889, "percentage": 55.56, "elapsed_time": "5:11:59", "remaining_time": "4:09:35"}
|
||||
{"current_steps": 2455, "total_steps": 4410, "loss": 0.2837, "lr": 1.9548864717471472e-05, "epoch": 3.8968253968253967, "percentage": 55.67, "elapsed_time": "5:12:38", "remaining_time": "4:08:58"}
|
||||
{"current_steps": 2460, "total_steps": 4410, "loss": 0.289, "lr": 1.9469735342524454e-05, "epoch": 3.9047619047619047, "percentage": 55.78, "elapsed_time": "5:13:16", "remaining_time": "4:08:19"}
|
||||
{"current_steps": 2465, "total_steps": 4410, "loss": 0.2755, "lr": 1.939061427315179e-05, "epoch": 3.9126984126984126, "percentage": 55.9, "elapsed_time": "5:13:55", "remaining_time": "4:07:42"}
|
||||
{"current_steps": 2470, "total_steps": 4410, "loss": 0.2829, "lr": 1.931150274863265e-05, "epoch": 3.9206349206349205, "percentage": 56.01, "elapsed_time": "5:14:32", "remaining_time": "4:07:02"}
|
||||
{"current_steps": 2475, "total_steps": 4410, "loss": 0.273, "lr": 1.9232402008096643e-05, "epoch": 3.928571428571429, "percentage": 56.12, "elapsed_time": "5:15:10", "remaining_time": "4:06:24"}
|
||||
{"current_steps": 2480, "total_steps": 4410, "loss": 0.2843, "lr": 1.9153313290504495e-05, "epoch": 3.9365079365079367, "percentage": 56.24, "elapsed_time": "5:15:51", "remaining_time": "4:05:48"}
|
||||
{"current_steps": 2485, "total_steps": 4410, "loss": 0.2764, "lr": 1.9074237834628623e-05, "epoch": 3.9444444444444446, "percentage": 56.35, "elapsed_time": "5:16:32", "remaining_time": "4:05:12"}
|
||||
{"current_steps": 2490, "total_steps": 4410, "loss": 0.2699, "lr": 1.8995176879033698e-05, "epoch": 3.9523809523809526, "percentage": 56.46, "elapsed_time": "5:17:09", "remaining_time": "4:04:33"}
|
||||
{"current_steps": 2495, "total_steps": 4410, "loss": 0.2811, "lr": 1.89161316620573e-05, "epoch": 3.9603174603174605, "percentage": 56.58, "elapsed_time": "5:17:49", "remaining_time": "4:03:56"}
|
||||
{"current_steps": 2500, "total_steps": 4410, "loss": 0.289, "lr": 1.8837103421790486e-05, "epoch": 3.9682539682539684, "percentage": 56.69, "elapsed_time": "5:18:26", "remaining_time": "4:03:17"}
|
||||
{"current_steps": 2505, "total_steps": 4410, "loss": 0.2771, "lr": 1.8758093396058386e-05, "epoch": 3.9761904761904763, "percentage": 56.8, "elapsed_time": "5:19:00", "remaining_time": "4:02:36"}
|
||||
{"current_steps": 2510, "total_steps": 4410, "loss": 0.2839, "lr": 1.8679102822400874e-05, "epoch": 3.984126984126984, "percentage": 56.92, "elapsed_time": "5:19:28", "remaining_time": "4:01:50"}
|
||||
{"current_steps": 2515, "total_steps": 4410, "loss": 0.2854, "lr": 1.8600132938053098e-05, "epoch": 3.992063492063492, "percentage": 57.03, "elapsed_time": "5:20:02", "remaining_time": "4:01:08"}
|
||||
{"current_steps": 2520, "total_steps": 4410, "loss": 0.2866, "lr": 1.8521184979926177e-05, "epoch": 4.0, "percentage": 57.14, "elapsed_time": "5:20:41", "remaining_time": "4:00:30"}
|
||||
{"current_steps": 2525, "total_steps": 4410, "loss": 0.284, "lr": 1.8442260184587804e-05, "epoch": 4.007936507936508, "percentage": 57.26, "elapsed_time": "5:21:21", "remaining_time": "3:59:54"}
|
||||
{"current_steps": 2530, "total_steps": 4410, "loss": 0.2752, "lr": 1.8363359788242842e-05, "epoch": 4.015873015873016, "percentage": 57.37, "elapsed_time": "5:21:59", "remaining_time": "3:59:16"}
|
||||
{"current_steps": 2535, "total_steps": 4410, "loss": 0.2771, "lr": 1.8284485026714013e-05, "epoch": 4.023809523809524, "percentage": 57.48, "elapsed_time": "5:22:39", "remaining_time": "3:58:39"}
|
||||
{"current_steps": 2540, "total_steps": 4410, "loss": 0.2771, "lr": 1.8205637135422525e-05, "epoch": 4.031746031746032, "percentage": 57.6, "elapsed_time": "5:23:16", "remaining_time": "3:58:00"}
|
||||
{"current_steps": 2545, "total_steps": 4410, "loss": 0.275, "lr": 1.8126817349368697e-05, "epoch": 4.0396825396825395, "percentage": 57.71, "elapsed_time": "5:23:46", "remaining_time": "3:57:15"}
|
||||
{"current_steps": 2550, "total_steps": 4410, "loss": 0.2729, "lr": 1.8048026903112632e-05, "epoch": 4.0476190476190474, "percentage": 57.82, "elapsed_time": "5:24:23", "remaining_time": "3:56:37"}
|
||||
{"current_steps": 2555, "total_steps": 4410, "loss": 0.2766, "lr": 1.7969267030754903e-05, "epoch": 4.055555555555555, "percentage": 57.94, "elapsed_time": "5:25:03", "remaining_time": "3:56:00"}
|
||||
{"current_steps": 2560, "total_steps": 4410, "loss": 0.274, "lr": 1.7890538965917184e-05, "epoch": 4.063492063492063, "percentage": 58.05, "elapsed_time": "5:25:41", "remaining_time": "3:55:21"}
|
||||
{"current_steps": 2565, "total_steps": 4410, "loss": 0.2848, "lr": 1.7811843941722952e-05, "epoch": 4.071428571428571, "percentage": 58.16, "elapsed_time": "5:26:22", "remaining_time": "3:54:45"}
|
||||
{"current_steps": 2570, "total_steps": 4410, "loss": 0.2777, "lr": 1.7733183190778174e-05, "epoch": 4.079365079365079, "percentage": 58.28, "elapsed_time": "5:27:01", "remaining_time": "3:54:08"}
|
||||
{"current_steps": 2575, "total_steps": 4410, "loss": 0.2752, "lr": 1.7654557945151968e-05, "epoch": 4.087301587301587, "percentage": 58.39, "elapsed_time": "5:27:40", "remaining_time": "3:53:30"}
|
||||
{"current_steps": 2580, "total_steps": 4410, "loss": 0.2809, "lr": 1.7575969436357352e-05, "epoch": 4.095238095238095, "percentage": 58.5, "elapsed_time": "5:28:20", "remaining_time": "3:52:53"}
|
||||
{"current_steps": 2585, "total_steps": 4410, "loss": 0.2713, "lr": 1.7497418895331934e-05, "epoch": 4.103174603174603, "percentage": 58.62, "elapsed_time": "5:28:57", "remaining_time": "3:52:14"}
|
||||
{"current_steps": 2590, "total_steps": 4410, "loss": 0.282, "lr": 1.7418907552418597e-05, "epoch": 4.111111111111111, "percentage": 58.73, "elapsed_time": "5:29:32", "remaining_time": "3:51:34"}
|
||||
{"current_steps": 2595, "total_steps": 4410, "loss": 0.2857, "lr": 1.7340436637346315e-05, "epoch": 4.119047619047619, "percentage": 58.84, "elapsed_time": "5:30:08", "remaining_time": "3:50:54"}
|
||||
{"current_steps": 2600, "total_steps": 4410, "loss": 0.2827, "lr": 1.726200737921079e-05, "epoch": 4.1269841269841265, "percentage": 58.96, "elapsed_time": "5:30:49", "remaining_time": "3:50:18"}
|
||||
{"current_steps": 2605, "total_steps": 4410, "loss": 0.2777, "lr": 1.718362100645527e-05, "epoch": 4.134920634920635, "percentage": 59.07, "elapsed_time": "5:31:28", "remaining_time": "3:49:40"}
|
||||
{"current_steps": 2610, "total_steps": 4410, "loss": 0.2774, "lr": 1.710527874685129e-05, "epoch": 4.142857142857143, "percentage": 59.18, "elapsed_time": "5:32:01", "remaining_time": "3:48:58"}
|
||||
{"current_steps": 2615, "total_steps": 4410, "loss": 0.2787, "lr": 1.702698182747942e-05, "epoch": 4.150793650793651, "percentage": 59.3, "elapsed_time": "5:32:37", "remaining_time": "3:48:19"}
|
||||
{"current_steps": 2620, "total_steps": 4410, "loss": 0.2731, "lr": 1.6948731474710075e-05, "epoch": 4.158730158730159, "percentage": 59.41, "elapsed_time": "5:33:19", "remaining_time": "3:47:44"}
|
||||
{"current_steps": 2625, "total_steps": 4410, "loss": 0.2734, "lr": 1.68705289141843e-05, "epoch": 4.166666666666667, "percentage": 59.52, "elapsed_time": "5:33:58", "remaining_time": "3:47:06"}
|
||||
{"current_steps": 2630, "total_steps": 4410, "loss": 0.2752, "lr": 1.679237537079454e-05, "epoch": 4.174603174603175, "percentage": 59.64, "elapsed_time": "5:34:37", "remaining_time": "3:46:28"}
|
||||
{"current_steps": 2635, "total_steps": 4410, "loss": 0.2702, "lr": 1.6714272068665526e-05, "epoch": 4.182539682539683, "percentage": 59.75, "elapsed_time": "5:35:12", "remaining_time": "3:45:48"}
|
||||
{"current_steps": 2640, "total_steps": 4410, "loss": 0.2728, "lr": 1.663622023113501e-05, "epoch": 4.190476190476191, "percentage": 59.86, "elapsed_time": "5:35:45", "remaining_time": "3:45:06"}
|
||||
{"current_steps": 2645, "total_steps": 4410, "loss": 0.2793, "lr": 1.655822108073467e-05, "epoch": 4.198412698412699, "percentage": 59.98, "elapsed_time": "5:36:25", "remaining_time": "3:44:29"}
|
||||
{"current_steps": 2650, "total_steps": 4410, "loss": 0.2787, "lr": 1.648027583917095e-05, "epoch": 4.2063492063492065, "percentage": 60.09, "elapsed_time": "5:37:03", "remaining_time": "3:43:51"}
|
||||
{"current_steps": 2655, "total_steps": 4410, "loss": 0.2891, "lr": 1.640238572730591e-05, "epoch": 4.214285714285714, "percentage": 60.2, "elapsed_time": "5:37:42", "remaining_time": "3:43:13"}
|
||||
{"current_steps": 2660, "total_steps": 4410, "loss": 0.2825, "lr": 1.632455196513809e-05, "epoch": 4.222222222222222, "percentage": 60.32, "elapsed_time": "5:38:20", "remaining_time": "3:42:35"}
|
||||
{"current_steps": 2665, "total_steps": 4410, "loss": 0.2801, "lr": 1.624677577178345e-05, "epoch": 4.23015873015873, "percentage": 60.43, "elapsed_time": "5:38:56", "remaining_time": "3:41:56"}
|
||||
{"current_steps": 2670, "total_steps": 4410, "loss": 0.2772, "lr": 1.616905836545624e-05, "epoch": 4.238095238095238, "percentage": 60.54, "elapsed_time": "5:39:34", "remaining_time": "3:41:17"}
|
||||
{"current_steps": 2675, "total_steps": 4410, "loss": 0.2805, "lr": 1.6091400963449894e-05, "epoch": 4.246031746031746, "percentage": 60.66, "elapsed_time": "5:40:14", "remaining_time": "3:40:40"}
|
||||
{"current_steps": 2680, "total_steps": 4410, "loss": 0.2727, "lr": 1.6013804782118043e-05, "epoch": 4.253968253968254, "percentage": 60.77, "elapsed_time": "5:40:56", "remaining_time": "3:40:04"}
|
||||
{"current_steps": 2685, "total_steps": 4410, "loss": 0.2823, "lr": 1.5936271036855372e-05, "epoch": 4.261904761904762, "percentage": 60.88, "elapsed_time": "5:41:34", "remaining_time": "3:39:26"}
|
||||
{"current_steps": 2690, "total_steps": 4410, "loss": 0.2784, "lr": 1.585880094207864e-05, "epoch": 4.26984126984127, "percentage": 61.0, "elapsed_time": "5:42:07", "remaining_time": "3:38:45"}
|
||||
{"current_steps": 2695, "total_steps": 4410, "loss": 0.2816, "lr": 1.5781395711207664e-05, "epoch": 4.277777777777778, "percentage": 61.11, "elapsed_time": "5:42:48", "remaining_time": "3:38:08"}
|
||||
{"current_steps": 2700, "total_steps": 4410, "loss": 0.2798, "lr": 1.5704056556646255e-05, "epoch": 4.285714285714286, "percentage": 61.22, "elapsed_time": "5:43:24", "remaining_time": "3:37:29"}
|
||||
{"current_steps": 2705, "total_steps": 4410, "loss": 0.2813, "lr": 1.562678468976329e-05, "epoch": 4.2936507936507935, "percentage": 61.34, "elapsed_time": "5:44:03", "remaining_time": "3:36:52"}
|
||||
{"current_steps": 2710, "total_steps": 4410, "loss": 0.2883, "lr": 1.5549581320873715e-05, "epoch": 4.301587301587301, "percentage": 61.45, "elapsed_time": "5:44:42", "remaining_time": "3:36:14"}
|
||||
{"current_steps": 2715, "total_steps": 4410, "loss": 0.2801, "lr": 1.5472447659219573e-05, "epoch": 4.309523809523809, "percentage": 61.56, "elapsed_time": "5:45:22", "remaining_time": "3:35:37"}
|
||||
{"current_steps": 2720, "total_steps": 4410, "loss": 0.2771, "lr": 1.5395384912951096e-05, "epoch": 4.317460317460317, "percentage": 61.68, "elapsed_time": "5:46:01", "remaining_time": "3:34:59"}
|
||||
{"current_steps": 2725, "total_steps": 4410, "loss": 0.2737, "lr": 1.531839428910774e-05, "epoch": 4.325396825396825, "percentage": 61.79, "elapsed_time": "5:46:36", "remaining_time": "3:34:19"}
|
||||
{"current_steps": 2730, "total_steps": 4410, "loss": 0.2782, "lr": 1.5241476993599318e-05, "epoch": 4.333333333333333, "percentage": 61.9, "elapsed_time": "5:47:13", "remaining_time": "3:33:40"}
|
||||
{"current_steps": 2735, "total_steps": 4410, "loss": 0.2843, "lr": 1.5164634231187106e-05, "epoch": 4.341269841269841, "percentage": 62.02, "elapsed_time": "5:47:51", "remaining_time": "3:33:02"}
|
||||
{"current_steps": 2740, "total_steps": 4410, "loss": 0.2795, "lr": 1.5087867205464933e-05, "epoch": 4.349206349206349, "percentage": 62.13, "elapsed_time": "5:48:26", "remaining_time": "3:32:22"}
|
||||
{"current_steps": 2745, "total_steps": 4410, "loss": 0.2794, "lr": 1.5011177118840376e-05, "epoch": 4.357142857142857, "percentage": 62.24, "elapsed_time": "5:49:01", "remaining_time": "3:31:42"}
|
||||
{"current_steps": 2750, "total_steps": 4410, "loss": 0.283, "lr": 1.4934565172515917e-05, "epoch": 4.365079365079365, "percentage": 62.36, "elapsed_time": "5:49:42", "remaining_time": "3:31:06"}
|
||||
{"current_steps": 2755, "total_steps": 4410, "loss": 0.2759, "lr": 1.4858032566470107e-05, "epoch": 4.3730158730158735, "percentage": 62.47, "elapsed_time": "5:50:22", "remaining_time": "3:30:28"}
|
||||
{"current_steps": 2760, "total_steps": 4410, "loss": 0.2753, "lr": 1.4781580499438794e-05, "epoch": 4.380952380952381, "percentage": 62.59, "elapsed_time": "5:50:53", "remaining_time": "3:29:46"}
|
||||
{"current_steps": 2765, "total_steps": 4410, "loss": 0.2754, "lr": 1.4705210168896327e-05, "epoch": 4.388888888888889, "percentage": 62.7, "elapsed_time": "5:51:38", "remaining_time": "3:29:12"}
|
||||
{"current_steps": 2770, "total_steps": 4410, "loss": 0.2863, "lr": 1.462892277103681e-05, "epoch": 4.396825396825397, "percentage": 62.81, "elapsed_time": "5:52:20", "remaining_time": "3:28:36"}
|
||||
{"current_steps": 2775, "total_steps": 4410, "loss": 0.2771, "lr": 1.455271950075539e-05, "epoch": 4.404761904761905, "percentage": 62.93, "elapsed_time": "5:53:04", "remaining_time": "3:28:01"}
|
||||
{"current_steps": 2780, "total_steps": 4410, "loss": 0.2767, "lr": 1.4476601551629493e-05, "epoch": 4.412698412698413, "percentage": 63.04, "elapsed_time": "5:53:47", "remaining_time": "3:27:26"}
|
||||
{"current_steps": 2785, "total_steps": 4410, "loss": 0.2733, "lr": 1.4400570115900147e-05, "epoch": 4.420634920634921, "percentage": 63.15, "elapsed_time": "5:54:24", "remaining_time": "3:26:47"}
|
||||
{"current_steps": 2790, "total_steps": 4410, "loss": 0.2691, "lr": 1.4324626384453345e-05, "epoch": 4.428571428571429, "percentage": 63.27, "elapsed_time": "5:55:06", "remaining_time": "3:26:11"}
|
||||
{"current_steps": 2795, "total_steps": 4410, "loss": 0.28, "lr": 1.4248771546801339e-05, "epoch": 4.436507936507937, "percentage": 63.38, "elapsed_time": "5:55:49", "remaining_time": "3:25:36"}
|
||||
{"current_steps": 2800, "total_steps": 4410, "loss": 0.2736, "lr": 1.4173006791064023e-05, "epoch": 4.444444444444445, "percentage": 63.49, "elapsed_time": "5:56:33", "remaining_time": "3:25:01"}
|
||||
{"current_steps": 2805, "total_steps": 4410, "loss": 0.2765, "lr": 1.4097333303950368e-05, "epoch": 4.4523809523809526, "percentage": 63.61, "elapsed_time": "5:57:05", "remaining_time": "3:24:19"}
|
||||
{"current_steps": 2810, "total_steps": 4410, "loss": 0.2843, "lr": 1.4021752270739759e-05, "epoch": 4.4603174603174605, "percentage": 63.72, "elapsed_time": "5:57:43", "remaining_time": "3:23:41"}
|
||||
{"current_steps": 2815, "total_steps": 4410, "loss": 0.2655, "lr": 1.3946264875263485e-05, "epoch": 4.468253968253968, "percentage": 63.83, "elapsed_time": "5:58:13", "remaining_time": "3:22:58"}
|
||||
{"current_steps": 2820, "total_steps": 4410, "loss": 0.2767, "lr": 1.3870872299886184e-05, "epoch": 4.476190476190476, "percentage": 63.95, "elapsed_time": "5:58:48", "remaining_time": "3:22:18"}
|
||||
{"current_steps": 2825, "total_steps": 4410, "loss": 0.2746, "lr": 1.3795575725487303e-05, "epoch": 4.484126984126984, "percentage": 64.06, "elapsed_time": "5:59:24", "remaining_time": "3:21:39"}
|
||||
{"current_steps": 2830, "total_steps": 4410, "loss": 0.2844, "lr": 1.3720376331442652e-05, "epoch": 4.492063492063492, "percentage": 64.17, "elapsed_time": "6:00:00", "remaining_time": "3:20:59"}
|
||||
{"current_steps": 2835, "total_steps": 4410, "loss": 0.2727, "lr": 1.364527529560586e-05, "epoch": 4.5, "percentage": 64.29, "elapsed_time": "6:00:38", "remaining_time": "3:20:21"}
|
||||
{"current_steps": 2840, "total_steps": 4410, "loss": 0.2776, "lr": 1.3570273794289978e-05, "epoch": 4.507936507936508, "percentage": 64.4, "elapsed_time": "6:01:17", "remaining_time": "3:19:43"}
|
||||
{"current_steps": 2845, "total_steps": 4410, "loss": 0.2811, "lr": 1.3495373002249061e-05, "epoch": 4.515873015873016, "percentage": 64.51, "elapsed_time": "6:01:57", "remaining_time": "3:19:06"}
|
||||
{"current_steps": 2850, "total_steps": 4410, "loss": 0.273, "lr": 1.3420574092659713e-05, "epoch": 4.523809523809524, "percentage": 64.63, "elapsed_time": "6:02:43", "remaining_time": "3:18:32"}
|
||||
{"current_steps": 2855, "total_steps": 4410, "loss": 0.2878, "lr": 1.3345878237102766e-05, "epoch": 4.531746031746032, "percentage": 64.74, "elapsed_time": "6:03:21", "remaining_time": "3:17:54"}
|
||||
{"current_steps": 2860, "total_steps": 4410, "loss": 0.2804, "lr": 1.3271286605544906e-05, "epoch": 4.5396825396825395, "percentage": 64.85, "elapsed_time": "6:03:51", "remaining_time": "3:17:11"}
|
||||
{"current_steps": 2865, "total_steps": 4410, "loss": 0.2765, "lr": 1.3196800366320357e-05, "epoch": 4.5476190476190474, "percentage": 64.97, "elapsed_time": "6:04:32", "remaining_time": "3:16:34"}
|
||||
{"current_steps": 2870, "total_steps": 4410, "loss": 0.2834, "lr": 1.3122420686112554e-05, "epoch": 4.555555555555555, "percentage": 65.08, "elapsed_time": "6:05:12", "remaining_time": "3:15:57"}
|
||||
{"current_steps": 2875, "total_steps": 4410, "loss": 0.2789, "lr": 1.3048148729935917e-05, "epoch": 4.563492063492063, "percentage": 65.19, "elapsed_time": "6:05:53", "remaining_time": "3:15:21"}
|
||||
{"current_steps": 2880, "total_steps": 4410, "loss": 0.2711, "lr": 1.297398566111756e-05, "epoch": 4.571428571428571, "percentage": 65.31, "elapsed_time": "6:06:33", "remaining_time": "3:14:44"}
|
||||
{"current_steps": 2885, "total_steps": 4410, "loss": 0.2778, "lr": 1.2899932641279082e-05, "epoch": 4.579365079365079, "percentage": 65.42, "elapsed_time": "6:07:14", "remaining_time": "3:14:07"}
|
||||
{"current_steps": 2890, "total_steps": 4410, "loss": 0.2706, "lr": 1.2825990830318395e-05, "epoch": 4.587301587301587, "percentage": 65.53, "elapsed_time": "6:07:59", "remaining_time": "3:13:32"}
|
||||
{"current_steps": 2895, "total_steps": 4410, "loss": 0.2801, "lr": 1.2752161386391526e-05, "epoch": 4.595238095238095, "percentage": 65.65, "elapsed_time": "6:08:37", "remaining_time": "3:12:54"}
|
||||
{"current_steps": 2900, "total_steps": 4410, "loss": 0.2778, "lr": 1.2678445465894491e-05, "epoch": 4.603174603174603, "percentage": 65.76, "elapsed_time": "6:09:15", "remaining_time": "3:12:15"}
|
||||
{"current_steps": 2905, "total_steps": 4410, "loss": 0.2747, "lr": 1.2604844223445181e-05, "epoch": 4.611111111111111, "percentage": 65.87, "elapsed_time": "6:09:51", "remaining_time": "3:11:36"}
|
||||
{"current_steps": 2910, "total_steps": 4410, "loss": 0.2724, "lr": 1.2531358811865268e-05, "epoch": 4.619047619047619, "percentage": 65.99, "elapsed_time": "6:10:34", "remaining_time": "3:11:00"}
|
||||
{"current_steps": 2915, "total_steps": 4410, "loss": 0.2801, "lr": 1.2457990382162173e-05, "epoch": 4.6269841269841265, "percentage": 66.1, "elapsed_time": "6:11:12", "remaining_time": "3:10:22"}
|
||||
{"current_steps": 2920, "total_steps": 4410, "loss": 0.2739, "lr": 1.238474008351101e-05, "epoch": 4.634920634920634, "percentage": 66.21, "elapsed_time": "6:11:55", "remaining_time": "3:09:46"}
|
||||
{"current_steps": 2925, "total_steps": 4410, "loss": 0.2738, "lr": 1.2311609063236594e-05, "epoch": 4.642857142857143, "percentage": 66.33, "elapsed_time": "6:12:32", "remaining_time": "3:09:08"}
|
||||
{"current_steps": 2930, "total_steps": 4410, "loss": 0.2808, "lr": 1.2238598466795493e-05, "epoch": 4.650793650793651, "percentage": 66.44, "elapsed_time": "6:13:06", "remaining_time": "3:08:28"}
|
||||
{"current_steps": 2935, "total_steps": 4410, "loss": 0.2787, "lr": 1.2165709437758042e-05, "epoch": 4.658730158730159, "percentage": 66.55, "elapsed_time": "6:13:45", "remaining_time": "3:07:49"}
|
||||
{"current_steps": 2940, "total_steps": 4410, "loss": 0.2785, "lr": 1.209294311779047e-05, "epoch": 4.666666666666667, "percentage": 66.67, "elapsed_time": "6:14:19", "remaining_time": "3:07:09"}
|
||||
{"current_steps": 2945, "total_steps": 4410, "loss": 0.2777, "lr": 1.2020300646637018e-05, "epoch": 4.674603174603175, "percentage": 66.78, "elapsed_time": "6:14:58", "remaining_time": "3:06:31"}
|
||||
{"current_steps": 2950, "total_steps": 4410, "loss": 0.2798, "lr": 1.1947783162102043e-05, "epoch": 4.682539682539683, "percentage": 66.89, "elapsed_time": "6:15:37", "remaining_time": "3:05:54"}
|
||||
{"current_steps": 2955, "total_steps": 4410, "loss": 0.2797, "lr": 1.1875391800032248e-05, "epoch": 4.690476190476191, "percentage": 67.01, "elapsed_time": "6:16:08", "remaining_time": "3:05:12"}
|
||||
{"current_steps": 2960, "total_steps": 4410, "loss": 0.2784, "lr": 1.1803127694298873e-05, "epoch": 4.698412698412699, "percentage": 67.12, "elapsed_time": "6:16:50", "remaining_time": "3:04:36"}
|
||||
{"current_steps": 2965, "total_steps": 4410, "loss": 0.2801, "lr": 1.173099197677992e-05, "epoch": 4.7063492063492065, "percentage": 67.23, "elapsed_time": "6:17:33", "remaining_time": "3:04:00"}
|
||||
{"current_steps": 2970, "total_steps": 4410, "loss": 0.2778, "lr": 1.1658985777342458e-05, "epoch": 4.714285714285714, "percentage": 67.35, "elapsed_time": "6:18:08", "remaining_time": "3:03:20"}
|
||||
{"current_steps": 2975, "total_steps": 4410, "loss": 0.2787, "lr": 1.1587110223824874e-05, "epoch": 4.722222222222222, "percentage": 67.46, "elapsed_time": "6:18:41", "remaining_time": "3:02:39"}
|
||||
{"current_steps": 2980, "total_steps": 4410, "loss": 0.2775, "lr": 1.151536644201925e-05, "epoch": 4.73015873015873, "percentage": 67.57, "elapsed_time": "6:19:14", "remaining_time": "3:01:59"}
|
||||
{"current_steps": 2985, "total_steps": 4410, "loss": 0.273, "lr": 1.1443755555653751e-05, "epoch": 4.738095238095238, "percentage": 67.69, "elapsed_time": "6:19:57", "remaining_time": "3:01:23"}
|
||||
{"current_steps": 2990, "total_steps": 4410, "loss": 0.2763, "lr": 1.1372278686374935e-05, "epoch": 4.746031746031746, "percentage": 67.8, "elapsed_time": "6:20:35", "remaining_time": "3:00:44"}
|
||||
{"current_steps": 2995, "total_steps": 4410, "loss": 0.2748, "lr": 1.1300936953730273e-05, "epoch": 4.753968253968254, "percentage": 67.91, "elapsed_time": "6:21:09", "remaining_time": "3:00:04"}
|
||||
{"current_steps": 3000, "total_steps": 4410, "loss": 0.2716, "lr": 1.1229731475150594e-05, "epoch": 4.761904761904762, "percentage": 68.03, "elapsed_time": "6:21:43", "remaining_time": "2:59:24"}
|
||||
{"current_steps": 3005, "total_steps": 4410, "loss": 0.2788, "lr": 1.1158663365932529e-05, "epoch": 4.76984126984127, "percentage": 68.14, "elapsed_time": "6:22:31", "remaining_time": "2:58:51"}
|
||||
{"current_steps": 3010, "total_steps": 4410, "loss": 0.2855, "lr": 1.1087733739221109e-05, "epoch": 4.777777777777778, "percentage": 68.25, "elapsed_time": "6:23:11", "remaining_time": "2:58:13"}
|
||||
{"current_steps": 3015, "total_steps": 4410, "loss": 0.2717, "lr": 1.1016943705992311e-05, "epoch": 4.785714285714286, "percentage": 68.37, "elapsed_time": "6:23:46", "remaining_time": "2:57:33"}
|
||||
{"current_steps": 3020, "total_steps": 4410, "loss": 0.2779, "lr": 1.0946294375035639e-05, "epoch": 4.7936507936507935, "percentage": 68.48, "elapsed_time": "6:24:20", "remaining_time": "2:56:53"}
|
||||
{"current_steps": 3025, "total_steps": 4410, "loss": 0.2751, "lr": 1.087578685293674e-05, "epoch": 4.801587301587301, "percentage": 68.59, "elapsed_time": "6:24:59", "remaining_time": "2:56:16"}
|
||||
{"current_steps": 3030, "total_steps": 4410, "loss": 0.2787, "lr": 1.080542224406015e-05, "epoch": 4.809523809523809, "percentage": 68.71, "elapsed_time": "6:25:36", "remaining_time": "2:55:37"}
|
||||
{"current_steps": 3035, "total_steps": 4410, "loss": 0.2787, "lr": 1.0735201650531915e-05, "epoch": 4.817460317460317, "percentage": 68.82, "elapsed_time": "6:26:15", "remaining_time": "2:54:59"}
|
||||
{"current_steps": 3040, "total_steps": 4410, "loss": 0.2754, "lr": 1.066512617222235e-05, "epoch": 4.825396825396825, "percentage": 68.93, "elapsed_time": "6:26:57", "remaining_time": "2:54:23"}
|
||||
{"current_steps": 3045, "total_steps": 4410, "loss": 0.2764, "lr": 1.059519690672884e-05, "epoch": 4.833333333333333, "percentage": 69.05, "elapsed_time": "6:27:32", "remaining_time": "2:53:43"}
|
||||
{"current_steps": 3050, "total_steps": 4410, "loss": 0.2788, "lr": 1.0525414949358614e-05, "epoch": 4.841269841269841, "percentage": 69.16, "elapsed_time": "6:28:06", "remaining_time": "2:53:03"}
|
||||
{"current_steps": 3055, "total_steps": 4410, "loss": 0.2781, "lr": 1.0455781393111613e-05, "epoch": 4.849206349206349, "percentage": 69.27, "elapsed_time": "6:28:41", "remaining_time": "2:52:23"}
|
||||
{"current_steps": 3060, "total_steps": 4410, "loss": 0.2724, "lr": 1.0386297328663353e-05, "epoch": 4.857142857142857, "percentage": 69.39, "elapsed_time": "6:29:13", "remaining_time": "2:51:43"}
|
||||
{"current_steps": 3065, "total_steps": 4410, "loss": 0.2723, "lr": 1.0316963844347843e-05, "epoch": 4.865079365079366, "percentage": 69.5, "elapsed_time": "6:29:47", "remaining_time": "2:51:02"}
|
||||
{"current_steps": 3070, "total_steps": 4410, "loss": 0.2746, "lr": 1.0247782026140576e-05, "epoch": 4.8730158730158735, "percentage": 69.61, "elapsed_time": "6:30:30", "remaining_time": "2:50:27"}
|
||||
{"current_steps": 3075, "total_steps": 4410, "loss": 0.2876, "lr": 1.017875295764144e-05, "epoch": 4.880952380952381, "percentage": 69.73, "elapsed_time": "6:31:10", "remaining_time": "2:49:49"}
|
||||
{"current_steps": 3080, "total_steps": 4410, "loss": 0.2754, "lr": 1.0109877720057818e-05, "epoch": 4.888888888888889, "percentage": 69.84, "elapsed_time": "6:31:48", "remaining_time": "2:49:11"}
|
||||
{"current_steps": 3085, "total_steps": 4410, "loss": 0.2772, "lr": 1.0041157392187651e-05, "epoch": 4.896825396825397, "percentage": 69.95, "elapsed_time": "6:32:22", "remaining_time": "2:48:31"}
|
||||
{"current_steps": 3090, "total_steps": 4410, "loss": 0.2797, "lr": 9.972593050402471e-06, "epoch": 4.904761904761905, "percentage": 70.07, "elapsed_time": "6:32:59", "remaining_time": "2:47:52"}
|
||||
{"current_steps": 3095, "total_steps": 4410, "loss": 0.2824, "lr": 9.904185768630612e-06, "epoch": 4.912698412698413, "percentage": 70.18, "elapsed_time": "6:33:35", "remaining_time": "2:47:13"}
|
||||
{"current_steps": 3100, "total_steps": 4410, "loss": 0.2713, "lr": 9.835936618340377e-06, "epoch": 4.920634920634921, "percentage": 70.29, "elapsed_time": "6:34:14", "remaining_time": "2:46:35"}
|
||||
{"current_steps": 3105, "total_steps": 4410, "loss": 0.2773, "lr": 9.76784666852323e-06, "epoch": 4.928571428571429, "percentage": 70.41, "elapsed_time": "6:34:51", "remaining_time": "2:45:57"}
|
||||
{"current_steps": 3110, "total_steps": 4410, "loss": 0.2748, "lr": 9.699916985677062e-06, "epoch": 4.936507936507937, "percentage": 70.52, "elapsed_time": "6:35:22", "remaining_time": "2:45:16"}
|
||||
{"current_steps": 3115, "total_steps": 4410, "loss": 0.2812, "lr": 9.6321486337895e-06, "epoch": 4.944444444444445, "percentage": 70.63, "elapsed_time": "6:35:58", "remaining_time": "2:44:36"}
|
||||
{"current_steps": 3120, "total_steps": 4410, "loss": 0.2843, "lr": 9.564542674321228e-06, "epoch": 4.9523809523809526, "percentage": 70.75, "elapsed_time": "6:36:38", "remaining_time": "2:43:59"}
|
||||
{"current_steps": 3125, "total_steps": 4410, "loss": 0.2827, "lr": 9.49710016618937e-06, "epoch": 4.9603174603174605, "percentage": 70.86, "elapsed_time": "6:37:18", "remaining_time": "2:43:22"}
|
||||
{"current_steps": 3130, "total_steps": 4410, "loss": 0.273, "lr": 9.429822165750893e-06, "epoch": 4.968253968253968, "percentage": 70.98, "elapsed_time": "6:37:56", "remaining_time": "2:42:44"}
|
||||
{"current_steps": 3135, "total_steps": 4410, "loss": 0.2811, "lr": 9.36270972678607e-06, "epoch": 4.976190476190476, "percentage": 71.09, "elapsed_time": "6:38:36", "remaining_time": "2:42:06"}
|
||||
{"current_steps": 3140, "total_steps": 4410, "loss": 0.2757, "lr": 9.295763900481977e-06, "epoch": 4.984126984126984, "percentage": 71.2, "elapsed_time": "6:39:18", "remaining_time": "2:41:30"}
|
||||
{"current_steps": 3145, "total_steps": 4410, "loss": 0.2769, "lr": 9.22898573541602e-06, "epoch": 4.992063492063492, "percentage": 71.32, "elapsed_time": "6:39:57", "remaining_time": "2:40:52"}
|
||||
{"current_steps": 3150, "total_steps": 4410, "loss": 0.2664, "lr": 9.162376277539513e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "6:40:30", "remaining_time": "2:40:12"}
|
||||
{"current_steps": 3155, "total_steps": 4410, "loss": 0.2708, "lr": 9.095936570161301e-06, "epoch": 5.007936507936508, "percentage": 71.54, "elapsed_time": "6:41:11", "remaining_time": "2:39:35"}
|
||||
{"current_steps": 3160, "total_steps": 4410, "loss": 0.2783, "lr": 9.029667653931411e-06, "epoch": 5.015873015873016, "percentage": 71.66, "elapsed_time": "6:41:49", "remaining_time": "2:38:57"}
|
||||
{"current_steps": 3165, "total_steps": 4410, "loss": 0.2709, "lr": 8.96357056682475e-06, "epoch": 5.023809523809524, "percentage": 71.77, "elapsed_time": "6:42:19", "remaining_time": "2:38:15"}
|
||||
{"current_steps": 3170, "total_steps": 4410, "loss": 0.2804, "lr": 8.897646344124882e-06, "epoch": 5.031746031746032, "percentage": 71.88, "elapsed_time": "6:43:05", "remaining_time": "2:37:40"}
|
||||
{"current_steps": 3175, "total_steps": 4410, "loss": 0.2787, "lr": 8.83189601840773e-06, "epoch": 5.0396825396825395, "percentage": 72.0, "elapsed_time": "6:43:41", "remaining_time": "2:37:01"}
|
||||
{"current_steps": 3180, "total_steps": 4410, "loss": 0.2756, "lr": 8.766320619525511e-06, "epoch": 5.0476190476190474, "percentage": 72.11, "elapsed_time": "6:44:17", "remaining_time": "2:36:22"}
|
||||
{"current_steps": 3185, "total_steps": 4410, "loss": 0.2759, "lr": 8.700921174590525e-06, "epoch": 5.055555555555555, "percentage": 72.22, "elapsed_time": "6:44:53", "remaining_time": "2:35:43"}
|
||||
{"current_steps": 3190, "total_steps": 4410, "loss": 0.2687, "lr": 8.63569870795907e-06, "epoch": 5.063492063492063, "percentage": 72.34, "elapsed_time": "6:45:34", "remaining_time": "2:35:06"}
|
||||
{"current_steps": 3195, "total_steps": 4410, "loss": 0.2788, "lr": 8.570654241215466e-06, "epoch": 5.071428571428571, "percentage": 72.45, "elapsed_time": "6:46:06", "remaining_time": "2:34:26"}
|
||||
{"current_steps": 3200, "total_steps": 4410, "loss": 0.2727, "lr": 8.505788793155978e-06, "epoch": 5.079365079365079, "percentage": 72.56, "elapsed_time": "6:46:45", "remaining_time": "2:33:48"}
|
||||
{"current_steps": 3205, "total_steps": 4410, "loss": 0.2786, "lr": 8.441103379772893e-06, "epoch": 5.087301587301587, "percentage": 72.68, "elapsed_time": "6:47:25", "remaining_time": "2:33:11"}
|
||||
{"current_steps": 3210, "total_steps": 4410, "loss": 0.2696, "lr": 8.376599014238605e-06, "epoch": 5.095238095238095, "percentage": 72.79, "elapsed_time": "6:48:02", "remaining_time": "2:32:32"}
|
||||
{"current_steps": 3215, "total_steps": 4410, "loss": 0.2741, "lr": 8.312276706889738e-06, "epoch": 5.103174603174603, "percentage": 72.9, "elapsed_time": "6:48:36", "remaining_time": "2:31:52"}
|
||||
{"current_steps": 3220, "total_steps": 4410, "loss": 0.2791, "lr": 8.24813746521133e-06, "epoch": 5.111111111111111, "percentage": 73.02, "elapsed_time": "6:49:16", "remaining_time": "2:31:15"}
|
||||
{"current_steps": 3225, "total_steps": 4410, "loss": 0.2798, "lr": 8.184182293821046e-06, "epoch": 5.119047619047619, "percentage": 73.13, "elapsed_time": "6:49:52", "remaining_time": "2:30:36"}
|
||||
{"current_steps": 3230, "total_steps": 4410, "loss": 0.2718, "lr": 8.120412194453442e-06, "epoch": 5.1269841269841265, "percentage": 73.24, "elapsed_time": "6:50:27", "remaining_time": "2:29:57"}
|
||||
{"current_steps": 3235, "total_steps": 4410, "loss": 0.2792, "lr": 8.056828165944282e-06, "epoch": 5.134920634920635, "percentage": 73.36, "elapsed_time": "6:51:07", "remaining_time": "2:29:19"}
|
||||
{"current_steps": 3240, "total_steps": 4410, "loss": 0.2713, "lr": 7.993431204214883e-06, "epoch": 5.142857142857143, "percentage": 73.47, "elapsed_time": "6:51:42", "remaining_time": "2:28:40"}
|
||||
{"current_steps": 3245, "total_steps": 4410, "loss": 0.272, "lr": 7.93022230225652e-06, "epoch": 5.150793650793651, "percentage": 73.58, "elapsed_time": "6:52:21", "remaining_time": "2:28:02"}
|
||||
{"current_steps": 3250, "total_steps": 4410, "loss": 0.2686, "lr": 7.867202450114892e-06, "epoch": 5.158730158730159, "percentage": 73.7, "elapsed_time": "6:53:03", "remaining_time": "2:27:25"}
|
||||
{"current_steps": 3255, "total_steps": 4410, "loss": 0.2739, "lr": 7.804372634874582e-06, "epoch": 5.166666666666667, "percentage": 73.81, "elapsed_time": "6:53:41", "remaining_time": "2:26:47"}
|
||||
{"current_steps": 3260, "total_steps": 4410, "loss": 0.2795, "lr": 7.74173384064359e-06, "epoch": 5.174603174603175, "percentage": 73.92, "elapsed_time": "6:54:21", "remaining_time": "2:26:10"}
|
||||
{"current_steps": 3265, "total_steps": 4410, "loss": 0.2841, "lr": 7.679287048537987e-06, "epoch": 5.182539682539683, "percentage": 74.04, "elapsed_time": "6:55:01", "remaining_time": "2:25:32"}
|
||||
{"current_steps": 3270, "total_steps": 4410, "loss": 0.2801, "lr": 7.617033236666469e-06, "epoch": 5.190476190476191, "percentage": 74.15, "elapsed_time": "6:55:40", "remaining_time": "2:24:54"}
|
||||
{"current_steps": 3275, "total_steps": 4410, "loss": 0.271, "lr": 7.55497338011506e-06, "epoch": 5.198412698412699, "percentage": 74.26, "elapsed_time": "6:56:17", "remaining_time": "2:24:16"}
|
||||
{"current_steps": 3280, "total_steps": 4410, "loss": 0.2735, "lr": 7.493108450931879e-06, "epoch": 5.2063492063492065, "percentage": 74.38, "elapsed_time": "6:56:50", "remaining_time": "2:23:36"}
|
||||
{"current_steps": 3285, "total_steps": 4410, "loss": 0.2687, "lr": 7.4314394181118636e-06, "epoch": 5.214285714285714, "percentage": 74.49, "elapsed_time": "6:57:33", "remaining_time": "2:22:59"}
|
||||
{"current_steps": 3290, "total_steps": 4410, "loss": 0.2755, "lr": 7.369967247581611e-06, "epoch": 5.222222222222222, "percentage": 74.6, "elapsed_time": "6:58:18", "remaining_time": "2:22:24"}
|
||||
{"current_steps": 3295, "total_steps": 4410, "loss": 0.2799, "lr": 7.3086929021842575e-06, "epoch": 5.23015873015873, "percentage": 74.72, "elapsed_time": "6:58:59", "remaining_time": "2:21:47"}
|
||||
{"current_steps": 3300, "total_steps": 4410, "loss": 0.2695, "lr": 7.247617341664384e-06, "epoch": 5.238095238095238, "percentage": 74.83, "elapsed_time": "6:59:36", "remaining_time": "2:21:08"}
|
||||
{"current_steps": 3305, "total_steps": 4410, "loss": 0.2758, "lr": 7.186741522652994e-06, "epoch": 5.246031746031746, "percentage": 74.94, "elapsed_time": "7:00:15", "remaining_time": "2:20:30"}
|
||||
{"current_steps": 3310, "total_steps": 4410, "loss": 0.2833, "lr": 7.12606639865252e-06, "epoch": 5.253968253968254, "percentage": 75.06, "elapsed_time": "7:00:45", "remaining_time": "2:19:49"}
|
||||
{"current_steps": 3315, "total_steps": 4410, "loss": 0.283, "lr": 7.065592920021893e-06, "epoch": 5.261904761904762, "percentage": 75.17, "elapsed_time": "7:01:31", "remaining_time": "2:19:14"}
|
||||
{"current_steps": 3320, "total_steps": 4410, "loss": 0.2771, "lr": 7.005322033961679e-06, "epoch": 5.26984126984127, "percentage": 75.28, "elapsed_time": "7:02:13", "remaining_time": "2:18:37"}
|
||||
{"current_steps": 3325, "total_steps": 4410, "loss": 0.2743, "lr": 6.945254684499185e-06, "epoch": 5.277777777777778, "percentage": 75.4, "elapsed_time": "7:02:47", "remaining_time": "2:17:57"}
|
||||
{"current_steps": 3330, "total_steps": 4410, "loss": 0.275, "lr": 6.8853918124737274e-06, "epoch": 5.285714285714286, "percentage": 75.51, "elapsed_time": "7:03:19", "remaining_time": "2:17:17"}
|
||||
{"current_steps": 3335, "total_steps": 4410, "loss": 0.2766, "lr": 6.825734355521898e-06, "epoch": 5.2936507936507935, "percentage": 75.62, "elapsed_time": "7:03:58", "remaining_time": "2:16:39"}
|
||||
{"current_steps": 3340, "total_steps": 4410, "loss": 0.2736, "lr": 6.766283248062817e-06, "epoch": 5.301587301587301, "percentage": 75.74, "elapsed_time": "7:04:37", "remaining_time": "2:16:01"}
|
||||
{"current_steps": 3345, "total_steps": 4410, "loss": 0.2759, "lr": 6.707039421283559e-06, "epoch": 5.309523809523809, "percentage": 75.85, "elapsed_time": "7:05:18", "remaining_time": "2:15:24"}
|
||||
{"current_steps": 3350, "total_steps": 4410, "loss": 0.2692, "lr": 6.648003803124559e-06, "epoch": 5.317460317460317, "percentage": 75.96, "elapsed_time": "7:05:59", "remaining_time": "2:14:47"}
|
||||
{"current_steps": 3355, "total_steps": 4410, "loss": 0.2663, "lr": 6.589177318265047e-06, "epoch": 5.325396825396825, "percentage": 76.08, "elapsed_time": "7:06:39", "remaining_time": "2:14:09"}
|
||||
{"current_steps": 3360, "total_steps": 4410, "loss": 0.271, "lr": 6.53056088810857e-06, "epoch": 5.333333333333333, "percentage": 76.19, "elapsed_time": "7:07:16", "remaining_time": "2:13:31"}
|
||||
{"current_steps": 3365, "total_steps": 4410, "loss": 0.2783, "lr": 6.472155430768608e-06, "epoch": 5.341269841269841, "percentage": 76.3, "elapsed_time": "7:07:52", "remaining_time": "2:12:52"}
|
||||
{"current_steps": 3370, "total_steps": 4410, "loss": 0.2729, "lr": 6.413961861054132e-06, "epoch": 5.349206349206349, "percentage": 76.42, "elapsed_time": "7:08:29", "remaining_time": "2:12:14"}
|
||||
{"current_steps": 3375, "total_steps": 4410, "loss": 0.2824, "lr": 6.3559810904553095e-06, "epoch": 5.357142857142857, "percentage": 76.53, "elapsed_time": "7:09:09", "remaining_time": "2:11:36"}
|
||||
{"current_steps": 3380, "total_steps": 4410, "loss": 0.2759, "lr": 6.298214027129219e-06, "epoch": 5.365079365079365, "percentage": 76.64, "elapsed_time": "7:09:46", "remaining_time": "2:10:57"}
|
||||
{"current_steps": 3385, "total_steps": 4410, "loss": 0.2752, "lr": 6.240661575885629e-06, "epoch": 5.3730158730158735, "percentage": 76.76, "elapsed_time": "7:10:27", "remaining_time": "2:10:20"}
|
||||
{"current_steps": 3390, "total_steps": 4410, "loss": 0.2611, "lr": 6.183324638172819e-06, "epoch": 5.380952380952381, "percentage": 76.87, "elapsed_time": "7:11:02", "remaining_time": "2:09:41"}
|
||||
{"current_steps": 3395, "total_steps": 4410, "loss": 0.2705, "lr": 6.126204112063463e-06, "epoch": 5.388888888888889, "percentage": 76.98, "elapsed_time": "7:11:45", "remaining_time": "2:09:05"}
|
||||
{"current_steps": 3400, "total_steps": 4410, "loss": 0.273, "lr": 6.069300892240564e-06, "epoch": 5.396825396825397, "percentage": 77.1, "elapsed_time": "7:12:19", "remaining_time": "2:08:25"}
|
||||
{"current_steps": 3405, "total_steps": 4410, "loss": 0.2761, "lr": 6.0126158699834625e-06, "epoch": 5.404761904761905, "percentage": 77.21, "elapsed_time": "7:12:58", "remaining_time": "2:07:47"}
|
||||
{"current_steps": 3410, "total_steps": 4410, "loss": 0.272, "lr": 5.956149933153816e-06, "epoch": 5.412698412698413, "percentage": 77.32, "elapsed_time": "7:13:38", "remaining_time": "2:07:10"}
|
||||
{"current_steps": 3415, "total_steps": 4410, "loss": 0.2675, "lr": 5.899903966181751e-06, "epoch": 5.420634920634921, "percentage": 77.44, "elapsed_time": "7:14:16", "remaining_time": "2:06:31"}
|
||||
{"current_steps": 3420, "total_steps": 4410, "loss": 0.2711, "lr": 5.843878850052007e-06, "epoch": 5.428571428571429, "percentage": 77.55, "elapsed_time": "7:15:00", "remaining_time": "2:05:55"}
|
||||
{"current_steps": 3425, "total_steps": 4410, "loss": 0.2707, "lr": 5.788075462290084e-06, "epoch": 5.436507936507937, "percentage": 77.66, "elapsed_time": "7:15:34", "remaining_time": "2:05:15"}
|
||||
{"current_steps": 3430, "total_steps": 4410, "loss": 0.2743, "lr": 5.732494676948554e-06, "epoch": 5.444444444444445, "percentage": 77.78, "elapsed_time": "7:16:12", "remaining_time": "2:04:37"}
|
||||
{"current_steps": 3435, "total_steps": 4410, "loss": 0.2719, "lr": 5.677137364593363e-06, "epoch": 5.4523809523809526, "percentage": 77.89, "elapsed_time": "7:16:48", "remaining_time": "2:03:59"}
|
||||
{"current_steps": 3440, "total_steps": 4410, "loss": 0.2755, "lr": 5.622004392290163e-06, "epoch": 5.4603174603174605, "percentage": 78.0, "elapsed_time": "7:17:30", "remaining_time": "2:03:21"}
|
||||
{"current_steps": 3445, "total_steps": 4410, "loss": 0.2771, "lr": 5.567096623590758e-06, "epoch": 5.468253968253968, "percentage": 78.12, "elapsed_time": "7:18:08", "remaining_time": "2:02:43"}
|
||||
{"current_steps": 3450, "total_steps": 4410, "loss": 0.2737, "lr": 5.512414918519573e-06, "epoch": 5.476190476190476, "percentage": 78.23, "elapsed_time": "7:18:45", "remaining_time": "2:02:05"}
|
||||
{"current_steps": 3455, "total_steps": 4410, "loss": 0.2798, "lr": 5.457960133560179e-06, "epoch": 5.484126984126984, "percentage": 78.34, "elapsed_time": "7:19:27", "remaining_time": "2:01:28"}
|
||||
{"current_steps": 3460, "total_steps": 4410, "loss": 0.2807, "lr": 5.403733121641883e-06, "epoch": 5.492063492063492, "percentage": 78.46, "elapsed_time": "7:20:08", "remaining_time": "2:00:51"}
|
||||
{"current_steps": 3465, "total_steps": 4410, "loss": 0.2691, "lr": 5.349734732126366e-06, "epoch": 5.5, "percentage": 78.57, "elapsed_time": "7:20:45", "remaining_time": "2:00:12"}
|
||||
{"current_steps": 3470, "total_steps": 4410, "loss": 0.2818, "lr": 5.295965810794376e-06, "epoch": 5.507936507936508, "percentage": 78.68, "elapsed_time": "7:21:28", "remaining_time": "1:59:35"}
|
||||
{"current_steps": 3475, "total_steps": 4410, "loss": 0.2694, "lr": 5.2424271998324895e-06, "epoch": 5.515873015873016, "percentage": 78.8, "elapsed_time": "7:22:04", "remaining_time": "1:58:56"}
|
||||
{"current_steps": 3480, "total_steps": 4410, "loss": 0.2755, "lr": 5.189119737819912e-06, "epoch": 5.523809523809524, "percentage": 78.91, "elapsed_time": "7:22:40", "remaining_time": "1:58:18"}
|
||||
{"current_steps": 3485, "total_steps": 4410, "loss": 0.2716, "lr": 5.136044259715342e-06, "epoch": 5.531746031746032, "percentage": 79.02, "elapsed_time": "7:23:14", "remaining_time": "1:57:38"}
|
||||
{"current_steps": 3490, "total_steps": 4410, "loss": 0.2772, "lr": 5.083201596843905e-06, "epoch": 5.5396825396825395, "percentage": 79.14, "elapsed_time": "7:23:51", "remaining_time": "1:57:00"}
|
||||
{"current_steps": 3495, "total_steps": 4410, "loss": 0.2757, "lr": 5.030592576884117e-06, "epoch": 5.5476190476190474, "percentage": 79.25, "elapsed_time": "7:24:28", "remaining_time": "1:56:21"}
|
||||
{"current_steps": 3500, "total_steps": 4410, "loss": 0.2798, "lr": 4.978218023854928e-06, "epoch": 5.555555555555555, "percentage": 79.37, "elapsed_time": "7:25:09", "remaining_time": "1:55:44"}
|
||||
{"current_steps": 3505, "total_steps": 4410, "loss": 0.2759, "lr": 4.926078758102834e-06, "epoch": 5.563492063492063, "percentage": 79.48, "elapsed_time": "7:25:48", "remaining_time": "1:55:06"}
|
||||
{"current_steps": 3510, "total_steps": 4410, "loss": 0.2735, "lr": 4.87417559628897e-06, "epoch": 5.571428571428571, "percentage": 79.59, "elapsed_time": "7:26:32", "remaining_time": "1:54:29"}
|
||||
{"current_steps": 3515, "total_steps": 4410, "loss": 0.2748, "lr": 4.822509351376399e-06, "epoch": 5.579365079365079, "percentage": 79.71, "elapsed_time": "7:27:10", "remaining_time": "1:53:51"}
|
||||
{"current_steps": 3520, "total_steps": 4410, "loss": 0.2762, "lr": 4.7710808326173115e-06, "epoch": 5.587301587301587, "percentage": 79.82, "elapsed_time": "7:27:49", "remaining_time": "1:53:13"}
|
||||
{"current_steps": 3525, "total_steps": 4410, "loss": 0.2727, "lr": 4.719890845540385e-06, "epoch": 5.595238095238095, "percentage": 79.93, "elapsed_time": "7:28:31", "remaining_time": "1:52:36"}
|
||||
{"current_steps": 3530, "total_steps": 4410, "loss": 0.2724, "lr": 4.668940191938156e-06, "epoch": 5.603174603174603, "percentage": 80.05, "elapsed_time": "7:29:12", "remaining_time": "1:51:59"}
|
||||
{"current_steps": 3535, "total_steps": 4410, "loss": 0.2759, "lr": 4.618229669854464e-06, "epoch": 5.611111111111111, "percentage": 80.16, "elapsed_time": "7:29:46", "remaining_time": "1:51:19"}
|
||||
{"current_steps": 3540, "total_steps": 4410, "loss": 0.2659, "lr": 4.567760073571947e-06, "epoch": 5.619047619047619, "percentage": 80.27, "elapsed_time": "7:30:26", "remaining_time": "1:50:42"}
|
||||
{"current_steps": 3545, "total_steps": 4410, "loss": 0.279, "lr": 4.51753219359961e-06, "epoch": 5.6269841269841265, "percentage": 80.39, "elapsed_time": "7:31:09", "remaining_time": "1:50:05"}
|
||||
{"current_steps": 3550, "total_steps": 4410, "loss": 0.2756, "lr": 4.467546816660433e-06, "epoch": 5.634920634920634, "percentage": 80.5, "elapsed_time": "7:31:47", "remaining_time": "1:49:26"}
|
||||
{"current_steps": 3555, "total_steps": 4410, "loss": 0.2745, "lr": 4.417804725679058e-06, "epoch": 5.642857142857143, "percentage": 80.61, "elapsed_time": "7:32:24", "remaining_time": "1:48:48"}
|
||||
{"current_steps": 3560, "total_steps": 4410, "loss": 0.2724, "lr": 4.368306699769518e-06, "epoch": 5.650793650793651, "percentage": 80.73, "elapsed_time": "7:33:06", "remaining_time": "1:48:11"}
|
||||
{"current_steps": 3565, "total_steps": 4410, "loss": 0.2617, "lr": 4.319053514223033e-06, "epoch": 5.658730158730159, "percentage": 80.84, "elapsed_time": "7:33:44", "remaining_time": "1:47:32"}
|
||||
{"current_steps": 3570, "total_steps": 4410, "loss": 0.2724, "lr": 4.270045940495879e-06, "epoch": 5.666666666666667, "percentage": 80.95, "elapsed_time": "7:34:19", "remaining_time": "1:46:53"}
|
||||
{"current_steps": 3575, "total_steps": 4410, "loss": 0.2686, "lr": 4.221284746197292e-06, "epoch": 5.674603174603175, "percentage": 81.07, "elapsed_time": "7:35:00", "remaining_time": "1:46:16"}
|
||||
{"current_steps": 3580, "total_steps": 4410, "loss": 0.2684, "lr": 4.172770695077437e-06, "epoch": 5.682539682539683, "percentage": 81.18, "elapsed_time": "7:35:36", "remaining_time": "1:45:37"}
|
||||
{"current_steps": 3585, "total_steps": 4410, "loss": 0.2698, "lr": 4.124504547015487e-06, "epoch": 5.690476190476191, "percentage": 81.29, "elapsed_time": "7:36:15", "remaining_time": "1:44:59"}
|
||||
{"current_steps": 3590, "total_steps": 4410, "loss": 0.2779, "lr": 4.0764870580076675e-06, "epoch": 5.698412698412699, "percentage": 81.41, "elapsed_time": "7:36:56", "remaining_time": "1:44:22"}
|
||||
{"current_steps": 3595, "total_steps": 4410, "loss": 0.2782, "lr": 4.0287189801554304e-06, "epoch": 5.7063492063492065, "percentage": 81.52, "elapsed_time": "7:37:37", "remaining_time": "1:43:44"}
|
||||
{"current_steps": 3600, "total_steps": 4410, "loss": 0.2664, "lr": 3.98120106165371e-06, "epoch": 5.714285714285714, "percentage": 81.63, "elapsed_time": "7:38:09", "remaining_time": "1:43:05"}
|
||||
{"current_steps": 3605, "total_steps": 4410, "loss": 0.2769, "lr": 3.933934046779164e-06, "epoch": 5.722222222222222, "percentage": 81.75, "elapsed_time": "7:38:47", "remaining_time": "1:42:26"}
|
||||
{"current_steps": 3610, "total_steps": 4410, "loss": 0.2769, "lr": 3.886918675878513e-06, "epoch": 5.73015873015873, "percentage": 81.86, "elapsed_time": "7:39:25", "remaining_time": "1:41:48"}
|
||||
{"current_steps": 3615, "total_steps": 4410, "loss": 0.2737, "lr": 3.840155685356983e-06, "epoch": 5.738095238095238, "percentage": 81.97, "elapsed_time": "7:40:03", "remaining_time": "1:41:10"}
|
||||
{"current_steps": 3620, "total_steps": 4410, "loss": 0.2715, "lr": 3.793645807666735e-06, "epoch": 5.746031746031746, "percentage": 82.09, "elapsed_time": "7:40:38", "remaining_time": "1:40:31"}
|
||||
{"current_steps": 3625, "total_steps": 4410, "loss": 0.2805, "lr": 3.747389771295411e-06, "epoch": 5.753968253968254, "percentage": 82.2, "elapsed_time": "7:41:14", "remaining_time": "1:39:52"}
|
||||
{"current_steps": 3630, "total_steps": 4410, "loss": 0.2741, "lr": 3.701388300754709e-06, "epoch": 5.761904761904762, "percentage": 82.31, "elapsed_time": "7:41:54", "remaining_time": "1:39:15"}
|
||||
{"current_steps": 3635, "total_steps": 4410, "loss": 0.2683, "lr": 3.6556421165690516e-06, "epoch": 5.76984126984127, "percentage": 82.43, "elapsed_time": "7:42:33", "remaining_time": "1:38:37"}
|
||||
{"current_steps": 3640, "total_steps": 4410, "loss": 0.2778, "lr": 3.610151935264288e-06, "epoch": 5.777777777777778, "percentage": 82.54, "elapsed_time": "7:43:09", "remaining_time": "1:37:58"}
|
||||
{"current_steps": 3645, "total_steps": 4410, "loss": 0.2736, "lr": 3.5649184693564797e-06, "epoch": 5.785714285714286, "percentage": 82.65, "elapsed_time": "7:43:44", "remaining_time": "1:37:19"}
|
||||
{"current_steps": 3650, "total_steps": 4410, "loss": 0.2744, "lr": 3.5199424273407277e-06, "epoch": 5.7936507936507935, "percentage": 82.77, "elapsed_time": "7:44:26", "remaining_time": "1:36:42"}
|
||||
{"current_steps": 3655, "total_steps": 4410, "loss": 0.2742, "lr": 3.4752245136801065e-06, "epoch": 5.801587301587301, "percentage": 82.88, "elapsed_time": "7:45:00", "remaining_time": "1:36:03"}
|
||||
{"current_steps": 3660, "total_steps": 4410, "loss": 0.2723, "lr": 3.430765428794569e-06, "epoch": 5.809523809523809, "percentage": 82.99, "elapsed_time": "7:45:39", "remaining_time": "1:35:25"}
|
||||
{"current_steps": 3665, "total_steps": 4410, "loss": 0.2674, "lr": 3.3865658690500424e-06, "epoch": 5.817460317460317, "percentage": 83.11, "elapsed_time": "7:46:19", "remaining_time": "1:34:47"}
|
||||
{"current_steps": 3670, "total_steps": 4410, "loss": 0.2705, "lr": 3.34262652674749e-06, "epoch": 5.825396825396825, "percentage": 83.22, "elapsed_time": "7:46:59", "remaining_time": "1:34:09"}
|
||||
{"current_steps": 3675, "total_steps": 4410, "loss": 0.274, "lr": 3.2989480901120684e-06, "epoch": 5.833333333333333, "percentage": 83.33, "elapsed_time": "7:47:35", "remaining_time": "1:33:31"}
|
||||
{"current_steps": 3680, "total_steps": 4410, "loss": 0.2666, "lr": 3.2555312432823283e-06, "epoch": 5.841269841269841, "percentage": 83.45, "elapsed_time": "7:48:13", "remaining_time": "1:32:52"}
|
||||
{"current_steps": 3685, "total_steps": 4410, "loss": 0.2757, "lr": 3.2123766662995572e-06, "epoch": 5.849206349206349, "percentage": 83.56, "elapsed_time": "7:48:57", "remaining_time": "1:32:15"}
|
||||
{"current_steps": 3690, "total_steps": 4410, "loss": 0.2792, "lr": 3.1694850350970686e-06, "epoch": 5.857142857142857, "percentage": 83.67, "elapsed_time": "7:49:39", "remaining_time": "1:31:38"}
|
||||
{"current_steps": 3695, "total_steps": 4410, "loss": 0.2788, "lr": 3.1268570214896265e-06, "epoch": 5.865079365079366, "percentage": 83.79, "elapsed_time": "7:50:14", "remaining_time": "1:30:59"}
|
||||
{"current_steps": 3700, "total_steps": 4410, "loss": 0.2786, "lr": 3.0844932931629602e-06, "epoch": 5.8730158730158735, "percentage": 83.9, "elapsed_time": "7:50:48", "remaining_time": "1:30:20"}
|
||||
{"current_steps": 3705, "total_steps": 4410, "loss": 0.2727, "lr": 3.0423945136632626e-06, "epoch": 5.880952380952381, "percentage": 84.01, "elapsed_time": "7:51:23", "remaining_time": "1:29:41"}
|
||||
{"current_steps": 3710, "total_steps": 4410, "loss": 0.2733, "lr": 3.000561342386814e-06, "epoch": 5.888888888888889, "percentage": 84.13, "elapsed_time": "7:52:02", "remaining_time": "1:29:03"}
|
||||
{"current_steps": 3715, "total_steps": 4410, "loss": 0.2769, "lr": 2.9589944345696596e-06, "epoch": 5.896825396825397, "percentage": 84.24, "elapsed_time": "7:52:37", "remaining_time": "1:28:25"}
|
||||
{"current_steps": 3720, "total_steps": 4410, "loss": 0.2781, "lr": 2.9176944412773322e-06, "epoch": 5.904761904761905, "percentage": 84.35, "elapsed_time": "7:53:17", "remaining_time": "1:27:47"}
|
||||
{"current_steps": 3725, "total_steps": 4410, "loss": 0.2712, "lr": 2.876662009394673e-06, "epoch": 5.912698412698413, "percentage": 84.47, "elapsed_time": "7:54:00", "remaining_time": "1:27:09"}
|
||||
{"current_steps": 3730, "total_steps": 4410, "loss": 0.2658, "lr": 2.8358977816156796e-06, "epoch": 5.920634920634921, "percentage": 84.58, "elapsed_time": "7:54:36", "remaining_time": "1:26:31"}
|
||||
{"current_steps": 3735, "total_steps": 4410, "loss": 0.2754, "lr": 2.7954023964334485e-06, "epoch": 5.928571428571429, "percentage": 84.69, "elapsed_time": "7:55:04", "remaining_time": "1:25:51"}
|
||||
{"current_steps": 3740, "total_steps": 4410, "loss": 0.2737, "lr": 2.7551764881301955e-06, "epoch": 5.936507936507937, "percentage": 84.81, "elapsed_time": "7:55:47", "remaining_time": "1:25:14"}
|
||||
{"current_steps": 3745, "total_steps": 4410, "loss": 0.2664, "lr": 2.715220686767268e-06, "epoch": 5.944444444444445, "percentage": 84.92, "elapsed_time": "7:56:25", "remaining_time": "1:24:35"}
|
||||
{"current_steps": 3750, "total_steps": 4410, "loss": 0.2697, "lr": 2.6755356181753247e-06, "epoch": 5.9523809523809526, "percentage": 85.03, "elapsed_time": "7:57:04", "remaining_time": "1:23:57"}
|
||||
{"current_steps": 3755, "total_steps": 4410, "loss": 0.2747, "lr": 2.6361219039445328e-06, "epoch": 5.9603174603174605, "percentage": 85.15, "elapsed_time": "7:57:38", "remaining_time": "1:23:19"}
|
||||
{"current_steps": 3760, "total_steps": 4410, "loss": 0.2773, "lr": 2.5969801614147838e-06, "epoch": 5.968253968253968, "percentage": 85.26, "elapsed_time": "7:58:16", "remaining_time": "1:22:40"}
|
||||
{"current_steps": 3765, "total_steps": 4410, "loss": 0.2762, "lr": 2.558111003666075e-06, "epoch": 5.976190476190476, "percentage": 85.37, "elapsed_time": "7:58:49", "remaining_time": "1:22:01"}
|
||||
{"current_steps": 3770, "total_steps": 4410, "loss": 0.2781, "lr": 2.519515039508893e-06, "epoch": 5.984126984126984, "percentage": 85.49, "elapsed_time": "7:59:25", "remaining_time": "1:21:23"}
|
||||
{"current_steps": 3775, "total_steps": 4410, "loss": 0.2706, "lr": 2.481192873474667e-06, "epoch": 5.992063492063492, "percentage": 85.6, "elapsed_time": "8:00:02", "remaining_time": "1:20:44"}
|
||||
{"current_steps": 3780, "total_steps": 4410, "loss": 0.2726, "lr": 2.4431451058062928e-06, "epoch": 6.0, "percentage": 85.71, "elapsed_time": "8:00:38", "remaining_time": "1:20:06"}
|
||||
{"current_steps": 3785, "total_steps": 4410, "loss": 0.2763, "lr": 2.4053723324487677e-06, "epoch": 6.007936507936508, "percentage": 85.83, "elapsed_time": "8:01:13", "remaining_time": "1:19:27"}
|
||||
{"current_steps": 3790, "total_steps": 4410, "loss": 0.2657, "lr": 2.3678751450398196e-06, "epoch": 6.015873015873016, "percentage": 85.94, "elapsed_time": "8:01:51", "remaining_time": "1:18:49"}
|
||||
{"current_steps": 3795, "total_steps": 4410, "loss": 0.2785, "lr": 2.330654130900656e-06, "epoch": 6.023809523809524, "percentage": 86.05, "elapsed_time": "8:02:28", "remaining_time": "1:18:11"}
|
||||
{"current_steps": 3800, "total_steps": 4410, "loss": 0.2727, "lr": 2.2937098730267572e-06, "epoch": 6.031746031746032, "percentage": 86.17, "elapsed_time": "8:03:05", "remaining_time": "1:17:32"}
|
||||
{"current_steps": 3805, "total_steps": 4410, "loss": 0.2662, "lr": 2.2570429500787604e-06, "epoch": 6.0396825396825395, "percentage": 86.28, "elapsed_time": "8:03:44", "remaining_time": "1:16:54"}
|
||||
{"current_steps": 3810, "total_steps": 4410, "loss": 0.2683, "lr": 2.2206539363733738e-06, "epoch": 6.0476190476190474, "percentage": 86.39, "elapsed_time": "8:04:25", "remaining_time": "1:16:17"}
|
||||
{"current_steps": 3815, "total_steps": 4410, "loss": 0.2756, "lr": 2.1845434018744038e-06, "epoch": 6.055555555555555, "percentage": 86.51, "elapsed_time": "8:05:08", "remaining_time": "1:15:39"}
|
||||
{"current_steps": 3820, "total_steps": 4410, "loss": 0.2723, "lr": 2.148711912183803e-06, "epoch": 6.063492063492063, "percentage": 86.62, "elapsed_time": "8:05:47", "remaining_time": "1:15:01"}
|
||||
{"current_steps": 3825, "total_steps": 4410, "loss": 0.2724, "lr": 2.1131600285328458e-06, "epoch": 6.071428571428571, "percentage": 86.73, "elapsed_time": "8:06:31", "remaining_time": "1:14:24"}
|
||||
{"current_steps": 3830, "total_steps": 4410, "loss": 0.2768, "lr": 2.0778883077732903e-06, "epoch": 6.079365079365079, "percentage": 86.85, "elapsed_time": "8:07:09", "remaining_time": "1:13:46"}
|
||||
{"current_steps": 3835, "total_steps": 4410, "loss": 0.2771, "lr": 2.0428973023686983e-06, "epoch": 6.087301587301587, "percentage": 86.96, "elapsed_time": "8:07:46", "remaining_time": "1:13:08"}
|
||||
{"current_steps": 3840, "total_steps": 4410, "loss": 0.2704, "lr": 2.0081875603857726e-06, "epoch": 6.095238095238095, "percentage": 87.07, "elapsed_time": "8:08:22", "remaining_time": "1:12:29"}
|
||||
{"current_steps": 3845, "total_steps": 4410, "loss": 0.2769, "lr": 1.973759625485743e-06, "epoch": 6.103174603174603, "percentage": 87.19, "elapsed_time": "8:08:59", "remaining_time": "1:11:51"}
|
||||
{"current_steps": 3850, "total_steps": 4410, "loss": 0.2803, "lr": 1.9396140369159e-06, "epoch": 6.111111111111111, "percentage": 87.3, "elapsed_time": "8:09:34", "remaining_time": "1:11:12"}
|
||||
{"current_steps": 3855, "total_steps": 4410, "loss": 0.2737, "lr": 1.9057513295011087e-06, "epoch": 6.119047619047619, "percentage": 87.41, "elapsed_time": "8:10:14", "remaining_time": "1:10:34"}
|
||||
{"current_steps": 3860, "total_steps": 4410, "loss": 0.2727, "lr": 1.8721720336354487e-06, "epoch": 6.1269841269841265, "percentage": 87.53, "elapsed_time": "8:10:52", "remaining_time": "1:09:56"}
|
||||
{"current_steps": 3865, "total_steps": 4410, "loss": 0.2723, "lr": 1.8388766752739017e-06, "epoch": 6.134920634920635, "percentage": 87.64, "elapsed_time": "8:11:33", "remaining_time": "1:09:18"}
|
||||
{"current_steps": 3870, "total_steps": 4410, "loss": 0.2678, "lr": 1.805865775924116e-06, "epoch": 6.142857142857143, "percentage": 87.76, "elapsed_time": "8:12:10", "remaining_time": "1:08:40"}
|
||||
{"current_steps": 3875, "total_steps": 4410, "loss": 0.2729, "lr": 1.7731398526382416e-06, "epoch": 6.150793650793651, "percentage": 87.87, "elapsed_time": "8:12:53", "remaining_time": "1:08:03"}
|
||||
{"current_steps": 3880, "total_steps": 4410, "loss": 0.2769, "lr": 1.7406994180048231e-06, "epoch": 6.158730158730159, "percentage": 87.98, "elapsed_time": "8:13:36", "remaining_time": "1:07:25"}
|
||||
{"current_steps": 3885, "total_steps": 4410, "loss": 0.2697, "lr": 1.7085449801407783e-06, "epoch": 6.166666666666667, "percentage": 88.1, "elapsed_time": "8:14:12", "remaining_time": "1:06:47"}
|
||||
{"current_steps": 3890, "total_steps": 4410, "loss": 0.2702, "lr": 1.67667704268343e-06, "epoch": 6.174603174603175, "percentage": 88.21, "elapsed_time": "8:14:55", "remaining_time": "1:06:09"}
|
||||
{"current_steps": 3895, "total_steps": 4410, "loss": 0.2624, "lr": 1.6450961047826353e-06, "epoch": 6.182539682539683, "percentage": 88.32, "elapsed_time": "8:15:25", "remaining_time": "1:05:30"}
|
||||
{"current_steps": 3900, "total_steps": 4410, "loss": 0.2759, "lr": 1.6138026610929446e-06, "epoch": 6.190476190476191, "percentage": 88.44, "elapsed_time": "8:16:03", "remaining_time": "1:04:52"}
|
||||
{"current_steps": 3905, "total_steps": 4410, "loss": 0.2728, "lr": 1.5827972017658732e-06, "epoch": 6.198412698412699, "percentage": 88.55, "elapsed_time": "8:16:44", "remaining_time": "1:04:14"}
|
||||
{"current_steps": 3910, "total_steps": 4410, "loss": 0.2743, "lr": 1.5520802124422108e-06, "epoch": 6.2063492063492065, "percentage": 88.66, "elapsed_time": "8:17:25", "remaining_time": "1:03:36"}
|
||||
{"current_steps": 3915, "total_steps": 4410, "loss": 0.2712, "lr": 1.5216521742444236e-06, "epoch": 6.214285714285714, "percentage": 88.78, "elapsed_time": "8:18:02", "remaining_time": "1:02:58"}
|
||||
{"current_steps": 3920, "total_steps": 4410, "loss": 0.2741, "lr": 1.491513563769118e-06, "epoch": 6.222222222222222, "percentage": 88.89, "elapsed_time": "8:18:42", "remaining_time": "1:02:20"}
|
||||
{"current_steps": 3925, "total_steps": 4410, "loss": 0.2769, "lr": 1.4616648530795673e-06, "epoch": 6.23015873015873, "percentage": 89.0, "elapsed_time": "8:19:23", "remaining_time": "1:01:42"}
|
||||
{"current_steps": 3930, "total_steps": 4410, "loss": 0.273, "lr": 1.432106509698319e-06, "epoch": 6.238095238095238, "percentage": 89.12, "elapsed_time": "8:19:56", "remaining_time": "1:01:03"}
|
||||
{"current_steps": 3935, "total_steps": 4410, "loss": 0.2782, "lr": 1.4028389965998867e-06, "epoch": 6.246031746031746, "percentage": 89.23, "elapsed_time": "8:20:38", "remaining_time": "1:00:26"}
|
||||
{"current_steps": 3940, "total_steps": 4410, "loss": 0.2747, "lr": 1.3738627722034848e-06, "epoch": 6.253968253968254, "percentage": 89.34, "elapsed_time": "8:21:15", "remaining_time": "0:59:47"}
|
||||
{"current_steps": 3945, "total_steps": 4410, "loss": 0.2762, "lr": 1.345178290365845e-06, "epoch": 6.261904761904762, "percentage": 89.46, "elapsed_time": "8:21:55", "remaining_time": "0:59:09"}
|
||||
{"current_steps": 3950, "total_steps": 4410, "loss": 0.2754, "lr": 1.3167860003741218e-06, "epoch": 6.26984126984127, "percentage": 89.57, "elapsed_time": "8:22:31", "remaining_time": "0:58:31"}
|
||||
{"current_steps": 3955, "total_steps": 4410, "loss": 0.2755, "lr": 1.2886863469388389e-06, "epoch": 6.277777777777778, "percentage": 89.68, "elapsed_time": "8:23:14", "remaining_time": "0:57:53"}
|
||||
{"current_steps": 3960, "total_steps": 4410, "loss": 0.2754, "lr": 1.2608797701869425e-06, "epoch": 6.285714285714286, "percentage": 89.8, "elapsed_time": "8:23:55", "remaining_time": "0:57:15"}
|
||||
{"current_steps": 3965, "total_steps": 4410, "loss": 0.2779, "lr": 1.2333667056548881e-06, "epoch": 6.2936507936507935, "percentage": 89.91, "elapsed_time": "8:24:31", "remaining_time": "0:56:37"}
|
||||
{"current_steps": 3970, "total_steps": 4410, "loss": 0.2748, "lr": 1.2061475842818337e-06, "epoch": 6.301587301587301, "percentage": 90.02, "elapsed_time": "8:25:08", "remaining_time": "0:55:59"}
|
||||
{"current_steps": 3975, "total_steps": 4410, "loss": 0.2789, "lr": 1.1792228324028776e-06, "epoch": 6.309523809523809, "percentage": 90.14, "elapsed_time": "8:25:44", "remaining_time": "0:55:20"}
|
||||
{"current_steps": 3980, "total_steps": 4410, "loss": 0.2647, "lr": 1.152592871742395e-06, "epoch": 6.317460317460317, "percentage": 90.25, "elapsed_time": "8:26:17", "remaining_time": "0:54:42"}
|
||||
{"current_steps": 3985, "total_steps": 4410, "loss": 0.2682, "lr": 1.1262581194074152e-06, "epoch": 6.325396825396825, "percentage": 90.36, "elapsed_time": "8:26:52", "remaining_time": "0:54:03"}
|
||||
{"current_steps": 3990, "total_steps": 4410, "loss": 0.2696, "lr": 1.100218987881112e-06, "epoch": 6.333333333333333, "percentage": 90.48, "elapsed_time": "8:27:31", "remaining_time": "0:53:25"}
|
||||
{"current_steps": 3995, "total_steps": 4410, "loss": 0.2743, "lr": 1.0744758850163085e-06, "epoch": 6.341269841269841, "percentage": 90.59, "elapsed_time": "8:28:14", "remaining_time": "0:52:47"}
|
||||
{"current_steps": 4000, "total_steps": 4410, "loss": 0.2736, "lr": 1.0490292140291247e-06, "epoch": 6.349206349206349, "percentage": 90.7, "elapsed_time": "8:28:51", "remaining_time": "0:52:09"}
|
||||
{"current_steps": 4005, "total_steps": 4410, "loss": 0.2725, "lr": 1.0238793734926467e-06, "epoch": 6.357142857142857, "percentage": 90.82, "elapsed_time": "8:29:29", "remaining_time": "0:51:31"}
|
||||
{"current_steps": 4010, "total_steps": 4410, "loss": 0.2728, "lr": 9.990267573306745e-07, "epoch": 6.365079365079365, "percentage": 90.93, "elapsed_time": "8:30:10", "remaining_time": "0:50:53"}
|
||||
{"current_steps": 4015, "total_steps": 4410, "loss": 0.2708, "lr": 9.744717548115613e-07, "epoch": 6.3730158730158735, "percentage": 91.04, "elapsed_time": "8:30:47", "remaining_time": "0:50:15"}
|
||||
{"current_steps": 4020, "total_steps": 4410, "loss": 0.273, "lr": 9.502147505421244e-07, "epoch": 6.380952380952381, "percentage": 91.16, "elapsed_time": "8:31:29", "remaining_time": "0:49:37"}
|
||||
{"current_steps": 4025, "total_steps": 4410, "loss": 0.2691, "lr": 9.262561244616108e-07, "epoch": 6.388888888888889, "percentage": 91.27, "elapsed_time": "8:32:04", "remaining_time": "0:48:58"}
|
||||
{"current_steps": 4030, "total_steps": 4410, "loss": 0.2757, "lr": 9.025962518357323e-07, "epoch": 6.396825396825397, "percentage": 91.38, "elapsed_time": "8:32:39", "remaining_time": "0:48:20"}
|
||||
{"current_steps": 4035, "total_steps": 4410, "loss": 0.2697, "lr": 8.792355032508282e-07, "epoch": 6.404761904761905, "percentage": 91.5, "elapsed_time": "8:33:10", "remaining_time": "0:47:41"}
|
||||
{"current_steps": 4040, "total_steps": 4410, "loss": 0.2742, "lr": 8.561742446080168e-07, "epoch": 6.412698412698413, "percentage": 91.61, "elapsed_time": "8:33:47", "remaining_time": "0:47:03"}
|
||||
{"current_steps": 4045, "total_steps": 4410, "loss": 0.2662, "lr": 8.334128371174955e-07, "epoch": 6.420634920634921, "percentage": 91.72, "elapsed_time": "8:34:26", "remaining_time": "0:46:25"}
|
||||
{"current_steps": 4050, "total_steps": 4410, "loss": 0.2643, "lr": 8.109516372928605e-07, "epoch": 6.428571428571429, "percentage": 91.84, "elapsed_time": "8:34:59", "remaining_time": "0:45:46"}
|
||||
{"current_steps": 4055, "total_steps": 4410, "loss": 0.2706, "lr": 7.887909969455366e-07, "epoch": 6.436507936507937, "percentage": 91.95, "elapsed_time": "8:35:34", "remaining_time": "0:45:08"}
|
||||
{"current_steps": 4060, "total_steps": 4410, "loss": 0.2784, "lr": 7.669312631792758e-07, "epoch": 6.444444444444445, "percentage": 92.06, "elapsed_time": "8:36:18", "remaining_time": "0:44:30"}
|
||||
{"current_steps": 4065, "total_steps": 4410, "loss": 0.2694, "lr": 7.453727783846876e-07, "epoch": 6.4523809523809526, "percentage": 92.18, "elapsed_time": "8:36:54", "remaining_time": "0:43:52"}
|
||||
{"current_steps": 4070, "total_steps": 4410, "loss": 0.2756, "lr": 7.241158802339065e-07, "epoch": 6.4603174603174605, "percentage": 92.29, "elapsed_time": "8:37:34", "remaining_time": "0:43:14"}
|
||||
{"current_steps": 4075, "total_steps": 4410, "loss": 0.2691, "lr": 7.031609016753016e-07, "epoch": 6.468253968253968, "percentage": 92.4, "elapsed_time": "8:38:12", "remaining_time": "0:42:36"}
|
||||
{"current_steps": 4080, "total_steps": 4410, "loss": 0.2779, "lr": 6.825081709282377e-07, "epoch": 6.476190476190476, "percentage": 92.52, "elapsed_time": "8:38:49", "remaining_time": "0:41:57"}
|
||||
{"current_steps": 4085, "total_steps": 4410, "loss": 0.2713, "lr": 6.62158011477958e-07, "epoch": 6.484126984126984, "percentage": 92.63, "elapsed_time": "8:39:30", "remaining_time": "0:41:19"}
|
||||
{"current_steps": 4090, "total_steps": 4410, "loss": 0.2734, "lr": 6.421107420705097e-07, "epoch": 6.492063492063492, "percentage": 92.74, "elapsed_time": "8:40:07", "remaining_time": "0:40:41"}
|
||||
{"current_steps": 4095, "total_steps": 4410, "loss": 0.2707, "lr": 6.223666767077508e-07, "epoch": 6.5, "percentage": 92.86, "elapsed_time": "8:40:43", "remaining_time": "0:40:03"}
|
||||
{"current_steps": 4100, "total_steps": 4410, "loss": 0.2701, "lr": 6.029261246424267e-07, "epoch": 6.507936507936508, "percentage": 92.97, "elapsed_time": "8:41:23", "remaining_time": "0:39:25"}
|
||||
{"current_steps": 4105, "total_steps": 4410, "loss": 0.2665, "lr": 5.837893903733394e-07, "epoch": 6.515873015873016, "percentage": 93.08, "elapsed_time": "8:42:01", "remaining_time": "0:38:47"}
|
||||
{"current_steps": 4110, "total_steps": 4410, "loss": 0.2764, "lr": 5.649567736405681e-07, "epoch": 6.523809523809524, "percentage": 93.2, "elapsed_time": "8:42:35", "remaining_time": "0:38:08"}
|
||||
{"current_steps": 4115, "total_steps": 4410, "loss": 0.2749, "lr": 5.464285694207672e-07, "epoch": 6.531746031746032, "percentage": 93.31, "elapsed_time": "8:43:13", "remaining_time": "0:37:30"}
|
||||
{"current_steps": 4120, "total_steps": 4410, "loss": 0.2791, "lr": 5.282050679225714e-07, "epoch": 6.5396825396825395, "percentage": 93.42, "elapsed_time": "8:43:52", "remaining_time": "0:36:52"}
|
||||
{"current_steps": 4125, "total_steps": 4410, "loss": 0.2674, "lr": 5.102865545820245e-07, "epoch": 6.5476190476190474, "percentage": 93.54, "elapsed_time": "8:44:32", "remaining_time": "0:36:14"}
|
||||
{"current_steps": 4130, "total_steps": 4410, "loss": 0.2728, "lr": 4.926733100581182e-07, "epoch": 6.555555555555555, "percentage": 93.65, "elapsed_time": "8:45:14", "remaining_time": "0:35:36"}
|
||||
{"current_steps": 4135, "total_steps": 4410, "loss": 0.2706, "lr": 4.7536561022840213e-07, "epoch": 6.563492063492063, "percentage": 93.76, "elapsed_time": "8:45:57", "remaining_time": "0:34:58"}
|
||||
{"current_steps": 4140, "total_steps": 4410, "loss": 0.269, "lr": 4.5836372618464964e-07, "epoch": 6.571428571428571, "percentage": 93.88, "elapsed_time": "8:46:42", "remaining_time": "0:34:21"}
|
||||
{"current_steps": 4145, "total_steps": 4410, "loss": 0.2748, "lr": 4.416679242286215e-07, "epoch": 6.579365079365079, "percentage": 93.99, "elapsed_time": "8:47:23", "remaining_time": "0:33:43"}
|
||||
{"current_steps": 4150, "total_steps": 4410, "loss": 0.265, "lr": 4.2527846586789547e-07, "epoch": 6.587301587301587, "percentage": 94.1, "elapsed_time": "8:47:56", "remaining_time": "0:33:04"}
|
||||
{"current_steps": 4155, "total_steps": 4410, "loss": 0.2763, "lr": 4.0919560781176317e-07, "epoch": 6.595238095238095, "percentage": 94.22, "elapsed_time": "8:48:32", "remaining_time": "0:32:26"}
|
||||
{"current_steps": 4160, "total_steps": 4410, "loss": 0.2701, "lr": 3.934196019672176e-07, "epoch": 6.603174603174603, "percentage": 94.33, "elapsed_time": "8:49:11", "remaining_time": "0:31:48"}
|
||||
{"current_steps": 4165, "total_steps": 4410, "loss": 0.2695, "lr": 3.779506954349965e-07, "epoch": 6.611111111111111, "percentage": 94.44, "elapsed_time": "8:49:48", "remaining_time": "0:31:09"}
|
||||
{"current_steps": 4170, "total_steps": 4410, "loss": 0.28, "lr": 3.6278913050572076e-07, "epoch": 6.619047619047619, "percentage": 94.56, "elapsed_time": "8:50:24", "remaining_time": "0:30:31"}
|
||||
{"current_steps": 4175, "total_steps": 4410, "loss": 0.2747, "lr": 3.4793514465610414e-07, "epoch": 6.6269841269841265, "percentage": 94.67, "elapsed_time": "8:51:05", "remaining_time": "0:29:53"}
|
||||
{"current_steps": 4180, "total_steps": 4410, "loss": 0.2758, "lr": 3.3338897054521205e-07, "epoch": 6.634920634920634, "percentage": 94.78, "elapsed_time": "8:51:42", "remaining_time": "0:29:15"}
|
||||
{"current_steps": 4185, "total_steps": 4410, "loss": 0.2673, "lr": 3.191508360108464e-07, "epoch": 6.642857142857143, "percentage": 94.9, "elapsed_time": "8:52:25", "remaining_time": "0:28:37"}
|
||||
{"current_steps": 4190, "total_steps": 4410, "loss": 0.2656, "lr": 3.0522096406595536e-07, "epoch": 6.650793650793651, "percentage": 95.01, "elapsed_time": "8:53:03", "remaining_time": "0:27:59"}
|
||||
{"current_steps": 4195, "total_steps": 4410, "loss": 0.274, "lr": 2.9159957289514926e-07, "epoch": 6.658730158730159, "percentage": 95.12, "elapsed_time": "8:53:44", "remaining_time": "0:27:21"}
|
||||
{"current_steps": 4200, "total_steps": 4410, "loss": 0.2751, "lr": 2.782868758512791e-07, "epoch": 6.666666666666667, "percentage": 95.24, "elapsed_time": "8:54:21", "remaining_time": "0:26:43"}
|
||||
{"current_steps": 4205, "total_steps": 4410, "loss": 0.2748, "lr": 2.6528308145210125e-07, "epoch": 6.674603174603175, "percentage": 95.35, "elapsed_time": "8:55:01", "remaining_time": "0:26:04"}
|
||||
{"current_steps": 4210, "total_steps": 4410, "loss": 0.2749, "lr": 2.525883933770046e-07, "epoch": 6.682539682539683, "percentage": 95.46, "elapsed_time": "8:55:38", "remaining_time": "0:25:26"}
|
||||
{"current_steps": 4215, "total_steps": 4410, "loss": 0.2756, "lr": 2.402030104638198e-07, "epoch": 6.690476190476191, "percentage": 95.58, "elapsed_time": "8:56:15", "remaining_time": "0:24:48"}
|
||||
{"current_steps": 4220, "total_steps": 4410, "loss": 0.274, "lr": 2.2812712670571502e-07, "epoch": 6.698412698412699, "percentage": 95.69, "elapsed_time": "8:56:54", "remaining_time": "0:24:10"}
|
||||
{"current_steps": 4225, "total_steps": 4410, "loss": 0.2749, "lr": 2.1636093124814738e-07, "epoch": 6.7063492063492065, "percentage": 95.8, "elapsed_time": "8:57:28", "remaining_time": "0:23:32"}
|
||||
{"current_steps": 4230, "total_steps": 4410, "loss": 0.2733, "lr": 2.0490460838589855e-07, "epoch": 6.714285714285714, "percentage": 95.92, "elapsed_time": "8:58:03", "remaining_time": "0:22:53"}
|
||||
{"current_steps": 4235, "total_steps": 4410, "loss": 0.2631, "lr": 1.9375833756019923e-07, "epoch": 6.722222222222222, "percentage": 96.03, "elapsed_time": "8:58:41", "remaining_time": "0:22:15"}
|
||||
{"current_steps": 4240, "total_steps": 4410, "loss": 0.2719, "lr": 1.8292229335590716e-07, "epoch": 6.73015873015873, "percentage": 96.15, "elapsed_time": "8:59:15", "remaining_time": "0:21:37"}
|
||||
{"current_steps": 4245, "total_steps": 4410, "loss": 0.2811, "lr": 1.7239664549878688e-07, "epoch": 6.738095238095238, "percentage": 96.26, "elapsed_time": "8:59:58", "remaining_time": "0:20:59"}
|
||||
{"current_steps": 4250, "total_steps": 4410, "loss": 0.2717, "lr": 1.6218155885283192e-07, "epoch": 6.746031746031746, "percentage": 96.37, "elapsed_time": "9:00:39", "remaining_time": "0:20:21"}
|
||||
{"current_steps": 4255, "total_steps": 4410, "loss": 0.2694, "lr": 1.5227719341769364e-07, "epoch": 6.753968253968254, "percentage": 96.49, "elapsed_time": "9:01:16", "remaining_time": "0:19:43"}
|
||||
{"current_steps": 4260, "total_steps": 4410, "loss": 0.2694, "lr": 1.4268370432618306e-07, "epoch": 6.761904761904762, "percentage": 96.6, "elapsed_time": "9:01:54", "remaining_time": "0:19:04"}
|
||||
{"current_steps": 4265, "total_steps": 4410, "loss": 0.2655, "lr": 1.3340124184182178e-07, "epoch": 6.76984126984127, "percentage": 96.71, "elapsed_time": "9:02:35", "remaining_time": "0:18:26"}
|
||||
{"current_steps": 4270, "total_steps": 4410, "loss": 0.2714, "lr": 1.2442995135650393e-07, "epoch": 6.777777777777778, "percentage": 96.83, "elapsed_time": "9:03:10", "remaining_time": "0:17:48"}
|
||||
{"current_steps": 4275, "total_steps": 4410, "loss": 0.2761, "lr": 1.1576997338821339e-07, "epoch": 6.785714285714286, "percentage": 96.94, "elapsed_time": "9:03:41", "remaining_time": "0:17:10"}
|
||||
{"current_steps": 4280, "total_steps": 4410, "loss": 0.2738, "lr": 1.0742144357882567e-07, "epoch": 6.7936507936507935, "percentage": 97.05, "elapsed_time": "9:04:18", "remaining_time": "0:16:31"}
|
||||
{"current_steps": 4285, "total_steps": 4410, "loss": 0.2674, "lr": 9.938449269197181e-08, "epoch": 6.801587301587301, "percentage": 97.17, "elapsed_time": "9:04:51", "remaining_time": "0:15:53"}
|
||||
{"current_steps": 4290, "total_steps": 4410, "loss": 0.2714, "lr": 9.165924661100889e-08, "epoch": 6.809523809523809, "percentage": 97.28, "elapsed_time": "9:05:30", "remaining_time": "0:15:15"}
|
||||
{"current_steps": 4295, "total_steps": 4410, "loss": 0.2748, "lr": 8.424582633703493e-08, "epoch": 6.817460317460317, "percentage": 97.39, "elapsed_time": "9:06:05", "remaining_time": "0:14:37"}
|
||||
{"current_steps": 4300, "total_steps": 4410, "loss": 0.2716, "lr": 7.714434798699933e-08, "epoch": 6.825396825396825, "percentage": 97.51, "elapsed_time": "9:06:44", "remaining_time": "0:13:59"}
|
||||
{"current_steps": 4305, "total_steps": 4410, "loss": 0.2686, "lr": 7.035492279187538e-08, "epoch": 6.833333333333333, "percentage": 97.62, "elapsed_time": "9:07:26", "remaining_time": "0:13:21"}
|
||||
{"current_steps": 4310, "total_steps": 4410, "loss": 0.2638, "lr": 6.387765709493288e-08, "epoch": 6.841269841269841, "percentage": 97.73, "elapsed_time": "9:08:02", "remaining_time": "0:12:42"}
|
||||
{"current_steps": 4315, "total_steps": 4410, "loss": 0.2716, "lr": 5.7712652350061515e-08, "epoch": 6.849206349206349, "percentage": 97.85, "elapsed_time": "9:08:42", "remaining_time": "0:12:04"}
|
||||
{"current_steps": 4320, "total_steps": 4410, "loss": 0.274, "lr": 5.186000512018341e-08, "epoch": 6.857142857142857, "percentage": 97.96, "elapsed_time": "9:09:16", "remaining_time": "0:11:26"}
|
||||
{"current_steps": 4325, "total_steps": 4410, "loss": 0.2729, "lr": 4.631980707574535e-08, "epoch": 6.865079365079366, "percentage": 98.07, "elapsed_time": "9:09:54", "remaining_time": "0:10:48"}
|
||||
{"current_steps": 4330, "total_steps": 4410, "loss": 0.274, "lr": 4.10921449932733e-08, "epoch": 6.8730158730158735, "percentage": 98.19, "elapsed_time": "9:10:34", "remaining_time": "0:10:10"}
|
||||
{"current_steps": 4335, "total_steps": 4410, "loss": 0.2688, "lr": 3.61771007540268e-08, "epoch": 6.880952380952381, "percentage": 98.3, "elapsed_time": "9:11:12", "remaining_time": "0:09:32"}
|
||||
{"current_steps": 4340, "total_steps": 4410, "loss": 0.274, "lr": 3.157475134270227e-08, "epoch": 6.888888888888889, "percentage": 98.41, "elapsed_time": "9:11:54", "remaining_time": "0:08:54"}
|
||||
{"current_steps": 4345, "total_steps": 4410, "loss": 0.274, "lr": 2.728516884624277e-08, "epoch": 6.896825396825397, "percentage": 98.53, "elapsed_time": "9:12:35", "remaining_time": "0:08:15"}
|
||||
{"current_steps": 4350, "total_steps": 4410, "loss": 0.2673, "lr": 2.3308420452690106e-08, "epoch": 6.904761904761905, "percentage": 98.64, "elapsed_time": "9:13:08", "remaining_time": "0:07:37"}
|
||||
{"current_steps": 4355, "total_steps": 4410, "loss": 0.2642, "lr": 1.9644568450147837e-08, "epoch": 6.912698412698413, "percentage": 98.75, "elapsed_time": "9:13:47", "remaining_time": "0:06:59"}
|
||||
{"current_steps": 4360, "total_steps": 4410, "loss": 0.2684, "lr": 1.6293670225799864e-08, "epoch": 6.920634920634921, "percentage": 98.87, "elapsed_time": "9:14:27", "remaining_time": "0:06:21"}
|
||||
{"current_steps": 4365, "total_steps": 4410, "loss": 0.2693, "lr": 1.3255778265013342e-08, "epoch": 6.928571428571429, "percentage": 98.98, "elapsed_time": "9:15:12", "remaining_time": "0:05:43"}
|
||||
{"current_steps": 4370, "total_steps": 4410, "loss": 0.2689, "lr": 1.0530940150512703e-08, "epoch": 6.936507936507937, "percentage": 99.09, "elapsed_time": "9:15:53", "remaining_time": "0:05:05"}
|
||||
{"current_steps": 4375, "total_steps": 4410, "loss": 0.2742, "lr": 8.119198561638009e-09, "epoch": 6.944444444444445, "percentage": 99.21, "elapsed_time": "9:16:32", "remaining_time": "0:04:27"}
|
||||
{"current_steps": 4380, "total_steps": 4410, "loss": 0.2649, "lr": 6.020591273674381e-09, "epoch": 6.9523809523809526, "percentage": 99.32, "elapsed_time": "9:17:00", "remaining_time": "0:03:48"}
|
||||
{"current_steps": 4385, "total_steps": 4410, "loss": 0.2647, "lr": 4.2351511572635835e-09, "epoch": 6.9603174603174605, "percentage": 99.43, "elapsed_time": "9:17:40", "remaining_time": "0:03:10"}
|
||||
{"current_steps": 4390, "total_steps": 4410, "loss": 0.2685, "lr": 2.7629061778866597e-09, "epoch": 6.968253968253968, "percentage": 99.55, "elapsed_time": "9:18:18", "remaining_time": "0:02:32"}
|
||||
{"current_steps": 4395, "total_steps": 4410, "loss": 0.2693, "lr": 1.603879395422059e-09, "epoch": 6.976190476190476, "percentage": 99.66, "elapsed_time": "9:18:54", "remaining_time": "0:01:54"}
|
||||
{"current_steps": 4400, "total_steps": 4410, "loss": 0.2716, "lr": 7.580889637925914e-10, "epoch": 6.984126984126984, "percentage": 99.77, "elapsed_time": "9:19:33", "remaining_time": "0:01:16"}
|
||||
{"current_steps": 4405, "total_steps": 4410, "loss": 0.2768, "lr": 2.2554813067676705e-10, "epoch": 6.992063492063492, "percentage": 99.89, "elapsed_time": "9:20:14", "remaining_time": "0:00:38"}
|
||||
{"current_steps": 4410, "total_steps": 4410, "loss": 0.272, "lr": 6.265237300073778e-12, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "9:20:51", "remaining_time": "0:00:00"}
|
||||
{"current_steps": 4410, "total_steps": 4410, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "9:21:00", "remaining_time": "0:00:00"}
|
||||
9749
trainer_state.json
Normal file
9749
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:076c1a5109fb2e189032926fd8d61066b4ac5136e9ae2c64ad338864dd6fe8c8
|
||||
size 8593
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 38 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user