初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/b1_top8_seq Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_b1_top8_seq__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_b1_top8_seq__Qwen3-8B
|
||||
|
||||
This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the DCAgent/b1_top8_seq dataset (snapshot 431317fbde90fded83a2730a01e3e4bcc5981bd2, loaded from the local hub cache at /scratch/08134/negin/hub/datasets--DCAgent--b1_top8_seq).
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08; no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.3
|
||||
- Pytorch 2.9.0+cu128
|
||||
- Datasets 4.4.1
|
||||
- Tokenizers 0.22.1
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.003671455564786977,
|
||||
"achieved_tflops_per_gpu_theoretical": 623.3744395594629,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.17723463475704193,
|
||||
"mfu_percent": 0.0002594668243665708,
|
||||
"mfu_percent_theoretical": 44.054730710916104,
|
||||
"total_flos": 1286594594209792.0,
|
||||
"train_loss": 0.3047364686883014,
|
||||
"train_runtime": 21901.9843,
|
||||
"train_samples_per_second": 2.937,
|
||||
"train_steps_per_second": 0.184,
|
||||
"valid_targets_mean": 3156.8,
|
||||
"valid_targets_min": 1012
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.3",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.3"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cbf7209477566037d503c0a39dd70db0de0d4d8f26a7746f0bec23cf05470648
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d8448b49b46f3cafea4195735c13d070fc6f11e8a27ede14177745f11fa0fe81
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2a7e76c391c04365e2dc02c77fefb2b16d1cb50d68803bfd8573631dc9a36acb
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ce2c2208cfd1b212620563ea71a7ccdf80deb676b7336edf7da8d4bf68d9a858
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.003671455564786977,
|
||||
"achieved_tflops_per_gpu_theoretical": 623.3744395594629,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.17723463475704193,
|
||||
"mfu_percent": 0.0002594668243665708,
|
||||
"mfu_percent_theoretical": 44.054730710916104,
|
||||
"total_flos": 1286594594209792.0,
|
||||
"train_loss": 0.3047364686883014,
|
||||
"train_runtime": 21901.9843,
|
||||
"train_samples_per_second": 2.937,
|
||||
"train_steps_per_second": 0.184,
|
||||
"valid_targets_mean": 3156.8,
|
||||
"valid_targets_min": 1012
|
||||
}
|
||||
806
trainer_log.jsonl
Normal file
806
trainer_log.jsonl
Normal file
@@ -0,0 +1,806 @@
|
||||
{"current_steps": 5, "total_steps": 4025, "loss": 0.9204, "lr": 3.970223325062035e-07, "epoch": 0.008695652173913044, "percentage": 0.12, "elapsed_time": "0:00:32", "remaining_time": "7:17:37"}
|
||||
{"current_steps": 10, "total_steps": 4025, "loss": 0.9, "lr": 8.933002481389578e-07, "epoch": 0.017391304347826087, "percentage": 0.25, "elapsed_time": "0:00:54", "remaining_time": "6:01:47"}
|
||||
{"current_steps": 15, "total_steps": 4025, "loss": 0.8815, "lr": 1.3895781637717123e-06, "epoch": 0.02608695652173913, "percentage": 0.37, "elapsed_time": "0:01:17", "remaining_time": "5:45:42"}
|
||||
{"current_steps": 20, "total_steps": 4025, "loss": 0.8479, "lr": 1.8858560794044667e-06, "epoch": 0.034782608695652174, "percentage": 0.5, "elapsed_time": "0:01:40", "remaining_time": "5:35:04"}
|
||||
{"current_steps": 25, "total_steps": 4025, "loss": 0.7836, "lr": 2.382133995037221e-06, "epoch": 0.043478260869565216, "percentage": 0.62, "elapsed_time": "0:02:01", "remaining_time": "5:25:06"}
|
||||
{"current_steps": 30, "total_steps": 4025, "loss": 0.7505, "lr": 2.8784119106699756e-06, "epoch": 0.05217391304347826, "percentage": 0.75, "elapsed_time": "0:02:22", "remaining_time": "5:16:27"}
|
||||
{"current_steps": 35, "total_steps": 4025, "loss": 0.6877, "lr": 3.3746898263027302e-06, "epoch": 0.06086956521739131, "percentage": 0.87, "elapsed_time": "0:02:46", "remaining_time": "5:16:52"}
|
||||
{"current_steps": 40, "total_steps": 4025, "loss": 0.6493, "lr": 3.870967741935484e-06, "epoch": 0.06956521739130435, "percentage": 0.99, "elapsed_time": "0:03:11", "remaining_time": "5:18:11"}
|
||||
{"current_steps": 45, "total_steps": 4025, "loss": 0.5944, "lr": 4.367245657568239e-06, "epoch": 0.0782608695652174, "percentage": 1.12, "elapsed_time": "0:03:32", "remaining_time": "5:13:41"}
|
||||
{"current_steps": 50, "total_steps": 4025, "loss": 0.5608, "lr": 4.863523573200992e-06, "epoch": 0.08695652173913043, "percentage": 1.24, "elapsed_time": "0:03:54", "remaining_time": "5:11:10"}
|
||||
{"current_steps": 55, "total_steps": 4025, "loss": 0.529, "lr": 5.359801488833747e-06, "epoch": 0.09565217391304348, "percentage": 1.37, "elapsed_time": "0:04:17", "remaining_time": "5:09:48"}
|
||||
{"current_steps": 60, "total_steps": 4025, "loss": 0.4853, "lr": 5.856079404466502e-06, "epoch": 0.10434782608695652, "percentage": 1.49, "elapsed_time": "0:04:41", "remaining_time": "5:10:29"}
|
||||
{"current_steps": 65, "total_steps": 4025, "loss": 0.4754, "lr": 6.352357320099256e-06, "epoch": 0.11304347826086956, "percentage": 1.61, "elapsed_time": "0:05:02", "remaining_time": "5:06:53"}
|
||||
{"current_steps": 70, "total_steps": 4025, "loss": 0.4739, "lr": 6.848635235732011e-06, "epoch": 0.12173913043478261, "percentage": 1.74, "elapsed_time": "0:05:25", "remaining_time": "5:06:50"}
|
||||
{"current_steps": 75, "total_steps": 4025, "loss": 0.4396, "lr": 7.344913151364765e-06, "epoch": 0.13043478260869565, "percentage": 1.86, "elapsed_time": "0:05:45", "remaining_time": "5:03:35"}
|
||||
{"current_steps": 80, "total_steps": 4025, "loss": 0.6029, "lr": 7.841191066997518e-06, "epoch": 0.1391304347826087, "percentage": 1.99, "elapsed_time": "0:06:06", "remaining_time": "5:01:36"}
|
||||
{"current_steps": 85, "total_steps": 4025, "loss": 0.6716, "lr": 8.337468982630273e-06, "epoch": 0.14782608695652175, "percentage": 2.11, "elapsed_time": "0:06:23", "remaining_time": "4:56:11"}
|
||||
{"current_steps": 90, "total_steps": 4025, "loss": 0.6513, "lr": 8.833746898263029e-06, "epoch": 0.1565217391304348, "percentage": 2.24, "elapsed_time": "0:06:49", "remaining_time": "4:58:02"}
|
||||
{"current_steps": 95, "total_steps": 4025, "loss": 0.634, "lr": 9.330024813895783e-06, "epoch": 0.16521739130434782, "percentage": 2.36, "elapsed_time": "0:07:10", "remaining_time": "4:56:55"}
|
||||
{"current_steps": 100, "total_steps": 4025, "loss": 0.6477, "lr": 9.826302729528537e-06, "epoch": 0.17391304347826086, "percentage": 2.48, "elapsed_time": "0:07:35", "remaining_time": "4:57:49"}
|
||||
{"current_steps": 105, "total_steps": 4025, "loss": 0.6056, "lr": 1.0322580645161291e-05, "epoch": 0.1826086956521739, "percentage": 2.61, "elapsed_time": "0:07:58", "remaining_time": "4:57:31"}
|
||||
{"current_steps": 110, "total_steps": 4025, "loss": 0.6057, "lr": 1.0818858560794045e-05, "epoch": 0.19130434782608696, "percentage": 2.73, "elapsed_time": "0:08:21", "remaining_time": "4:57:12"}
|
||||
{"current_steps": 115, "total_steps": 4025, "loss": 0.5811, "lr": 1.13151364764268e-05, "epoch": 0.2, "percentage": 2.86, "elapsed_time": "0:08:41", "remaining_time": "4:55:29"}
|
||||
{"current_steps": 120, "total_steps": 4025, "loss": 0.6029, "lr": 1.1811414392059556e-05, "epoch": 0.20869565217391303, "percentage": 2.98, "elapsed_time": "0:09:00", "remaining_time": "4:53:01"}
|
||||
{"current_steps": 125, "total_steps": 4025, "loss": 0.6046, "lr": 1.230769230769231e-05, "epoch": 0.21739130434782608, "percentage": 3.11, "elapsed_time": "0:09:25", "remaining_time": "4:54:07"}
|
||||
{"current_steps": 130, "total_steps": 4025, "loss": 0.5812, "lr": 1.2803970223325064e-05, "epoch": 0.22608695652173913, "percentage": 3.23, "elapsed_time": "0:09:45", "remaining_time": "4:52:37"}
|
||||
{"current_steps": 135, "total_steps": 4025, "loss": 0.565, "lr": 1.3300248138957816e-05, "epoch": 0.23478260869565218, "percentage": 3.35, "elapsed_time": "0:10:11", "remaining_time": "4:53:43"}
|
||||
{"current_steps": 140, "total_steps": 4025, "loss": 0.5573, "lr": 1.379652605459057e-05, "epoch": 0.24347826086956523, "percentage": 3.48, "elapsed_time": "0:10:34", "remaining_time": "4:53:28"}
|
||||
{"current_steps": 145, "total_steps": 4025, "loss": 0.5411, "lr": 1.4292803970223325e-05, "epoch": 0.25217391304347825, "percentage": 3.6, "elapsed_time": "0:10:53", "remaining_time": "4:51:19"}
|
||||
{"current_steps": 150, "total_steps": 4025, "loss": 0.5652, "lr": 1.478908188585608e-05, "epoch": 0.2608695652173913, "percentage": 3.73, "elapsed_time": "0:11:14", "remaining_time": "4:50:33"}
|
||||
{"current_steps": 155, "total_steps": 4025, "loss": 0.5278, "lr": 1.5285359801488835e-05, "epoch": 0.26956521739130435, "percentage": 3.85, "elapsed_time": "0:11:37", "remaining_time": "4:50:12"}
|
||||
{"current_steps": 160, "total_steps": 4025, "loss": 0.5113, "lr": 1.578163771712159e-05, "epoch": 0.2782608695652174, "percentage": 3.98, "elapsed_time": "0:12:16", "remaining_time": "4:56:38"}
|
||||
{"current_steps": 165, "total_steps": 4025, "loss": 0.523, "lr": 1.6277915632754343e-05, "epoch": 0.28695652173913044, "percentage": 4.1, "elapsed_time": "0:12:51", "remaining_time": "5:00:56"}
|
||||
{"current_steps": 170, "total_steps": 4025, "loss": 0.5066, "lr": 1.6774193548387098e-05, "epoch": 0.2956521739130435, "percentage": 4.22, "elapsed_time": "0:13:25", "remaining_time": "5:04:23"}
|
||||
{"current_steps": 175, "total_steps": 4025, "loss": 0.5132, "lr": 1.7270471464019852e-05, "epoch": 0.30434782608695654, "percentage": 4.35, "elapsed_time": "0:13:58", "remaining_time": "5:07:32"}
|
||||
{"current_steps": 180, "total_steps": 4025, "loss": 0.5389, "lr": 1.7766749379652606e-05, "epoch": 0.3130434782608696, "percentage": 4.47, "elapsed_time": "0:14:30", "remaining_time": "5:10:01"}
|
||||
{"current_steps": 185, "total_steps": 4025, "loss": 0.5165, "lr": 1.826302729528536e-05, "epoch": 0.3217391304347826, "percentage": 4.6, "elapsed_time": "0:15:09", "remaining_time": "5:14:39"}
|
||||
{"current_steps": 190, "total_steps": 4025, "loss": 0.4971, "lr": 1.8759305210918114e-05, "epoch": 0.33043478260869563, "percentage": 4.72, "elapsed_time": "0:15:43", "remaining_time": "5:17:14"}
|
||||
{"current_steps": 195, "total_steps": 4025, "loss": 0.5483, "lr": 1.925558312655087e-05, "epoch": 0.3391304347826087, "percentage": 4.84, "elapsed_time": "0:16:08", "remaining_time": "5:17:05"}
|
||||
{"current_steps": 200, "total_steps": 4025, "loss": 0.5044, "lr": 1.9751861042183623e-05, "epoch": 0.34782608695652173, "percentage": 4.97, "elapsed_time": "0:16:46", "remaining_time": "5:20:48"}
|
||||
{"current_steps": 205, "total_steps": 4025, "loss": 0.4915, "lr": 2.024813895781638e-05, "epoch": 0.3565217391304348, "percentage": 5.09, "elapsed_time": "0:17:15", "remaining_time": "5:21:40"}
|
||||
{"current_steps": 210, "total_steps": 4025, "loss": 0.4994, "lr": 2.0744416873449135e-05, "epoch": 0.3652173913043478, "percentage": 5.22, "elapsed_time": "0:17:45", "remaining_time": "5:22:34"}
|
||||
{"current_steps": 215, "total_steps": 4025, "loss": 0.4972, "lr": 2.124069478908189e-05, "epoch": 0.3739130434782609, "percentage": 5.34, "elapsed_time": "0:18:14", "remaining_time": "5:23:14"}
|
||||
{"current_steps": 220, "total_steps": 4025, "loss": 0.4962, "lr": 2.1736972704714643e-05, "epoch": 0.3826086956521739, "percentage": 5.47, "elapsed_time": "0:18:48", "remaining_time": "5:25:23"}
|
||||
{"current_steps": 225, "total_steps": 4025, "loss": 0.5005, "lr": 2.2233250620347397e-05, "epoch": 0.391304347826087, "percentage": 5.59, "elapsed_time": "0:19:21", "remaining_time": "5:26:58"}
|
||||
{"current_steps": 230, "total_steps": 4025, "loss": 0.4738, "lr": 2.272952853598015e-05, "epoch": 0.4, "percentage": 5.71, "elapsed_time": "0:19:54", "remaining_time": "5:28:31"}
|
||||
{"current_steps": 235, "total_steps": 4025, "loss": 0.409, "lr": 2.3225806451612906e-05, "epoch": 0.40869565217391307, "percentage": 5.84, "elapsed_time": "0:20:24", "remaining_time": "5:29:10"}
|
||||
{"current_steps": 240, "total_steps": 4025, "loss": 0.3587, "lr": 2.372208436724566e-05, "epoch": 0.41739130434782606, "percentage": 5.96, "elapsed_time": "0:20:54", "remaining_time": "5:29:51"}
|
||||
{"current_steps": 245, "total_steps": 4025, "loss": 0.342, "lr": 2.4218362282878417e-05, "epoch": 0.4260869565217391, "percentage": 6.09, "elapsed_time": "0:21:27", "remaining_time": "5:31:10"}
|
||||
{"current_steps": 250, "total_steps": 4025, "loss": 0.3431, "lr": 2.4714640198511165e-05, "epoch": 0.43478260869565216, "percentage": 6.21, "elapsed_time": "0:21:56", "remaining_time": "5:31:19"}
|
||||
{"current_steps": 255, "total_steps": 4025, "loss": 0.3218, "lr": 2.521091811414392e-05, "epoch": 0.4434782608695652, "percentage": 6.34, "elapsed_time": "0:22:25", "remaining_time": "5:31:37"}
|
||||
{"current_steps": 260, "total_steps": 4025, "loss": 0.3474, "lr": 2.5707196029776677e-05, "epoch": 0.45217391304347826, "percentage": 6.46, "elapsed_time": "0:22:55", "remaining_time": "5:31:54"}
|
||||
{"current_steps": 265, "total_steps": 4025, "loss": 0.3293, "lr": 2.620347394540943e-05, "epoch": 0.4608695652173913, "percentage": 6.58, "elapsed_time": "0:23:28", "remaining_time": "5:33:05"}
|
||||
{"current_steps": 270, "total_steps": 4025, "loss": 0.3249, "lr": 2.6699751861042185e-05, "epoch": 0.46956521739130436, "percentage": 6.71, "elapsed_time": "0:23:57", "remaining_time": "5:33:12"}
|
||||
{"current_steps": 275, "total_steps": 4025, "loss": 0.3167, "lr": 2.719602977667494e-05, "epoch": 0.4782608695652174, "percentage": 6.83, "elapsed_time": "0:24:28", "remaining_time": "5:33:42"}
|
||||
{"current_steps": 280, "total_steps": 4025, "loss": 0.3341, "lr": 2.7692307692307694e-05, "epoch": 0.48695652173913045, "percentage": 6.96, "elapsed_time": "0:24:57", "remaining_time": "5:33:53"}
|
||||
{"current_steps": 285, "total_steps": 4025, "loss": 0.3361, "lr": 2.8188585607940448e-05, "epoch": 0.4956521739130435, "percentage": 7.08, "elapsed_time": "0:25:29", "remaining_time": "5:34:35"}
|
||||
{"current_steps": 290, "total_steps": 4025, "loss": 0.3137, "lr": 2.8684863523573202e-05, "epoch": 0.5043478260869565, "percentage": 7.2, "elapsed_time": "0:26:03", "remaining_time": "5:35:34"}
|
||||
{"current_steps": 295, "total_steps": 4025, "loss": 0.3004, "lr": 2.9181141439205956e-05, "epoch": 0.5130434782608696, "percentage": 7.33, "elapsed_time": "0:26:34", "remaining_time": "5:36:01"}
|
||||
{"current_steps": 300, "total_steps": 4025, "loss": 0.3269, "lr": 2.9677419354838714e-05, "epoch": 0.5217391304347826, "percentage": 7.45, "elapsed_time": "0:27:10", "remaining_time": "5:37:29"}
|
||||
{"current_steps": 305, "total_steps": 4025, "loss": 0.3053, "lr": 3.0173697270471468e-05, "epoch": 0.5304347826086957, "percentage": 7.58, "elapsed_time": "0:27:44", "remaining_time": "5:38:24"}
|
||||
{"current_steps": 310, "total_steps": 4025, "loss": 0.3173, "lr": 3.066997518610422e-05, "epoch": 0.5391304347826087, "percentage": 7.7, "elapsed_time": "0:28:12", "remaining_time": "5:38:01"}
|
||||
{"current_steps": 315, "total_steps": 4025, "loss": 0.2946, "lr": 3.1166253101736976e-05, "epoch": 0.5478260869565217, "percentage": 7.83, "elapsed_time": "0:28:37", "remaining_time": "5:37:08"}
|
||||
{"current_steps": 320, "total_steps": 4025, "loss": 0.2967, "lr": 3.1662531017369734e-05, "epoch": 0.5565217391304348, "percentage": 7.95, "elapsed_time": "0:29:02", "remaining_time": "5:36:17"}
|
||||
{"current_steps": 325, "total_steps": 4025, "loss": 0.2821, "lr": 3.2158808933002485e-05, "epoch": 0.5652173913043478, "percentage": 8.07, "elapsed_time": "0:29:25", "remaining_time": "5:34:56"}
|
||||
{"current_steps": 330, "total_steps": 4025, "loss": 0.2746, "lr": 3.265508684863524e-05, "epoch": 0.5739130434782609, "percentage": 8.2, "elapsed_time": "0:29:48", "remaining_time": "5:33:46"}
|
||||
{"current_steps": 335, "total_steps": 4025, "loss": 0.2748, "lr": 3.315136476426799e-05, "epoch": 0.5826086956521739, "percentage": 8.32, "elapsed_time": "0:30:12", "remaining_time": "5:32:47"}
|
||||
{"current_steps": 340, "total_steps": 4025, "loss": 0.2887, "lr": 3.3647642679900744e-05, "epoch": 0.591304347826087, "percentage": 8.45, "elapsed_time": "0:30:35", "remaining_time": "5:31:37"}
|
||||
{"current_steps": 345, "total_steps": 4025, "loss": 0.2812, "lr": 3.41439205955335e-05, "epoch": 0.6, "percentage": 8.57, "elapsed_time": "0:31:02", "remaining_time": "5:31:05"}
|
||||
{"current_steps": 350, "total_steps": 4025, "loss": 0.2852, "lr": 3.464019851116625e-05, "epoch": 0.6086956521739131, "percentage": 8.7, "elapsed_time": "0:31:29", "remaining_time": "5:30:37"}
|
||||
{"current_steps": 355, "total_steps": 4025, "loss": 0.2777, "lr": 3.513647642679901e-05, "epoch": 0.6173913043478261, "percentage": 8.82, "elapsed_time": "0:31:55", "remaining_time": "5:30:06"}
|
||||
{"current_steps": 360, "total_steps": 4025, "loss": 0.283, "lr": 3.563275434243176e-05, "epoch": 0.6260869565217392, "percentage": 8.94, "elapsed_time": "0:32:22", "remaining_time": "5:29:35"}
|
||||
{"current_steps": 365, "total_steps": 4025, "loss": 0.2729, "lr": 3.612903225806452e-05, "epoch": 0.6347826086956522, "percentage": 9.07, "elapsed_time": "0:32:48", "remaining_time": "5:28:54"}
|
||||
{"current_steps": 370, "total_steps": 4025, "loss": 0.2745, "lr": 3.6625310173697276e-05, "epoch": 0.6434782608695652, "percentage": 9.19, "elapsed_time": "0:33:16", "remaining_time": "5:28:46"}
|
||||
{"current_steps": 375, "total_steps": 4025, "loss": 0.2606, "lr": 3.712158808933003e-05, "epoch": 0.6521739130434783, "percentage": 9.32, "elapsed_time": "0:33:40", "remaining_time": "5:27:41"}
|
||||
{"current_steps": 380, "total_steps": 4025, "loss": 0.253, "lr": 3.7617866004962784e-05, "epoch": 0.6608695652173913, "percentage": 9.44, "elapsed_time": "0:34:06", "remaining_time": "5:27:07"}
|
||||
{"current_steps": 385, "total_steps": 4025, "loss": 0.3637, "lr": 3.8114143920595535e-05, "epoch": 0.6695652173913044, "percentage": 9.57, "elapsed_time": "0:34:40", "remaining_time": "5:27:50"}
|
||||
{"current_steps": 390, "total_steps": 4025, "loss": 0.4223, "lr": 3.861042183622829e-05, "epoch": 0.6782608695652174, "percentage": 9.69, "elapsed_time": "0:35:21", "remaining_time": "5:29:29"}
|
||||
{"current_steps": 395, "total_steps": 4025, "loss": 0.4141, "lr": 3.9106699751861044e-05, "epoch": 0.6869565217391305, "percentage": 9.81, "elapsed_time": "0:36:05", "remaining_time": "5:31:38"}
|
||||
{"current_steps": 400, "total_steps": 4025, "loss": 0.4041, "lr": 3.96029776674938e-05, "epoch": 0.6956521739130435, "percentage": 9.94, "elapsed_time": "0:36:49", "remaining_time": "5:33:41"}
|
||||
{"current_steps": 405, "total_steps": 4025, "loss": 0.3862, "lr": 3.9999992476796216e-05, "epoch": 0.7043478260869566, "percentage": 10.06, "elapsed_time": "0:37:33", "remaining_time": "5:35:45"}
|
||||
{"current_steps": 410, "total_steps": 4025, "loss": 0.381, "lr": 3.9999729165257865e-05, "epoch": 0.7130434782608696, "percentage": 10.19, "elapsed_time": "0:38:16", "remaining_time": "5:37:28"}
|
||||
{"current_steps": 415, "total_steps": 4025, "loss": 0.4086, "lr": 3.9999089699189876e-05, "epoch": 0.7217391304347827, "percentage": 10.31, "elapsed_time": "0:38:58", "remaining_time": "5:38:58"}
|
||||
{"current_steps": 420, "total_steps": 4025, "loss": 0.39, "lr": 3.9998074090619327e-05, "epoch": 0.7304347826086957, "percentage": 10.43, "elapsed_time": "0:39:42", "remaining_time": "5:40:51"}
|
||||
{"current_steps": 425, "total_steps": 4025, "loss": 0.458, "lr": 3.999668235864774e-05, "epoch": 0.7391304347826086, "percentage": 10.56, "elapsed_time": "0:40:18", "remaining_time": "5:41:22"}
|
||||
{"current_steps": 430, "total_steps": 4025, "loss": 0.5334, "lr": 3.999491452945081e-05, "epoch": 0.7478260869565218, "percentage": 10.68, "elapsed_time": "0:40:48", "remaining_time": "5:41:08"}
|
||||
{"current_steps": 435, "total_steps": 4025, "loss": 0.5227, "lr": 3.999277063627782e-05, "epoch": 0.7565217391304347, "percentage": 10.81, "elapsed_time": "0:41:15", "remaining_time": "5:40:30"}
|
||||
{"current_steps": 440, "total_steps": 4025, "loss": 0.4979, "lr": 3.999025071945107e-05, "epoch": 0.7652173913043478, "percentage": 10.93, "elapsed_time": "0:41:41", "remaining_time": "5:39:38"}
|
||||
{"current_steps": 445, "total_steps": 4025, "loss": 0.5066, "lr": 3.9987354826365117e-05, "epoch": 0.7739130434782608, "percentage": 11.06, "elapsed_time": "0:42:06", "remaining_time": "5:38:49"}
|
||||
{"current_steps": 450, "total_steps": 4025, "loss": 0.5037, "lr": 3.998408301148586e-05, "epoch": 0.782608695652174, "percentage": 11.18, "elapsed_time": "0:42:34", "remaining_time": "5:38:16"}
|
||||
{"current_steps": 455, "total_steps": 4025, "loss": 0.5163, "lr": 3.9980435336349536e-05, "epoch": 0.7913043478260869, "percentage": 11.3, "elapsed_time": "0:43:01", "remaining_time": "5:37:34"}
|
||||
{"current_steps": 460, "total_steps": 4025, "loss": 0.4884, "lr": 3.997641186956154e-05, "epoch": 0.8, "percentage": 11.43, "elapsed_time": "0:43:28", "remaining_time": "5:36:55"}
|
||||
{"current_steps": 465, "total_steps": 4025, "loss": 0.479, "lr": 3.9972012686795167e-05, "epoch": 0.808695652173913, "percentage": 11.55, "elapsed_time": "0:43:54", "remaining_time": "5:36:08"}
|
||||
{"current_steps": 470, "total_steps": 4025, "loss": 0.5018, "lr": 3.996723787079017e-05, "epoch": 0.8173913043478261, "percentage": 11.68, "elapsed_time": "0:44:21", "remaining_time": "5:35:32"}
|
||||
{"current_steps": 475, "total_steps": 4025, "loss": 0.4787, "lr": 3.996208751135118e-05, "epoch": 0.8260869565217391, "percentage": 11.8, "elapsed_time": "0:44:49", "remaining_time": "5:35:04"}
|
||||
{"current_steps": 480, "total_steps": 4025, "loss": 0.4879, "lr": 3.9956561705346076e-05, "epoch": 0.8347826086956521, "percentage": 11.93, "elapsed_time": "0:45:16", "remaining_time": "5:34:23"}
|
||||
{"current_steps": 485, "total_steps": 4025, "loss": 0.4682, "lr": 3.995066055670411e-05, "epoch": 0.8434782608695652, "percentage": 12.05, "elapsed_time": "0:45:46", "remaining_time": "5:34:09"}
|
||||
{"current_steps": 490, "total_steps": 4025, "loss": 0.4817, "lr": 3.994438417641398e-05, "epoch": 0.8521739130434782, "percentage": 12.17, "elapsed_time": "0:46:17", "remaining_time": "5:33:59"}
|
||||
{"current_steps": 495, "total_steps": 4025, "loss": 0.431, "lr": 3.993773268252171e-05, "epoch": 0.8608695652173913, "percentage": 12.3, "elapsed_time": "0:46:48", "remaining_time": "5:33:51"}
|
||||
{"current_steps": 500, "total_steps": 4025, "loss": 0.4401, "lr": 3.993070620012849e-05, "epoch": 0.8695652173913043, "percentage": 12.42, "elapsed_time": "0:47:15", "remaining_time": "5:33:08"}
|
||||
{"current_steps": 505, "total_steps": 4025, "loss": 0.3694, "lr": 3.992330486138826e-05, "epoch": 0.8782608695652174, "percentage": 12.55, "elapsed_time": "0:47:31", "remaining_time": "5:31:18"}
|
||||
{"current_steps": 510, "total_steps": 4025, "loss": 0.3569, "lr": 3.9915528805505264e-05, "epoch": 0.8869565217391304, "percentage": 12.67, "elapsed_time": "0:47:49", "remaining_time": "5:29:40"}
|
||||
{"current_steps": 515, "total_steps": 4025, "loss": 0.3765, "lr": 3.9907378178731416e-05, "epoch": 0.8956521739130435, "percentage": 12.8, "elapsed_time": "0:48:07", "remaining_time": "5:27:57"}
|
||||
{"current_steps": 520, "total_steps": 4025, "loss": 0.3631, "lr": 3.9898853134363545e-05, "epoch": 0.9043478260869565, "percentage": 12.92, "elapsed_time": "0:48:23", "remaining_time": "5:26:13"}
|
||||
{"current_steps": 525, "total_steps": 4025, "loss": 0.3444, "lr": 3.9889953832740524e-05, "epoch": 0.9130434782608695, "percentage": 13.04, "elapsed_time": "0:48:46", "remaining_time": "5:25:07"}
|
||||
{"current_steps": 530, "total_steps": 4025, "loss": 0.3438, "lr": 3.9880680441240256e-05, "epoch": 0.9217391304347826, "percentage": 13.17, "elapsed_time": "0:49:11", "remaining_time": "5:24:19"}
|
||||
{"current_steps": 535, "total_steps": 4025, "loss": 0.3583, "lr": 3.9871033134276517e-05, "epoch": 0.9304347826086956, "percentage": 13.29, "elapsed_time": "0:49:28", "remaining_time": "5:22:46"}
|
||||
{"current_steps": 540, "total_steps": 4025, "loss": 0.3673, "lr": 3.986101209329566e-05, "epoch": 0.9391304347826087, "percentage": 13.42, "elapsed_time": "0:49:47", "remaining_time": "5:21:17"}
|
||||
{"current_steps": 545, "total_steps": 4025, "loss": 0.3194, "lr": 3.9850617506773254e-05, "epoch": 0.9478260869565217, "percentage": 13.54, "elapsed_time": "0:50:07", "remaining_time": "5:20:00"}
|
||||
{"current_steps": 550, "total_steps": 4025, "loss": 0.3202, "lr": 3.983984957021047e-05, "epoch": 0.9565217391304348, "percentage": 13.66, "elapsed_time": "0:50:26", "remaining_time": "5:18:42"}
|
||||
{"current_steps": 555, "total_steps": 4025, "loss": 0.3756, "lr": 3.982870848613046e-05, "epoch": 0.9652173913043478, "percentage": 13.79, "elapsed_time": "0:50:47", "remaining_time": "5:17:32"}
|
||||
{"current_steps": 560, "total_steps": 4025, "loss": 0.3222, "lr": 3.981719446407454e-05, "epoch": 0.9739130434782609, "percentage": 13.91, "elapsed_time": "0:51:05", "remaining_time": "5:16:05"}
|
||||
{"current_steps": 565, "total_steps": 4025, "loss": 0.3487, "lr": 3.980530772059819e-05, "epoch": 0.9826086956521739, "percentage": 14.04, "elapsed_time": "0:51:24", "remaining_time": "5:14:50"}
|
||||
{"current_steps": 570, "total_steps": 4025, "loss": 0.3241, "lr": 3.9793048479267086e-05, "epoch": 0.991304347826087, "percentage": 14.16, "elapsed_time": "0:51:44", "remaining_time": "5:13:37"}
|
||||
{"current_steps": 575, "total_steps": 4025, "loss": 0.36, "lr": 3.97804169706528e-05, "epoch": 1.0, "percentage": 14.29, "elapsed_time": "0:52:07", "remaining_time": "5:12:46"}
|
||||
{"current_steps": 580, "total_steps": 4025, "loss": 0.3396, "lr": 3.97674134323285e-05, "epoch": 1.008695652173913, "percentage": 14.41, "elapsed_time": "0:52:30", "remaining_time": "5:11:52"}
|
||||
{"current_steps": 585, "total_steps": 4025, "loss": 0.3106, "lr": 3.975403810886449e-05, "epoch": 1.017391304347826, "percentage": 14.53, "elapsed_time": "0:52:51", "remaining_time": "5:10:47"}
|
||||
{"current_steps": 590, "total_steps": 4025, "loss": 0.3143, "lr": 3.974029125182361e-05, "epoch": 1.0260869565217392, "percentage": 14.66, "elapsed_time": "0:53:14", "remaining_time": "5:09:58"}
|
||||
{"current_steps": 595, "total_steps": 4025, "loss": 0.3109, "lr": 3.9726173119756485e-05, "epoch": 1.0347826086956522, "percentage": 14.78, "elapsed_time": "0:53:36", "remaining_time": "5:09:03"}
|
||||
{"current_steps": 600, "total_steps": 4025, "loss": 0.2985, "lr": 3.9711683978196685e-05, "epoch": 1.0434782608695652, "percentage": 14.91, "elapsed_time": "0:53:57", "remaining_time": "5:08:02"}
|
||||
{"current_steps": 605, "total_steps": 4025, "loss": 0.3101, "lr": 3.96968240996557e-05, "epoch": 1.0521739130434782, "percentage": 15.03, "elapsed_time": "0:54:18", "remaining_time": "5:06:58"}
|
||||
{"current_steps": 610, "total_steps": 4025, "loss": 0.2922, "lr": 3.9681593763617844e-05, "epoch": 1.0608695652173914, "percentage": 15.16, "elapsed_time": "0:54:42", "remaining_time": "5:06:14"}
|
||||
{"current_steps": 615, "total_steps": 4025, "loss": 0.2923, "lr": 3.9665993256535004e-05, "epoch": 1.0695652173913044, "percentage": 15.28, "elapsed_time": "0:55:06", "remaining_time": "5:05:33"}
|
||||
{"current_steps": 620, "total_steps": 4025, "loss": 0.297, "lr": 3.965002287182121e-05, "epoch": 1.0782608695652174, "percentage": 15.4, "elapsed_time": "0:55:27", "remaining_time": "5:04:35"}
|
||||
{"current_steps": 625, "total_steps": 4025, "loss": 0.3024, "lr": 3.9633682909847166e-05, "epoch": 1.0869565217391304, "percentage": 15.53, "elapsed_time": "0:55:49", "remaining_time": "5:03:41"}
|
||||
{"current_steps": 630, "total_steps": 4025, "loss": 0.2846, "lr": 3.961697367793457e-05, "epoch": 1.0956521739130434, "percentage": 15.65, "elapsed_time": "0:56:12", "remaining_time": "5:02:52"}
|
||||
{"current_steps": 635, "total_steps": 4025, "loss": 0.2845, "lr": 3.959989549035033e-05, "epoch": 1.1043478260869566, "percentage": 15.78, "elapsed_time": "0:56:36", "remaining_time": "5:02:12"}
|
||||
{"current_steps": 640, "total_steps": 4025, "loss": 0.2865, "lr": 3.958244866830069e-05, "epoch": 1.1130434782608696, "percentage": 15.9, "elapsed_time": "0:56:56", "remaining_time": "5:01:11"}
|
||||
{"current_steps": 645, "total_steps": 4025, "loss": 0.3007, "lr": 3.956463353992514e-05, "epoch": 1.1217391304347826, "percentage": 16.02, "elapsed_time": "0:57:20", "remaining_time": "5:00:29"}
|
||||
{"current_steps": 650, "total_steps": 4025, "loss": 0.2817, "lr": 3.954645044029029e-05, "epoch": 1.1304347826086956, "percentage": 16.15, "elapsed_time": "0:57:40", "remaining_time": "4:59:27"}
|
||||
{"current_steps": 655, "total_steps": 4025, "loss": 0.4217, "lr": 3.952789971138352e-05, "epoch": 1.1391304347826088, "percentage": 16.27, "elapsed_time": "0:58:01", "remaining_time": "4:58:31"}
|
||||
{"current_steps": 660, "total_steps": 4025, "loss": 0.4917, "lr": 3.950898170210661e-05, "epoch": 1.1478260869565218, "percentage": 16.4, "elapsed_time": "0:58:17", "remaining_time": "4:57:13"}
|
||||
{"current_steps": 665, "total_steps": 4025, "loss": 0.4936, "lr": 3.948969676826911e-05, "epoch": 1.1565217391304348, "percentage": 16.52, "elapsed_time": "0:58:43", "remaining_time": "4:56:42"}
|
||||
{"current_steps": 670, "total_steps": 4025, "loss": 0.4879, "lr": 3.947004527258169e-05, "epoch": 1.1652173913043478, "percentage": 16.65, "elapsed_time": "0:59:04", "remaining_time": "4:55:50"}
|
||||
{"current_steps": 675, "total_steps": 4025, "loss": 0.5075, "lr": 3.945002758464932e-05, "epoch": 1.1739130434782608, "percentage": 16.77, "elapsed_time": "0:59:29", "remaining_time": "4:55:14"}
|
||||
{"current_steps": 680, "total_steps": 4025, "loss": 0.4805, "lr": 3.942964408096429e-05, "epoch": 1.182608695652174, "percentage": 16.89, "elapsed_time": "0:59:52", "remaining_time": "4:54:31"}
|
||||
{"current_steps": 685, "total_steps": 4025, "loss": 0.4831, "lr": 3.940889514489916e-05, "epoch": 1.191304347826087, "percentage": 17.02, "elapsed_time": "1:00:15", "remaining_time": "4:53:47"}
|
||||
{"current_steps": 690, "total_steps": 4025, "loss": 0.4631, "lr": 3.9387781166699524e-05, "epoch": 1.2, "percentage": 17.14, "elapsed_time": "1:00:35", "remaining_time": "4:52:51"}
|
||||
{"current_steps": 695, "total_steps": 4025, "loss": 0.4888, "lr": 3.936630254347669e-05, "epoch": 1.208695652173913, "percentage": 17.27, "elapsed_time": "1:00:54", "remaining_time": "4:51:49"}
|
||||
{"current_steps": 700, "total_steps": 4025, "loss": 0.4919, "lr": 3.9344459679200196e-05, "epoch": 1.2173913043478262, "percentage": 17.39, "elapsed_time": "1:01:19", "remaining_time": "4:51:18"}
|
||||
{"current_steps": 705, "total_steps": 4025, "loss": 0.4798, "lr": 3.932225298469022e-05, "epoch": 1.2260869565217392, "percentage": 17.52, "elapsed_time": "1:01:39", "remaining_time": "4:50:23"}
|
||||
{"current_steps": 710, "total_steps": 4025, "loss": 0.4688, "lr": 3.929968287760986e-05, "epoch": 1.2347826086956522, "percentage": 17.64, "elapsed_time": "1:02:05", "remaining_time": "4:49:55"}
|
||||
{"current_steps": 715, "total_steps": 4025, "loss": 0.4619, "lr": 3.927674978245726e-05, "epoch": 1.2434782608695651, "percentage": 17.76, "elapsed_time": "1:02:28", "remaining_time": "4:49:13"}
|
||||
{"current_steps": 720, "total_steps": 4025, "loss": 0.4502, "lr": 3.9253454130557654e-05, "epoch": 1.2521739130434781, "percentage": 17.89, "elapsed_time": "1:02:47", "remaining_time": "4:48:12"}
|
||||
{"current_steps": 725, "total_steps": 4025, "loss": 0.4722, "lr": 3.9229796360055204e-05, "epoch": 1.2608695652173914, "percentage": 18.01, "elapsed_time": "1:03:08", "remaining_time": "4:47:25"}
|
||||
{"current_steps": 730, "total_steps": 4025, "loss": 0.4428, "lr": 3.920577691590483e-05, "epoch": 1.2695652173913043, "percentage": 18.14, "elapsed_time": "1:03:31", "remaining_time": "4:46:43"}
|
||||
{"current_steps": 735, "total_steps": 4025, "loss": 0.422, "lr": 3.9181396249863775e-05, "epoch": 1.2782608695652173, "percentage": 18.26, "elapsed_time": "1:04:10", "remaining_time": "4:47:15"}
|
||||
{"current_steps": 740, "total_steps": 4025, "loss": 0.4347, "lr": 3.915665482048316e-05, "epoch": 1.2869565217391306, "percentage": 18.39, "elapsed_time": "1:04:45", "remaining_time": "4:47:27"}
|
||||
{"current_steps": 745, "total_steps": 4025, "loss": 0.4229, "lr": 3.9131553093099303e-05, "epoch": 1.2956521739130435, "percentage": 18.51, "elapsed_time": "1:05:18", "remaining_time": "4:47:32"}
|
||||
{"current_steps": 750, "total_steps": 4025, "loss": 0.427, "lr": 3.910609153982504e-05, "epoch": 1.3043478260869565, "percentage": 18.63, "elapsed_time": "1:05:52", "remaining_time": "4:47:37"}
|
||||
{"current_steps": 755, "total_steps": 4025, "loss": 0.4546, "lr": 3.9080270639540765e-05, "epoch": 1.3130434782608695, "percentage": 18.76, "elapsed_time": "1:06:24", "remaining_time": "4:47:35"}
|
||||
{"current_steps": 760, "total_steps": 4025, "loss": 0.4336, "lr": 3.90540908778855e-05, "epoch": 1.3217391304347825, "percentage": 18.88, "elapsed_time": "1:07:02", "remaining_time": "4:48:02"}
|
||||
{"current_steps": 765, "total_steps": 4025, "loss": 0.4209, "lr": 3.902755274724771e-05, "epoch": 1.3304347826086955, "percentage": 19.01, "elapsed_time": "1:07:36", "remaining_time": "4:48:05"}
|
||||
{"current_steps": 770, "total_steps": 4025, "loss": 0.4611, "lr": 3.900065674675605e-05, "epoch": 1.3391304347826087, "percentage": 19.13, "elapsed_time": "1:08:01", "remaining_time": "4:47:35"}
|
||||
{"current_steps": 775, "total_steps": 4025, "loss": 0.4265, "lr": 3.897340338226999e-05, "epoch": 1.3478260869565217, "percentage": 19.25, "elapsed_time": "1:08:39", "remaining_time": "4:47:55"}
|
||||
{"current_steps": 780, "total_steps": 4025, "loss": 0.4193, "lr": 3.89457931663703e-05, "epoch": 1.3565217391304347, "percentage": 19.38, "elapsed_time": "1:09:08", "remaining_time": "4:47:39"}
|
||||
{"current_steps": 785, "total_steps": 4025, "loss": 0.4256, "lr": 3.89178266183494e-05, "epoch": 1.365217391304348, "percentage": 19.5, "elapsed_time": "1:09:38", "remaining_time": "4:47:25"}
|
||||
{"current_steps": 790, "total_steps": 4025, "loss": 0.4241, "lr": 3.88895042642016e-05, "epoch": 1.373913043478261, "percentage": 19.63, "elapsed_time": "1:10:07", "remaining_time": "4:47:09"}
|
||||
{"current_steps": 795, "total_steps": 4025, "loss": 0.4254, "lr": 3.88608266366132e-05, "epoch": 1.382608695652174, "percentage": 19.75, "elapsed_time": "1:10:42", "remaining_time": "4:47:15"}
|
||||
{"current_steps": 800, "total_steps": 4025, "loss": 0.4317, "lr": 3.8831794274952485e-05, "epoch": 1.391304347826087, "percentage": 19.88, "elapsed_time": "1:11:14", "remaining_time": "4:47:13"}
|
||||
{"current_steps": 805, "total_steps": 4025, "loss": 0.4085, "lr": 3.8802407725259566e-05, "epoch": 1.4, "percentage": 20.0, "elapsed_time": "1:11:47", "remaining_time": "4:47:11"}
|
||||
{"current_steps": 810, "total_steps": 4025, "loss": 0.3405, "lr": 3.877266754023611e-05, "epoch": 1.4086956521739131, "percentage": 20.12, "elapsed_time": "1:12:17", "remaining_time": "4:46:57"}
|
||||
{"current_steps": 815, "total_steps": 4025, "loss": 0.2926, "lr": 3.874257427923496e-05, "epoch": 1.4173913043478261, "percentage": 20.25, "elapsed_time": "1:12:48", "remaining_time": "4:46:45"}
|
||||
{"current_steps": 820, "total_steps": 4025, "loss": 0.2783, "lr": 3.8712128508249606e-05, "epoch": 1.4260869565217391, "percentage": 20.37, "elapsed_time": "1:13:21", "remaining_time": "4:46:42"}
|
||||
{"current_steps": 825, "total_steps": 4025, "loss": 0.2836, "lr": 3.8681330799903526e-05, "epoch": 1.434782608695652, "percentage": 20.5, "elapsed_time": "1:13:49", "remaining_time": "4:46:22"}
|
||||
{"current_steps": 830, "total_steps": 4025, "loss": 0.2681, "lr": 3.865018173343945e-05, "epoch": 1.4434782608695653, "percentage": 20.62, "elapsed_time": "1:14:19", "remaining_time": "4:46:04"}
|
||||
{"current_steps": 835, "total_steps": 4025, "loss": 0.2918, "lr": 3.861868189470843e-05, "epoch": 1.4521739130434783, "percentage": 20.75, "elapsed_time": "1:14:48", "remaining_time": "4:45:47"}
|
||||
{"current_steps": 840, "total_steps": 4025, "loss": 0.2767, "lr": 3.858683187615884e-05, "epoch": 1.4608695652173913, "percentage": 20.87, "elapsed_time": "1:15:21", "remaining_time": "4:45:44"}
|
||||
{"current_steps": 845, "total_steps": 4025, "loss": 0.2733, "lr": 3.8554632276825234e-05, "epoch": 1.4695652173913043, "percentage": 20.99, "elapsed_time": "1:15:50", "remaining_time": "4:45:25"}
|
||||
{"current_steps": 850, "total_steps": 4025, "loss": 0.2691, "lr": 3.852208370231705e-05, "epoch": 1.4782608695652173, "percentage": 21.12, "elapsed_time": "1:16:21", "remaining_time": "4:45:12"}
|
||||
{"current_steps": 855, "total_steps": 4025, "loss": 0.2836, "lr": 3.848918676480729e-05, "epoch": 1.4869565217391305, "percentage": 21.24, "elapsed_time": "1:16:50", "remaining_time": "4:44:55"}
|
||||
{"current_steps": 860, "total_steps": 4025, "loss": 0.2897, "lr": 3.8455942083020925e-05, "epoch": 1.4956521739130435, "percentage": 21.37, "elapsed_time": "1:17:22", "remaining_time": "4:44:47"}
|
||||
{"current_steps": 865, "total_steps": 4025, "loss": 0.2704, "lr": 3.84223502822233e-05, "epoch": 1.5043478260869565, "percentage": 21.49, "elapsed_time": "1:17:56", "remaining_time": "4:44:43"}
|
||||
{"current_steps": 870, "total_steps": 4025, "loss": 0.2572, "lr": 3.8388411994208374e-05, "epoch": 1.5130434782608697, "percentage": 21.61, "elapsed_time": "1:18:27", "remaining_time": "4:44:31"}
|
||||
{"current_steps": 875, "total_steps": 4025, "loss": 0.2817, "lr": 3.835412785728683e-05, "epoch": 1.5217391304347827, "percentage": 21.74, "elapsed_time": "1:19:03", "remaining_time": "4:44:37"}
|
||||
{"current_steps": 880, "total_steps": 4025, "loss": 0.264, "lr": 3.831949851627408e-05, "epoch": 1.5304347826086957, "percentage": 21.86, "elapsed_time": "1:19:37", "remaining_time": "4:44:34"}
|
||||
{"current_steps": 885, "total_steps": 4025, "loss": 0.2597, "lr": 3.82845246224781e-05, "epoch": 1.5391304347826087, "percentage": 21.99, "elapsed_time": "1:20:05", "remaining_time": "4:44:08"}
|
||||
{"current_steps": 890, "total_steps": 4025, "loss": 0.2414, "lr": 3.8249206833687234e-05, "epoch": 1.5478260869565217, "percentage": 22.11, "elapsed_time": "1:20:29", "remaining_time": "4:43:33"}
|
||||
{"current_steps": 895, "total_steps": 4025, "loss": 0.245, "lr": 3.8213545814157795e-05, "epoch": 1.5565217391304347, "percentage": 22.24, "elapsed_time": "1:20:55", "remaining_time": "4:42:59"}
|
||||
{"current_steps": 900, "total_steps": 4025, "loss": 0.2376, "lr": 3.8177542234601557e-05, "epoch": 1.5652173913043477, "percentage": 22.36, "elapsed_time": "1:21:17", "remaining_time": "4:42:15"}
|
||||
{"current_steps": 905, "total_steps": 4025, "loss": 0.2318, "lr": 3.814119677217317e-05, "epoch": 1.5739130434782609, "percentage": 22.48, "elapsed_time": "1:21:40", "remaining_time": "4:41:35"}
|
||||
{"current_steps": 910, "total_steps": 4025, "loss": 0.2334, "lr": 3.8104510110457414e-05, "epoch": 1.5826086956521739, "percentage": 22.61, "elapsed_time": "1:22:04", "remaining_time": "4:40:58"}
|
||||
{"current_steps": 915, "total_steps": 4025, "loss": 0.2485, "lr": 3.806748293945632e-05, "epoch": 1.591304347826087, "percentage": 22.73, "elapsed_time": "1:22:28", "remaining_time": "4:40:18"}
|
||||
{"current_steps": 920, "total_steps": 4025, "loss": 0.2433, "lr": 3.803011595557623e-05, "epoch": 1.6, "percentage": 22.86, "elapsed_time": "1:22:54", "remaining_time": "4:39:49"}
|
||||
{"current_steps": 925, "total_steps": 4025, "loss": 0.2456, "lr": 3.799240986161468e-05, "epoch": 1.608695652173913, "percentage": 22.98, "elapsed_time": "1:23:21", "remaining_time": "4:39:22"}
|
||||
{"current_steps": 930, "total_steps": 4025, "loss": 0.2404, "lr": 3.7954365366747155e-05, "epoch": 1.617391304347826, "percentage": 23.11, "elapsed_time": "1:23:48", "remaining_time": "4:38:54"}
|
||||
{"current_steps": 935, "total_steps": 4025, "loss": 0.2463, "lr": 3.7915983186513824e-05, "epoch": 1.626086956521739, "percentage": 23.23, "elapsed_time": "1:24:15", "remaining_time": "4:38:26"}
|
||||
{"current_steps": 940, "total_steps": 4025, "loss": 0.2365, "lr": 3.787726404280599e-05, "epoch": 1.634782608695652, "percentage": 23.35, "elapsed_time": "1:24:40", "remaining_time": "4:37:54"}
|
||||
{"current_steps": 945, "total_steps": 4025, "loss": 0.2407, "lr": 3.783820866385259e-05, "epoch": 1.643478260869565, "percentage": 23.48, "elapsed_time": "1:25:09", "remaining_time": "4:37:33"}
|
||||
{"current_steps": 950, "total_steps": 4025, "loss": 0.228, "lr": 3.779881778420644e-05, "epoch": 1.6521739130434783, "percentage": 23.6, "elapsed_time": "1:25:32", "remaining_time": "4:36:53"}
|
||||
{"current_steps": 955, "total_steps": 4025, "loss": 0.2221, "lr": 3.775909214473049e-05, "epoch": 1.6608695652173913, "percentage": 23.73, "elapsed_time": "1:25:58", "remaining_time": "4:36:23"}
|
||||
{"current_steps": 960, "total_steps": 4025, "loss": 0.3074, "lr": 3.7719032492583794e-05, "epoch": 1.6695652173913045, "percentage": 23.85, "elapsed_time": "1:26:32", "remaining_time": "4:36:19"}
|
||||
{"current_steps": 965, "total_steps": 4025, "loss": 0.3503, "lr": 3.767863958120755e-05, "epoch": 1.6782608695652175, "percentage": 23.98, "elapsed_time": "1:27:13", "remaining_time": "4:36:35"}
|
||||
{"current_steps": 970, "total_steps": 4025, "loss": 0.345, "lr": 3.7637914170310874e-05, "epoch": 1.6869565217391305, "percentage": 24.1, "elapsed_time": "1:27:57", "remaining_time": "4:37:02"}
|
||||
{"current_steps": 975, "total_steps": 4025, "loss": 0.3413, "lr": 3.759685702585652e-05, "epoch": 1.6956521739130435, "percentage": 24.22, "elapsed_time": "1:28:41", "remaining_time": "4:37:27"}
|
||||
{"current_steps": 980, "total_steps": 4025, "loss": 0.3286, "lr": 3.75554689200465e-05, "epoch": 1.7043478260869565, "percentage": 24.35, "elapsed_time": "1:29:26", "remaining_time": "4:37:53"}
|
||||
{"current_steps": 985, "total_steps": 4025, "loss": 0.3282, "lr": 3.751375063130751e-05, "epoch": 1.7130434782608694, "percentage": 24.47, "elapsed_time": "1:30:08", "remaining_time": "4:38:13"}
|
||||
{"current_steps": 990, "total_steps": 4025, "loss": 0.3512, "lr": 3.747170294427636e-05, "epoch": 1.7217391304347827, "percentage": 24.6, "elapsed_time": "1:30:50", "remaining_time": "4:38:28"}
|
||||
{"current_steps": 995, "total_steps": 4025, "loss": 0.3386, "lr": 3.742932664978514e-05, "epoch": 1.7304347826086957, "percentage": 24.72, "elapsed_time": "1:31:34", "remaining_time": "4:38:53"}
|
||||
{"current_steps": 1000, "total_steps": 4025, "loss": 0.3878, "lr": 3.738662254484641e-05, "epoch": 1.7391304347826086, "percentage": 24.84, "elapsed_time": "1:32:10", "remaining_time": "4:38:49"}
|
||||
{"current_steps": 1005, "total_steps": 4025, "loss": 0.4513, "lr": 3.7343591432638166e-05, "epoch": 1.7478260869565219, "percentage": 24.97, "elapsed_time": "1:32:40", "remaining_time": "4:38:29"}
|
||||
{"current_steps": 1010, "total_steps": 4025, "loss": 0.4402, "lr": 3.7300234122488756e-05, "epoch": 1.7565217391304349, "percentage": 25.09, "elapsed_time": "1:33:07", "remaining_time": "4:38:00"}
|
||||
{"current_steps": 1015, "total_steps": 4025, "loss": 0.4215, "lr": 3.725655142986165e-05, "epoch": 1.7652173913043478, "percentage": 25.22, "elapsed_time": "1:33:33", "remaining_time": "4:37:26"}
|
||||
{"current_steps": 1020, "total_steps": 4025, "loss": 0.4325, "lr": 3.721254417634012e-05, "epoch": 1.7739130434782608, "percentage": 25.34, "elapsed_time": "1:33:59", "remaining_time": "4:36:53"}
|
||||
{"current_steps": 1025, "total_steps": 4025, "loss": 0.4314, "lr": 3.716821318961176e-05, "epoch": 1.7826086956521738, "percentage": 25.47, "elapsed_time": "1:34:26", "remaining_time": "4:36:26"}
|
||||
{"current_steps": 1030, "total_steps": 4025, "loss": 0.4464, "lr": 3.712355930345292e-05, "epoch": 1.7913043478260868, "percentage": 25.59, "elapsed_time": "1:34:53", "remaining_time": "4:35:55"}
|
||||
{"current_steps": 1035, "total_steps": 4025, "loss": 0.4205, "lr": 3.707858335771306e-05, "epoch": 1.8, "percentage": 25.71, "elapsed_time": "1:35:20", "remaining_time": "4:35:26"}
|
||||
{"current_steps": 1040, "total_steps": 4025, "loss": 0.415, "lr": 3.703328619829892e-05, "epoch": 1.808695652173913, "percentage": 25.84, "elapsed_time": "1:35:46", "remaining_time": "4:34:53"}
|
||||
{"current_steps": 1045, "total_steps": 4025, "loss": 0.4346, "lr": 3.6987668677158624e-05, "epoch": 1.8173913043478263, "percentage": 25.96, "elapsed_time": "1:36:13", "remaining_time": "4:34:25"}
|
||||
{"current_steps": 1050, "total_steps": 4025, "loss": 0.4183, "lr": 3.6941731652265646e-05, "epoch": 1.8260869565217392, "percentage": 26.09, "elapsed_time": "1:36:42", "remaining_time": "4:33:59"}
|
||||
{"current_steps": 1055, "total_steps": 4025, "loss": 0.4246, "lr": 3.689547598760269e-05, "epoch": 1.8347826086956522, "percentage": 26.21, "elapsed_time": "1:37:08", "remaining_time": "4:33:29"}
|
||||
{"current_steps": 1060, "total_steps": 4025, "loss": 0.4092, "lr": 3.6848902553145424e-05, "epoch": 1.8434782608695652, "percentage": 26.34, "elapsed_time": "1:37:39", "remaining_time": "4:33:08"}
|
||||
{"current_steps": 1065, "total_steps": 4025, "loss": 0.4244, "lr": 3.6802012224846126e-05, "epoch": 1.8521739130434782, "percentage": 26.46, "elapsed_time": "1:38:10", "remaining_time": "4:32:50"}
|
||||
{"current_steps": 1070, "total_steps": 4025, "loss": 0.3793, "lr": 3.675480588461721e-05, "epoch": 1.8608695652173912, "percentage": 26.58, "elapsed_time": "1:38:41", "remaining_time": "4:32:32"}
|
||||
{"current_steps": 1075, "total_steps": 4025, "loss": 0.3818, "lr": 3.670728442031463e-05, "epoch": 1.8695652173913042, "percentage": 26.71, "elapsed_time": "1:39:07", "remaining_time": "4:32:00"}
|
||||
{"current_steps": 1080, "total_steps": 4025, "loss": 0.3062, "lr": 3.66594487257212e-05, "epoch": 1.8782608695652174, "percentage": 26.83, "elapsed_time": "1:39:23", "remaining_time": "4:31:02"}
|
||||
{"current_steps": 1085, "total_steps": 4025, "loss": 0.2969, "lr": 3.661129970052975e-05, "epoch": 1.8869565217391304, "percentage": 26.96, "elapsed_time": "1:39:42", "remaining_time": "4:30:09"}
|
||||
{"current_steps": 1090, "total_steps": 4025, "loss": 0.3173, "lr": 3.656283825032625e-05, "epoch": 1.8956521739130436, "percentage": 27.08, "elapsed_time": "1:39:59", "remaining_time": "4:29:14"}
|
||||
{"current_steps": 1095, "total_steps": 4025, "loss": 0.306, "lr": 3.651406528657274e-05, "epoch": 1.9043478260869566, "percentage": 27.2, "elapsed_time": "1:40:16", "remaining_time": "4:28:18"}
|
||||
{"current_steps": 1100, "total_steps": 4025, "loss": 0.2951, "lr": 3.646498172659021e-05, "epoch": 1.9130434782608696, "percentage": 27.33, "elapsed_time": "1:40:38", "remaining_time": "4:27:36"}
|
||||
{"current_steps": 1105, "total_steps": 4025, "loss": 0.2939, "lr": 3.641558849354132e-05, "epoch": 1.9217391304347826, "percentage": 27.45, "elapsed_time": "1:41:03", "remaining_time": "4:27:02"}
|
||||
{"current_steps": 1110, "total_steps": 4025, "loss": 0.3047, "lr": 3.636588651641308e-05, "epoch": 1.9304347826086956, "percentage": 27.58, "elapsed_time": "1:41:21", "remaining_time": "4:26:09"}
|
||||
{"current_steps": 1115, "total_steps": 4025, "loss": 0.3164, "lr": 3.631587672999933e-05, "epoch": 1.9391304347826086, "percentage": 27.7, "elapsed_time": "1:41:39", "remaining_time": "4:25:18"}
|
||||
{"current_steps": 1120, "total_steps": 4025, "loss": 0.272, "lr": 3.62655600748832e-05, "epoch": 1.9478260869565216, "percentage": 27.83, "elapsed_time": "1:41:59", "remaining_time": "4:24:32"}
|
||||
{"current_steps": 1125, "total_steps": 4025, "loss": 0.2729, "lr": 3.621493749741938e-05, "epoch": 1.9565217391304348, "percentage": 27.95, "elapsed_time": "1:42:19", "remaining_time": "4:23:45"}
|
||||
{"current_steps": 1130, "total_steps": 4025, "loss": 0.3249, "lr": 3.616400994971637e-05, "epoch": 1.9652173913043478, "percentage": 28.07, "elapsed_time": "1:42:39", "remaining_time": "4:23:00"}
|
||||
{"current_steps": 1135, "total_steps": 4025, "loss": 0.2771, "lr": 3.611277838961852e-05, "epoch": 1.973913043478261, "percentage": 28.2, "elapsed_time": "1:42:57", "remaining_time": "4:22:09"}
|
||||
{"current_steps": 1140, "total_steps": 4025, "loss": 0.3021, "lr": 3.6061243780688045e-05, "epoch": 1.982608695652174, "percentage": 28.32, "elapsed_time": "1:43:17", "remaining_time": "4:21:23"}
|
||||
{"current_steps": 1145, "total_steps": 4025, "loss": 0.2776, "lr": 3.60094070921869e-05, "epoch": 1.991304347826087, "percentage": 28.45, "elapsed_time": "1:43:36", "remaining_time": "4:20:37"}
|
||||
{"current_steps": 1150, "total_steps": 4025, "loss": 0.3127, "lr": 3.595726929905855e-05, "epoch": 2.0, "percentage": 28.57, "elapsed_time": "1:44:00", "remaining_time": "4:20:00"}
|
||||
{"current_steps": 1155, "total_steps": 4025, "loss": 0.2821, "lr": 3.590483138190963e-05, "epoch": 2.008695652173913, "percentage": 28.7, "elapsed_time": "1:44:22", "remaining_time": "4:19:22"}
|
||||
{"current_steps": 1160, "total_steps": 4025, "loss": 0.2612, "lr": 3.58520943269915e-05, "epoch": 2.017391304347826, "percentage": 28.82, "elapsed_time": "1:44:43", "remaining_time": "4:18:39"}
|
||||
{"current_steps": 1165, "total_steps": 4025, "loss": 0.2688, "lr": 3.579905912618168e-05, "epoch": 2.026086956521739, "percentage": 28.94, "elapsed_time": "1:45:06", "remaining_time": "4:18:02"}
|
||||
{"current_steps": 1170, "total_steps": 4025, "loss": 0.2671, "lr": 3.574572677696524e-05, "epoch": 2.034782608695652, "percentage": 29.07, "elapsed_time": "1:45:29", "remaining_time": "4:17:24"}
|
||||
{"current_steps": 1175, "total_steps": 4025, "loss": 0.2595, "lr": 3.5692098282416e-05, "epoch": 2.0434782608695654, "percentage": 29.19, "elapsed_time": "1:45:50", "remaining_time": "4:16:42"}
|
||||
{"current_steps": 1180, "total_steps": 4025, "loss": 0.2698, "lr": 3.563817465117768e-05, "epoch": 2.0521739130434784, "percentage": 29.32, "elapsed_time": "1:46:10", "remaining_time": "4:15:59"}
|
||||
{"current_steps": 1185, "total_steps": 4025, "loss": 0.2541, "lr": 3.558395689744491e-05, "epoch": 2.0608695652173914, "percentage": 29.44, "elapsed_time": "1:46:34", "remaining_time": "4:15:24"}
|
||||
{"current_steps": 1190, "total_steps": 4025, "loss": 0.2549, "lr": 3.5529446040944195e-05, "epoch": 2.0695652173913044, "percentage": 29.57, "elapsed_time": "1:46:58", "remaining_time": "4:14:51"}
|
||||
{"current_steps": 1195, "total_steps": 4025, "loss": 0.2594, "lr": 3.547464310691468e-05, "epoch": 2.0782608695652174, "percentage": 29.69, "elapsed_time": "1:47:20", "remaining_time": "4:14:11"}
|
||||
{"current_steps": 1200, "total_steps": 4025, "loss": 0.2656, "lr": 3.5419549126088934e-05, "epoch": 2.0869565217391304, "percentage": 29.81, "elapsed_time": "1:47:42", "remaining_time": "4:13:32"}
|
||||
{"current_steps": 1205, "total_steps": 4025, "loss": 0.2501, "lr": 3.53641651346735e-05, "epoch": 2.0956521739130434, "percentage": 29.94, "elapsed_time": "1:48:04", "remaining_time": "4:12:55"}
|
||||
{"current_steps": 1210, "total_steps": 4025, "loss": 0.2513, "lr": 3.530849217432946e-05, "epoch": 2.1043478260869564, "percentage": 30.06, "elapsed_time": "1:48:28", "remaining_time": "4:12:22"}
|
||||
{"current_steps": 1215, "total_steps": 4025, "loss": 0.2521, "lr": 3.525253129215278e-05, "epoch": 2.1130434782608694, "percentage": 30.19, "elapsed_time": "1:48:49", "remaining_time": "4:11:40"}
|
||||
{"current_steps": 1220, "total_steps": 4025, "loss": 0.2672, "lr": 3.519628354065471e-05, "epoch": 2.121739130434783, "percentage": 30.31, "elapsed_time": "1:49:12", "remaining_time": "4:11:06"}
|
||||
{"current_steps": 1225, "total_steps": 4025, "loss": 0.2493, "lr": 3.5139749977741867e-05, "epoch": 2.130434782608696, "percentage": 30.43, "elapsed_time": "1:49:32", "remaining_time": "4:10:23"}
|
||||
{"current_steps": 1230, "total_steps": 4025, "loss": 0.3737, "lr": 3.508293166669646e-05, "epoch": 2.139130434782609, "percentage": 30.56, "elapsed_time": "1:49:53", "remaining_time": "4:09:43"}
|
||||
{"current_steps": 1235, "total_steps": 4025, "loss": 0.4306, "lr": 3.502582967615622e-05, "epoch": 2.1478260869565218, "percentage": 30.68, "elapsed_time": "1:50:10", "remaining_time": "4:08:52"}
|
||||
{"current_steps": 1240, "total_steps": 4025, "loss": 0.4357, "lr": 3.496844508009428e-05, "epoch": 2.1565217391304348, "percentage": 30.81, "elapsed_time": "1:50:35", "remaining_time": "4:08:23"}
|
||||
{"current_steps": 1245, "total_steps": 4025, "loss": 0.4305, "lr": 3.491077895779908e-05, "epoch": 2.1652173913043478, "percentage": 30.93, "elapsed_time": "1:50:57", "remaining_time": "4:07:45"}
|
||||
{"current_steps": 1250, "total_steps": 4025, "loss": 0.4493, "lr": 3.485283239385393e-05, "epoch": 2.1739130434782608, "percentage": 31.06, "elapsed_time": "1:51:21", "remaining_time": "4:07:13"}
|
||||
{"current_steps": 1255, "total_steps": 4025, "loss": 0.4235, "lr": 3.4794606478116736e-05, "epoch": 2.1826086956521737, "percentage": 31.18, "elapsed_time": "1:51:44", "remaining_time": "4:06:37"}
|
||||
{"current_steps": 1260, "total_steps": 4025, "loss": 0.427, "lr": 3.473610230569941e-05, "epoch": 2.1913043478260867, "percentage": 31.3, "elapsed_time": "1:52:07", "remaining_time": "4:06:02"}
|
||||
{"current_steps": 1265, "total_steps": 4025, "loss": 0.4061, "lr": 3.467732097694733e-05, "epoch": 2.2, "percentage": 31.43, "elapsed_time": "1:52:27", "remaining_time": "4:05:22"}
|
||||
{"current_steps": 1270, "total_steps": 4025, "loss": 0.4283, "lr": 3.4618263597418625e-05, "epoch": 2.208695652173913, "percentage": 31.55, "elapsed_time": "1:52:46", "remaining_time": "4:04:38"}
|
||||
{"current_steps": 1275, "total_steps": 4025, "loss": 0.4366, "lr": 3.455893127786338e-05, "epoch": 2.217391304347826, "percentage": 31.68, "elapsed_time": "1:53:11", "remaining_time": "4:04:09"}
|
||||
{"current_steps": 1280, "total_steps": 4025, "loss": 0.424, "lr": 3.449932513420274e-05, "epoch": 2.226086956521739, "percentage": 31.8, "elapsed_time": "1:53:31", "remaining_time": "4:03:28"}
|
||||
{"current_steps": 1285, "total_steps": 4025, "loss": 0.4119, "lr": 3.4439446287507926e-05, "epoch": 2.234782608695652, "percentage": 31.93, "elapsed_time": "1:53:57", "remaining_time": "4:02:59"}
|
||||
{"current_steps": 1290, "total_steps": 4025, "loss": 0.4115, "lr": 3.437929586397917e-05, "epoch": 2.243478260869565, "percentage": 32.05, "elapsed_time": "1:54:20", "remaining_time": "4:02:25"}
|
||||
{"current_steps": 1295, "total_steps": 4025, "loss": 0.3959, "lr": 3.43188749949245e-05, "epoch": 2.252173913043478, "percentage": 32.17, "elapsed_time": "1:54:39", "remaining_time": "4:01:42"}
|
||||
{"current_steps": 1300, "total_steps": 4025, "loss": 0.4182, "lr": 3.425818481673848e-05, "epoch": 2.260869565217391, "percentage": 32.3, "elapsed_time": "1:55:01", "remaining_time": "4:01:05"}
|
||||
{"current_steps": 1305, "total_steps": 4025, "loss": 0.3942, "lr": 3.419722647088085e-05, "epoch": 2.269565217391304, "percentage": 32.42, "elapsed_time": "1:55:23", "remaining_time": "4:00:30"}
|
||||
{"current_steps": 1310, "total_steps": 4025, "loss": 0.3759, "lr": 3.413600110385502e-05, "epoch": 2.2782608695652176, "percentage": 32.55, "elapsed_time": "1:56:02", "remaining_time": "4:00:30"}
|
||||
{"current_steps": 1315, "total_steps": 4025, "loss": 0.3859, "lr": 3.407450986718654e-05, "epoch": 2.2869565217391306, "percentage": 32.67, "elapsed_time": "1:56:37", "remaining_time": "4:00:21"}
|
||||
{"current_steps": 1320, "total_steps": 4025, "loss": 0.3766, "lr": 3.401275391740145e-05, "epoch": 2.2956521739130435, "percentage": 32.8, "elapsed_time": "1:57:11", "remaining_time": "4:00:08"}
|
||||
{"current_steps": 1325, "total_steps": 4025, "loss": 0.3792, "lr": 3.395073441600447e-05, "epoch": 2.3043478260869565, "percentage": 32.92, "elapsed_time": "1:57:44", "remaining_time": "3:59:55"}
|
||||
{"current_steps": 1330, "total_steps": 4025, "loss": 0.4099, "lr": 3.3888452529457223e-05, "epoch": 2.3130434782608695, "percentage": 33.04, "elapsed_time": "1:58:16", "remaining_time": "3:59:39"}
|
||||
{"current_steps": 1335, "total_steps": 4025, "loss": 0.389, "lr": 3.382590942915625e-05, "epoch": 2.3217391304347825, "percentage": 33.17, "elapsed_time": "1:58:55", "remaining_time": "3:59:37"}
|
||||
{"current_steps": 1340, "total_steps": 4025, "loss": 0.373, "lr": 3.3763106291411025e-05, "epoch": 2.3304347826086955, "percentage": 33.29, "elapsed_time": "1:59:28", "remaining_time": "3:59:24"}
|
||||
{"current_steps": 1345, "total_steps": 4025, "loss": 0.4117, "lr": 3.370004429742177e-05, "epoch": 2.3391304347826085, "percentage": 33.42, "elapsed_time": "1:59:54", "remaining_time": "3:58:55"}
|
||||
{"current_steps": 1350, "total_steps": 4025, "loss": 0.3817, "lr": 3.363672463325727e-05, "epoch": 2.3478260869565215, "percentage": 33.54, "elapsed_time": "2:00:31", "remaining_time": "3:58:50"}
|
||||
{"current_steps": 1355, "total_steps": 4025, "loss": 0.3738, "lr": 3.357314848983259e-05, "epoch": 2.356521739130435, "percentage": 33.66, "elapsed_time": "2:01:01", "remaining_time": "3:58:28"}
|
||||
{"current_steps": 1360, "total_steps": 4025, "loss": 0.3804, "lr": 3.350931706288663e-05, "epoch": 2.365217391304348, "percentage": 33.79, "elapsed_time": "2:01:31", "remaining_time": "3:58:07"}
|
||||
{"current_steps": 1365, "total_steps": 4025, "loss": 0.3797, "lr": 3.3445231552959663e-05, "epoch": 2.373913043478261, "percentage": 33.91, "elapsed_time": "2:02:00", "remaining_time": "3:57:45"}
|
||||
{"current_steps": 1370, "total_steps": 4025, "loss": 0.3825, "lr": 3.338089316537077e-05, "epoch": 2.382608695652174, "percentage": 34.04, "elapsed_time": "2:02:34", "remaining_time": "3:57:33"}
|
||||
{"current_steps": 1375, "total_steps": 4025, "loss": 0.3896, "lr": 3.3316303110195124e-05, "epoch": 2.391304347826087, "percentage": 34.16, "elapsed_time": "2:03:07", "remaining_time": "3:57:17"}
|
||||
{"current_steps": 1380, "total_steps": 4025, "loss": 0.3686, "lr": 3.325146260224126e-05, "epoch": 2.4, "percentage": 34.29, "elapsed_time": "2:03:40", "remaining_time": "3:57:02"}
|
||||
{"current_steps": 1385, "total_steps": 4025, "loss": 0.3041, "lr": 3.3186372861028244e-05, "epoch": 2.408695652173913, "percentage": 34.41, "elapsed_time": "2:04:10", "remaining_time": "3:56:41"}
|
||||
{"current_steps": 1390, "total_steps": 4025, "loss": 0.26, "lr": 3.3121035110762696e-05, "epoch": 2.417391304347826, "percentage": 34.53, "elapsed_time": "2:04:41", "remaining_time": "3:56:21"}
|
||||
{"current_steps": 1395, "total_steps": 4025, "loss": 0.2471, "lr": 3.3055450580315796e-05, "epoch": 2.426086956521739, "percentage": 34.66, "elapsed_time": "2:05:14", "remaining_time": "3:56:06"}
|
||||
{"current_steps": 1400, "total_steps": 4025, "loss": 0.2539, "lr": 3.298962050320015e-05, "epoch": 2.4347826086956523, "percentage": 34.78, "elapsed_time": "2:05:42", "remaining_time": "3:55:42"}
|
||||
{"current_steps": 1405, "total_steps": 4025, "loss": 0.2386, "lr": 3.292354611754662e-05, "epoch": 2.4434782608695653, "percentage": 34.91, "elapsed_time": "2:06:12", "remaining_time": "3:55:20"}
|
||||
{"current_steps": 1410, "total_steps": 4025, "loss": 0.263, "lr": 3.2857228666081e-05, "epoch": 2.4521739130434783, "percentage": 35.03, "elapsed_time": "2:06:41", "remaining_time": "3:54:58"}
|
||||
{"current_steps": 1415, "total_steps": 4025, "loss": 0.2481, "lr": 3.2790669396100666e-05, "epoch": 2.4608695652173913, "percentage": 35.16, "elapsed_time": "2:07:15", "remaining_time": "3:54:43"}
|
||||
{"current_steps": 1420, "total_steps": 4025, "loss": 0.2448, "lr": 3.272386955945113e-05, "epoch": 2.4695652173913043, "percentage": 35.28, "elapsed_time": "2:07:44", "remaining_time": "3:54:19"}
|
||||
{"current_steps": 1425, "total_steps": 4025, "loss": 0.2438, "lr": 3.265683041250245e-05, "epoch": 2.4782608695652173, "percentage": 35.4, "elapsed_time": "2:08:14", "remaining_time": "3:53:59"}
|
||||
{"current_steps": 1430, "total_steps": 4025, "loss": 0.2553, "lr": 3.2589553216125636e-05, "epoch": 2.4869565217391303, "percentage": 35.53, "elapsed_time": "2:08:44", "remaining_time": "3:53:37"}
|
||||
{"current_steps": 1435, "total_steps": 4025, "loss": 0.2621, "lr": 3.2522039235668945e-05, "epoch": 2.4956521739130437, "percentage": 35.65, "elapsed_time": "2:09:16", "remaining_time": "3:53:19"}
|
||||
{"current_steps": 1440, "total_steps": 4025, "loss": 0.2441, "lr": 3.245428974093404e-05, "epoch": 2.5043478260869563, "percentage": 35.78, "elapsed_time": "2:09:49", "remaining_time": "3:53:03"}
|
||||
{"current_steps": 1445, "total_steps": 4025, "loss": 0.2329, "lr": 3.2386306006152144e-05, "epoch": 2.5130434782608697, "percentage": 35.9, "elapsed_time": "2:10:20", "remaining_time": "3:52:43"}
|
||||
{"current_steps": 1450, "total_steps": 4025, "loss": 0.2557, "lr": 3.231808930996006e-05, "epoch": 2.5217391304347827, "percentage": 36.02, "elapsed_time": "2:10:57", "remaining_time": "3:52:33"}
|
||||
{"current_steps": 1455, "total_steps": 4025, "loss": 0.2398, "lr": 3.224964093537613e-05, "epoch": 2.5304347826086957, "percentage": 36.15, "elapsed_time": "2:11:31", "remaining_time": "3:52:18"}
|
||||
{"current_steps": 1460, "total_steps": 4025, "loss": 0.233, "lr": 3.21809621697761e-05, "epoch": 2.5391304347826087, "percentage": 36.27, "elapsed_time": "2:11:58", "remaining_time": "3:51:51"}
|
||||
{"current_steps": 1465, "total_steps": 4025, "loss": 0.2149, "lr": 3.211205430486888e-05, "epoch": 2.5478260869565217, "percentage": 36.4, "elapsed_time": "2:12:23", "remaining_time": "3:51:20"}
|
||||
{"current_steps": 1470, "total_steps": 4025, "loss": 0.2197, "lr": 3.204291863667231e-05, "epoch": 2.5565217391304347, "percentage": 36.52, "elapsed_time": "2:12:48", "remaining_time": "3:50:50"}
|
||||
{"current_steps": 1475, "total_steps": 4025, "loss": 0.2134, "lr": 3.197355646548874e-05, "epoch": 2.5652173913043477, "percentage": 36.65, "elapsed_time": "2:13:11", "remaining_time": "3:50:15"}
|
||||
{"current_steps": 1480, "total_steps": 4025, "loss": 0.2076, "lr": 3.190396909588057e-05, "epoch": 2.573913043478261, "percentage": 36.77, "elapsed_time": "2:13:34", "remaining_time": "3:49:41"}
|
||||
{"current_steps": 1485, "total_steps": 4025, "loss": 0.2102, "lr": 3.1834157836645745e-05, "epoch": 2.5826086956521737, "percentage": 36.89, "elapsed_time": "2:13:58", "remaining_time": "3:49:09"}
|
||||
{"current_steps": 1490, "total_steps": 4025, "loss": 0.2251, "lr": 3.1764124000793116e-05, "epoch": 2.591304347826087, "percentage": 37.02, "elapsed_time": "2:14:21", "remaining_time": "3:48:36"}
|
||||
{"current_steps": 1495, "total_steps": 4025, "loss": 0.2218, "lr": 3.169386890551774e-05, "epoch": 2.6, "percentage": 37.14, "elapsed_time": "2:14:48", "remaining_time": "3:48:08"}
|
||||
{"current_steps": 1500, "total_steps": 4025, "loss": 0.2242, "lr": 3.162339387217613e-05, "epoch": 2.608695652173913, "percentage": 37.27, "elapsed_time": "2:15:15", "remaining_time": "3:47:40"}
|
||||
{"current_steps": 1505, "total_steps": 4025, "loss": 0.2189, "lr": 3.1552700226261386e-05, "epoch": 2.617391304347826, "percentage": 37.39, "elapsed_time": "2:16:13", "remaining_time": "3:48:05"}
|
||||
{"current_steps": 1510, "total_steps": 4025, "loss": 0.2249, "lr": 3.1481789297378276e-05, "epoch": 2.626086956521739, "percentage": 37.52, "elapsed_time": "2:16:40", "remaining_time": "3:47:37"}
|
||||
{"current_steps": 1515, "total_steps": 4025, "loss": 0.2144, "lr": 3.141066241921821e-05, "epoch": 2.634782608695652, "percentage": 37.64, "elapsed_time": "2:17:05", "remaining_time": "3:47:07"}
|
||||
{"current_steps": 1520, "total_steps": 4025, "loss": 0.2191, "lr": 3.1339320929534175e-05, "epoch": 2.643478260869565, "percentage": 37.76, "elapsed_time": "2:17:34", "remaining_time": "3:46:43"}
|
||||
{"current_steps": 1525, "total_steps": 4025, "loss": 0.2067, "lr": 3.126776617011556e-05, "epoch": 2.6521739130434785, "percentage": 37.89, "elapsed_time": "2:17:57", "remaining_time": "3:46:09"}
|
||||
{"current_steps": 1530, "total_steps": 4025, "loss": 0.2017, "lr": 3.119599948676294e-05, "epoch": 2.660869565217391, "percentage": 38.01, "elapsed_time": "2:18:23", "remaining_time": "3:45:40"}
|
||||
{"current_steps": 1535, "total_steps": 4025, "loss": 0.277, "lr": 3.1124022229262744e-05, "epoch": 2.6695652173913045, "percentage": 38.14, "elapsed_time": "2:18:57", "remaining_time": "3:45:24"}
|
||||
{"current_steps": 1540, "total_steps": 4025, "loss": 0.3145, "lr": 3.105183575136187e-05, "epoch": 2.6782608695652175, "percentage": 38.26, "elapsed_time": "2:19:37", "remaining_time": "3:45:18"}
|
||||
{"current_steps": 1545, "total_steps": 4025, "loss": 0.3114, "lr": 3.0979441410742243e-05, "epoch": 2.6869565217391305, "percentage": 38.39, "elapsed_time": "2:20:22", "remaining_time": "3:45:19"}
|
||||
{"current_steps": 1550, "total_steps": 4025, "loss": 0.3062, "lr": 3.090684056899526e-05, "epoch": 2.6956521739130435, "percentage": 38.51, "elapsed_time": "2:21:06", "remaining_time": "3:45:18"}
|
||||
{"current_steps": 1555, "total_steps": 4025, "loss": 0.2954, "lr": 3.083403459159618e-05, "epoch": 2.7043478260869565, "percentage": 38.63, "elapsed_time": "2:21:50", "remaining_time": "3:45:18"}
|
||||
{"current_steps": 1560, "total_steps": 4025, "loss": 0.2991, "lr": 3.0761024847878454e-05, "epoch": 2.7130434782608694, "percentage": 38.76, "elapsed_time": "2:22:33", "remaining_time": "3:45:15"}
|
||||
{"current_steps": 1565, "total_steps": 4025, "loss": 0.3157, "lr": 3.068781271100797e-05, "epoch": 2.7217391304347824, "percentage": 38.88, "elapsed_time": "2:23:14", "remaining_time": "3:45:10"}
|
||||
{"current_steps": 1570, "total_steps": 4025, "loss": 0.3062, "lr": 3.061439955795722e-05, "epoch": 2.730434782608696, "percentage": 39.01, "elapsed_time": "2:23:59", "remaining_time": "3:45:09"}
|
||||
{"current_steps": 1575, "total_steps": 4025, "loss": 0.3487, "lr": 3.054078676947941e-05, "epoch": 2.7391304347826084, "percentage": 39.13, "elapsed_time": "2:24:34", "remaining_time": "3:44:53"}
|
||||
{"current_steps": 1580, "total_steps": 4025, "loss": 0.4062, "lr": 3.0466975730082474e-05, "epoch": 2.747826086956522, "percentage": 39.25, "elapsed_time": "2:25:04", "remaining_time": "3:44:30"}
|
||||
{"current_steps": 1585, "total_steps": 4025, "loss": 0.3925, "lr": 3.0392967828003043e-05, "epoch": 2.756521739130435, "percentage": 39.38, "elapsed_time": "2:25:32", "remaining_time": "3:44:02"}
|
||||
{"current_steps": 1590, "total_steps": 4025, "loss": 0.3767, "lr": 3.031876445518036e-05, "epoch": 2.765217391304348, "percentage": 39.5, "elapsed_time": "2:25:57", "remaining_time": "3:43:32"}
|
||||
{"current_steps": 1595, "total_steps": 4025, "loss": 0.3864, "lr": 3.024436700723006e-05, "epoch": 2.773913043478261, "percentage": 39.63, "elapsed_time": "2:26:23", "remaining_time": "3:43:01"}
|
||||
{"current_steps": 1600, "total_steps": 4025, "loss": 0.3848, "lr": 3.0169776883417943e-05, "epoch": 2.782608695652174, "percentage": 39.75, "elapsed_time": "2:26:51", "remaining_time": "3:42:34"}
|
||||
{"current_steps": 1605, "total_steps": 4025, "loss": 0.403, "lr": 3.009499548663365e-05, "epoch": 2.791304347826087, "percentage": 39.88, "elapsed_time": "2:27:17", "remaining_time": "3:42:05"}
|
||||
{"current_steps": 1610, "total_steps": 4025, "loss": 0.3776, "lr": 3.0020024223364292e-05, "epoch": 2.8, "percentage": 40.0, "elapsed_time": "2:27:44", "remaining_time": "3:41:37"}
|
||||
{"current_steps": 1615, "total_steps": 4025, "loss": 0.3743, "lr": 2.9944864503667966e-05, "epoch": 2.8086956521739133, "percentage": 40.12, "elapsed_time": "2:28:10", "remaining_time": "3:41:07"}
|
||||
{"current_steps": 1620, "total_steps": 4025, "loss": 0.3896, "lr": 2.9869517741147275e-05, "epoch": 2.8173913043478263, "percentage": 40.25, "elapsed_time": "2:28:38", "remaining_time": "3:40:39"}
|
||||
{"current_steps": 1625, "total_steps": 4025, "loss": 0.3777, "lr": 2.979398535292271e-05, "epoch": 2.8260869565217392, "percentage": 40.37, "elapsed_time": "2:29:06", "remaining_time": "3:40:13"}
|
||||
{"current_steps": 1630, "total_steps": 4025, "loss": 0.381, "lr": 2.9718268759606e-05, "epoch": 2.8347826086956522, "percentage": 40.5, "elapsed_time": "2:29:33", "remaining_time": "3:39:44"}
|
||||
{"current_steps": 1635, "total_steps": 4025, "loss": 0.3702, "lr": 2.964236938527341e-05, "epoch": 2.8434782608695652, "percentage": 40.62, "elapsed_time": "2:30:03", "remaining_time": "3:39:21"}
|
||||
{"current_steps": 1640, "total_steps": 4025, "loss": 0.3833, "lr": 2.9566288657438943e-05, "epoch": 2.8521739130434782, "percentage": 40.75, "elapsed_time": "2:30:34", "remaining_time": "3:38:58"}
|
||||
{"current_steps": 1645, "total_steps": 4025, "loss": 0.343, "lr": 2.9490028007027497e-05, "epoch": 2.860869565217391, "percentage": 40.87, "elapsed_time": "2:31:05", "remaining_time": "3:38:36"}
|
||||
{"current_steps": 1650, "total_steps": 4025, "loss": 0.3398, "lr": 2.9413588868347948e-05, "epoch": 2.869565217391304, "percentage": 40.99, "elapsed_time": "2:31:31", "remaining_time": "3:38:06"}
|
||||
{"current_steps": 1655, "total_steps": 4025, "loss": 0.2633, "lr": 2.9336972679066186e-05, "epoch": 2.878260869565217, "percentage": 41.12, "elapsed_time": "2:31:48", "remaining_time": "3:37:23"}
|
||||
{"current_steps": 1660, "total_steps": 4025, "loss": 0.2509, "lr": 2.926018088017804e-05, "epoch": 2.8869565217391306, "percentage": 41.24, "elapsed_time": "2:32:06", "remaining_time": "3:36:42"}
|
||||
{"current_steps": 1665, "total_steps": 4025, "loss": 0.2743, "lr": 2.9183214915982226e-05, "epoch": 2.8956521739130436, "percentage": 41.37, "elapsed_time": "2:32:23", "remaining_time": "3:36:00"}
|
||||
{"current_steps": 1670, "total_steps": 4025, "loss": 0.2626, "lr": 2.9106076234053154e-05, "epoch": 2.9043478260869566, "percentage": 41.49, "elapsed_time": "2:32:40", "remaining_time": "3:35:17"}
|
||||
{"current_steps": 1675, "total_steps": 4025, "loss": 0.2544, "lr": 2.9028766285213696e-05, "epoch": 2.9130434782608696, "percentage": 41.61, "elapsed_time": "2:33:02", "remaining_time": "3:34:42"}
|
||||
{"current_steps": 1680, "total_steps": 4025, "loss": 0.2584, "lr": 2.895128652350792e-05, "epoch": 2.9217391304347826, "percentage": 41.74, "elapsed_time": "2:33:27", "remaining_time": "3:34:12"}
|
||||
{"current_steps": 1685, "total_steps": 4025, "loss": 0.2624, "lr": 2.8873638406173713e-05, "epoch": 2.9304347826086956, "percentage": 41.86, "elapsed_time": "2:33:45", "remaining_time": "3:33:31"}
|
||||
{"current_steps": 1690, "total_steps": 4025, "loss": 0.2724, "lr": 2.8795823393615417e-05, "epoch": 2.9391304347826086, "percentage": 41.99, "elapsed_time": "2:34:03", "remaining_time": "3:32:51"}
|
||||
{"current_steps": 1695, "total_steps": 4025, "loss": 0.2347, "lr": 2.8717842949376326e-05, "epoch": 2.9478260869565216, "percentage": 42.11, "elapsed_time": "2:34:23", "remaining_time": "3:32:14"}
|
||||
{"current_steps": 1700, "total_steps": 4025, "loss": 0.236, "lr": 2.8639698540111153e-05, "epoch": 2.9565217391304346, "percentage": 42.24, "elapsed_time": "2:34:43", "remaining_time": "3:31:36"}
|
||||
{"current_steps": 1705, "total_steps": 4025, "loss": 0.2876, "lr": 2.8561391635558483e-05, "epoch": 2.965217391304348, "percentage": 42.36, "elapsed_time": "2:35:04", "remaining_time": "3:31:00"}
|
||||
{"current_steps": 1710, "total_steps": 4025, "loss": 0.2383, "lr": 2.8482923708513107e-05, "epoch": 2.973913043478261, "percentage": 42.48, "elapsed_time": "2:35:21", "remaining_time": "3:30:20"}
|
||||
{"current_steps": 1715, "total_steps": 4025, "loss": 0.2683, "lr": 2.840429623479832e-05, "epoch": 2.982608695652174, "percentage": 42.61, "elapsed_time": "2:35:41", "remaining_time": "3:29:42"}
|
||||
{"current_steps": 1720, "total_steps": 4025, "loss": 0.2418, "lr": 2.832551069323815e-05, "epoch": 2.991304347826087, "percentage": 42.73, "elapsed_time": "2:36:01", "remaining_time": "3:29:05"}
|
||||
{"current_steps": 1725, "total_steps": 4025, "loss": 0.2736, "lr": 2.8246568565629606e-05, "epoch": 3.0, "percentage": 42.86, "elapsed_time": "2:36:24", "remaining_time": "3:28:32"}
|
||||
{"current_steps": 1730, "total_steps": 4025, "loss": 0.2512, "lr": 2.8167471336714717e-05, "epoch": 3.008695652173913, "percentage": 42.98, "elapsed_time": "2:36:47", "remaining_time": "3:27:59"}
|
||||
{"current_steps": 1735, "total_steps": 4025, "loss": 0.2369, "lr": 2.8088220494152687e-05, "epoch": 3.017391304347826, "percentage": 43.11, "elapsed_time": "2:37:08", "remaining_time": "3:27:24"}
|
||||
{"current_steps": 1740, "total_steps": 4025, "loss": 0.2411, "lr": 2.8008817528491867e-05, "epoch": 3.026086956521739, "percentage": 43.23, "elapsed_time": "2:37:31", "remaining_time": "3:26:51"}
|
||||
{"current_steps": 1745, "total_steps": 4025, "loss": 0.2405, "lr": 2.792926393314174e-05, "epoch": 3.034782608695652, "percentage": 43.35, "elapsed_time": "2:37:54", "remaining_time": "3:26:19"}
|
||||
{"current_steps": 1750, "total_steps": 4025, "loss": 0.2349, "lr": 2.7849561204344827e-05, "epoch": 3.0434782608695654, "percentage": 43.48, "elapsed_time": "2:38:15", "remaining_time": "3:25:44"}
|
||||
{"current_steps": 1755, "total_steps": 4025, "loss": 0.2415, "lr": 2.7769710841148562e-05, "epoch": 3.0521739130434784, "percentage": 43.6, "elapsed_time": "2:38:36", "remaining_time": "3:25:08"}
|
||||
{"current_steps": 1760, "total_steps": 4025, "loss": 0.2275, "lr": 2.7689714345377063e-05, "epoch": 3.0608695652173914, "percentage": 43.73, "elapsed_time": "2:38:59", "remaining_time": "3:24:37"}
|
||||
{"current_steps": 1765, "total_steps": 4025, "loss": 0.2281, "lr": 2.760957322160291e-05, "epoch": 3.0695652173913044, "percentage": 43.85, "elapsed_time": "2:39:24", "remaining_time": "3:24:07"}
|
||||
{"current_steps": 1770, "total_steps": 4025, "loss": 0.2344, "lr": 2.7529288977118866e-05, "epoch": 3.0782608695652174, "percentage": 43.98, "elapsed_time": "2:39:45", "remaining_time": "3:23:32"}
|
||||
{"current_steps": 1775, "total_steps": 4025, "loss": 0.2399, "lr": 2.744886312190948e-05, "epoch": 3.0869565217391304, "percentage": 44.1, "elapsed_time": "2:40:08", "remaining_time": "3:22:59"}
|
||||
{"current_steps": 1780, "total_steps": 4025, "loss": 0.2237, "lr": 2.736829716862273e-05, "epoch": 3.0956521739130434, "percentage": 44.22, "elapsed_time": "2:40:30", "remaining_time": "3:22:26"}
|
||||
{"current_steps": 1785, "total_steps": 4025, "loss": 0.2266, "lr": 2.728759263254154e-05, "epoch": 3.1043478260869564, "percentage": 44.35, "elapsed_time": "2:40:55", "remaining_time": "3:21:56"}
|
||||
{"current_steps": 1790, "total_steps": 4025, "loss": 0.2281, "lr": 2.720675103155531e-05, "epoch": 3.1130434782608694, "percentage": 44.47, "elapsed_time": "2:41:15", "remaining_time": "3:21:21"}
|
||||
{"current_steps": 1795, "total_steps": 4025, "loss": 0.242, "lr": 2.7125773886131348e-05, "epoch": 3.121739130434783, "percentage": 44.6, "elapsed_time": "2:41:39", "remaining_time": "3:20:49"}
|
||||
{"current_steps": 1800, "total_steps": 4025, "loss": 0.2245, "lr": 2.7044662719286282e-05, "epoch": 3.130434782608696, "percentage": 44.72, "elapsed_time": "2:41:59", "remaining_time": "3:20:14"}
|
||||
{"current_steps": 1805, "total_steps": 4025, "loss": 0.3359, "lr": 2.6963419056557412e-05, "epoch": 3.139130434782609, "percentage": 44.84, "elapsed_time": "2:42:20", "remaining_time": "3:19:39"}
|
||||
{"current_steps": 1810, "total_steps": 4025, "loss": 0.3858, "lr": 2.6882044425974016e-05, "epoch": 3.1478260869565218, "percentage": 44.97, "elapsed_time": "2:42:36", "remaining_time": "3:18:59"}
|
||||
{"current_steps": 1815, "total_steps": 4025, "loss": 0.3914, "lr": 2.680054035802861e-05, "epoch": 3.1565217391304348, "percentage": 45.09, "elapsed_time": "2:43:02", "remaining_time": "3:18:31"}
|
||||
{"current_steps": 1820, "total_steps": 4025, "loss": 0.3862, "lr": 2.6718908385648176e-05, "epoch": 3.1652173913043478, "percentage": 45.22, "elapsed_time": "2:43:23", "remaining_time": "3:17:57"}
|
||||
{"current_steps": 1825, "total_steps": 4025, "loss": 0.404, "lr": 2.6637150044165307e-05, "epoch": 3.1739130434782608, "percentage": 45.34, "elapsed_time": "2:43:48", "remaining_time": "3:17:27"}
|
||||
{"current_steps": 1830, "total_steps": 4025, "loss": 0.3785, "lr": 2.6555266871289362e-05, "epoch": 3.1826086956521737, "percentage": 45.47, "elapsed_time": "2:44:11", "remaining_time": "3:16:56"}
|
||||
{"current_steps": 1835, "total_steps": 4025, "loss": 0.3811, "lr": 2.64732604070775e-05, "epoch": 3.1913043478260867, "percentage": 45.59, "elapsed_time": "2:44:34", "remaining_time": "3:16:24"}
|
||||
{"current_steps": 1840, "total_steps": 4025, "loss": 0.36, "lr": 2.6391132193905767e-05, "epoch": 3.2, "percentage": 45.71, "elapsed_time": "2:44:54", "remaining_time": "3:15:49"}
|
||||
{"current_steps": 1845, "total_steps": 4025, "loss": 0.3799, "lr": 2.6308883776440044e-05, "epoch": 3.208695652173913, "percentage": 45.84, "elapsed_time": "2:45:13", "remaining_time": "3:15:13"}
|
||||
{"current_steps": 1850, "total_steps": 4025, "loss": 0.3903, "lr": 2.6226516701607027e-05, "epoch": 3.217391304347826, "percentage": 45.96, "elapsed_time": "2:45:38", "remaining_time": "3:14:44"}
|
||||
{"current_steps": 1855, "total_steps": 4025, "loss": 0.3774, "lr": 2.61440325185651e-05, "epoch": 3.226086956521739, "percentage": 46.09, "elapsed_time": "2:45:58", "remaining_time": "3:14:09"}
|
||||
{"current_steps": 1860, "total_steps": 4025, "loss": 0.3653, "lr": 2.606143277867523e-05, "epoch": 3.234782608695652, "percentage": 46.21, "elapsed_time": "2:46:24", "remaining_time": "3:13:41"}
|
||||
{"current_steps": 1865, "total_steps": 4025, "loss": 0.3692, "lr": 2.5978719035471766e-05, "epoch": 3.243478260869565, "percentage": 46.34, "elapsed_time": "2:46:47", "remaining_time": "3:13:10"}
|
||||
{"current_steps": 1870, "total_steps": 4025, "loss": 0.3512, "lr": 2.5895892844633234e-05, "epoch": 3.252173913043478, "percentage": 46.46, "elapsed_time": "2:47:06", "remaining_time": "3:12:34"}
|
||||
{"current_steps": 1875, "total_steps": 4025, "loss": 0.3722, "lr": 2.5812955763953074e-05, "epoch": 3.260869565217391, "percentage": 46.58, "elapsed_time": "2:47:27", "remaining_time": "3:12:01"}
|
||||
{"current_steps": 1880, "total_steps": 4025, "loss": 0.356, "lr": 2.572990935331034e-05, "epoch": 3.269565217391304, "percentage": 46.71, "elapsed_time": "2:47:50", "remaining_time": "3:11:29"}
|
||||
{"current_steps": 1885, "total_steps": 4025, "loss": 0.3471, "lr": 2.564675517464035e-05, "epoch": 3.2782608695652176, "percentage": 46.83, "elapsed_time": "2:48:29", "remaining_time": "3:11:16"}
|
||||
{"current_steps": 1890, "total_steps": 4025, "loss": 0.3471, "lr": 2.556349479190534e-05, "epoch": 3.2869565217391306, "percentage": 46.96, "elapsed_time": "2:49:04", "remaining_time": "3:10:59"}
|
||||
{"current_steps": 1895, "total_steps": 4025, "loss": 0.3411, "lr": 2.5480129771065025e-05, "epoch": 3.2956521739130435, "percentage": 47.08, "elapsed_time": "2:49:37", "remaining_time": "3:10:39"}
|
||||
{"current_steps": 1900, "total_steps": 4025, "loss": 0.3414, "lr": 2.5396661680047138e-05, "epoch": 3.3043478260869565, "percentage": 47.2, "elapsed_time": "2:50:10", "remaining_time": "3:10:20"}
|
||||
{"current_steps": 1905, "total_steps": 4025, "loss": 0.3753, "lr": 2.5313092088717965e-05, "epoch": 3.3130434782608695, "percentage": 47.33, "elapsed_time": "2:50:42", "remaining_time": "3:09:58"}
|
||||
{"current_steps": 1910, "total_steps": 4025, "loss": 0.3563, "lr": 2.5229422568852812e-05, "epoch": 3.3217391304347825, "percentage": 47.45, "elapsed_time": "2:51:21", "remaining_time": "3:09:45"}
|
||||
{"current_steps": 1915, "total_steps": 4025, "loss": 0.337, "lr": 2.514565469410643e-05, "epoch": 3.3304347826086955, "percentage": 47.58, "elapsed_time": "2:51:55", "remaining_time": "3:09:25"}
|
||||
{"current_steps": 1920, "total_steps": 4025, "loss": 0.3783, "lr": 2.506179003998343e-05, "epoch": 3.3391304347826085, "percentage": 47.7, "elapsed_time": "2:52:20", "remaining_time": "3:08:56"}
|
||||
{"current_steps": 1925, "total_steps": 4025, "loss": 0.3459, "lr": 2.4977830183808642e-05, "epoch": 3.3478260869565215, "percentage": 47.83, "elapsed_time": "2:52:58", "remaining_time": "3:08:41"}
|
||||
{"current_steps": 1930, "total_steps": 4025, "loss": 0.3367, "lr": 2.4893776704697476e-05, "epoch": 3.356521739130435, "percentage": 47.95, "elapsed_time": "2:53:27", "remaining_time": "3:08:17"}
|
||||
{"current_steps": 1935, "total_steps": 4025, "loss": 0.3439, "lr": 2.4809631183526173e-05, "epoch": 3.365217391304348, "percentage": 48.07, "elapsed_time": "2:53:57", "remaining_time": "3:07:53"}
|
||||
{"current_steps": 1940, "total_steps": 4025, "loss": 0.3444, "lr": 2.472539520290211e-05, "epoch": 3.373913043478261, "percentage": 48.2, "elapsed_time": "2:54:26", "remaining_time": "3:07:28"}
|
||||
{"current_steps": 1945, "total_steps": 4025, "loss": 0.3468, "lr": 2.4641070347134032e-05, "epoch": 3.382608695652174, "percentage": 48.32, "elapsed_time": "2:55:00", "remaining_time": "3:07:09"}
|
||||
{"current_steps": 1950, "total_steps": 4025, "loss": 0.3559, "lr": 2.455665820220223e-05, "epoch": 3.391304347826087, "percentage": 48.45, "elapsed_time": "2:55:33", "remaining_time": "3:06:49"}
|
||||
{"current_steps": 1955, "total_steps": 4025, "loss": 0.3379, "lr": 2.4472160355728745e-05, "epoch": 3.4, "percentage": 48.57, "elapsed_time": "2:56:06", "remaining_time": "3:06:28"}
|
||||
{"current_steps": 1960, "total_steps": 4025, "loss": 0.2776, "lr": 2.4387578396947486e-05, "epoch": 3.408695652173913, "percentage": 48.7, "elapsed_time": "2:56:36", "remaining_time": "3:06:04"}
|
||||
{"current_steps": 1965, "total_steps": 4025, "loss": 0.2377, "lr": 2.4302913916674325e-05, "epoch": 3.417391304347826, "percentage": 48.82, "elapsed_time": "2:57:07", "remaining_time": "3:05:40"}
|
||||
{"current_steps": 1970, "total_steps": 4025, "loss": 0.2243, "lr": 2.4218168507277235e-05, "epoch": 3.426086956521739, "percentage": 48.94, "elapsed_time": "2:57:40", "remaining_time": "3:05:20"}
|
||||
{"current_steps": 1975, "total_steps": 4025, "loss": 0.2309, "lr": 2.4133343762646262e-05, "epoch": 3.4347826086956523, "percentage": 49.07, "elapsed_time": "2:58:08", "remaining_time": "3:04:54"}
|
||||
{"current_steps": 1980, "total_steps": 4025, "loss": 0.2171, "lr": 2.4048441278163615e-05, "epoch": 3.4434782608695653, "percentage": 49.19, "elapsed_time": "2:58:38", "remaining_time": "3:04:29"}
|
||||
{"current_steps": 1985, "total_steps": 4025, "loss": 0.2405, "lr": 2.3963462650673618e-05, "epoch": 3.4521739130434783, "percentage": 49.32, "elapsed_time": "2:59:07", "remaining_time": "3:04:05"}
|
||||
{"current_steps": 1990, "total_steps": 4025, "loss": 0.2279, "lr": 2.3878409478452706e-05, "epoch": 3.4608695652173913, "percentage": 49.44, "elapsed_time": "2:59:40", "remaining_time": "3:03:44"}
|
||||
{"current_steps": 1995, "total_steps": 4025, "loss": 0.2225, "lr": 2.3793283361179333e-05, "epoch": 3.4695652173913043, "percentage": 49.57, "elapsed_time": "3:00:09", "remaining_time": "3:03:19"}
|
||||
{"current_steps": 2000, "total_steps": 4025, "loss": 0.2222, "lr": 2.3708085899903917e-05, "epoch": 3.4782608695652173, "percentage": 49.69, "elapsed_time": "3:00:40", "remaining_time": "3:02:55"}
|
||||
{"current_steps": 2005, "total_steps": 4025, "loss": 0.233, "lr": 2.3622818697018695e-05, "epoch": 3.4869565217391303, "percentage": 49.81, "elapsed_time": "3:01:09", "remaining_time": "3:02:31"}
|
||||
{"current_steps": 2010, "total_steps": 4025, "loss": 0.2399, "lr": 2.3537483356227627e-05, "epoch": 3.4956521739130437, "percentage": 49.94, "elapsed_time": "3:01:42", "remaining_time": "3:02:09"}
|
||||
{"current_steps": 2015, "total_steps": 4025, "loss": 0.2232, "lr": 2.3452081482516188e-05, "epoch": 3.5043478260869563, "percentage": 50.06, "elapsed_time": "3:02:15", "remaining_time": "3:01:48"}
|
||||
{"current_steps": 2020, "total_steps": 4025, "loss": 0.2118, "lr": 2.3366614682121215e-05, "epoch": 3.5130434782608697, "percentage": 50.19, "elapsed_time": "3:02:46", "remaining_time": "3:01:25"}
|
||||
{"current_steps": 2025, "total_steps": 4025, "loss": 0.2343, "lr": 2.328108456250067e-05, "epoch": 3.5217391304347827, "percentage": 50.31, "elapsed_time": "3:03:23", "remaining_time": "3:01:07"}
|
||||
{"current_steps": 2030, "total_steps": 4025, "loss": 0.2198, "lr": 2.319549273230345e-05, "epoch": 3.5304347826086957, "percentage": 50.43, "elapsed_time": "3:03:57", "remaining_time": "3:00:46"}
|
||||
{"current_steps": 2035, "total_steps": 4025, "loss": 0.2137, "lr": 2.3109840801339077e-05, "epoch": 3.5391304347826087, "percentage": 50.56, "elapsed_time": "3:04:24", "remaining_time": "3:00:19"}
|
||||
{"current_steps": 2040, "total_steps": 4025, "loss": 0.1958, "lr": 2.3024130380547455e-05, "epoch": 3.5478260869565217, "percentage": 50.68, "elapsed_time": "3:04:49", "remaining_time": "2:59:50"}
|
||||
{"current_steps": 2045, "total_steps": 4025, "loss": 0.2009, "lr": 2.2938363081968563e-05, "epoch": 3.5565217391304347, "percentage": 50.81, "elapsed_time": "3:05:14", "remaining_time": "2:59:21"}
|
||||
{"current_steps": 2050, "total_steps": 4025, "loss": 0.1941, "lr": 2.2852540518712143e-05, "epoch": 3.5652173913043477, "percentage": 50.93, "elapsed_time": "3:05:37", "remaining_time": "2:58:49"}
|
||||
{"current_steps": 2055, "total_steps": 4025, "loss": 0.1886, "lr": 2.2766664304927346e-05, "epoch": 3.573913043478261, "percentage": 51.06, "elapsed_time": "3:06:00", "remaining_time": "2:58:18"}
|
||||
{"current_steps": 2060, "total_steps": 4025, "loss": 0.1925, "lr": 2.268073605577239e-05, "epoch": 3.5826086956521737, "percentage": 51.18, "elapsed_time": "3:06:24", "remaining_time": "2:57:48"}
|
||||
{"current_steps": 2065, "total_steps": 4025, "loss": 0.2042, "lr": 2.2594757387384157e-05, "epoch": 3.591304347826087, "percentage": 51.3, "elapsed_time": "3:06:47", "remaining_time": "2:57:17"}
|
||||
{"current_steps": 2070, "total_steps": 4025, "loss": 0.2025, "lr": 2.2508729916847835e-05, "epoch": 3.6, "percentage": 51.43, "elapsed_time": "3:07:14", "remaining_time": "2:56:50"}
|
||||
{"current_steps": 2075, "total_steps": 4025, "loss": 0.2049, "lr": 2.2422655262166467e-05, "epoch": 3.608695652173913, "percentage": 51.55, "elapsed_time": "3:07:41", "remaining_time": "2:56:22"}
|
||||
{"current_steps": 2080, "total_steps": 4025, "loss": 0.2005, "lr": 2.2336535042230533e-05, "epoch": 3.617391304347826, "percentage": 51.68, "elapsed_time": "3:08:08", "remaining_time": "2:55:55"}
|
||||
{"current_steps": 2085, "total_steps": 4025, "loss": 0.2058, "lr": 2.2250370876787505e-05, "epoch": 3.626086956521739, "percentage": 51.8, "elapsed_time": "3:08:34", "remaining_time": "2:55:27"}
|
||||
{"current_steps": 2090, "total_steps": 4025, "loss": 0.1952, "lr": 2.2164164386411398e-05, "epoch": 3.634782608695652, "percentage": 51.93, "elapsed_time": "3:09:00", "remaining_time": "2:54:59"}
|
||||
{"current_steps": 2095, "total_steps": 4025, "loss": 0.2015, "lr": 2.207791719247225e-05, "epoch": 3.643478260869565, "percentage": 52.05, "elapsed_time": "3:09:29", "remaining_time": "2:54:33"}
|
||||
{"current_steps": 2100, "total_steps": 4025, "loss": 0.1884, "lr": 2.1991630917105666e-05, "epoch": 3.6521739130434785, "percentage": 52.17, "elapsed_time": "3:09:52", "remaining_time": "2:54:02"}
|
||||
{"current_steps": 2105, "total_steps": 4025, "loss": 0.1842, "lr": 2.190530718318228e-05, "epoch": 3.660869565217391, "percentage": 52.3, "elapsed_time": "3:10:18", "remaining_time": "2:53:35"}
|
||||
{"current_steps": 2110, "total_steps": 4025, "loss": 0.2506, "lr": 2.1818947614277266e-05, "epoch": 3.6695652173913045, "percentage": 52.42, "elapsed_time": "3:10:52", "remaining_time": "2:53:14"}
|
||||
{"current_steps": 2115, "total_steps": 4025, "loss": 0.2859, "lr": 2.173255383463977e-05, "epoch": 3.6782608695652175, "percentage": 52.55, "elapsed_time": "3:11:33", "remaining_time": "2:52:59"}
|
||||
{"current_steps": 2120, "total_steps": 4025, "loss": 0.2874, "lr": 2.164612746916236e-05, "epoch": 3.6869565217391305, "percentage": 52.67, "elapsed_time": "3:12:17", "remaining_time": "2:52:47"}
|
||||
{"current_steps": 2125, "total_steps": 4025, "loss": 0.2787, "lr": 2.15596701433505e-05, "epoch": 3.6956521739130435, "percentage": 52.8, "elapsed_time": "3:13:01", "remaining_time": "2:52:35"}
|
||||
{"current_steps": 2130, "total_steps": 4025, "loss": 0.2689, "lr": 2.1473183483291948e-05, "epoch": 3.7043478260869565, "percentage": 52.92, "elapsed_time": "3:13:46", "remaining_time": "2:52:23"}
|
||||
{"current_steps": 2135, "total_steps": 4025, "loss": 0.2745, "lr": 2.138666911562618e-05, "epoch": 3.7130434782608694, "percentage": 53.04, "elapsed_time": "3:14:29", "remaining_time": "2:52:09"}
|
||||
{"current_steps": 2140, "total_steps": 4025, "loss": 0.2867, "lr": 2.1300128667513783e-05, "epoch": 3.7217391304347824, "percentage": 53.17, "elapsed_time": "3:15:10", "remaining_time": "2:51:55"}
|
||||
{"current_steps": 2145, "total_steps": 4025, "loss": 0.2836, "lr": 2.1213563766605883e-05, "epoch": 3.730434782608696, "percentage": 53.29, "elapsed_time": "3:15:55", "remaining_time": "2:51:42"}
|
||||
{"current_steps": 2150, "total_steps": 4025, "loss": 0.322, "lr": 2.112697604101351e-05, "epoch": 3.7391304347826084, "percentage": 53.42, "elapsed_time": "3:16:30", "remaining_time": "2:51:22"}
|
||||
{"current_steps": 2155, "total_steps": 4025, "loss": 0.3737, "lr": 2.1040367119276973e-05, "epoch": 3.747826086956522, "percentage": 53.54, "elapsed_time": "3:17:00", "remaining_time": "2:50:57"}
|
||||
{"current_steps": 2160, "total_steps": 4025, "loss": 0.3557, "lr": 2.0953738630335234e-05, "epoch": 3.756521739130435, "percentage": 53.66, "elapsed_time": "3:17:28", "remaining_time": "2:50:29"}
|
||||
{"current_steps": 2165, "total_steps": 4025, "loss": 0.3422, "lr": 2.08670922034953e-05, "epoch": 3.765217391304348, "percentage": 53.79, "elapsed_time": "3:17:53", "remaining_time": "2:50:00"}
|
||||
{"current_steps": 2170, "total_steps": 4025, "loss": 0.3508, "lr": 2.0780429468401516e-05, "epoch": 3.773913043478261, "percentage": 53.91, "elapsed_time": "3:18:19", "remaining_time": "2:49:32"}
|
||||
{"current_steps": 2175, "total_steps": 4025, "loss": 0.3486, "lr": 2.0693752055004995e-05, "epoch": 3.782608695652174, "percentage": 54.04, "elapsed_time": "3:18:47", "remaining_time": "2:49:05"}
|
||||
{"current_steps": 2180, "total_steps": 4025, "loss": 0.368, "lr": 2.0607061593532877e-05, "epoch": 3.791304347826087, "percentage": 54.16, "elapsed_time": "3:19:14", "remaining_time": "2:48:37"}
|
||||
{"current_steps": 2185, "total_steps": 4025, "loss": 0.342, "lr": 2.0520359714457734e-05, "epoch": 3.8, "percentage": 54.29, "elapsed_time": "3:19:41", "remaining_time": "2:48:09"}
|
||||
{"current_steps": 2190, "total_steps": 4025, "loss": 0.3407, "lr": 2.043364804846688e-05, "epoch": 3.8086956521739133, "percentage": 54.41, "elapsed_time": "3:20:06", "remaining_time": "2:47:40"}
|
||||
{"current_steps": 2195, "total_steps": 4025, "loss": 0.3542, "lr": 2.0346928226431693e-05, "epoch": 3.8173913043478263, "percentage": 54.53, "elapsed_time": "3:20:34", "remaining_time": "2:47:13"}
|
||||
{"current_steps": 2200, "total_steps": 4025, "loss": 0.3458, "lr": 2.0260201879376957e-05, "epoch": 3.8260869565217392, "percentage": 54.66, "elapsed_time": "3:21:02", "remaining_time": "2:46:46"}
|
||||
{"current_steps": 2205, "total_steps": 4025, "loss": 0.347, "lr": 2.017347063845017e-05, "epoch": 3.8347826086956522, "percentage": 54.78, "elapsed_time": "3:21:29", "remaining_time": "2:46:18"}
|
||||
{"current_steps": 2210, "total_steps": 4025, "loss": 0.3368, "lr": 2.008673613489088e-05, "epoch": 3.8434782608695652, "percentage": 54.91, "elapsed_time": "3:21:59", "remaining_time": "2:45:53"}
|
||||
{"current_steps": 2215, "total_steps": 4025, "loss": 0.3518, "lr": 2e-05, "epoch": 3.8521739130434782, "percentage": 55.03, "elapsed_time": "3:22:30", "remaining_time": "2:45:28"}
|
||||
{"current_steps": 2220, "total_steps": 4025, "loss": 0.3128, "lr": 1.991326386510912e-05, "epoch": 3.860869565217391, "percentage": 55.16, "elapsed_time": "3:23:01", "remaining_time": "2:45:04"}
|
||||
{"current_steps": 2225, "total_steps": 4025, "loss": 0.3057, "lr": 1.9826529361549837e-05, "epoch": 3.869565217391304, "percentage": 55.28, "elapsed_time": "3:23:27", "remaining_time": "2:44:36"}
|
||||
{"current_steps": 2230, "total_steps": 4025, "loss": 0.2286, "lr": 1.973979812062305e-05, "epoch": 3.878260869565217, "percentage": 55.4, "elapsed_time": "3:23:44", "remaining_time": "2:43:59"}
|
||||
{"current_steps": 2235, "total_steps": 4025, "loss": 0.2149, "lr": 1.9653071773568317e-05, "epoch": 3.8869565217391306, "percentage": 55.53, "elapsed_time": "3:24:02", "remaining_time": "2:43:25"}
|
||||
{"current_steps": 2240, "total_steps": 4025, "loss": 0.2412, "lr": 1.9566351951533122e-05, "epoch": 3.8956521739130436, "percentage": 55.65, "elapsed_time": "3:24:19", "remaining_time": "2:42:49"}
|
||||
{"current_steps": 2245, "total_steps": 4025, "loss": 0.2277, "lr": 1.947964028554227e-05, "epoch": 3.9043478260869566, "percentage": 55.78, "elapsed_time": "3:24:36", "remaining_time": "2:42:13"}
|
||||
{"current_steps": 2250, "total_steps": 4025, "loss": 0.2218, "lr": 1.939293840646713e-05, "epoch": 3.9130434782608696, "percentage": 55.9, "elapsed_time": "3:24:58", "remaining_time": "2:41:42"}
|
||||
{"current_steps": 2255, "total_steps": 4025, "loss": 0.2266, "lr": 1.9306247944995015e-05, "epoch": 3.9217391304347826, "percentage": 56.02, "elapsed_time": "3:25:23", "remaining_time": "2:41:13"}
|
||||
{"current_steps": 2260, "total_steps": 4025, "loss": 0.2287, "lr": 1.9219570531598487e-05, "epoch": 3.9304347826086956, "percentage": 56.15, "elapsed_time": "3:25:41", "remaining_time": "2:40:38"}
|
||||
{"current_steps": 2265, "total_steps": 4025, "loss": 0.2343, "lr": 1.9132907796504708e-05, "epoch": 3.9391304347826086, "percentage": 56.27, "elapsed_time": "3:25:59", "remaining_time": "2:40:04"}
|
||||
{"current_steps": 2270, "total_steps": 4025, "loss": 0.2041, "lr": 1.9046261369664773e-05, "epoch": 3.9478260869565216, "percentage": 56.4, "elapsed_time": "3:26:19", "remaining_time": "2:39:31"}
|
||||
{"current_steps": 2275, "total_steps": 4025, "loss": 0.2032, "lr": 1.895963288072304e-05, "epoch": 3.9565217391304346, "percentage": 56.52, "elapsed_time": "3:26:39", "remaining_time": "2:38:58"}
|
||||
{"current_steps": 2280, "total_steps": 4025, "loss": 0.2574, "lr": 1.8873023958986498e-05, "epoch": 3.965217391304348, "percentage": 56.65, "elapsed_time": "3:27:00", "remaining_time": "2:38:25"}
|
||||
{"current_steps": 2285, "total_steps": 4025, "loss": 0.2052, "lr": 1.878643623339412e-05, "epoch": 3.973913043478261, "percentage": 56.77, "elapsed_time": "3:27:18", "remaining_time": "2:37:51"}
|
||||
{"current_steps": 2290, "total_steps": 4025, "loss": 0.2381, "lr": 1.8699871332486223e-05, "epoch": 3.982608695652174, "percentage": 56.89, "elapsed_time": "3:27:37", "remaining_time": "2:37:18"}
|
||||
{"current_steps": 2295, "total_steps": 4025, "loss": 0.2118, "lr": 1.861333088437383e-05, "epoch": 3.991304347826087, "percentage": 57.02, "elapsed_time": "3:27:57", "remaining_time": "2:36:45"}
|
||||
{"current_steps": 2300, "total_steps": 4025, "loss": 0.2421, "lr": 1.8526816516708056e-05, "epoch": 4.0, "percentage": 57.14, "elapsed_time": "3:28:20", "remaining_time": "2:36:15"}
|
||||
{"current_steps": 2305, "total_steps": 4025, "loss": 0.228, "lr": 1.8440329856649505e-05, "epoch": 4.008695652173913, "percentage": 57.27, "elapsed_time": "3:28:43", "remaining_time": "2:35:45"}
|
||||
{"current_steps": 2310, "total_steps": 4025, "loss": 0.2155, "lr": 1.835387253083765e-05, "epoch": 4.017391304347826, "percentage": 57.39, "elapsed_time": "3:29:04", "remaining_time": "2:35:13"}
|
||||
{"current_steps": 2315, "total_steps": 4025, "loss": 0.2197, "lr": 1.8267446165360242e-05, "epoch": 4.026086956521739, "percentage": 57.52, "elapsed_time": "3:29:27", "remaining_time": "2:34:43"}
|
||||
{"current_steps": 2320, "total_steps": 4025, "loss": 0.2185, "lr": 1.8181052385722734e-05, "epoch": 4.034782608695652, "percentage": 57.64, "elapsed_time": "3:29:50", "remaining_time": "2:34:12"}
|
||||
{"current_steps": 2325, "total_steps": 4025, "loss": 0.2137, "lr": 1.809469281681772e-05, "epoch": 4.043478260869565, "percentage": 57.76, "elapsed_time": "3:30:11", "remaining_time": "2:33:41"}
|
||||
{"current_steps": 2330, "total_steps": 4025, "loss": 0.2202, "lr": 1.8008369082894338e-05, "epoch": 4.052173913043478, "percentage": 57.89, "elapsed_time": "3:30:31", "remaining_time": "2:33:09"}
|
||||
{"current_steps": 2335, "total_steps": 4025, "loss": 0.2069, "lr": 1.7922082807527754e-05, "epoch": 4.060869565217391, "percentage": 58.01, "elapsed_time": "3:30:55", "remaining_time": "2:32:39"}
|
||||
{"current_steps": 2340, "total_steps": 4025, "loss": 0.2072, "lr": 1.783583561358861e-05, "epoch": 4.069565217391304, "percentage": 58.14, "elapsed_time": "3:31:20", "remaining_time": "2:32:10"}
|
||||
{"current_steps": 2345, "total_steps": 4025, "loss": 0.2138, "lr": 1.7749629123212502e-05, "epoch": 4.078260869565217, "percentage": 58.26, "elapsed_time": "3:31:41", "remaining_time": "2:31:39"}
|
||||
{"current_steps": 2350, "total_steps": 4025, "loss": 0.219, "lr": 1.7663464957769477e-05, "epoch": 4.086956521739131, "percentage": 58.39, "elapsed_time": "3:32:03", "remaining_time": "2:31:08"}
|
||||
{"current_steps": 2355, "total_steps": 4025, "loss": 0.2023, "lr": 1.7577344737833547e-05, "epoch": 4.095652173913043, "percentage": 58.51, "elapsed_time": "3:32:25", "remaining_time": "2:30:38"}
|
||||
{"current_steps": 2360, "total_steps": 4025, "loss": 0.2075, "lr": 1.7491270083152165e-05, "epoch": 4.104347826086957, "percentage": 58.63, "elapsed_time": "3:32:50", "remaining_time": "2:30:09"}
|
||||
{"current_steps": 2365, "total_steps": 4025, "loss": 0.207, "lr": 1.7405242612615843e-05, "epoch": 4.113043478260869, "percentage": 58.76, "elapsed_time": "3:33:10", "remaining_time": "2:29:37"}
|
||||
{"current_steps": 2370, "total_steps": 4025, "loss": 0.221, "lr": 1.7319263944227617e-05, "epoch": 4.121739130434783, "percentage": 58.88, "elapsed_time": "3:33:34", "remaining_time": "2:29:08"}
|
||||
{"current_steps": 2375, "total_steps": 4025, "loss": 0.2037, "lr": 1.723333569507266e-05, "epoch": 4.130434782608695, "percentage": 59.01, "elapsed_time": "3:33:54", "remaining_time": "2:28:36"}
|
||||
{"current_steps": 2380, "total_steps": 4025, "loss": 0.3034, "lr": 1.714745948128786e-05, "epoch": 4.139130434782609, "percentage": 59.13, "elapsed_time": "3:34:14", "remaining_time": "2:28:05"}
|
||||
{"current_steps": 2385, "total_steps": 4025, "loss": 0.3534, "lr": 1.7061636918031443e-05, "epoch": 4.147826086956521, "percentage": 59.25, "elapsed_time": "3:34:31", "remaining_time": "2:27:30"}
|
||||
{"current_steps": 2390, "total_steps": 4025, "loss": 0.3569, "lr": 1.6975869619452555e-05, "epoch": 4.156521739130435, "percentage": 59.38, "elapsed_time": "3:34:56", "remaining_time": "2:27:02"}
|
||||
{"current_steps": 2395, "total_steps": 4025, "loss": 0.3518, "lr": 1.6890159198660933e-05, "epoch": 4.165217391304348, "percentage": 59.5, "elapsed_time": "3:35:18", "remaining_time": "2:26:32"}
|
||||
{"current_steps": 2400, "total_steps": 4025, "loss": 0.3718, "lr": 1.680450726769655e-05, "epoch": 4.173913043478261, "percentage": 59.63, "elapsed_time": "3:35:42", "remaining_time": "2:26:03"}
|
||||
{"current_steps": 2405, "total_steps": 4025, "loss": 0.3412, "lr": 1.671891543749933e-05, "epoch": 4.182608695652174, "percentage": 59.75, "elapsed_time": "3:36:05", "remaining_time": "2:25:33"}
|
||||
{"current_steps": 2410, "total_steps": 4025, "loss": 0.3434, "lr": 1.663338531787879e-05, "epoch": 4.191304347826087, "percentage": 59.88, "elapsed_time": "3:36:28", "remaining_time": "2:25:04"}
|
||||
{"current_steps": 2415, "total_steps": 4025, "loss": 0.3236, "lr": 1.6547918517483816e-05, "epoch": 4.2, "percentage": 60.0, "elapsed_time": "3:36:49", "remaining_time": "2:24:32"}
|
||||
{"current_steps": 2420, "total_steps": 4025, "loss": 0.3397, "lr": 1.646251664377238e-05, "epoch": 4.208695652173913, "percentage": 60.12, "elapsed_time": "3:37:07", "remaining_time": "2:24:00"}
|
||||
{"current_steps": 2425, "total_steps": 4025, "loss": 0.351, "lr": 1.637718130298131e-05, "epoch": 4.217391304347826, "percentage": 60.25, "elapsed_time": "3:37:33", "remaining_time": "2:23:32"}
|
||||
{"current_steps": 2430, "total_steps": 4025, "loss": 0.3385, "lr": 1.6291914100096093e-05, "epoch": 4.226086956521739, "percentage": 60.37, "elapsed_time": "3:37:53", "remaining_time": "2:23:01"}
|
||||
{"current_steps": 2435, "total_steps": 4025, "loss": 0.3301, "lr": 1.6206716638820677e-05, "epoch": 4.234782608695652, "percentage": 60.5, "elapsed_time": "3:38:18", "remaining_time": "2:22:33"}
|
||||
{"current_steps": 2440, "total_steps": 4025, "loss": 0.3347, "lr": 1.6121590521547297e-05, "epoch": 4.243478260869566, "percentage": 60.62, "elapsed_time": "3:38:41", "remaining_time": "2:22:03"}
|
||||
{"current_steps": 2445, "total_steps": 4025, "loss": 0.3135, "lr": 1.6036537349326385e-05, "epoch": 4.252173913043478, "percentage": 60.75, "elapsed_time": "3:39:00", "remaining_time": "2:21:31"}
|
||||
{"current_steps": 2450, "total_steps": 4025, "loss": 0.3372, "lr": 1.5951558721836392e-05, "epoch": 4.260869565217392, "percentage": 60.87, "elapsed_time": "3:39:22", "remaining_time": "2:21:01"}
|
||||
{"current_steps": 2455, "total_steps": 4025, "loss": 0.3268, "lr": 1.5866656237353744e-05, "epoch": 4.269565217391304, "percentage": 60.99, "elapsed_time": "3:39:44", "remaining_time": "2:20:31"}
|
||||
{"current_steps": 2460, "total_steps": 4025, "loss": 0.3213, "lr": 1.5781831492722775e-05, "epoch": 4.278260869565218, "percentage": 61.12, "elapsed_time": "3:40:23", "remaining_time": "2:20:12"}
|
||||
{"current_steps": 2465, "total_steps": 4025, "loss": 0.3164, "lr": 1.5697086083325678e-05, "epoch": 4.28695652173913, "percentage": 61.24, "elapsed_time": "3:40:58", "remaining_time": "2:19:50"}
|
||||
{"current_steps": 2470, "total_steps": 4025, "loss": 0.3167, "lr": 1.5612421603052524e-05, "epoch": 4.2956521739130435, "percentage": 61.37, "elapsed_time": "3:41:32", "remaining_time": "2:19:28"}
|
||||
{"current_steps": 2475, "total_steps": 4025, "loss": 0.3125, "lr": 1.5527839644271258e-05, "epoch": 4.304347826086957, "percentage": 61.49, "elapsed_time": "3:42:05", "remaining_time": "2:19:05"}
|
||||
{"current_steps": 2480, "total_steps": 4025, "loss": 0.3481, "lr": 1.5443341797797772e-05, "epoch": 4.3130434782608695, "percentage": 61.61, "elapsed_time": "3:42:37", "remaining_time": "2:18:41"}
|
||||
{"current_steps": 2485, "total_steps": 4025, "loss": 0.3294, "lr": 1.5358929652865974e-05, "epoch": 4.321739130434783, "percentage": 61.74, "elapsed_time": "3:43:16", "remaining_time": "2:18:21"}
|
||||
{"current_steps": 2490, "total_steps": 4025, "loss": 0.3074, "lr": 1.5274604797097897e-05, "epoch": 4.3304347826086955, "percentage": 61.86, "elapsed_time": "3:43:49", "remaining_time": "2:17:58"}
|
||||
{"current_steps": 2495, "total_steps": 4025, "loss": 0.3475, "lr": 1.5190368816473833e-05, "epoch": 4.339130434782609, "percentage": 61.99, "elapsed_time": "3:44:15", "remaining_time": "2:17:31"}
|
||||
{"current_steps": 2500, "total_steps": 4025, "loss": 0.3175, "lr": 1.5106223295302532e-05, "epoch": 4.3478260869565215, "percentage": 62.11, "elapsed_time": "3:44:53", "remaining_time": "2:17:10"}
|
||||
{"current_steps": 2505, "total_steps": 4025, "loss": 0.3075, "lr": 1.5022169816191361e-05, "epoch": 4.356521739130435, "percentage": 62.24, "elapsed_time": "3:45:22", "remaining_time": "2:16:45"}
|
||||
{"current_steps": 2510, "total_steps": 4025, "loss": 0.3157, "lr": 1.4938209960016582e-05, "epoch": 4.3652173913043475, "percentage": 62.36, "elapsed_time": "3:45:51", "remaining_time": "2:16:19"}
|
||||
{"current_steps": 2515, "total_steps": 4025, "loss": 0.3166, "lr": 1.485434530589358e-05, "epoch": 4.373913043478261, "percentage": 62.48, "elapsed_time": "3:46:21", "remaining_time": "2:15:54"}
|
||||
{"current_steps": 2520, "total_steps": 4025, "loss": 0.3182, "lr": 1.4770577431147191e-05, "epoch": 4.3826086956521735, "percentage": 62.61, "elapsed_time": "3:46:55", "remaining_time": "2:15:31"}
|
||||
{"current_steps": 2525, "total_steps": 4025, "loss": 0.3305, "lr": 1.4686907911282035e-05, "epoch": 4.391304347826087, "percentage": 62.73, "elapsed_time": "3:47:28", "remaining_time": "2:15:07"}
|
||||
{"current_steps": 2530, "total_steps": 4025, "loss": 0.313, "lr": 1.4603338319952867e-05, "epoch": 4.4, "percentage": 62.86, "elapsed_time": "3:48:01", "remaining_time": "2:14:44"}
|
||||
{"current_steps": 2535, "total_steps": 4025, "loss": 0.2576, "lr": 1.451987022893498e-05, "epoch": 4.408695652173913, "percentage": 62.98, "elapsed_time": "3:48:31", "remaining_time": "2:14:19"}
|
||||
{"current_steps": 2540, "total_steps": 4025, "loss": 0.2208, "lr": 1.4436505208094662e-05, "epoch": 4.417391304347826, "percentage": 63.11, "elapsed_time": "3:49:01", "remaining_time": "2:13:54"}
|
||||
{"current_steps": 2545, "total_steps": 4025, "loss": 0.2068, "lr": 1.4353244825359656e-05, "epoch": 4.426086956521739, "percentage": 63.23, "elapsed_time": "3:49:34", "remaining_time": "2:13:30"}
|
||||
{"current_steps": 2550, "total_steps": 4025, "loss": 0.2133, "lr": 1.4270090646689672e-05, "epoch": 4.434782608695652, "percentage": 63.35, "elapsed_time": "3:50:03", "remaining_time": "2:13:04"}
|
||||
{"current_steps": 2555, "total_steps": 4025, "loss": 0.1999, "lr": 1.4187044236046936e-05, "epoch": 4.443478260869565, "percentage": 63.48, "elapsed_time": "3:50:32", "remaining_time": "2:12:38"}
|
||||
{"current_steps": 2560, "total_steps": 4025, "loss": 0.2241, "lr": 1.4104107155366768e-05, "epoch": 4.452173913043478, "percentage": 63.6, "elapsed_time": "3:51:02", "remaining_time": "2:12:12"}
|
||||
{"current_steps": 2565, "total_steps": 4025, "loss": 0.2111, "lr": 1.402128096452824e-05, "epoch": 4.460869565217392, "percentage": 63.73, "elapsed_time": "3:51:35", "remaining_time": "2:11:49"}
|
||||
{"current_steps": 2570, "total_steps": 4025, "loss": 0.2051, "lr": 1.3938567221324775e-05, "epoch": 4.469565217391304, "percentage": 63.85, "elapsed_time": "3:52:04", "remaining_time": "2:11:23"}
|
||||
{"current_steps": 2575, "total_steps": 4025, "loss": 0.2061, "lr": 1.3855967481434905e-05, "epoch": 4.478260869565218, "percentage": 63.98, "elapsed_time": "3:52:35", "remaining_time": "2:10:58"}
|
||||
{"current_steps": 2580, "total_steps": 4025, "loss": 0.2164, "lr": 1.377348329839298e-05, "epoch": 4.48695652173913, "percentage": 64.1, "elapsed_time": "3:53:04", "remaining_time": "2:10:32"}
|
||||
{"current_steps": 2585, "total_steps": 4025, "loss": 0.2227, "lr": 1.3691116223559961e-05, "epoch": 4.495652173913044, "percentage": 64.22, "elapsed_time": "3:53:36", "remaining_time": "2:10:08"}
|
||||
{"current_steps": 2590, "total_steps": 4025, "loss": 0.207, "lr": 1.3608867806094236e-05, "epoch": 4.504347826086956, "percentage": 64.35, "elapsed_time": "3:54:10", "remaining_time": "2:09:44"}
|
||||
{"current_steps": 2595, "total_steps": 4025, "loss": 0.1957, "lr": 1.3526739592922506e-05, "epoch": 4.51304347826087, "percentage": 64.47, "elapsed_time": "3:54:41", "remaining_time": "2:09:19"}
|
||||
{"current_steps": 2600, "total_steps": 4025, "loss": 0.2178, "lr": 1.3444733128710645e-05, "epoch": 4.521739130434782, "percentage": 64.6, "elapsed_time": "3:55:17", "remaining_time": "2:08:57"}
|
||||
{"current_steps": 2605, "total_steps": 4025, "loss": 0.2042, "lr": 1.3362849955834696e-05, "epoch": 4.530434782608696, "percentage": 64.72, "elapsed_time": "3:55:51", "remaining_time": "2:08:34"}
|
||||
{"current_steps": 2610, "total_steps": 4025, "loss": 0.199, "lr": 1.3281091614351829e-05, "epoch": 4.539130434782608, "percentage": 64.84, "elapsed_time": "3:56:18", "remaining_time": "2:08:07"}
|
||||
{"current_steps": 2615, "total_steps": 4025, "loss": 0.181, "lr": 1.3199459641971394e-05, "epoch": 4.547826086956522, "percentage": 64.97, "elapsed_time": "3:56:43", "remaining_time": "2:07:38"}
|
||||
{"current_steps": 2620, "total_steps": 4025, "loss": 0.1866, "lr": 1.3117955574025994e-05, "epoch": 4.556521739130435, "percentage": 65.09, "elapsed_time": "3:57:08", "remaining_time": "2:07:10"}
|
||||
{"current_steps": 2625, "total_steps": 4025, "loss": 0.1793, "lr": 1.3036580943442597e-05, "epoch": 4.565217391304348, "percentage": 65.22, "elapsed_time": "3:57:31", "remaining_time": "2:06:40"}
|
||||
{"current_steps": 2630, "total_steps": 4025, "loss": 0.1742, "lr": 1.2955337280713724e-05, "epoch": 4.573913043478261, "percentage": 65.34, "elapsed_time": "3:57:54", "remaining_time": "2:06:11"}
|
||||
{"current_steps": 2635, "total_steps": 4025, "loss": 0.1778, "lr": 1.2874226113868659e-05, "epoch": 4.582608695652174, "percentage": 65.47, "elapsed_time": "3:58:18", "remaining_time": "2:05:42"}
|
||||
{"current_steps": 2640, "total_steps": 4025, "loss": 0.1891, "lr": 1.2793248968444691e-05, "epoch": 4.591304347826087, "percentage": 65.59, "elapsed_time": "3:58:42", "remaining_time": "2:05:13"}
|
||||
{"current_steps": 2645, "total_steps": 4025, "loss": 0.188, "lr": 1.2712407367458464e-05, "epoch": 4.6, "percentage": 65.71, "elapsed_time": "3:59:08", "remaining_time": "2:04:46"}
|
||||
{"current_steps": 2650, "total_steps": 4025, "loss": 0.1903, "lr": 1.2631702831377275e-05, "epoch": 4.608695652173913, "percentage": 65.84, "elapsed_time": "3:59:35", "remaining_time": "2:04:19"}
|
||||
{"current_steps": 2655, "total_steps": 4025, "loss": 0.1861, "lr": 1.2551136878090523e-05, "epoch": 4.6173913043478265, "percentage": 65.96, "elapsed_time": "4:00:02", "remaining_time": "2:03:51"}
|
||||
{"current_steps": 2660, "total_steps": 4025, "loss": 0.1914, "lr": 1.2470711022881139e-05, "epoch": 4.626086956521739, "percentage": 66.09, "elapsed_time": "4:00:28", "remaining_time": "2:03:24"}
|
||||
{"current_steps": 2665, "total_steps": 4025, "loss": 0.1803, "lr": 1.2390426778397094e-05, "epoch": 4.6347826086956525, "percentage": 66.21, "elapsed_time": "4:00:54", "remaining_time": "2:02:56"}
|
||||
{"current_steps": 2670, "total_steps": 4025, "loss": 0.1873, "lr": 1.2310285654622949e-05, "epoch": 4.643478260869565, "percentage": 66.34, "elapsed_time": "4:01:23", "remaining_time": "2:02:30"}
|
||||
{"current_steps": 2675, "total_steps": 4025, "loss": 0.1741, "lr": 1.223028915885145e-05, "epoch": 4.6521739130434785, "percentage": 66.46, "elapsed_time": "4:01:46", "remaining_time": "2:02:00"}
|
||||
{"current_steps": 2680, "total_steps": 4025, "loss": 0.1698, "lr": 1.215043879565517e-05, "epoch": 4.660869565217391, "percentage": 66.58, "elapsed_time": "4:02:12", "remaining_time": "2:01:33"}
|
||||
{"current_steps": 2685, "total_steps": 4025, "loss": 0.2298, "lr": 1.2070736066858263e-05, "epoch": 4.6695652173913045, "percentage": 66.71, "elapsed_time": "4:02:46", "remaining_time": "2:01:09"}
|
||||
{"current_steps": 2690, "total_steps": 4025, "loss": 0.2648, "lr": 1.1991182471508138e-05, "epoch": 4.678260869565217, "percentage": 66.83, "elapsed_time": "4:03:26", "remaining_time": "2:00:49"}
|
||||
{"current_steps": 2695, "total_steps": 4025, "loss": 0.2687, "lr": 1.1911779505847321e-05, "epoch": 4.6869565217391305, "percentage": 66.96, "elapsed_time": "4:04:11", "remaining_time": "2:00:30"}
|
||||
{"current_steps": 2700, "total_steps": 4025, "loss": 0.2583, "lr": 1.183252866328529e-05, "epoch": 4.695652173913043, "percentage": 67.08, "elapsed_time": "4:04:55", "remaining_time": "2:00:11"}
|
||||
{"current_steps": 2705, "total_steps": 4025, "loss": 0.2484, "lr": 1.1753431434370401e-05, "epoch": 4.7043478260869565, "percentage": 67.2, "elapsed_time": "4:05:39", "remaining_time": "1:59:52"}
|
||||
{"current_steps": 2710, "total_steps": 4025, "loss": 0.2564, "lr": 1.1674489306761858e-05, "epoch": 4.71304347826087, "percentage": 67.33, "elapsed_time": "4:06:22", "remaining_time": "1:59:33"}
|
||||
{"current_steps": 2715, "total_steps": 4025, "loss": 0.2631, "lr": 1.1595703765201699e-05, "epoch": 4.721739130434782, "percentage": 67.45, "elapsed_time": "4:07:03", "remaining_time": "1:59:12"}
|
||||
{"current_steps": 2720, "total_steps": 4025, "loss": 0.264, "lr": 1.1517076291486896e-05, "epoch": 4.730434782608696, "percentage": 67.58, "elapsed_time": "4:07:48", "remaining_time": "1:58:53"}
|
||||
{"current_steps": 2725, "total_steps": 4025, "loss": 0.298, "lr": 1.1438608364441524e-05, "epoch": 4.739130434782608, "percentage": 67.7, "elapsed_time": "4:08:23", "remaining_time": "1:58:30"}
|
||||
{"current_steps": 2730, "total_steps": 4025, "loss": 0.3472, "lr": 1.1360301459888854e-05, "epoch": 4.747826086956522, "percentage": 67.83, "elapsed_time": "4:08:54", "remaining_time": "1:58:04"}
|
||||
{"current_steps": 2735, "total_steps": 4025, "loss": 0.3288, "lr": 1.1282157050623686e-05, "epoch": 4.756521739130434, "percentage": 67.95, "elapsed_time": "4:09:21", "remaining_time": "1:57:36"}
|
||||
{"current_steps": 2740, "total_steps": 4025, "loss": 0.3169, "lr": 1.120417660638459e-05, "epoch": 4.765217391304348, "percentage": 68.07, "elapsed_time": "4:09:46", "remaining_time": "1:57:08"}
|
||||
{"current_steps": 2745, "total_steps": 4025, "loss": 0.3244, "lr": 1.1126361593826296e-05, "epoch": 4.773913043478261, "percentage": 68.2, "elapsed_time": "4:10:12", "remaining_time": "1:56:40"}
|
||||
{"current_steps": 2750, "total_steps": 4025, "loss": 0.3222, "lr": 1.1048713476492096e-05, "epoch": 4.782608695652174, "percentage": 68.32, "elapsed_time": "4:10:40", "remaining_time": "1:56:13"}
|
||||
{"current_steps": 2755, "total_steps": 4025, "loss": 0.3414, "lr": 1.097123371478631e-05, "epoch": 4.791304347826087, "percentage": 68.45, "elapsed_time": "4:11:07", "remaining_time": "1:55:45"}
|
||||
{"current_steps": 2760, "total_steps": 4025, "loss": 0.3153, "lr": 1.0893923765946855e-05, "epoch": 4.8, "percentage": 68.57, "elapsed_time": "4:11:33", "remaining_time": "1:55:18"}
|
||||
{"current_steps": 2765, "total_steps": 4025, "loss": 0.3153, "lr": 1.0816785084017783e-05, "epoch": 4.808695652173913, "percentage": 68.7, "elapsed_time": "4:12:00", "remaining_time": "1:54:50"}
|
||||
{"current_steps": 2770, "total_steps": 4025, "loss": 0.3272, "lr": 1.073981911982197e-05, "epoch": 4.817391304347826, "percentage": 68.82, "elapsed_time": "4:12:27", "remaining_time": "1:54:22"}
|
||||
{"current_steps": 2775, "total_steps": 4025, "loss": 0.3255, "lr": 1.066302732093382e-05, "epoch": 4.826086956521739, "percentage": 68.94, "elapsed_time": "4:12:55", "remaining_time": "1:53:55"}
|
||||
{"current_steps": 2780, "total_steps": 4025, "loss": 0.3201, "lr": 1.058641113165205e-05, "epoch": 4.834782608695652, "percentage": 69.07, "elapsed_time": "4:13:22", "remaining_time": "1:53:28"}
|
||||
{"current_steps": 2785, "total_steps": 4025, "loss": 0.3114, "lr": 1.0509971992972506e-05, "epoch": 4.843478260869565, "percentage": 69.19, "elapsed_time": "4:13:52", "remaining_time": "1:53:02"}
|
||||
{"current_steps": 2790, "total_steps": 4025, "loss": 0.3257, "lr": 1.0433711342561059e-05, "epoch": 4.852173913043478, "percentage": 69.32, "elapsed_time": "4:14:23", "remaining_time": "1:52:36"}
|
||||
{"current_steps": 2795, "total_steps": 4025, "loss": 0.2896, "lr": 1.0357630614726595e-05, "epoch": 4.860869565217391, "percentage": 69.44, "elapsed_time": "4:14:54", "remaining_time": "1:52:10"}
|
||||
{"current_steps": 2800, "total_steps": 4025, "loss": 0.2799, "lr": 1.0281731240394e-05, "epoch": 4.869565217391305, "percentage": 69.57, "elapsed_time": "4:15:21", "remaining_time": "1:51:42"}
|
||||
{"current_steps": 2805, "total_steps": 4025, "loss": 0.2037, "lr": 1.020601464707729e-05, "epoch": 4.878260869565217, "percentage": 69.69, "elapsed_time": "4:15:37", "remaining_time": "1:51:10"}
|
||||
{"current_steps": 2810, "total_steps": 4025, "loss": 0.1894, "lr": 1.0130482258852723e-05, "epoch": 4.886956521739131, "percentage": 69.81, "elapsed_time": "4:15:55", "remaining_time": "1:50:39"}
|
||||
{"current_steps": 2815, "total_steps": 4025, "loss": 0.2146, "lr": 1.0055135496332034e-05, "epoch": 4.895652173913043, "percentage": 69.94, "elapsed_time": "4:16:13", "remaining_time": "1:50:08"}
|
||||
{"current_steps": 2820, "total_steps": 4025, "loss": 0.2027, "lr": 9.979975776635713e-06, "epoch": 4.904347826086957, "percentage": 70.06, "elapsed_time": "4:16:29", "remaining_time": "1:49:36"}
|
||||
{"current_steps": 2825, "total_steps": 4025, "loss": 0.197, "lr": 9.905004513366354e-06, "epoch": 4.913043478260869, "percentage": 70.19, "elapsed_time": "4:16:52", "remaining_time": "1:49:06"}
|
||||
{"current_steps": 2830, "total_steps": 4025, "loss": 0.2019, "lr": 9.830223116582065e-06, "epoch": 4.921739130434783, "percentage": 70.31, "elapsed_time": "4:17:16", "remaining_time": "1:48:38"}
|
||||
{"current_steps": 2835, "total_steps": 4025, "loss": 0.2035, "lr": 9.755632992769948e-06, "epoch": 4.930434782608696, "percentage": 70.43, "elapsed_time": "4:17:34", "remaining_time": "1:48:07"}
|
||||
{"current_steps": 2840, "total_steps": 4025, "loss": 0.2073, "lr": 9.68123554481964e-06, "epoch": 4.939130434782609, "percentage": 70.56, "elapsed_time": "4:17:52", "remaining_time": "1:47:36"}
|
||||
{"current_steps": 2845, "total_steps": 4025, "loss": 0.1823, "lr": 9.607032171996959e-06, "epoch": 4.947826086956522, "percentage": 70.68, "elapsed_time": "4:18:12", "remaining_time": "1:47:05"}
|
||||
{"current_steps": 2850, "total_steps": 4025, "loss": 0.1799, "lr": 9.533024269917534e-06, "epoch": 4.956521739130435, "percentage": 70.81, "elapsed_time": "4:18:32", "remaining_time": "1:46:35"}
|
||||
{"current_steps": 2855, "total_steps": 4025, "loss": 0.2336, "lr": 9.459213230520596e-06, "epoch": 4.965217391304348, "percentage": 70.93, "elapsed_time": "4:18:53", "remaining_time": "1:46:05"}
|
||||
{"current_steps": 2860, "total_steps": 4025, "loss": 0.1808, "lr": 9.385600442042784e-06, "epoch": 4.973913043478261, "percentage": 71.06, "elapsed_time": "4:19:11", "remaining_time": "1:45:34"}
|
||||
{"current_steps": 2865, "total_steps": 4025, "loss": 0.2157, "lr": 9.312187288992036e-06, "epoch": 4.982608695652174, "percentage": 71.18, "elapsed_time": "4:19:30", "remaining_time": "1:45:04"}
|
||||
{"current_steps": 2870, "total_steps": 4025, "loss": 0.1888, "lr": 9.238975152121555e-06, "epoch": 4.9913043478260875, "percentage": 71.3, "elapsed_time": "4:19:50", "remaining_time": "1:44:34"}
|
||||
{"current_steps": 2875, "total_steps": 4025, "loss": 0.2193, "lr": 9.16596540840383e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "4:20:13", "remaining_time": "1:44:05"}
|
||||
{"current_steps": 2880, "total_steps": 4025, "loss": 0.2125, "lr": 9.09315943100474e-06, "epoch": 5.008695652173913, "percentage": 71.55, "elapsed_time": "4:20:36", "remaining_time": "1:43:36"}
|
||||
{"current_steps": 2885, "total_steps": 4025, "loss": 0.2011, "lr": 9.020558589257755e-06, "epoch": 5.017391304347826, "percentage": 71.68, "elapsed_time": "4:20:57", "remaining_time": "1:43:06"}
|
||||
{"current_steps": 2890, "total_steps": 4025, "loss": 0.2043, "lr": 8.948164248638131e-06, "epoch": 5.026086956521739, "percentage": 71.8, "elapsed_time": "4:21:20", "remaining_time": "1:42:38"}
|
||||
{"current_steps": 2895, "total_steps": 4025, "loss": 0.204, "lr": 8.875977770737262e-06, "epoch": 5.034782608695652, "percentage": 71.93, "elapsed_time": "4:21:42", "remaining_time": "1:42:09"}
|
||||
{"current_steps": 2900, "total_steps": 4025, "loss": 0.1982, "lr": 8.804000513237067e-06, "epoch": 5.043478260869565, "percentage": 72.05, "elapsed_time": "4:22:03", "remaining_time": "1:41:39"}
|
||||
{"current_steps": 2905, "total_steps": 4025, "loss": 0.2043, "lr": 8.73223382988445e-06, "epoch": 5.052173913043478, "percentage": 72.17, "elapsed_time": "4:22:24", "remaining_time": "1:41:10"}
|
||||
{"current_steps": 2910, "total_steps": 4025, "loss": 0.1916, "lr": 8.660679070465839e-06, "epoch": 5.060869565217391, "percentage": 72.3, "elapsed_time": "4:22:48", "remaining_time": "1:40:41"}
|
||||
{"current_steps": 2915, "total_steps": 4025, "loss": 0.1921, "lr": 8.589337580781802e-06, "epoch": 5.069565217391304, "percentage": 72.42, "elapsed_time": "4:23:12", "remaining_time": "1:40:13"}
|
||||
{"current_steps": 2920, "total_steps": 4025, "loss": 0.1977, "lr": 8.518210702621727e-06, "epoch": 5.078260869565217, "percentage": 72.55, "elapsed_time": "4:23:33", "remaining_time": "1:39:44"}
|
||||
{"current_steps": 2925, "total_steps": 4025, "loss": 0.2038, "lr": 8.447299773738617e-06, "epoch": 5.086956521739131, "percentage": 72.67, "elapsed_time": "4:23:55", "remaining_time": "1:39:15"}
|
||||
{"current_steps": 2930, "total_steps": 4025, "loss": 0.187, "lr": 8.376606127823877e-06, "epoch": 5.095652173913043, "percentage": 72.8, "elapsed_time": "4:24:18", "remaining_time": "1:38:46"}
|
||||
{"current_steps": 2935, "total_steps": 4025, "loss": 0.1925, "lr": 8.306131094482271e-06, "epoch": 5.104347826086957, "percentage": 72.92, "elapsed_time": "4:24:42", "remaining_time": "1:38:18"}
|
||||
{"current_steps": 2940, "total_steps": 4025, "loss": 0.1917, "lr": 8.235875999206895e-06, "epoch": 5.113043478260869, "percentage": 73.04, "elapsed_time": "4:25:03", "remaining_time": "1:37:48"}
|
||||
{"current_steps": 2945, "total_steps": 4025, "loss": 0.2078, "lr": 8.165842163354263e-06, "epoch": 5.121739130434783, "percentage": 73.17, "elapsed_time": "4:25:26", "remaining_time": "1:37:20"}
|
||||
{"current_steps": 2950, "total_steps": 4025, "loss": 0.1878, "lr": 8.096030904119438e-06, "epoch": 5.130434782608695, "percentage": 73.29, "elapsed_time": "4:25:46", "remaining_time": "1:36:51"}
|
||||
{"current_steps": 2955, "total_steps": 4025, "loss": 0.2796, "lr": 8.026443534511272e-06, "epoch": 5.139130434782609, "percentage": 73.42, "elapsed_time": "4:26:07", "remaining_time": "1:36:21"}
|
||||
{"current_steps": 2960, "total_steps": 4025, "loss": 0.3275, "lr": 7.957081363327695e-06, "epoch": 5.147826086956521, "percentage": 73.54, "elapsed_time": "4:26:24", "remaining_time": "1:35:51"}
|
||||
{"current_steps": 2965, "total_steps": 4025, "loss": 0.3322, "lr": 7.88794569513113e-06, "epoch": 5.156521739130435, "percentage": 73.66, "elapsed_time": "4:26:49", "remaining_time": "1:35:23"}
|
||||
{"current_steps": 2970, "total_steps": 4025, "loss": 0.3263, "lr": 7.819037830223914e-06, "epoch": 5.165217391304348, "percentage": 73.79, "elapsed_time": "4:27:11", "remaining_time": "1:34:54"}
|
||||
{"current_steps": 2975, "total_steps": 4025, "loss": 0.3453, "lr": 7.750359064623878e-06, "epoch": 5.173913043478261, "percentage": 73.91, "elapsed_time": "4:27:35", "remaining_time": "1:34:26"}
|
||||
{"current_steps": 2980, "total_steps": 4025, "loss": 0.3155, "lr": 7.681910690039947e-06, "epoch": 5.182608695652174, "percentage": 74.04, "elapsed_time": "4:27:58", "remaining_time": "1:33:58"}
|
||||
{"current_steps": 2985, "total_steps": 4025, "loss": 0.3171, "lr": 7.613693993847859e-06, "epoch": 5.191304347826087, "percentage": 74.16, "elapsed_time": "4:28:21", "remaining_time": "1:33:29"}
|
||||
{"current_steps": 2990, "total_steps": 4025, "loss": 0.2975, "lr": 7.545710259065964e-06, "epoch": 5.2, "percentage": 74.29, "elapsed_time": "4:28:41", "remaining_time": "1:33:00"}
|
||||
{"current_steps": 2995, "total_steps": 4025, "loss": 0.3106, "lr": 7.47796076433106e-06, "epoch": 5.208695652173913, "percentage": 74.41, "elapsed_time": "4:29:00", "remaining_time": "1:32:30"}
|
||||
{"current_steps": 3000, "total_steps": 4025, "loss": 0.324, "lr": 7.410446783874365e-06, "epoch": 5.217391304347826, "percentage": 74.53, "elapsed_time": "4:29:25", "remaining_time": "1:32:03"}
|
||||
{"current_steps": 3005, "total_steps": 4025, "loss": 0.3125, "lr": 7.34316958749755e-06, "epoch": 5.226086956521739, "percentage": 74.66, "elapsed_time": "4:30:14", "remaining_time": "1:31:43"}
|
||||
{"current_steps": 3010, "total_steps": 4025, "loss": 0.3035, "lr": 7.27613044054887e-06, "epoch": 5.234782608695652, "percentage": 74.78, "elapsed_time": "4:30:40", "remaining_time": "1:31:16"}
|
||||
{"current_steps": 3015, "total_steps": 4025, "loss": 0.3108, "lr": 7.209330603899331e-06, "epoch": 5.243478260869566, "percentage": 74.91, "elapsed_time": "4:31:03", "remaining_time": "1:30:48"}
|
||||
{"current_steps": 3020, "total_steps": 4025, "loss": 0.2876, "lr": 7.142771333919005e-06, "epoch": 5.252173913043478, "percentage": 75.03, "elapsed_time": "4:31:22", "remaining_time": "1:30:18"}
|
||||
{"current_steps": 3025, "total_steps": 4025, "loss": 0.3115, "lr": 7.076453882453387e-06, "epoch": 5.260869565217392, "percentage": 75.16, "elapsed_time": "4:31:43", "remaining_time": "1:29:49"}
|
||||
{"current_steps": 3030, "total_steps": 4025, "loss": 0.3051, "lr": 7.010379496799855e-06, "epoch": 5.269565217391304, "percentage": 75.28, "elapsed_time": "4:32:06", "remaining_time": "1:29:21"}
|
||||
{"current_steps": 3035, "total_steps": 4025, "loss": 0.3046, "lr": 6.9445494196842146e-06, "epoch": 5.278260869565218, "percentage": 75.4, "elapsed_time": "4:32:45", "remaining_time": "1:28:58"}
|
||||
{"current_steps": 3040, "total_steps": 4025, "loss": 0.2961, "lr": 6.878964889237305e-06, "epoch": 5.28695652173913, "percentage": 75.53, "elapsed_time": "4:33:20", "remaining_time": "1:28:34"}
|
||||
{"current_steps": 3045, "total_steps": 4025, "loss": 0.2984, "lr": 6.813627138971757e-06, "epoch": 5.2956521739130435, "percentage": 75.65, "elapsed_time": "4:33:54", "remaining_time": "1:28:09"}
|
||||
{"current_steps": 3050, "total_steps": 4025, "loss": 0.2934, "lr": 6.748537397758741e-06, "epoch": 5.304347826086957, "percentage": 75.78, "elapsed_time": "4:34:27", "remaining_time": "1:27:44"}
|
||||
{"current_steps": 3055, "total_steps": 4025, "loss": 0.3307, "lr": 6.683696889804883e-06, "epoch": 5.3130434782608695, "percentage": 75.9, "elapsed_time": "4:34:59", "remaining_time": "1:27:18"}
|
||||
{"current_steps": 3060, "total_steps": 4025, "loss": 0.3114, "lr": 6.619106834629234e-06, "epoch": 5.321739130434783, "percentage": 76.02, "elapsed_time": "4:35:38", "remaining_time": "1:26:55"}
|
||||
{"current_steps": 3065, "total_steps": 4025, "loss": 0.2888, "lr": 6.554768447040341e-06, "epoch": 5.3304347826086955, "percentage": 76.15, "elapsed_time": "4:36:12", "remaining_time": "1:26:30"}
|
||||
{"current_steps": 3070, "total_steps": 4025, "loss": 0.3271, "lr": 6.490682937113382e-06, "epoch": 5.339130434782609, "percentage": 76.27, "elapsed_time": "4:36:37", "remaining_time": "1:26:03"}
|
||||
{"current_steps": 3075, "total_steps": 4025, "loss": 0.2982, "lr": 6.426851510167422e-06, "epoch": 5.3478260869565215, "percentage": 76.4, "elapsed_time": "4:37:15", "remaining_time": "1:25:39"}
|
||||
{"current_steps": 3080, "total_steps": 4025, "loss": 0.2872, "lr": 6.363275366742734e-06, "epoch": 5.356521739130435, "percentage": 76.52, "elapsed_time": "4:37:44", "remaining_time": "1:25:13"}
|
||||
{"current_steps": 3085, "total_steps": 4025, "loss": 0.2962, "lr": 6.2999557025782375e-06, "epoch": 5.3652173913043475, "percentage": 76.65, "elapsed_time": "4:38:14", "remaining_time": "1:24:46"}
|
||||
{"current_steps": 3090, "total_steps": 4025, "loss": 0.2973, "lr": 6.236893708588976e-06, "epoch": 5.373913043478261, "percentage": 76.77, "elapsed_time": "4:38:43", "remaining_time": "1:24:20"}
|
||||
{"current_steps": 3095, "total_steps": 4025, "loss": 0.2983, "lr": 6.174090570843747e-06, "epoch": 5.3826086956521735, "percentage": 76.89, "elapsed_time": "4:39:18", "remaining_time": "1:23:55"}
|
||||
{"current_steps": 3100, "total_steps": 4025, "loss": 0.3129, "lr": 6.111547470542785e-06, "epoch": 5.391304347826087, "percentage": 77.02, "elapsed_time": "4:39:50", "remaining_time": "1:23:30"}
|
||||
{"current_steps": 3105, "total_steps": 4025, "loss": 0.2955, "lr": 6.049265583995538e-06, "epoch": 5.4, "percentage": 77.14, "elapsed_time": "4:40:24", "remaining_time": "1:23:04"}
|
||||
{"current_steps": 3110, "total_steps": 4025, "loss": 0.2432, "lr": 5.987246082598561e-06, "epoch": 5.408695652173913, "percentage": 77.27, "elapsed_time": "4:40:53", "remaining_time": "1:22:38"}
|
||||
{"current_steps": 3115, "total_steps": 4025, "loss": 0.21, "lr": 5.925490132813465e-06, "epoch": 5.417391304347826, "percentage": 77.39, "elapsed_time": "4:41:24", "remaining_time": "1:22:12"}
|
||||
{"current_steps": 3120, "total_steps": 4025, "loss": 0.1979, "lr": 5.863998896144984e-06, "epoch": 5.426086956521739, "percentage": 77.52, "elapsed_time": "4:41:57", "remaining_time": "1:21:47"}
|
||||
{"current_steps": 3125, "total_steps": 4025, "loss": 0.2019, "lr": 5.802773529119157e-06, "epoch": 5.434782608695652, "percentage": 77.64, "elapsed_time": "4:42:25", "remaining_time": "1:21:20"}
|
||||
{"current_steps": 3130, "total_steps": 4025, "loss": 0.1893, "lr": 5.741815183261525e-06, "epoch": 5.443478260869565, "percentage": 77.76, "elapsed_time": "4:42:54", "remaining_time": "1:20:53"}
|
||||
{"current_steps": 3135, "total_steps": 4025, "loss": 0.2128, "lr": 5.681125005075509e-06, "epoch": 5.452173913043478, "percentage": 77.89, "elapsed_time": "4:43:24", "remaining_time": "1:20:27"}
|
||||
{"current_steps": 3140, "total_steps": 4025, "loss": 0.2004, "lr": 5.620704136020838e-06, "epoch": 5.460869565217392, "percentage": 78.01, "elapsed_time": "4:43:57", "remaining_time": "1:20:02"}
|
||||
{"current_steps": 3145, "total_steps": 4025, "loss": 0.1934, "lr": 5.560553712492081e-06, "epoch": 5.469565217391304, "percentage": 78.14, "elapsed_time": "4:44:26", "remaining_time": "1:19:35"}
|
||||
{"current_steps": 3150, "total_steps": 4025, "loss": 0.1965, "lr": 5.500674865797271e-06, "epoch": 5.478260869565218, "percentage": 78.26, "elapsed_time": "4:44:57", "remaining_time": "1:19:09"}
|
||||
{"current_steps": 3155, "total_steps": 4025, "loss": 0.2053, "lr": 5.4410687221366265e-06, "epoch": 5.48695652173913, "percentage": 78.39, "elapsed_time": "4:45:26", "remaining_time": "1:18:42"}
|
||||
{"current_steps": 3160, "total_steps": 4025, "loss": 0.2115, "lr": 5.381736402581375e-06, "epoch": 5.495652173913044, "percentage": 78.51, "elapsed_time": "4:45:59", "remaining_time": "1:18:17"}
|
||||
{"current_steps": 3165, "total_steps": 4025, "loss": 0.196, "lr": 5.3226790230526724e-06, "epoch": 5.504347826086956, "percentage": 78.63, "elapsed_time": "4:46:32", "remaining_time": "1:17:51"}
|
||||
{"current_steps": 3170, "total_steps": 4025, "loss": 0.1851, "lr": 5.263897694300597e-06, "epoch": 5.51304347826087, "percentage": 78.76, "elapsed_time": "4:47:03", "remaining_time": "1:17:25"}
|
||||
{"current_steps": 3175, "total_steps": 4025, "loss": 0.2073, "lr": 5.205393521883275e-06, "epoch": 5.521739130434782, "percentage": 78.88, "elapsed_time": "4:47:40", "remaining_time": "1:17:00"}
|
||||
{"current_steps": 3180, "total_steps": 4025, "loss": 0.194, "lr": 5.147167606146078e-06, "epoch": 5.530434782608696, "percentage": 79.01, "elapsed_time": "4:48:13", "remaining_time": "1:16:35"}
|
||||
{"current_steps": 3185, "total_steps": 4025, "loss": 0.1887, "lr": 5.089221042200933e-06, "epoch": 5.539130434782608, "percentage": 79.13, "elapsed_time": "4:48:41", "remaining_time": "1:16:08"}
|
||||
{"current_steps": 3190, "total_steps": 4025, "loss": 0.1723, "lr": 5.0315549199057254e-06, "epoch": 5.547826086956522, "percentage": 79.25, "elapsed_time": "4:49:06", "remaining_time": "1:15:40"}
|
||||
{"current_steps": 3195, "total_steps": 4025, "loss": 0.1772, "lr": 4.97417032384379e-06, "epoch": 5.556521739130435, "percentage": 79.38, "elapsed_time": "4:49:31", "remaining_time": "1:15:12"}
|
||||
{"current_steps": 3200, "total_steps": 4025, "loss": 0.1699, "lr": 4.91706833330354e-06, "epoch": 5.565217391304348, "percentage": 79.5, "elapsed_time": "4:49:54", "remaining_time": "1:14:44"}
|
||||
{"current_steps": 3205, "total_steps": 4025, "loss": 0.1651, "lr": 4.8602500222581376e-06, "epoch": 5.573913043478261, "percentage": 79.63, "elapsed_time": "4:50:17", "remaining_time": "1:14:16"}
|
||||
{"current_steps": 3210, "total_steps": 4025, "loss": 0.1689, "lr": 4.803716459345302e-06, "epoch": 5.582608695652174, "percentage": 79.75, "elapsed_time": "4:50:41", "remaining_time": "1:13:48"}
|
||||
{"current_steps": 3215, "total_steps": 4025, "loss": 0.1791, "lr": 4.747468707847218e-06, "epoch": 5.591304347826087, "percentage": 79.88, "elapsed_time": "4:51:04", "remaining_time": "1:13:20"}
|
||||
{"current_steps": 3220, "total_steps": 4025, "loss": 0.178, "lr": 4.6915078256705475e-06, "epoch": 5.6, "percentage": 80.0, "elapsed_time": "4:51:31", "remaining_time": "1:12:52"}
|
||||
{"current_steps": 3225, "total_steps": 4025, "loss": 0.1807, "lr": 4.635834865326501e-06, "epoch": 5.608695652173913, "percentage": 80.12, "elapsed_time": "4:51:58", "remaining_time": "1:12:25"}
|
||||
{"current_steps": 3230, "total_steps": 4025, "loss": 0.1764, "lr": 4.580450873911071e-06, "epoch": 5.6173913043478265, "percentage": 80.25, "elapsed_time": "4:52:24", "remaining_time": "1:11:58"}
|
||||
{"current_steps": 3235, "total_steps": 4025, "loss": 0.1822, "lr": 4.525356893085324e-06, "epoch": 5.626086956521739, "percentage": 80.37, "elapsed_time": "4:52:51", "remaining_time": "1:11:31"}
|
||||
{"current_steps": 3240, "total_steps": 4025, "loss": 0.1705, "lr": 4.47055395905581e-06, "epoch": 5.6347826086956525, "percentage": 80.5, "elapsed_time": "4:53:16", "remaining_time": "1:11:03"}
|
||||
{"current_steps": 3245, "total_steps": 4025, "loss": 0.1776, "lr": 4.41604310255509e-06, "epoch": 5.643478260869565, "percentage": 80.62, "elapsed_time": "4:53:45", "remaining_time": "1:10:36"}
|
||||
{"current_steps": 3250, "total_steps": 4025, "loss": 0.1647, "lr": 4.361825348822324e-06, "epoch": 5.6521739130434785, "percentage": 80.75, "elapsed_time": "4:54:09", "remaining_time": "1:10:08"}
|
||||
{"current_steps": 3255, "total_steps": 4025, "loss": 0.1596, "lr": 4.307901717584002e-06, "epoch": 5.660869565217391, "percentage": 80.87, "elapsed_time": "4:54:35", "remaining_time": "1:09:41"}
|
||||
{"current_steps": 3260, "total_steps": 4025, "loss": 0.2164, "lr": 4.254273223034764e-06, "epoch": 5.6695652173913045, "percentage": 80.99, "elapsed_time": "4:55:09", "remaining_time": "1:09:15"}
|
||||
{"current_steps": 3265, "total_steps": 4025, "loss": 0.2523, "lr": 4.200940873818326e-06, "epoch": 5.678260869565217, "percentage": 81.12, "elapsed_time": "4:55:49", "remaining_time": "1:08:51"}
|
||||
{"current_steps": 3270, "total_steps": 4025, "loss": 0.256, "lr": 4.1479056730085096e-06, "epoch": 5.6869565217391305, "percentage": 81.24, "elapsed_time": "4:56:33", "remaining_time": "1:08:28"}
|
||||
{"current_steps": 3275, "total_steps": 4025, "loss": 0.2457, "lr": 4.095168618090375e-06, "epoch": 5.695652173913043, "percentage": 81.37, "elapsed_time": "4:57:17", "remaining_time": "1:08:05"}
|
||||
{"current_steps": 3280, "total_steps": 4025, "loss": 0.2359, "lr": 4.0427307009414486e-06, "epoch": 5.7043478260869565, "percentage": 81.49, "elapsed_time": "4:58:02", "remaining_time": "1:07:41"}
|
||||
{"current_steps": 3285, "total_steps": 4025, "loss": 0.2439, "lr": 3.9905929078131e-06, "epoch": 5.71304347826087, "percentage": 81.61, "elapsed_time": "4:58:45", "remaining_time": "1:07:17"}
|
||||
{"current_steps": 3290, "total_steps": 4025, "loss": 0.2494, "lr": 3.938756219311959e-06, "epoch": 5.721739130434782, "percentage": 81.74, "elapsed_time": "4:59:26", "remaining_time": "1:06:53"}
|
||||
{"current_steps": 3295, "total_steps": 4025, "loss": 0.2504, "lr": 3.887221610381487e-06, "epoch": 5.730434782608696, "percentage": 81.86, "elapsed_time": "5:00:11", "remaining_time": "1:06:30"}
|
||||
{"current_steps": 3300, "total_steps": 4025, "loss": 0.2823, "lr": 3.8359900502836335e-06, "epoch": 5.739130434782608, "percentage": 81.99, "elapsed_time": "5:00:46", "remaining_time": "1:06:04"}
|
||||
{"current_steps": 3305, "total_steps": 4025, "loss": 0.3315, "lr": 3.785062502580621e-06, "epoch": 5.747826086956522, "percentage": 82.11, "elapsed_time": "5:01:16", "remaining_time": "1:05:38"}
|
||||
{"current_steps": 3310, "total_steps": 4025, "loss": 0.3121, "lr": 3.7344399251168085e-06, "epoch": 5.756521739130434, "percentage": 82.24, "elapsed_time": "5:01:44", "remaining_time": "1:05:10"}
|
||||
{"current_steps": 3315, "total_steps": 4025, "loss": 0.3022, "lr": 3.6841232700006746e-06, "epoch": 5.765217391304348, "percentage": 82.36, "elapsed_time": "5:02:09", "remaining_time": "1:04:42"}
|
||||
{"current_steps": 3320, "total_steps": 4025, "loss": 0.3091, "lr": 3.634113483586923e-06, "epoch": 5.773913043478261, "percentage": 82.48, "elapsed_time": "5:02:35", "remaining_time": "1:04:15"}
|
||||
{"current_steps": 3325, "total_steps": 4025, "loss": 0.3057, "lr": 3.5844115064586806e-06, "epoch": 5.782608695652174, "percentage": 82.61, "elapsed_time": "5:03:03", "remaining_time": "1:03:48"}
|
||||
{"current_steps": 3330, "total_steps": 4025, "loss": 0.3259, "lr": 3.535018273409794e-06, "epoch": 5.791304347826087, "percentage": 82.73, "elapsed_time": "5:03:29", "remaining_time": "1:03:20"}
|
||||
{"current_steps": 3335, "total_steps": 4025, "loss": 0.2995, "lr": 3.4859347134272616e-06, "epoch": 5.8, "percentage": 82.86, "elapsed_time": "5:03:56", "remaining_time": "1:02:53"}
|
||||
{"current_steps": 3340, "total_steps": 4025, "loss": 0.2999, "lr": 3.437161749673754e-06, "epoch": 5.808695652173913, "percentage": 82.98, "elapsed_time": "5:04:22", "remaining_time": "1:02:25"}
|
||||
{"current_steps": 3345, "total_steps": 4025, "loss": 0.3115, "lr": 3.3887002994702555e-06, "epoch": 5.817391304347826, "percentage": 83.11, "elapsed_time": "5:04:50", "remaining_time": "1:01:58"}
|
||||
{"current_steps": 3350, "total_steps": 4025, "loss": 0.3109, "lr": 3.3405512742788093e-06, "epoch": 5.826086956521739, "percentage": 83.23, "elapsed_time": "5:05:18", "remaining_time": "1:01:31"}
|
||||
{"current_steps": 3355, "total_steps": 4025, "loss": 0.3038, "lr": 3.2927155796853747e-06, "epoch": 5.834782608695652, "percentage": 83.35, "elapsed_time": "5:05:45", "remaining_time": "1:01:03"}
|
||||
{"current_steps": 3360, "total_steps": 4025, "loss": 0.2958, "lr": 3.2451941153827926e-06, "epoch": 5.843478260869565, "percentage": 83.48, "elapsed_time": "5:06:15", "remaining_time": "1:00:36"}
|
||||
{"current_steps": 3365, "total_steps": 4025, "loss": 0.3099, "lr": 3.1979877751538745e-06, "epoch": 5.852173913043478, "percentage": 83.6, "elapsed_time": "5:06:46", "remaining_time": "1:00:10"}
|
||||
{"current_steps": 3370, "total_steps": 4025, "loss": 0.276, "lr": 3.1510974468545784e-06, "epoch": 5.860869565217391, "percentage": 83.73, "elapsed_time": "5:07:17", "remaining_time": "0:59:43"}
|
||||
{"current_steps": 3375, "total_steps": 4025, "loss": 0.2646, "lr": 3.1045240123973118e-06, "epoch": 5.869565217391305, "percentage": 83.85, "elapsed_time": "5:07:43", "remaining_time": "0:59:15"}
|
||||
{"current_steps": 3380, "total_steps": 4025, "loss": 0.1901, "lr": 3.0582683477343565e-06, "epoch": 5.878260869565217, "percentage": 83.98, "elapsed_time": "5:08:00", "remaining_time": "0:58:46"}
|
||||
{"current_steps": 3385, "total_steps": 4025, "loss": 0.1762, "lr": 3.0123313228413797e-06, "epoch": 5.886956521739131, "percentage": 84.1, "elapsed_time": "5:08:18", "remaining_time": "0:58:17"}
|
||||
{"current_steps": 3390, "total_steps": 4025, "loss": 0.2003, "lr": 2.9667138017010844e-06, "epoch": 5.895652173913043, "percentage": 84.22, "elapsed_time": "5:08:35", "remaining_time": "0:57:48"}
|
||||
{"current_steps": 3395, "total_steps": 4025, "loss": 0.1895, "lr": 2.9214166422869474e-06, "epoch": 5.904347826086957, "percentage": 84.35, "elapsed_time": "5:08:52", "remaining_time": "0:57:19"}
|
||||
{"current_steps": 3400, "total_steps": 4025, "loss": 0.1837, "lr": 2.876440696547085e-06, "epoch": 5.913043478260869, "percentage": 84.47, "elapsed_time": "5:09:14", "remaining_time": "0:56:50"}
|
||||
{"current_steps": 3405, "total_steps": 4025, "loss": 0.1887, "lr": 2.8317868103882485e-06, "epoch": 5.921739130434783, "percentage": 84.6, "elapsed_time": "5:09:39", "remaining_time": "0:56:23"}
|
||||
{"current_steps": 3410, "total_steps": 4025, "loss": 0.1896, "lr": 2.7874558236598835e-06, "epoch": 5.930434782608696, "percentage": 84.72, "elapsed_time": "5:09:57", "remaining_time": "0:55:54"}
|
||||
{"current_steps": 3415, "total_steps": 4025, "loss": 0.1921, "lr": 2.743448570138354e-06, "epoch": 5.939130434782609, "percentage": 84.84, "elapsed_time": "5:10:15", "remaining_time": "0:55:25"}
|
||||
{"current_steps": 3420, "total_steps": 4025, "loss": 0.1702, "lr": 2.6997658775112536e-06, "epoch": 5.947826086956522, "percentage": 84.97, "elapsed_time": "5:10:35", "remaining_time": "0:54:56"}
|
||||
{"current_steps": 3425, "total_steps": 4025, "loss": 0.1672, "lr": 2.656408567361841e-06, "epoch": 5.956521739130435, "percentage": 85.09, "elapsed_time": "5:10:55", "remaining_time": "0:54:28"}
|
||||
{"current_steps": 3430, "total_steps": 4025, "loss": 0.2209, "lr": 2.613377455153596e-06, "epoch": 5.965217391304348, "percentage": 85.22, "elapsed_time": "5:11:15", "remaining_time": "0:53:59"}
|
||||
{"current_steps": 3435, "total_steps": 4025, "loss": 0.1671, "lr": 2.570673350214865e-06, "epoch": 5.973913043478261, "percentage": 85.34, "elapsed_time": "5:11:33", "remaining_time": "0:53:30"}
|
||||
{"current_steps": 3440, "total_steps": 4025, "loss": 0.2048, "lr": 2.5282970557236474e-06, "epoch": 5.982608695652174, "percentage": 85.47, "elapsed_time": "5:11:53", "remaining_time": "0:53:02"}
|
||||
{"current_steps": 3445, "total_steps": 4025, "loss": 0.1749, "lr": 2.4862493686924905e-06, "epoch": 5.9913043478260875, "percentage": 85.59, "elapsed_time": "5:12:13", "remaining_time": "0:52:33"}
|
||||
{"current_steps": 3450, "total_steps": 4025, "loss": 0.2066, "lr": 2.444531079953505e-06, "epoch": 6.0, "percentage": 85.71, "elapsed_time": "5:12:36", "remaining_time": "0:52:06"}
|
||||
{"current_steps": 3455, "total_steps": 4025, "loss": 0.2042, "lr": 2.40314297414348e-06, "epoch": 6.008695652173913, "percentage": 85.84, "elapsed_time": "5:12:59", "remaining_time": "0:51:38"}
|
||||
{"current_steps": 3460, "total_steps": 4025, "loss": 0.1952, "lr": 2.362085829689129e-06, "epoch": 6.017391304347826, "percentage": 85.96, "elapsed_time": "5:13:19", "remaining_time": "0:51:09"}
|
||||
{"current_steps": 3465, "total_steps": 4025, "loss": 0.1976, "lr": 2.321360418792451e-06, "epoch": 6.026086956521739, "percentage": 86.09, "elapsed_time": "5:13:43", "remaining_time": "0:50:42"}
|
||||
{"current_steps": 3470, "total_steps": 4025, "loss": 0.1971, "lr": 2.2809675074162098e-06, "epoch": 6.034782608695652, "percentage": 86.21, "elapsed_time": "5:14:05", "remaining_time": "0:50:14"}
|
||||
{"current_steps": 3475, "total_steps": 4025, "loss": 0.1908, "lr": 2.2409078552695185e-06, "epoch": 6.043478260869565, "percentage": 86.34, "elapsed_time": "5:14:26", "remaining_time": "0:49:46"}
|
||||
{"current_steps": 3480, "total_steps": 4025, "loss": 0.1962, "lr": 2.2011822157935603e-06, "epoch": 6.052173913043478, "percentage": 86.46, "elapsed_time": "5:14:47", "remaining_time": "0:49:17"}
|
||||
{"current_steps": 3485, "total_steps": 4025, "loss": 0.184, "lr": 2.1617913361474186e-06, "epoch": 6.060869565217391, "percentage": 86.58, "elapsed_time": "5:15:11", "remaining_time": "0:48:50"}
|
||||
{"current_steps": 3490, "total_steps": 4025, "loss": 0.184, "lr": 2.1227359571940175e-06, "epoch": 6.069565217391304, "percentage": 86.71, "elapsed_time": "5:15:35", "remaining_time": "0:48:22"}
|
||||
{"current_steps": 3495, "total_steps": 4025, "loss": 0.1896, "lr": 2.0840168134861827e-06, "epoch": 6.078260869565217, "percentage": 86.83, "elapsed_time": "5:15:57", "remaining_time": "0:47:54"}
|
||||
{"current_steps": 3500, "total_steps": 4025, "loss": 0.1953, "lr": 2.0456346332528466e-06, "epoch": 6.086956521739131, "percentage": 86.96, "elapsed_time": "5:16:19", "remaining_time": "0:47:26"}
|
||||
{"current_steps": 3505, "total_steps": 4025, "loss": 0.1785, "lr": 2.0075901383853293e-06, "epoch": 6.095652173913043, "percentage": 87.08, "elapsed_time": "5:16:41", "remaining_time": "0:46:59"}
|
||||
{"current_steps": 3510, "total_steps": 4025, "loss": 0.1846, "lr": 1.9698840444237732e-06, "epoch": 6.104347826086957, "percentage": 87.2, "elapsed_time": "5:17:06", "remaining_time": "0:46:31"}
|
||||
{"current_steps": 3515, "total_steps": 4025, "loss": 0.183, "lr": 1.932517060543684e-06, "epoch": 6.113043478260869, "percentage": 87.33, "elapsed_time": "5:17:26", "remaining_time": "0:46:03"}
|
||||
{"current_steps": 3520, "total_steps": 4025, "loss": 0.1988, "lr": 1.8954898895425877e-06, "epoch": 6.121739130434783, "percentage": 87.45, "elapsed_time": "5:17:50", "remaining_time": "0:45:35"}
|
||||
{"current_steps": 3525, "total_steps": 4025, "loss": 0.1788, "lr": 1.8588032278268287e-06, "epoch": 6.130434782608695, "percentage": 87.58, "elapsed_time": "5:18:10", "remaining_time": "0:45:07"}
|
||||
{"current_steps": 3530, "total_steps": 4025, "loss": 0.2673, "lr": 1.8224577653984465e-06, "epoch": 6.139130434782609, "percentage": 87.7, "elapsed_time": "5:18:31", "remaining_time": "0:44:39"}
|
||||
{"current_steps": 3535, "total_steps": 4025, "loss": 0.3136, "lr": 1.7864541858422103e-06, "epoch": 6.147826086956521, "percentage": 87.83, "elapsed_time": "5:18:47", "remaining_time": "0:44:11"}
|
||||
{"current_steps": 3540, "total_steps": 4025, "loss": 0.3198, "lr": 1.7507931663127676e-06, "epoch": 6.156521739130435, "percentage": 87.95, "elapsed_time": "5:19:13", "remaining_time": "0:43:44"}
|
||||
{"current_steps": 3545, "total_steps": 4025, "loss": 0.314, "lr": 1.7154753775219047e-06, "epoch": 6.165217391304348, "percentage": 88.07, "elapsed_time": "5:19:35", "remaining_time": "0:43:16"}
|
||||
{"current_steps": 3550, "total_steps": 4025, "loss": 0.3328, "lr": 1.6805014837259293e-06, "epoch": 6.173913043478261, "percentage": 88.2, "elapsed_time": "5:19:59", "remaining_time": "0:42:48"}
|
||||
{"current_steps": 3555, "total_steps": 4025, "loss": 0.3034, "lr": 1.6458721427131718e-06, "epoch": 6.182608695652174, "percentage": 88.32, "elapsed_time": "5:20:22", "remaining_time": "0:42:21"}
|
||||
{"current_steps": 3560, "total_steps": 4025, "loss": 0.3055, "lr": 1.611588005791629e-06, "epoch": 6.191304347826087, "percentage": 88.45, "elapsed_time": "5:20:45", "remaining_time": "0:41:53"}
|
||||
{"current_steps": 3565, "total_steps": 4025, "loss": 0.285, "lr": 1.5776497177767059e-06, "epoch": 6.2, "percentage": 88.57, "elapsed_time": "5:21:05", "remaining_time": "0:41:25"}
|
||||
{"current_steps": 3570, "total_steps": 4025, "loss": 0.2978, "lr": 1.5440579169790826e-06, "epoch": 6.208695652173913, "percentage": 88.7, "elapsed_time": "5:21:24", "remaining_time": "0:40:57"}
|
||||
{"current_steps": 3575, "total_steps": 4025, "loss": 0.3112, "lr": 1.510813235192714e-06, "epoch": 6.217391304347826, "percentage": 88.82, "elapsed_time": "5:21:49", "remaining_time": "0:40:30"}
|
||||
{"current_steps": 3580, "total_steps": 4025, "loss": 0.2996, "lr": 1.4779162976829508e-06, "epoch": 6.226086956521739, "percentage": 88.94, "elapsed_time": "5:22:09", "remaining_time": "0:40:02"}
|
||||
{"current_steps": 3585, "total_steps": 4025, "loss": 0.2913, "lr": 1.4453677231747754e-06, "epoch": 6.234782608695652, "percentage": 89.07, "elapsed_time": "5:22:35", "remaining_time": "0:39:35"}
|
||||
{"current_steps": 3590, "total_steps": 4025, "loss": 0.2994, "lr": 1.4131681238411642e-06, "epoch": 6.243478260869566, "percentage": 89.19, "elapsed_time": "5:22:58", "remaining_time": "0:39:08"}
|
||||
{"current_steps": 3595, "total_steps": 4025, "loss": 0.2749, "lr": 1.3813181052915737e-06, "epoch": 6.252173913043478, "percentage": 89.32, "elapsed_time": "5:23:17", "remaining_time": "0:38:40"}
|
||||
{"current_steps": 3600, "total_steps": 4025, "loss": 0.2988, "lr": 1.3498182665605496e-06, "epoch": 6.260869565217392, "percentage": 89.44, "elapsed_time": "5:23:38", "remaining_time": "0:38:12"}
|
||||
{"current_steps": 3605, "total_steps": 4025, "loss": 0.2956, "lr": 1.3186692000964763e-06, "epoch": 6.269565217391304, "percentage": 89.57, "elapsed_time": "5:24:01", "remaining_time": "0:37:44"}
|
||||
{"current_steps": 3610, "total_steps": 4025, "loss": 0.2961, "lr": 1.287871491750401e-06, "epoch": 6.278260869565218, "percentage": 89.69, "elapsed_time": "5:24:40", "remaining_time": "0:37:19"}
|
||||
{"current_steps": 3615, "total_steps": 4025, "loss": 0.2873, "lr": 1.2574257207650441e-06, "epoch": 6.28695652173913, "percentage": 89.81, "elapsed_time": "5:25:15", "remaining_time": "0:36:53"}
|
||||
{"current_steps": 3620, "total_steps": 4025, "loss": 0.2911, "lr": 1.2273324597638969e-06, "epoch": 6.2956521739130435, "percentage": 89.94, "elapsed_time": "5:25:48", "remaining_time": "0:36:27"}
|
||||
{"current_steps": 3625, "total_steps": 4025, "loss": 0.2852, "lr": 1.1975922747404401e-06, "epoch": 6.304347826086957, "percentage": 90.06, "elapsed_time": "5:26:21", "remaining_time": "0:36:00"}
|
||||
{"current_steps": 3630, "total_steps": 4025, "loss": 0.3232, "lr": 1.1682057250475177e-06, "epoch": 6.3130434782608695, "percentage": 90.19, "elapsed_time": "5:26:53", "remaining_time": "0:35:34"}
|
||||
{"current_steps": 3635, "total_steps": 4025, "loss": 0.3037, "lr": 1.1391733633867985e-06, "epoch": 6.321739130434783, "percentage": 90.31, "elapsed_time": "5:27:32", "remaining_time": "0:35:08"}
|
||||
{"current_steps": 3640, "total_steps": 4025, "loss": 0.2798, "lr": 1.1104957357984002e-06, "epoch": 6.3304347826086955, "percentage": 90.43, "elapsed_time": "5:28:06", "remaining_time": "0:34:42"}
|
||||
{"current_steps": 3645, "total_steps": 4025, "loss": 0.3184, "lr": 1.0821733816505998e-06, "epoch": 6.339130434782609, "percentage": 90.56, "elapsed_time": "5:28:31", "remaining_time": "0:34:14"}
|
||||
{"current_steps": 3650, "total_steps": 4025, "loss": 0.2902, "lr": 1.0542068336297007e-06, "epoch": 6.3478260869565215, "percentage": 90.68, "elapsed_time": "5:29:09", "remaining_time": "0:33:49"}
|
||||
{"current_steps": 3655, "total_steps": 4025, "loss": 0.2788, "lr": 1.0265966177300113e-06, "epoch": 6.356521739130435, "percentage": 90.81, "elapsed_time": "5:29:38", "remaining_time": "0:33:22"}
|
||||
{"current_steps": 3660, "total_steps": 4025, "loss": 0.2879, "lr": 9.99343253243954e-07, "epoch": 6.3652173913043475, "percentage": 90.93, "elapsed_time": "5:30:08", "remaining_time": "0:32:55"}
|
||||
{"current_steps": 3665, "total_steps": 4025, "loss": 0.2895, "lr": 9.724472527522933e-07, "epoch": 6.373913043478261, "percentage": 91.06, "elapsed_time": "5:30:37", "remaining_time": "0:32:28"}
|
||||
{"current_steps": 3670, "total_steps": 4025, "loss": 0.2895, "lr": 9.459091221145011e-07, "epoch": 6.3826086956521735, "percentage": 91.18, "elapsed_time": "5:31:11", "remaining_time": "0:32:02"}
|
||||
{"current_steps": 3675, "total_steps": 4025, "loss": 0.3041, "lr": 9.197293604592383e-07, "epoch": 6.391304347826087, "percentage": 91.3, "elapsed_time": "5:31:44", "remaining_time": "0:31:35"}
|
||||
{"current_steps": 3680, "total_steps": 4025, "loss": 0.2874, "lr": 8.939084601749681e-07, "epoch": 6.4, "percentage": 91.43, "elapsed_time": "5:32:17", "remaining_time": "0:31:09"}
|
||||
{"current_steps": 3685, "total_steps": 4025, "loss": 0.2361, "lr": 8.684469069006995e-07, "epoch": 6.408695652173913, "percentage": 91.55, "elapsed_time": "5:32:47", "remaining_time": "0:30:42"}
|
||||
{"current_steps": 3690, "total_steps": 4025, "loss": 0.2051, "lr": 8.433451795168479e-07, "epoch": 6.417391304347826, "percentage": 91.68, "elapsed_time": "5:33:18", "remaining_time": "0:30:15"}
|
||||
{"current_steps": 3695, "total_steps": 4025, "loss": 0.1935, "lr": 8.186037501362265e-07, "epoch": 6.426086956521739, "percentage": 91.8, "elapsed_time": "5:33:50", "remaining_time": "0:29:48"}
|
||||
{"current_steps": 3700, "total_steps": 4025, "loss": 0.1976, "lr": 7.942230840951737e-07, "epoch": 6.434782608695652, "percentage": 91.93, "elapsed_time": "5:34:19", "remaining_time": "0:29:21"}
|
||||
{"current_steps": 3705, "total_steps": 4025, "loss": 0.1849, "lr": 7.702036399447998e-07, "epoch": 6.443478260869565, "percentage": 92.05, "elapsed_time": "5:34:48", "remaining_time": "0:28:55"}
|
||||
{"current_steps": 3710, "total_steps": 4025, "loss": 0.2089, "lr": 7.465458694423544e-07, "epoch": 6.452173913043478, "percentage": 92.17, "elapsed_time": "5:35:18", "remaining_time": "0:28:28"}
|
||||
{"current_steps": 3715, "total_steps": 4025, "loss": 0.196, "lr": 7.232502175427436e-07, "epoch": 6.460869565217392, "percentage": 92.3, "elapsed_time": "5:35:51", "remaining_time": "0:28:01"}
|
||||
{"current_steps": 3720, "total_steps": 4025, "loss": 0.1893, "lr": 7.00317122390144e-07, "epoch": 6.469565217391304, "percentage": 92.42, "elapsed_time": "5:36:20", "remaining_time": "0:27:34"}
|
||||
{"current_steps": 3725, "total_steps": 4025, "loss": 0.1925, "lr": 6.777470153097865e-07, "epoch": 6.478260869565218, "percentage": 92.55, "elapsed_time": "5:36:51", "remaining_time": "0:27:07"}
|
||||
{"current_steps": 3730, "total_steps": 4025, "loss": 0.2014, "lr": 6.555403207998123e-07, "epoch": 6.48695652173913, "percentage": 92.67, "elapsed_time": "5:37:20", "remaining_time": "0:26:40"}
|
||||
{"current_steps": 3735, "total_steps": 4025, "loss": 0.2073, "lr": 6.336974565233167e-07, "epoch": 6.495652173913044, "percentage": 92.8, "elapsed_time": "5:37:53", "remaining_time": "0:26:14"}
|
||||
{"current_steps": 3740, "total_steps": 4025, "loss": 0.192, "lr": 6.122188333004819e-07, "epoch": 6.504347826086956, "percentage": 92.92, "elapsed_time": "5:38:26", "remaining_time": "0:25:47"}
|
||||
{"current_steps": 3745, "total_steps": 4025, "loss": 0.1811, "lr": 5.911048551008458e-07, "epoch": 6.51304347826087, "percentage": 93.04, "elapsed_time": "5:38:57", "remaining_time": "0:25:20"}
|
||||
{"current_steps": 3750, "total_steps": 4025, "loss": 0.2031, "lr": 5.703559190357122e-07, "epoch": 6.521739130434782, "percentage": 93.17, "elapsed_time": "5:39:34", "remaining_time": "0:24:54"}
|
||||
{"current_steps": 3755, "total_steps": 4025, "loss": 0.19, "lr": 5.499724153506836e-07, "epoch": 6.530434782608696, "percentage": 93.29, "elapsed_time": "5:40:07", "remaining_time": "0:24:27"}
|
||||
{"current_steps": 3760, "total_steps": 4025, "loss": 0.1842, "lr": 5.299547274183115e-07, "epoch": 6.539130434782608, "percentage": 93.42, "elapsed_time": "5:40:35", "remaining_time": "0:24:00"}
|
||||
{"current_steps": 3765, "total_steps": 4025, "loss": 0.1689, "lr": 5.103032317308932e-07, "epoch": 6.547826086956522, "percentage": 93.54, "elapsed_time": "5:41:00", "remaining_time": "0:23:32"}
|
||||
{"current_steps": 3770, "total_steps": 4025, "loss": 0.1736, "lr": 4.910182978933909e-07, "epoch": 6.556521739130435, "percentage": 93.66, "elapsed_time": "5:41:25", "remaining_time": "0:23:05"}
|
||||
{"current_steps": 3775, "total_steps": 4025, "loss": 0.167, "lr": 4.721002886164772e-07, "epoch": 6.565217391304348, "percentage": 93.79, "elapsed_time": "5:41:48", "remaining_time": "0:22:38"}
|
||||
{"current_steps": 3780, "total_steps": 4025, "loss": 0.1616, "lr": 4.5354955970971395e-07, "epoch": 6.573913043478261, "percentage": 93.91, "elapsed_time": "5:42:11", "remaining_time": "0:22:10"}
|
||||
{"current_steps": 3785, "total_steps": 4025, "loss": 0.1659, "lr": 4.353664600748597e-07, "epoch": 6.582608695652174, "percentage": 94.04, "elapsed_time": "5:42:35", "remaining_time": "0:21:43"}
|
||||
{"current_steps": 3790, "total_steps": 4025, "loss": 0.1753, "lr": 4.1755133169931293e-07, "epoch": 6.591304347826087, "percentage": 94.16, "elapsed_time": "5:42:58", "remaining_time": "0:21:15"}
|
||||
{"current_steps": 3795, "total_steps": 4025, "loss": 0.1746, "lr": 4.001045096496725e-07, "epoch": 6.6, "percentage": 94.29, "elapsed_time": "5:43:25", "remaining_time": "0:20:48"}
|
||||
{"current_steps": 3800, "total_steps": 4025, "loss": 0.1774, "lr": 3.83026322065434e-07, "epoch": 6.608695652173913, "percentage": 94.41, "elapsed_time": "5:43:52", "remaining_time": "0:20:21"}
|
||||
{"current_steps": 3805, "total_steps": 4025, "loss": 0.1734, "lr": 3.6631709015283454e-07, "epoch": 6.6173913043478265, "percentage": 94.53, "elapsed_time": "5:44:18", "remaining_time": "0:19:54"}
|
||||
{"current_steps": 3810, "total_steps": 4025, "loss": 0.179, "lr": 3.4997712817879116e-07, "epoch": 6.626086956521739, "percentage": 94.66, "elapsed_time": "5:44:45", "remaining_time": "0:19:27"}
|
||||
{"current_steps": 3815, "total_steps": 4025, "loss": 0.1673, "lr": 3.340067434650007e-07, "epoch": 6.6347826086956525, "percentage": 94.78, "elapsed_time": "5:45:11", "remaining_time": "0:19:00"}
|
||||
{"current_steps": 3820, "total_steps": 4025, "loss": 0.1742, "lr": 3.184062363821583e-07, "epoch": 6.643478260869565, "percentage": 94.91, "elapsed_time": "5:45:40", "remaining_time": "0:18:33"}
|
||||
{"current_steps": 3825, "total_steps": 4025, "loss": 0.161, "lr": 3.03175900344308e-07, "epoch": 6.6521739130434785, "percentage": 95.03, "elapsed_time": "5:46:03", "remaining_time": "0:18:05"}
|
||||
{"current_steps": 3830, "total_steps": 4025, "loss": 0.156, "lr": 2.8831602180332096e-07, "epoch": 6.660869565217391, "percentage": 95.16, "elapsed_time": "5:46:29", "remaining_time": "0:17:38"}
|
||||
{"current_steps": 3835, "total_steps": 4025, "loss": 0.211, "lr": 2.738268802435151e-07, "epoch": 6.6695652173913045, "percentage": 95.28, "elapsed_time": "5:47:03", "remaining_time": "0:17:11"}
|
||||
{"current_steps": 3840, "total_steps": 4025, "loss": 0.2475, "lr": 2.597087481763905e-07, "epoch": 6.678260869565217, "percentage": 95.4, "elapsed_time": "5:47:44", "remaining_time": "0:16:45"}
|
||||
{"current_steps": 3845, "total_steps": 4025, "loss": 0.2521, "lr": 2.459618911355133e-07, "epoch": 6.6869565217391305, "percentage": 95.53, "elapsed_time": "5:48:28", "remaining_time": "0:16:18"}
|
||||
{"current_steps": 3850, "total_steps": 4025, "loss": 0.2419, "lr": 2.3258656767150888e-07, "epoch": 6.695652173913043, "percentage": 95.65, "elapsed_time": "5:49:12", "remaining_time": "0:15:52"}
|
||||
{"current_steps": 3855, "total_steps": 4025, "loss": 0.2324, "lr": 2.1958302934720787e-07, "epoch": 6.7043478260869565, "percentage": 95.78, "elapsed_time": "5:49:56", "remaining_time": "0:15:25"}
|
||||
{"current_steps": 3860, "total_steps": 4025, "loss": 0.2407, "lr": 2.0695152073291648e-07, "epoch": 6.71304347826087, "percentage": 95.9, "elapsed_time": "5:50:39", "remaining_time": "0:14:59"}
|
||||
{"current_steps": 3865, "total_steps": 4025, "loss": 0.2459, "lr": 1.9469227940180912e-07, "epoch": 6.721739130434782, "percentage": 96.02, "elapsed_time": "5:51:20", "remaining_time": "0:14:32"}
|
||||
{"current_steps": 3870, "total_steps": 4025, "loss": 0.2476, "lr": 1.828055359254699e-07, "epoch": 6.730434782608696, "percentage": 96.15, "elapsed_time": "5:52:05", "remaining_time": "0:14:06"}
|
||||
{"current_steps": 3875, "total_steps": 4025, "loss": 0.2769, "lr": 1.7129151386954256e-07, "epoch": 6.739130434782608, "percentage": 96.27, "elapsed_time": "5:52:40", "remaining_time": "0:13:39"}
|
||||
{"current_steps": 3880, "total_steps": 4025, "loss": 0.325, "lr": 1.601504297895362e-07, "epoch": 6.747826086956522, "percentage": 96.4, "elapsed_time": "5:53:11", "remaining_time": "0:13:11"}
|
||||
{"current_steps": 3885, "total_steps": 4025, "loss": 0.3066, "lr": 1.4938249322675514e-07, "epoch": 6.756521739130434, "percentage": 96.52, "elapsed_time": "5:53:38", "remaining_time": "0:12:44"}
|
||||
{"current_steps": 3890, "total_steps": 4025, "loss": 0.2976, "lr": 1.3898790670434205e-07, "epoch": 6.765217391304348, "percentage": 96.65, "elapsed_time": "5:54:03", "remaining_time": "0:12:17"}
|
||||
{"current_steps": 3895, "total_steps": 4025, "loss": 0.3047, "lr": 1.2896686572348994e-07, "epoch": 6.773913043478261, "percentage": 96.77, "elapsed_time": "5:54:29", "remaining_time": "0:11:49"}
|
||||
{"current_steps": 3900, "total_steps": 4025, "loss": 0.3013, "lr": 1.193195587597473e-07, "epoch": 6.782608695652174, "percentage": 96.89, "elapsed_time": "5:54:57", "remaining_time": "0:11:22"}
|
||||
{"current_steps": 3905, "total_steps": 4025, "loss": 0.322, "lr": 1.100461672594788e-07, "epoch": 6.791304347826087, "percentage": 97.02, "elapsed_time": "5:55:24", "remaining_time": "0:10:55"}
|
||||
{"current_steps": 3910, "total_steps": 4025, "loss": 0.2962, "lr": 1.0114686563645893e-07, "epoch": 6.8, "percentage": 97.14, "elapsed_time": "5:55:51", "remaining_time": "0:10:27"}
|
||||
{"current_steps": 3915, "total_steps": 4025, "loss": 0.2967, "lr": 9.262182126858809e-08, "epoch": 6.808695652173913, "percentage": 97.27, "elapsed_time": "5:56:17", "remaining_time": "0:10:00"}
|
||||
{"current_steps": 3920, "total_steps": 4025, "loss": 0.3077, "lr": 8.447119449473518e-08, "epoch": 6.817391304347826, "percentage": 97.39, "elapsed_time": "5:56:44", "remaining_time": "0:09:33"}
|
||||
{"current_steps": 3925, "total_steps": 4025, "loss": 0.3075, "lr": 7.669513861173761e-08, "epoch": 6.826086956521739, "percentage": 97.52, "elapsed_time": "5:57:12", "remaining_time": "0:09:06"}
|
||||
{"current_steps": 3930, "total_steps": 4025, "loss": 0.3004, "lr": 6.929379987151041e-08, "epoch": 6.834782608695652, "percentage": 97.64, "elapsed_time": "5:57:39", "remaining_time": "0:08:38"}
|
||||
{"current_steps": 3935, "total_steps": 4025, "loss": 0.2924, "lr": 6.226731747829062e-08, "epoch": 6.843478260869565, "percentage": 97.76, "elapsed_time": "5:58:09", "remaining_time": "0:08:11"}
|
||||
{"current_steps": 3940, "total_steps": 4025, "loss": 0.3062, "lr": 5.561582358602602e-08, "epoch": 6.852173913043478, "percentage": 97.89, "elapsed_time": "5:58:40", "remaining_time": "0:07:44"}
|
||||
{"current_steps": 3945, "total_steps": 4025, "loss": 0.2725, "lr": 4.9339443295888244e-08, "epoch": 6.860869565217391, "percentage": 98.01, "elapsed_time": "5:59:12", "remaining_time": "0:07:17"}
|
||||
{"current_steps": 3950, "total_steps": 4025, "loss": 0.2608, "lr": 4.343829465392357e-08, "epoch": 6.869565217391305, "percentage": 98.14, "elapsed_time": "5:59:38", "remaining_time": "0:06:49"}
|
||||
{"current_steps": 3955, "total_steps": 4025, "loss": 0.1866, "lr": 3.7912488648821354e-08, "epoch": 6.878260869565217, "percentage": 98.26, "elapsed_time": "5:59:54", "remaining_time": "0:06:22"}
|
||||
{"current_steps": 3960, "total_steps": 4025, "loss": 0.1738, "lr": 3.27621292098379e-08, "epoch": 6.886956521739131, "percentage": 98.39, "elapsed_time": "6:00:13", "remaining_time": "0:05:54"}
|
||||
{"current_steps": 3965, "total_steps": 4025, "loss": 0.1979, "lr": 2.7987313204833612e-08, "epoch": 6.895652173913043, "percentage": 98.51, "elapsed_time": "6:00:30", "remaining_time": "0:05:27"}
|
||||
{"current_steps": 3970, "total_steps": 4025, "loss": 0.1874, "lr": 2.3588130438463307e-08, "epoch": 6.904347826086957, "percentage": 98.63, "elapsed_time": "6:00:47", "remaining_time": "0:04:59"}
|
||||
{"current_steps": 3975, "total_steps": 4025, "loss": 0.182, "lr": 1.956466365047094e-08, "epoch": 6.913043478260869, "percentage": 98.76, "elapsed_time": "6:01:09", "remaining_time": "0:04:32"}
|
||||
{"current_steps": 3980, "total_steps": 4025, "loss": 0.1867, "lr": 1.5916988514144137e-08, "epoch": 6.921739130434783, "percentage": 98.88, "elapsed_time": "6:01:34", "remaining_time": "0:04:05"}
|
||||
{"current_steps": 3985, "total_steps": 4025, "loss": 0.1882, "lr": 1.2645173634886487e-08, "epoch": 6.930434782608696, "percentage": 99.01, "elapsed_time": "6:01:51", "remaining_time": "0:03:37"}
|
||||
{"current_steps": 3990, "total_steps": 4025, "loss": 0.1914, "lr": 9.749280548934093e-09, "epoch": 6.939130434782609, "percentage": 99.13, "elapsed_time": "6:02:10", "remaining_time": "0:03:10"}
|
||||
{"current_steps": 3995, "total_steps": 4025, "loss": 0.1695, "lr": 7.2293637221876325e-09, "epoch": 6.947826086956522, "percentage": 99.25, "elapsed_time": "6:02:30", "remaining_time": "0:02:43"}
|
||||
{"current_steps": 4000, "total_steps": 4025, "loss": 0.1673, "lr": 5.085470549195392e-09, "epoch": 6.956521739130435, "percentage": 99.38, "elapsed_time": "6:02:50", "remaining_time": "0:02:16"}
|
||||
{"current_steps": 4005, "total_steps": 4025, "loss": 0.2193, "lr": 3.3176413522606475e-09, "epoch": 6.965217391304348, "percentage": 99.5, "elapsed_time": "6:03:10", "remaining_time": "0:01:48"}
|
||||
{"current_steps": 4010, "total_steps": 4025, "loss": 0.1673, "lr": 1.9259093806800464e-09, "epoch": 6.973913043478261, "percentage": 99.63, "elapsed_time": "6:03:28", "remaining_time": "0:01:21"}
|
||||
{"current_steps": 4015, "total_steps": 4025, "loss": 0.2037, "lr": 9.103008101241095e-10, "epoch": 6.982608695652174, "percentage": 99.75, "elapsed_time": "6:03:48", "remaining_time": "0:00:54"}
|
||||
{"current_steps": 4020, "total_steps": 4025, "loss": 0.1744, "lr": 2.708347421376267e-10, "epoch": 6.9913043478260875, "percentage": 99.88, "elapsed_time": "6:04:07", "remaining_time": "0:00:27"}
|
||||
{"current_steps": 4025, "total_steps": 4025, "loss": 0.2041, "lr": 7.523203788828426e-12, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "6:04:31", "remaining_time": "0:00:00"}
|
||||
{"current_steps": 4025, "total_steps": 4025, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "6:05:00", "remaining_time": "0:00:00"}
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user