初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/b1_top16 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_b1_top16__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_b1_top16__Qwen3-8B
|
||||
|
||||
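This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the `DCAgent/b1_top16` dataset, loaded from the local snapshot `2be82814777f95e38b73694deed12e34f91ca466_thinking_preprocessed` (training-time path: `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--b1_top16/snapshots/2be82814777f95e38b73694deed12e34f91ca466_thinking_preprocessed`).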
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: `OptimizerNames.ADAMW_TORCH_FUSED` with betas=(0.9, 0.98), epsilon=1e-08, and no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0031050564450395435,
|
||||
"achieved_tflops_per_gpu_theoretical": 501.4098752978288,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.23596960306167603,
|
||||
"mfu_percent": 0.00021943861802399602,
|
||||
"mfu_percent_theoretical": 35.43532687617165,
|
||||
"total_flos": 1290449162534912.0,
|
||||
"train_loss": 0.2838498419017905,
|
||||
"train_runtime": 25974.7525,
|
||||
"train_samples_per_second": 2.373,
|
||||
"train_steps_per_second": 0.148,
|
||||
"valid_targets_mean": 3951.1,
|
||||
"valid_targets_min": 1262
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:125c021442172c21db14dc58cc0001a85f40548a8e3cbb9c8ad4ef478f63c6d3
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:83207c0cdcebac7dcad804f46913aea64d3d992a2b922990e2b659c03c024f0a
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:265269dd92046373e3cde20dba6252d4dad4d4069ac4938814da154f312b6616
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2eec87484deed84a5eb44cd6185057c903da808144c94305403abdcf07167d91
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "2be82814777f95e38b73694deed12e34f91ca466_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--b1_top16/snapshots/2be82814777f95e38b73694deed12e34f91ca466_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-b1_top16/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0031050564450395435,
|
||||
"achieved_tflops_per_gpu_theoretical": 501.4098752978288,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.23596960306167603,
|
||||
"mfu_percent": 0.00021943861802399602,
|
||||
"mfu_percent_theoretical": 35.43532687617165,
|
||||
"total_flos": 1290449162534912.0,
|
||||
"train_loss": 0.2838498419017905,
|
||||
"train_runtime": 25974.7525,
|
||||
"train_samples_per_second": 2.373,
|
||||
"train_steps_per_second": 0.148,
|
||||
"valid_targets_mean": 3951.1,
|
||||
"valid_targets_min": 1262
|
||||
}
|
||||
772
trainer_log.jsonl
Normal file
772
trainer_log.jsonl
Normal file
@@ -0,0 +1,772 @@
|
||||
{"current_steps": 5, "total_steps": 3857, "loss": 0.7951, "lr": 4.1450777202072546e-07, "epoch": 0.009074410163339383, "percentage": 0.13, "elapsed_time": "0:00:39", "remaining_time": "8:29:01"}
|
||||
{"current_steps": 10, "total_steps": 3857, "loss": 0.7798, "lr": 9.326424870466322e-07, "epoch": 0.018148820326678767, "percentage": 0.26, "elapsed_time": "0:01:19", "remaining_time": "8:29:51"}
|
||||
{"current_steps": 15, "total_steps": 3857, "loss": 0.8154, "lr": 1.4507772020725389e-06, "epoch": 0.02722323049001815, "percentage": 0.39, "elapsed_time": "0:01:51", "remaining_time": "7:54:50"}
|
||||
{"current_steps": 20, "total_steps": 3857, "loss": 0.7523, "lr": 1.968911917098446e-06, "epoch": 0.036297640653357534, "percentage": 0.52, "elapsed_time": "0:02:23", "remaining_time": "7:38:42"}
|
||||
{"current_steps": 25, "total_steps": 3857, "loss": 0.6576, "lr": 2.4870466321243523e-06, "epoch": 0.045372050816696916, "percentage": 0.65, "elapsed_time": "0:02:54", "remaining_time": "7:26:22"}
|
||||
{"current_steps": 30, "total_steps": 3857, "loss": 0.6083, "lr": 3.0051813471502592e-06, "epoch": 0.0544464609800363, "percentage": 0.78, "elapsed_time": "0:03:30", "remaining_time": "7:28:22"}
|
||||
{"current_steps": 35, "total_steps": 3857, "loss": 0.5895, "lr": 3.5233160621761657e-06, "epoch": 0.06352087114337568, "percentage": 0.91, "elapsed_time": "0:04:11", "remaining_time": "7:37:11"}
|
||||
{"current_steps": 40, "total_steps": 3857, "loss": 0.4907, "lr": 4.041450777202073e-06, "epoch": 0.07259528130671507, "percentage": 1.04, "elapsed_time": "0:04:42", "remaining_time": "7:29:26"}
|
||||
{"current_steps": 45, "total_steps": 3857, "loss": 0.5394, "lr": 4.55958549222798e-06, "epoch": 0.08166969147005444, "percentage": 1.17, "elapsed_time": "0:05:14", "remaining_time": "7:24:28"}
|
||||
{"current_steps": 50, "total_steps": 3857, "loss": 0.5254, "lr": 5.077720207253887e-06, "epoch": 0.09074410163339383, "percentage": 1.3, "elapsed_time": "0:05:51", "remaining_time": "7:26:37"}
|
||||
{"current_steps": 55, "total_steps": 3857, "loss": 0.5618, "lr": 5.5958549222797934e-06, "epoch": 0.0998185117967332, "percentage": 1.43, "elapsed_time": "0:06:27", "remaining_time": "7:25:58"}
|
||||
{"current_steps": 60, "total_steps": 3857, "loss": 0.4994, "lr": 6.113989637305699e-06, "epoch": 0.1088929219600726, "percentage": 1.56, "elapsed_time": "0:06:59", "remaining_time": "7:22:35"}
|
||||
{"current_steps": 65, "total_steps": 3857, "loss": 0.5158, "lr": 6.632124352331607e-06, "epoch": 0.11796733212341198, "percentage": 1.69, "elapsed_time": "0:07:38", "remaining_time": "7:26:08"}
|
||||
{"current_steps": 70, "total_steps": 3857, "loss": 0.5032, "lr": 7.150259067357514e-06, "epoch": 0.12704174228675136, "percentage": 1.81, "elapsed_time": "0:08:11", "remaining_time": "7:23:27"}
|
||||
{"current_steps": 75, "total_steps": 3857, "loss": 0.5042, "lr": 7.66839378238342e-06, "epoch": 0.13611615245009073, "percentage": 1.94, "elapsed_time": "0:08:37", "remaining_time": "7:15:11"}
|
||||
{"current_steps": 80, "total_steps": 3857, "loss": 0.4939, "lr": 8.186528497409328e-06, "epoch": 0.14519056261343014, "percentage": 2.07, "elapsed_time": "0:09:19", "remaining_time": "7:20:22"}
|
||||
{"current_steps": 85, "total_steps": 3857, "loss": 0.4527, "lr": 8.704663212435233e-06, "epoch": 0.1542649727767695, "percentage": 2.2, "elapsed_time": "0:09:48", "remaining_time": "7:15:13"}
|
||||
{"current_steps": 90, "total_steps": 3857, "loss": 0.4718, "lr": 9.22279792746114e-06, "epoch": 0.16333938294010888, "percentage": 2.33, "elapsed_time": "0:10:23", "remaining_time": "7:15:02"}
|
||||
{"current_steps": 95, "total_steps": 3857, "loss": 0.4716, "lr": 9.740932642487048e-06, "epoch": 0.1724137931034483, "percentage": 2.46, "elapsed_time": "0:10:59", "remaining_time": "7:15:01"}
|
||||
{"current_steps": 100, "total_steps": 3857, "loss": 0.488, "lr": 1.0259067357512955e-05, "epoch": 0.18148820326678766, "percentage": 2.59, "elapsed_time": "0:11:27", "remaining_time": "7:10:46"}
|
||||
{"current_steps": 105, "total_steps": 3857, "loss": 0.4396, "lr": 1.0777202072538861e-05, "epoch": 0.19056261343012704, "percentage": 2.72, "elapsed_time": "0:12:05", "remaining_time": "7:11:59"}
|
||||
{"current_steps": 110, "total_steps": 3857, "loss": 0.4373, "lr": 1.1295336787564768e-05, "epoch": 0.1996370235934664, "percentage": 2.85, "elapsed_time": "0:12:41", "remaining_time": "7:12:12"}
|
||||
{"current_steps": 115, "total_steps": 3857, "loss": 0.4484, "lr": 1.1813471502590674e-05, "epoch": 0.20871143375680581, "percentage": 2.98, "elapsed_time": "0:13:15", "remaining_time": "7:11:25"}
|
||||
{"current_steps": 120, "total_steps": 3857, "loss": 0.4433, "lr": 1.2331606217616581e-05, "epoch": 0.2177858439201452, "percentage": 3.11, "elapsed_time": "0:13:47", "remaining_time": "7:09:33"}
|
||||
{"current_steps": 125, "total_steps": 3857, "loss": 0.4079, "lr": 1.2849740932642487e-05, "epoch": 0.22686025408348456, "percentage": 3.24, "elapsed_time": "0:14:20", "remaining_time": "7:08:07"}
|
||||
{"current_steps": 130, "total_steps": 3857, "loss": 0.3998, "lr": 1.3367875647668396e-05, "epoch": 0.23593466424682397, "percentage": 3.37, "elapsed_time": "0:14:53", "remaining_time": "7:06:59"}
|
||||
{"current_steps": 135, "total_steps": 3857, "loss": 0.4242, "lr": 1.3886010362694302e-05, "epoch": 0.24500907441016334, "percentage": 3.5, "elapsed_time": "0:15:31", "remaining_time": "7:07:58"}
|
||||
{"current_steps": 140, "total_steps": 3857, "loss": 0.4345, "lr": 1.4404145077720209e-05, "epoch": 0.2540834845735027, "percentage": 3.63, "elapsed_time": "0:16:10", "remaining_time": "7:09:24"}
|
||||
{"current_steps": 145, "total_steps": 3857, "loss": 0.4812, "lr": 1.4922279792746115e-05, "epoch": 0.2631578947368421, "percentage": 3.76, "elapsed_time": "0:16:47", "remaining_time": "7:09:53"}
|
||||
{"current_steps": 150, "total_steps": 3857, "loss": 0.449, "lr": 1.544041450777202e-05, "epoch": 0.27223230490018147, "percentage": 3.89, "elapsed_time": "0:17:20", "remaining_time": "7:08:30"}
|
||||
{"current_steps": 155, "total_steps": 3857, "loss": 0.4379, "lr": 1.595854922279793e-05, "epoch": 0.2813067150635209, "percentage": 4.02, "elapsed_time": "0:17:57", "remaining_time": "7:08:52"}
|
||||
{"current_steps": 160, "total_steps": 3857, "loss": 0.422, "lr": 1.6476683937823835e-05, "epoch": 0.29038112522686027, "percentage": 4.15, "elapsed_time": "0:18:28", "remaining_time": "7:06:47"}
|
||||
{"current_steps": 165, "total_steps": 3857, "loss": 0.4643, "lr": 1.6994818652849744e-05, "epoch": 0.29945553539019965, "percentage": 4.28, "elapsed_time": "0:19:06", "remaining_time": "7:07:41"}
|
||||
{"current_steps": 170, "total_steps": 3857, "loss": 0.446, "lr": 1.751295336787565e-05, "epoch": 0.308529945553539, "percentage": 4.41, "elapsed_time": "0:19:44", "remaining_time": "7:08:14"}
|
||||
{"current_steps": 175, "total_steps": 3857, "loss": 0.4014, "lr": 1.8031088082901555e-05, "epoch": 0.3176043557168784, "percentage": 4.54, "elapsed_time": "0:20:14", "remaining_time": "7:05:59"}
|
||||
{"current_steps": 180, "total_steps": 3857, "loss": 0.3669, "lr": 1.854922279792746e-05, "epoch": 0.32667876588021777, "percentage": 4.67, "elapsed_time": "0:20:51", "remaining_time": "7:06:06"}
|
||||
{"current_steps": 185, "total_steps": 3857, "loss": 0.4152, "lr": 1.9067357512953367e-05, "epoch": 0.33575317604355714, "percentage": 4.8, "elapsed_time": "0:21:26", "remaining_time": "7:05:41"}
|
||||
{"current_steps": 190, "total_steps": 3857, "loss": 0.4033, "lr": 1.9585492227979276e-05, "epoch": 0.3448275862068966, "percentage": 4.93, "elapsed_time": "0:22:00", "remaining_time": "7:04:40"}
|
||||
{"current_steps": 195, "total_steps": 3857, "loss": 0.4056, "lr": 2.0103626943005185e-05, "epoch": 0.35390199637023595, "percentage": 5.06, "elapsed_time": "0:22:33", "remaining_time": "7:03:42"}
|
||||
{"current_steps": 200, "total_steps": 3857, "loss": 0.4139, "lr": 2.062176165803109e-05, "epoch": 0.3629764065335753, "percentage": 5.19, "elapsed_time": "0:23:04", "remaining_time": "7:02:00"}
|
||||
{"current_steps": 205, "total_steps": 3857, "loss": 0.4191, "lr": 2.1139896373056996e-05, "epoch": 0.3720508166969147, "percentage": 5.32, "elapsed_time": "0:23:42", "remaining_time": "7:02:28"}
|
||||
{"current_steps": 210, "total_steps": 3857, "loss": 0.4028, "lr": 2.1658031088082905e-05, "epoch": 0.3811252268602541, "percentage": 5.44, "elapsed_time": "0:24:18", "remaining_time": "7:02:17"}
|
||||
{"current_steps": 215, "total_steps": 3857, "loss": 0.4125, "lr": 2.2176165803108807e-05, "epoch": 0.39019963702359345, "percentage": 5.57, "elapsed_time": "0:24:53", "remaining_time": "7:01:40"}
|
||||
{"current_steps": 220, "total_steps": 3857, "loss": 0.3849, "lr": 2.2694300518134716e-05, "epoch": 0.3992740471869328, "percentage": 5.7, "elapsed_time": "0:25:31", "remaining_time": "7:01:50"}
|
||||
{"current_steps": 225, "total_steps": 3857, "loss": 0.3952, "lr": 2.3212435233160622e-05, "epoch": 0.40834845735027225, "percentage": 5.83, "elapsed_time": "0:26:02", "remaining_time": "7:00:16"}
|
||||
{"current_steps": 230, "total_steps": 3857, "loss": 0.3935, "lr": 2.373056994818653e-05, "epoch": 0.41742286751361163, "percentage": 5.96, "elapsed_time": "0:26:37", "remaining_time": "6:59:51"}
|
||||
{"current_steps": 235, "total_steps": 3857, "loss": 0.407, "lr": 2.424870466321244e-05, "epoch": 0.426497277676951, "percentage": 6.09, "elapsed_time": "0:27:09", "remaining_time": "6:58:35"}
|
||||
{"current_steps": 240, "total_steps": 3857, "loss": 0.3863, "lr": 2.4766839378238342e-05, "epoch": 0.4355716878402904, "percentage": 6.22, "elapsed_time": "0:27:42", "remaining_time": "6:57:30"}
|
||||
{"current_steps": 245, "total_steps": 3857, "loss": 0.3936, "lr": 2.528497409326425e-05, "epoch": 0.44464609800362975, "percentage": 6.35, "elapsed_time": "0:28:21", "remaining_time": "6:58:04"}
|
||||
{"current_steps": 250, "total_steps": 3857, "loss": 0.3909, "lr": 2.5803108808290157e-05, "epoch": 0.4537205081669691, "percentage": 6.48, "elapsed_time": "0:28:57", "remaining_time": "6:57:53"}
|
||||
{"current_steps": 255, "total_steps": 3857, "loss": 0.3937, "lr": 2.6321243523316066e-05, "epoch": 0.4627949183303085, "percentage": 6.61, "elapsed_time": "0:29:35", "remaining_time": "6:57:57"}
|
||||
{"current_steps": 260, "total_steps": 3857, "loss": 0.3801, "lr": 2.6839378238341972e-05, "epoch": 0.47186932849364793, "percentage": 6.74, "elapsed_time": "0:30:12", "remaining_time": "6:57:53"}
|
||||
{"current_steps": 265, "total_steps": 3857, "loss": 0.3807, "lr": 2.7357512953367878e-05, "epoch": 0.4809437386569873, "percentage": 6.87, "elapsed_time": "0:30:48", "remaining_time": "6:57:31"}
|
||||
{"current_steps": 270, "total_steps": 3857, "loss": 0.3709, "lr": 2.7875647668393787e-05, "epoch": 0.4900181488203267, "percentage": 7.0, "elapsed_time": "0:31:20", "remaining_time": "6:56:29"}
|
||||
{"current_steps": 275, "total_steps": 3857, "loss": 0.3831, "lr": 2.839378238341969e-05, "epoch": 0.49909255898366606, "percentage": 7.13, "elapsed_time": "0:31:53", "remaining_time": "6:55:26"}
|
||||
{"current_steps": 280, "total_steps": 3857, "loss": 0.3647, "lr": 2.8911917098445598e-05, "epoch": 0.5081669691470054, "percentage": 7.26, "elapsed_time": "0:32:21", "remaining_time": "6:53:24"}
|
||||
{"current_steps": 285, "total_steps": 3857, "loss": 0.4106, "lr": 2.9430051813471504e-05, "epoch": 0.5172413793103449, "percentage": 7.39, "elapsed_time": "0:32:58", "remaining_time": "6:53:16"}
|
||||
{"current_steps": 290, "total_steps": 3857, "loss": 0.3813, "lr": 2.9948186528497413e-05, "epoch": 0.5263157894736842, "percentage": 7.52, "elapsed_time": "0:33:33", "remaining_time": "6:52:51"}
|
||||
{"current_steps": 295, "total_steps": 3857, "loss": 0.3765, "lr": 3.046632124352332e-05, "epoch": 0.5353901996370236, "percentage": 7.65, "elapsed_time": "0:34:06", "remaining_time": "6:51:46"}
|
||||
{"current_steps": 300, "total_steps": 3857, "loss": 0.377, "lr": 3.0984455958549224e-05, "epoch": 0.5444646098003629, "percentage": 7.78, "elapsed_time": "0:34:37", "remaining_time": "6:50:30"}
|
||||
{"current_steps": 305, "total_steps": 3857, "loss": 0.3817, "lr": 3.150259067357513e-05, "epoch": 0.5535390199637024, "percentage": 7.91, "elapsed_time": "0:35:12", "remaining_time": "6:50:05"}
|
||||
{"current_steps": 310, "total_steps": 3857, "loss": 0.3738, "lr": 3.2020725388601035e-05, "epoch": 0.5626134301270418, "percentage": 8.04, "elapsed_time": "0:35:46", "remaining_time": "6:49:21"}
|
||||
{"current_steps": 315, "total_steps": 3857, "loss": 0.3353, "lr": 3.2538860103626944e-05, "epoch": 0.5716878402903811, "percentage": 8.17, "elapsed_time": "0:36:25", "remaining_time": "6:49:33"}
|
||||
{"current_steps": 320, "total_steps": 3857, "loss": 0.3556, "lr": 3.305699481865285e-05, "epoch": 0.5807622504537205, "percentage": 8.3, "elapsed_time": "0:36:58", "remaining_time": "6:48:36"}
|
||||
{"current_steps": 325, "total_steps": 3857, "loss": 0.3473, "lr": 3.3575129533678756e-05, "epoch": 0.5898366606170599, "percentage": 8.43, "elapsed_time": "0:37:30", "remaining_time": "6:47:37"}
|
||||
{"current_steps": 330, "total_steps": 3857, "loss": 0.3583, "lr": 3.4093264248704665e-05, "epoch": 0.5989110707803993, "percentage": 8.56, "elapsed_time": "0:38:03", "remaining_time": "6:46:42"}
|
||||
{"current_steps": 335, "total_steps": 3857, "loss": 0.36, "lr": 3.4611398963730574e-05, "epoch": 0.6079854809437386, "percentage": 8.69, "elapsed_time": "0:38:37", "remaining_time": "6:46:07"}
|
||||
{"current_steps": 340, "total_steps": 3857, "loss": 0.3595, "lr": 3.512953367875648e-05, "epoch": 0.617059891107078, "percentage": 8.82, "elapsed_time": "0:39:17", "remaining_time": "6:46:29"}
|
||||
{"current_steps": 345, "total_steps": 3857, "loss": 0.348, "lr": 3.5647668393782385e-05, "epoch": 0.6261343012704175, "percentage": 8.94, "elapsed_time": "0:39:50", "remaining_time": "6:45:35"}
|
||||
{"current_steps": 350, "total_steps": 3857, "loss": 0.3933, "lr": 3.6165803108808294e-05, "epoch": 0.6352087114337568, "percentage": 9.07, "elapsed_time": "0:40:26", "remaining_time": "6:45:08"}
|
||||
{"current_steps": 355, "total_steps": 3857, "loss": 0.3614, "lr": 3.66839378238342e-05, "epoch": 0.6442831215970962, "percentage": 9.2, "elapsed_time": "0:41:02", "remaining_time": "6:44:56"}
|
||||
{"current_steps": 360, "total_steps": 3857, "loss": 0.3869, "lr": 3.7202072538860105e-05, "epoch": 0.6533575317604355, "percentage": 9.33, "elapsed_time": "0:41:43", "remaining_time": "6:45:15"}
|
||||
{"current_steps": 365, "total_steps": 3857, "loss": 0.355, "lr": 3.7720207253886014e-05, "epoch": 0.662431941923775, "percentage": 9.46, "elapsed_time": "0:42:18", "remaining_time": "6:44:42"}
|
||||
{"current_steps": 370, "total_steps": 3857, "loss": 0.3849, "lr": 3.8238341968911917e-05, "epoch": 0.6715063520871143, "percentage": 9.59, "elapsed_time": "0:42:57", "remaining_time": "6:44:49"}
|
||||
{"current_steps": 375, "total_steps": 3857, "loss": 0.3656, "lr": 3.8756476683937826e-05, "epoch": 0.6805807622504537, "percentage": 9.72, "elapsed_time": "0:43:32", "remaining_time": "6:44:19"}
|
||||
{"current_steps": 380, "total_steps": 3857, "loss": 0.3831, "lr": 3.9274611398963735e-05, "epoch": 0.6896551724137931, "percentage": 9.85, "elapsed_time": "0:44:07", "remaining_time": "6:43:40"}
|
||||
{"current_steps": 385, "total_steps": 3857, "loss": 0.3614, "lr": 3.979274611398964e-05, "epoch": 0.6987295825771325, "percentage": 9.98, "elapsed_time": "0:44:42", "remaining_time": "6:43:14"}
|
||||
{"current_steps": 390, "total_steps": 3857, "loss": 0.3885, "lr": 3.999992627194779e-05, "epoch": 0.7078039927404719, "percentage": 10.11, "elapsed_time": "0:45:17", "remaining_time": "6:42:39"}
|
||||
{"current_steps": 395, "total_steps": 3857, "loss": 0.3644, "lr": 3.999947571359721e-05, "epoch": 0.7168784029038112, "percentage": 10.24, "elapsed_time": "0:45:55", "remaining_time": "6:42:31"}
|
||||
{"current_steps": 400, "total_steps": 3857, "loss": 0.3559, "lr": 3.999861556614132e-05, "epoch": 0.7259528130671506, "percentage": 10.37, "elapsed_time": "0:46:38", "remaining_time": "6:43:03"}
|
||||
{"current_steps": 405, "total_steps": 3857, "loss": 0.3599, "lr": 3.999734584719592e-05, "epoch": 0.73502722323049, "percentage": 10.5, "elapsed_time": "0:47:10", "remaining_time": "6:42:09"}
|
||||
{"current_steps": 410, "total_steps": 3857, "loss": 0.3586, "lr": 3.999566658276485e-05, "epoch": 0.7441016333938294, "percentage": 10.63, "elapsed_time": "0:47:44", "remaining_time": "6:41:18"}
|
||||
{"current_steps": 415, "total_steps": 3857, "loss": 0.3494, "lr": 3.999357780723943e-05, "epoch": 0.7531760435571688, "percentage": 10.76, "elapsed_time": "0:48:16", "remaining_time": "6:40:20"}
|
||||
{"current_steps": 420, "total_steps": 3857, "loss": 0.3599, "lr": 3.9991079563397764e-05, "epoch": 0.7622504537205081, "percentage": 10.89, "elapsed_time": "0:48:54", "remaining_time": "6:40:10"}
|
||||
{"current_steps": 425, "total_steps": 3857, "loss": 0.3618, "lr": 3.998817190240387e-05, "epoch": 0.7713248638838476, "percentage": 11.02, "elapsed_time": "0:49:28", "remaining_time": "6:39:30"}
|
||||
{"current_steps": 430, "total_steps": 3857, "loss": 0.3573, "lr": 3.998485488380663e-05, "epoch": 0.7803992740471869, "percentage": 11.15, "elapsed_time": "0:49:55", "remaining_time": "6:37:56"}
|
||||
{"current_steps": 435, "total_steps": 3857, "loss": 0.3707, "lr": 3.998112857553854e-05, "epoch": 0.7894736842105263, "percentage": 11.28, "elapsed_time": "0:50:28", "remaining_time": "6:37:04"}
|
||||
{"current_steps": 440, "total_steps": 3857, "loss": 0.3503, "lr": 3.997699305391438e-05, "epoch": 0.7985480943738656, "percentage": 11.41, "elapsed_time": "0:50:55", "remaining_time": "6:35:29"}
|
||||
{"current_steps": 445, "total_steps": 3857, "loss": 0.3613, "lr": 3.997244840362959e-05, "epoch": 0.8076225045372051, "percentage": 11.54, "elapsed_time": "0:51:27", "remaining_time": "6:34:35"}
|
||||
{"current_steps": 450, "total_steps": 3857, "loss": 0.3665, "lr": 3.996749471775858e-05, "epoch": 0.8166969147005445, "percentage": 11.67, "elapsed_time": "0:52:00", "remaining_time": "6:33:45"}
|
||||
{"current_steps": 455, "total_steps": 3857, "loss": 0.3622, "lr": 3.9962132097752795e-05, "epoch": 0.8257713248638838, "percentage": 11.8, "elapsed_time": "0:52:27", "remaining_time": "6:32:16"}
|
||||
{"current_steps": 460, "total_steps": 3857, "loss": 0.331, "lr": 3.995636065343865e-05, "epoch": 0.8348457350272233, "percentage": 11.93, "elapsed_time": "0:53:02", "remaining_time": "6:31:44"}
|
||||
{"current_steps": 465, "total_steps": 3857, "loss": 0.3721, "lr": 3.9950180503015274e-05, "epoch": 0.8439201451905626, "percentage": 12.06, "elapsed_time": "0:53:32", "remaining_time": "6:30:36"}
|
||||
{"current_steps": 470, "total_steps": 3857, "loss": 0.3641, "lr": 3.994359177305212e-05, "epoch": 0.852994555353902, "percentage": 12.19, "elapsed_time": "0:54:04", "remaining_time": "6:29:40"}
|
||||
{"current_steps": 475, "total_steps": 3857, "loss": 0.3541, "lr": 3.993659459848629e-05, "epoch": 0.8620689655172413, "percentage": 12.32, "elapsed_time": "0:54:43", "remaining_time": "6:29:37"}
|
||||
{"current_steps": 480, "total_steps": 3857, "loss": 0.3418, "lr": 3.992918912261989e-05, "epoch": 0.8711433756805808, "percentage": 12.44, "elapsed_time": "0:55:19", "remaining_time": "6:29:12"}
|
||||
{"current_steps": 485, "total_steps": 3857, "loss": 0.3678, "lr": 3.992137549711701e-05, "epoch": 0.8802177858439202, "percentage": 12.57, "elapsed_time": "0:55:55", "remaining_time": "6:28:49"}
|
||||
{"current_steps": 490, "total_steps": 3857, "loss": 0.3273, "lr": 3.991315388200063e-05, "epoch": 0.8892921960072595, "percentage": 12.7, "elapsed_time": "0:56:33", "remaining_time": "6:28:40"}
|
||||
{"current_steps": 495, "total_steps": 3857, "loss": 0.3403, "lr": 3.990452444564936e-05, "epoch": 0.8983666061705989, "percentage": 12.83, "elapsed_time": "0:57:06", "remaining_time": "6:27:54"}
|
||||
{"current_steps": 500, "total_steps": 3857, "loss": 0.3554, "lr": 3.9895487364793986e-05, "epoch": 0.9074410163339383, "percentage": 12.96, "elapsed_time": "0:57:34", "remaining_time": "6:26:30"}
|
||||
{"current_steps": 505, "total_steps": 3857, "loss": 0.3501, "lr": 3.9886042824513864e-05, "epoch": 0.9165154264972777, "percentage": 13.09, "elapsed_time": "0:58:11", "remaining_time": "6:26:16"}
|
||||
{"current_steps": 510, "total_steps": 3857, "loss": 0.3496, "lr": 3.9876191018233094e-05, "epoch": 0.925589836660617, "percentage": 13.22, "elapsed_time": "0:58:52", "remaining_time": "6:26:25"}
|
||||
{"current_steps": 515, "total_steps": 3857, "loss": 0.3445, "lr": 3.9865932147716605e-05, "epoch": 0.9346642468239564, "percentage": 13.35, "elapsed_time": "0:59:24", "remaining_time": "6:25:32"}
|
||||
{"current_steps": 520, "total_steps": 3857, "loss": 0.357, "lr": 3.985526642306597e-05, "epoch": 0.9437386569872959, "percentage": 13.48, "elapsed_time": "1:00:03", "remaining_time": "6:25:22"}
|
||||
{"current_steps": 525, "total_steps": 3857, "loss": 0.3621, "lr": 3.984419406271515e-05, "epoch": 0.9528130671506352, "percentage": 13.61, "elapsed_time": "1:00:31", "remaining_time": "6:24:10"}
|
||||
{"current_steps": 530, "total_steps": 3857, "loss": 0.3431, "lr": 3.983271529342601e-05, "epoch": 0.9618874773139746, "percentage": 13.74, "elapsed_time": "1:00:56", "remaining_time": "6:22:33"}
|
||||
{"current_steps": 535, "total_steps": 3857, "loss": 0.3244, "lr": 3.982083035028364e-05, "epoch": 0.9709618874773139, "percentage": 13.87, "elapsed_time": "1:01:22", "remaining_time": "6:21:05"}
|
||||
{"current_steps": 540, "total_steps": 3857, "loss": 0.386, "lr": 3.9808539476691586e-05, "epoch": 0.9800362976406534, "percentage": 14.0, "elapsed_time": "1:01:50", "remaining_time": "6:19:54"}
|
||||
{"current_steps": 545, "total_steps": 3857, "loss": 0.378, "lr": 3.979584292436684e-05, "epoch": 0.9891107078039928, "percentage": 14.13, "elapsed_time": "1:02:30", "remaining_time": "6:19:51"}
|
||||
{"current_steps": 550, "total_steps": 3857, "loss": 0.3644, "lr": 3.9782740953334705e-05, "epoch": 0.9981851179673321, "percentage": 14.26, "elapsed_time": "1:03:06", "remaining_time": "6:19:28"}
|
||||
{"current_steps": 555, "total_steps": 3857, "loss": 0.3213, "lr": 3.9769233831923434e-05, "epoch": 1.0072595281306715, "percentage": 14.39, "elapsed_time": "1:03:42", "remaining_time": "6:19:01"}
|
||||
{"current_steps": 560, "total_steps": 3857, "loss": 0.3441, "lr": 3.975532183675877e-05, "epoch": 1.0163339382940109, "percentage": 14.52, "elapsed_time": "1:04:16", "remaining_time": "6:18:27"}
|
||||
{"current_steps": 565, "total_steps": 3857, "loss": 0.3169, "lr": 3.9741005252758255e-05, "epoch": 1.0254083484573502, "percentage": 14.65, "elapsed_time": "1:04:49", "remaining_time": "6:17:40"}
|
||||
{"current_steps": 570, "total_steps": 3857, "loss": 0.3403, "lr": 3.972628437312544e-05, "epoch": 1.0344827586206897, "percentage": 14.78, "elapsed_time": "1:05:27", "remaining_time": "6:17:28"}
|
||||
{"current_steps": 575, "total_steps": 3857, "loss": 0.3509, "lr": 3.9711159499343806e-05, "epoch": 1.043557168784029, "percentage": 14.91, "elapsed_time": "1:06:03", "remaining_time": "6:17:01"}
|
||||
{"current_steps": 580, "total_steps": 3857, "loss": 0.3417, "lr": 3.969563094117065e-05, "epoch": 1.0526315789473684, "percentage": 15.04, "elapsed_time": "1:06:35", "remaining_time": "6:16:13"}
|
||||
{"current_steps": 585, "total_steps": 3857, "loss": 0.3258, "lr": 3.967969901663073e-05, "epoch": 1.061705989110708, "percentage": 15.17, "elapsed_time": "1:07:09", "remaining_time": "6:15:35"}
|
||||
{"current_steps": 590, "total_steps": 3857, "loss": 0.3429, "lr": 3.9663364052009726e-05, "epoch": 1.0707803992740472, "percentage": 15.3, "elapsed_time": "1:07:44", "remaining_time": "6:15:04"}
|
||||
{"current_steps": 595, "total_steps": 3857, "loss": 0.3666, "lr": 3.9646626381847594e-05, "epoch": 1.0798548094373865, "percentage": 15.43, "elapsed_time": "1:08:20", "remaining_time": "6:14:40"}
|
||||
{"current_steps": 600, "total_steps": 3857, "loss": 0.3404, "lr": 3.962948634893168e-05, "epoch": 1.0889292196007259, "percentage": 15.56, "elapsed_time": "1:08:44", "remaining_time": "6:13:10"}
|
||||
{"current_steps": 605, "total_steps": 3857, "loss": 0.3165, "lr": 3.9611944304289715e-05, "epoch": 1.0980036297640654, "percentage": 15.69, "elapsed_time": "1:09:15", "remaining_time": "6:12:16"}
|
||||
{"current_steps": 610, "total_steps": 3857, "loss": 0.3359, "lr": 3.959400060718266e-05, "epoch": 1.1070780399274047, "percentage": 15.82, "elapsed_time": "1:09:52", "remaining_time": "6:11:56"}
|
||||
{"current_steps": 615, "total_steps": 3857, "loss": 0.3262, "lr": 3.957565562509727e-05, "epoch": 1.116152450090744, "percentage": 15.95, "elapsed_time": "1:10:33", "remaining_time": "6:11:56"}
|
||||
{"current_steps": 620, "total_steps": 3857, "loss": 0.3146, "lr": 3.9556909733738644e-05, "epoch": 1.1252268602540836, "percentage": 16.07, "elapsed_time": "1:11:07", "remaining_time": "6:11:20"}
|
||||
{"current_steps": 625, "total_steps": 3857, "loss": 0.3077, "lr": 3.953776331702248e-05, "epoch": 1.134301270417423, "percentage": 16.2, "elapsed_time": "1:11:42", "remaining_time": "6:10:47"}
|
||||
{"current_steps": 630, "total_steps": 3857, "loss": 0.3047, "lr": 3.951821676706726e-05, "epoch": 1.1433756805807622, "percentage": 16.33, "elapsed_time": "1:12:13", "remaining_time": "6:09:57"}
|
||||
{"current_steps": 635, "total_steps": 3857, "loss": 0.3639, "lr": 3.949827048418614e-05, "epoch": 1.1524500907441015, "percentage": 16.46, "elapsed_time": "1:12:45", "remaining_time": "6:09:10"}
|
||||
{"current_steps": 640, "total_steps": 3857, "loss": 0.3156, "lr": 3.9477924876878874e-05, "epoch": 1.161524500907441, "percentage": 16.59, "elapsed_time": "1:13:16", "remaining_time": "6:08:18"}
|
||||
{"current_steps": 645, "total_steps": 3857, "loss": 0.3323, "lr": 3.945718036182332e-05, "epoch": 1.1705989110707804, "percentage": 16.72, "elapsed_time": "1:13:50", "remaining_time": "6:07:44"}
|
||||
{"current_steps": 650, "total_steps": 3857, "loss": 0.3254, "lr": 3.943603736386699e-05, "epoch": 1.1796733212341197, "percentage": 16.85, "elapsed_time": "1:14:26", "remaining_time": "6:07:19"}
|
||||
{"current_steps": 655, "total_steps": 3857, "loss": 0.3373, "lr": 3.9414496316018305e-05, "epoch": 1.1887477313974593, "percentage": 16.98, "elapsed_time": "1:15:04", "remaining_time": "6:06:59"}
|
||||
{"current_steps": 660, "total_steps": 3857, "loss": 0.3166, "lr": 3.9392557659437764e-05, "epoch": 1.1978221415607986, "percentage": 17.11, "elapsed_time": "1:15:37", "remaining_time": "6:06:18"}
|
||||
{"current_steps": 665, "total_steps": 3857, "loss": 0.3389, "lr": 3.9370221843428885e-05, "epoch": 1.206896551724138, "percentage": 17.24, "elapsed_time": "1:16:14", "remaining_time": "6:05:57"}
|
||||
{"current_steps": 670, "total_steps": 3857, "loss": 0.3088, "lr": 3.934748932542899e-05, "epoch": 1.2159709618874772, "percentage": 17.37, "elapsed_time": "1:16:47", "remaining_time": "6:05:16"}
|
||||
{"current_steps": 675, "total_steps": 3857, "loss": 0.329, "lr": 3.932436057099989e-05, "epoch": 1.2250453720508168, "percentage": 17.5, "elapsed_time": "1:17:18", "remaining_time": "6:04:26"}
|
||||
{"current_steps": 680, "total_steps": 3857, "loss": 0.3222, "lr": 3.930083605381827e-05, "epoch": 1.234119782214156, "percentage": 17.63, "elapsed_time": "1:17:58", "remaining_time": "6:04:17"}
|
||||
{"current_steps": 685, "total_steps": 3857, "loss": 0.3274, "lr": 3.927691625566608e-05, "epoch": 1.2431941923774954, "percentage": 17.76, "elapsed_time": "1:18:32", "remaining_time": "6:03:41"}
|
||||
{"current_steps": 690, "total_steps": 3857, "loss": 0.3318, "lr": 3.9252601666420587e-05, "epoch": 1.252268602540835, "percentage": 17.89, "elapsed_time": "1:19:04", "remaining_time": "6:02:57"}
|
||||
{"current_steps": 695, "total_steps": 3857, "loss": 0.3334, "lr": 3.9227892784044375e-05, "epoch": 1.2613430127041743, "percentage": 18.02, "elapsed_time": "1:19:39", "remaining_time": "6:02:22"}
|
||||
{"current_steps": 700, "total_steps": 3857, "loss": 0.3318, "lr": 3.9202790114575175e-05, "epoch": 1.2704174228675136, "percentage": 18.15, "elapsed_time": "1:20:12", "remaining_time": "6:01:42"}
|
||||
{"current_steps": 705, "total_steps": 3857, "loss": 0.333, "lr": 3.917729417211547e-05, "epoch": 1.279491833030853, "percentage": 18.28, "elapsed_time": "1:20:50", "remaining_time": "6:01:24"}
|
||||
{"current_steps": 710, "total_steps": 3857, "loss": 0.3496, "lr": 3.9151405478821945e-05, "epoch": 1.2885662431941924, "percentage": 18.41, "elapsed_time": "1:21:25", "remaining_time": "6:00:54"}
|
||||
{"current_steps": 715, "total_steps": 3857, "loss": 0.3261, "lr": 3.912512456489483e-05, "epoch": 1.2976406533575318, "percentage": 18.54, "elapsed_time": "1:21:58", "remaining_time": "6:00:12"}
|
||||
{"current_steps": 720, "total_steps": 3857, "loss": 0.3367, "lr": 3.9098451968567055e-05, "epoch": 1.306715063520871, "percentage": 18.67, "elapsed_time": "1:22:30", "remaining_time": "5:59:30"}
|
||||
{"current_steps": 725, "total_steps": 3857, "loss": 0.3178, "lr": 3.907138823609317e-05, "epoch": 1.3157894736842106, "percentage": 18.8, "elapsed_time": "1:23:00", "remaining_time": "5:58:37"}
|
||||
{"current_steps": 730, "total_steps": 3857, "loss": 0.3141, "lr": 3.904393392173821e-05, "epoch": 1.32486388384755, "percentage": 18.93, "elapsed_time": "1:23:35", "remaining_time": "5:58:02"}
|
||||
{"current_steps": 735, "total_steps": 3857, "loss": 0.3335, "lr": 3.9016089587766303e-05, "epoch": 1.3339382940108893, "percentage": 19.06, "elapsed_time": "1:24:01", "remaining_time": "5:56:55"}
|
||||
{"current_steps": 740, "total_steps": 3857, "loss": 0.3482, "lr": 3.8987855804429206e-05, "epoch": 1.3430127041742286, "percentage": 19.19, "elapsed_time": "1:24:40", "remaining_time": "5:56:41"}
|
||||
{"current_steps": 745, "total_steps": 3857, "loss": 0.3215, "lr": 3.8959233149954574e-05, "epoch": 1.3520871143375681, "percentage": 19.32, "elapsed_time": "1:25:13", "remaining_time": "5:55:59"}
|
||||
{"current_steps": 750, "total_steps": 3857, "loss": 0.3255, "lr": 3.893022221053414e-05, "epoch": 1.3611615245009074, "percentage": 19.45, "elapsed_time": "1:25:55", "remaining_time": "5:55:56"}
|
||||
{"current_steps": 755, "total_steps": 3857, "loss": 0.3407, "lr": 3.890082358031173e-05, "epoch": 1.3702359346642468, "percentage": 19.57, "elapsed_time": "1:26:36", "remaining_time": "5:55:48"}
|
||||
{"current_steps": 760, "total_steps": 3857, "loss": 0.3308, "lr": 3.887103786137104e-05, "epoch": 1.3793103448275863, "percentage": 19.7, "elapsed_time": "1:27:07", "remaining_time": "5:55:00"}
|
||||
{"current_steps": 765, "total_steps": 3857, "loss": 0.3061, "lr": 3.8840865663723376e-05, "epoch": 1.3883847549909256, "percentage": 19.83, "elapsed_time": "1:27:40", "remaining_time": "5:54:21"}
|
||||
{"current_steps": 770, "total_steps": 3857, "loss": 0.3176, "lr": 3.88103076052951e-05, "epoch": 1.397459165154265, "percentage": 19.96, "elapsed_time": "1:28:17", "remaining_time": "5:53:56"}
|
||||
{"current_steps": 775, "total_steps": 3857, "loss": 0.3352, "lr": 3.877936431191501e-05, "epoch": 1.4065335753176043, "percentage": 20.09, "elapsed_time": "1:28:51", "remaining_time": "5:53:20"}
|
||||
{"current_steps": 780, "total_steps": 3857, "loss": 0.3375, "lr": 3.874803641730151e-05, "epoch": 1.4156079854809438, "percentage": 20.22, "elapsed_time": "1:29:24", "remaining_time": "5:52:41"}
|
||||
{"current_steps": 785, "total_steps": 3857, "loss": 0.3309, "lr": 3.871632456304964e-05, "epoch": 1.4246823956442831, "percentage": 20.35, "elapsed_time": "1:30:02", "remaining_time": "5:52:23"}
|
||||
{"current_steps": 790, "total_steps": 3857, "loss": 0.31, "lr": 3.8684229398617924e-05, "epoch": 1.4337568058076224, "percentage": 20.48, "elapsed_time": "1:30:37", "remaining_time": "5:51:48"}
|
||||
{"current_steps": 795, "total_steps": 3857, "loss": 0.297, "lr": 3.8651751581315074e-05, "epoch": 1.442831215970962, "percentage": 20.61, "elapsed_time": "1:31:13", "remaining_time": "5:51:20"}
|
||||
{"current_steps": 800, "total_steps": 3857, "loss": 0.3402, "lr": 3.861889177628653e-05, "epoch": 1.4519056261343013, "percentage": 20.74, "elapsed_time": "1:31:46", "remaining_time": "5:50:40"}
|
||||
{"current_steps": 805, "total_steps": 3857, "loss": 0.3291, "lr": 3.858565065650085e-05, "epoch": 1.4609800362976406, "percentage": 20.87, "elapsed_time": "1:32:20", "remaining_time": "5:50:07"}
|
||||
{"current_steps": 810, "total_steps": 3857, "loss": 0.3399, "lr": 3.85520289027359e-05, "epoch": 1.47005444646098, "percentage": 21.0, "elapsed_time": "1:32:59", "remaining_time": "5:49:49"}
|
||||
{"current_steps": 815, "total_steps": 3857, "loss": 0.3519, "lr": 3.8518027203564935e-05, "epoch": 1.4791288566243195, "percentage": 21.13, "elapsed_time": "1:33:32", "remaining_time": "5:49:09"}
|
||||
{"current_steps": 820, "total_steps": 3857, "loss": 0.3409, "lr": 3.848364625534248e-05, "epoch": 1.4882032667876588, "percentage": 21.26, "elapsed_time": "1:34:10", "remaining_time": "5:48:49"}
|
||||
{"current_steps": 825, "total_steps": 3857, "loss": 0.3564, "lr": 3.8448886762190094e-05, "epoch": 1.4972776769509981, "percentage": 21.39, "elapsed_time": "1:34:40", "remaining_time": "5:47:57"}
|
||||
{"current_steps": 830, "total_steps": 3857, "loss": 0.3224, "lr": 3.8413749435981914e-05, "epoch": 1.5063520871143377, "percentage": 21.52, "elapsed_time": "1:35:09", "remaining_time": "5:47:03"}
|
||||
{"current_steps": 835, "total_steps": 3857, "loss": 0.3004, "lr": 3.837823499633011e-05, "epoch": 1.515426497277677, "percentage": 21.65, "elapsed_time": "1:35:41", "remaining_time": "5:46:19"}
|
||||
{"current_steps": 840, "total_steps": 3857, "loss": 0.33, "lr": 3.834234417057013e-05, "epoch": 1.5245009074410163, "percentage": 21.78, "elapsed_time": "1:36:13", "remaining_time": "5:45:36"}
|
||||
{"current_steps": 845, "total_steps": 3857, "loss": 0.312, "lr": 3.83060776937458e-05, "epoch": 1.5335753176043556, "percentage": 21.91, "elapsed_time": "1:36:44", "remaining_time": "5:44:49"}
|
||||
{"current_steps": 850, "total_steps": 3857, "loss": 0.3333, "lr": 3.826943630859427e-05, "epoch": 1.542649727767695, "percentage": 22.04, "elapsed_time": "1:37:19", "remaining_time": "5:44:19"}
|
||||
{"current_steps": 855, "total_steps": 3857, "loss": 0.3514, "lr": 3.823242076553084e-05, "epoch": 1.5517241379310345, "percentage": 22.17, "elapsed_time": "1:37:48", "remaining_time": "5:43:25"}
|
||||
{"current_steps": 860, "total_steps": 3857, "loss": 0.3228, "lr": 3.819503182263352e-05, "epoch": 1.560798548094374, "percentage": 22.3, "elapsed_time": "1:38:30", "remaining_time": "5:43:16"}
|
||||
{"current_steps": 865, "total_steps": 3857, "loss": 0.3163, "lr": 3.815727024562759e-05, "epoch": 1.5698729582577133, "percentage": 22.43, "elapsed_time": "1:39:03", "remaining_time": "5:42:39"}
|
||||
{"current_steps": 870, "total_steps": 3857, "loss": 0.3378, "lr": 3.811913680786982e-05, "epoch": 1.5789473684210527, "percentage": 22.56, "elapsed_time": "1:39:38", "remaining_time": "5:42:06"}
|
||||
{"current_steps": 875, "total_steps": 3857, "loss": 0.3381, "lr": 3.8080632290332743e-05, "epoch": 1.588021778584392, "percentage": 22.69, "elapsed_time": "1:40:12", "remaining_time": "5:41:29"}
|
||||
{"current_steps": 880, "total_steps": 3857, "loss": 0.3325, "lr": 3.804175748158856e-05, "epoch": 1.5970961887477313, "percentage": 22.82, "elapsed_time": "1:40:42", "remaining_time": "5:40:41"}
|
||||
{"current_steps": 885, "total_steps": 3857, "loss": 0.3313, "lr": 3.800251317779305e-05, "epoch": 1.6061705989110708, "percentage": 22.95, "elapsed_time": "1:41:18", "remaining_time": "5:40:12"}
|
||||
{"current_steps": 890, "total_steps": 3857, "loss": 0.3499, "lr": 3.796290018266925e-05, "epoch": 1.6152450090744102, "percentage": 23.07, "elapsed_time": "1:41:51", "remaining_time": "5:39:33"}
|
||||
{"current_steps": 895, "total_steps": 3857, "loss": 0.3111, "lr": 3.792291930749099e-05, "epoch": 1.6243194192377497, "percentage": 23.2, "elapsed_time": "1:42:18", "remaining_time": "5:38:36"}
|
||||
{"current_steps": 900, "total_steps": 3857, "loss": 0.3262, "lr": 3.788257137106627e-05, "epoch": 1.633393829401089, "percentage": 23.33, "elapsed_time": "1:42:49", "remaining_time": "5:37:51"}
|
||||
{"current_steps": 905, "total_steps": 3857, "loss": 0.3251, "lr": 3.7841857199720526e-05, "epoch": 1.6424682395644283, "percentage": 23.46, "elapsed_time": "1:43:23", "remaining_time": "5:37:13"}
|
||||
{"current_steps": 910, "total_steps": 3857, "loss": 0.3074, "lr": 3.780077762727965e-05, "epoch": 1.6515426497277677, "percentage": 23.59, "elapsed_time": "1:43:55", "remaining_time": "5:36:32"}
|
||||
{"current_steps": 915, "total_steps": 3857, "loss": 0.3203, "lr": 3.775933349505298e-05, "epoch": 1.660617059891107, "percentage": 23.72, "elapsed_time": "1:44:26", "remaining_time": "5:35:48"}
|
||||
{"current_steps": 920, "total_steps": 3857, "loss": 0.3176, "lr": 3.7717525651816e-05, "epoch": 1.6696914700544465, "percentage": 23.85, "elapsed_time": "1:44:58", "remaining_time": "5:35:06"}
|
||||
{"current_steps": 925, "total_steps": 3857, "loss": 0.314, "lr": 3.7675354953793017e-05, "epoch": 1.6787658802177858, "percentage": 23.98, "elapsed_time": "1:45:30", "remaining_time": "5:34:26"}
|
||||
{"current_steps": 930, "total_steps": 3857, "loss": 0.3116, "lr": 3.76328222646396e-05, "epoch": 1.6878402903811254, "percentage": 24.11, "elapsed_time": "1:46:09", "remaining_time": "5:34:07"}
|
||||
{"current_steps": 935, "total_steps": 3857, "loss": 0.3314, "lr": 3.758992845542488e-05, "epoch": 1.6969147005444647, "percentage": 24.24, "elapsed_time": "1:46:39", "remaining_time": "5:33:20"}
|
||||
{"current_steps": 940, "total_steps": 3857, "loss": 0.3098, "lr": 3.754667440461375e-05, "epoch": 1.705989110707804, "percentage": 24.37, "elapsed_time": "1:47:15", "remaining_time": "5:32:49"}
|
||||
{"current_steps": 945, "total_steps": 3857, "loss": 0.3449, "lr": 3.7503060998048806e-05, "epoch": 1.7150635208711433, "percentage": 24.5, "elapsed_time": "1:47:57", "remaining_time": "5:32:41"}
|
||||
{"current_steps": 950, "total_steps": 3857, "loss": 0.3248, "lr": 3.745908912893229e-05, "epoch": 1.7241379310344827, "percentage": 24.63, "elapsed_time": "1:48:32", "remaining_time": "5:32:09"}
|
||||
{"current_steps": 955, "total_steps": 3857, "loss": 0.3014, "lr": 3.7414759697807725e-05, "epoch": 1.7332123411978222, "percentage": 24.76, "elapsed_time": "1:49:09", "remaining_time": "5:31:40"}
|
||||
{"current_steps": 960, "total_steps": 3857, "loss": 0.3434, "lr": 3.737007361254152e-05, "epoch": 1.7422867513611615, "percentage": 24.89, "elapsed_time": "1:49:43", "remaining_time": "5:31:05"}
|
||||
{"current_steps": 965, "total_steps": 3857, "loss": 0.3387, "lr": 3.732503178830434e-05, "epoch": 1.751361161524501, "percentage": 25.02, "elapsed_time": "1:50:10", "remaining_time": "5:30:11"}
|
||||
{"current_steps": 970, "total_steps": 3857, "loss": 0.35, "lr": 3.7279635147552405e-05, "epoch": 1.7604355716878404, "percentage": 25.15, "elapsed_time": "1:50:46", "remaining_time": "5:29:43"}
|
||||
{"current_steps": 975, "total_steps": 3857, "loss": 0.3475, "lr": 3.7233884620008546e-05, "epoch": 1.7695099818511797, "percentage": 25.28, "elapsed_time": "1:51:21", "remaining_time": "5:29:09"}
|
||||
{"current_steps": 980, "total_steps": 3857, "loss": 0.3246, "lr": 3.718778114264321e-05, "epoch": 1.778584392014519, "percentage": 25.41, "elapsed_time": "1:52:04", "remaining_time": "5:29:00"}
|
||||
{"current_steps": 985, "total_steps": 3857, "loss": 0.3183, "lr": 3.7141325659655264e-05, "epoch": 1.7876588021778583, "percentage": 25.54, "elapsed_time": "1:52:40", "remaining_time": "5:28:33"}
|
||||
{"current_steps": 990, "total_steps": 3857, "loss": 0.3402, "lr": 3.7094519122452626e-05, "epoch": 1.7967332123411979, "percentage": 25.67, "elapsed_time": "1:53:19", "remaining_time": "5:28:10"}
|
||||
{"current_steps": 995, "total_steps": 3857, "loss": 0.3264, "lr": 3.70473624896328e-05, "epoch": 1.8058076225045372, "percentage": 25.8, "elapsed_time": "1:53:49", "remaining_time": "5:27:25"}
|
||||
{"current_steps": 1000, "total_steps": 3857, "loss": 0.3229, "lr": 3.6999856726963267e-05, "epoch": 1.8148820326678767, "percentage": 25.93, "elapsed_time": "1:54:26", "remaining_time": "5:26:57"}
|
||||
{"current_steps": 1005, "total_steps": 3857, "loss": 0.3381, "lr": 3.6952002807361665e-05, "epoch": 1.823956442831216, "percentage": 26.06, "elapsed_time": "1:54:59", "remaining_time": "5:26:19"}
|
||||
{"current_steps": 1010, "total_steps": 3857, "loss": 0.3358, "lr": 3.69038017108759e-05, "epoch": 1.8330308529945554, "percentage": 26.19, "elapsed_time": "1:55:36", "remaining_time": "5:25:53"}
|
||||
{"current_steps": 1015, "total_steps": 3857, "loss": 0.3132, "lr": 3.685525442466405e-05, "epoch": 1.8421052631578947, "percentage": 26.32, "elapsed_time": "1:56:13", "remaining_time": "5:25:25"}
|
||||
{"current_steps": 1020, "total_steps": 3857, "loss": 0.3111, "lr": 3.6806361942974134e-05, "epoch": 1.851179673321234, "percentage": 26.45, "elapsed_time": "1:56:45", "remaining_time": "5:24:46"}
|
||||
{"current_steps": 1025, "total_steps": 3857, "loss": 0.314, "lr": 3.6757125267123816e-05, "epoch": 1.8602540834845736, "percentage": 26.58, "elapsed_time": "1:57:22", "remaining_time": "5:24:18"}
|
||||
{"current_steps": 1030, "total_steps": 3857, "loss": 0.3421, "lr": 3.6707545405479825e-05, "epoch": 1.8693284936479129, "percentage": 26.7, "elapsed_time": "1:57:56", "remaining_time": "5:23:42"}
|
||||
{"current_steps": 1035, "total_steps": 3857, "loss": 0.2924, "lr": 3.6657623373437334e-05, "epoch": 1.8784029038112524, "percentage": 26.83, "elapsed_time": "1:58:32", "remaining_time": "5:23:12"}
|
||||
{"current_steps": 1040, "total_steps": 3857, "loss": 0.3099, "lr": 3.6607360193399165e-05, "epoch": 1.8874773139745917, "percentage": 26.96, "elapsed_time": "1:58:59", "remaining_time": "5:22:18"}
|
||||
{"current_steps": 1045, "total_steps": 3857, "loss": 0.302, "lr": 3.655675689475485e-05, "epoch": 1.896551724137931, "percentage": 27.09, "elapsed_time": "1:59:34", "remaining_time": "5:21:45"}
|
||||
{"current_steps": 1050, "total_steps": 3857, "loss": 0.3237, "lr": 3.6505814513859555e-05, "epoch": 1.9056261343012704, "percentage": 27.22, "elapsed_time": "2:00:07", "remaining_time": "5:21:07"}
|
||||
{"current_steps": 1055, "total_steps": 3857, "loss": 0.3355, "lr": 3.6454534094012825e-05, "epoch": 1.9147005444646097, "percentage": 27.35, "elapsed_time": "2:00:41", "remaining_time": "5:20:32"}
|
||||
{"current_steps": 1060, "total_steps": 3857, "loss": 0.3072, "lr": 3.640291668543727e-05, "epoch": 1.9237749546279492, "percentage": 27.48, "elapsed_time": "2:01:21", "remaining_time": "5:20:13"}
|
||||
{"current_steps": 1065, "total_steps": 3857, "loss": 0.3516, "lr": 3.635096334525701e-05, "epoch": 1.9328493647912885, "percentage": 27.61, "elapsed_time": "2:01:56", "remaining_time": "5:19:41"}
|
||||
{"current_steps": 1070, "total_steps": 3857, "loss": 0.3235, "lr": 3.629867513747603e-05, "epoch": 1.941923774954628, "percentage": 27.74, "elapsed_time": "2:02:29", "remaining_time": "5:19:02"}
|
||||
{"current_steps": 1075, "total_steps": 3857, "loss": 0.3241, "lr": 3.624605313295643e-05, "epoch": 1.9509981851179674, "percentage": 27.87, "elapsed_time": "2:03:09", "remaining_time": "5:18:42"}
|
||||
{"current_steps": 1080, "total_steps": 3857, "loss": 0.3174, "lr": 3.6193098409396447e-05, "epoch": 1.9600725952813067, "percentage": 28.0, "elapsed_time": "2:03:49", "remaining_time": "5:18:23"}
|
||||
{"current_steps": 1085, "total_steps": 3857, "loss": 0.3263, "lr": 3.6139812051308386e-05, "epoch": 1.969147005444646, "percentage": 28.13, "elapsed_time": "2:04:16", "remaining_time": "5:17:31"}
|
||||
{"current_steps": 1090, "total_steps": 3857, "loss": 0.3376, "lr": 3.6086195149996457e-05, "epoch": 1.9782214156079854, "percentage": 28.26, "elapsed_time": "2:04:54", "remaining_time": "5:17:04"}
|
||||
{"current_steps": 1095, "total_steps": 3857, "loss": 0.3074, "lr": 3.603224880353438e-05, "epoch": 1.987295825771325, "percentage": 28.39, "elapsed_time": "2:05:26", "remaining_time": "5:16:23"}
|
||||
{"current_steps": 1100, "total_steps": 3857, "loss": 0.2963, "lr": 3.5977974116742894e-05, "epoch": 1.9963702359346642, "percentage": 28.52, "elapsed_time": "2:06:03", "remaining_time": "5:15:56"}
|
||||
{"current_steps": 1105, "total_steps": 3857, "loss": 0.2995, "lr": 3.5923372201167166e-05, "epoch": 2.0054446460980038, "percentage": 28.65, "elapsed_time": "2:06:35", "remaining_time": "5:15:15"}
|
||||
{"current_steps": 1110, "total_steps": 3857, "loss": 0.2851, "lr": 3.586844417505399e-05, "epoch": 2.014519056261343, "percentage": 28.78, "elapsed_time": "2:07:11", "remaining_time": "5:14:45"}
|
||||
{"current_steps": 1115, "total_steps": 3857, "loss": 0.307, "lr": 3.5813191163328925e-05, "epoch": 2.0235934664246824, "percentage": 28.91, "elapsed_time": "2:07:43", "remaining_time": "5:14:07"}
|
||||
{"current_steps": 1120, "total_steps": 3857, "loss": 0.2736, "lr": 3.57576142975732e-05, "epoch": 2.0326678765880217, "percentage": 29.04, "elapsed_time": "2:08:16", "remaining_time": "5:13:29"}
|
||||
{"current_steps": 1125, "total_steps": 3857, "loss": 0.3197, "lr": 3.57017147160006e-05, "epoch": 2.041742286751361, "percentage": 29.17, "elapsed_time": "2:08:44", "remaining_time": "5:12:39"}
|
||||
{"current_steps": 1130, "total_steps": 3857, "loss": 0.2973, "lr": 3.56454935634341e-05, "epoch": 2.0508166969147004, "percentage": 29.3, "elapsed_time": "2:09:17", "remaining_time": "5:12:00"}
|
||||
{"current_steps": 1135, "total_steps": 3857, "loss": 0.277, "lr": 3.558895199128248e-05, "epoch": 2.05989110707804, "percentage": 29.43, "elapsed_time": "2:09:45", "remaining_time": "5:11:11"}
|
||||
{"current_steps": 1140, "total_steps": 3857, "loss": 0.2828, "lr": 3.553209115751668e-05, "epoch": 2.0689655172413794, "percentage": 29.56, "elapsed_time": "2:10:12", "remaining_time": "5:10:19"}
|
||||
{"current_steps": 1145, "total_steps": 3857, "loss": 0.276, "lr": 3.547491222664614e-05, "epoch": 2.0780399274047188, "percentage": 29.69, "elapsed_time": "2:10:52", "remaining_time": "5:09:58"}
|
||||
{"current_steps": 1150, "total_steps": 3857, "loss": 0.3006, "lr": 3.54174163696949e-05, "epoch": 2.087114337568058, "percentage": 29.82, "elapsed_time": "2:11:28", "remaining_time": "5:09:28"}
|
||||
{"current_steps": 1155, "total_steps": 3857, "loss": 0.2919, "lr": 3.535960476417767e-05, "epoch": 2.0961887477313974, "percentage": 29.95, "elapsed_time": "2:11:57", "remaining_time": "5:08:42"}
|
||||
{"current_steps": 1160, "total_steps": 3857, "loss": 0.3105, "lr": 3.530147859407568e-05, "epoch": 2.1052631578947367, "percentage": 30.08, "elapsed_time": "2:12:30", "remaining_time": "5:08:05"}
|
||||
{"current_steps": 1165, "total_steps": 3857, "loss": 0.2788, "lr": 3.5243039049812416e-05, "epoch": 2.114337568058076, "percentage": 30.2, "elapsed_time": "2:13:08", "remaining_time": "5:07:40"}
|
||||
{"current_steps": 1170, "total_steps": 3857, "loss": 0.3051, "lr": 3.518428732822931e-05, "epoch": 2.123411978221416, "percentage": 30.33, "elapsed_time": "2:13:39", "remaining_time": "5:06:57"}
|
||||
{"current_steps": 1175, "total_steps": 3857, "loss": 0.2888, "lr": 3.512522463256114e-05, "epoch": 2.132486388384755, "percentage": 30.46, "elapsed_time": "2:14:10", "remaining_time": "5:06:15"}
|
||||
{"current_steps": 1180, "total_steps": 3857, "loss": 0.29, "lr": 3.506585217241146e-05, "epoch": 2.1415607985480944, "percentage": 30.59, "elapsed_time": "2:14:41", "remaining_time": "5:05:33"}
|
||||
{"current_steps": 1185, "total_steps": 3857, "loss": 0.2789, "lr": 3.500617116372777e-05, "epoch": 2.1506352087114338, "percentage": 30.72, "elapsed_time": "2:15:11", "remaining_time": "5:04:49"}
|
||||
{"current_steps": 1190, "total_steps": 3857, "loss": 0.2948, "lr": 3.494618282877668e-05, "epoch": 2.159709618874773, "percentage": 30.85, "elapsed_time": "2:15:44", "remaining_time": "5:04:14"}
|
||||
{"current_steps": 1195, "total_steps": 3857, "loss": 0.2779, "lr": 3.488588839611881e-05, "epoch": 2.1687840290381124, "percentage": 30.98, "elapsed_time": "2:16:21", "remaining_time": "5:03:45"}
|
||||
{"current_steps": 1200, "total_steps": 3857, "loss": 0.2953, "lr": 3.482528910058366e-05, "epoch": 2.1778584392014517, "percentage": 31.11, "elapsed_time": "2:16:53", "remaining_time": "5:03:06"}
|
||||
{"current_steps": 1205, "total_steps": 3857, "loss": 0.2911, "lr": 3.476438618324433e-05, "epoch": 2.1869328493647915, "percentage": 31.24, "elapsed_time": "2:17:25", "remaining_time": "5:02:27"}
|
||||
{"current_steps": 1210, "total_steps": 3857, "loss": 0.2801, "lr": 3.47031808913921e-05, "epoch": 2.196007259528131, "percentage": 31.37, "elapsed_time": "2:17:54", "remaining_time": "5:01:40"}
|
||||
{"current_steps": 1215, "total_steps": 3857, "loss": 0.318, "lr": 3.464167447851084e-05, "epoch": 2.20508166969147, "percentage": 31.5, "elapsed_time": "2:18:28", "remaining_time": "5:01:07"}
|
||||
{"current_steps": 1220, "total_steps": 3857, "loss": 0.2845, "lr": 3.457986820425143e-05, "epoch": 2.2141560798548094, "percentage": 31.63, "elapsed_time": "2:19:06", "remaining_time": "5:00:41"}
|
||||
{"current_steps": 1225, "total_steps": 3857, "loss": 0.3111, "lr": 3.451776333440586e-05, "epoch": 2.2232304900181488, "percentage": 31.76, "elapsed_time": "2:19:37", "remaining_time": "4:59:58"}
|
||||
{"current_steps": 1230, "total_steps": 3857, "loss": 0.2827, "lr": 3.445536114088138e-05, "epoch": 2.232304900181488, "percentage": 31.89, "elapsed_time": "2:20:10", "remaining_time": "4:59:22"}
|
||||
{"current_steps": 1235, "total_steps": 3857, "loss": 0.324, "lr": 3.439266290167443e-05, "epoch": 2.2413793103448274, "percentage": 32.02, "elapsed_time": "2:20:40", "remaining_time": "4:58:38"}
|
||||
{"current_steps": 1240, "total_steps": 3857, "loss": 0.2909, "lr": 3.4329669900844455e-05, "epoch": 2.250453720508167, "percentage": 32.15, "elapsed_time": "2:21:20", "remaining_time": "4:58:17"}
|
||||
{"current_steps": 1245, "total_steps": 3857, "loss": 0.2906, "lr": 3.426638342848761e-05, "epoch": 2.2595281306715065, "percentage": 32.28, "elapsed_time": "2:21:54", "remaining_time": "4:57:42"}
|
||||
{"current_steps": 1250, "total_steps": 3857, "loss": 0.2858, "lr": 3.420280478071036e-05, "epoch": 2.268602540834846, "percentage": 32.41, "elapsed_time": "2:22:33", "remaining_time": "4:57:19"}
|
||||
{"current_steps": 1255, "total_steps": 3857, "loss": 0.2859, "lr": 3.4138935259602895e-05, "epoch": 2.277676950998185, "percentage": 32.54, "elapsed_time": "2:23:09", "remaining_time": "4:56:49"}
|
||||
{"current_steps": 1260, "total_steps": 3857, "loss": 0.3072, "lr": 3.407477617321252e-05, "epoch": 2.2867513611615244, "percentage": 32.67, "elapsed_time": "2:23:46", "remaining_time": "4:56:19"}
|
||||
{"current_steps": 1265, "total_steps": 3857, "loss": 0.2972, "lr": 3.4010328835516815e-05, "epoch": 2.2958257713248638, "percentage": 32.8, "elapsed_time": "2:24:25", "remaining_time": "4:55:55"}
|
||||
{"current_steps": 1270, "total_steps": 3857, "loss": 0.3021, "lr": 3.3945594566396755e-05, "epoch": 2.304900181488203, "percentage": 32.93, "elapsed_time": "2:25:01", "remaining_time": "4:55:24"}
|
||||
{"current_steps": 1275, "total_steps": 3857, "loss": 0.2707, "lr": 3.3880574691609656e-05, "epoch": 2.313974591651543, "percentage": 33.06, "elapsed_time": "2:25:37", "remaining_time": "4:54:54"}
|
||||
{"current_steps": 1280, "total_steps": 3857, "loss": 0.2792, "lr": 3.3815270542762035e-05, "epoch": 2.323049001814882, "percentage": 33.19, "elapsed_time": "2:26:12", "remaining_time": "4:54:20"}
|
||||
{"current_steps": 1285, "total_steps": 3857, "loss": 0.2825, "lr": 3.3749683457282365e-05, "epoch": 2.3321234119782215, "percentage": 33.32, "elapsed_time": "2:26:49", "remaining_time": "4:53:52"}
|
||||
{"current_steps": 1290, "total_steps": 3857, "loss": 0.2925, "lr": 3.368381477839362e-05, "epoch": 2.341197822141561, "percentage": 33.45, "elapsed_time": "2:27:25", "remaining_time": "4:53:21"}
|
||||
{"current_steps": 1295, "total_steps": 3857, "loss": 0.2846, "lr": 3.361766585508585e-05, "epoch": 2.3502722323049, "percentage": 33.58, "elapsed_time": "2:27:53", "remaining_time": "4:52:35"}
|
||||
{"current_steps": 1300, "total_steps": 3857, "loss": 0.2944, "lr": 3.3551238042088466e-05, "epoch": 2.3593466424682394, "percentage": 33.7, "elapsed_time": "2:28:31", "remaining_time": "4:52:08"}
|
||||
{"current_steps": 1305, "total_steps": 3857, "loss": 0.3137, "lr": 3.348453269984256e-05, "epoch": 2.3684210526315788, "percentage": 33.83, "elapsed_time": "2:29:06", "remaining_time": "4:51:36"}
|
||||
{"current_steps": 1310, "total_steps": 3857, "loss": 0.3169, "lr": 3.3417551194473014e-05, "epoch": 2.3774954627949185, "percentage": 33.96, "elapsed_time": "2:29:43", "remaining_time": "4:51:06"}
|
||||
{"current_steps": 1315, "total_steps": 3857, "loss": 0.2847, "lr": 3.335029489776054e-05, "epoch": 2.386569872958258, "percentage": 34.09, "elapsed_time": "2:30:23", "remaining_time": "4:50:43"}
|
||||
{"current_steps": 1320, "total_steps": 3857, "loss": 0.2781, "lr": 3.328276518711355e-05, "epoch": 2.395644283121597, "percentage": 34.22, "elapsed_time": "2:30:54", "remaining_time": "4:50:03"}
|
||||
{"current_steps": 1325, "total_steps": 3857, "loss": 0.3163, "lr": 3.3214963445540006e-05, "epoch": 2.4047186932849365, "percentage": 34.35, "elapsed_time": "2:31:25", "remaining_time": "4:49:22"}
|
||||
{"current_steps": 1330, "total_steps": 3857, "loss": 0.3127, "lr": 3.314689106161903e-05, "epoch": 2.413793103448276, "percentage": 34.48, "elapsed_time": "2:31:52", "remaining_time": "4:48:34"}
|
||||
{"current_steps": 1335, "total_steps": 3857, "loss": 0.3057, "lr": 3.307854942947251e-05, "epoch": 2.422867513611615, "percentage": 34.61, "elapsed_time": "2:32:23", "remaining_time": "4:47:53"}
|
||||
{"current_steps": 1340, "total_steps": 3857, "loss": 0.2816, "lr": 3.300993994873654e-05, "epoch": 2.4319419237749544, "percentage": 34.74, "elapsed_time": "2:32:58", "remaining_time": "4:47:20"}
|
||||
{"current_steps": 1345, "total_steps": 3857, "loss": 0.2942, "lr": 3.294106402453274e-05, "epoch": 2.441016333938294, "percentage": 34.87, "elapsed_time": "2:33:31", "remaining_time": "4:46:43"}
|
||||
{"current_steps": 1350, "total_steps": 3857, "loss": 0.3032, "lr": 3.2871923067439505e-05, "epoch": 2.4500907441016335, "percentage": 35.0, "elapsed_time": "2:34:05", "remaining_time": "4:46:09"}
|
||||
{"current_steps": 1355, "total_steps": 3857, "loss": 0.3113, "lr": 3.280251849346308e-05, "epoch": 2.459165154264973, "percentage": 35.13, "elapsed_time": "2:34:35", "remaining_time": "4:45:26"}
|
||||
{"current_steps": 1360, "total_steps": 3857, "loss": 0.3249, "lr": 3.273285172400861e-05, "epoch": 2.468239564428312, "percentage": 35.26, "elapsed_time": "2:35:09", "remaining_time": "4:44:52"}
|
||||
{"current_steps": 1365, "total_steps": 3857, "loss": 0.2806, "lr": 3.266292418585097e-05, "epoch": 2.4773139745916515, "percentage": 35.39, "elapsed_time": "2:35:43", "remaining_time": "4:44:17"}
|
||||
{"current_steps": 1370, "total_steps": 3857, "loss": 0.2593, "lr": 3.259273731110559e-05, "epoch": 2.486388384754991, "percentage": 35.52, "elapsed_time": "2:36:16", "remaining_time": "4:43:40"}
|
||||
{"current_steps": 1375, "total_steps": 3857, "loss": 0.2894, "lr": 3.252229253719913e-05, "epoch": 2.49546279491833, "percentage": 35.65, "elapsed_time": "2:36:50", "remaining_time": "4:43:07"}
|
||||
{"current_steps": 1380, "total_steps": 3857, "loss": 0.2958, "lr": 3.245159130683999e-05, "epoch": 2.50453720508167, "percentage": 35.78, "elapsed_time": "2:37:23", "remaining_time": "4:42:30"}
|
||||
{"current_steps": 1385, "total_steps": 3857, "loss": 0.3029, "lr": 3.2380635067988816e-05, "epoch": 2.513611615245009, "percentage": 35.91, "elapsed_time": "2:37:51", "remaining_time": "4:41:44"}
|
||||
{"current_steps": 1390, "total_steps": 3857, "loss": 0.3015, "lr": 3.2309425273828826e-05, "epoch": 2.5226860254083485, "percentage": 36.04, "elapsed_time": "2:38:27", "remaining_time": "4:41:13"}
|
||||
{"current_steps": 1395, "total_steps": 3857, "loss": 0.3109, "lr": 3.223796338273604e-05, "epoch": 2.531760435571688, "percentage": 36.17, "elapsed_time": "2:39:04", "remaining_time": "4:40:45"}
|
||||
{"current_steps": 1400, "total_steps": 3857, "loss": 0.293, "lr": 3.216625085824943e-05, "epoch": 2.540834845735027, "percentage": 36.3, "elapsed_time": "2:39:36", "remaining_time": "4:40:07"}
|
||||
{"current_steps": 1405, "total_steps": 3857, "loss": 0.3057, "lr": 3.2094289169040934e-05, "epoch": 2.5499092558983665, "percentage": 36.43, "elapsed_time": "2:40:13", "remaining_time": "4:39:37"}
|
||||
{"current_steps": 1410, "total_steps": 3857, "loss": 0.2842, "lr": 3.202207978888537e-05, "epoch": 2.558983666061706, "percentage": 36.56, "elapsed_time": "2:40:50", "remaining_time": "4:39:07"}
|
||||
{"current_steps": 1415, "total_steps": 3857, "loss": 0.2886, "lr": 3.194962419663029e-05, "epoch": 2.5680580762250456, "percentage": 36.69, "elapsed_time": "2:41:25", "remaining_time": "4:38:34"}
|
||||
{"current_steps": 1420, "total_steps": 3857, "loss": 0.2801, "lr": 3.1876923876165645e-05, "epoch": 2.577132486388385, "percentage": 36.82, "elapsed_time": "2:41:56", "remaining_time": "4:37:56"}
|
||||
{"current_steps": 1425, "total_steps": 3857, "loss": 0.2687, "lr": 3.1803980316393424e-05, "epoch": 2.586206896551724, "percentage": 36.95, "elapsed_time": "2:42:26", "remaining_time": "4:37:13"}
|
||||
{"current_steps": 1430, "total_steps": 3857, "loss": 0.3095, "lr": 3.173079501119716e-05, "epoch": 2.5952813067150635, "percentage": 37.08, "elapsed_time": "2:42:51", "remaining_time": "4:36:24"}
|
||||
{"current_steps": 1435, "total_steps": 3857, "loss": 0.2979, "lr": 3.1657369459411346e-05, "epoch": 2.604355716878403, "percentage": 37.21, "elapsed_time": "2:43:27", "remaining_time": "4:35:53"}
|
||||
{"current_steps": 1440, "total_steps": 3857, "loss": 0.2931, "lr": 3.158370516479067e-05, "epoch": 2.613430127041742, "percentage": 37.33, "elapsed_time": "2:44:03", "remaining_time": "4:35:21"}
|
||||
{"current_steps": 1445, "total_steps": 3857, "loss": 0.2757, "lr": 3.150980363597933e-05, "epoch": 2.6225045372050815, "percentage": 37.46, "elapsed_time": "2:44:38", "remaining_time": "4:34:49"}
|
||||
{"current_steps": 1450, "total_steps": 3857, "loss": 0.3131, "lr": 3.1435666386480054e-05, "epoch": 2.6315789473684212, "percentage": 37.59, "elapsed_time": "2:45:15", "remaining_time": "4:34:19"}
|
||||
{"current_steps": 1455, "total_steps": 3857, "loss": 0.2862, "lr": 3.136129493462312e-05, "epoch": 2.6406533575317606, "percentage": 37.72, "elapsed_time": "2:45:47", "remaining_time": "4:33:42"}
|
||||
{"current_steps": 1460, "total_steps": 3857, "loss": 0.2879, "lr": 3.128669080353528e-05, "epoch": 2.6497277676951, "percentage": 37.85, "elapsed_time": "2:46:16", "remaining_time": "4:32:59"}
|
||||
{"current_steps": 1465, "total_steps": 3857, "loss": 0.2952, "lr": 3.1211855521108556e-05, "epoch": 2.658802177858439, "percentage": 37.98, "elapsed_time": "2:46:50", "remaining_time": "4:32:24"}
|
||||
{"current_steps": 1470, "total_steps": 3857, "loss": 0.2854, "lr": 3.113679061996894e-05, "epoch": 2.6678765880217785, "percentage": 38.11, "elapsed_time": "2:47:19", "remaining_time": "4:31:42"}
|
||||
{"current_steps": 1475, "total_steps": 3857, "loss": 0.2751, "lr": 3.106149763744502e-05, "epoch": 2.676950998185118, "percentage": 38.24, "elapsed_time": "2:47:58", "remaining_time": "4:31:15"}
|
||||
{"current_steps": 1480, "total_steps": 3857, "loss": 0.2862, "lr": 3.098597811553648e-05, "epoch": 2.686025408348457, "percentage": 38.37, "elapsed_time": "2:48:33", "remaining_time": "4:30:43"}
|
||||
{"current_steps": 1485, "total_steps": 3857, "loss": 0.2949, "lr": 3.091023360088254e-05, "epoch": 2.695099818511797, "percentage": 38.5, "elapsed_time": "2:49:05", "remaining_time": "4:30:05"}
|
||||
{"current_steps": 1490, "total_steps": 3857, "loss": 0.2968, "lr": 3.083426564473026e-05, "epoch": 2.7041742286751362, "percentage": 38.63, "elapsed_time": "2:49:47", "remaining_time": "4:29:43"}
|
||||
{"current_steps": 1495, "total_steps": 3857, "loss": 0.2808, "lr": 3.075807580290279e-05, "epoch": 2.7132486388384756, "percentage": 38.76, "elapsed_time": "2:50:13", "remaining_time": "4:28:56"}
|
||||
{"current_steps": 1500, "total_steps": 3857, "loss": 0.2712, "lr": 3.0681665635767486e-05, "epoch": 2.722323049001815, "percentage": 38.89, "elapsed_time": "2:50:51", "remaining_time": "4:28:28"}
|
||||
{"current_steps": 1505, "total_steps": 3857, "loss": 0.308, "lr": 3.0605036708203984e-05, "epoch": 2.731397459165154, "percentage": 39.02, "elapsed_time": "2:51:34", "remaining_time": "4:28:08"}
|
||||
{"current_steps": 1510, "total_steps": 3857, "loss": 0.2944, "lr": 3.0528190589572095e-05, "epoch": 2.7404718693284935, "percentage": 39.15, "elapsed_time": "2:52:14", "remaining_time": "4:27:43"}
|
||||
{"current_steps": 1515, "total_steps": 3857, "loss": 0.3, "lr": 3.0451128853679744e-05, "epoch": 2.749546279491833, "percentage": 39.28, "elapsed_time": "2:52:50", "remaining_time": "4:27:10"}
|
||||
{"current_steps": 1520, "total_steps": 3857, "loss": 0.2866, "lr": 3.0373853078750658e-05, "epoch": 2.7586206896551726, "percentage": 39.41, "elapsed_time": "2:53:26", "remaining_time": "4:26:40"}
|
||||
{"current_steps": 1525, "total_steps": 3857, "loss": 0.2812, "lr": 3.029636484739211e-05, "epoch": 2.767695099818512, "percentage": 39.54, "elapsed_time": "2:53:54", "remaining_time": "4:25:56"}
|
||||
{"current_steps": 1530, "total_steps": 3857, "loss": 0.2797, "lr": 3.0218665746562473e-05, "epoch": 2.7767695099818512, "percentage": 39.67, "elapsed_time": "2:54:26", "remaining_time": "4:25:19"}
|
||||
{"current_steps": 1535, "total_steps": 3857, "loss": 0.2889, "lr": 3.0140757367538716e-05, "epoch": 2.7858439201451906, "percentage": 39.8, "elapsed_time": "2:55:04", "remaining_time": "4:24:50"}
|
||||
{"current_steps": 1540, "total_steps": 3857, "loss": 0.3133, "lr": 3.006264130588383e-05, "epoch": 2.79491833030853, "percentage": 39.93, "elapsed_time": "2:55:40", "remaining_time": "4:24:18"}
|
||||
{"current_steps": 1545, "total_steps": 3857, "loss": 0.2878, "lr": 2.998431916141414e-05, "epoch": 2.803992740471869, "percentage": 40.06, "elapsed_time": "2:56:16", "remaining_time": "4:23:47"}
|
||||
{"current_steps": 1550, "total_steps": 3857, "loss": 0.2806, "lr": 2.990579253816655e-05, "epoch": 2.8130671506352085, "percentage": 40.19, "elapsed_time": "2:56:50", "remaining_time": "4:23:11"}
|
||||
{"current_steps": 1555, "total_steps": 3857, "loss": 0.2711, "lr": 2.9827063044365688e-05, "epoch": 2.8221415607985483, "percentage": 40.32, "elapsed_time": "2:57:23", "remaining_time": "4:22:35"}
|
||||
{"current_steps": 1560, "total_steps": 3857, "loss": 0.295, "lr": 2.974813229239097e-05, "epoch": 2.8312159709618876, "percentage": 40.45, "elapsed_time": "2:57:54", "remaining_time": "4:21:57"}
|
||||
{"current_steps": 1565, "total_steps": 3857, "loss": 0.2933, "lr": 2.9669001898743573e-05, "epoch": 2.840290381125227, "percentage": 40.58, "elapsed_time": "2:58:24", "remaining_time": "4:21:17"}
|
||||
{"current_steps": 1570, "total_steps": 3857, "loss": 0.2981, "lr": 2.9589673484013334e-05, "epoch": 2.8493647912885662, "percentage": 40.71, "elapsed_time": "2:58:59", "remaining_time": "4:20:44"}
|
||||
{"current_steps": 1575, "total_steps": 3857, "loss": 0.2846, "lr": 2.9510148672845557e-05, "epoch": 2.8584392014519056, "percentage": 40.83, "elapsed_time": "2:59:33", "remaining_time": "4:20:09"}
|
||||
{"current_steps": 1580, "total_steps": 3857, "loss": 0.2893, "lr": 2.943042909390777e-05, "epoch": 2.867513611615245, "percentage": 40.96, "elapsed_time": "3:00:08", "remaining_time": "4:19:35"}
|
||||
{"current_steps": 1585, "total_steps": 3857, "loss": 0.2728, "lr": 2.9350516379856303e-05, "epoch": 2.876588021778584, "percentage": 41.09, "elapsed_time": "3:00:39", "remaining_time": "4:18:58"}
|
||||
{"current_steps": 1590, "total_steps": 3857, "loss": 0.2855, "lr": 2.9270412167302922e-05, "epoch": 2.885662431941924, "percentage": 41.22, "elapsed_time": "3:01:17", "remaining_time": "4:18:28"}
|
||||
{"current_steps": 1595, "total_steps": 3857, "loss": 0.2895, "lr": 2.9190118096781262e-05, "epoch": 2.8947368421052633, "percentage": 41.35, "elapsed_time": "3:01:54", "remaining_time": "4:17:59"}
|
||||
{"current_steps": 1600, "total_steps": 3857, "loss": 0.2889, "lr": 2.9109635812713262e-05, "epoch": 2.9038112522686026, "percentage": 41.48, "elapsed_time": "3:02:33", "remaining_time": "4:17:31"}
|
||||
{"current_steps": 1605, "total_steps": 3857, "loss": 0.3011, "lr": 2.9028966963375467e-05, "epoch": 2.912885662431942, "percentage": 41.61, "elapsed_time": "3:03:07", "remaining_time": "4:16:56"}
|
||||
{"current_steps": 1610, "total_steps": 3857, "loss": 0.2883, "lr": 2.8948113200865275e-05, "epoch": 2.9219600725952812, "percentage": 41.74, "elapsed_time": "3:03:36", "remaining_time": "4:16:15"}
|
||||
{"current_steps": 1615, "total_steps": 3857, "loss": 0.2997, "lr": 2.8867076181067104e-05, "epoch": 2.9310344827586206, "percentage": 41.87, "elapsed_time": "3:04:07", "remaining_time": "4:15:36"}
|
||||
{"current_steps": 1620, "total_steps": 3857, "loss": 0.293, "lr": 2.8785857563618485e-05, "epoch": 2.94010889292196, "percentage": 42.0, "elapsed_time": "3:04:38", "remaining_time": "4:14:57"}
|
||||
{"current_steps": 1625, "total_steps": 3857, "loss": 0.2892, "lr": 2.8704459011876063e-05, "epoch": 2.9491833030852996, "percentage": 42.13, "elapsed_time": "3:05:05", "remaining_time": "4:14:14"}
|
||||
{"current_steps": 1630, "total_steps": 3857, "loss": 0.2787, "lr": 2.8622882192881536e-05, "epoch": 2.958257713248639, "percentage": 42.26, "elapsed_time": "3:05:41", "remaining_time": "4:13:41"}
|
||||
{"current_steps": 1635, "total_steps": 3857, "loss": 0.2976, "lr": 2.8541128777327523e-05, "epoch": 2.9673321234119783, "percentage": 42.39, "elapsed_time": "3:06:09", "remaining_time": "4:12:59"}
|
||||
{"current_steps": 1640, "total_steps": 3857, "loss": 0.3098, "lr": 2.8459200439523323e-05, "epoch": 2.9764065335753176, "percentage": 42.52, "elapsed_time": "3:06:40", "remaining_time": "4:12:21"}
|
||||
{"current_steps": 1645, "total_steps": 3857, "loss": 0.279, "lr": 2.8377098857360655e-05, "epoch": 2.985480943738657, "percentage": 42.65, "elapsed_time": "3:07:09", "remaining_time": "4:11:40"}
|
||||
{"current_steps": 1650, "total_steps": 3857, "loss": 0.3015, "lr": 2.8294825712279276e-05, "epoch": 2.9945553539019962, "percentage": 42.78, "elapsed_time": "3:07:42", "remaining_time": "4:11:05"}
|
||||
{"current_steps": 1655, "total_steps": 3857, "loss": 0.2892, "lr": 2.8212382689232537e-05, "epoch": 3.0036297640653356, "percentage": 42.91, "elapsed_time": "3:08:21", "remaining_time": "4:10:36"}
|
||||
{"current_steps": 1660, "total_steps": 3857, "loss": 0.2598, "lr": 2.8129771476652928e-05, "epoch": 3.0127041742286753, "percentage": 43.04, "elapsed_time": "3:08:59", "remaining_time": "4:10:07"}
|
||||
{"current_steps": 1665, "total_steps": 3857, "loss": 0.2634, "lr": 2.8046993766417405e-05, "epoch": 3.0217785843920146, "percentage": 43.17, "elapsed_time": "3:09:30", "remaining_time": "4:09:29"}
|
||||
{"current_steps": 1670, "total_steps": 3857, "loss": 0.2629, "lr": 2.796405125381284e-05, "epoch": 3.030852994555354, "percentage": 43.3, "elapsed_time": "3:10:02", "remaining_time": "4:08:51"}
|
||||
{"current_steps": 1675, "total_steps": 3857, "loss": 0.2495, "lr": 2.788094563750124e-05, "epoch": 3.0399274047186933, "percentage": 43.43, "elapsed_time": "3:10:34", "remaining_time": "4:08:15"}
|
||||
{"current_steps": 1680, "total_steps": 3857, "loss": 0.2652, "lr": 2.779767861948496e-05, "epoch": 3.0490018148820326, "percentage": 43.56, "elapsed_time": "3:11:07", "remaining_time": "4:07:40"}
|
||||
{"current_steps": 1685, "total_steps": 3857, "loss": 0.2569, "lr": 2.7714251905071888e-05, "epoch": 3.058076225045372, "percentage": 43.69, "elapsed_time": "3:11:42", "remaining_time": "4:07:06"}
|
||||
{"current_steps": 1690, "total_steps": 3857, "loss": 0.2637, "lr": 2.7630667202840467e-05, "epoch": 3.0671506352087112, "percentage": 43.82, "elapsed_time": "3:12:11", "remaining_time": "4:06:26"}
|
||||
{"current_steps": 1695, "total_steps": 3857, "loss": 0.245, "lr": 2.754692622460475e-05, "epoch": 3.076225045372051, "percentage": 43.95, "elapsed_time": "3:12:43", "remaining_time": "4:05:49"}
|
||||
{"current_steps": 1700, "total_steps": 3857, "loss": 0.2682, "lr": 2.7463030685379308e-05, "epoch": 3.0852994555353903, "percentage": 44.08, "elapsed_time": "3:13:19", "remaining_time": "4:05:17"}
|
||||
{"current_steps": 1705, "total_steps": 3857, "loss": 0.248, "lr": 2.7378982303344122e-05, "epoch": 3.0943738656987296, "percentage": 44.21, "elapsed_time": "3:13:51", "remaining_time": "4:04:40"}
|
||||
{"current_steps": 1710, "total_steps": 3857, "loss": 0.2643, "lr": 2.7294782799809403e-05, "epoch": 3.103448275862069, "percentage": 44.33, "elapsed_time": "3:14:22", "remaining_time": "4:04:03"}
|
||||
{"current_steps": 1715, "total_steps": 3857, "loss": 0.2677, "lr": 2.721043389918032e-05, "epoch": 3.1125226860254083, "percentage": 44.46, "elapsed_time": "3:15:04", "remaining_time": "4:03:39"}
|
||||
{"current_steps": 1720, "total_steps": 3857, "loss": 0.2745, "lr": 2.7125937328921693e-05, "epoch": 3.1215970961887476, "percentage": 44.59, "elapsed_time": "3:15:43", "remaining_time": "4:03:10"}
|
||||
{"current_steps": 1725, "total_steps": 3857, "loss": 0.2472, "lr": 2.704129481952261e-05, "epoch": 3.130671506352087, "percentage": 44.72, "elapsed_time": "3:16:18", "remaining_time": "4:02:38"}
|
||||
{"current_steps": 1730, "total_steps": 3857, "loss": 0.2562, "lr": 2.6956508104461003e-05, "epoch": 3.1397459165154267, "percentage": 44.85, "elapsed_time": "3:16:51", "remaining_time": "4:02:02"}
|
||||
{"current_steps": 1735, "total_steps": 3857, "loss": 0.27, "lr": 2.6871578920168132e-05, "epoch": 3.148820326678766, "percentage": 44.98, "elapsed_time": "3:17:25", "remaining_time": "4:01:27"}
|
||||
{"current_steps": 1740, "total_steps": 3857, "loss": 0.2771, "lr": 2.6786509005993022e-05, "epoch": 3.1578947368421053, "percentage": 45.11, "elapsed_time": "3:17:56", "remaining_time": "4:00:49"}
|
||||
{"current_steps": 1745, "total_steps": 3857, "loss": 0.2687, "lr": 2.670130010416684e-05, "epoch": 3.1669691470054446, "percentage": 45.24, "elapsed_time": "3:18:35", "remaining_time": "4:00:21"}
|
||||
{"current_steps": 1750, "total_steps": 3857, "loss": 0.2576, "lr": 2.6615953959767238e-05, "epoch": 3.176043557168784, "percentage": 45.37, "elapsed_time": "3:19:10", "remaining_time": "3:59:48"}
|
||||
{"current_steps": 1755, "total_steps": 3857, "loss": 0.2514, "lr": 2.6530472320682564e-05, "epoch": 3.1851179673321233, "percentage": 45.5, "elapsed_time": "3:19:39", "remaining_time": "3:59:07"}
|
||||
{"current_steps": 1760, "total_steps": 3857, "loss": 0.286, "lr": 2.644485693757613e-05, "epoch": 3.1941923774954626, "percentage": 45.63, "elapsed_time": "3:20:14", "remaining_time": "3:58:35"}
|
||||
{"current_steps": 1765, "total_steps": 3857, "loss": 0.2552, "lr": 2.63591095638503e-05, "epoch": 3.2032667876588024, "percentage": 45.76, "elapsed_time": "3:20:49", "remaining_time": "3:58:02"}
|
||||
{"current_steps": 1770, "total_steps": 3857, "loss": 0.2853, "lr": 2.6273231955610624e-05, "epoch": 3.2123411978221417, "percentage": 45.89, "elapsed_time": "3:21:27", "remaining_time": "3:57:31"}
|
||||
{"current_steps": 1775, "total_steps": 3857, "loss": 0.2564, "lr": 2.6187225871629833e-05, "epoch": 3.221415607985481, "percentage": 46.02, "elapsed_time": "3:22:04", "remaining_time": "3:57:01"}
|
||||
{"current_steps": 1780, "total_steps": 3857, "loss": 0.2735, "lr": 2.610109307331185e-05, "epoch": 3.2304900181488203, "percentage": 46.15, "elapsed_time": "3:22:34", "remaining_time": "3:56:22"}
|
||||
{"current_steps": 1785, "total_steps": 3857, "loss": 0.2791, "lr": 2.6014835324655714e-05, "epoch": 3.2395644283121596, "percentage": 46.28, "elapsed_time": "3:22:59", "remaining_time": "3:55:37"}
|
||||
{"current_steps": 1790, "total_steps": 3857, "loss": 0.2719, "lr": 2.5928454392219435e-05, "epoch": 3.248638838475499, "percentage": 46.41, "elapsed_time": "3:23:38", "remaining_time": "3:55:09"}
|
||||
{"current_steps": 1795, "total_steps": 3857, "loss": 0.2422, "lr": 2.584195204508383e-05, "epoch": 3.2577132486388383, "percentage": 46.54, "elapsed_time": "3:24:06", "remaining_time": "3:54:28"}
|
||||
{"current_steps": 1800, "total_steps": 3857, "loss": 0.2451, "lr": 2.5755330054816292e-05, "epoch": 3.266787658802178, "percentage": 46.67, "elapsed_time": "3:24:39", "remaining_time": "3:53:52"}
|
||||
{"current_steps": 1805, "total_steps": 3857, "loss": 0.2699, "lr": 2.56685901954345e-05, "epoch": 3.2758620689655173, "percentage": 46.8, "elapsed_time": "3:25:15", "remaining_time": "3:53:21"}
|
||||
{"current_steps": 1810, "total_steps": 3857, "loss": 0.2636, "lr": 2.558173424337008e-05, "epoch": 3.2849364791288567, "percentage": 46.93, "elapsed_time": "3:25:44", "remaining_time": "3:52:40"}
|
||||
{"current_steps": 1815, "total_steps": 3857, "loss": 0.2615, "lr": 2.5494763977432254e-05, "epoch": 3.294010889292196, "percentage": 47.06, "elapsed_time": "3:26:18", "remaining_time": "3:52:06"}
|
||||
{"current_steps": 1820, "total_steps": 3857, "loss": 0.2642, "lr": 2.540768117877139e-05, "epoch": 3.3030852994555353, "percentage": 47.19, "elapsed_time": "3:26:49", "remaining_time": "3:51:28"}
|
||||
{"current_steps": 1825, "total_steps": 3857, "loss": 0.254, "lr": 2.532048763084252e-05, "epoch": 3.3121597096188746, "percentage": 47.32, "elapsed_time": "3:27:18", "remaining_time": "3:50:49"}
|
||||
{"current_steps": 1830, "total_steps": 3857, "loss": 0.2798, "lr": 2.5233185119368816e-05, "epoch": 3.321234119782214, "percentage": 47.45, "elapsed_time": "3:27:48", "remaining_time": "3:50:10"}
|
||||
{"current_steps": 1835, "total_steps": 3857, "loss": 0.2561, "lr": 2.5145775432305027e-05, "epoch": 3.3303085299455537, "percentage": 47.58, "elapsed_time": "3:28:22", "remaining_time": "3:49:36"}
|
||||
{"current_steps": 1840, "total_steps": 3857, "loss": 0.268, "lr": 2.5058260359800847e-05, "epoch": 3.339382940108893, "percentage": 47.71, "elapsed_time": "3:28:52", "remaining_time": "3:48:58"}
|
||||
{"current_steps": 1845, "total_steps": 3857, "loss": 0.2487, "lr": 2.4970641694164286e-05, "epoch": 3.3484573502722323, "percentage": 47.84, "elapsed_time": "3:29:27", "remaining_time": "3:48:25"}
|
||||
{"current_steps": 1850, "total_steps": 3857, "loss": 0.2643, "lr": 2.4882921229824927e-05, "epoch": 3.3575317604355717, "percentage": 47.96, "elapsed_time": "3:29:58", "remaining_time": "3:47:47"}
|
||||
{"current_steps": 1855, "total_steps": 3857, "loss": 0.2412, "lr": 2.4795100763297184e-05, "epoch": 3.366606170598911, "percentage": 48.09, "elapsed_time": "3:30:32", "remaining_time": "3:47:13"}
|
||||
{"current_steps": 1860, "total_steps": 3857, "loss": 0.2578, "lr": 2.470718209314352e-05, "epoch": 3.3756805807622503, "percentage": 48.22, "elapsed_time": "3:31:06", "remaining_time": "3:46:38"}
|
||||
{"current_steps": 1865, "total_steps": 3857, "loss": 0.2638, "lr": 2.4619167019937607e-05, "epoch": 3.3847549909255896, "percentage": 48.35, "elapsed_time": "3:31:34", "remaining_time": "3:45:58"}
|
||||
{"current_steps": 1870, "total_steps": 3857, "loss": 0.2771, "lr": 2.4531057346227457e-05, "epoch": 3.3938294010889294, "percentage": 48.48, "elapsed_time": "3:32:03", "remaining_time": "3:45:19"}
|
||||
{"current_steps": 1875, "total_steps": 3857, "loss": 0.2877, "lr": 2.44428548764985e-05, "epoch": 3.4029038112522687, "percentage": 48.61, "elapsed_time": "3:32:36", "remaining_time": "3:44:44"}
|
||||
{"current_steps": 1880, "total_steps": 3857, "loss": 0.2763, "lr": 2.435456141713661e-05, "epoch": 3.411978221415608, "percentage": 48.74, "elapsed_time": "3:33:10", "remaining_time": "3:44:10"}
|
||||
{"current_steps": 1885, "total_steps": 3857, "loss": 0.2392, "lr": 2.4266178776391146e-05, "epoch": 3.4210526315789473, "percentage": 48.87, "elapsed_time": "3:33:44", "remaining_time": "3:43:36"}
|
||||
{"current_steps": 1890, "total_steps": 3857, "loss": 0.2674, "lr": 2.4177708764337892e-05, "epoch": 3.4301270417422867, "percentage": 49.0, "elapsed_time": "3:34:21", "remaining_time": "3:43:05"}
|
||||
{"current_steps": 1895, "total_steps": 3857, "loss": 0.2687, "lr": 2.4089153192842007e-05, "epoch": 3.439201451905626, "percentage": 49.13, "elapsed_time": "3:35:02", "remaining_time": "3:42:38"}
|
||||
{"current_steps": 1900, "total_steps": 3857, "loss": 0.261, "lr": 2.4000513875520892e-05, "epoch": 3.4482758620689653, "percentage": 49.26, "elapsed_time": "3:35:34", "remaining_time": "3:42:02"}
|
||||
{"current_steps": 1905, "total_steps": 3857, "loss": 0.2618, "lr": 2.391179262770707e-05, "epoch": 3.457350272232305, "percentage": 49.39, "elapsed_time": "3:36:09", "remaining_time": "3:41:29"}
|
||||
{"current_steps": 1910, "total_steps": 3857, "loss": 0.2537, "lr": 2.3822991266411002e-05, "epoch": 3.4664246823956444, "percentage": 49.52, "elapsed_time": "3:36:39", "remaining_time": "3:40:51"}
|
||||
{"current_steps": 1915, "total_steps": 3857, "loss": 0.2719, "lr": 2.3734111610283854e-05, "epoch": 3.4754990925589837, "percentage": 49.65, "elapsed_time": "3:37:13", "remaining_time": "3:40:17"}
|
||||
{"current_steps": 1920, "total_steps": 3857, "loss": 0.2492, "lr": 2.3645155479580306e-05, "epoch": 3.484573502722323, "percentage": 49.78, "elapsed_time": "3:37:50", "remaining_time": "3:39:46"}
|
||||
{"current_steps": 1925, "total_steps": 3857, "loss": 0.2634, "lr": 2.355612469612121e-05, "epoch": 3.4936479128856623, "percentage": 49.91, "elapsed_time": "3:38:25", "remaining_time": "3:39:13"}
|
||||
{"current_steps": 1930, "total_steps": 3857, "loss": 0.2491, "lr": 2.3467021083256296e-05, "epoch": 3.5027223230490017, "percentage": 50.04, "elapsed_time": "3:39:00", "remaining_time": "3:38:39"}
|
||||
{"current_steps": 1935, "total_steps": 3857, "loss": 0.2596, "lr": 2.337784646582687e-05, "epoch": 3.511796733212341, "percentage": 50.17, "elapsed_time": "3:39:33", "remaining_time": "3:38:04"}
|
||||
{"current_steps": 1940, "total_steps": 3857, "loss": 0.2567, "lr": 2.328860267012839e-05, "epoch": 3.5208711433756807, "percentage": 50.3, "elapsed_time": "3:40:03", "remaining_time": "3:37:26"}
|
||||
{"current_steps": 1945, "total_steps": 3857, "loss": 0.2608, "lr": 2.319929152387309e-05, "epoch": 3.52994555353902, "percentage": 50.43, "elapsed_time": "3:40:36", "remaining_time": "3:36:52"}
|
||||
{"current_steps": 1950, "total_steps": 3857, "loss": 0.2996, "lr": 2.3109914856152543e-05, "epoch": 3.5390199637023594, "percentage": 50.56, "elapsed_time": "3:41:14", "remaining_time": "3:36:21"}
|
||||
{"current_steps": 1955, "total_steps": 3857, "loss": 0.2528, "lr": 2.3020474497400206e-05, "epoch": 3.5480943738656987, "percentage": 50.69, "elapsed_time": "3:41:49", "remaining_time": "3:35:48"}
|
||||
{"current_steps": 1960, "total_steps": 3857, "loss": 0.2519, "lr": 2.293097227935391e-05, "epoch": 3.557168784029038, "percentage": 50.82, "elapsed_time": "3:42:25", "remaining_time": "3:35:16"}
|
||||
{"current_steps": 1965, "total_steps": 3857, "loss": 0.2792, "lr": 2.2841410035018394e-05, "epoch": 3.5662431941923773, "percentage": 50.95, "elapsed_time": "3:42:58", "remaining_time": "3:34:41"}
|
||||
{"current_steps": 1970, "total_steps": 3857, "loss": 0.289, "lr": 2.2751789598627702e-05, "epoch": 3.5753176043557167, "percentage": 51.08, "elapsed_time": "3:43:27", "remaining_time": "3:34:03"}
|
||||
{"current_steps": 1975, "total_steps": 3857, "loss": 0.2623, "lr": 2.2662112805607673e-05, "epoch": 3.5843920145190564, "percentage": 51.21, "elapsed_time": "3:44:00", "remaining_time": "3:33:27"}
|
||||
{"current_steps": 1980, "total_steps": 3857, "loss": 0.2394, "lr": 2.2572381492538328e-05, "epoch": 3.5934664246823957, "percentage": 51.34, "elapsed_time": "3:44:33", "remaining_time": "3:32:52"}
|
||||
{"current_steps": 1985, "total_steps": 3857, "loss": 0.2576, "lr": 2.2482597497116246e-05, "epoch": 3.602540834845735, "percentage": 51.46, "elapsed_time": "3:45:04", "remaining_time": "3:32:15"}
|
||||
{"current_steps": 1990, "total_steps": 3857, "loss": 0.2683, "lr": 2.2392762658116946e-05, "epoch": 3.6116152450090744, "percentage": 51.59, "elapsed_time": "3:45:36", "remaining_time": "3:31:40"}
|
||||
{"current_steps": 1995, "total_steps": 3857, "loss": 0.2464, "lr": 2.2302878815357225e-05, "epoch": 3.6206896551724137, "percentage": 51.72, "elapsed_time": "3:46:05", "remaining_time": "3:31:01"}
|
||||
{"current_steps": 2000, "total_steps": 3857, "loss": 0.2516, "lr": 2.2212947809657477e-05, "epoch": 3.629764065335753, "percentage": 51.85, "elapsed_time": "3:46:40", "remaining_time": "3:30:27"}
|
||||
{"current_steps": 2005, "total_steps": 3857, "loss": 0.2675, "lr": 2.2122971482803998e-05, "epoch": 3.6388384754990923, "percentage": 51.98, "elapsed_time": "3:47:14", "remaining_time": "3:29:54"}
|
||||
{"current_steps": 2010, "total_steps": 3857, "loss": 0.2784, "lr": 2.2032951677511246e-05, "epoch": 3.647912885662432, "percentage": 52.11, "elapsed_time": "3:47:47", "remaining_time": "3:29:18"}
|
||||
{"current_steps": 2015, "total_steps": 3857, "loss": 0.2523, "lr": 2.1942890237384127e-05, "epoch": 3.6569872958257714, "percentage": 52.24, "elapsed_time": "3:48:20", "remaining_time": "3:28:44"}
|
||||
{"current_steps": 2020, "total_steps": 3857, "loss": 0.2467, "lr": 2.1852789006880223e-05, "epoch": 3.6660617059891107, "percentage": 52.37, "elapsed_time": "3:48:55", "remaining_time": "3:28:11"}
|
||||
{"current_steps": 2025, "total_steps": 3857, "loss": 0.2648, "lr": 2.176264983127204e-05, "epoch": 3.67513611615245, "percentage": 52.5, "elapsed_time": "3:49:27", "remaining_time": "3:27:35"}
|
||||
{"current_steps": 2030, "total_steps": 3857, "loss": 0.2865, "lr": 2.167247455660917e-05, "epoch": 3.6842105263157894, "percentage": 52.63, "elapsed_time": "3:50:01", "remaining_time": "3:27:01"}
|
||||
{"current_steps": 2035, "total_steps": 3857, "loss": 0.2676, "lr": 2.1582265029680535e-05, "epoch": 3.6932849364791287, "percentage": 52.76, "elapsed_time": "3:50:39", "remaining_time": "3:26:30"}
|
||||
{"current_steps": 2040, "total_steps": 3857, "loss": 0.2551, "lr": 2.1492023097976538e-05, "epoch": 3.702359346642468, "percentage": 52.89, "elapsed_time": "3:51:09", "remaining_time": "3:25:53"}
|
||||
{"current_steps": 2045, "total_steps": 3857, "loss": 0.3046, "lr": 2.1401750609651227e-05, "epoch": 3.711433756805808, "percentage": 53.02, "elapsed_time": "3:51:49", "remaining_time": "3:25:24"}
|
||||
{"current_steps": 2050, "total_steps": 3857, "loss": 0.2461, "lr": 2.1311449413484454e-05, "epoch": 3.720508166969147, "percentage": 53.15, "elapsed_time": "3:52:13", "remaining_time": "3:24:41"}
|
||||
{"current_steps": 2055, "total_steps": 3857, "loss": 0.2602, "lr": 2.1221121358844014e-05, "epoch": 3.7295825771324864, "percentage": 53.28, "elapsed_time": "3:52:45", "remaining_time": "3:24:06"}
|
||||
{"current_steps": 2060, "total_steps": 3857, "loss": 0.2663, "lr": 2.1130768295647743e-05, "epoch": 3.7386569872958257, "percentage": 53.41, "elapsed_time": "3:53:16", "remaining_time": "3:23:29"}
|
||||
{"current_steps": 2065, "total_steps": 3857, "loss": 0.2887, "lr": 2.104039207432567e-05, "epoch": 3.747731397459165, "percentage": 53.54, "elapsed_time": "3:53:43", "remaining_time": "3:22:49"}
|
||||
{"current_steps": 2070, "total_steps": 3857, "loss": 0.2437, "lr": 2.09499945457821e-05, "epoch": 3.7568058076225044, "percentage": 53.67, "elapsed_time": "3:54:20", "remaining_time": "3:22:17"}
|
||||
{"current_steps": 2075, "total_steps": 3857, "loss": 0.2831, "lr": 2.0859577561357702e-05, "epoch": 3.7658802177858437, "percentage": 53.8, "elapsed_time": "3:54:56", "remaining_time": "3:21:45"}
|
||||
{"current_steps": 2080, "total_steps": 3857, "loss": 0.2589, "lr": 2.076914297279161e-05, "epoch": 3.7749546279491835, "percentage": 53.93, "elapsed_time": "3:55:25", "remaining_time": "3:21:07"}
|
||||
{"current_steps": 2085, "total_steps": 3857, "loss": 0.2773, "lr": 2.0678692632183485e-05, "epoch": 3.784029038112523, "percentage": 54.06, "elapsed_time": "3:56:02", "remaining_time": "3:20:36"}
|
||||
{"current_steps": 2090, "total_steps": 3857, "loss": 0.2696, "lr": 2.0588228391955593e-05, "epoch": 3.793103448275862, "percentage": 54.19, "elapsed_time": "3:56:35", "remaining_time": "3:20:01"}
|
||||
{"current_steps": 2095, "total_steps": 3857, "loss": 0.2735, "lr": 2.0497752104814853e-05, "epoch": 3.8021778584392014, "percentage": 54.32, "elapsed_time": "3:57:03", "remaining_time": "3:19:23"}
|
||||
{"current_steps": 2100, "total_steps": 3857, "loss": 0.2498, "lr": 2.0407265623714924e-05, "epoch": 3.8112522686025407, "percentage": 54.45, "elapsed_time": "3:57:36", "remaining_time": "3:18:47"}
|
||||
{"current_steps": 2105, "total_steps": 3857, "loss": 0.2763, "lr": 2.031677080181823e-05, "epoch": 3.82032667876588, "percentage": 54.58, "elapsed_time": "3:58:08", "remaining_time": "3:18:12"}
|
||||
{"current_steps": 2110, "total_steps": 3857, "loss": 0.2771, "lr": 2.0226269492458e-05, "epoch": 3.8294010889292194, "percentage": 54.71, "elapsed_time": "3:58:40", "remaining_time": "3:17:36"}
|
||||
{"current_steps": 2115, "total_steps": 3857, "loss": 0.2978, "lr": 2.013576354910035e-05, "epoch": 3.838475499092559, "percentage": 54.84, "elapsed_time": "3:59:15", "remaining_time": "3:17:03"}
|
||||
{"current_steps": 2120, "total_steps": 3857, "loss": 0.2907, "lr": 2.0045254825306275e-05, "epoch": 3.8475499092558985, "percentage": 54.96, "elapsed_time": "3:59:47", "remaining_time": "3:16:28"}
|
||||
{"current_steps": 2125, "total_steps": 3857, "loss": 0.2877, "lr": 1.9954745174693736e-05, "epoch": 3.856624319419238, "percentage": 55.09, "elapsed_time": "4:00:19", "remaining_time": "3:15:52"}
|
||||
{"current_steps": 2130, "total_steps": 3857, "loss": 0.2609, "lr": 1.9864236450899664e-05, "epoch": 3.865698729582577, "percentage": 55.22, "elapsed_time": "4:00:57", "remaining_time": "3:15:22"}
|
||||
{"current_steps": 2135, "total_steps": 3857, "loss": 0.2646, "lr": 1.9773730507542005e-05, "epoch": 3.8747731397459164, "percentage": 55.35, "elapsed_time": "4:01:27", "remaining_time": "3:14:44"}
|
||||
{"current_steps": 2140, "total_steps": 3857, "loss": 0.2862, "lr": 1.9683229198181784e-05, "epoch": 3.8838475499092557, "percentage": 55.48, "elapsed_time": "4:02:07", "remaining_time": "3:14:15"}
|
||||
{"current_steps": 2145, "total_steps": 3857, "loss": 0.2491, "lr": 1.9592734376285082e-05, "epoch": 3.892921960072595, "percentage": 55.61, "elapsed_time": "4:02:44", "remaining_time": "3:13:44"}
|
||||
{"current_steps": 2150, "total_steps": 3857, "loss": 0.2538, "lr": 1.9502247895185154e-05, "epoch": 3.901996370235935, "percentage": 55.74, "elapsed_time": "4:03:16", "remaining_time": "3:13:09"}
|
||||
{"current_steps": 2155, "total_steps": 3857, "loss": 0.2656, "lr": 1.9411771608044414e-05, "epoch": 3.911070780399274, "percentage": 55.87, "elapsed_time": "4:03:56", "remaining_time": "3:12:39"}
|
||||
{"current_steps": 2160, "total_steps": 3857, "loss": 0.2517, "lr": 1.9321307367816525e-05, "epoch": 3.9201451905626135, "percentage": 56.0, "elapsed_time": "4:04:22", "remaining_time": "3:11:59"}
|
||||
{"current_steps": 2165, "total_steps": 3857, "loss": 0.2588, "lr": 1.9230857027208397e-05, "epoch": 3.9292196007259528, "percentage": 56.13, "elapsed_time": "4:04:57", "remaining_time": "3:11:26"}
|
||||
{"current_steps": 2170, "total_steps": 3857, "loss": 0.2591, "lr": 1.9140422438642308e-05, "epoch": 3.938294010889292, "percentage": 56.26, "elapsed_time": "4:05:29", "remaining_time": "3:10:50"}
|
||||
{"current_steps": 2175, "total_steps": 3857, "loss": 0.2398, "lr": 1.9050005454217907e-05, "epoch": 3.9473684210526314, "percentage": 56.39, "elapsed_time": "4:05:56", "remaining_time": "3:10:11"}
|
||||
{"current_steps": 2180, "total_steps": 3857, "loss": 0.2836, "lr": 1.895960792567434e-05, "epoch": 3.9564428312159707, "percentage": 56.52, "elapsed_time": "4:06:31", "remaining_time": "3:09:38"}
|
||||
{"current_steps": 2185, "total_steps": 3857, "loss": 0.278, "lr": 1.8869231704352264e-05, "epoch": 3.9655172413793105, "percentage": 56.65, "elapsed_time": "4:07:02", "remaining_time": "3:09:02"}
|
||||
{"current_steps": 2190, "total_steps": 3857, "loss": 0.259, "lr": 1.8778878641156e-05, "epoch": 3.97459165154265, "percentage": 56.78, "elapsed_time": "4:07:39", "remaining_time": "3:08:31"}
|
||||
{"current_steps": 2195, "total_steps": 3857, "loss": 0.2558, "lr": 1.868855058651555e-05, "epoch": 3.983666061705989, "percentage": 56.91, "elapsed_time": "4:08:09", "remaining_time": "3:07:54"}
|
||||
{"current_steps": 2200, "total_steps": 3857, "loss": 0.2776, "lr": 1.8598249390348783e-05, "epoch": 3.9927404718693285, "percentage": 57.04, "elapsed_time": "4:08:47", "remaining_time": "3:07:23"}
|
||||
{"current_steps": 2205, "total_steps": 3857, "loss": 0.2703, "lr": 1.850797690202347e-05, "epoch": 4.001814882032668, "percentage": 57.17, "elapsed_time": "4:09:19", "remaining_time": "3:06:47"}
|
||||
{"current_steps": 2210, "total_steps": 3857, "loss": 0.2302, "lr": 1.8417734970319475e-05, "epoch": 4.0108892921960075, "percentage": 57.3, "elapsed_time": "4:09:54", "remaining_time": "3:06:14"}
|
||||
{"current_steps": 2215, "total_steps": 3857, "loss": 0.2446, "lr": 1.8327525443390837e-05, "epoch": 4.019963702359346, "percentage": 57.43, "elapsed_time": "4:10:26", "remaining_time": "3:05:39"}
|
||||
{"current_steps": 2220, "total_steps": 3857, "loss": 0.2301, "lr": 1.8237350168727973e-05, "epoch": 4.029038112522686, "percentage": 57.56, "elapsed_time": "4:11:01", "remaining_time": "3:05:05"}
|
||||
{"current_steps": 2225, "total_steps": 3857, "loss": 0.232, "lr": 1.814721099311978e-05, "epoch": 4.038112522686025, "percentage": 57.69, "elapsed_time": "4:11:34", "remaining_time": "3:04:31"}
|
||||
{"current_steps": 2230, "total_steps": 3857, "loss": 0.2559, "lr": 1.805710976261588e-05, "epoch": 4.047186932849365, "percentage": 57.82, "elapsed_time": "4:12:05", "remaining_time": "3:03:55"}
|
||||
{"current_steps": 2235, "total_steps": 3857, "loss": 0.2573, "lr": 1.7967048322488757e-05, "epoch": 4.056261343012705, "percentage": 57.95, "elapsed_time": "4:12:39", "remaining_time": "3:03:21"}
|
||||
{"current_steps": 2240, "total_steps": 3857, "loss": 0.2408, "lr": 1.7877028517196012e-05, "epoch": 4.0653357531760435, "percentage": 58.08, "elapsed_time": "4:13:13", "remaining_time": "3:02:47"}
|
||||
{"current_steps": 2245, "total_steps": 3857, "loss": 0.2463, "lr": 1.7787052190342526e-05, "epoch": 4.074410163339383, "percentage": 58.21, "elapsed_time": "4:13:47", "remaining_time": "3:02:13"}
|
||||
{"current_steps": 2250, "total_steps": 3857, "loss": 0.2438, "lr": 1.7697121184642782e-05, "epoch": 4.083484573502722, "percentage": 58.34, "elapsed_time": "4:14:20", "remaining_time": "3:01:39"}
|
||||
{"current_steps": 2255, "total_steps": 3857, "loss": 0.2613, "lr": 1.760723734188306e-05, "epoch": 4.092558983666062, "percentage": 58.47, "elapsed_time": "4:14:52", "remaining_time": "3:01:04"}
|
||||
{"current_steps": 2260, "total_steps": 3857, "loss": 0.2528, "lr": 1.7517402502883767e-05, "epoch": 4.101633393829401, "percentage": 58.59, "elapsed_time": "4:15:28", "remaining_time": "3:00:31"}
|
||||
{"current_steps": 2265, "total_steps": 3857, "loss": 0.231, "lr": 1.742761850746168e-05, "epoch": 4.1107078039927405, "percentage": 58.72, "elapsed_time": "4:16:03", "remaining_time": "2:59:58"}
|
||||
{"current_steps": 2270, "total_steps": 3857, "loss": 0.2309, "lr": 1.7337887194392337e-05, "epoch": 4.11978221415608, "percentage": 58.85, "elapsed_time": "4:16:33", "remaining_time": "2:59:21"}
|
||||
{"current_steps": 2275, "total_steps": 3857, "loss": 0.2471, "lr": 1.7248210401372304e-05, "epoch": 4.128856624319419, "percentage": 58.98, "elapsed_time": "4:17:11", "remaining_time": "2:58:50"}
|
||||
{"current_steps": 2280, "total_steps": 3857, "loss": 0.2369, "lr": 1.7158589964981616e-05, "epoch": 4.137931034482759, "percentage": 59.11, "elapsed_time": "4:17:50", "remaining_time": "2:58:20"}
|
||||
{"current_steps": 2285, "total_steps": 3857, "loss": 0.2523, "lr": 1.7069027720646093e-05, "epoch": 4.147005444646098, "percentage": 59.24, "elapsed_time": "4:18:20", "remaining_time": "2:57:43"}
|
||||
{"current_steps": 2290, "total_steps": 3857, "loss": 0.2205, "lr": 1.6979525502599804e-05, "epoch": 4.1560798548094375, "percentage": 59.37, "elapsed_time": "4:18:51", "remaining_time": "2:57:07"}
|
||||
{"current_steps": 2295, "total_steps": 3857, "loss": 0.2436, "lr": 1.689008514384746e-05, "epoch": 4.165154264972776, "percentage": 59.5, "elapsed_time": "4:19:26", "remaining_time": "2:56:34"}
|
||||
{"current_steps": 2300, "total_steps": 3857, "loss": 0.2324, "lr": 1.6800708476126922e-05, "epoch": 4.174228675136116, "percentage": 59.63, "elapsed_time": "4:19:57", "remaining_time": "2:55:59"}
|
||||
{"current_steps": 2305, "total_steps": 3857, "loss": 0.2272, "lr": 1.6711397329871617e-05, "epoch": 4.183303085299456, "percentage": 59.76, "elapsed_time": "4:20:35", "remaining_time": "2:55:27"}
|
||||
{"current_steps": 2310, "total_steps": 3857, "loss": 0.2873, "lr": 1.6622153534173132e-05, "epoch": 4.192377495462795, "percentage": 59.89, "elapsed_time": "4:21:07", "remaining_time": "2:54:52"}
|
||||
{"current_steps": 2315, "total_steps": 3857, "loss": 0.2419, "lr": 1.6532978916743707e-05, "epoch": 4.201451905626135, "percentage": 60.02, "elapsed_time": "4:21:44", "remaining_time": "2:54:20"}
|
||||
{"current_steps": 2320, "total_steps": 3857, "loss": 0.2396, "lr": 1.6443875303878794e-05, "epoch": 4.2105263157894735, "percentage": 60.15, "elapsed_time": "4:22:22", "remaining_time": "2:53:49"}
|
||||
{"current_steps": 2325, "total_steps": 3857, "loss": 0.2461, "lr": 1.6354844520419697e-05, "epoch": 4.219600725952813, "percentage": 60.28, "elapsed_time": "4:22:52", "remaining_time": "2:53:12"}
|
||||
{"current_steps": 2330, "total_steps": 3857, "loss": 0.2379, "lr": 1.6265888389716146e-05, "epoch": 4.228675136116152, "percentage": 60.41, "elapsed_time": "4:23:26", "remaining_time": "2:52:38"}
|
||||
{"current_steps": 2335, "total_steps": 3857, "loss": 0.2493, "lr": 1.6177008733589004e-05, "epoch": 4.237749546279492, "percentage": 60.54, "elapsed_time": "4:24:00", "remaining_time": "2:52:05"}
|
||||
{"current_steps": 2340, "total_steps": 3857, "loss": 0.246, "lr": 1.608820737229293e-05, "epoch": 4.246823956442832, "percentage": 60.67, "elapsed_time": "4:24:32", "remaining_time": "2:51:30"}
|
||||
{"current_steps": 2345, "total_steps": 3857, "loss": 0.261, "lr": 1.5999486124479115e-05, "epoch": 4.2558983666061705, "percentage": 60.8, "elapsed_time": "4:25:05", "remaining_time": "2:50:55"}
|
||||
{"current_steps": 2350, "total_steps": 3857, "loss": 0.2404, "lr": 1.5910846807157996e-05, "epoch": 4.26497277676951, "percentage": 60.93, "elapsed_time": "4:25:31", "remaining_time": "2:50:16"}
|
||||
{"current_steps": 2355, "total_steps": 3857, "loss": 0.2603, "lr": 1.5822291235662114e-05, "epoch": 4.274047186932849, "percentage": 61.06, "elapsed_time": "4:26:08", "remaining_time": "2:49:44"}
|
||||
{"current_steps": 2360, "total_steps": 3857, "loss": 0.2277, "lr": 1.573382122360886e-05, "epoch": 4.283121597096189, "percentage": 61.19, "elapsed_time": "4:26:42", "remaining_time": "2:49:10"}
|
||||
{"current_steps": 2365, "total_steps": 3857, "loss": 0.2618, "lr": 1.5645438582863393e-05, "epoch": 4.292196007259528, "percentage": 61.32, "elapsed_time": "4:27:21", "remaining_time": "2:48:39"}
|
||||
{"current_steps": 2370, "total_steps": 3857, "loss": 0.2415, "lr": 1.5557145123501504e-05, "epoch": 4.3012704174228675, "percentage": 61.45, "elapsed_time": "4:27:55", "remaining_time": "2:48:06"}
|
||||
{"current_steps": 2375, "total_steps": 3857, "loss": 0.2336, "lr": 1.5468942653772546e-05, "epoch": 4.310344827586207, "percentage": 61.58, "elapsed_time": "4:28:26", "remaining_time": "2:47:30"}
|
||||
{"current_steps": 2380, "total_steps": 3857, "loss": 0.2395, "lr": 1.5380832980062393e-05, "epoch": 4.319419237749546, "percentage": 61.71, "elapsed_time": "4:28:59", "remaining_time": "2:46:55"}
|
||||
{"current_steps": 2385, "total_steps": 3857, "loss": 0.2351, "lr": 1.5292817906856486e-05, "epoch": 4.328493647912886, "percentage": 61.84, "elapsed_time": "4:29:28", "remaining_time": "2:46:18"}
|
||||
{"current_steps": 2390, "total_steps": 3857, "loss": 0.2401, "lr": 1.520489923670282e-05, "epoch": 4.337568058076225, "percentage": 61.97, "elapsed_time": "4:30:05", "remaining_time": "2:45:47"}
|
||||
{"current_steps": 2395, "total_steps": 3857, "loss": 0.2682, "lr": 1.511707877017508e-05, "epoch": 4.346642468239565, "percentage": 62.09, "elapsed_time": "4:30:37", "remaining_time": "2:45:12"}
|
||||
{"current_steps": 2400, "total_steps": 3857, "loss": 0.2394, "lr": 1.5029358305835714e-05, "epoch": 4.3557168784029034, "percentage": 62.22, "elapsed_time": "4:31:08", "remaining_time": "2:44:36"}
|
||||
{"current_steps": 2405, "total_steps": 3857, "loss": 0.2615, "lr": 1.494173964019916e-05, "epoch": 4.364791288566243, "percentage": 62.35, "elapsed_time": "4:31:49", "remaining_time": "2:44:06"}
|
||||
{"current_steps": 2410, "total_steps": 3857, "loss": 0.2237, "lr": 1.4854224567694981e-05, "epoch": 4.373865698729583, "percentage": 62.48, "elapsed_time": "4:32:21", "remaining_time": "2:43:31"}
|
||||
{"current_steps": 2415, "total_steps": 3857, "loss": 0.2582, "lr": 1.476681488063119e-05, "epoch": 4.382940108892922, "percentage": 62.61, "elapsed_time": "4:32:54", "remaining_time": "2:42:57"}
|
||||
{"current_steps": 2420, "total_steps": 3857, "loss": 0.2335, "lr": 1.467951236915748e-05, "epoch": 4.392014519056262, "percentage": 62.74, "elapsed_time": "4:33:27", "remaining_time": "2:42:22"}
|
||||
{"current_steps": 2425, "total_steps": 3857, "loss": 0.2327, "lr": 1.4592318821228612e-05, "epoch": 4.4010889292196005, "percentage": 62.87, "elapsed_time": "4:34:02", "remaining_time": "2:41:49"}
|
||||
{"current_steps": 2430, "total_steps": 3857, "loss": 0.2532, "lr": 1.4505236022567748e-05, "epoch": 4.41016333938294, "percentage": 63.0, "elapsed_time": "4:34:42", "remaining_time": "2:41:19"}
|
||||
{"current_steps": 2435, "total_steps": 3857, "loss": 0.2497, "lr": 1.4418265756629929e-05, "epoch": 4.419237749546279, "percentage": 63.13, "elapsed_time": "4:35:18", "remaining_time": "2:40:46"}
|
||||
{"current_steps": 2440, "total_steps": 3857, "loss": 0.2456, "lr": 1.433140980456551e-05, "epoch": 4.428312159709619, "percentage": 63.26, "elapsed_time": "4:35:53", "remaining_time": "2:40:12"}
|
||||
{"current_steps": 2445, "total_steps": 3857, "loss": 0.2277, "lr": 1.4244669945183716e-05, "epoch": 4.437386569872959, "percentage": 63.39, "elapsed_time": "4:36:18", "remaining_time": "2:39:34"}
|
||||
{"current_steps": 2450, "total_steps": 3857, "loss": 0.252, "lr": 1.415804795491617e-05, "epoch": 4.4464609800362975, "percentage": 63.52, "elapsed_time": "4:36:59", "remaining_time": "2:39:04"}
|
||||
{"current_steps": 2455, "total_steps": 3857, "loss": 0.2283, "lr": 1.407154560778057e-05, "epoch": 4.455535390199637, "percentage": 63.65, "elapsed_time": "4:37:32", "remaining_time": "2:38:29"}
|
||||
{"current_steps": 2460, "total_steps": 3857, "loss": 0.231, "lr": 1.398516467534429e-05, "epoch": 4.464609800362976, "percentage": 63.78, "elapsed_time": "4:38:06", "remaining_time": "2:37:56"}
|
||||
{"current_steps": 2465, "total_steps": 3857, "loss": 0.229, "lr": 1.3898906926688152e-05, "epoch": 4.473684210526316, "percentage": 63.91, "elapsed_time": "4:38:42", "remaining_time": "2:37:23"}
|
||||
{"current_steps": 2470, "total_steps": 3857, "loss": 0.2304, "lr": 1.381277412837017e-05, "epoch": 4.482758620689655, "percentage": 64.04, "elapsed_time": "4:39:15", "remaining_time": "2:36:48"}
|
||||
{"current_steps": 2475, "total_steps": 3857, "loss": 0.2512, "lr": 1.372676804438938e-05, "epoch": 4.491833030852995, "percentage": 64.17, "elapsed_time": "4:39:49", "remaining_time": "2:36:15"}
|
||||
{"current_steps": 2480, "total_steps": 3857, "loss": 0.2355, "lr": 1.3640890436149698e-05, "epoch": 4.500907441016334, "percentage": 64.3, "elapsed_time": "4:40:23", "remaining_time": "2:35:41"}
|
||||
{"current_steps": 2485, "total_steps": 3857, "loss": 0.2644, "lr": 1.355514306242388e-05, "epoch": 4.509981851179673, "percentage": 64.43, "elapsed_time": "4:40:56", "remaining_time": "2:35:06"}
|
||||
{"current_steps": 2490, "total_steps": 3857, "loss": 0.2518, "lr": 1.346952767931744e-05, "epoch": 4.519056261343013, "percentage": 64.56, "elapsed_time": "4:41:27", "remaining_time": "2:34:31"}
|
||||
{"current_steps": 2495, "total_steps": 3857, "loss": 0.2589, "lr": 1.3384046040232774e-05, "epoch": 4.528130671506352, "percentage": 64.69, "elapsed_time": "4:42:00", "remaining_time": "2:33:56"}
|
||||
{"current_steps": 2500, "total_steps": 3857, "loss": 0.2255, "lr": 1.3298699895833162e-05, "epoch": 4.537205081669692, "percentage": 64.82, "elapsed_time": "4:42:39", "remaining_time": "2:33:25"}
|
||||
{"current_steps": 2505, "total_steps": 3857, "loss": 0.2262, "lr": 1.321349099400699e-05, "epoch": 4.5462794918330305, "percentage": 64.95, "elapsed_time": "4:43:12", "remaining_time": "2:32:51"}
|
||||
{"current_steps": 2510, "total_steps": 3857, "loss": 0.2186, "lr": 1.3128421079831873e-05, "epoch": 4.55535390199637, "percentage": 65.08, "elapsed_time": "4:43:49", "remaining_time": "2:32:18"}
|
||||
{"current_steps": 2515, "total_steps": 3857, "loss": 0.253, "lr": 1.3043491895539004e-05, "epoch": 4.564428312159709, "percentage": 65.21, "elapsed_time": "4:44:18", "remaining_time": "2:31:42"}
|
||||
{"current_steps": 2520, "total_steps": 3857, "loss": 0.2443, "lr": 1.2958705180477394e-05, "epoch": 4.573502722323049, "percentage": 65.34, "elapsed_time": "4:44:57", "remaining_time": "2:31:11"}
|
||||
{"current_steps": 2525, "total_steps": 3857, "loss": 0.2528, "lr": 1.2874062671078317e-05, "epoch": 4.582577132486389, "percentage": 65.47, "elapsed_time": "4:45:25", "remaining_time": "2:30:34"}
|
||||
{"current_steps": 2530, "total_steps": 3857, "loss": 0.2295, "lr": 1.2789566100819686e-05, "epoch": 4.5916515426497275, "percentage": 65.6, "elapsed_time": "4:45:56", "remaining_time": "2:29:58"}
|
||||
{"current_steps": 2535, "total_steps": 3857, "loss": 0.2398, "lr": 1.2705217200190604e-05, "epoch": 4.600725952813067, "percentage": 65.72, "elapsed_time": "4:46:23", "remaining_time": "2:29:21"}
|
||||
{"current_steps": 2540, "total_steps": 3857, "loss": 0.2389, "lr": 1.2621017696655881e-05, "epoch": 4.609800362976406, "percentage": 65.85, "elapsed_time": "4:46:57", "remaining_time": "2:28:47"}
|
||||
{"current_steps": 2545, "total_steps": 3857, "loss": 0.2449, "lr": 1.2536969314620702e-05, "epoch": 4.618874773139746, "percentage": 65.98, "elapsed_time": "4:47:21", "remaining_time": "2:28:08"}
|
||||
{"current_steps": 2550, "total_steps": 3857, "loss": 0.2473, "lr": 1.2453073775395257e-05, "epoch": 4.627949183303086, "percentage": 66.11, "elapsed_time": "4:47:50", "remaining_time": "2:27:32"}
|
||||
{"current_steps": 2555, "total_steps": 3857, "loss": 0.2389, "lr": 1.2369332797159543e-05, "epoch": 4.637023593466425, "percentage": 66.24, "elapsed_time": "4:48:21", "remaining_time": "2:26:56"}
|
||||
{"current_steps": 2560, "total_steps": 3857, "loss": 0.2409, "lr": 1.228574809492812e-05, "epoch": 4.646098003629764, "percentage": 66.37, "elapsed_time": "4:48:56", "remaining_time": "2:26:23"}
|
||||
{"current_steps": 2565, "total_steps": 3857, "loss": 0.2431, "lr": 1.2202321380515046e-05, "epoch": 4.655172413793103, "percentage": 66.5, "elapsed_time": "4:49:27", "remaining_time": "2:25:48"}
|
||||
{"current_steps": 2570, "total_steps": 3857, "loss": 0.2344, "lr": 1.2119054362498766e-05, "epoch": 4.664246823956443, "percentage": 66.63, "elapsed_time": "4:50:01", "remaining_time": "2:25:14"}
|
||||
{"current_steps": 2575, "total_steps": 3857, "loss": 0.225, "lr": 1.2035948746187165e-05, "epoch": 4.673321234119782, "percentage": 66.76, "elapsed_time": "4:50:31", "remaining_time": "2:24:38"}
|
||||
{"current_steps": 2580, "total_steps": 3857, "loss": 0.2417, "lr": 1.1953006233582598e-05, "epoch": 4.682395644283122, "percentage": 66.89, "elapsed_time": "4:51:04", "remaining_time": "2:24:04"}
|
||||
{"current_steps": 2585, "total_steps": 3857, "loss": 0.2439, "lr": 1.1870228523347084e-05, "epoch": 4.691470054446461, "percentage": 67.02, "elapsed_time": "4:51:38", "remaining_time": "2:23:30"}
|
||||
{"current_steps": 2590, "total_steps": 3857, "loss": 0.2752, "lr": 1.1787617310767466e-05, "epoch": 4.7005444646098, "percentage": 67.15, "elapsed_time": "4:52:09", "remaining_time": "2:22:55"}
|
||||
{"current_steps": 2595, "total_steps": 3857, "loss": 0.2467, "lr": 1.1705174287720736e-05, "epoch": 4.70961887477314, "percentage": 67.28, "elapsed_time": "4:52:39", "remaining_time": "2:22:19"}
|
||||
{"current_steps": 2600, "total_steps": 3857, "loss": 0.2355, "lr": 1.1622901142639353e-05, "epoch": 4.718693284936479, "percentage": 67.41, "elapsed_time": "4:53:15", "remaining_time": "2:21:46"}
|
||||
{"current_steps": 2605, "total_steps": 3857, "loss": 0.2217, "lr": 1.1540799560476685e-05, "epoch": 4.727767695099819, "percentage": 67.54, "elapsed_time": "4:53:46", "remaining_time": "2:21:11"}
|
||||
{"current_steps": 2610, "total_steps": 3857, "loss": 0.2352, "lr": 1.1458871222672479e-05, "epoch": 4.7368421052631575, "percentage": 67.67, "elapsed_time": "4:54:23", "remaining_time": "2:20:39"}
|
||||
{"current_steps": 2615, "total_steps": 3857, "loss": 0.2501, "lr": 1.1377117807118473e-05, "epoch": 4.745916515426497, "percentage": 67.8, "elapsed_time": "4:55:01", "remaining_time": "2:20:07"}
|
||||
{"current_steps": 2620, "total_steps": 3857, "loss": 0.2396, "lr": 1.129554098812394e-05, "epoch": 4.754990925589837, "percentage": 67.93, "elapsed_time": "4:55:29", "remaining_time": "2:19:30"}
|
||||
{"current_steps": 2625, "total_steps": 3857, "loss": 0.2421, "lr": 1.1214142436381528e-05, "epoch": 4.764065335753176, "percentage": 68.06, "elapsed_time": "4:55:58", "remaining_time": "2:18:54"}
|
||||
{"current_steps": 2630, "total_steps": 3857, "loss": 0.2802, "lr": 1.1132923818932901e-05, "epoch": 4.773139745916516, "percentage": 68.19, "elapsed_time": "4:56:30", "remaining_time": "2:18:19"}
|
||||
{"current_steps": 2635, "total_steps": 3857, "loss": 0.2398, "lr": 1.1051886799134732e-05, "epoch": 4.782214156079855, "percentage": 68.32, "elapsed_time": "4:57:01", "remaining_time": "2:17:44"}
|
||||
{"current_steps": 2640, "total_steps": 3857, "loss": 0.2527, "lr": 1.0971033036624538e-05, "epoch": 4.791288566243194, "percentage": 68.45, "elapsed_time": "4:57:39", "remaining_time": "2:17:12"}
|
||||
{"current_steps": 2645, "total_steps": 3857, "loss": 0.2463, "lr": 1.0890364187286742e-05, "epoch": 4.800362976406533, "percentage": 68.58, "elapsed_time": "4:58:11", "remaining_time": "2:16:38"}
|
||||
{"current_steps": 2650, "total_steps": 3857, "loss": 0.216, "lr": 1.0809881903218744e-05, "epoch": 4.809437386569873, "percentage": 68.71, "elapsed_time": "4:58:42", "remaining_time": "2:16:03"}
|
||||
{"current_steps": 2655, "total_steps": 3857, "loss": 0.2438, "lr": 1.0729587832697088e-05, "epoch": 4.818511796733213, "percentage": 68.84, "elapsed_time": "4:59:16", "remaining_time": "2:15:29"}
|
||||
{"current_steps": 2660, "total_steps": 3857, "loss": 0.241, "lr": 1.0649483620143699e-05, "epoch": 4.827586206896552, "percentage": 68.97, "elapsed_time": "4:59:53", "remaining_time": "2:14:56"}
|
||||
{"current_steps": 2665, "total_steps": 3857, "loss": 0.2518, "lr": 1.056957090609224e-05, "epoch": 4.836660617059891, "percentage": 69.1, "elapsed_time": "5:00:26", "remaining_time": "2:14:22"}
|
||||
{"current_steps": 2670, "total_steps": 3857, "loss": 0.2376, "lr": 1.0489851327154442e-05, "epoch": 4.84573502722323, "percentage": 69.22, "elapsed_time": "5:00:54", "remaining_time": "2:13:46"}
|
||||
{"current_steps": 2675, "total_steps": 3857, "loss": 0.2434, "lr": 1.0410326515986681e-05, "epoch": 4.85480943738657, "percentage": 69.35, "elapsed_time": "5:01:23", "remaining_time": "2:13:10"}
|
||||
{"current_steps": 2680, "total_steps": 3857, "loss": 0.2449, "lr": 1.0330998101256435e-05, "epoch": 4.863883847549909, "percentage": 69.48, "elapsed_time": "5:02:00", "remaining_time": "2:12:38"}
|
||||
{"current_steps": 2685, "total_steps": 3857, "loss": 0.2205, "lr": 1.0251867707609038e-05, "epoch": 4.872958257713249, "percentage": 69.61, "elapsed_time": "5:02:34", "remaining_time": "2:12:04"}
|
||||
{"current_steps": 2690, "total_steps": 3857, "loss": 0.2224, "lr": 1.0172936955634317e-05, "epoch": 4.882032667876588, "percentage": 69.74, "elapsed_time": "5:03:17", "remaining_time": "2:11:34"}
|
||||
{"current_steps": 2695, "total_steps": 3857, "loss": 0.2579, "lr": 1.0094207461833447e-05, "epoch": 4.891107078039927, "percentage": 69.87, "elapsed_time": "5:03:53", "remaining_time": "2:11:01"}
|
||||
{"current_steps": 2700, "total_steps": 3857, "loss": 0.2381, "lr": 1.0015680838585865e-05, "epoch": 4.900181488203267, "percentage": 70.0, "elapsed_time": "5:04:25", "remaining_time": "2:10:27"}
|
||||
{"current_steps": 2705, "total_steps": 3857, "loss": 0.2234, "lr": 9.93735869411617e-06, "epoch": 4.909255898366606, "percentage": 70.13, "elapsed_time": "5:05:03", "remaining_time": "2:09:55"}
|
||||
{"current_steps": 2710, "total_steps": 3857, "loss": 0.2416, "lr": 9.85924263246129e-06, "epoch": 4.918330308529946, "percentage": 70.26, "elapsed_time": "5:05:43", "remaining_time": "2:09:23"}
|
||||
{"current_steps": 2715, "total_steps": 3857, "loss": 0.2387, "lr": 9.781334253437527e-06, "epoch": 4.927404718693285, "percentage": 70.39, "elapsed_time": "5:06:14", "remaining_time": "2:08:48"}
|
||||
{"current_steps": 2720, "total_steps": 3857, "loss": 0.2319, "lr": 9.703635152607889e-06, "epoch": 4.936479128856624, "percentage": 70.52, "elapsed_time": "5:06:42", "remaining_time": "2:08:12"}
|
||||
{"current_steps": 2725, "total_steps": 3857, "loss": 0.2377, "lr": 9.626146921249346e-06, "epoch": 4.945553539019964, "percentage": 70.65, "elapsed_time": "5:07:14", "remaining_time": "2:07:37"}
|
||||
{"current_steps": 2730, "total_steps": 3857, "loss": 0.2283, "lr": 9.548871146320264e-06, "epoch": 4.954627949183303, "percentage": 70.78, "elapsed_time": "5:07:42", "remaining_time": "2:07:01"}
|
||||
{"current_steps": 2735, "total_steps": 3857, "loss": 0.2337, "lr": 9.47180941042791e-06, "epoch": 4.963702359346643, "percentage": 70.91, "elapsed_time": "5:08:20", "remaining_time": "2:06:29"}
|
||||
{"current_steps": 2740, "total_steps": 3857, "loss": 0.2292, "lr": 9.394963291796024e-06, "epoch": 4.972776769509982, "percentage": 71.04, "elapsed_time": "5:08:51", "remaining_time": "2:05:54"}
|
||||
{"current_steps": 2745, "total_steps": 3857, "loss": 0.2369, "lr": 9.318334364232507e-06, "epoch": 4.981851179673321, "percentage": 71.17, "elapsed_time": "5:09:22", "remaining_time": "2:05:19"}
|
||||
{"current_steps": 2750, "total_steps": 3857, "loss": 0.2351, "lr": 9.241924197097214e-06, "epoch": 4.99092558983666, "percentage": 71.3, "elapsed_time": "5:09:56", "remaining_time": "2:04:46"}
|
||||
{"current_steps": 2755, "total_steps": 3857, "loss": 0.2466, "lr": 9.165734355269737e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "5:10:34", "remaining_time": "2:04:13"}
|
||||
{"current_steps": 2760, "total_steps": 3857, "loss": 0.2249, "lr": 9.08976639911747e-06, "epoch": 5.00907441016334, "percentage": 71.56, "elapsed_time": "5:11:11", "remaining_time": "2:03:41"}
|
||||
{"current_steps": 2765, "total_steps": 3857, "loss": 0.2351, "lr": 9.014021884463522e-06, "epoch": 5.018148820326679, "percentage": 71.69, "elapsed_time": "5:11:42", "remaining_time": "2:03:06"}
|
||||
{"current_steps": 2770, "total_steps": 3857, "loss": 0.2082, "lr": 8.938502362554984e-06, "epoch": 5.027223230490018, "percentage": 71.82, "elapsed_time": "5:12:16", "remaining_time": "2:02:32"}
|
||||
{"current_steps": 2775, "total_steps": 3857, "loss": 0.248, "lr": 8.863209380031064e-06, "epoch": 5.036297640653357, "percentage": 71.95, "elapsed_time": "5:12:47", "remaining_time": "2:01:57"}
|
||||
{"current_steps": 2780, "total_steps": 3857, "loss": 0.2243, "lr": 8.78814447889145e-06, "epoch": 5.045372050816697, "percentage": 72.08, "elapsed_time": "5:13:18", "remaining_time": "2:01:22"}
|
||||
{"current_steps": 2785, "total_steps": 3857, "loss": 0.2218, "lr": 8.713309196464725e-06, "epoch": 5.054446460980036, "percentage": 72.21, "elapsed_time": "5:13:51", "remaining_time": "2:00:48"}
|
||||
{"current_steps": 2790, "total_steps": 3857, "loss": 0.2209, "lr": 8.638705065376887e-06, "epoch": 5.063520871143376, "percentage": 72.34, "elapsed_time": "5:14:24", "remaining_time": "2:00:14"}
|
||||
{"current_steps": 2795, "total_steps": 3857, "loss": 0.2113, "lr": 8.564333613519945e-06, "epoch": 5.072595281306715, "percentage": 72.47, "elapsed_time": "5:14:59", "remaining_time": "1:59:40"}
|
||||
{"current_steps": 2800, "total_steps": 3857, "loss": 0.229, "lr": 8.490196364020676e-06, "epoch": 5.081669691470054, "percentage": 72.6, "elapsed_time": "5:15:31", "remaining_time": "1:59:06"}
|
||||
{"current_steps": 2805, "total_steps": 3857, "loss": 0.2286, "lr": 8.41629483520933e-06, "epoch": 5.090744101633394, "percentage": 72.72, "elapsed_time": "5:16:00", "remaining_time": "1:58:31"}
|
||||
{"current_steps": 2810, "total_steps": 3857, "loss": 0.2135, "lr": 8.342630540588667e-06, "epoch": 5.099818511796733, "percentage": 72.85, "elapsed_time": "5:16:32", "remaining_time": "1:57:56"}
|
||||
{"current_steps": 2815, "total_steps": 3857, "loss": 0.2341, "lr": 8.269204988802835e-06, "epoch": 5.108892921960073, "percentage": 72.98, "elapsed_time": "5:17:11", "remaining_time": "1:57:24"}
|
||||
{"current_steps": 2820, "total_steps": 3857, "loss": 0.2381, "lr": 8.196019683606576e-06, "epoch": 5.117967332123412, "percentage": 73.11, "elapsed_time": "5:17:46", "remaining_time": "1:56:51"}
|
||||
{"current_steps": 2825, "total_steps": 3857, "loss": 0.2205, "lr": 8.123076123834356e-06, "epoch": 5.127041742286751, "percentage": 73.24, "elapsed_time": "5:18:26", "remaining_time": "1:56:19"}
|
||||
{"current_steps": 2830, "total_steps": 3857, "loss": 0.239, "lr": 8.050375803369712e-06, "epoch": 5.136116152450091, "percentage": 73.37, "elapsed_time": "5:19:01", "remaining_time": "1:55:46"}
|
||||
{"current_steps": 2835, "total_steps": 3857, "loss": 0.2201, "lr": 7.977920211114629e-06, "epoch": 5.14519056261343, "percentage": 73.5, "elapsed_time": "5:19:29", "remaining_time": "1:55:10"}
|
||||
{"current_steps": 2840, "total_steps": 3857, "loss": 0.2316, "lr": 7.905710830959071e-06, "epoch": 5.15426497277677, "percentage": 73.63, "elapsed_time": "5:19:57", "remaining_time": "1:54:34"}
|
||||
{"current_steps": 2845, "total_steps": 3857, "loss": 0.2311, "lr": 7.833749141750572e-06, "epoch": 5.163339382940109, "percentage": 73.76, "elapsed_time": "5:20:34", "remaining_time": "1:54:01"}
|
||||
{"current_steps": 2850, "total_steps": 3857, "loss": 0.2474, "lr": 7.762036617263962e-06, "epoch": 5.172413793103448, "percentage": 73.89, "elapsed_time": "5:21:04", "remaining_time": "1:53:26"}
|
||||
{"current_steps": 2855, "total_steps": 3857, "loss": 0.2304, "lr": 7.690574726171174e-06, "epoch": 5.181488203266787, "percentage": 74.02, "elapsed_time": "5:21:38", "remaining_time": "1:52:53"}
|
||||
{"current_steps": 2860, "total_steps": 3857, "loss": 0.2377, "lr": 7.619364932011193e-06, "epoch": 5.190562613430127, "percentage": 74.15, "elapsed_time": "5:22:07", "remaining_time": "1:52:17"}
|
||||
{"current_steps": 2865, "total_steps": 3857, "loss": 0.2247, "lr": 7.548408693160016e-06, "epoch": 5.199637023593467, "percentage": 74.28, "elapsed_time": "5:22:40", "remaining_time": "1:51:43"}
|
||||
{"current_steps": 2870, "total_steps": 3857, "loss": 0.2364, "lr": 7.477707462800879e-06, "epoch": 5.208711433756806, "percentage": 74.41, "elapsed_time": "5:23:16", "remaining_time": "1:51:10"}
|
||||
{"current_steps": 2875, "total_steps": 3857, "loss": 0.216, "lr": 7.407262688894416e-06, "epoch": 5.217785843920145, "percentage": 74.54, "elapsed_time": "5:23:49", "remaining_time": "1:50:36"}
|
||||
{"current_steps": 2880, "total_steps": 3857, "loss": 0.2318, "lr": 7.337075814149044e-06, "epoch": 5.226860254083484, "percentage": 74.67, "elapsed_time": "5:24:13", "remaining_time": "1:49:59"}
|
||||
{"current_steps": 2885, "total_steps": 3857, "loss": 0.1996, "lr": 7.267148275991402e-06, "epoch": 5.235934664246824, "percentage": 74.8, "elapsed_time": "5:24:42", "remaining_time": "1:49:24"}
|
||||
{"current_steps": 2890, "total_steps": 3857, "loss": 0.2146, "lr": 7.197481506536927e-06, "epoch": 5.245009074410163, "percentage": 74.93, "elapsed_time": "5:25:19", "remaining_time": "1:48:51"}
|
||||
{"current_steps": 2895, "total_steps": 3857, "loss": 0.2095, "lr": 7.128076932560504e-06, "epoch": 5.254083484573503, "percentage": 75.06, "elapsed_time": "5:25:49", "remaining_time": "1:48:16"}
|
||||
{"current_steps": 2900, "total_steps": 3857, "loss": 0.2315, "lr": 7.0589359754672675e-06, "epoch": 5.2631578947368425, "percentage": 75.19, "elapsed_time": "5:26:25", "remaining_time": "1:47:43"}
|
||||
{"current_steps": 2905, "total_steps": 3857, "loss": 0.2303, "lr": 6.9900600512634605e-06, "epoch": 5.272232304900181, "percentage": 75.32, "elapsed_time": "5:27:00", "remaining_time": "1:47:09"}
|
||||
{"current_steps": 2910, "total_steps": 3857, "loss": 0.236, "lr": 6.921450570527499e-06, "epoch": 5.281306715063521, "percentage": 75.45, "elapsed_time": "5:27:38", "remaining_time": "1:46:37"}
|
||||
{"current_steps": 2915, "total_steps": 3857, "loss": 0.206, "lr": 6.853108938380975e-06, "epoch": 5.29038112522686, "percentage": 75.58, "elapsed_time": "5:28:06", "remaining_time": "1:46:01"}
|
||||
{"current_steps": 2920, "total_steps": 3857, "loss": 0.2253, "lr": 6.785036554459999e-06, "epoch": 5.2994555353902, "percentage": 75.71, "elapsed_time": "5:28:39", "remaining_time": "1:45:27"}
|
||||
{"current_steps": 2925, "total_steps": 3857, "loss": 0.2284, "lr": 6.7172348128864525e-06, "epoch": 5.308529945553539, "percentage": 75.84, "elapsed_time": "5:29:13", "remaining_time": "1:44:54"}
|
||||
{"current_steps": 2930, "total_steps": 3857, "loss": 0.2273, "lr": 6.64970510223947e-06, "epoch": 5.317604355716878, "percentage": 75.97, "elapsed_time": "5:29:46", "remaining_time": "1:44:20"}
|
||||
{"current_steps": 2935, "total_steps": 3857, "loss": 0.2181, "lr": 6.582448805526991e-06, "epoch": 5.326678765880218, "percentage": 76.1, "elapsed_time": "5:30:19", "remaining_time": "1:43:45"}
|
||||
{"current_steps": 2940, "total_steps": 3857, "loss": 0.2207, "lr": 6.5154673001574474e-06, "epoch": 5.335753176043557, "percentage": 76.23, "elapsed_time": "5:30:55", "remaining_time": "1:43:12"}
|
||||
{"current_steps": 2945, "total_steps": 3857, "loss": 0.2191, "lr": 6.448761957911542e-06, "epoch": 5.344827586206897, "percentage": 76.35, "elapsed_time": "5:31:27", "remaining_time": "1:42:38"}
|
||||
{"current_steps": 2950, "total_steps": 3857, "loss": 0.2234, "lr": 6.382334144914157e-06, "epoch": 5.353901996370236, "percentage": 76.48, "elapsed_time": "5:32:00", "remaining_time": "1:42:04"}
|
||||
{"current_steps": 2955, "total_steps": 3857, "loss": 0.2223, "lr": 6.316185221606377e-06, "epoch": 5.362976406533575, "percentage": 76.61, "elapsed_time": "5:32:30", "remaining_time": "1:41:29"}
|
||||
{"current_steps": 2960, "total_steps": 3857, "loss": 0.2227, "lr": 6.250316542717645e-06, "epoch": 5.372050816696914, "percentage": 76.74, "elapsed_time": "5:33:03", "remaining_time": "1:40:55"}
|
||||
{"current_steps": 2965, "total_steps": 3857, "loss": 0.2542, "lr": 6.184729457237965e-06, "epoch": 5.381125226860254, "percentage": 76.87, "elapsed_time": "5:33:43", "remaining_time": "1:40:24"}
|
||||
{"current_steps": 2970, "total_steps": 3857, "loss": 0.2233, "lr": 6.119425308390352e-06, "epoch": 5.390199637023594, "percentage": 77.0, "elapsed_time": "5:34:13", "remaining_time": "1:39:49"}
|
||||
{"current_steps": 2975, "total_steps": 3857, "loss": 0.2202, "lr": 6.054405433603252e-06, "epoch": 5.399274047186933, "percentage": 77.13, "elapsed_time": "5:34:46", "remaining_time": "1:39:14"}
|
||||
{"current_steps": 2980, "total_steps": 3857, "loss": 0.2286, "lr": 5.989671164483189e-06, "epoch": 5.4083484573502725, "percentage": 77.26, "elapsed_time": "5:35:22", "remaining_time": "1:38:41"}
|
||||
{"current_steps": 2985, "total_steps": 3857, "loss": 0.2133, "lr": 5.925223826787488e-06, "epoch": 5.417422867513611, "percentage": 77.39, "elapsed_time": "5:35:53", "remaining_time": "1:38:07"}
|
||||
{"current_steps": 2990, "total_steps": 3857, "loss": 0.254, "lr": 5.861064740397113e-06, "epoch": 5.426497277676951, "percentage": 77.52, "elapsed_time": "5:36:28", "remaining_time": "1:37:34"}
|
||||
{"current_steps": 2995, "total_steps": 3857, "loss": 0.2282, "lr": 5.7971952192896555e-06, "epoch": 5.43557168784029, "percentage": 77.65, "elapsed_time": "5:37:02", "remaining_time": "1:37:00"}
|
||||
{"current_steps": 3000, "total_steps": 3857, "loss": 0.2175, "lr": 5.733616571512401e-06, "epoch": 5.44464609800363, "percentage": 77.78, "elapsed_time": "5:37:29", "remaining_time": "1:36:24"}
|
||||
{"current_steps": 3005, "total_steps": 3857, "loss": 0.2246, "lr": 5.67033009915555e-06, "epoch": 5.4537205081669695, "percentage": 77.91, "elapsed_time": "5:38:08", "remaining_time": "1:35:52"}
|
||||
{"current_steps": 3010, "total_steps": 3857, "loss": 0.239, "lr": 5.6073370983255785e-06, "epoch": 5.462794918330308, "percentage": 78.04, "elapsed_time": "5:38:53", "remaining_time": "1:35:21"}
|
||||
{"current_steps": 3015, "total_steps": 3857, "loss": 0.2269, "lr": 5.544638859118623e-06, "epoch": 5.471869328493648, "percentage": 78.17, "elapsed_time": "5:39:30", "remaining_time": "1:34:48"}
|
||||
{"current_steps": 3020, "total_steps": 3857, "loss": 0.2183, "lr": 5.482236665594154e-06, "epoch": 5.480943738656987, "percentage": 78.3, "elapsed_time": "5:40:03", "remaining_time": "1:34:14"}
|
||||
{"current_steps": 3025, "total_steps": 3857, "loss": 0.2422, "lr": 5.4201317957485775e-06, "epoch": 5.490018148820327, "percentage": 78.43, "elapsed_time": "5:40:30", "remaining_time": "1:33:39"}
|
||||
{"current_steps": 3030, "total_steps": 3857, "loss": 0.2372, "lr": 5.35832552148916e-06, "epoch": 5.499092558983666, "percentage": 78.56, "elapsed_time": "5:41:04", "remaining_time": "1:33:05"}
|
||||
{"current_steps": 3035, "total_steps": 3857, "loss": 0.2243, "lr": 5.296819108607907e-06, "epoch": 5.508166969147005, "percentage": 78.69, "elapsed_time": "5:41:38", "remaining_time": "1:32:31"}
|
||||
{"current_steps": 3040, "total_steps": 3857, "loss": 0.2348, "lr": 5.235613816755669e-06, "epoch": 5.517241379310345, "percentage": 78.82, "elapsed_time": "5:42:10", "remaining_time": "1:31:57"}
|
||||
{"current_steps": 3045, "total_steps": 3857, "loss": 0.1907, "lr": 5.174710899416342e-06, "epoch": 5.526315789473684, "percentage": 78.95, "elapsed_time": "5:42:44", "remaining_time": "1:31:23"}
|
||||
{"current_steps": 3050, "total_steps": 3857, "loss": 0.2228, "lr": 5.114111603881189e-06, "epoch": 5.535390199637024, "percentage": 79.08, "elapsed_time": "5:43:22", "remaining_time": "1:30:51"}
|
||||
{"current_steps": 3055, "total_steps": 3857, "loss": 0.2297, "lr": 5.053817171223316e-06, "epoch": 5.544464609800363, "percentage": 79.21, "elapsed_time": "5:43:55", "remaining_time": "1:30:17"}
|
||||
{"current_steps": 3060, "total_steps": 3857, "loss": 0.2349, "lr": 4.993828836272226e-06, "epoch": 5.5535390199637025, "percentage": 79.34, "elapsed_time": "5:44:28", "remaining_time": "1:29:43"}
|
||||
{"current_steps": 3065, "total_steps": 3857, "loss": 0.2449, "lr": 4.934147827588545e-06, "epoch": 5.562613430127042, "percentage": 79.47, "elapsed_time": "5:44:59", "remaining_time": "1:29:08"}
|
||||
{"current_steps": 3070, "total_steps": 3857, "loss": 0.2125, "lr": 4.874775367438862e-06, "epoch": 5.571687840290381, "percentage": 79.6, "elapsed_time": "5:45:40", "remaining_time": "1:28:36"}
|
||||
{"current_steps": 3075, "total_steps": 3857, "loss": 0.236, "lr": 4.8157126717706935e-06, "epoch": 5.580762250453721, "percentage": 79.73, "elapsed_time": "5:46:16", "remaining_time": "1:28:03"}
|
||||
{"current_steps": 3080, "total_steps": 3857, "loss": 0.2276, "lr": 4.7569609501875834e-06, "epoch": 5.58983666061706, "percentage": 79.85, "elapsed_time": "5:46:51", "remaining_time": "1:27:30"}
|
||||
{"current_steps": 3085, "total_steps": 3857, "loss": 0.2126, "lr": 4.698521405924328e-06, "epoch": 5.5989110707803995, "percentage": 79.98, "elapsed_time": "5:47:22", "remaining_time": "1:26:55"}
|
||||
{"current_steps": 3090, "total_steps": 3857, "loss": 0.2503, "lr": 4.640395235822326e-06, "epoch": 5.607985480943738, "percentage": 80.11, "elapsed_time": "5:48:01", "remaining_time": "1:26:23"}
|
||||
{"current_steps": 3095, "total_steps": 3857, "loss": 0.2444, "lr": 4.582583630305104e-06, "epoch": 5.617059891107078, "percentage": 80.24, "elapsed_time": "5:48:36", "remaining_time": "1:25:49"}
|
||||
{"current_steps": 3100, "total_steps": 3857, "loss": 0.259, "lr": 4.525087773353865e-06, "epoch": 5.626134301270417, "percentage": 80.37, "elapsed_time": "5:49:10", "remaining_time": "1:25:15"}
|
||||
{"current_steps": 3105, "total_steps": 3857, "loss": 0.2218, "lr": 4.467908842483322e-06, "epoch": 5.635208711433757, "percentage": 80.5, "elapsed_time": "5:49:44", "remaining_time": "1:24:42"}
|
||||
{"current_steps": 3110, "total_steps": 3857, "loss": 0.2221, "lr": 4.4110480087175244e-06, "epoch": 5.6442831215970966, "percentage": 80.63, "elapsed_time": "5:50:16", "remaining_time": "1:24:07"}
|
||||
{"current_steps": 3115, "total_steps": 3857, "loss": 0.2045, "lr": 4.3545064365659015e-06, "epoch": 5.653357531760435, "percentage": 80.76, "elapsed_time": "5:50:56", "remaining_time": "1:23:35"}
|
||||
{"current_steps": 3120, "total_steps": 3857, "loss": 0.2258, "lr": 4.298285283999406e-06, "epoch": 5.662431941923775, "percentage": 80.89, "elapsed_time": "5:51:35", "remaining_time": "1:23:03"}
|
||||
{"current_steps": 3125, "total_steps": 3857, "loss": 0.2244, "lr": 4.2423857024268015e-06, "epoch": 5.671506352087114, "percentage": 81.02, "elapsed_time": "5:52:07", "remaining_time": "1:22:28"}
|
||||
{"current_steps": 3130, "total_steps": 3857, "loss": 0.2431, "lr": 4.186808836671077e-06, "epoch": 5.680580762250454, "percentage": 81.15, "elapsed_time": "5:52:43", "remaining_time": "1:21:55"}
|
||||
{"current_steps": 3135, "total_steps": 3857, "loss": 0.2171, "lr": 4.131555824946007e-06, "epoch": 5.689655172413794, "percentage": 81.28, "elapsed_time": "5:53:18", "remaining_time": "1:21:22"}
|
||||
{"current_steps": 3140, "total_steps": 3857, "loss": 0.2186, "lr": 4.076627798832833e-06, "epoch": 5.6987295825771325, "percentage": 81.41, "elapsed_time": "5:53:54", "remaining_time": "1:20:48"}
|
||||
{"current_steps": 3145, "total_steps": 3857, "loss": 0.2119, "lr": 4.02202588325711e-06, "epoch": 5.707803992740472, "percentage": 81.54, "elapsed_time": "5:54:25", "remaining_time": "1:20:14"}
|
||||
{"current_steps": 3150, "total_steps": 3857, "loss": 0.227, "lr": 3.9677511964656216e-06, "epoch": 5.716878402903811, "percentage": 81.67, "elapsed_time": "5:54:54", "remaining_time": "1:19:39"}
|
||||
{"current_steps": 3155, "total_steps": 3857, "loss": 0.2188, "lr": 3.913804850003542e-06, "epoch": 5.725952813067151, "percentage": 81.8, "elapsed_time": "5:55:27", "remaining_time": "1:19:05"}
|
||||
{"current_steps": 3160, "total_steps": 3857, "loss": 0.2169, "lr": 3.860187948691616e-06, "epoch": 5.73502722323049, "percentage": 81.93, "elapsed_time": "5:55:58", "remaining_time": "1:18:31"}
|
||||
{"current_steps": 3165, "total_steps": 3857, "loss": 0.2451, "lr": 3.806901590603562e-06, "epoch": 5.7441016333938295, "percentage": 82.06, "elapsed_time": "5:56:27", "remaining_time": "1:17:56"}
|
||||
{"current_steps": 3170, "total_steps": 3857, "loss": 0.2151, "lr": 3.753946867043572e-06, "epoch": 5.753176043557168, "percentage": 82.19, "elapsed_time": "5:57:04", "remaining_time": "1:17:23"}
|
||||
{"current_steps": 3175, "total_steps": 3857, "loss": 0.2088, "lr": 3.701324862523972e-06, "epoch": 5.762250453720508, "percentage": 82.32, "elapsed_time": "5:57:36", "remaining_time": "1:16:48"}
|
||||
{"current_steps": 3180, "total_steps": 3857, "loss": 0.22, "lr": 3.649036654742997e-06, "epoch": 5.771324863883848, "percentage": 82.45, "elapsed_time": "5:58:05", "remaining_time": "1:16:14"}
|
||||
{"current_steps": 3185, "total_steps": 3857, "loss": 0.2304, "lr": 3.597083314562735e-06, "epoch": 5.780399274047187, "percentage": 82.58, "elapsed_time": "5:58:44", "remaining_time": "1:15:41"}
|
||||
{"current_steps": 3190, "total_steps": 3857, "loss": 0.2196, "lr": 3.5454659059871755e-06, "epoch": 5.7894736842105265, "percentage": 82.71, "elapsed_time": "5:59:22", "remaining_time": "1:15:08"}
|
||||
{"current_steps": 3195, "total_steps": 3857, "loss": 0.2192, "lr": 3.4941854861404554e-06, "epoch": 5.798548094373865, "percentage": 82.84, "elapsed_time": "5:59:50", "remaining_time": "1:14:33"}
|
||||
{"current_steps": 3200, "total_steps": 3857, "loss": 0.2273, "lr": 3.4432431052451533e-06, "epoch": 5.807622504537205, "percentage": 82.97, "elapsed_time": "6:00:20", "remaining_time": "1:13:58"}
|
||||
{"current_steps": 3205, "total_steps": 3857, "loss": 0.2211, "lr": 3.392639806600846e-06, "epoch": 5.816696914700545, "percentage": 83.1, "elapsed_time": "6:00:55", "remaining_time": "1:13:25"}
|
||||
{"current_steps": 3210, "total_steps": 3857, "loss": 0.2244, "lr": 3.342376626562671e-06, "epoch": 5.825771324863884, "percentage": 83.23, "elapsed_time": "6:01:28", "remaining_time": "1:12:51"}
|
||||
{"current_steps": 3215, "total_steps": 3857, "loss": 0.2368, "lr": 3.2924545945201804e-06, "epoch": 5.834845735027224, "percentage": 83.35, "elapsed_time": "6:02:07", "remaining_time": "1:12:18"}
|
||||
{"current_steps": 3220, "total_steps": 3857, "loss": 0.2062, "lr": 3.2428747328761867e-06, "epoch": 5.8439201451905625, "percentage": 83.48, "elapsed_time": "6:02:40", "remaining_time": "1:11:44"}
|
||||
{"current_steps": 3225, "total_steps": 3857, "loss": 0.2286, "lr": 3.1936380570258698e-06, "epoch": 5.852994555353902, "percentage": 83.61, "elapsed_time": "6:03:10", "remaining_time": "1:11:10"}
|
||||
{"current_steps": 3230, "total_steps": 3857, "loss": 0.2146, "lr": 3.1447455753359633e-06, "epoch": 5.862068965517241, "percentage": 83.74, "elapsed_time": "6:03:33", "remaining_time": "1:10:34"}
|
||||
{"current_steps": 3235, "total_steps": 3857, "loss": 0.2185, "lr": 3.0961982891241083e-06, "epoch": 5.871143375680581, "percentage": 83.87, "elapsed_time": "6:04:04", "remaining_time": "1:10:00"}
|
||||
{"current_steps": 3240, "total_steps": 3857, "loss": 0.2142, "lr": 3.0479971926383366e-06, "epoch": 5.88021778584392, "percentage": 84.0, "elapsed_time": "6:04:39", "remaining_time": "1:09:26"}
|
||||
{"current_steps": 3245, "total_steps": 3857, "loss": 0.2222, "lr": 3.0001432730367443e-06, "epoch": 5.8892921960072595, "percentage": 84.13, "elapsed_time": "6:05:07", "remaining_time": "1:08:51"}
|
||||
{"current_steps": 3250, "total_steps": 3857, "loss": 0.2057, "lr": 2.952637510367207e-06, "epoch": 5.898366606170599, "percentage": 84.26, "elapsed_time": "6:05:40", "remaining_time": "1:08:17"}
|
||||
{"current_steps": 3255, "total_steps": 3857, "loss": 0.2484, "lr": 2.9054808775473886e-06, "epoch": 5.907441016333938, "percentage": 84.39, "elapsed_time": "6:06:08", "remaining_time": "1:07:42"}
|
||||
{"current_steps": 3260, "total_steps": 3857, "loss": 0.241, "lr": 2.8586743403447402e-06, "epoch": 5.916515426497278, "percentage": 84.52, "elapsed_time": "6:06:40", "remaining_time": "1:07:08"}
|
||||
{"current_steps": 3265, "total_steps": 3857, "loss": 0.2336, "lr": 2.81221885735679e-06, "epoch": 5.925589836660617, "percentage": 84.65, "elapsed_time": "6:07:13", "remaining_time": "1:06:34"}
|
||||
{"current_steps": 3270, "total_steps": 3857, "loss": 0.2268, "lr": 2.7661153799914585e-06, "epoch": 5.9346642468239565, "percentage": 84.78, "elapsed_time": "6:07:46", "remaining_time": "1:06:01"}
|
||||
{"current_steps": 3275, "total_steps": 3857, "loss": 0.2485, "lr": 2.7203648524476012e-06, "epoch": 5.943738656987296, "percentage": 84.91, "elapsed_time": "6:08:17", "remaining_time": "1:05:27"}
|
||||
{"current_steps": 3280, "total_steps": 3857, "loss": 0.2178, "lr": 2.6749682116956633e-06, "epoch": 5.952813067150635, "percentage": 85.04, "elapsed_time": "6:08:50", "remaining_time": "1:04:53"}
|
||||
{"current_steps": 3285, "total_steps": 3857, "loss": 0.2268, "lr": 2.629926387458486e-06, "epoch": 5.961887477313975, "percentage": 85.17, "elapsed_time": "6:09:24", "remaining_time": "1:04:19"}
|
||||
{"current_steps": 3290, "total_steps": 3857, "loss": 0.2361, "lr": 2.5852403021922758e-06, "epoch": 5.970961887477314, "percentage": 85.3, "elapsed_time": "6:09:56", "remaining_time": "1:03:45"}
|
||||
{"current_steps": 3295, "total_steps": 3857, "loss": 0.2118, "lr": 2.5409108710677167e-06, "epoch": 5.980036297640654, "percentage": 85.43, "elapsed_time": "6:10:30", "remaining_time": "1:03:11"}
|
||||
{"current_steps": 3300, "total_steps": 3857, "loss": 0.2335, "lr": 2.4969390019511952e-06, "epoch": 5.9891107078039925, "percentage": 85.56, "elapsed_time": "6:11:00", "remaining_time": "1:02:37"}
|
||||
{"current_steps": 3305, "total_steps": 3857, "loss": 0.2307, "lr": 2.4533255953862602e-06, "epoch": 5.998185117967332, "percentage": 85.69, "elapsed_time": "6:11:33", "remaining_time": "1:02:03"}
|
||||
{"current_steps": 3310, "total_steps": 3857, "loss": 0.2173, "lr": 2.410071544575119e-06, "epoch": 6.007259528130671, "percentage": 85.82, "elapsed_time": "6:12:09", "remaining_time": "1:01:30"}
|
||||
{"current_steps": 3315, "total_steps": 3857, "loss": 0.2141, "lr": 2.3671777353604042e-06, "epoch": 6.016333938294011, "percentage": 85.95, "elapsed_time": "6:12:44", "remaining_time": "1:00:56"}
|
||||
{"current_steps": 3320, "total_steps": 3857, "loss": 0.2359, "lr": 2.324645046206988e-06, "epoch": 6.025408348457351, "percentage": 86.08, "elapsed_time": "6:13:21", "remaining_time": "1:00:23"}
|
||||
{"current_steps": 3325, "total_steps": 3857, "loss": 0.2268, "lr": 2.282474348184007e-06, "epoch": 6.0344827586206895, "percentage": 86.21, "elapsed_time": "6:13:53", "remaining_time": "0:59:49"}
|
||||
{"current_steps": 3330, "total_steps": 3857, "loss": 0.2177, "lr": 2.240666504947029e-06, "epoch": 6.043557168784029, "percentage": 86.34, "elapsed_time": "6:14:26", "remaining_time": "0:59:15"}
|
||||
{"current_steps": 3335, "total_steps": 3857, "loss": 0.2315, "lr": 2.199222372720353e-06, "epoch": 6.052631578947368, "percentage": 86.47, "elapsed_time": "6:15:03", "remaining_time": "0:58:42"}
|
||||
{"current_steps": 3340, "total_steps": 3857, "loss": 0.2089, "lr": 2.158142800279479e-06, "epoch": 6.061705989110708, "percentage": 86.6, "elapsed_time": "6:15:36", "remaining_time": "0:58:08"}
|
||||
{"current_steps": 3345, "total_steps": 3857, "loss": 0.2066, "lr": 2.117428628933731e-06, "epoch": 6.070780399274047, "percentage": 86.73, "elapsed_time": "6:16:08", "remaining_time": "0:57:34"}
|
||||
{"current_steps": 3350, "total_steps": 3857, "loss": 0.2159, "lr": 2.0770806925090124e-06, "epoch": 6.0798548094373865, "percentage": 86.86, "elapsed_time": "6:16:37", "remaining_time": "0:56:59"}
|
||||
{"current_steps": 3355, "total_steps": 3857, "loss": 0.2054, "lr": 2.0370998173307545e-06, "epoch": 6.088929219600726, "percentage": 86.98, "elapsed_time": "6:17:16", "remaining_time": "0:56:27"}
|
||||
{"current_steps": 3360, "total_steps": 3857, "loss": 0.216, "lr": 1.997486822206951e-06, "epoch": 6.098003629764065, "percentage": 87.11, "elapsed_time": "6:17:54", "remaining_time": "0:55:53"}
|
||||
{"current_steps": 3365, "total_steps": 3857, "loss": 0.2093, "lr": 1.958242518411444e-06, "epoch": 6.107078039927405, "percentage": 87.24, "elapsed_time": "6:18:30", "remaining_time": "0:55:20"}
|
||||
{"current_steps": 3370, "total_steps": 3857, "loss": 0.2285, "lr": 1.9193677096672615e-06, "epoch": 6.116152450090744, "percentage": 87.37, "elapsed_time": "6:19:06", "remaining_time": "0:54:47"}
|
||||
{"current_steps": 3375, "total_steps": 3857, "loss": 0.2116, "lr": 1.88086319213018e-06, "epoch": 6.125226860254084, "percentage": 87.5, "elapsed_time": "6:19:39", "remaining_time": "0:54:13"}
|
||||
{"current_steps": 3380, "total_steps": 3857, "loss": 0.2094, "lr": 1.8427297543724187e-06, "epoch": 6.1343012704174225, "percentage": 87.63, "elapsed_time": "6:20:08", "remaining_time": "0:53:38"}
|
||||
{"current_steps": 3385, "total_steps": 3857, "loss": 0.2033, "lr": 1.8049681773664796e-06, "epoch": 6.143375680580762, "percentage": 87.76, "elapsed_time": "6:20:38", "remaining_time": "0:53:04"}
|
||||
{"current_steps": 3390, "total_steps": 3857, "loss": 0.2191, "lr": 1.7675792344691655e-06, "epoch": 6.152450090744102, "percentage": 87.89, "elapsed_time": "6:21:11", "remaining_time": "0:52:30"}
|
||||
{"current_steps": 3395, "total_steps": 3857, "loss": 0.2241, "lr": 1.730563691405731e-06, "epoch": 6.161524500907441, "percentage": 88.02, "elapsed_time": "6:21:50", "remaining_time": "0:51:57"}
|
||||
{"current_steps": 3400, "total_steps": 3857, "loss": 0.2221, "lr": 1.693922306254203e-06, "epoch": 6.170598911070781, "percentage": 88.15, "elapsed_time": "6:22:23", "remaining_time": "0:51:23"}
|
||||
{"current_steps": 3405, "total_steps": 3857, "loss": 0.2125, "lr": 1.6576558294298739e-06, "epoch": 6.1796733212341195, "percentage": 88.28, "elapsed_time": "6:22:53", "remaining_time": "0:50:49"}
|
||||
{"current_steps": 3410, "total_steps": 3857, "loss": 0.2156, "lr": 1.6217650036698907e-06, "epoch": 6.188747731397459, "percentage": 88.41, "elapsed_time": "6:23:26", "remaining_time": "0:50:15"}
|
||||
{"current_steps": 3415, "total_steps": 3857, "loss": 0.2275, "lr": 1.5862505640180882e-06, "epoch": 6.197822141560798, "percentage": 88.54, "elapsed_time": "6:24:04", "remaining_time": "0:49:42"}
|
||||
{"current_steps": 3420, "total_steps": 3857, "loss": 0.2273, "lr": 1.5511132378099114e-06, "epoch": 6.206896551724138, "percentage": 88.67, "elapsed_time": "6:24:30", "remaining_time": "0:49:07"}
|
||||
{"current_steps": 3425, "total_steps": 3857, "loss": 0.2119, "lr": 1.5163537446575215e-06, "epoch": 6.215970961887478, "percentage": 88.8, "elapsed_time": "6:25:00", "remaining_time": "0:48:33"}
|
||||
{"current_steps": 3430, "total_steps": 3857, "loss": 0.2312, "lr": 1.4819727964350717e-06, "epoch": 6.2250453720508165, "percentage": 88.93, "elapsed_time": "6:25:34", "remaining_time": "0:48:00"}
|
||||
{"current_steps": 3435, "total_steps": 3857, "loss": 0.2313, "lr": 1.4479710972641026e-06, "epoch": 6.234119782214156, "percentage": 89.06, "elapsed_time": "6:26:14", "remaining_time": "0:47:27"}
|
||||
{"current_steps": 3440, "total_steps": 3857, "loss": 0.2058, "lr": 1.4143493434991551e-06, "epoch": 6.243194192377495, "percentage": 89.19, "elapsed_time": "6:26:50", "remaining_time": "0:46:53"}
|
||||
{"current_steps": 3445, "total_steps": 3857, "loss": 0.2039, "lr": 1.381108223713472e-06, "epoch": 6.252268602540835, "percentage": 89.32, "elapsed_time": "6:27:24", "remaining_time": "0:46:19"}
|
||||
{"current_steps": 3450, "total_steps": 3857, "loss": 0.2122, "lr": 1.348248418684932e-06, "epoch": 6.261343012704174, "percentage": 89.45, "elapsed_time": "6:27:50", "remaining_time": "0:45:45"}
|
||||
{"current_steps": 3455, "total_steps": 3857, "loss": 0.2152, "lr": 1.3157706013820804e-06, "epoch": 6.270417422867514, "percentage": 89.58, "elapsed_time": "6:28:26", "remaining_time": "0:45:11"}
|
||||
{"current_steps": 3460, "total_steps": 3857, "loss": 0.2018, "lr": 1.2836754369503624e-06, "epoch": 6.279491833030853, "percentage": 89.71, "elapsed_time": "6:28:56", "remaining_time": "0:44:37"}
|
||||
{"current_steps": 3465, "total_steps": 3857, "loss": 0.2096, "lr": 1.2519635826984922e-06, "epoch": 6.288566243194192, "percentage": 89.84, "elapsed_time": "6:29:33", "remaining_time": "0:44:04"}
|
||||
{"current_steps": 3470, "total_steps": 3857, "loss": 0.2199, "lr": 1.2206356880849945e-06, "epoch": 6.297640653357532, "percentage": 89.97, "elapsed_time": "6:30:01", "remaining_time": "0:43:29"}
|
||||
{"current_steps": 3475, "total_steps": 3857, "loss": 0.2113, "lr": 1.1896923947049067e-06, "epoch": 6.306715063520871, "percentage": 90.1, "elapsed_time": "6:30:32", "remaining_time": "0:42:55"}
|
||||
{"current_steps": 3480, "total_steps": 3857, "loss": 0.2185, "lr": 1.1591343362766305e-06, "epoch": 6.315789473684211, "percentage": 90.23, "elapsed_time": "6:31:02", "remaining_time": "0:42:21"}
|
||||
{"current_steps": 3485, "total_steps": 3857, "loss": 0.2186, "lr": 1.1289621386289618e-06, "epoch": 6.3248638838475495, "percentage": 90.36, "elapsed_time": "6:31:34", "remaining_time": "0:41:47"}
|
||||
{"current_steps": 3490, "total_steps": 3857, "loss": 0.2194, "lr": 1.0991764196882792e-06, "epoch": 6.333938294010889, "percentage": 90.48, "elapsed_time": "6:32:06", "remaining_time": "0:41:13"}
|
||||
{"current_steps": 3495, "total_steps": 3857, "loss": 0.2026, "lr": 1.0697777894658623e-06, "epoch": 6.343012704174229, "percentage": 90.61, "elapsed_time": "6:32:39", "remaining_time": "0:40:40"}
|
||||
{"current_steps": 3500, "total_steps": 3857, "loss": 0.2211, "lr": 1.0407668500454315e-06, "epoch": 6.352087114337568, "percentage": 90.74, "elapsed_time": "6:33:11", "remaining_time": "0:40:06"}
|
||||
{"current_steps": 3505, "total_steps": 3857, "loss": 0.1911, "lr": 1.012144195570799e-06, "epoch": 6.361161524500908, "percentage": 90.87, "elapsed_time": "6:33:44", "remaining_time": "0:39:32"}
|
||||
{"current_steps": 3510, "total_steps": 3857, "loss": 0.221, "lr": 9.839104122337018e-07, "epoch": 6.3702359346642465, "percentage": 91.0, "elapsed_time": "6:34:14", "remaining_time": "0:38:58"}
|
||||
{"current_steps": 3515, "total_steps": 3857, "loss": 0.212, "lr": 9.560660782618014e-07, "epoch": 6.379310344827586, "percentage": 91.13, "elapsed_time": "6:34:46", "remaining_time": "0:38:24"}
|
||||
{"current_steps": 3520, "total_steps": 3857, "loss": 0.2209, "lr": 9.286117639068371e-07, "epoch": 6.388384754990925, "percentage": 91.26, "elapsed_time": "6:35:24", "remaining_time": "0:37:51"}
|
||||
{"current_steps": 3525, "total_steps": 3857, "loss": 0.2168, "lr": 9.015480314329505e-07, "epoch": 6.397459165154265, "percentage": 91.39, "elapsed_time": "6:36:03", "remaining_time": "0:37:18"}
|
||||
{"current_steps": 3530, "total_steps": 3857, "loss": 0.234, "lr": 8.748754351051713e-07, "epoch": 6.406533575317605, "percentage": 91.52, "elapsed_time": "6:36:35", "remaining_time": "0:36:44"}
|
||||
{"current_steps": 3535, "total_steps": 3857, "loss": 0.21, "lr": 8.485945211780611e-07, "epoch": 6.415607985480944, "percentage": 91.65, "elapsed_time": "6:37:11", "remaining_time": "0:36:10"}
|
||||
{"current_steps": 3540, "total_steps": 3857, "loss": 0.2212, "lr": 8.227058278845357e-07, "epoch": 6.424682395644283, "percentage": 91.78, "elapsed_time": "6:37:40", "remaining_time": "0:35:36"}
|
||||
{"current_steps": 3545, "total_steps": 3857, "loss": 0.2272, "lr": 7.972098854248212e-07, "epoch": 6.433756805807622, "percentage": 91.91, "elapsed_time": "6:38:20", "remaining_time": "0:35:03"}
|
||||
{"current_steps": 3550, "total_steps": 3857, "loss": 0.2211, "lr": 7.721072159556264e-07, "epoch": 6.442831215970962, "percentage": 92.04, "elapsed_time": "6:38:46", "remaining_time": "0:34:29"}
|
||||
{"current_steps": 3555, "total_steps": 3857, "loss": 0.2024, "lr": 7.473983335794211e-07, "epoch": 6.451905626134302, "percentage": 92.17, "elapsed_time": "6:39:14", "remaining_time": "0:33:54"}
|
||||
{"current_steps": 3560, "total_steps": 3857, "loss": 0.2129, "lr": 7.230837443339234e-07, "epoch": 6.460980036297641, "percentage": 92.3, "elapsed_time": "6:39:44", "remaining_time": "0:33:20"}
|
||||
{"current_steps": 3565, "total_steps": 3857, "loss": 0.2338, "lr": 6.99163946181729e-07, "epoch": 6.47005444646098, "percentage": 92.43, "elapsed_time": "6:40:15", "remaining_time": "0:32:47"}
|
||||
{"current_steps": 3570, "total_steps": 3857, "loss": 0.2147, "lr": 6.756394290001145e-07, "epoch": 6.479128856624319, "percentage": 92.56, "elapsed_time": "6:40:51", "remaining_time": "0:32:13"}
|
||||
{"current_steps": 3575, "total_steps": 3857, "loss": 0.2101, "lr": 6.525106745710097e-07, "epoch": 6.488203266787659, "percentage": 92.69, "elapsed_time": "6:41:18", "remaining_time": "0:31:39"}
|
||||
{"current_steps": 3580, "total_steps": 3857, "loss": 0.212, "lr": 6.297781565711192e-07, "epoch": 6.497277676950998, "percentage": 92.82, "elapsed_time": "6:41:52", "remaining_time": "0:31:05"}
|
||||
{"current_steps": 3585, "total_steps": 3857, "loss": 0.2327, "lr": 6.074423405622365e-07, "epoch": 6.506352087114338, "percentage": 92.95, "elapsed_time": "6:42:22", "remaining_time": "0:30:31"}
|
||||
{"current_steps": 3590, "total_steps": 3857, "loss": 0.2144, "lr": 5.855036839816985e-07, "epoch": 6.5154264972776765, "percentage": 93.08, "elapsed_time": "6:42:56", "remaining_time": "0:29:58"}
|
||||
{"current_steps": 3595, "total_steps": 3857, "loss": 0.23, "lr": 5.639626361330153e-07, "epoch": 6.524500907441016, "percentage": 93.21, "elapsed_time": "6:43:30", "remaining_time": "0:29:24"}
|
||||
{"current_steps": 3600, "total_steps": 3857, "loss": 0.1937, "lr": 5.428196381766814e-07, "epoch": 6.533575317604356, "percentage": 93.34, "elapsed_time": "6:44:00", "remaining_time": "0:28:50"}
|
||||
{"current_steps": 3605, "total_steps": 3857, "loss": 0.2162, "lr": 5.220751231211263e-07, "epoch": 6.542649727767695, "percentage": 93.47, "elapsed_time": "6:44:36", "remaining_time": "0:28:16"}
|
||||
{"current_steps": 3610, "total_steps": 3857, "loss": 0.2109, "lr": 5.01729515813858e-07, "epoch": 6.551724137931035, "percentage": 93.6, "elapsed_time": "6:45:05", "remaining_time": "0:27:43"}
|
||||
{"current_steps": 3615, "total_steps": 3857, "loss": 0.2243, "lr": 4.81783232932751e-07, "epoch": 6.560798548094374, "percentage": 93.73, "elapsed_time": "6:45:34", "remaining_time": "0:27:09"}
|
||||
{"current_steps": 3620, "total_steps": 3857, "loss": 0.2063, "lr": 4.6223668297752157e-07, "epoch": 6.569872958257713, "percentage": 93.86, "elapsed_time": "6:46:18", "remaining_time": "0:26:36"}
|
||||
{"current_steps": 3625, "total_steps": 3857, "loss": 0.2284, "lr": 4.430902662613612e-07, "epoch": 6.578947368421053, "percentage": 93.98, "elapsed_time": "6:46:52", "remaining_time": "0:26:02"}
|
||||
{"current_steps": 3630, "total_steps": 3857, "loss": 0.2185, "lr": 4.243443749027343e-07, "epoch": 6.588021778584392, "percentage": 94.11, "elapsed_time": "6:47:28", "remaining_time": "0:25:28"}
|
||||
{"current_steps": 3635, "total_steps": 3857, "loss": 0.2163, "lr": 4.0599939281734447e-07, "epoch": 6.597096188747732, "percentage": 94.24, "elapsed_time": "6:48:11", "remaining_time": "0:24:55"}
|
||||
{"current_steps": 3640, "total_steps": 3857, "loss": 0.2241, "lr": 3.8805569571028546e-07, "epoch": 6.606170598911071, "percentage": 94.37, "elapsed_time": "6:48:37", "remaining_time": "0:24:21"}
|
||||
{"current_steps": 3645, "total_steps": 3857, "loss": 0.218, "lr": 3.7051365106832936e-07, "epoch": 6.61524500907441, "percentage": 94.5, "elapsed_time": "6:49:09", "remaining_time": "0:23:47"}
|
||||
{"current_steps": 3650, "total_steps": 3857, "loss": 0.2276, "lr": 3.53373618152415e-07, "epoch": 6.624319419237749, "percentage": 94.63, "elapsed_time": "6:49:42", "remaining_time": "0:23:14"}
|
||||
{"current_steps": 3655, "total_steps": 3857, "loss": 0.2137, "lr": 3.366359479902781e-07, "epoch": 6.633393829401089, "percentage": 94.76, "elapsed_time": "6:50:15", "remaining_time": "0:22:40"}
|
||||
{"current_steps": 3660, "total_steps": 3857, "loss": 0.2014, "lr": 3.2030098336927497e-07, "epoch": 6.642468239564428, "percentage": 94.89, "elapsed_time": "6:50:50", "remaining_time": "0:22:06"}
|
||||
{"current_steps": 3665, "total_steps": 3857, "loss": 0.227, "lr": 3.043690588293524e-07, "epoch": 6.651542649727768, "percentage": 95.02, "elapsed_time": "6:51:28", "remaining_time": "0:21:33"}
|
||||
{"current_steps": 3670, "total_steps": 3857, "loss": 0.2273, "lr": 2.888405006562001e-07, "epoch": 6.660617059891107, "percentage": 95.15, "elapsed_time": "6:51:58", "remaining_time": "0:20:59"}
|
||||
{"current_steps": 3675, "total_steps": 3857, "loss": 0.2294, "lr": 2.7371562687456444e-07, "epoch": 6.669691470054446, "percentage": 95.28, "elapsed_time": "6:52:36", "remaining_time": "0:20:26"}
|
||||
{"current_steps": 3680, "total_steps": 3857, "loss": 0.215, "lr": 2.5899474724174313e-07, "epoch": 6.678765880217786, "percentage": 95.41, "elapsed_time": "6:53:11", "remaining_time": "0:19:52"}
|
||||
{"current_steps": 3685, "total_steps": 3857, "loss": 0.2256, "lr": 2.446781632412365e-07, "epoch": 6.687840290381125, "percentage": 95.54, "elapsed_time": "6:53:43", "remaining_time": "0:19:18"}
|
||||
{"current_steps": 3690, "total_steps": 3857, "loss": 0.2138, "lr": 2.3076616807657047e-07, "epoch": 6.696914700544465, "percentage": 95.67, "elapsed_time": "6:54:17", "remaining_time": "0:18:45"}
|
||||
{"current_steps": 3695, "total_steps": 3857, "loss": 0.2391, "lr": 2.1725904666529685e-07, "epoch": 6.7059891107078045, "percentage": 95.8, "elapsed_time": "6:54:49", "remaining_time": "0:18:11"}
|
||||
{"current_steps": 3700, "total_steps": 3857, "loss": 0.2168, "lr": 2.0415707563316012e-07, "epoch": 6.715063520871143, "percentage": 95.93, "elapsed_time": "6:55:22", "remaining_time": "0:17:37"}
|
||||
{"current_steps": 3705, "total_steps": 3857, "loss": 0.2156, "lr": 1.9146052330841546e-07, "epoch": 6.724137931034483, "percentage": 96.06, "elapsed_time": "6:55:54", "remaining_time": "0:17:03"}
|
||||
{"current_steps": 3710, "total_steps": 3857, "loss": 0.2176, "lr": 1.7916964971636197e-07, "epoch": 6.733212341197822, "percentage": 96.19, "elapsed_time": "6:56:28", "remaining_time": "0:16:30"}
|
||||
{"current_steps": 3715, "total_steps": 3857, "loss": 0.221, "lr": 1.6728470657399132e-07, "epoch": 6.742286751361162, "percentage": 96.32, "elapsed_time": "6:57:01", "remaining_time": "0:15:56"}
|
||||
{"current_steps": 3720, "total_steps": 3857, "loss": 0.2455, "lr": 1.558059372848475e-07, "epoch": 6.751361161524501, "percentage": 96.45, "elapsed_time": "6:57:36", "remaining_time": "0:15:22"}
|
||||
{"current_steps": 3725, "total_steps": 3857, "loss": 0.2122, "lr": 1.4473357693403078e-07, "epoch": 6.76043557168784, "percentage": 96.58, "elapsed_time": "6:58:11", "remaining_time": "0:14:49"}
|
||||
{"current_steps": 3730, "total_steps": 3857, "loss": 0.2169, "lr": 1.3406785228339936e-07, "epoch": 6.769509981851179, "percentage": 96.71, "elapsed_time": "6:58:38", "remaining_time": "0:14:15"}
|
||||
{"current_steps": 3735, "total_steps": 3857, "loss": 0.2375, "lr": 1.2380898176690636e-07, "epoch": 6.778584392014519, "percentage": 96.84, "elapsed_time": "6:59:12", "remaining_time": "0:13:41"}
|
||||
{"current_steps": 3740, "total_steps": 3857, "loss": 0.2238, "lr": 1.1395717548614126e-07, "epoch": 6.787658802177859, "percentage": 96.97, "elapsed_time": "6:59:39", "remaining_time": "0:13:07"}
|
||||
{"current_steps": 3745, "total_steps": 3857, "loss": 0.243, "lr": 1.0451263520601773e-07, "epoch": 6.796733212341198, "percentage": 97.1, "elapsed_time": "7:00:09", "remaining_time": "0:12:33"}
|
||||
{"current_steps": 3750, "total_steps": 3857, "loss": 0.2078, "lr": 9.547555435065026e-08, "epoch": 6.805807622504537, "percentage": 97.23, "elapsed_time": "7:00:43", "remaining_time": "0:12:00"}
|
||||
{"current_steps": 3755, "total_steps": 3857, "loss": 0.2187, "lr": 8.684611799937736e-08, "epoch": 6.814882032667876, "percentage": 97.36, "elapsed_time": "7:01:18", "remaining_time": "0:11:26"}
|
||||
{"current_steps": 3760, "total_steps": 3857, "loss": 0.211, "lr": 7.86245028829935e-08, "epoch": 6.823956442831216, "percentage": 97.49, "elapsed_time": "7:01:50", "remaining_time": "0:10:52"}
|
||||
{"current_steps": 3765, "total_steps": 3857, "loss": 0.2265, "lr": 7.08108773801075e-08, "epoch": 6.833030852994556, "percentage": 97.61, "elapsed_time": "7:02:29", "remaining_time": "0:10:19"}
|
||||
{"current_steps": 3770, "total_steps": 3857, "loss": 0.2268, "lr": 6.340540151370978e-08, "epoch": 6.842105263157895, "percentage": 97.74, "elapsed_time": "7:02:58", "remaining_time": "0:09:45"}
|
||||
{"current_steps": 3775, "total_steps": 3857, "loss": 0.2148, "lr": 5.640822694788828e-08, "epoch": 6.8511796733212345, "percentage": 97.87, "elapsed_time": "7:03:38", "remaining_time": "0:09:12"}
|
||||
{"current_steps": 3780, "total_steps": 3857, "loss": 0.2264, "lr": 4.9819496984724324e-08, "epoch": 6.860254083484573, "percentage": 98.0, "elapsed_time": "7:04:14", "remaining_time": "0:08:38"}
|
||||
{"current_steps": 3785, "total_steps": 3857, "loss": 0.2371, "lr": 4.363934656135271e-08, "epoch": 6.869328493647913, "percentage": 98.13, "elapsed_time": "7:04:49", "remaining_time": "0:08:04"}
|
||||
{"current_steps": 3790, "total_steps": 3857, "loss": 0.1987, "lr": 3.7867902247206156e-08, "epoch": 6.878402903811252, "percentage": 98.26, "elapsed_time": "7:05:20", "remaining_time": "0:07:31"}
|
||||
{"current_steps": 3795, "total_steps": 3857, "loss": 0.234, "lr": 3.2505282241421797e-08, "epoch": 6.887477313974592, "percentage": 98.39, "elapsed_time": "7:05:49", "remaining_time": "0:06:57"}
|
||||
{"current_steps": 3800, "total_steps": 3857, "loss": 0.2133, "lr": 2.7551596370409828e-08, "epoch": 6.896551724137931, "percentage": 98.52, "elapsed_time": "7:06:13", "remaining_time": "0:06:23"}
|
||||
{"current_steps": 3805, "total_steps": 3857, "loss": 0.219, "lr": 2.300694608562415e-08, "epoch": 6.90562613430127, "percentage": 98.65, "elapsed_time": "7:06:41", "remaining_time": "0:05:49"}
|
||||
{"current_steps": 3810, "total_steps": 3857, "loss": 0.2234, "lr": 1.887142446146184e-08, "epoch": 6.91470054446461, "percentage": 98.78, "elapsed_time": "7:07:13", "remaining_time": "0:05:16"}
|
||||
{"current_steps": 3815, "total_steps": 3857, "loss": 0.2134, "lr": 1.5145116193375774e-08, "epoch": 6.923774954627949, "percentage": 98.91, "elapsed_time": "7:07:51", "remaining_time": "0:04:42"}
|
||||
{"current_steps": 3820, "total_steps": 3857, "loss": 0.2235, "lr": 1.182809759612935e-08, "epoch": 6.932849364791289, "percentage": 99.04, "elapsed_time": "7:08:29", "remaining_time": "0:04:09"}
|
||||
{"current_steps": 3825, "total_steps": 3857, "loss": 0.2015, "lr": 8.920436602237737e-09, "epoch": 6.941923774954628, "percentage": 99.17, "elapsed_time": "7:09:01", "remaining_time": "0:03:35"}
|
||||
{"current_steps": 3830, "total_steps": 3857, "loss": 0.2218, "lr": 6.422192760575652e-09, "epoch": 6.950998185117967, "percentage": 99.3, "elapsed_time": "7:09:39", "remaining_time": "0:03:01"}
|
||||
{"current_steps": 3835, "total_steps": 3857, "loss": 0.2185, "lr": 4.333417235156124e-09, "epoch": 6.960072595281307, "percentage": 99.43, "elapsed_time": "7:10:15", "remaining_time": "0:02:28"}
|
||||
{"current_steps": 3840, "total_steps": 3857, "loss": 0.223, "lr": 2.654152804084653e-09, "epoch": 6.969147005444646, "percentage": 99.56, "elapsed_time": "7:10:44", "remaining_time": "0:01:54"}
|
||||
{"current_steps": 3845, "total_steps": 3857, "loss": 0.231, "lr": 1.3844338586843642e-09, "epoch": 6.978221415607986, "percentage": 99.69, "elapsed_time": "7:11:17", "remaining_time": "0:01:20"}
|
||||
{"current_steps": 3850, "total_steps": 3857, "loss": 0.1896, "lr": 5.242864027921179e-10, "epoch": 6.987295825771325, "percentage": 99.82, "elapsed_time": "7:11:54", "remaining_time": "0:00:47"}
|
||||
{"current_steps": 3855, "total_steps": 3857, "loss": 0.2292, "lr": 7.372805221672608e-11, "epoch": 6.9963702359346644, "percentage": 99.95, "elapsed_time": "7:12:29", "remaining_time": "0:00:13"}
|
||||
{"current_steps": 3857, "total_steps": 3857, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "7:12:50", "remaining_time": "0:00:00"}
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 45 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user