初始化项目,由ModelHub XC社区提供模型
Model: laion/exp-uns-r2egym-8_4x_glm_4_7_traces_jupiter Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
61
README.md
Normal file
61
README.md
Normal file
@@ -0,0 +1,61 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: apache-2.0
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: exp-uns-r2egym-8_4x_glm_4_7_traces_jupiter
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# exp-uns-r2egym-8_4x_glm_4_7_traces_jupiter
|
||||
|
||||
This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the /data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-uns-r2egym-8_4x_glm_4.7_traces_jupiter/snapshots/c9a4363391aad8ddeb2df878a3490276d14e91a0_thinking_preprocessed dataset.
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 8
|
||||
- gradient_accumulation_steps: 2
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 64
|
||||
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.0+cu128
|
||||
- Datasets 4.4.1
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 5.823045638802442,
|
||||
"achieved_tflops_per_gpu_theoretical": 260.86865133738394,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.17524641752243042,
|
||||
"mfu_percent": 0.5887811566028758,
|
||||
"mfu_percent_theoretical": 26.377012268694028,
|
||||
"total_flos": 2.557069172618035e+18,
|
||||
"train_loss": 0.2496959186285973,
|
||||
"train_runtime": 54891.1457,
|
||||
"train_samples_per_second": 1.235,
|
||||
"train_steps_per_second": 0.077,
|
||||
"valid_targets_mean": 4057.6,
|
||||
"valid_targets_min": 3531
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6938cd5c55d198601bff3ae0a7f355610bca7811997953a935eee79639834b1d
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d0f5c310a504a359d011e3b21be27175053ae28190c5711bc47a0e12cf060dac
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7a6fb746d120c789227a2b026bc0e39b76da65edba6a27cee06e132b6b9cd398
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d213729aabe677a7077fdb45a7e460c214896db52ff6eb25bb29a533d8077c83
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "c9a4363391aad8ddeb2df878a3490276d14e91a0_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "DCAgent",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/data/cat/ws/befe330h-befe330h-otagent/huggingface/hub/datasets--DCAgent--exp-uns-r2egym-8_4x_glm_4.7_traces_jupiter/snapshots/c9a4363391aad8ddeb2df878a3490276d14e91a0_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/laion/exp-uns-r2egym-8_4x_glm_4_7_traces_jupiter/blob/main/config.json",
|
||||
"wandb_link": "https://wandb.ai/dogml/OpenThoughts-Agent/runs/sft_exp-uns-r2egym-8_4x_glm_4-7_traces_jupiter_Qwen3-8B",
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 5.823045638802442,
|
||||
"achieved_tflops_per_gpu_theoretical": 260.86865133738394,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.17524641752243042,
|
||||
"mfu_percent": 0.5887811566028758,
|
||||
"mfu_percent_theoretical": 26.377012268694028,
|
||||
"total_flos": 2.557069172618035e+18,
|
||||
"train_loss": 0.2496959186285973,
|
||||
"train_runtime": 54891.1457,
|
||||
"train_samples_per_second": 1.235,
|
||||
"train_steps_per_second": 0.077,
|
||||
"valid_targets_mean": 4057.6,
|
||||
"valid_targets_min": 3531
|
||||
}
|
||||
849
trainer_log.jsonl
Normal file
849
trainer_log.jsonl
Normal file
@@ -0,0 +1,849 @@
|
||||
{"current_steps": 5, "total_steps": 4242, "loss": 0.8754, "lr": 3.764705882352941e-07, "epoch": 0.008257638315441783, "percentage": 0.12, "elapsed_time": "0:01:23", "remaining_time": "19:45:01"}
|
||||
{"current_steps": 10, "total_steps": 4242, "loss": 0.8797, "lr": 8.470588235294118e-07, "epoch": 0.016515276630883566, "percentage": 0.24, "elapsed_time": "0:02:29", "remaining_time": "17:32:36"}
|
||||
{"current_steps": 15, "total_steps": 4242, "loss": 0.8466, "lr": 1.3176470588235296e-06, "epoch": 0.02477291494632535, "percentage": 0.35, "elapsed_time": "0:03:35", "remaining_time": "16:52:48"}
|
||||
{"current_steps": 20, "total_steps": 4242, "loss": 0.8153, "lr": 1.7882352941176474e-06, "epoch": 0.03303055326176713, "percentage": 0.47, "elapsed_time": "0:04:37", "remaining_time": "16:16:38"}
|
||||
{"current_steps": 25, "total_steps": 4242, "loss": 0.7169, "lr": 2.258823529411765e-06, "epoch": 0.04128819157720892, "percentage": 0.59, "elapsed_time": "0:05:44", "remaining_time": "16:09:20"}
|
||||
{"current_steps": 30, "total_steps": 4242, "loss": 0.6578, "lr": 2.7294117647058825e-06, "epoch": 0.0495458298926507, "percentage": 0.71, "elapsed_time": "0:06:56", "remaining_time": "16:14:06"}
|
||||
{"current_steps": 35, "total_steps": 4242, "loss": 0.6295, "lr": 3.2000000000000003e-06, "epoch": 0.057803468208092484, "percentage": 0.83, "elapsed_time": "0:08:01", "remaining_time": "16:04:49"}
|
||||
{"current_steps": 40, "total_steps": 4242, "loss": 0.5766, "lr": 3.670588235294118e-06, "epoch": 0.06606110652353427, "percentage": 0.94, "elapsed_time": "0:09:03", "remaining_time": "15:52:21"}
|
||||
{"current_steps": 45, "total_steps": 4242, "loss": 0.5631, "lr": 4.141176470588235e-06, "epoch": 0.07431874483897605, "percentage": 1.06, "elapsed_time": "0:10:07", "remaining_time": "15:44:56"}
|
||||
{"current_steps": 50, "total_steps": 4242, "loss": 0.5418, "lr": 4.611764705882353e-06, "epoch": 0.08257638315441784, "percentage": 1.18, "elapsed_time": "0:11:15", "remaining_time": "15:44:03"}
|
||||
{"current_steps": 55, "total_steps": 4242, "loss": 0.5304, "lr": 5.08235294117647e-06, "epoch": 0.09083402146985962, "percentage": 1.3, "elapsed_time": "0:12:20", "remaining_time": "15:39:13"}
|
||||
{"current_steps": 60, "total_steps": 4242, "loss": 0.5188, "lr": 5.552941176470589e-06, "epoch": 0.0990916597853014, "percentage": 1.41, "elapsed_time": "0:13:31", "remaining_time": "15:42:09"}
|
||||
{"current_steps": 65, "total_steps": 4242, "loss": 0.4941, "lr": 6.023529411764706e-06, "epoch": 0.10734929810074319, "percentage": 1.53, "elapsed_time": "0:14:34", "remaining_time": "15:36:09"}
|
||||
{"current_steps": 70, "total_steps": 4242, "loss": 0.48, "lr": 6.494117647058824e-06, "epoch": 0.11560693641618497, "percentage": 1.65, "elapsed_time": "0:15:41", "remaining_time": "15:35:20"}
|
||||
{"current_steps": 75, "total_steps": 4242, "loss": 0.4493, "lr": 6.964705882352941e-06, "epoch": 0.12386457473162675, "percentage": 1.77, "elapsed_time": "0:16:48", "remaining_time": "15:33:56"}
|
||||
{"current_steps": 80, "total_steps": 4242, "loss": 0.4565, "lr": 7.43529411764706e-06, "epoch": 0.13212221304706853, "percentage": 1.89, "elapsed_time": "0:17:51", "remaining_time": "15:28:58"}
|
||||
{"current_steps": 85, "total_steps": 4242, "loss": 0.4452, "lr": 7.905882352941176e-06, "epoch": 0.14037985136251033, "percentage": 2.0, "elapsed_time": "0:18:52", "remaining_time": "15:23:18"}
|
||||
{"current_steps": 90, "total_steps": 4242, "loss": 0.4177, "lr": 8.376470588235295e-06, "epoch": 0.1486374896779521, "percentage": 2.12, "elapsed_time": "0:20:01", "remaining_time": "15:23:55"}
|
||||
{"current_steps": 95, "total_steps": 4242, "loss": 0.3958, "lr": 8.847058823529413e-06, "epoch": 0.1568951279933939, "percentage": 2.24, "elapsed_time": "0:21:03", "remaining_time": "15:19:00"}
|
||||
{"current_steps": 100, "total_steps": 4242, "loss": 0.3986, "lr": 9.31764705882353e-06, "epoch": 0.16515276630883569, "percentage": 2.36, "elapsed_time": "0:22:06", "remaining_time": "15:15:41"}
|
||||
{"current_steps": 105, "total_steps": 4242, "loss": 0.399, "lr": 9.788235294117649e-06, "epoch": 0.17341040462427745, "percentage": 2.48, "elapsed_time": "0:23:12", "remaining_time": "15:14:10"}
|
||||
{"current_steps": 110, "total_steps": 4242, "loss": 0.3923, "lr": 1.0258823529411766e-05, "epoch": 0.18166804293971925, "percentage": 2.59, "elapsed_time": "0:24:20", "remaining_time": "15:14:07"}
|
||||
{"current_steps": 115, "total_steps": 4242, "loss": 0.3778, "lr": 1.0729411764705884e-05, "epoch": 0.18992568125516102, "percentage": 2.71, "elapsed_time": "0:25:31", "remaining_time": "15:16:08"}
|
||||
{"current_steps": 120, "total_steps": 4242, "loss": 0.3782, "lr": 1.1200000000000001e-05, "epoch": 0.1981833195706028, "percentage": 2.83, "elapsed_time": "0:26:34", "remaining_time": "15:12:39"}
|
||||
{"current_steps": 125, "total_steps": 4242, "loss": 0.3699, "lr": 1.1670588235294118e-05, "epoch": 0.20644095788604458, "percentage": 2.95, "elapsed_time": "0:27:36", "remaining_time": "15:09:19"}
|
||||
{"current_steps": 130, "total_steps": 4242, "loss": 0.3638, "lr": 1.2141176470588237e-05, "epoch": 0.21469859620148637, "percentage": 3.06, "elapsed_time": "0:28:46", "remaining_time": "15:10:19"}
|
||||
{"current_steps": 135, "total_steps": 4242, "loss": 0.3598, "lr": 1.2611764705882354e-05, "epoch": 0.22295623451692817, "percentage": 3.18, "elapsed_time": "0:29:47", "remaining_time": "15:06:06"}
|
||||
{"current_steps": 140, "total_steps": 4242, "loss": 0.3496, "lr": 1.3082352941176472e-05, "epoch": 0.23121387283236994, "percentage": 3.3, "elapsed_time": "0:30:53", "remaining_time": "15:05:00"}
|
||||
{"current_steps": 145, "total_steps": 4242, "loss": 0.3644, "lr": 1.355294117647059e-05, "epoch": 0.23947151114781173, "percentage": 3.42, "elapsed_time": "0:31:51", "remaining_time": "15:00:07"}
|
||||
{"current_steps": 150, "total_steps": 4242, "loss": 0.3529, "lr": 1.4023529411764706e-05, "epoch": 0.2477291494632535, "percentage": 3.54, "elapsed_time": "0:32:57", "remaining_time": "14:59:15"}
|
||||
{"current_steps": 155, "total_steps": 4242, "loss": 0.3416, "lr": 1.4494117647058825e-05, "epoch": 0.25598678777869527, "percentage": 3.65, "elapsed_time": "0:33:56", "remaining_time": "14:55:08"}
|
||||
{"current_steps": 160, "total_steps": 4242, "loss": 0.3444, "lr": 1.4964705882352943e-05, "epoch": 0.26424442609413706, "percentage": 3.77, "elapsed_time": "0:35:04", "remaining_time": "14:54:56"}
|
||||
{"current_steps": 165, "total_steps": 4242, "loss": 0.3373, "lr": 1.543529411764706e-05, "epoch": 0.27250206440957886, "percentage": 3.89, "elapsed_time": "0:36:05", "remaining_time": "14:51:37"}
|
||||
{"current_steps": 170, "total_steps": 4242, "loss": 0.3343, "lr": 1.5905882352941177e-05, "epoch": 0.28075970272502065, "percentage": 4.01, "elapsed_time": "0:37:17", "remaining_time": "14:53:10"}
|
||||
{"current_steps": 175, "total_steps": 4242, "loss": 0.3571, "lr": 1.6376470588235298e-05, "epoch": 0.28901734104046245, "percentage": 4.13, "elapsed_time": "0:38:28", "remaining_time": "14:54:08"}
|
||||
{"current_steps": 180, "total_steps": 4242, "loss": 0.3279, "lr": 1.6847058823529414e-05, "epoch": 0.2972749793559042, "percentage": 4.24, "elapsed_time": "0:39:33", "remaining_time": "14:52:37"}
|
||||
{"current_steps": 185, "total_steps": 4242, "loss": 0.3419, "lr": 1.731764705882353e-05, "epoch": 0.305532617671346, "percentage": 4.36, "elapsed_time": "0:40:41", "remaining_time": "14:52:13"}
|
||||
{"current_steps": 190, "total_steps": 4242, "loss": 0.347, "lr": 1.778823529411765e-05, "epoch": 0.3137902559867878, "percentage": 4.48, "elapsed_time": "0:41:53", "remaining_time": "14:53:27"}
|
||||
{"current_steps": 195, "total_steps": 4242, "loss": 0.3334, "lr": 1.8258823529411765e-05, "epoch": 0.3220478943022296, "percentage": 4.6, "elapsed_time": "0:43:04", "remaining_time": "14:54:04"}
|
||||
{"current_steps": 200, "total_steps": 4242, "loss": 0.3274, "lr": 1.8729411764705886e-05, "epoch": 0.33030553261767137, "percentage": 4.71, "elapsed_time": "0:44:07", "remaining_time": "14:51:54"}
|
||||
{"current_steps": 205, "total_steps": 4242, "loss": 0.3379, "lr": 1.9200000000000003e-05, "epoch": 0.3385631709331131, "percentage": 4.83, "elapsed_time": "0:45:20", "remaining_time": "14:52:44"}
|
||||
{"current_steps": 210, "total_steps": 4242, "loss": 0.3291, "lr": 1.967058823529412e-05, "epoch": 0.3468208092485549, "percentage": 4.95, "elapsed_time": "0:46:24", "remaining_time": "14:51:02"}
|
||||
{"current_steps": 215, "total_steps": 4242, "loss": 0.3246, "lr": 2.0141176470588236e-05, "epoch": 0.3550784475639967, "percentage": 5.07, "elapsed_time": "0:47:28", "remaining_time": "14:49:17"}
|
||||
{"current_steps": 220, "total_steps": 4242, "loss": 0.3214, "lr": 2.0611764705882353e-05, "epoch": 0.3633360858794385, "percentage": 5.19, "elapsed_time": "0:48:36", "remaining_time": "14:48:39"}
|
||||
{"current_steps": 225, "total_steps": 4242, "loss": 0.3155, "lr": 2.1082352941176474e-05, "epoch": 0.37159372419488024, "percentage": 5.3, "elapsed_time": "0:49:44", "remaining_time": "14:48:07"}
|
||||
{"current_steps": 230, "total_steps": 4242, "loss": 0.3175, "lr": 2.155294117647059e-05, "epoch": 0.37985136251032203, "percentage": 5.42, "elapsed_time": "0:50:51", "remaining_time": "14:47:06"}
|
||||
{"current_steps": 235, "total_steps": 4242, "loss": 0.3243, "lr": 2.2023529411764707e-05, "epoch": 0.38810900082576383, "percentage": 5.54, "elapsed_time": "0:51:58", "remaining_time": "14:46:09"}
|
||||
{"current_steps": 240, "total_steps": 4242, "loss": 0.3262, "lr": 2.2494117647058828e-05, "epoch": 0.3963666391412056, "percentage": 5.66, "elapsed_time": "0:53:05", "remaining_time": "14:45:19"}
|
||||
{"current_steps": 245, "total_steps": 4242, "loss": 0.3352, "lr": 2.296470588235294e-05, "epoch": 0.4046242774566474, "percentage": 5.78, "elapsed_time": "0:54:13", "remaining_time": "14:44:45"}
|
||||
{"current_steps": 250, "total_steps": 4242, "loss": 0.3177, "lr": 2.343529411764706e-05, "epoch": 0.41288191577208916, "percentage": 5.89, "elapsed_time": "0:55:19", "remaining_time": "14:43:26"}
|
||||
{"current_steps": 255, "total_steps": 4242, "loss": 0.3113, "lr": 2.390588235294118e-05, "epoch": 0.42113955408753095, "percentage": 6.01, "elapsed_time": "0:56:22", "remaining_time": "14:41:29"}
|
||||
{"current_steps": 260, "total_steps": 4242, "loss": 0.31, "lr": 2.4376470588235296e-05, "epoch": 0.42939719240297275, "percentage": 6.13, "elapsed_time": "0:57:25", "remaining_time": "14:39:25"}
|
||||
{"current_steps": 265, "total_steps": 4242, "loss": 0.3117, "lr": 2.4847058823529416e-05, "epoch": 0.43765483071841454, "percentage": 6.25, "elapsed_time": "0:58:25", "remaining_time": "14:36:55"}
|
||||
{"current_steps": 270, "total_steps": 4242, "loss": 0.3193, "lr": 2.5317647058823533e-05, "epoch": 0.44591246903385634, "percentage": 6.36, "elapsed_time": "0:59:34", "remaining_time": "14:36:31"}
|
||||
{"current_steps": 275, "total_steps": 4242, "loss": 0.3125, "lr": 2.5788235294117646e-05, "epoch": 0.4541701073492981, "percentage": 6.48, "elapsed_time": "1:00:43", "remaining_time": "14:36:03"}
|
||||
{"current_steps": 280, "total_steps": 4242, "loss": 0.3145, "lr": 2.6258823529411767e-05, "epoch": 0.4624277456647399, "percentage": 6.6, "elapsed_time": "1:01:51", "remaining_time": "14:35:14"}
|
||||
{"current_steps": 285, "total_steps": 4242, "loss": 0.3229, "lr": 2.6729411764705884e-05, "epoch": 0.47068538398018167, "percentage": 6.72, "elapsed_time": "1:03:00", "remaining_time": "14:34:47"}
|
||||
{"current_steps": 290, "total_steps": 4242, "loss": 0.3214, "lr": 2.7200000000000004e-05, "epoch": 0.47894302229562347, "percentage": 6.84, "elapsed_time": "1:04:02", "remaining_time": "14:32:43"}
|
||||
{"current_steps": 295, "total_steps": 4242, "loss": 0.3077, "lr": 2.767058823529412e-05, "epoch": 0.48720066061106526, "percentage": 6.95, "elapsed_time": "1:05:05", "remaining_time": "14:30:54"}
|
||||
{"current_steps": 300, "total_steps": 4242, "loss": 0.304, "lr": 2.8141176470588238e-05, "epoch": 0.495458298926507, "percentage": 7.07, "elapsed_time": "1:06:10", "remaining_time": "14:29:31"}
|
||||
{"current_steps": 305, "total_steps": 4242, "loss": 0.3177, "lr": 2.8611764705882358e-05, "epoch": 0.5037159372419489, "percentage": 7.19, "elapsed_time": "1:07:17", "remaining_time": "14:28:41"}
|
||||
{"current_steps": 310, "total_steps": 4242, "loss": 0.3134, "lr": 2.908235294117647e-05, "epoch": 0.5119735755573905, "percentage": 7.31, "elapsed_time": "1:08:26", "remaining_time": "14:28:06"}
|
||||
{"current_steps": 315, "total_steps": 4242, "loss": 0.3032, "lr": 2.955294117647059e-05, "epoch": 0.5202312138728323, "percentage": 7.43, "elapsed_time": "1:09:37", "remaining_time": "14:27:53"}
|
||||
{"current_steps": 320, "total_steps": 4242, "loss": 0.3003, "lr": 3.002352941176471e-05, "epoch": 0.5284888521882741, "percentage": 7.54, "elapsed_time": "1:10:42", "remaining_time": "14:26:35"}
|
||||
{"current_steps": 325, "total_steps": 4242, "loss": 0.3041, "lr": 3.0494117647058826e-05, "epoch": 0.5367464905037159, "percentage": 7.66, "elapsed_time": "1:11:50", "remaining_time": "14:25:55"}
|
||||
{"current_steps": 330, "total_steps": 4242, "loss": 0.3028, "lr": 3.0964705882352946e-05, "epoch": 0.5450041288191577, "percentage": 7.78, "elapsed_time": "1:12:54", "remaining_time": "14:24:16"}
|
||||
{"current_steps": 335, "total_steps": 4242, "loss": 0.2985, "lr": 3.143529411764706e-05, "epoch": 0.5532617671345995, "percentage": 7.9, "elapsed_time": "1:14:00", "remaining_time": "14:23:06"}
|
||||
{"current_steps": 340, "total_steps": 4242, "loss": 0.3087, "lr": 3.190588235294118e-05, "epoch": 0.5615194054500413, "percentage": 8.02, "elapsed_time": "1:15:04", "remaining_time": "14:21:32"}
|
||||
{"current_steps": 345, "total_steps": 4242, "loss": 0.311, "lr": 3.23764705882353e-05, "epoch": 0.5697770437654831, "percentage": 8.13, "elapsed_time": "1:16:11", "remaining_time": "14:20:40"}
|
||||
{"current_steps": 350, "total_steps": 4242, "loss": 0.3101, "lr": 3.2847058823529414e-05, "epoch": 0.5780346820809249, "percentage": 8.25, "elapsed_time": "1:17:18", "remaining_time": "14:19:44"}
|
||||
{"current_steps": 355, "total_steps": 4242, "loss": 0.3077, "lr": 3.331764705882353e-05, "epoch": 0.5862923203963666, "percentage": 8.37, "elapsed_time": "1:18:23", "remaining_time": "14:18:14"}
|
||||
{"current_steps": 360, "total_steps": 4242, "loss": 0.3052, "lr": 3.378823529411765e-05, "epoch": 0.5945499587118084, "percentage": 8.49, "elapsed_time": "1:19:22", "remaining_time": "14:15:56"}
|
||||
{"current_steps": 365, "total_steps": 4242, "loss": 0.2946, "lr": 3.425882352941177e-05, "epoch": 0.6028075970272502, "percentage": 8.6, "elapsed_time": "1:20:19", "remaining_time": "14:13:13"}
|
||||
{"current_steps": 370, "total_steps": 4242, "loss": 0.3079, "lr": 3.472941176470589e-05, "epoch": 0.611065235342692, "percentage": 8.72, "elapsed_time": "1:21:26", "remaining_time": "14:12:17"}
|
||||
{"current_steps": 375, "total_steps": 4242, "loss": 0.3048, "lr": 3.52e-05, "epoch": 0.6193228736581338, "percentage": 8.84, "elapsed_time": "1:22:35", "remaining_time": "14:11:43"}
|
||||
{"current_steps": 380, "total_steps": 4242, "loss": 0.2906, "lr": 3.567058823529412e-05, "epoch": 0.6275805119735756, "percentage": 8.96, "elapsed_time": "1:23:40", "remaining_time": "14:10:27"}
|
||||
{"current_steps": 385, "total_steps": 4242, "loss": 0.2932, "lr": 3.614117647058824e-05, "epoch": 0.6358381502890174, "percentage": 9.08, "elapsed_time": "1:24:45", "remaining_time": "14:09:07"}
|
||||
{"current_steps": 390, "total_steps": 4242, "loss": 0.2886, "lr": 3.6611764705882356e-05, "epoch": 0.6440957886044592, "percentage": 9.19, "elapsed_time": "1:25:48", "remaining_time": "14:07:35"}
|
||||
{"current_steps": 395, "total_steps": 4242, "loss": 0.2992, "lr": 3.708235294117647e-05, "epoch": 0.652353426919901, "percentage": 9.31, "elapsed_time": "1:26:58", "remaining_time": "14:07:08"}
|
||||
{"current_steps": 400, "total_steps": 4242, "loss": 0.2985, "lr": 3.755294117647059e-05, "epoch": 0.6606110652353427, "percentage": 9.43, "elapsed_time": "1:28:00", "remaining_time": "14:05:18"}
|
||||
{"current_steps": 405, "total_steps": 4242, "loss": 0.2945, "lr": 3.802352941176471e-05, "epoch": 0.6688687035507844, "percentage": 9.55, "elapsed_time": "1:29:03", "remaining_time": "14:03:45"}
|
||||
{"current_steps": 410, "total_steps": 4242, "loss": 0.2947, "lr": 3.849411764705883e-05, "epoch": 0.6771263418662262, "percentage": 9.67, "elapsed_time": "1:30:06", "remaining_time": "14:02:08"}
|
||||
{"current_steps": 415, "total_steps": 4242, "loss": 0.2898, "lr": 3.8964705882352944e-05, "epoch": 0.685383980181668, "percentage": 9.78, "elapsed_time": "1:31:08", "remaining_time": "14:00:24"}
|
||||
{"current_steps": 420, "total_steps": 4242, "loss": 0.2916, "lr": 3.943529411764706e-05, "epoch": 0.6936416184971098, "percentage": 9.9, "elapsed_time": "1:32:10", "remaining_time": "13:58:51"}
|
||||
{"current_steps": 425, "total_steps": 4242, "loss": 0.2843, "lr": 3.990588235294118e-05, "epoch": 0.7018992568125516, "percentage": 10.02, "elapsed_time": "1:33:12", "remaining_time": "13:57:04"}
|
||||
{"current_steps": 430, "total_steps": 4242, "loss": 0.2947, "lr": 3.9999891613544045e-05, "epoch": 0.7101568951279934, "percentage": 10.14, "elapsed_time": "1:34:21", "remaining_time": "13:56:25"}
|
||||
{"current_steps": 435, "total_steps": 4242, "loss": 0.2995, "lr": 3.999945129558009e-05, "epoch": 0.7184145334434352, "percentage": 10.25, "elapsed_time": "1:35:18", "remaining_time": "13:54:09"}
|
||||
{"current_steps": 440, "total_steps": 4242, "loss": 0.2995, "lr": 3.9998672279405856e-05, "epoch": 0.726672171758877, "percentage": 10.37, "elapsed_time": "1:36:28", "remaining_time": "13:53:36"}
|
||||
{"current_steps": 445, "total_steps": 4242, "loss": 0.2915, "lr": 3.999755457821429e-05, "epoch": 0.7349298100743188, "percentage": 10.49, "elapsed_time": "1:37:31", "remaining_time": "13:52:10"}
|
||||
{"current_steps": 450, "total_steps": 4242, "loss": 0.2942, "lr": 3.999609821093405e-05, "epoch": 0.7431874483897605, "percentage": 10.61, "elapsed_time": "1:38:34", "remaining_time": "13:50:41"}
|
||||
{"current_steps": 455, "total_steps": 4242, "loss": 0.2988, "lr": 3.9994303202229285e-05, "epoch": 0.7514450867052023, "percentage": 10.73, "elapsed_time": "1:39:36", "remaining_time": "13:49:00"}
|
||||
{"current_steps": 460, "total_steps": 4242, "loss": 0.2923, "lr": 3.999216958249914e-05, "epoch": 0.7597027250206441, "percentage": 10.84, "elapsed_time": "1:40:43", "remaining_time": "13:48:07"}
|
||||
{"current_steps": 465, "total_steps": 4242, "loss": 0.2942, "lr": 3.9989697387877264e-05, "epoch": 0.7679603633360859, "percentage": 10.96, "elapsed_time": "1:41:47", "remaining_time": "13:46:50"}
|
||||
{"current_steps": 470, "total_steps": 4242, "loss": 0.2848, "lr": 3.9986886660231184e-05, "epoch": 0.7762180016515277, "percentage": 11.08, "elapsed_time": "1:42:46", "remaining_time": "13:44:49"}
|
||||
{"current_steps": 475, "total_steps": 4242, "loss": 0.2772, "lr": 3.998373744716165e-05, "epoch": 0.7844756399669695, "percentage": 11.2, "elapsed_time": "1:43:54", "remaining_time": "13:44:04"}
|
||||
{"current_steps": 480, "total_steps": 4242, "loss": 0.2922, "lr": 3.9980249802001756e-05, "epoch": 0.7927332782824112, "percentage": 11.32, "elapsed_time": "1:45:01", "remaining_time": "13:43:11"}
|
||||
{"current_steps": 485, "total_steps": 4242, "loss": 0.2849, "lr": 3.997642378381608e-05, "epoch": 0.800990916597853, "percentage": 11.43, "elapsed_time": "1:46:05", "remaining_time": "13:41:47"}
|
||||
{"current_steps": 490, "total_steps": 4242, "loss": 0.291, "lr": 3.997225945739968e-05, "epoch": 0.8092485549132948, "percentage": 11.55, "elapsed_time": "1:47:11", "remaining_time": "13:40:43"}
|
||||
{"current_steps": 495, "total_steps": 4242, "loss": 0.2893, "lr": 3.996775689327698e-05, "epoch": 0.8175061932287366, "percentage": 11.67, "elapsed_time": "1:48:14", "remaining_time": "13:39:21"}
|
||||
{"current_steps": 500, "total_steps": 4242, "loss": 0.2887, "lr": 3.99629161677006e-05, "epoch": 0.8257638315441783, "percentage": 11.79, "elapsed_time": "1:49:22", "remaining_time": "13:38:30"}
|
||||
{"current_steps": 505, "total_steps": 4242, "loss": 0.2798, "lr": 3.9957737362650034e-05, "epoch": 0.8340214698596201, "percentage": 11.9, "elapsed_time": "1:50:25", "remaining_time": "13:37:06"}
|
||||
{"current_steps": 510, "total_steps": 4242, "loss": 0.286, "lr": 3.99522205658303e-05, "epoch": 0.8422791081750619, "percentage": 12.02, "elapsed_time": "1:51:33", "remaining_time": "13:36:19"}
|
||||
{"current_steps": 515, "total_steps": 4242, "loss": 0.2896, "lr": 3.9946365870670417e-05, "epoch": 0.8505367464905037, "percentage": 12.14, "elapsed_time": "1:52:39", "remaining_time": "13:35:16"}
|
||||
{"current_steps": 520, "total_steps": 4242, "loss": 0.2828, "lr": 3.994017337632184e-05, "epoch": 0.8587943848059455, "percentage": 12.26, "elapsed_time": "1:53:47", "remaining_time": "13:34:32"}
|
||||
{"current_steps": 525, "total_steps": 4242, "loss": 0.2847, "lr": 3.9933643187656786e-05, "epoch": 0.8670520231213873, "percentage": 12.38, "elapsed_time": "1:54:52", "remaining_time": "13:33:19"}
|
||||
{"current_steps": 530, "total_steps": 4242, "loss": 0.2891, "lr": 3.992677541526645e-05, "epoch": 0.8753096614368291, "percentage": 12.49, "elapsed_time": "1:56:01", "remaining_time": "13:32:37"}
|
||||
{"current_steps": 535, "total_steps": 4242, "loss": 0.2837, "lr": 3.9919570175459136e-05, "epoch": 0.8835672997522709, "percentage": 12.61, "elapsed_time": "1:57:09", "remaining_time": "13:31:46"}
|
||||
{"current_steps": 540, "total_steps": 4242, "loss": 0.2852, "lr": 3.991202759025828e-05, "epoch": 0.8918249380677127, "percentage": 12.73, "elapsed_time": "1:58:15", "remaining_time": "13:30:44"}
|
||||
{"current_steps": 545, "total_steps": 4242, "loss": 0.2809, "lr": 3.990414778740038e-05, "epoch": 0.9000825763831544, "percentage": 12.85, "elapsed_time": "1:59:24", "remaining_time": "13:30:01"}
|
||||
{"current_steps": 550, "total_steps": 4242, "loss": 0.2795, "lr": 3.989593090033288e-05, "epoch": 0.9083402146985962, "percentage": 12.97, "elapsed_time": "2:00:35", "remaining_time": "13:29:31"}
|
||||
{"current_steps": 555, "total_steps": 4242, "loss": 0.2768, "lr": 3.988737706821184e-05, "epoch": 0.916597853014038, "percentage": 13.08, "elapsed_time": "2:01:42", "remaining_time": "13:28:31"}
|
||||
{"current_steps": 560, "total_steps": 4242, "loss": 0.2885, "lr": 3.987848643589959e-05, "epoch": 0.9248554913294798, "percentage": 13.2, "elapsed_time": "2:02:42", "remaining_time": "13:26:47"}
|
||||
{"current_steps": 565, "total_steps": 4242, "loss": 0.2928, "lr": 3.986925915396235e-05, "epoch": 0.9331131296449215, "percentage": 13.32, "elapsed_time": "2:03:57", "remaining_time": "13:26:40"}
|
||||
{"current_steps": 570, "total_steps": 4242, "loss": 0.2769, "lr": 3.9859695378667584e-05, "epoch": 0.9413707679603633, "percentage": 13.44, "elapsed_time": "2:05:00", "remaining_time": "13:25:16"}
|
||||
{"current_steps": 575, "total_steps": 4242, "loss": 0.2724, "lr": 3.984979527198143e-05, "epoch": 0.9496284062758051, "percentage": 13.55, "elapsed_time": "2:06:08", "remaining_time": "13:24:27"}
|
||||
{"current_steps": 580, "total_steps": 4242, "loss": 0.2811, "lr": 3.98395590015659e-05, "epoch": 0.9578860445912469, "percentage": 13.67, "elapsed_time": "2:07:15", "remaining_time": "13:23:30"}
|
||||
{"current_steps": 585, "total_steps": 4242, "loss": 0.2757, "lr": 3.982898674077607e-05, "epoch": 0.9661436829066887, "percentage": 13.79, "elapsed_time": "2:08:20", "remaining_time": "13:22:18"}
|
||||
{"current_steps": 590, "total_steps": 4242, "loss": 0.2844, "lr": 3.981807866865715e-05, "epoch": 0.9744013212221305, "percentage": 13.91, "elapsed_time": "2:09:27", "remaining_time": "13:21:19"}
|
||||
{"current_steps": 595, "total_steps": 4242, "loss": 0.2859, "lr": 3.980683496994143e-05, "epoch": 0.9826589595375722, "percentage": 14.03, "elapsed_time": "2:10:33", "remaining_time": "13:20:15"}
|
||||
{"current_steps": 600, "total_steps": 4242, "loss": 0.2931, "lr": 3.9795255835045166e-05, "epoch": 0.990916597853014, "percentage": 14.14, "elapsed_time": "2:11:38", "remaining_time": "13:19:01"}
|
||||
{"current_steps": 605, "total_steps": 4242, "loss": 0.281, "lr": 3.9783341460065343e-05, "epoch": 0.9991742361684558, "percentage": 14.26, "elapsed_time": "2:12:42", "remaining_time": "13:17:45"}
|
||||
{"current_steps": 610, "total_steps": 4242, "loss": 0.2681, "lr": 3.977109204677639e-05, "epoch": 1.0066061106523534, "percentage": 14.38, "elapsed_time": "2:13:46", "remaining_time": "13:16:31"}
|
||||
{"current_steps": 615, "total_steps": 4242, "loss": 0.2727, "lr": 3.9758507802626704e-05, "epoch": 1.0148637489677952, "percentage": 14.5, "elapsed_time": "2:14:49", "remaining_time": "13:15:09"}
|
||||
{"current_steps": 620, "total_steps": 4242, "loss": 0.2768, "lr": 3.9745588940735176e-05, "epoch": 1.023121387283237, "percentage": 14.62, "elapsed_time": "2:15:51", "remaining_time": "13:13:40"}
|
||||
{"current_steps": 625, "total_steps": 4242, "loss": 0.266, "lr": 3.973233567988759e-05, "epoch": 1.0313790255986788, "percentage": 14.73, "elapsed_time": "2:16:51", "remaining_time": "13:12:01"}
|
||||
{"current_steps": 630, "total_steps": 4242, "loss": 0.2811, "lr": 3.9718748244532883e-05, "epoch": 1.0396366639141206, "percentage": 14.85, "elapsed_time": "2:17:55", "remaining_time": "13:10:45"}
|
||||
{"current_steps": 635, "total_steps": 4242, "loss": 0.2664, "lr": 3.970482686477937e-05, "epoch": 1.0478943022295624, "percentage": 14.97, "elapsed_time": "2:18:59", "remaining_time": "13:09:29"}
|
||||
{"current_steps": 640, "total_steps": 4242, "loss": 0.2707, "lr": 3.969057177639084e-05, "epoch": 1.0561519405450042, "percentage": 15.09, "elapsed_time": "2:20:01", "remaining_time": "13:08:05"}
|
||||
{"current_steps": 645, "total_steps": 4242, "loss": 0.2779, "lr": 3.967598322078257e-05, "epoch": 1.064409578860446, "percentage": 15.21, "elapsed_time": "2:21:05", "remaining_time": "13:06:51"}
|
||||
{"current_steps": 650, "total_steps": 4242, "loss": 0.2687, "lr": 3.966106144501721e-05, "epoch": 1.0726672171758878, "percentage": 15.32, "elapsed_time": "2:22:02", "remaining_time": "13:04:54"}
|
||||
{"current_steps": 655, "total_steps": 4242, "loss": 0.2664, "lr": 3.964580670180063e-05, "epoch": 1.0809248554913296, "percentage": 15.44, "elapsed_time": "2:23:06", "remaining_time": "13:03:44"}
|
||||
{"current_steps": 660, "total_steps": 4242, "loss": 0.2715, "lr": 3.9630219249477655e-05, "epoch": 1.0891824938067713, "percentage": 15.56, "elapsed_time": "2:24:11", "remaining_time": "13:02:31"}
|
||||
{"current_steps": 665, "total_steps": 4242, "loss": 0.2758, "lr": 3.96142993520276e-05, "epoch": 1.0974401321222131, "percentage": 15.68, "elapsed_time": "2:25:19", "remaining_time": "13:01:40"}
|
||||
{"current_steps": 670, "total_steps": 4242, "loss": 0.2664, "lr": 3.959804727905992e-05, "epoch": 1.1056977704376547, "percentage": 15.79, "elapsed_time": "2:26:27", "remaining_time": "13:00:47"}
|
||||
{"current_steps": 675, "total_steps": 4242, "loss": 0.2775, "lr": 3.9581463305809576e-05, "epoch": 1.1139554087530965, "percentage": 15.91, "elapsed_time": "2:27:34", "remaining_time": "12:59:50"}
|
||||
{"current_steps": 680, "total_steps": 4242, "loss": 0.2614, "lr": 3.956454771313236e-05, "epoch": 1.1222130470685383, "percentage": 16.03, "elapsed_time": "2:28:34", "remaining_time": "12:58:17"}
|
||||
{"current_steps": 685, "total_steps": 4242, "loss": 0.2731, "lr": 3.954730078750018e-05, "epoch": 1.13047068538398, "percentage": 16.15, "elapsed_time": "2:29:35", "remaining_time": "12:56:49"}
|
||||
{"current_steps": 690, "total_steps": 4242, "loss": 0.2762, "lr": 3.95297228209962e-05, "epoch": 1.138728323699422, "percentage": 16.27, "elapsed_time": "2:30:38", "remaining_time": "12:55:30"}
|
||||
{"current_steps": 695, "total_steps": 4242, "loss": 0.2692, "lr": 3.9511814111309875e-05, "epoch": 1.1469859620148637, "percentage": 16.38, "elapsed_time": "2:31:41", "remaining_time": "12:54:12"}
|
||||
{"current_steps": 700, "total_steps": 4242, "loss": 0.2667, "lr": 3.949357496173192e-05, "epoch": 1.1552436003303055, "percentage": 16.5, "elapsed_time": "2:32:43", "remaining_time": "12:52:47"}
|
||||
{"current_steps": 705, "total_steps": 4242, "loss": 0.2724, "lr": 3.947500568114918e-05, "epoch": 1.1635012386457473, "percentage": 16.62, "elapsed_time": "2:33:48", "remaining_time": "12:51:41"}
|
||||
{"current_steps": 710, "total_steps": 4242, "loss": 0.2698, "lr": 3.945610658403938e-05, "epoch": 1.171758876961189, "percentage": 16.74, "elapsed_time": "2:34:55", "remaining_time": "12:50:41"}
|
||||
{"current_steps": 715, "total_steps": 4242, "loss": 0.2683, "lr": 3.9436877990465826e-05, "epoch": 1.1800165152766309, "percentage": 16.86, "elapsed_time": "2:35:54", "remaining_time": "12:49:06"}
|
||||
{"current_steps": 720, "total_steps": 4242, "loss": 0.271, "lr": 3.9417320226071956e-05, "epoch": 1.1882741535920727, "percentage": 16.97, "elapsed_time": "2:36:59", "remaining_time": "12:47:58"}
|
||||
{"current_steps": 725, "total_steps": 4242, "loss": 0.2644, "lr": 3.939743362207586e-05, "epoch": 1.1965317919075145, "percentage": 17.09, "elapsed_time": "2:38:04", "remaining_time": "12:46:49"}
|
||||
{"current_steps": 730, "total_steps": 4242, "loss": 0.2581, "lr": 3.937721851526462e-05, "epoch": 1.2047894302229563, "percentage": 17.21, "elapsed_time": "2:39:09", "remaining_time": "12:45:42"}
|
||||
{"current_steps": 735, "total_steps": 4242, "loss": 0.2721, "lr": 3.935667524798866e-05, "epoch": 1.213047068538398, "percentage": 17.33, "elapsed_time": "2:40:14", "remaining_time": "12:44:34"}
|
||||
{"current_steps": 740, "total_steps": 4242, "loss": 0.2607, "lr": 3.933580416815593e-05, "epoch": 1.2213047068538398, "percentage": 17.44, "elapsed_time": "2:41:17", "remaining_time": "12:43:16"}
|
||||
{"current_steps": 745, "total_steps": 4242, "loss": 0.2729, "lr": 3.931460562922598e-05, "epoch": 1.2295623451692816, "percentage": 17.56, "elapsed_time": "2:42:26", "remaining_time": "12:42:28"}
|
||||
{"current_steps": 750, "total_steps": 4242, "loss": 0.2767, "lr": 3.9293079990204034e-05, "epoch": 1.2378199834847234, "percentage": 17.68, "elapsed_time": "2:43:29", "remaining_time": "12:41:13"}
|
||||
{"current_steps": 755, "total_steps": 4242, "loss": 0.2726, "lr": 3.927122761563485e-05, "epoch": 1.2460776218001652, "percentage": 17.8, "elapsed_time": "2:44:33", "remaining_time": "12:39:59"}
|
||||
{"current_steps": 760, "total_steps": 4242, "loss": 0.278, "lr": 3.9249048875596593e-05, "epoch": 1.254335260115607, "percentage": 17.92, "elapsed_time": "2:45:37", "remaining_time": "12:38:50"}
|
||||
{"current_steps": 765, "total_steps": 4242, "loss": 0.2699, "lr": 3.922654414569455e-05, "epoch": 1.2625928984310488, "percentage": 18.03, "elapsed_time": "2:46:38", "remaining_time": "12:37:24"}
|
||||
{"current_steps": 770, "total_steps": 4242, "loss": 0.2631, "lr": 3.9203713807054755e-05, "epoch": 1.2708505367464906, "percentage": 18.15, "elapsed_time": "2:47:47", "remaining_time": "12:36:35"}
|
||||
{"current_steps": 775, "total_steps": 4242, "loss": 0.2585, "lr": 3.918055824631757e-05, "epoch": 1.2791081750619324, "percentage": 18.27, "elapsed_time": "2:48:51", "remaining_time": "12:35:25"}
|
||||
{"current_steps": 780, "total_steps": 4242, "loss": 0.269, "lr": 3.915707785563109e-05, "epoch": 1.287365813377374, "percentage": 18.39, "elapsed_time": "2:49:54", "remaining_time": "12:34:07"}
|
||||
{"current_steps": 785, "total_steps": 4242, "loss": 0.2716, "lr": 3.913327303264456e-05, "epoch": 1.2956234516928158, "percentage": 18.51, "elapsed_time": "2:51:00", "remaining_time": "12:33:05"}
|
||||
{"current_steps": 790, "total_steps": 4242, "loss": 0.266, "lr": 3.9109144180501564e-05, "epoch": 1.3038810900082576, "percentage": 18.62, "elapsed_time": "2:52:09", "remaining_time": "12:32:17"}
|
||||
{"current_steps": 795, "total_steps": 4242, "loss": 0.2651, "lr": 3.908469170783328e-05, "epoch": 1.3121387283236994, "percentage": 18.74, "elapsed_time": "2:53:13", "remaining_time": "12:31:04"}
|
||||
{"current_steps": 800, "total_steps": 4242, "loss": 0.2722, "lr": 3.9059916028751496e-05, "epoch": 1.3203963666391412, "percentage": 18.86, "elapsed_time": "2:54:15", "remaining_time": "12:29:44"}
|
||||
{"current_steps": 805, "total_steps": 4242, "loss": 0.2591, "lr": 3.903481756284164e-05, "epoch": 1.328654004954583, "percentage": 18.98, "elapsed_time": "2:55:16", "remaining_time": "12:28:19"}
|
||||
{"current_steps": 810, "total_steps": 4242, "loss": 0.2707, "lr": 3.900939673515564e-05, "epoch": 1.3369116432700248, "percentage": 19.09, "elapsed_time": "2:56:23", "remaining_time": "12:27:22"}
|
||||
{"current_steps": 815, "total_steps": 4242, "loss": 0.2659, "lr": 3.898365397620475e-05, "epoch": 1.3451692815854666, "percentage": 19.21, "elapsed_time": "2:57:25", "remaining_time": "12:26:04"}
|
||||
{"current_steps": 820, "total_steps": 4242, "loss": 0.2689, "lr": 3.8957589721952254e-05, "epoch": 1.3534269199009084, "percentage": 19.33, "elapsed_time": "2:58:27", "remaining_time": "12:24:43"}
|
||||
{"current_steps": 825, "total_steps": 4242, "loss": 0.2762, "lr": 3.8931204413806076e-05, "epoch": 1.3616845582163501, "percentage": 19.45, "elapsed_time": "2:59:35", "remaining_time": "12:23:49"}
|
||||
{"current_steps": 830, "total_steps": 4242, "loss": 0.2653, "lr": 3.8904498498611306e-05, "epoch": 1.369942196531792, "percentage": 19.57, "elapsed_time": "3:00:35", "remaining_time": "12:22:24"}
|
||||
{"current_steps": 835, "total_steps": 4242, "loss": 0.2698, "lr": 3.8877472428642634e-05, "epoch": 1.3781998348472337, "percentage": 19.68, "elapsed_time": "3:01:41", "remaining_time": "12:21:19"}
|
||||
{"current_steps": 840, "total_steps": 4242, "loss": 0.2614, "lr": 3.885012666159669e-05, "epoch": 1.3864574731626755, "percentage": 19.8, "elapsed_time": "3:02:44", "remaining_time": "12:20:07"}
|
||||
{"current_steps": 845, "total_steps": 4242, "loss": 0.2575, "lr": 3.8822461660584315e-05, "epoch": 1.3947151114781173, "percentage": 19.92, "elapsed_time": "3:03:49", "remaining_time": "12:18:58"}
|
||||
{"current_steps": 850, "total_steps": 4242, "loss": 0.2724, "lr": 3.8794477894122666e-05, "epoch": 1.402972749793559, "percentage": 20.04, "elapsed_time": "3:04:49", "remaining_time": "12:17:33"}
|
||||
{"current_steps": 855, "total_steps": 4242, "loss": 0.2687, "lr": 3.8766175836127323e-05, "epoch": 1.4112303881090007, "percentage": 20.16, "elapsed_time": "3:05:58", "remaining_time": "12:16:45"}
|
||||
{"current_steps": 860, "total_steps": 4242, "loss": 0.2611, "lr": 3.873755596590426e-05, "epoch": 1.4194880264244425, "percentage": 20.27, "elapsed_time": "3:07:06", "remaining_time": "12:15:50"}
|
||||
{"current_steps": 865, "total_steps": 4242, "loss": 0.2593, "lr": 3.8708618768141715e-05, "epoch": 1.4277456647398843, "percentage": 20.39, "elapsed_time": "3:08:14", "remaining_time": "12:14:54"}
|
||||
{"current_steps": 870, "total_steps": 4242, "loss": 0.2724, "lr": 3.867936473290199e-05, "epoch": 1.436003303055326, "percentage": 20.51, "elapsed_time": "3:09:24", "remaining_time": "12:14:07"}
|
||||
{"current_steps": 875, "total_steps": 4242, "loss": 0.2647, "lr": 3.8649794355613136e-05, "epoch": 1.4442609413707679, "percentage": 20.63, "elapsed_time": "3:10:33", "remaining_time": "12:13:15"}
|
||||
{"current_steps": 880, "total_steps": 4242, "loss": 0.2676, "lr": 3.8619908137060605e-05, "epoch": 1.4525185796862097, "percentage": 20.74, "elapsed_time": "3:11:39", "remaining_time": "12:12:15"}
|
||||
{"current_steps": 885, "total_steps": 4242, "loss": 0.2622, "lr": 3.85897065833787e-05, "epoch": 1.4607762180016515, "percentage": 20.86, "elapsed_time": "3:12:47", "remaining_time": "12:11:17"}
|
||||
{"current_steps": 890, "total_steps": 4242, "loss": 0.2728, "lr": 3.855919020604207e-05, "epoch": 1.4690338563170933, "percentage": 20.98, "elapsed_time": "3:13:53", "remaining_time": "12:10:13"}
|
||||
{"current_steps": 895, "total_steps": 4242, "loss": 0.2694, "lr": 3.852835952185702e-05, "epoch": 1.477291494632535, "percentage": 21.1, "elapsed_time": "3:14:56", "remaining_time": "12:09:02"}
|
||||
{"current_steps": 900, "total_steps": 4242, "loss": 0.2659, "lr": 3.849721505295274e-05, "epoch": 1.4855491329479769, "percentage": 21.22, "elapsed_time": "3:16:00", "remaining_time": "12:07:52"}
|
||||
{"current_steps": 905, "total_steps": 4242, "loss": 0.2638, "lr": 3.8465757326772507e-05, "epoch": 1.4938067712634187, "percentage": 21.33, "elapsed_time": "3:17:10", "remaining_time": "12:07:00"}
|
||||
{"current_steps": 910, "total_steps": 4242, "loss": 0.2659, "lr": 3.84339868760647e-05, "epoch": 1.5020644095788604, "percentage": 21.45, "elapsed_time": "3:18:16", "remaining_time": "12:05:58"}
|
||||
{"current_steps": 915, "total_steps": 4242, "loss": 0.2609, "lr": 3.840190423887383e-05, "epoch": 1.5103220478943022, "percentage": 21.57, "elapsed_time": "3:19:18", "remaining_time": "12:04:41"}
|
||||
{"current_steps": 920, "total_steps": 4242, "loss": 0.2776, "lr": 3.8369509958531394e-05, "epoch": 1.518579686209744, "percentage": 21.69, "elapsed_time": "3:20:22", "remaining_time": "12:03:30"}
|
||||
{"current_steps": 925, "total_steps": 4242, "loss": 0.2565, "lr": 3.833680458364668e-05, "epoch": 1.5268373245251858, "percentage": 21.81, "elapsed_time": "3:21:28", "remaining_time": "12:02:27"}
|
||||
{"current_steps": 930, "total_steps": 4242, "loss": 0.2573, "lr": 3.8303788668097486e-05, "epoch": 1.5350949628406276, "percentage": 21.92, "elapsed_time": "3:22:34", "remaining_time": "12:01:23"}
|
||||
{"current_steps": 935, "total_steps": 4242, "loss": 0.2645, "lr": 3.8270462771020714e-05, "epoch": 1.5433526011560694, "percentage": 22.04, "elapsed_time": "3:23:36", "remaining_time": "12:00:09"}
|
||||
{"current_steps": 940, "total_steps": 4242, "loss": 0.2702, "lr": 3.8236827456802944e-05, "epoch": 1.5516102394715112, "percentage": 22.16, "elapsed_time": "3:24:49", "remaining_time": "11:59:28"}
|
||||
{"current_steps": 945, "total_steps": 4242, "loss": 0.2728, "lr": 3.820288329507083e-05, "epoch": 1.559867877786953, "percentage": 22.28, "elapsed_time": "3:25:54", "remaining_time": "11:58:23"}
|
||||
{"current_steps": 950, "total_steps": 4242, "loss": 0.265, "lr": 3.81686308606815e-05, "epoch": 1.5681255161023948, "percentage": 22.4, "elapsed_time": "3:26:51", "remaining_time": "11:56:48"}
|
||||
{"current_steps": 955, "total_steps": 4242, "loss": 0.2628, "lr": 3.813407073371277e-05, "epoch": 1.5763831544178366, "percentage": 22.51, "elapsed_time": "3:27:48", "remaining_time": "11:55:16"}
|
||||
{"current_steps": 960, "total_steps": 4242, "loss": 0.265, "lr": 3.8099203499453354e-05, "epoch": 1.5846407927332784, "percentage": 22.63, "elapsed_time": "3:28:53", "remaining_time": "11:54:09"}
|
||||
{"current_steps": 965, "total_steps": 4242, "loss": 0.2647, "lr": 3.806402974839295e-05, "epoch": 1.5928984310487202, "percentage": 22.75, "elapsed_time": "3:29:58", "remaining_time": "11:53:02"}
|
||||
{"current_steps": 970, "total_steps": 4242, "loss": 0.2708, "lr": 3.8028550076212223e-05, "epoch": 1.601156069364162, "percentage": 22.87, "elapsed_time": "3:30:56", "remaining_time": "11:51:33"}
|
||||
{"current_steps": 975, "total_steps": 4242, "loss": 0.2664, "lr": 3.799276508377273e-05, "epoch": 1.6094137076796038, "percentage": 22.98, "elapsed_time": "3:32:06", "remaining_time": "11:50:42"}
|
||||
{"current_steps": 980, "total_steps": 4242, "loss": 0.2669, "lr": 3.7956675377106746e-05, "epoch": 1.6176713459950454, "percentage": 23.1, "elapsed_time": "3:33:13", "remaining_time": "11:49:44"}
|
||||
{"current_steps": 985, "total_steps": 4242, "loss": 0.2672, "lr": 3.792028156740699e-05, "epoch": 1.6259289843104872, "percentage": 23.22, "elapsed_time": "3:34:23", "remaining_time": "11:48:54"}
|
||||
{"current_steps": 990, "total_steps": 4242, "loss": 0.264, "lr": 3.788358427101629e-05, "epoch": 1.634186622625929, "percentage": 23.34, "elapsed_time": "3:35:31", "remaining_time": "11:47:56"}
|
||||
{"current_steps": 995, "total_steps": 4242, "loss": 0.253, "lr": 3.784658410941711e-05, "epoch": 1.6424442609413707, "percentage": 23.46, "elapsed_time": "3:36:34", "remaining_time": "11:46:45"}
|
||||
{"current_steps": 1000, "total_steps": 4242, "loss": 0.2726, "lr": 3.780928170922108e-05, "epoch": 1.6507018992568125, "percentage": 23.57, "elapsed_time": "3:37:38", "remaining_time": "11:45:35"}
|
||||
{"current_steps": 1005, "total_steps": 4242, "loss": 0.2634, "lr": 3.777167770215834e-05, "epoch": 1.6589595375722543, "percentage": 23.69, "elapsed_time": "3:38:39", "remaining_time": "11:44:17"}
|
||||
{"current_steps": 1010, "total_steps": 4242, "loss": 0.2655, "lr": 3.773377272506685e-05, "epoch": 1.6672171758876961, "percentage": 23.81, "elapsed_time": "3:39:44", "remaining_time": "11:43:10"}
|
||||
{"current_steps": 1015, "total_steps": 4242, "loss": 0.2564, "lr": 3.769556741988163e-05, "epoch": 1.675474814203138, "percentage": 23.93, "elapsed_time": "3:40:48", "remaining_time": "11:42:01"}
|
||||
{"current_steps": 1020, "total_steps": 4242, "loss": 0.2696, "lr": 3.7657062433623825e-05, "epoch": 1.6837324525185797, "percentage": 24.05, "elapsed_time": "3:41:57", "remaining_time": "11:41:07"}
|
||||
{"current_steps": 1025, "total_steps": 4242, "loss": 0.2581, "lr": 3.761825841838986e-05, "epoch": 1.6919900908340215, "percentage": 24.16, "elapsed_time": "3:43:01", "remaining_time": "11:39:58"}
|
||||
{"current_steps": 1030, "total_steps": 4242, "loss": 0.268, "lr": 3.7579156031340274e-05, "epoch": 1.700247729149463, "percentage": 24.28, "elapsed_time": "3:44:08", "remaining_time": "11:38:57"}
|
||||
{"current_steps": 1035, "total_steps": 4242, "loss": 0.2578, "lr": 3.753975593468865e-05, "epoch": 1.708505367464905, "percentage": 24.4, "elapsed_time": "3:45:08", "remaining_time": "11:37:37"}
|
||||
{"current_steps": 1040, "total_steps": 4242, "loss": 0.2616, "lr": 3.750005879569043e-05, "epoch": 1.7167630057803467, "percentage": 24.52, "elapsed_time": "3:46:07", "remaining_time": "11:36:10"}
|
||||
{"current_steps": 1045, "total_steps": 4242, "loss": 0.2615, "lr": 3.7460065286631526e-05, "epoch": 1.7250206440957885, "percentage": 24.63, "elapsed_time": "3:47:12", "remaining_time": "11:35:06"}
|
||||
{"current_steps": 1050, "total_steps": 4242, "loss": 0.2624, "lr": 3.741977608481704e-05, "epoch": 1.7332782824112303, "percentage": 24.75, "elapsed_time": "3:48:17", "remaining_time": "11:34:00"}
|
||||
{"current_steps": 1055, "total_steps": 4242, "loss": 0.2568, "lr": 3.73791918725597e-05, "epoch": 1.741535920726672, "percentage": 24.87, "elapsed_time": "3:49:23", "remaining_time": "11:32:57"}
|
||||
{"current_steps": 1060, "total_steps": 4242, "loss": 0.2637, "lr": 3.733831333716838e-05, "epoch": 1.7497935590421139, "percentage": 24.99, "elapsed_time": "3:50:25", "remaining_time": "11:31:42"}
|
||||
{"current_steps": 1065, "total_steps": 4242, "loss": 0.2694, "lr": 3.729714117093641e-05, "epoch": 1.7580511973575557, "percentage": 25.11, "elapsed_time": "3:51:29", "remaining_time": "11:30:34"}
|
||||
{"current_steps": 1070, "total_steps": 4242, "loss": 0.2728, "lr": 3.725567607112987e-05, "epoch": 1.7663088356729975, "percentage": 25.22, "elapsed_time": "3:52:33", "remaining_time": "11:29:24"}
|
||||
{"current_steps": 1075, "total_steps": 4242, "loss": 0.2613, "lr": 3.721391873997577e-05, "epoch": 1.7745664739884393, "percentage": 25.34, "elapsed_time": "3:53:36", "remaining_time": "11:28:13"}
|
||||
{"current_steps": 1080, "total_steps": 4242, "loss": 0.2638, "lr": 3.7171869884650186e-05, "epoch": 1.782824112303881, "percentage": 25.46, "elapsed_time": "3:54:42", "remaining_time": "11:27:09"}
|
||||
{"current_steps": 1085, "total_steps": 4242, "loss": 0.2757, "lr": 3.712953021726626e-05, "epoch": 1.7910817506193228, "percentage": 25.58, "elapsed_time": "3:55:49", "remaining_time": "11:26:10"}
|
||||
{"current_steps": 1090, "total_steps": 4242, "loss": 0.2574, "lr": 3.708690045486214e-05, "epoch": 1.7993393889347646, "percentage": 25.7, "elapsed_time": "3:56:56", "remaining_time": "11:25:11"}
|
||||
{"current_steps": 1095, "total_steps": 4242, "loss": 0.251, "lr": 3.7043981319388844e-05, "epoch": 1.8075970272502064, "percentage": 25.81, "elapsed_time": "3:57:55", "remaining_time": "11:23:47"}
|
||||
{"current_steps": 1100, "total_steps": 4242, "loss": 0.2598, "lr": 3.700077353769803e-05, "epoch": 1.8158546655656482, "percentage": 25.93, "elapsed_time": "3:59:03", "remaining_time": "11:22:51"}
|
||||
{"current_steps": 1105, "total_steps": 4242, "loss": 0.2688, "lr": 3.69572778415297e-05, "epoch": 1.82411230388109, "percentage": 26.05, "elapsed_time": "4:00:04", "remaining_time": "11:21:34"}
|
||||
{"current_steps": 1110, "total_steps": 4242, "loss": 0.263, "lr": 3.691349496749977e-05, "epoch": 1.8323699421965318, "percentage": 26.17, "elapsed_time": "4:01:10", "remaining_time": "11:20:30"}
|
||||
{"current_steps": 1115, "total_steps": 4242, "loss": 0.2597, "lr": 3.686942565708765e-05, "epoch": 1.8406275805119736, "percentage": 26.28, "elapsed_time": "4:02:14", "remaining_time": "11:19:21"}
|
||||
{"current_steps": 1120, "total_steps": 4242, "loss": 0.2597, "lr": 3.6825070656623626e-05, "epoch": 1.8488852188274154, "percentage": 26.4, "elapsed_time": "4:03:14", "remaining_time": "11:18:01"}
|
||||
{"current_steps": 1125, "total_steps": 4242, "loss": 0.2753, "lr": 3.6780430717276295e-05, "epoch": 1.8571428571428572, "percentage": 26.52, "elapsed_time": "4:04:17", "remaining_time": "11:16:51"}
|
||||
{"current_steps": 1130, "total_steps": 4242, "loss": 0.256, "lr": 3.673550659503975e-05, "epoch": 1.865400495458299, "percentage": 26.64, "elapsed_time": "4:05:19", "remaining_time": "11:15:38"}
|
||||
{"current_steps": 1135, "total_steps": 4242, "loss": 0.256, "lr": 3.669029905072087e-05, "epoch": 1.8736581337737408, "percentage": 26.76, "elapsed_time": "4:06:24", "remaining_time": "11:14:31"}
|
||||
{"current_steps": 1140, "total_steps": 4242, "loss": 0.254, "lr": 3.664480884992638e-05, "epoch": 1.8819157720891826, "percentage": 26.87, "elapsed_time": "4:07:23", "remaining_time": "11:13:10"}
|
||||
{"current_steps": 1145, "total_steps": 4242, "loss": 0.2615, "lr": 3.6599036763049886e-05, "epoch": 1.8901734104046244, "percentage": 26.99, "elapsed_time": "4:08:26", "remaining_time": "11:11:59"}
|
||||
{"current_steps": 1150, "total_steps": 4242, "loss": 0.2645, "lr": 3.655298356525885e-05, "epoch": 1.8984310487200662, "percentage": 27.11, "elapsed_time": "4:09:33", "remaining_time": "11:10:58"}
|
||||
{"current_steps": 1155, "total_steps": 4242, "loss": 0.2632, "lr": 3.650665003648147e-05, "epoch": 1.906688687035508, "percentage": 27.23, "elapsed_time": "4:10:32", "remaining_time": "11:09:38"}
|
||||
{"current_steps": 1160, "total_steps": 4242, "loss": 0.2577, "lr": 3.646003696139341e-05, "epoch": 1.9149463253509498, "percentage": 27.35, "elapsed_time": "4:11:35", "remaining_time": "11:08:27"}
|
||||
{"current_steps": 1165, "total_steps": 4242, "loss": 0.2655, "lr": 3.64131451294046e-05, "epoch": 1.9232039636663916, "percentage": 27.46, "elapsed_time": "4:12:38", "remaining_time": "11:07:15"}
|
||||
{"current_steps": 1170, "total_steps": 4242, "loss": 0.2587, "lr": 3.6365975334645806e-05, "epoch": 1.9314616019818331, "percentage": 27.58, "elapsed_time": "4:13:47", "remaining_time": "11:06:21"}
|
||||
{"current_steps": 1175, "total_steps": 4242, "loss": 0.26, "lr": 3.63185283759552e-05, "epoch": 1.939719240297275, "percentage": 27.7, "elapsed_time": "4:14:52", "remaining_time": "11:05:15"}
|
||||
{"current_steps": 1180, "total_steps": 4242, "loss": 0.2618, "lr": 3.627080505686481e-05, "epoch": 1.9479768786127167, "percentage": 27.82, "elapsed_time": "4:16:04", "remaining_time": "11:04:30"}
|
||||
{"current_steps": 1185, "total_steps": 4242, "loss": 0.2671, "lr": 3.622280618558696e-05, "epoch": 1.9562345169281585, "percentage": 27.93, "elapsed_time": "4:17:10", "remaining_time": "11:03:26"}
|
||||
{"current_steps": 1190, "total_steps": 4242, "loss": 0.2803, "lr": 3.617453257500055e-05, "epoch": 1.9644921552436003, "percentage": 28.05, "elapsed_time": "4:18:17", "remaining_time": "11:02:27"}
|
||||
{"current_steps": 1195, "total_steps": 4242, "loss": 0.2602, "lr": 3.6125985042637265e-05, "epoch": 1.9727497935590421, "percentage": 28.17, "elapsed_time": "4:19:17", "remaining_time": "11:01:07"}
|
||||
{"current_steps": 1200, "total_steps": 4242, "loss": 0.2627, "lr": 3.6077164410667786e-05, "epoch": 1.981007431874484, "percentage": 28.29, "elapsed_time": "4:20:20", "remaining_time": "10:59:57"}
|
||||
{"current_steps": 1205, "total_steps": 4242, "loss": 0.2553, "lr": 3.602807150588784e-05, "epoch": 1.9892650701899257, "percentage": 28.41, "elapsed_time": "4:21:21", "remaining_time": "10:58:43"}
|
||||
{"current_steps": 1210, "total_steps": 4242, "loss": 0.2546, "lr": 3.597870715970417e-05, "epoch": 1.9975227085053675, "percentage": 28.52, "elapsed_time": "4:22:27", "remaining_time": "10:57:39"}
|
||||
{"current_steps": 1215, "total_steps": 4242, "loss": 0.2451, "lr": 3.592907220812051e-05, "epoch": 2.004954582989265, "percentage": 28.64, "elapsed_time": "4:23:22", "remaining_time": "10:56:08"}
|
||||
{"current_steps": 1220, "total_steps": 4242, "loss": 0.2509, "lr": 3.587916749172338e-05, "epoch": 2.013212221304707, "percentage": 28.76, "elapsed_time": "4:24:28", "remaining_time": "10:55:06"}
|
||||
{"current_steps": 1225, "total_steps": 4242, "loss": 0.25, "lr": 3.582899385566787e-05, "epoch": 2.0214698596201486, "percentage": 28.88, "elapsed_time": "4:25:34", "remaining_time": "10:54:04"}
|
||||
{"current_steps": 1230, "total_steps": 4242, "loss": 0.2422, "lr": 3.577855214966333e-05, "epoch": 2.0297274979355904, "percentage": 29.0, "elapsed_time": "4:26:33", "remaining_time": "10:52:43"}
|
||||
{"current_steps": 1235, "total_steps": 4242, "loss": 0.2539, "lr": 3.572784322795898e-05, "epoch": 2.037985136251032, "percentage": 29.11, "elapsed_time": "4:27:42", "remaining_time": "10:51:50"}
|
||||
{"current_steps": 1240, "total_steps": 4242, "loss": 0.2542, "lr": 3.567686794932943e-05, "epoch": 2.046242774566474, "percentage": 29.23, "elapsed_time": "4:28:49", "remaining_time": "10:50:49"}
|
||||
{"current_steps": 1245, "total_steps": 4242, "loss": 0.2543, "lr": 3.5625627177060136e-05, "epoch": 2.0545004128819158, "percentage": 29.35, "elapsed_time": "4:29:54", "remaining_time": "10:49:44"}
|
||||
{"current_steps": 1250, "total_steps": 4242, "loss": 0.2457, "lr": 3.557412177893281e-05, "epoch": 2.0627580511973576, "percentage": 29.47, "elapsed_time": "4:31:01", "remaining_time": "10:48:44"}
|
||||
{"current_steps": 1255, "total_steps": 4242, "loss": 0.25, "lr": 3.5522352627210685e-05, "epoch": 2.0710156895127994, "percentage": 29.59, "elapsed_time": "4:32:08", "remaining_time": "10:47:44"}
|
||||
{"current_steps": 1260, "total_steps": 4242, "loss": 0.2573, "lr": 3.5470320598623784e-05, "epoch": 2.079273327828241, "percentage": 29.7, "elapsed_time": "4:33:13", "remaining_time": "10:46:37"}
|
||||
{"current_steps": 1265, "total_steps": 4242, "loss": 0.2583, "lr": 3.5418026574354e-05, "epoch": 2.087530966143683, "percentage": 29.82, "elapsed_time": "4:34:22", "remaining_time": "10:45:42"}
|
||||
{"current_steps": 1270, "total_steps": 4242, "loss": 0.2383, "lr": 3.536547144002027e-05, "epoch": 2.0957886044591247, "percentage": 29.94, "elapsed_time": "4:35:29", "remaining_time": "10:44:41"}
|
||||
{"current_steps": 1275, "total_steps": 4242, "loss": 0.2531, "lr": 3.531265608566351e-05, "epoch": 2.1040462427745665, "percentage": 30.06, "elapsed_time": "4:36:35", "remaining_time": "10:43:39"}
|
||||
{"current_steps": 1280, "total_steps": 4242, "loss": 0.2489, "lr": 3.525958140573155e-05, "epoch": 2.1123038810900083, "percentage": 30.17, "elapsed_time": "4:37:48", "remaining_time": "10:42:51"}
|
||||
{"current_steps": 1285, "total_steps": 4242, "loss": 0.2477, "lr": 3.5206248299064016e-05, "epoch": 2.12056151940545, "percentage": 30.29, "elapsed_time": "4:38:57", "remaining_time": "10:41:54"}
|
||||
{"current_steps": 1290, "total_steps": 4242, "loss": 0.2497, "lr": 3.5152657668877065e-05, "epoch": 2.128819157720892, "percentage": 30.41, "elapsed_time": "4:40:00", "remaining_time": "10:40:44"}
|
||||
{"current_steps": 1295, "total_steps": 4242, "loss": 0.2466, "lr": 3.5098810422748144e-05, "epoch": 2.1370767960363337, "percentage": 30.53, "elapsed_time": "4:41:03", "remaining_time": "10:39:35"}
|
||||
{"current_steps": 1300, "total_steps": 4242, "loss": 0.2431, "lr": 3.504470747260054e-05, "epoch": 2.1453344343517755, "percentage": 30.65, "elapsed_time": "4:42:10", "remaining_time": "10:38:34"}
|
||||
{"current_steps": 1305, "total_steps": 4242, "loss": 0.252, "lr": 3.499034973468802e-05, "epoch": 2.1535920726672173, "percentage": 30.76, "elapsed_time": "4:43:18", "remaining_time": "10:37:37"}
|
||||
{"current_steps": 1310, "total_steps": 4242, "loss": 0.2423, "lr": 3.493573812957927e-05, "epoch": 2.161849710982659, "percentage": 30.88, "elapsed_time": "4:44:21", "remaining_time": "10:36:26"}
|
||||
{"current_steps": 1315, "total_steps": 4242, "loss": 0.2504, "lr": 3.488087358214232e-05, "epoch": 2.170107349298101, "percentage": 31.0, "elapsed_time": "4:45:24", "remaining_time": "10:35:15"}
|
||||
{"current_steps": 1320, "total_steps": 4242, "loss": 0.2468, "lr": 3.4825757021528844e-05, "epoch": 2.1783649876135427, "percentage": 31.12, "elapsed_time": "4:46:21", "remaining_time": "10:33:53"}
|
||||
{"current_steps": 1325, "total_steps": 4242, "loss": 0.2486, "lr": 3.477038938115848e-05, "epoch": 2.1866226259289845, "percentage": 31.24, "elapsed_time": "4:47:26", "remaining_time": "10:32:48"}
|
||||
{"current_steps": 1330, "total_steps": 4242, "loss": 0.2457, "lr": 3.471477159870299e-05, "epoch": 2.1948802642444263, "percentage": 31.35, "elapsed_time": "4:48:33", "remaining_time": "10:31:46"}
|
||||
{"current_steps": 1335, "total_steps": 4242, "loss": 0.2591, "lr": 3.465890461607038e-05, "epoch": 2.203137902559868, "percentage": 31.47, "elapsed_time": "4:49:45", "remaining_time": "10:30:56"}
|
||||
{"current_steps": 1340, "total_steps": 4242, "loss": 0.2468, "lr": 3.460278937938896e-05, "epoch": 2.2113955408753094, "percentage": 31.59, "elapsed_time": "4:50:49", "remaining_time": "10:29:49"}
|
||||
{"current_steps": 1345, "total_steps": 4242, "loss": 0.2532, "lr": 3.454642683899132e-05, "epoch": 2.2196531791907512, "percentage": 31.71, "elapsed_time": "4:51:56", "remaining_time": "10:28:49"}
|
||||
{"current_steps": 1350, "total_steps": 4242, "loss": 0.2521, "lr": 3.4489817949398224e-05, "epoch": 2.227910817506193, "percentage": 31.82, "elapsed_time": "4:52:59", "remaining_time": "10:27:39"}
|
||||
{"current_steps": 1355, "total_steps": 4242, "loss": 0.254, "lr": 3.443296366930244e-05, "epoch": 2.236168455821635, "percentage": 31.94, "elapsed_time": "4:54:07", "remaining_time": "10:26:41"}
|
||||
{"current_steps": 1360, "total_steps": 4242, "loss": 0.2569, "lr": 3.4375864961552546e-05, "epoch": 2.2444260941370766, "percentage": 32.06, "elapsed_time": "4:55:11", "remaining_time": "10:25:32"}
|
||||
{"current_steps": 1365, "total_steps": 4242, "loss": 0.2476, "lr": 3.431852279313657e-05, "epoch": 2.2526837324525184, "percentage": 32.18, "elapsed_time": "4:56:13", "remaining_time": "10:24:20"}
|
||||
{"current_steps": 1370, "total_steps": 4242, "loss": 0.2444, "lr": 3.426093813516565e-05, "epoch": 2.26094137076796, "percentage": 32.3, "elapsed_time": "4:57:18", "remaining_time": "10:23:14"}
|
||||
{"current_steps": 1375, "total_steps": 4242, "loss": 0.2493, "lr": 3.420311196285757e-05, "epoch": 2.269199009083402, "percentage": 32.41, "elapsed_time": "4:58:17", "remaining_time": "10:21:57"}
|
||||
{"current_steps": 1380, "total_steps": 4242, "loss": 0.249, "lr": 3.4145045255520244e-05, "epoch": 2.277456647398844, "percentage": 32.53, "elapsed_time": "4:59:20", "remaining_time": "10:20:48"}
|
||||
{"current_steps": 1385, "total_steps": 4242, "loss": 0.2514, "lr": 3.408673899653515e-05, "epoch": 2.2857142857142856, "percentage": 32.65, "elapsed_time": "5:00:25", "remaining_time": "10:19:42"}
|
||||
{"current_steps": 1390, "total_steps": 4242, "loss": 0.2511, "lr": 3.4028194173340655e-05, "epoch": 2.2939719240297274, "percentage": 32.77, "elapsed_time": "5:01:30", "remaining_time": "10:18:37"}
|
||||
{"current_steps": 1395, "total_steps": 4242, "loss": 0.2466, "lr": 3.396941177741531e-05, "epoch": 2.302229562345169, "percentage": 32.89, "elapsed_time": "5:02:33", "remaining_time": "10:17:29"}
|
||||
{"current_steps": 1400, "total_steps": 4242, "loss": 0.2504, "lr": 3.391039280426103e-05, "epoch": 2.310487200660611, "percentage": 33.0, "elapsed_time": "5:03:40", "remaining_time": "10:16:27"}
|
||||
{"current_steps": 1405, "total_steps": 4242, "loss": 0.2554, "lr": 3.385113825338627e-05, "epoch": 2.3187448389760528, "percentage": 33.12, "elapsed_time": "5:04:47", "remaining_time": "10:15:25"}
|
||||
{"current_steps": 1410, "total_steps": 4242, "loss": 0.249, "lr": 3.379164912828908e-05, "epoch": 2.3270024772914946, "percentage": 33.24, "elapsed_time": "5:05:54", "remaining_time": "10:14:25"}
|
||||
{"current_steps": 1415, "total_steps": 4242, "loss": 0.25, "lr": 3.373192643644011e-05, "epoch": 2.3352601156069364, "percentage": 33.36, "elapsed_time": "5:06:59", "remaining_time": "10:13:20"}
|
||||
{"current_steps": 1420, "total_steps": 4242, "loss": 0.2479, "lr": 3.3671971189265554e-05, "epoch": 2.343517753922378, "percentage": 33.47, "elapsed_time": "5:08:07", "remaining_time": "10:12:20"}
|
||||
{"current_steps": 1425, "total_steps": 4242, "loss": 0.2462, "lr": 3.3611784402129995e-05, "epoch": 2.35177539223782, "percentage": 33.59, "elapsed_time": "5:09:07", "remaining_time": "10:11:04"}
|
||||
{"current_steps": 1430, "total_steps": 4242, "loss": 0.2522, "lr": 3.3551367094319275e-05, "epoch": 2.3600330305532617, "percentage": 33.71, "elapsed_time": "5:10:09", "remaining_time": "10:09:54"}
|
||||
{"current_steps": 1435, "total_steps": 4242, "loss": 0.2392, "lr": 3.349072028902316e-05, "epoch": 2.3682906688687035, "percentage": 33.83, "elapsed_time": "5:11:12", "remaining_time": "10:08:45"}
|
||||
{"current_steps": 1440, "total_steps": 4242, "loss": 0.2413, "lr": 3.342984501331805e-05, "epoch": 2.3765483071841453, "percentage": 33.95, "elapsed_time": "5:12:17", "remaining_time": "10:07:40"}
|
||||
{"current_steps": 1445, "total_steps": 4242, "loss": 0.2457, "lr": 3.3368742298149566e-05, "epoch": 2.384805945499587, "percentage": 34.06, "elapsed_time": "5:13:21", "remaining_time": "10:06:32"}
|
||||
{"current_steps": 1450, "total_steps": 4242, "loss": 0.2441, "lr": 3.330741317831512e-05, "epoch": 2.393063583815029, "percentage": 34.18, "elapsed_time": "5:14:23", "remaining_time": "10:05:22"}
|
||||
{"current_steps": 1455, "total_steps": 4242, "loss": 0.2427, "lr": 3.324585869244636e-05, "epoch": 2.4013212221304707, "percentage": 34.3, "elapsed_time": "5:15:26", "remaining_time": "10:04:13"}
|
||||
{"current_steps": 1460, "total_steps": 4242, "loss": 0.2472, "lr": 3.3184079882991606e-05, "epoch": 2.4095788604459125, "percentage": 34.42, "elapsed_time": "5:16:24", "remaining_time": "10:02:54"}
|
||||
{"current_steps": 1465, "total_steps": 4242, "loss": 0.2449, "lr": 3.312207779619815e-05, "epoch": 2.4178364987613543, "percentage": 34.54, "elapsed_time": "5:17:25", "remaining_time": "10:01:41"}
|
||||
{"current_steps": 1470, "total_steps": 4242, "loss": 0.2592, "lr": 3.305985348209462e-05, "epoch": 2.426094137076796, "percentage": 34.65, "elapsed_time": "5:18:31", "remaining_time": "10:00:38"}
|
||||
{"current_steps": 1475, "total_steps": 4242, "loss": 0.2398, "lr": 3.2997407994473095e-05, "epoch": 2.434351775392238, "percentage": 34.77, "elapsed_time": "5:19:35", "remaining_time": "9:59:32"}
|
||||
{"current_steps": 1480, "total_steps": 4242, "loss": 0.2521, "lr": 3.293474239087134e-05, "epoch": 2.4426094137076797, "percentage": 34.89, "elapsed_time": "5:20:44", "remaining_time": "9:58:33"}
|
||||
{"current_steps": 1485, "total_steps": 4242, "loss": 0.2444, "lr": 3.2871857732554854e-05, "epoch": 2.4508670520231215, "percentage": 35.01, "elapsed_time": "5:21:54", "remaining_time": "9:57:38"}
|
||||
{"current_steps": 1490, "total_steps": 4242, "loss": 0.249, "lr": 3.2808755084498936e-05, "epoch": 2.4591246903385633, "percentage": 35.12, "elapsed_time": "5:22:56", "remaining_time": "9:56:28"}
|
||||
{"current_steps": 1495, "total_steps": 4242, "loss": 0.2394, "lr": 3.2745435515370585e-05, "epoch": 2.467382328654005, "percentage": 35.24, "elapsed_time": "5:23:58", "remaining_time": "9:55:16"}
|
||||
{"current_steps": 1500, "total_steps": 4242, "loss": 0.247, "lr": 3.268190009751046e-05, "epoch": 2.475639966969447, "percentage": 35.36, "elapsed_time": "5:25:02", "remaining_time": "9:54:11"}
|
||||
{"current_steps": 1505, "total_steps": 4242, "loss": 0.2429, "lr": 3.261814990691471e-05, "epoch": 2.4838976052848887, "percentage": 35.48, "elapsed_time": "5:26:40", "remaining_time": "9:54:05"}
|
||||
{"current_steps": 1510, "total_steps": 4242, "loss": 0.2547, "lr": 3.2554186023216715e-05, "epoch": 2.4921552436003305, "percentage": 35.6, "elapsed_time": "5:27:46", "remaining_time": "9:53:01"}
|
||||
{"current_steps": 1515, "total_steps": 4242, "loss": 0.2544, "lr": 3.2490009529668845e-05, "epoch": 2.5004128819157723, "percentage": 35.71, "elapsed_time": "5:28:55", "remaining_time": "9:52:03"}
|
||||
{"current_steps": 1520, "total_steps": 4242, "loss": 0.2547, "lr": 3.24256215131241e-05, "epoch": 2.508670520231214, "percentage": 35.83, "elapsed_time": "5:30:01", "remaining_time": "9:50:59"}
|
||||
{"current_steps": 1525, "total_steps": 4242, "loss": 0.2349, "lr": 3.236102306401767e-05, "epoch": 2.516928158546656, "percentage": 35.95, "elapsed_time": "5:31:06", "remaining_time": "9:49:54"}
|
||||
{"current_steps": 1530, "total_steps": 4242, "loss": 0.2396, "lr": 3.229621527634855e-05, "epoch": 2.5251857968620977, "percentage": 36.07, "elapsed_time": "5:32:12", "remaining_time": "9:48:51"}
|
||||
{"current_steps": 1535, "total_steps": 4242, "loss": 0.2468, "lr": 3.223119924766093e-05, "epoch": 2.5334434351775394, "percentage": 36.19, "elapsed_time": "5:33:20", "remaining_time": "9:47:51"}
|
||||
{"current_steps": 1540, "total_steps": 4242, "loss": 0.241, "lr": 3.2165976079025644e-05, "epoch": 2.5417010734929812, "percentage": 36.3, "elapsed_time": "5:34:24", "remaining_time": "9:46:43"}
|
||||
{"current_steps": 1545, "total_steps": 4242, "loss": 0.2471, "lr": 3.210054687502152e-05, "epoch": 2.549958711808423, "percentage": 36.42, "elapsed_time": "5:35:22", "remaining_time": "9:45:26"}
|
||||
{"current_steps": 1550, "total_steps": 4242, "loss": 0.2407, "lr": 3.2034912743716666e-05, "epoch": 2.558216350123865, "percentage": 36.54, "elapsed_time": "5:36:24", "remaining_time": "9:44:16"}
|
||||
{"current_steps": 1555, "total_steps": 4242, "loss": 0.2392, "lr": 3.1969074796649734e-05, "epoch": 2.5664739884393066, "percentage": 36.66, "elapsed_time": "5:37:29", "remaining_time": "9:43:10"}
|
||||
{"current_steps": 1560, "total_steps": 4242, "loss": 0.247, "lr": 3.190303414881105e-05, "epoch": 2.574731626754748, "percentage": 36.78, "elapsed_time": "5:38:22", "remaining_time": "9:41:43"}
|
||||
{"current_steps": 1565, "total_steps": 4242, "loss": 0.2453, "lr": 3.183679191862375e-05, "epoch": 2.5829892650701898, "percentage": 36.89, "elapsed_time": "5:39:27", "remaining_time": "9:40:39"}
|
||||
{"current_steps": 1570, "total_steps": 4242, "loss": 0.2498, "lr": 3.1770349227924854e-05, "epoch": 2.5912469033856316, "percentage": 37.01, "elapsed_time": "5:40:29", "remaining_time": "9:39:29"}
|
||||
{"current_steps": 1575, "total_steps": 4242, "loss": 0.247, "lr": 3.170370720194626e-05, "epoch": 2.5995045417010734, "percentage": 37.13, "elapsed_time": "5:41:41", "remaining_time": "9:38:36"}
|
||||
{"current_steps": 1580, "total_steps": 4242, "loss": 0.2447, "lr": 3.1636866969295684e-05, "epoch": 2.607762180016515, "percentage": 37.25, "elapsed_time": "5:42:43", "remaining_time": "9:37:25"}
|
||||
{"current_steps": 1585, "total_steps": 4242, "loss": 0.2494, "lr": 3.156982966193753e-05, "epoch": 2.616019818331957, "percentage": 37.36, "elapsed_time": "5:43:52", "remaining_time": "9:36:27"}
|
||||
{"current_steps": 1590, "total_steps": 4242, "loss": 0.2349, "lr": 3.150259641517375e-05, "epoch": 2.6242774566473988, "percentage": 37.48, "elapsed_time": "5:44:57", "remaining_time": "9:35:21"}
|
||||
{"current_steps": 1595, "total_steps": 4242, "loss": 0.2478, "lr": 3.14351683676246e-05, "epoch": 2.6325350949628405, "percentage": 37.6, "elapsed_time": "5:46:03", "remaining_time": "9:34:18"}
|
||||
{"current_steps": 1600, "total_steps": 4242, "loss": 0.2536, "lr": 3.1367546661209355e-05, "epoch": 2.6407927332782823, "percentage": 37.72, "elapsed_time": "5:47:11", "remaining_time": "9:33:18"}
|
||||
{"current_steps": 1605, "total_steps": 4242, "loss": 0.2558, "lr": 3.1299732441126995e-05, "epoch": 2.649050371593724, "percentage": 37.84, "elapsed_time": "5:48:19", "remaining_time": "9:32:17"}
|
||||
{"current_steps": 1610, "total_steps": 4242, "loss": 0.2398, "lr": 3.123172685583676e-05, "epoch": 2.657308009909166, "percentage": 37.95, "elapsed_time": "5:49:16", "remaining_time": "9:30:59"}
|
||||
{"current_steps": 1615, "total_steps": 4242, "loss": 0.245, "lr": 3.116353105703876e-05, "epoch": 2.6655656482246077, "percentage": 38.07, "elapsed_time": "5:50:19", "remaining_time": "9:29:51"}
|
||||
{"current_steps": 1620, "total_steps": 4242, "loss": 0.2489, "lr": 3.1095146199654426e-05, "epoch": 2.6738232865400495, "percentage": 38.19, "elapsed_time": "5:51:18", "remaining_time": "9:28:36"}
|
||||
{"current_steps": 1625, "total_steps": 4242, "loss": 0.2477, "lr": 3.1026573441806976e-05, "epoch": 2.6820809248554913, "percentage": 38.31, "elapsed_time": "5:52:22", "remaining_time": "9:27:29"}
|
||||
{"current_steps": 1630, "total_steps": 4242, "loss": 0.2486, "lr": 3.095781394480177e-05, "epoch": 2.690338563170933, "percentage": 38.43, "elapsed_time": "5:53:22", "remaining_time": "9:26:15"}
|
||||
{"current_steps": 1635, "total_steps": 4242, "loss": 0.2477, "lr": 3.088886887310671e-05, "epoch": 2.698596201486375, "percentage": 38.54, "elapsed_time": "5:54:27", "remaining_time": "9:25:10"}
|
||||
{"current_steps": 1640, "total_steps": 4242, "loss": 0.2441, "lr": 3.081973939433244e-05, "epoch": 2.7068538398018167, "percentage": 38.66, "elapsed_time": "5:55:27", "remaining_time": "9:23:57"}
|
||||
{"current_steps": 1645, "total_steps": 4242, "loss": 0.2477, "lr": 3.0750426679212614e-05, "epoch": 2.7151114781172585, "percentage": 38.78, "elapsed_time": "5:56:32", "remaining_time": "9:22:53"}
|
||||
{"current_steps": 1650, "total_steps": 4242, "loss": 0.2533, "lr": 3.068093190158406e-05, "epoch": 2.7233691164327003, "percentage": 38.9, "elapsed_time": "5:57:37", "remaining_time": "9:21:48"}
|
||||
{"current_steps": 1655, "total_steps": 4242, "loss": 0.2452, "lr": 3.061125623836692e-05, "epoch": 2.731626754748142, "percentage": 39.01, "elapsed_time": "5:58:45", "remaining_time": "9:20:47"}
|
||||
{"current_steps": 1660, "total_steps": 4242, "loss": 0.2515, "lr": 3.054140086954466e-05, "epoch": 2.739884393063584, "percentage": 39.13, "elapsed_time": "5:59:51", "remaining_time": "9:19:43"}
|
||||
{"current_steps": 1665, "total_steps": 4242, "loss": 0.2401, "lr": 3.04713669781442e-05, "epoch": 2.7481420313790257, "percentage": 39.25, "elapsed_time": "6:01:01", "remaining_time": "9:18:46"}
|
||||
{"current_steps": 1670, "total_steps": 4242, "loss": 0.2438, "lr": 3.0401155750215733e-05, "epoch": 2.7563996696944675, "percentage": 39.37, "elapsed_time": "6:02:08", "remaining_time": "9:17:44"}
|
||||
{"current_steps": 1675, "total_steps": 4242, "loss": 0.2458, "lr": 3.033076837481275e-05, "epoch": 2.7646573080099093, "percentage": 39.49, "elapsed_time": "6:03:10", "remaining_time": "9:16:35"}
|
||||
{"current_steps": 1680, "total_steps": 4242, "loss": 0.2548, "lr": 3.0260206043971857e-05, "epoch": 2.772914946325351, "percentage": 39.6, "elapsed_time": "6:04:15", "remaining_time": "9:15:29"}
|
||||
{"current_steps": 1685, "total_steps": 4242, "loss": 0.2478, "lr": 3.0189469952692608e-05, "epoch": 2.781172584640793, "percentage": 39.72, "elapsed_time": "6:05:18", "remaining_time": "9:14:21"}
|
||||
{"current_steps": 1690, "total_steps": 4242, "loss": 0.2442, "lr": 3.011856129891723e-05, "epoch": 2.7894302229562347, "percentage": 39.84, "elapsed_time": "6:06:24", "remaining_time": "9:13:17"}
|
||||
{"current_steps": 1695, "total_steps": 4242, "loss": 0.2448, "lr": 3.004748128351038e-05, "epoch": 2.7976878612716765, "percentage": 39.96, "elapsed_time": "6:07:26", "remaining_time": "9:12:07"}
|
||||
{"current_steps": 1700, "total_steps": 4242, "loss": 0.2355, "lr": 2.997623111023879e-05, "epoch": 2.805945499587118, "percentage": 40.08, "elapsed_time": "6:08:29", "remaining_time": "9:10:59"}
|
||||
{"current_steps": 1705, "total_steps": 4242, "loss": 0.2428, "lr": 2.9904811985750868e-05, "epoch": 2.8142031379025596, "percentage": 40.19, "elapsed_time": "6:09:28", "remaining_time": "9:09:46"}
|
||||
{"current_steps": 1710, "total_steps": 4242, "loss": 0.245, "lr": 2.9833225119556277e-05, "epoch": 2.8224607762180014, "percentage": 40.31, "elapsed_time": "6:10:36", "remaining_time": "9:08:44"}
|
||||
{"current_steps": 1715, "total_steps": 4242, "loss": 0.2424, "lr": 2.9761471724005457e-05, "epoch": 2.830718414533443, "percentage": 40.43, "elapsed_time": "6:11:42", "remaining_time": "9:07:41"}
|
||||
{"current_steps": 1720, "total_steps": 4242, "loss": 0.2312, "lr": 2.968955301426908e-05, "epoch": 2.838976052848885, "percentage": 40.55, "elapsed_time": "6:12:41", "remaining_time": "9:06:28"}
|
||||
{"current_steps": 1725, "total_steps": 4242, "loss": 0.244, "lr": 2.9617470208317467e-05, "epoch": 2.847233691164327, "percentage": 40.66, "elapsed_time": "6:13:43", "remaining_time": "9:05:19"}
|
||||
{"current_steps": 1730, "total_steps": 4242, "loss": 0.2393, "lr": 2.954522452689998e-05, "epoch": 2.8554913294797686, "percentage": 40.78, "elapsed_time": "6:14:52", "remaining_time": "9:04:20"}
|
||||
{"current_steps": 1735, "total_steps": 4242, "loss": 0.2362, "lr": 2.947281719352434e-05, "epoch": 2.8637489677952104, "percentage": 40.9, "elapsed_time": "6:15:58", "remaining_time": "9:03:15"}
|
||||
{"current_steps": 1740, "total_steps": 4242, "loss": 0.2465, "lr": 2.9400249434435905e-05, "epoch": 2.872006606110652, "percentage": 41.02, "elapsed_time": "6:17:00", "remaining_time": "9:02:06"}
|
||||
{"current_steps": 1745, "total_steps": 4242, "loss": 0.2358, "lr": 2.9327522478596885e-05, "epoch": 2.880264244426094, "percentage": 41.14, "elapsed_time": "6:18:06", "remaining_time": "9:01:03"}
|
||||
{"current_steps": 1750, "total_steps": 4242, "loss": 0.2381, "lr": 2.9254637557665565e-05, "epoch": 2.8885218827415358, "percentage": 41.25, "elapsed_time": "6:19:06", "remaining_time": "8:59:50"}
|
||||
{"current_steps": 1755, "total_steps": 4242, "loss": 0.2499, "lr": 2.9181595905975434e-05, "epoch": 2.8967795210569776, "percentage": 41.37, "elapsed_time": "6:20:10", "remaining_time": "8:58:44"}
|
||||
{"current_steps": 1760, "total_steps": 4242, "loss": 0.2475, "lr": 2.9108398760514246e-05, "epoch": 2.9050371593724194, "percentage": 41.49, "elapsed_time": "6:21:13", "remaining_time": "8:57:36"}
|
||||
{"current_steps": 1765, "total_steps": 4242, "loss": 0.2442, "lr": 2.903504736090313e-05, "epoch": 2.913294797687861, "percentage": 41.61, "elapsed_time": "6:22:12", "remaining_time": "8:56:23"}
|
||||
{"current_steps": 1770, "total_steps": 4242, "loss": 0.241, "lr": 2.8961542949375556e-05, "epoch": 2.921552436003303, "percentage": 41.73, "elapsed_time": "6:23:15", "remaining_time": "8:55:15"}
|
||||
{"current_steps": 1775, "total_steps": 4242, "loss": 0.2467, "lr": 2.8887886770756302e-05, "epoch": 2.9298100743187447, "percentage": 41.84, "elapsed_time": "6:24:18", "remaining_time": "8:54:08"}
|
||||
{"current_steps": 1780, "total_steps": 4242, "loss": 0.2516, "lr": 2.881408007244039e-05, "epoch": 2.9380677126341865, "percentage": 41.96, "elapsed_time": "6:25:27", "remaining_time": "8:53:08"}
|
||||
{"current_steps": 1785, "total_steps": 4242, "loss": 0.2493, "lr": 2.8740124104371937e-05, "epoch": 2.9463253509496283, "percentage": 42.08, "elapsed_time": "6:26:37", "remaining_time": "8:52:11"}
|
||||
{"current_steps": 1790, "total_steps": 4242, "loss": 0.2469, "lr": 2.866602011902301e-05, "epoch": 2.95458298926507, "percentage": 42.2, "elapsed_time": "6:27:43", "remaining_time": "8:51:07"}
|
||||
{"current_steps": 1795, "total_steps": 4242, "loss": 0.2454, "lr": 2.8591769371372405e-05, "epoch": 2.962840627580512, "percentage": 42.31, "elapsed_time": "6:28:53", "remaining_time": "8:50:09"}
|
||||
{"current_steps": 1800, "total_steps": 4242, "loss": 0.2446, "lr": 2.851737311888438e-05, "epoch": 2.9710982658959537, "percentage": 42.43, "elapsed_time": "6:29:57", "remaining_time": "8:49:02"}
|
||||
{"current_steps": 1805, "total_steps": 4242, "loss": 0.2398, "lr": 2.8442832621487385e-05, "epoch": 2.9793559042113955, "percentage": 42.55, "elapsed_time": "6:31:00", "remaining_time": "8:47:54"}
|
||||
{"current_steps": 1810, "total_steps": 4242, "loss": 0.2454, "lr": 2.8368149141552698e-05, "epoch": 2.9876135425268373, "percentage": 42.67, "elapsed_time": "6:32:03", "remaining_time": "8:46:46"}
|
||||
{"current_steps": 1815, "total_steps": 4242, "loss": 0.2559, "lr": 2.8293323943873077e-05, "epoch": 2.995871180842279, "percentage": 42.79, "elapsed_time": "6:33:06", "remaining_time": "8:45:40"}
|
||||
{"current_steps": 1820, "total_steps": 4242, "loss": 0.2381, "lr": 2.8218358295641315e-05, "epoch": 3.003303055326177, "percentage": 42.9, "elapsed_time": "6:34:00", "remaining_time": "8:44:19"}
|
||||
{"current_steps": 1825, "total_steps": 4242, "loss": 0.2284, "lr": 2.8143253466428782e-05, "epoch": 3.0115606936416186, "percentage": 43.02, "elapsed_time": "6:35:03", "remaining_time": "8:43:13"}
|
||||
{"current_steps": 1830, "total_steps": 4242, "loss": 0.2344, "lr": 2.8068010728163942e-05, "epoch": 3.0198183319570604, "percentage": 43.14, "elapsed_time": "6:36:10", "remaining_time": "8:42:10"}
|
||||
{"current_steps": 1835, "total_steps": 4242, "loss": 0.2321, "lr": 2.7992631355110786e-05, "epoch": 3.028075970272502, "percentage": 43.26, "elapsed_time": "6:37:17", "remaining_time": "8:41:08"}
|
||||
{"current_steps": 1840, "total_steps": 4242, "loss": 0.2426, "lr": 2.7917116623847285e-05, "epoch": 3.036333608587944, "percentage": 43.38, "elapsed_time": "6:38:26", "remaining_time": "8:40:08"}
|
||||
{"current_steps": 1845, "total_steps": 4242, "loss": 0.2415, "lr": 2.7841467813243723e-05, "epoch": 3.044591246903386, "percentage": 43.49, "elapsed_time": "6:39:27", "remaining_time": "8:38:58"}
|
||||
{"current_steps": 1850, "total_steps": 4242, "loss": 0.2285, "lr": 2.7765686204441108e-05, "epoch": 3.0528488852188276, "percentage": 43.61, "elapsed_time": "6:40:26", "remaining_time": "8:37:46"}
|
||||
{"current_steps": 1855, "total_steps": 4242, "loss": 0.2401, "lr": 2.7689773080829406e-05, "epoch": 3.0611065235342694, "percentage": 43.73, "elapsed_time": "6:41:26", "remaining_time": "8:36:34"}
|
||||
{"current_steps": 1860, "total_steps": 4242, "loss": 0.2299, "lr": 2.7613729728025837e-05, "epoch": 3.069364161849711, "percentage": 43.85, "elapsed_time": "6:42:32", "remaining_time": "8:35:31"}
|
||||
{"current_steps": 1865, "total_steps": 4242, "loss": 0.2176, "lr": 2.7537557433853116e-05, "epoch": 3.077621800165153, "percentage": 43.97, "elapsed_time": "6:43:35", "remaining_time": "8:34:23"}
|
||||
{"current_steps": 1870, "total_steps": 4242, "loss": 0.2341, "lr": 2.7461257488317614e-05, "epoch": 3.0858794384805948, "percentage": 44.08, "elapsed_time": "6:44:40", "remaining_time": "8:33:18"}
|
||||
{"current_steps": 1875, "total_steps": 4242, "loss": 0.2336, "lr": 2.738483118358753e-05, "epoch": 3.094137076796036, "percentage": 44.2, "elapsed_time": "6:45:48", "remaining_time": "8:32:17"}
|
||||
{"current_steps": 1880, "total_steps": 4242, "loss": 0.2276, "lr": 2.7308279813971022e-05, "epoch": 3.102394715111478, "percentage": 44.32, "elapsed_time": "6:46:55", "remaining_time": "8:31:15"}
|
||||
{"current_steps": 1885, "total_steps": 4242, "loss": 0.2389, "lr": 2.7231604675894226e-05, "epoch": 3.1106523534269197, "percentage": 44.44, "elapsed_time": "6:48:02", "remaining_time": "8:30:13"}
|
||||
{"current_steps": 1890, "total_steps": 4242, "loss": 0.2346, "lr": 2.715480706787939e-05, "epoch": 3.1189099917423615, "percentage": 44.55, "elapsed_time": "6:49:10", "remaining_time": "8:29:11"}
|
||||
{"current_steps": 1895, "total_steps": 4242, "loss": 0.2253, "lr": 2.70778882905228e-05, "epoch": 3.1271676300578033, "percentage": 44.67, "elapsed_time": "6:50:15", "remaining_time": "8:28:06"}
|
||||
{"current_steps": 1900, "total_steps": 4242, "loss": 0.2353, "lr": 2.7000849646472826e-05, "epoch": 3.135425268373245, "percentage": 44.79, "elapsed_time": "6:51:22", "remaining_time": "8:27:04"}
|
||||
{"current_steps": 1905, "total_steps": 4242, "loss": 0.2274, "lr": 2.6923692440407784e-05, "epoch": 3.143682906688687, "percentage": 44.91, "elapsed_time": "6:52:28", "remaining_time": "8:26:01"}
|
||||
{"current_steps": 1910, "total_steps": 4242, "loss": 0.2347, "lr": 2.6846417979013915e-05, "epoch": 3.1519405450041287, "percentage": 45.03, "elapsed_time": "6:53:35", "remaining_time": "8:24:58"}
|
||||
{"current_steps": 1915, "total_steps": 4242, "loss": 0.2298, "lr": 2.676902757096321e-05, "epoch": 3.1601981833195705, "percentage": 45.14, "elapsed_time": "6:54:38", "remaining_time": "8:23:50"}
|
||||
{"current_steps": 1920, "total_steps": 4242, "loss": 0.2344, "lr": 2.6691522526891258e-05, "epoch": 3.1684558216350123, "percentage": 45.26, "elapsed_time": "6:55:43", "remaining_time": "8:22:46"}
|
||||
{"current_steps": 1925, "total_steps": 4242, "loss": 0.2331, "lr": 2.661390415937506e-05, "epoch": 3.176713459950454, "percentage": 45.38, "elapsed_time": "6:56:45", "remaining_time": "8:21:37"}
|
||||
{"current_steps": 1930, "total_steps": 4242, "loss": 0.2321, "lr": 2.6536173782910782e-05, "epoch": 3.184971098265896, "percentage": 45.5, "elapsed_time": "6:57:48", "remaining_time": "8:20:29"}
|
||||
{"current_steps": 1935, "total_steps": 4242, "loss": 0.2347, "lr": 2.645833271389152e-05, "epoch": 3.1932287365813377, "percentage": 45.62, "elapsed_time": "6:58:50", "remaining_time": "8:19:21"}
|
||||
{"current_steps": 1940, "total_steps": 4242, "loss": 0.2344, "lr": 2.6380382270584966e-05, "epoch": 3.2014863748967795, "percentage": 45.73, "elapsed_time": "6:59:56", "remaining_time": "8:18:18"}
|
||||
{"current_steps": 1945, "total_steps": 4242, "loss": 0.2364, "lr": 2.630232377311113e-05, "epoch": 3.2097440132122212, "percentage": 45.85, "elapsed_time": "7:01:02", "remaining_time": "8:17:14"}
|
||||
{"current_steps": 1950, "total_steps": 4242, "loss": 0.2292, "lr": 2.622415854341994e-05, "epoch": 3.218001651527663, "percentage": 45.97, "elapsed_time": "7:02:08", "remaining_time": "8:16:10"}
|
||||
{"current_steps": 1955, "total_steps": 4242, "loss": 0.2303, "lr": 2.6145887905268893e-05, "epoch": 3.226259289843105, "percentage": 46.09, "elapsed_time": "7:03:14", "remaining_time": "8:15:07"}
|
||||
{"current_steps": 1960, "total_steps": 4242, "loss": 0.2306, "lr": 2.60675131842006e-05, "epoch": 3.2345169281585466, "percentage": 46.2, "elapsed_time": "7:04:18", "remaining_time": "8:14:00"}
|
||||
{"current_steps": 1965, "total_steps": 4242, "loss": 0.2376, "lr": 2.5989035707520374e-05, "epoch": 3.2427745664739884, "percentage": 46.32, "elapsed_time": "7:05:19", "remaining_time": "8:12:51"}
|
||||
{"current_steps": 1970, "total_steps": 4242, "loss": 0.2314, "lr": 2.591045680427371e-05, "epoch": 3.2510322047894302, "percentage": 46.44, "elapsed_time": "7:06:25", "remaining_time": "8:11:47"}
|
||||
{"current_steps": 1975, "total_steps": 4242, "loss": 0.2328, "lr": 2.583177780522382e-05, "epoch": 3.259289843104872, "percentage": 46.56, "elapsed_time": "7:07:38", "remaining_time": "8:10:52"}
|
||||
{"current_steps": 1980, "total_steps": 4242, "loss": 0.2492, "lr": 2.5753000042829078e-05, "epoch": 3.267547481420314, "percentage": 46.68, "elapsed_time": "7:08:44", "remaining_time": "8:09:48"}
|
||||
{"current_steps": 1985, "total_steps": 4242, "loss": 0.2358, "lr": 2.5674124851220422e-05, "epoch": 3.2758051197357556, "percentage": 46.79, "elapsed_time": "7:09:50", "remaining_time": "8:08:44"}
|
||||
{"current_steps": 1990, "total_steps": 4242, "loss": 0.2287, "lr": 2.5595153566178824e-05, "epoch": 3.2840627580511974, "percentage": 46.91, "elapsed_time": "7:10:53", "remaining_time": "8:07:36"}
|
||||
{"current_steps": 1995, "total_steps": 4242, "loss": 0.2235, "lr": 2.5516087525112623e-05, "epoch": 3.292320396366639, "percentage": 47.03, "elapsed_time": "7:11:53", "remaining_time": "8:06:26"}
|
||||
{"current_steps": 2000, "total_steps": 4242, "loss": 0.2364, "lr": 2.5436928067034876e-05, "epoch": 3.300578034682081, "percentage": 47.15, "elapsed_time": "7:13:00", "remaining_time": "8:05:23"}
|
||||
{"current_steps": 2005, "total_steps": 4242, "loss": 0.2355, "lr": 2.535767653254071e-05, "epoch": 3.308835672997523, "percentage": 47.27, "elapsed_time": "7:14:06", "remaining_time": "8:04:19"}
|
||||
{"current_steps": 2010, "total_steps": 4242, "loss": 0.2407, "lr": 2.5278334263784587e-05, "epoch": 3.3170933113129646, "percentage": 47.38, "elapsed_time": "7:15:08", "remaining_time": "8:03:11"}
|
||||
{"current_steps": 2015, "total_steps": 4242, "loss": 0.2387, "lr": 2.5198902604457594e-05, "epoch": 3.3253509496284064, "percentage": 47.5, "elapsed_time": "7:16:17", "remaining_time": "8:02:11"}
|
||||
{"current_steps": 2020, "total_steps": 4242, "loss": 0.2291, "lr": 2.511938289976468e-05, "epoch": 3.333608587943848, "percentage": 47.62, "elapsed_time": "7:17:20", "remaining_time": "8:01:04"}
|
||||
{"current_steps": 2025, "total_steps": 4242, "loss": 0.2311, "lr": 2.503977649640188e-05, "epoch": 3.34186622625929, "percentage": 47.74, "elapsed_time": "7:18:22", "remaining_time": "7:59:56"}
|
||||
{"current_steps": 2030, "total_steps": 4242, "loss": 0.2334, "lr": 2.496008474253349e-05, "epoch": 3.3501238645747318, "percentage": 47.85, "elapsed_time": "7:19:28", "remaining_time": "7:58:52"}
|
||||
{"current_steps": 2035, "total_steps": 4242, "loss": 0.2329, "lr": 2.4880308987769262e-05, "epoch": 3.3583815028901736, "percentage": 47.97, "elapsed_time": "7:20:29", "remaining_time": "7:57:43"}
|
||||
{"current_steps": 2040, "total_steps": 4242, "loss": 0.2356, "lr": 2.4800450583141527e-05, "epoch": 3.3666391412056154, "percentage": 48.09, "elapsed_time": "7:21:35", "remaining_time": "7:56:39"}
|
||||
{"current_steps": 2045, "total_steps": 4242, "loss": 0.232, "lr": 2.472051088108233e-05, "epoch": 3.374896779521057, "percentage": 48.21, "elapsed_time": "7:22:40", "remaining_time": "7:55:35"}
|
||||
{"current_steps": 2050, "total_steps": 4242, "loss": 0.2387, "lr": 2.4640491235400513e-05, "epoch": 3.383154417836499, "percentage": 48.33, "elapsed_time": "7:23:43", "remaining_time": "7:54:27"}
|
||||
{"current_steps": 2055, "total_steps": 4242, "loss": 0.2379, "lr": 2.4560393001258786e-05, "epoch": 3.3914120561519407, "percentage": 48.44, "elapsed_time": "7:24:51", "remaining_time": "7:53:25"}
|
||||
{"current_steps": 2060, "total_steps": 4242, "loss": 0.24, "lr": 2.44802175351508e-05, "epoch": 3.3996696944673825, "percentage": 48.56, "elapsed_time": "7:25:55", "remaining_time": "7:52:20"}
|
||||
{"current_steps": 2065, "total_steps": 4242, "loss": 0.2471, "lr": 2.4399966194878158e-05, "epoch": 3.4079273327828243, "percentage": 48.68, "elapsed_time": "7:27:01", "remaining_time": "7:51:16"}
|
||||
{"current_steps": 2070, "total_steps": 4242, "loss": 0.2308, "lr": 2.4319640339527393e-05, "epoch": 3.416184971098266, "percentage": 48.8, "elapsed_time": "7:28:04", "remaining_time": "7:50:09"}
|
||||
{"current_steps": 2075, "total_steps": 4242, "loss": 0.2468, "lr": 2.4239241329447016e-05, "epoch": 3.424442609413708, "percentage": 48.92, "elapsed_time": "7:29:10", "remaining_time": "7:49:04"}
|
||||
{"current_steps": 2080, "total_steps": 4242, "loss": 0.2289, "lr": 2.4158770526224417e-05, "epoch": 3.4327002477291493, "percentage": 49.03, "elapsed_time": "7:30:13", "remaining_time": "7:47:58"}
|
||||
{"current_steps": 2085, "total_steps": 4242, "loss": 0.2321, "lr": 2.4078229292662835e-05, "epoch": 3.440957886044591, "percentage": 49.15, "elapsed_time": "7:31:13", "remaining_time": "7:46:48"}
|
||||
{"current_steps": 2090, "total_steps": 4242, "loss": 0.2328, "lr": 2.399761899275828e-05, "epoch": 3.449215524360033, "percentage": 49.27, "elapsed_time": "7:32:19", "remaining_time": "7:45:44"}
|
||||
{"current_steps": 2095, "total_steps": 4242, "loss": 0.2368, "lr": 2.3916940991676417e-05, "epoch": 3.4574731626754747, "percentage": 49.39, "elapsed_time": "7:33:23", "remaining_time": "7:44:38"}
|
||||
{"current_steps": 2100, "total_steps": 4242, "loss": 0.2349, "lr": 2.3836196655729458e-05, "epoch": 3.4657308009909165, "percentage": 49.5, "elapsed_time": "7:34:27", "remaining_time": "7:43:33"}
|
||||
{"current_steps": 2105, "total_steps": 4242, "loss": 0.2362, "lr": 2.375538735235302e-05, "epoch": 3.4739884393063583, "percentage": 49.62, "elapsed_time": "7:35:29", "remaining_time": "7:42:24"}
|
||||
{"current_steps": 2110, "total_steps": 4242, "loss": 0.2285, "lr": 2.3674514450082984e-05, "epoch": 3.4822460776218, "percentage": 49.74, "elapsed_time": "7:36:37", "remaining_time": "7:41:22"}
|
||||
{"current_steps": 2115, "total_steps": 4242, "loss": 0.2244, "lr": 2.359357931853228e-05, "epoch": 3.490503715937242, "percentage": 49.86, "elapsed_time": "7:37:41", "remaining_time": "7:40:17"}
|
||||
{"current_steps": 2120, "total_steps": 4242, "loss": 0.2397, "lr": 2.3512583328367717e-05, "epoch": 3.4987613542526836, "percentage": 49.98, "elapsed_time": "7:38:50", "remaining_time": "7:39:16"}
|
||||
{"current_steps": 2125, "total_steps": 4242, "loss": 0.2347, "lr": 2.3431527851286782e-05, "epoch": 3.5070189925681254, "percentage": 50.09, "elapsed_time": "7:39:59", "remaining_time": "7:38:16"}
|
||||
{"current_steps": 2130, "total_steps": 4242, "loss": 0.2361, "lr": 2.3350414259994382e-05, "epoch": 3.5152766308835672, "percentage": 50.21, "elapsed_time": "7:41:01", "remaining_time": "7:37:07"}
|
||||
{"current_steps": 2135, "total_steps": 4242, "loss": 0.2433, "lr": 2.326924392817962e-05, "epoch": 3.523534269199009, "percentage": 50.33, "elapsed_time": "7:42:07", "remaining_time": "7:36:03"}
|
||||
{"current_steps": 2140, "total_steps": 4242, "loss": 0.2433, "lr": 2.318801823049251e-05, "epoch": 3.531791907514451, "percentage": 50.45, "elapsed_time": "7:43:13", "remaining_time": "7:35:00"}
|
||||
{"current_steps": 2145, "total_steps": 4242, "loss": 0.2303, "lr": 2.310673854252071e-05, "epoch": 3.5400495458298926, "percentage": 50.57, "elapsed_time": "7:44:12", "remaining_time": "7:33:49"}
|
||||
{"current_steps": 2150, "total_steps": 4242, "loss": 0.2307, "lr": 2.3025406240766233e-05, "epoch": 3.5483071841453344, "percentage": 50.68, "elapsed_time": "7:45:15", "remaining_time": "7:32:42"}
|
||||
{"current_steps": 2155, "total_steps": 4242, "loss": 0.2285, "lr": 2.2944022702622117e-05, "epoch": 3.556564822460776, "percentage": 50.8, "elapsed_time": "7:46:19", "remaining_time": "7:31:36"}
|
||||
{"current_steps": 2160, "total_steps": 4242, "loss": 0.2289, "lr": 2.286258930634912e-05, "epoch": 3.564822460776218, "percentage": 50.92, "elapsed_time": "7:47:19", "remaining_time": "7:30:26"}
|
||||
{"current_steps": 2165, "total_steps": 4242, "loss": 0.2321, "lr": 2.2781107431052346e-05, "epoch": 3.57308009909166, "percentage": 51.04, "elapsed_time": "7:48:24", "remaining_time": "7:29:22"}
|
||||
{"current_steps": 2170, "total_steps": 4242, "loss": 0.2289, "lr": 2.269957845665792e-05, "epoch": 3.5813377374071016, "percentage": 51.16, "elapsed_time": "7:49:28", "remaining_time": "7:28:16"}
|
||||
{"current_steps": 2175, "total_steps": 4242, "loss": 0.2266, "lr": 2.261800376388962e-05, "epoch": 3.5895953757225434, "percentage": 51.27, "elapsed_time": "7:50:29", "remaining_time": "7:27:07"}
|
||||
{"current_steps": 2180, "total_steps": 4242, "loss": 0.2374, "lr": 2.2536384734245455e-05, "epoch": 3.597853014037985, "percentage": 51.39, "elapsed_time": "7:51:31", "remaining_time": "7:26:00"}
|
||||
{"current_steps": 2185, "total_steps": 4242, "loss": 0.2408, "lr": 2.2454722749974315e-05, "epoch": 3.606110652353427, "percentage": 51.51, "elapsed_time": "7:52:31", "remaining_time": "7:24:50"}
|
||||
{"current_steps": 2190, "total_steps": 4242, "loss": 0.2334, "lr": 2.237301919405255e-05, "epoch": 3.6143682906688688, "percentage": 51.63, "elapsed_time": "7:53:32", "remaining_time": "7:23:41"}
|
||||
{"current_steps": 2195, "total_steps": 4242, "loss": 0.2299, "lr": 2.229127545016051e-05, "epoch": 3.6226259289843106, "percentage": 51.74, "elapsed_time": "7:54:32", "remaining_time": "7:22:32"}
|
||||
{"current_steps": 2200, "total_steps": 4242, "loss": 0.2251, "lr": 2.2209492902659183e-05, "epoch": 3.6308835672997524, "percentage": 51.86, "elapsed_time": "7:55:39", "remaining_time": "7:21:30"}
|
||||
{"current_steps": 2205, "total_steps": 4242, "loss": 0.2342, "lr": 2.2127672936566676e-05, "epoch": 3.639141205615194, "percentage": 51.98, "elapsed_time": "7:56:47", "remaining_time": "7:20:27"}
|
||||
{"current_steps": 2210, "total_steps": 4242, "loss": 0.235, "lr": 2.204581693753481e-05, "epoch": 3.647398843930636, "percentage": 52.1, "elapsed_time": "7:57:53", "remaining_time": "7:19:24"}
|
||||
{"current_steps": 2215, "total_steps": 4242, "loss": 0.2318, "lr": 2.196392629182565e-05, "epoch": 3.6556564822460778, "percentage": 52.22, "elapsed_time": "7:58:56", "remaining_time": "7:18:17"}
|
||||
{"current_steps": 2220, "total_steps": 4242, "loss": 0.2302, "lr": 2.1882002386287983e-05, "epoch": 3.6639141205615195, "percentage": 52.33, "elapsed_time": "8:00:04", "remaining_time": "7:17:15"}
|
||||
{"current_steps": 2225, "total_steps": 4242, "loss": 0.2295, "lr": 2.1800046608333893e-05, "epoch": 3.6721717588769613, "percentage": 52.45, "elapsed_time": "8:01:05", "remaining_time": "7:16:07"}
|
||||
{"current_steps": 2230, "total_steps": 4242, "loss": 0.2367, "lr": 2.171806034591522e-05, "epoch": 3.6804293971924027, "percentage": 52.57, "elapsed_time": "8:02:17", "remaining_time": "7:15:08"}
|
||||
{"current_steps": 2235, "total_steps": 4242, "loss": 0.2384, "lr": 2.163604498750008e-05, "epoch": 3.6886870355078445, "percentage": 52.69, "elapsed_time": "8:03:22", "remaining_time": "7:14:03"}
|
||||
{"current_steps": 2240, "total_steps": 4242, "loss": 0.241, "lr": 2.1554001922049333e-05, "epoch": 3.6969446738232863, "percentage": 52.81, "elapsed_time": "8:04:31", "remaining_time": "7:13:02"}
|
||||
{"current_steps": 2245, "total_steps": 4242, "loss": 0.2387, "lr": 2.1471932538993063e-05, "epoch": 3.705202312138728, "percentage": 52.92, "elapsed_time": "8:05:38", "remaining_time": "7:11:59"}
|
||||
{"current_steps": 2250, "total_steps": 4242, "loss": 0.2306, "lr": 2.1389838228207068e-05, "epoch": 3.71345995045417, "percentage": 53.04, "elapsed_time": "8:06:45", "remaining_time": "7:10:56"}
|
||||
{"current_steps": 2255, "total_steps": 4242, "loss": 0.2401, "lr": 2.130772037998929e-05, "epoch": 3.7217175887696117, "percentage": 53.16, "elapsed_time": "8:07:47", "remaining_time": "7:09:49"}
|
||||
{"current_steps": 2260, "total_steps": 4242, "loss": 0.2274, "lr": 2.122558038503631e-05, "epoch": 3.7299752270850535, "percentage": 53.28, "elapsed_time": "8:08:48", "remaining_time": "7:08:41"}
|
||||
{"current_steps": 2265, "total_steps": 4242, "loss": 0.2255, "lr": 2.114341963441974e-05, "epoch": 3.7382328654004953, "percentage": 53.39, "elapsed_time": "8:09:55", "remaining_time": "7:07:38"}
|
||||
{"current_steps": 2270, "total_steps": 4242, "loss": 0.2345, "lr": 2.106123951956271e-05, "epoch": 3.746490503715937, "percentage": 53.51, "elapsed_time": "8:11:00", "remaining_time": "7:06:33"}
|
||||
{"current_steps": 2275, "total_steps": 4242, "loss": 0.2286, "lr": 2.0979041432216318e-05, "epoch": 3.754748142031379, "percentage": 53.63, "elapsed_time": "8:12:01", "remaining_time": "7:05:24"}
|
||||
{"current_steps": 2280, "total_steps": 4242, "loss": 0.2429, "lr": 2.0896826764435984e-05, "epoch": 3.7630057803468207, "percentage": 53.75, "elapsed_time": "8:13:09", "remaining_time": "7:04:22"}
|
||||
{"current_steps": 2285, "total_steps": 4242, "loss": 0.2347, "lr": 2.0814596908557966e-05, "epoch": 3.7712634186622624, "percentage": 53.87, "elapsed_time": "8:14:15", "remaining_time": "7:03:19"}
|
||||
{"current_steps": 2290, "total_steps": 4242, "loss": 0.2268, "lr": 2.073235325717571e-05, "epoch": 3.7795210569777042, "percentage": 53.98, "elapsed_time": "8:15:26", "remaining_time": "7:02:19"}
|
||||
{"current_steps": 2295, "total_steps": 4242, "loss": 0.2316, "lr": 2.0650097203116308e-05, "epoch": 3.787778695293146, "percentage": 54.1, "elapsed_time": "8:16:28", "remaining_time": "7:01:11"}
|
||||
{"current_steps": 2300, "total_steps": 4242, "loss": 0.2433, "lr": 2.0567830139416895e-05, "epoch": 3.796036333608588, "percentage": 54.22, "elapsed_time": "8:17:35", "remaining_time": "7:00:08"}
|
||||
{"current_steps": 2305, "total_steps": 4242, "loss": 0.2328, "lr": 2.0485553459301058e-05, "epoch": 3.8042939719240296, "percentage": 54.34, "elapsed_time": "8:18:39", "remaining_time": "6:59:02"}
|
||||
{"current_steps": 2310, "total_steps": 4242, "loss": 0.2332, "lr": 2.0403268556155237e-05, "epoch": 3.8125516102394714, "percentage": 54.46, "elapsed_time": "8:19:43", "remaining_time": "6:57:57"}
|
||||
{"current_steps": 2315, "total_steps": 4242, "loss": 0.229, "lr": 2.0320976823505135e-05, "epoch": 3.820809248554913, "percentage": 54.57, "elapsed_time": "8:20:44", "remaining_time": "6:56:48"}
|
||||
{"current_steps": 2320, "total_steps": 4242, "loss": 0.2341, "lr": 2.0238679654992115e-05, "epoch": 3.829066886870355, "percentage": 54.69, "elapsed_time": "8:21:48", "remaining_time": "6:55:43"}
|
||||
{"current_steps": 2325, "total_steps": 4242, "loss": 0.2274, "lr": 2.0156378444349597e-05, "epoch": 3.837324525185797, "percentage": 54.81, "elapsed_time": "8:22:50", "remaining_time": "6:54:35"}
|
||||
{"current_steps": 2330, "total_steps": 4242, "loss": 0.2331, "lr": 2.0074074585379466e-05, "epoch": 3.8455821635012386, "percentage": 54.93, "elapsed_time": "8:23:54", "remaining_time": "6:53:30"}
|
||||
{"current_steps": 2335, "total_steps": 4242, "loss": 0.2335, "lr": 1.999176947192844e-05, "epoch": 3.8538398018166804, "percentage": 55.04, "elapsed_time": "8:25:01", "remaining_time": "6:52:27"}
|
||||
{"current_steps": 2340, "total_steps": 4242, "loss": 0.2371, "lr": 1.9909464497864487e-05, "epoch": 3.862097440132122, "percentage": 55.16, "elapsed_time": "8:25:58", "remaining_time": "6:51:15"}
|
||||
{"current_steps": 2345, "total_steps": 4242, "loss": 0.227, "lr": 1.9827161057053245e-05, "epoch": 3.870355078447564, "percentage": 55.28, "elapsed_time": "8:26:57", "remaining_time": "6:50:06"}
|
||||
{"current_steps": 2350, "total_steps": 4242, "loss": 0.2294, "lr": 1.9744860543334324e-05, "epoch": 3.878612716763006, "percentage": 55.4, "elapsed_time": "8:27:57", "remaining_time": "6:48:57"}
|
||||
{"current_steps": 2355, "total_steps": 4242, "loss": 0.2326, "lr": 1.966256435049782e-05, "epoch": 3.8868703550784476, "percentage": 55.52, "elapsed_time": "8:29:01", "remaining_time": "6:47:51"}
|
||||
{"current_steps": 2360, "total_steps": 4242, "loss": 0.226, "lr": 1.9580273872260623e-05, "epoch": 3.8951279933938894, "percentage": 55.63, "elapsed_time": "8:30:06", "remaining_time": "6:46:47"}
|
||||
{"current_steps": 2365, "total_steps": 4242, "loss": 0.2331, "lr": 1.949799050224286e-05, "epoch": 3.903385631709331, "percentage": 55.75, "elapsed_time": "8:31:12", "remaining_time": "6:45:43"}
|
||||
{"current_steps": 2370, "total_steps": 4242, "loss": 0.238, "lr": 1.9415715633944264e-05, "epoch": 3.911643270024773, "percentage": 55.87, "elapsed_time": "8:32:21", "remaining_time": "6:44:41"}
|
||||
{"current_steps": 2375, "total_steps": 4242, "loss": 0.2347, "lr": 1.933345066072059e-05, "epoch": 3.9199009083402148, "percentage": 55.99, "elapsed_time": "8:33:28", "remaining_time": "6:43:38"}
|
||||
{"current_steps": 2380, "total_steps": 4242, "loss": 0.2369, "lr": 1.9251196975760036e-05, "epoch": 3.9281585466556566, "percentage": 56.11, "elapsed_time": "8:34:41", "remaining_time": "6:42:40"}
|
||||
{"current_steps": 2385, "total_steps": 4242, "loss": 0.2216, "lr": 1.9168955972059597e-05, "epoch": 3.9364161849710984, "percentage": 56.22, "elapsed_time": "8:35:45", "remaining_time": "6:41:35"}
|
||||
{"current_steps": 2390, "total_steps": 4242, "loss": 0.2354, "lr": 1.9086729042401525e-05, "epoch": 3.94467382328654, "percentage": 56.34, "elapsed_time": "8:36:49", "remaining_time": "6:40:28"}
|
||||
{"current_steps": 2395, "total_steps": 4242, "loss": 0.2274, "lr": 1.900451757932973e-05, "epoch": 3.952931461601982, "percentage": 56.46, "elapsed_time": "8:37:52", "remaining_time": "6:39:22"}
|
||||
{"current_steps": 2400, "total_steps": 4242, "loss": 0.2276, "lr": 1.8922322975126172e-05, "epoch": 3.9611890999174237, "percentage": 56.58, "elapsed_time": "8:38:53", "remaining_time": "6:38:14"}
|
||||
{"current_steps": 2405, "total_steps": 4242, "loss": 0.229, "lr": 1.884014662178731e-05, "epoch": 3.9694467382328655, "percentage": 56.69, "elapsed_time": "8:40:01", "remaining_time": "6:37:12"}
|
||||
{"current_steps": 2410, "total_steps": 4242, "loss": 0.2262, "lr": 1.8757989911000512e-05, "epoch": 3.9777043765483073, "percentage": 56.81, "elapsed_time": "8:40:58", "remaining_time": "6:36:01"}
|
||||
{"current_steps": 2415, "total_steps": 4242, "loss": 0.2365, "lr": 1.8675854234120506e-05, "epoch": 3.985962014863749, "percentage": 56.93, "elapsed_time": "8:42:03", "remaining_time": "6:34:56"}
|
||||
{"current_steps": 2420, "total_steps": 4242, "loss": 0.233, "lr": 1.8593740982145775e-05, "epoch": 3.994219653179191, "percentage": 57.05, "elapsed_time": "8:43:07", "remaining_time": "6:33:51"}
|
||||
{"current_steps": 2425, "total_steps": 4242, "loss": 0.224, "lr": 1.8511651545695057e-05, "epoch": 4.001651527663088, "percentage": 57.17, "elapsed_time": "8:44:00", "remaining_time": "6:32:37"}
|
||||
{"current_steps": 2430, "total_steps": 4242, "loss": 0.2297, "lr": 1.842958731498376e-05, "epoch": 4.00990916597853, "percentage": 57.28, "elapsed_time": "8:44:57", "remaining_time": "6:31:26"}
|
||||
{"current_steps": 2435, "total_steps": 4242, "loss": 0.2292, "lr": 1.8347549679800397e-05, "epoch": 4.018166804293972, "percentage": 57.4, "elapsed_time": "8:45:58", "remaining_time": "6:30:19"}
|
||||
{"current_steps": 2440, "total_steps": 4242, "loss": 0.2185, "lr": 1.826554002948311e-05, "epoch": 4.026424442609414, "percentage": 57.52, "elapsed_time": "8:47:05", "remaining_time": "6:29:16"}
|
||||
{"current_steps": 2445, "total_steps": 4242, "loss": 0.2209, "lr": 1.818355975289608e-05, "epoch": 4.034682080924855, "percentage": 57.64, "elapsed_time": "8:48:05", "remaining_time": "6:28:08"}
|
||||
{"current_steps": 2450, "total_steps": 4242, "loss": 0.2263, "lr": 1.810161023840607e-05, "epoch": 4.042939719240297, "percentage": 57.76, "elapsed_time": "8:49:18", "remaining_time": "6:27:08"}
|
||||
{"current_steps": 2455, "total_steps": 4242, "loss": 0.2244, "lr": 1.8019692873858824e-05, "epoch": 4.051197357555739, "percentage": 57.87, "elapsed_time": "8:50:19", "remaining_time": "6:26:01"}
|
||||
{"current_steps": 2460, "total_steps": 4242, "loss": 0.2211, "lr": 1.793780904655565e-05, "epoch": 4.059454995871181, "percentage": 57.99, "elapsed_time": "8:51:21", "remaining_time": "6:24:54"}
|
||||
{"current_steps": 2465, "total_steps": 4242, "loss": 0.2166, "lr": 1.785596014322989e-05, "epoch": 4.0677126341866225, "percentage": 58.11, "elapsed_time": "8:52:29", "remaining_time": "6:23:52"}
|
||||
{"current_steps": 2470, "total_steps": 4242, "loss": 0.2207, "lr": 1.7774147550023414e-05, "epoch": 4.075970272502064, "percentage": 58.23, "elapsed_time": "8:53:26", "remaining_time": "6:22:41"}
|
||||
{"current_steps": 2475, "total_steps": 4242, "loss": 0.2174, "lr": 1.7692372652463178e-05, "epoch": 4.084227910817506, "percentage": 58.35, "elapsed_time": "8:54:30", "remaining_time": "6:21:36"}
|
||||
{"current_steps": 2480, "total_steps": 4242, "loss": 0.2219, "lr": 1.7610636835437763e-05, "epoch": 4.092485549132948, "percentage": 58.46, "elapsed_time": "8:55:34", "remaining_time": "6:20:31"}
|
||||
{"current_steps": 2485, "total_steps": 4242, "loss": 0.2198, "lr": 1.752894148317388e-05, "epoch": 4.10074318744839, "percentage": 58.58, "elapsed_time": "8:56:33", "remaining_time": "6:19:21"}
|
||||
{"current_steps": 2490, "total_steps": 4242, "loss": 0.2227, "lr": 1.744728797921297e-05, "epoch": 4.1090008257638315, "percentage": 58.7, "elapsed_time": "8:57:41", "remaining_time": "6:18:19"}
|
||||
{"current_steps": 2495, "total_steps": 4242, "loss": 0.2224, "lr": 1.7365677706387757e-05, "epoch": 4.117258464079273, "percentage": 58.82, "elapsed_time": "8:58:48", "remaining_time": "6:17:16"}
|
||||
{"current_steps": 2500, "total_steps": 4242, "loss": 0.2199, "lr": 1.7284112046798833e-05, "epoch": 4.125516102394715, "percentage": 58.93, "elapsed_time": "8:59:52", "remaining_time": "6:16:10"}
|
||||
{"current_steps": 2505, "total_steps": 4242, "loss": 0.2211, "lr": 1.7202592381791222e-05, "epoch": 4.133773740710157, "percentage": 59.05, "elapsed_time": "9:00:52", "remaining_time": "6:15:02"}
|
||||
{"current_steps": 2510, "total_steps": 4242, "loss": 0.2192, "lr": 1.712112009193105e-05, "epoch": 4.142031379025599, "percentage": 59.17, "elapsed_time": "9:01:49", "remaining_time": "6:13:52"}
|
||||
{"current_steps": 2515, "total_steps": 4242, "loss": 0.2311, "lr": 1.7039696556982105e-05, "epoch": 4.1502890173410405, "percentage": 59.29, "elapsed_time": "9:02:52", "remaining_time": "6:12:47"}
|
||||
{"current_steps": 2520, "total_steps": 4242, "loss": 0.2278, "lr": 1.6958323155882485e-05, "epoch": 4.158546655656482, "percentage": 59.41, "elapsed_time": "9:04:04", "remaining_time": "6:11:46"}
|
||||
{"current_steps": 2525, "total_steps": 4242, "loss": 0.2253, "lr": 1.687700126672128e-05, "epoch": 4.166804293971924, "percentage": 59.52, "elapsed_time": "9:05:09", "remaining_time": "6:10:42"}
|
||||
{"current_steps": 2530, "total_steps": 4242, "loss": 0.2175, "lr": 1.6795732266715188e-05, "epoch": 4.175061932287366, "percentage": 59.64, "elapsed_time": "9:06:12", "remaining_time": "6:09:36"}
|
||||
{"current_steps": 2535, "total_steps": 4242, "loss": 0.2199, "lr": 1.67145175321852e-05, "epoch": 4.183319570602808, "percentage": 59.76, "elapsed_time": "9:07:13", "remaining_time": "6:08:29"}
|
||||
{"current_steps": 2540, "total_steps": 4242, "loss": 0.2206, "lr": 1.6633358438533318e-05, "epoch": 4.1915772089182495, "percentage": 59.88, "elapsed_time": "9:08:21", "remaining_time": "6:07:26"}
|
||||
{"current_steps": 2545, "total_steps": 4242, "loss": 0.2278, "lr": 1.655225636021924e-05, "epoch": 4.199834847233691, "percentage": 60.0, "elapsed_time": "9:09:27", "remaining_time": "6:06:22"}
|
||||
{"current_steps": 2550, "total_steps": 4242, "loss": 0.2146, "lr": 1.6471212670737092e-05, "epoch": 4.208092485549133, "percentage": 60.11, "elapsed_time": "9:10:27", "remaining_time": "6:05:14"}
|
||||
{"current_steps": 2555, "total_steps": 4242, "loss": 0.23, "lr": 1.6390228742592153e-05, "epoch": 4.216350123864575, "percentage": 60.23, "elapsed_time": "9:11:35", "remaining_time": "6:04:12"}
|
||||
{"current_steps": 2560, "total_steps": 4242, "loss": 0.2225, "lr": 1.630930594727762e-05, "epoch": 4.224607762180017, "percentage": 60.35, "elapsed_time": "9:12:34", "remaining_time": "6:03:03"}
|
||||
{"current_steps": 2565, "total_steps": 4242, "loss": 0.2222, "lr": 1.6228445655251405e-05, "epoch": 4.2328654004954585, "percentage": 60.47, "elapsed_time": "9:13:40", "remaining_time": "6:01:59"}
|
||||
{"current_steps": 2570, "total_steps": 4242, "loss": 0.2208, "lr": 1.6147649235912874e-05, "epoch": 4.2411230388109, "percentage": 60.58, "elapsed_time": "9:14:44", "remaining_time": "6:00:54"}
|
||||
{"current_steps": 2575, "total_steps": 4242, "loss": 0.2202, "lr": 1.6066918057579693e-05, "epoch": 4.249380677126342, "percentage": 60.7, "elapsed_time": "9:15:46", "remaining_time": "5:59:48"}
|
||||
{"current_steps": 2580, "total_steps": 4242, "loss": 0.2198, "lr": 1.5986253487464674e-05, "epoch": 4.257638315441784, "percentage": 60.82, "elapsed_time": "9:16:49", "remaining_time": "5:58:42"}
|
||||
{"current_steps": 2585, "total_steps": 4242, "loss": 0.2216, "lr": 1.5905656891652555e-05, "epoch": 4.265895953757226, "percentage": 60.94, "elapsed_time": "9:17:59", "remaining_time": "5:57:40"}
|
||||
{"current_steps": 2590, "total_steps": 4242, "loss": 0.221, "lr": 1.5825129635076923e-05, "epoch": 4.274153592072667, "percentage": 61.06, "elapsed_time": "9:19:09", "remaining_time": "5:56:39"}
|
||||
{"current_steps": 2595, "total_steps": 4242, "loss": 0.2177, "lr": 1.5744673081497083e-05, "epoch": 4.282411230388109, "percentage": 61.17, "elapsed_time": "9:20:16", "remaining_time": "5:55:35"}
|
||||
{"current_steps": 2600, "total_steps": 4242, "loss": 0.2248, "lr": 1.5664288593474955e-05, "epoch": 4.290668868703551, "percentage": 61.29, "elapsed_time": "9:21:25", "remaining_time": "5:54:33"}
|
||||
{"current_steps": 2605, "total_steps": 4242, "loss": 0.2148, "lr": 1.558397753235198e-05, "epoch": 4.298926507018993, "percentage": 61.41, "elapsed_time": "9:22:31", "remaining_time": "5:53:29"}
|
||||
{"current_steps": 2610, "total_steps": 4242, "loss": 0.2178, "lr": 1.550374125822613e-05, "epoch": 4.307184145334435, "percentage": 61.53, "elapsed_time": "9:23:44", "remaining_time": "5:52:30"}
|
||||
{"current_steps": 2615, "total_steps": 4242, "loss": 0.2216, "lr": 1.54235811299288e-05, "epoch": 4.315441783649876, "percentage": 61.65, "elapsed_time": "9:24:53", "remaining_time": "5:51:28"}
|
||||
{"current_steps": 2620, "total_steps": 4242, "loss": 0.2235, "lr": 1.534349850500182e-05, "epoch": 4.323699421965318, "percentage": 61.76, "elapsed_time": "9:26:04", "remaining_time": "5:50:26"}
|
||||
{"current_steps": 2625, "total_steps": 4242, "loss": 0.2193, "lr": 1.52634947396745e-05, "epoch": 4.33195706028076, "percentage": 61.88, "elapsed_time": "9:27:19", "remaining_time": "5:49:28"}
|
||||
{"current_steps": 2630, "total_steps": 4242, "loss": 0.2241, "lr": 1.5183571188840622e-05, "epoch": 4.340214698596202, "percentage": 62.0, "elapsed_time": "9:28:31", "remaining_time": "5:48:27"}
|
||||
{"current_steps": 2635, "total_steps": 4242, "loss": 0.2225, "lr": 1.5103729206035482e-05, "epoch": 4.348472336911644, "percentage": 62.12, "elapsed_time": "9:29:31", "remaining_time": "5:47:19"}
|
||||
{"current_steps": 2640, "total_steps": 4242, "loss": 0.216, "lr": 1.5023970143413029e-05, "epoch": 4.356729975227085, "percentage": 62.23, "elapsed_time": "9:30:30", "remaining_time": "5:46:11"}
|
||||
{"current_steps": 2645, "total_steps": 4242, "loss": 0.2212, "lr": 1.4944295351722898e-05, "epoch": 4.364987613542527, "percentage": 62.35, "elapsed_time": "9:31:35", "remaining_time": "5:45:06"}
|
||||
{"current_steps": 2650, "total_steps": 4242, "loss": 0.2179, "lr": 1.486470618028759e-05, "epoch": 4.373245251857969, "percentage": 62.47, "elapsed_time": "9:32:41", "remaining_time": "5:44:02"}
|
||||
{"current_steps": 2655, "total_steps": 4242, "loss": 0.2204, "lr": 1.4785203976979565e-05, "epoch": 4.381502890173411, "percentage": 62.59, "elapsed_time": "9:33:50", "remaining_time": "5:43:00"}
|
||||
{"current_steps": 2660, "total_steps": 4242, "loss": 0.2176, "lr": 1.4705790088198461e-05, "epoch": 4.389760528488853, "percentage": 62.71, "elapsed_time": "9:34:56", "remaining_time": "5:41:56"}
|
||||
{"current_steps": 2665, "total_steps": 4242, "loss": 0.2238, "lr": 1.4626465858848293e-05, "epoch": 4.398018166804294, "percentage": 62.82, "elapsed_time": "9:36:06", "remaining_time": "5:40:54"}
|
||||
{"current_steps": 2670, "total_steps": 4242, "loss": 0.2267, "lr": 1.4547232632314624e-05, "epoch": 4.406275805119736, "percentage": 62.94, "elapsed_time": "9:37:10", "remaining_time": "5:39:48"}
|
||||
{"current_steps": 2675, "total_steps": 4242, "loss": 0.2294, "lr": 1.4468091750441875e-05, "epoch": 4.414533443435178, "percentage": 63.06, "elapsed_time": "9:38:11", "remaining_time": "5:38:42"}
|
||||
{"current_steps": 2680, "total_steps": 4242, "loss": 0.2165, "lr": 1.4389044553510572e-05, "epoch": 4.422791081750619, "percentage": 63.18, "elapsed_time": "9:39:19", "remaining_time": "5:37:38"}
|
||||
{"current_steps": 2685, "total_steps": 4242, "loss": 0.2249, "lr": 1.431009238021465e-05, "epoch": 4.431048720066061, "percentage": 63.3, "elapsed_time": "9:40:23", "remaining_time": "5:36:33"}
|
||||
{"current_steps": 2690, "total_steps": 4242, "loss": 0.217, "lr": 1.423123656763877e-05, "epoch": 4.4393063583815024, "percentage": 63.41, "elapsed_time": "9:41:23", "remaining_time": "5:35:26"}
|
||||
{"current_steps": 2695, "total_steps": 4242, "loss": 0.2174, "lr": 1.4152478451235717e-05, "epoch": 4.447563996696944, "percentage": 63.53, "elapsed_time": "9:42:25", "remaining_time": "5:34:19"}
|
||||
{"current_steps": 2700, "total_steps": 4242, "loss": 0.2235, "lr": 1.4073819364803729e-05, "epoch": 4.455821635012386, "percentage": 63.65, "elapsed_time": "9:43:22", "remaining_time": "5:33:10"}
|
||||
{"current_steps": 2705, "total_steps": 4242, "loss": 0.2343, "lr": 1.399526064046394e-05, "epoch": 4.464079273327828, "percentage": 63.77, "elapsed_time": "9:44:23", "remaining_time": "5:32:03"}
|
||||
{"current_steps": 2710, "total_steps": 4242, "loss": 0.2098, "lr": 1.3916803608637818e-05, "epoch": 4.47233691164327, "percentage": 63.88, "elapsed_time": "9:45:32", "remaining_time": "5:31:00"}
|
||||
{"current_steps": 2715, "total_steps": 4242, "loss": 0.224, "lr": 1.383844959802464e-05, "epoch": 4.480594549958711, "percentage": 64.0, "elapsed_time": "9:46:36", "remaining_time": "5:29:55"}
|
||||
{"current_steps": 2720, "total_steps": 4242, "loss": 0.2281, "lr": 1.376019993557895e-05, "epoch": 4.488852188274153, "percentage": 64.12, "elapsed_time": "9:47:35", "remaining_time": "5:28:47"}
|
||||
{"current_steps": 2725, "total_steps": 4242, "loss": 0.2174, "lr": 1.3682055946488153e-05, "epoch": 4.497109826589595, "percentage": 64.24, "elapsed_time": "9:48:37", "remaining_time": "5:27:40"}
|
||||
{"current_steps": 2730, "total_steps": 4242, "loss": 0.2239, "lr": 1.360401895415001e-05, "epoch": 4.505367464905037, "percentage": 64.36, "elapsed_time": "9:49:38", "remaining_time": "5:26:34"}
|
||||
{"current_steps": 2735, "total_steps": 4242, "loss": 0.2191, "lr": 1.352609028015027e-05, "epoch": 4.513625103220479, "percentage": 64.47, "elapsed_time": "9:50:40", "remaining_time": "5:25:27"}
|
||||
{"current_steps": 2740, "total_steps": 4242, "loss": 0.2157, "lr": 1.3448271244240252e-05, "epoch": 4.52188274153592, "percentage": 64.59, "elapsed_time": "9:51:42", "remaining_time": "5:24:21"}
|
||||
{"current_steps": 2745, "total_steps": 4242, "loss": 0.2177, "lr": 1.3370563164314523e-05, "epoch": 4.530140379851362, "percentage": 64.71, "elapsed_time": "9:52:39", "remaining_time": "5:23:12"}
|
||||
{"current_steps": 2750, "total_steps": 4242, "loss": 0.2282, "lr": 1.3292967356388578e-05, "epoch": 4.538398018166804, "percentage": 64.83, "elapsed_time": "9:53:44", "remaining_time": "5:22:07"}
|
||||
{"current_steps": 2755, "total_steps": 4242, "loss": 0.2279, "lr": 1.321548513457652e-05, "epoch": 4.546655656482246, "percentage": 64.95, "elapsed_time": "9:54:55", "remaining_time": "5:21:06"}
|
||||
{"current_steps": 2760, "total_steps": 4242, "loss": 0.2195, "lr": 1.3138117811068845e-05, "epoch": 4.554913294797688, "percentage": 65.06, "elapsed_time": "9:56:08", "remaining_time": "5:20:06"}
|
||||
{"current_steps": 2765, "total_steps": 4242, "loss": 0.2215, "lr": 1.3060866696110213e-05, "epoch": 4.563170933113129, "percentage": 65.18, "elapsed_time": "9:57:10", "remaining_time": "5:19:00"}
|
||||
{"current_steps": 2770, "total_steps": 4242, "loss": 0.2319, "lr": 1.298373309797722e-05, "epoch": 4.571428571428571, "percentage": 65.3, "elapsed_time": "9:58:12", "remaining_time": "5:17:53"}
|
||||
{"current_steps": 2775, "total_steps": 4242, "loss": 0.2214, "lr": 1.290671832295629e-05, "epoch": 4.579686209744013, "percentage": 65.42, "elapsed_time": "9:59:19", "remaining_time": "5:16:50"}
|
||||
{"current_steps": 2780, "total_steps": 4242, "loss": 0.2279, "lr": 1.2829823675321535e-05, "epoch": 4.587943848059455, "percentage": 65.54, "elapsed_time": "10:00:27", "remaining_time": "5:15:46"}
|
||||
{"current_steps": 2785, "total_steps": 4242, "loss": 0.2157, "lr": 1.275305045731266e-05, "epoch": 4.596201486374897, "percentage": 65.65, "elapsed_time": "10:01:30", "remaining_time": "5:14:40"}
|
||||
{"current_steps": 2790, "total_steps": 4242, "loss": 0.2206, "lr": 1.26763999691129e-05, "epoch": 4.604459124690338, "percentage": 65.77, "elapsed_time": "10:02:30", "remaining_time": "5:13:33"}
|
||||
{"current_steps": 2795, "total_steps": 4242, "loss": 0.2253, "lr": 1.259987350882704e-05, "epoch": 4.61271676300578, "percentage": 65.89, "elapsed_time": "10:03:35", "remaining_time": "5:12:28"}
|
||||
{"current_steps": 2800, "total_steps": 4242, "loss": 0.2167, "lr": 1.252347237245939e-05, "epoch": 4.620974401321222, "percentage": 66.01, "elapsed_time": "10:04:43", "remaining_time": "5:11:25"}
|
||||
{"current_steps": 2805, "total_steps": 4242, "loss": 0.221, "lr": 1.2447197853891848e-05, "epoch": 4.629232039636664, "percentage": 66.12, "elapsed_time": "10:05:47", "remaining_time": "5:10:21"}
|
||||
{"current_steps": 2810, "total_steps": 4242, "loss": 0.2234, "lr": 1.237105124486201e-05, "epoch": 4.6374896779521055, "percentage": 66.24, "elapsed_time": "10:06:54", "remaining_time": "5:09:16"}
|
||||
{"current_steps": 2815, "total_steps": 4242, "loss": 0.2297, "lr": 1.2295033834941266e-05, "epoch": 4.645747316267547, "percentage": 66.36, "elapsed_time": "10:08:01", "remaining_time": "5:08:13"}
|
||||
{"current_steps": 2820, "total_steps": 4242, "loss": 0.2252, "lr": 1.2219146911512958e-05, "epoch": 4.654004954582989, "percentage": 66.48, "elapsed_time": "10:09:05", "remaining_time": "5:07:08"}
|
||||
{"current_steps": 2825, "total_steps": 4242, "loss": 0.2201, "lr": 1.2143391759750607e-05, "epoch": 4.662262592898431, "percentage": 66.6, "elapsed_time": "10:10:10", "remaining_time": "5:06:03"}
|
||||
{"current_steps": 2830, "total_steps": 4242, "loss": 0.2229, "lr": 1.206776966259613e-05, "epoch": 4.670520231213873, "percentage": 66.71, "elapsed_time": "10:11:19", "remaining_time": "5:05:00"}
|
||||
{"current_steps": 2835, "total_steps": 4242, "loss": 0.2247, "lr": 1.1992281900738121e-05, "epoch": 4.6787778695293145, "percentage": 66.83, "elapsed_time": "10:12:29", "remaining_time": "5:03:58"}
|
||||
{"current_steps": 2840, "total_steps": 4242, "loss": 0.222, "lr": 1.1916929752590126e-05, "epoch": 4.687035507844756, "percentage": 66.95, "elapsed_time": "10:13:31", "remaining_time": "5:02:52"}
|
||||
{"current_steps": 2845, "total_steps": 4242, "loss": 0.226, "lr": 1.1841714494269051e-05, "epoch": 4.695293146160198, "percentage": 67.07, "elapsed_time": "10:14:33", "remaining_time": "5:01:46"}
|
||||
{"current_steps": 2850, "total_steps": 4242, "loss": 0.2167, "lr": 1.1766637399573517e-05, "epoch": 4.70355078447564, "percentage": 67.19, "elapsed_time": "10:15:44", "remaining_time": "5:00:44"}
|
||||
{"current_steps": 2855, "total_steps": 4242, "loss": 0.2304, "lr": 1.1691699739962275e-05, "epoch": 4.711808422791082, "percentage": 67.3, "elapsed_time": "10:16:48", "remaining_time": "4:59:39"}
|
||||
{"current_steps": 2860, "total_steps": 4242, "loss": 0.226, "lr": 1.1616902784532711e-05, "epoch": 4.7200660611065235, "percentage": 67.42, "elapsed_time": "10:17:53", "remaining_time": "4:58:34"}
|
||||
{"current_steps": 2865, "total_steps": 4242, "loss": 0.2181, "lr": 1.1542247799999328e-05, "epoch": 4.728323699421965, "percentage": 67.54, "elapsed_time": "10:18:59", "remaining_time": "4:57:30"}
|
||||
{"current_steps": 2870, "total_steps": 4242, "loss": 0.2219, "lr": 1.146773605067228e-05, "epoch": 4.736581337737407, "percentage": 67.66, "elapsed_time": "10:19:59", "remaining_time": "4:56:23"}
|
||||
{"current_steps": 2875, "total_steps": 4242, "loss": 0.2113, "lr": 1.1393368798436007e-05, "epoch": 4.744838976052849, "percentage": 67.77, "elapsed_time": "10:21:05", "remaining_time": "4:55:19"}
|
||||
{"current_steps": 2880, "total_steps": 4242, "loss": 0.2276, "lr": 1.1319147302727827e-05, "epoch": 4.753096614368291, "percentage": 67.89, "elapsed_time": "10:22:09", "remaining_time": "4:54:13"}
|
||||
{"current_steps": 2885, "total_steps": 4242, "loss": 0.2243, "lr": 1.1245072820516606e-05, "epoch": 4.7613542526837325, "percentage": 68.01, "elapsed_time": "10:23:13", "remaining_time": "4:53:08"}
|
||||
{"current_steps": 2890, "total_steps": 4242, "loss": 0.221, "lr": 1.1171146606281482e-05, "epoch": 4.769611890999174, "percentage": 68.13, "elapsed_time": "10:24:20", "remaining_time": "4:52:04"}
|
||||
{"current_steps": 2895, "total_steps": 4242, "loss": 0.229, "lr": 1.1097369911990637e-05, "epoch": 4.777869529314616, "percentage": 68.25, "elapsed_time": "10:25:28", "remaining_time": "4:51:01"}
|
||||
{"current_steps": 2900, "total_steps": 4242, "loss": 0.2237, "lr": 1.1023743987080064e-05, "epoch": 4.786127167630058, "percentage": 68.36, "elapsed_time": "10:26:32", "remaining_time": "4:49:56"}
|
||||
{"current_steps": 2905, "total_steps": 4242, "loss": 0.2258, "lr": 1.0950270078432412e-05, "epoch": 4.7943848059455, "percentage": 68.48, "elapsed_time": "10:27:38", "remaining_time": "4:48:52"}
|
||||
{"current_steps": 2910, "total_steps": 4242, "loss": 0.2254, "lr": 1.0876949430355904e-05, "epoch": 4.8026424442609414, "percentage": 68.6, "elapsed_time": "10:28:42", "remaining_time": "4:47:46"}
|
||||
{"current_steps": 2915, "total_steps": 4242, "loss": 0.2218, "lr": 1.08037832845632e-05, "epoch": 4.810900082576383, "percentage": 68.72, "elapsed_time": "10:29:48", "remaining_time": "4:46:42"}
|
||||
{"current_steps": 2920, "total_steps": 4242, "loss": 0.2209, "lr": 1.0730772880150445e-05, "epoch": 4.819157720891825, "percentage": 68.84, "elapsed_time": "10:30:59", "remaining_time": "4:45:40"}
|
||||
{"current_steps": 2925, "total_steps": 4242, "loss": 0.2248, "lr": 1.0657919453576213e-05, "epoch": 4.827415359207267, "percentage": 68.95, "elapsed_time": "10:32:00", "remaining_time": "4:44:33"}
|
||||
{"current_steps": 2930, "total_steps": 4242, "loss": 0.2186, "lr": 1.0585224238640619e-05, "epoch": 4.835672997522709, "percentage": 69.07, "elapsed_time": "10:32:59", "remaining_time": "4:43:26"}
|
||||
{"current_steps": 2935, "total_steps": 4242, "loss": 0.2214, "lr": 1.0512688466464404e-05, "epoch": 4.84393063583815, "percentage": 69.19, "elapsed_time": "10:34:00", "remaining_time": "4:42:20"}
|
||||
{"current_steps": 2940, "total_steps": 4242, "loss": 0.2228, "lr": 1.0440313365468077e-05, "epoch": 4.852188274153592, "percentage": 69.31, "elapsed_time": "10:35:06", "remaining_time": "4:41:15"}
|
||||
{"current_steps": 2945, "total_steps": 4242, "loss": 0.2247, "lr": 1.0368100161351116e-05, "epoch": 4.860445912469034, "percentage": 69.42, "elapsed_time": "10:36:09", "remaining_time": "4:40:10"}
|
||||
{"current_steps": 2950, "total_steps": 4242, "loss": 0.2164, "lr": 1.0296050077071238e-05, "epoch": 4.868703550784476, "percentage": 69.54, "elapsed_time": "10:37:10", "remaining_time": "4:39:03"}
|
||||
{"current_steps": 2955, "total_steps": 4242, "loss": 0.2166, "lr": 1.0224164332823632e-05, "epoch": 4.876961189099918, "percentage": 69.66, "elapsed_time": "10:38:09", "remaining_time": "4:37:56"}
|
||||
{"current_steps": 2960, "total_steps": 4242, "loss": 0.2223, "lr": 1.015244414602035e-05, "epoch": 4.885218827415359, "percentage": 69.78, "elapsed_time": "10:39:14", "remaining_time": "4:36:51"}
|
||||
{"current_steps": 2965, "total_steps": 4242, "loss": 0.2225, "lr": 1.0080890731269674e-05, "epoch": 4.893476465730801, "percentage": 69.9, "elapsed_time": "10:40:20", "remaining_time": "4:35:47"}
|
||||
{"current_steps": 2970, "total_steps": 4242, "loss": 0.2229, "lr": 1.0009505300355515e-05, "epoch": 4.901734104046243, "percentage": 70.01, "elapsed_time": "10:41:19", "remaining_time": "4:34:40"}
|
||||
{"current_steps": 2975, "total_steps": 4242, "loss": 0.2223, "lr": 9.938289062216916e-06, "epoch": 4.909991742361685, "percentage": 70.13, "elapsed_time": "10:42:19", "remaining_time": "4:33:33"}
|
||||
{"current_steps": 2980, "total_steps": 4242, "loss": 0.2237, "lr": 9.867243222927593e-06, "epoch": 4.918249380677127, "percentage": 70.25, "elapsed_time": "10:43:28", "remaining_time": "4:32:30"}
|
||||
{"current_steps": 2985, "total_steps": 4242, "loss": 0.2269, "lr": 9.796368985675497e-06, "epoch": 4.926507018992568, "percentage": 70.37, "elapsed_time": "10:44:32", "remaining_time": "4:31:25"}
|
||||
{"current_steps": 2990, "total_steps": 4242, "loss": 0.2172, "lr": 9.725667550742408e-06, "epoch": 4.93476465730801, "percentage": 70.49, "elapsed_time": "10:45:41", "remaining_time": "4:30:22"}
|
||||
{"current_steps": 2995, "total_steps": 4242, "loss": 0.2267, "lr": 9.655140115483663e-06, "epoch": 4.943022295623452, "percentage": 70.6, "elapsed_time": "10:46:45", "remaining_time": "4:29:17"}
|
||||
{"current_steps": 3000, "total_steps": 4242, "loss": 0.2216, "lr": 9.584787874307828e-06, "epoch": 4.951279933938894, "percentage": 70.72, "elapsed_time": "10:47:53", "remaining_time": "4:28:13"}
|
||||
{"current_steps": 3005, "total_steps": 4242, "loss": 0.2259, "lr": 9.514612018656493e-06, "epoch": 4.959537572254336, "percentage": 70.84, "elapsed_time": "10:49:37", "remaining_time": "4:27:24"}
|
||||
{"current_steps": 3010, "total_steps": 4242, "loss": 0.2195, "lr": 9.444613736984107e-06, "epoch": 4.967795210569777, "percentage": 70.96, "elapsed_time": "10:50:43", "remaining_time": "4:26:20"}
|
||||
{"current_steps": 3015, "total_steps": 4242, "loss": 0.2232, "lr": 9.374794214737828e-06, "epoch": 4.976052848885219, "percentage": 71.07, "elapsed_time": "10:51:53", "remaining_time": "4:25:17"}
|
||||
{"current_steps": 3020, "total_steps": 4242, "loss": 0.2218, "lr": 9.305154634337466e-06, "epoch": 4.984310487200661, "percentage": 71.19, "elapsed_time": "10:53:02", "remaining_time": "4:24:14"}
|
||||
{"current_steps": 3025, "total_steps": 4242, "loss": 0.23, "lr": 9.235696175155429e-06, "epoch": 4.992568125516103, "percentage": 71.31, "elapsed_time": "10:54:05", "remaining_time": "4:23:09"}
|
||||
{"current_steps": 3030, "total_steps": 4242, "loss": 0.2217, "lr": 9.166420013496778e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "10:55:02", "remaining_time": "4:22:00"}
|
||||
{"current_steps": 3035, "total_steps": 4242, "loss": 0.2083, "lr": 9.097327322579309e-06, "epoch": 5.008257638315442, "percentage": 71.55, "elapsed_time": "10:56:01", "remaining_time": "4:20:53"}
|
||||
{"current_steps": 3040, "total_steps": 4242, "loss": 0.2081, "lr": 9.028419272513649e-06, "epoch": 5.016515276630884, "percentage": 71.66, "elapsed_time": "10:57:09", "remaining_time": "4:19:50"}
|
||||
{"current_steps": 3045, "total_steps": 4242, "loss": 0.2159, "lr": 8.959697030283483e-06, "epoch": 5.024772914946325, "percentage": 71.78, "elapsed_time": "10:58:23", "remaining_time": "4:18:48"}
|
||||
{"current_steps": 3050, "total_steps": 4242, "loss": 0.2155, "lr": 8.891161759725772e-06, "epoch": 5.033030553261767, "percentage": 71.9, "elapsed_time": "10:59:25", "remaining_time": "4:17:42"}
|
||||
{"current_steps": 3055, "total_steps": 4242, "loss": 0.2076, "lr": 8.822814621511026e-06, "epoch": 5.041288191577209, "percentage": 72.02, "elapsed_time": "11:00:35", "remaining_time": "4:16:40"}
|
||||
{"current_steps": 3060, "total_steps": 4242, "loss": 0.216, "lr": 8.754656773123662e-06, "epoch": 5.049545829892651, "percentage": 72.14, "elapsed_time": "11:01:34", "remaining_time": "4:15:32"}
|
||||
{"current_steps": 3065, "total_steps": 4242, "loss": 0.2112, "lr": 8.686689368842419e-06, "epoch": 5.057803468208093, "percentage": 72.25, "elapsed_time": "11:02:39", "remaining_time": "4:14:28"}
|
||||
{"current_steps": 3070, "total_steps": 4242, "loss": 0.2099, "lr": 8.61891355972079e-06, "epoch": 5.066061106523534, "percentage": 72.37, "elapsed_time": "11:03:42", "remaining_time": "4:13:22"}
|
||||
{"current_steps": 3075, "total_steps": 4242, "loss": 0.219, "lr": 8.551330493567517e-06, "epoch": 5.074318744838976, "percentage": 72.49, "elapsed_time": "11:04:44", "remaining_time": "4:12:16"}
|
||||
{"current_steps": 3080, "total_steps": 4242, "loss": 0.2244, "lr": 8.483941314927193e-06, "epoch": 5.082576383154418, "percentage": 72.61, "elapsed_time": "11:05:49", "remaining_time": "4:11:12"}
|
||||
{"current_steps": 3085, "total_steps": 4242, "loss": 0.2088, "lr": 8.41674716506083e-06, "epoch": 5.09083402146986, "percentage": 72.73, "elapsed_time": "11:06:52", "remaining_time": "4:10:06"}
|
||||
{"current_steps": 3090, "total_steps": 4242, "loss": 0.2134, "lr": 8.349749181926556e-06, "epoch": 5.0990916597853015, "percentage": 72.84, "elapsed_time": "11:07:55", "remaining_time": "4:09:00"}
|
||||
{"current_steps": 3095, "total_steps": 4242, "loss": 0.2183, "lr": 8.28294850016036e-06, "epoch": 5.107349298100743, "percentage": 72.96, "elapsed_time": "11:08:58", "remaining_time": "4:07:55"}
|
||||
{"current_steps": 3100, "total_steps": 4242, "loss": 0.2159, "lr": 8.216346251056846e-06, "epoch": 5.115606936416185, "percentage": 73.08, "elapsed_time": "11:10:09", "remaining_time": "4:06:52"}
|
||||
{"current_steps": 3105, "total_steps": 4242, "loss": 0.2081, "lr": 8.14994356255008e-06, "epoch": 5.123864574731627, "percentage": 73.2, "elapsed_time": "11:11:10", "remaining_time": "4:05:46"}
|
||||
{"current_steps": 3110, "total_steps": 4242, "loss": 0.2203, "lr": 8.083741559194515e-06, "epoch": 5.132122213047069, "percentage": 73.31, "elapsed_time": "11:12:09", "remaining_time": "4:04:39"}
|
||||
{"current_steps": 3115, "total_steps": 4242, "loss": 0.2106, "lr": 8.017741362145899e-06, "epoch": 5.1403798513625105, "percentage": 73.43, "elapsed_time": "11:13:16", "remaining_time": "4:03:35"}
|
||||
{"current_steps": 3120, "total_steps": 4242, "loss": 0.2173, "lr": 7.95194408914234e-06, "epoch": 5.148637489677952, "percentage": 73.55, "elapsed_time": "11:14:19", "remaining_time": "4:02:29"}
|
||||
{"current_steps": 3125, "total_steps": 4242, "loss": 0.2249, "lr": 7.886350854485329e-06, "epoch": 5.156895127993394, "percentage": 73.67, "elapsed_time": "11:15:21", "remaining_time": "4:01:23"}
|
||||
{"current_steps": 3130, "total_steps": 4242, "loss": 0.2179, "lr": 7.820962769020906e-06, "epoch": 5.165152766308836, "percentage": 73.79, "elapsed_time": "11:16:30", "remaining_time": "4:00:20"}
|
||||
{"current_steps": 3135, "total_steps": 4242, "loss": 0.2213, "lr": 7.755780940120836e-06, "epoch": 5.173410404624278, "percentage": 73.9, "elapsed_time": "11:17:45", "remaining_time": "3:59:19"}
|
||||
{"current_steps": 3140, "total_steps": 4242, "loss": 0.2187, "lr": 7.690806471663836e-06, "epoch": 5.1816680429397195, "percentage": 74.02, "elapsed_time": "11:18:53", "remaining_time": "3:58:15"}
|
||||
{"current_steps": 3145, "total_steps": 4242, "loss": 0.2118, "lr": 7.626040464016897e-06, "epoch": 5.189925681255161, "percentage": 74.14, "elapsed_time": "11:19:58", "remaining_time": "3:57:10"}
|
||||
{"current_steps": 3150, "total_steps": 4242, "loss": 0.2037, "lr": 7.561484014016665e-06, "epoch": 5.198183319570603, "percentage": 74.26, "elapsed_time": "11:21:00", "remaining_time": "3:56:05"}
|
||||
{"current_steps": 3155, "total_steps": 4242, "loss": 0.2148, "lr": 7.497138214950839e-06, "epoch": 5.206440957886045, "percentage": 74.38, "elapsed_time": "11:22:06", "remaining_time": "3:55:00"}
|
||||
{"current_steps": 3160, "total_steps": 4242, "loss": 0.2146, "lr": 7.433004156539656e-06, "epoch": 5.214698596201487, "percentage": 74.49, "elapsed_time": "11:23:11", "remaining_time": "3:53:55"}
|
||||
{"current_steps": 3165, "total_steps": 4242, "loss": 0.2118, "lr": 7.369082924917465e-06, "epoch": 5.2229562345169285, "percentage": 74.61, "elapsed_time": "11:24:16", "remaining_time": "3:52:50"}
|
||||
{"current_steps": 3170, "total_steps": 4242, "loss": 0.2084, "lr": 7.305375602614299e-06, "epoch": 5.23121387283237, "percentage": 74.73, "elapsed_time": "11:25:18", "remaining_time": "3:51:44"}
|
||||
{"current_steps": 3175, "total_steps": 4242, "loss": 0.2182, "lr": 7.2418832685375525e-06, "epoch": 5.239471511147812, "percentage": 74.85, "elapsed_time": "11:26:20", "remaining_time": "3:50:39"}
|
||||
{"current_steps": 3180, "total_steps": 4242, "loss": 0.2143, "lr": 7.178606997953728e-06, "epoch": 5.247729149463254, "percentage": 74.96, "elapsed_time": "11:27:22", "remaining_time": "3:49:33"}
|
||||
{"current_steps": 3185, "total_steps": 4242, "loss": 0.2115, "lr": 7.115547862470211e-06, "epoch": 5.255986787778696, "percentage": 75.08, "elapsed_time": "11:28:27", "remaining_time": "3:48:28"}
|
||||
{"current_steps": 3190, "total_steps": 4242, "loss": 0.2138, "lr": 7.052706930017106e-06, "epoch": 5.2642444260941375, "percentage": 75.2, "elapsed_time": "11:29:31", "remaining_time": "3:47:23"}
|
||||
{"current_steps": 3195, "total_steps": 4242, "loss": 0.2065, "lr": 6.9900852648291895e-06, "epoch": 5.272502064409579, "percentage": 75.32, "elapsed_time": "11:30:33", "remaining_time": "3:46:17"}
|
||||
{"current_steps": 3200, "total_steps": 4242, "loss": 0.2062, "lr": 6.927683927427842e-06, "epoch": 5.280759702725021, "percentage": 75.44, "elapsed_time": "11:31:34", "remaining_time": "3:45:11"}
|
||||
{"current_steps": 3205, "total_steps": 4242, "loss": 0.2174, "lr": 6.8655039746031315e-06, "epoch": 5.289017341040463, "percentage": 75.55, "elapsed_time": "11:32:36", "remaining_time": "3:44:05"}
|
||||
{"current_steps": 3210, "total_steps": 4242, "loss": 0.221, "lr": 6.803546459395873e-06, "epoch": 5.297274979355905, "percentage": 75.67, "elapsed_time": "11:33:49", "remaining_time": "3:43:03"}
|
||||
{"current_steps": 3215, "total_steps": 4242, "loss": 0.2084, "lr": 6.741812431079839e-06, "epoch": 5.305532617671346, "percentage": 75.79, "elapsed_time": "11:34:50", "remaining_time": "3:41:57"}
|
||||
{"current_steps": 3220, "total_steps": 4242, "loss": 0.2075, "lr": 6.680302935143963e-06, "epoch": 5.313790255986788, "percentage": 75.91, "elapsed_time": "11:35:51", "remaining_time": "3:40:51"}
|
||||
{"current_steps": 3225, "total_steps": 4242, "loss": 0.2154, "lr": 6.619019013274626e-06, "epoch": 5.32204789430223, "percentage": 76.03, "elapsed_time": "11:36:55", "remaining_time": "3:39:46"}
|
||||
{"current_steps": 3230, "total_steps": 4242, "loss": 0.2168, "lr": 6.557961703338027e-06, "epoch": 5.330305532617672, "percentage": 76.14, "elapsed_time": "11:37:53", "remaining_time": "3:38:39"}
|
||||
{"current_steps": 3235, "total_steps": 4242, "loss": 0.2195, "lr": 6.49713203936263e-06, "epoch": 5.338563170933113, "percentage": 76.26, "elapsed_time": "11:38:58", "remaining_time": "3:37:34"}
|
||||
{"current_steps": 3240, "total_steps": 4242, "loss": 0.2183, "lr": 6.4365310515216e-06, "epoch": 5.3468208092485545, "percentage": 76.38, "elapsed_time": "11:39:57", "remaining_time": "3:36:27"}
|
||||
{"current_steps": 3245, "total_steps": 4242, "loss": 0.2192, "lr": 6.376159766115408e-06, "epoch": 5.355078447563996, "percentage": 76.5, "elapsed_time": "11:41:02", "remaining_time": "3:35:23"}
|
||||
{"current_steps": 3250, "total_steps": 4242, "loss": 0.2138, "lr": 6.316019205554425e-06, "epoch": 5.363336085879438, "percentage": 76.61, "elapsed_time": "11:42:05", "remaining_time": "3:34:18"}
|
||||
{"current_steps": 3255, "total_steps": 4242, "loss": 0.2109, "lr": 6.256110388341597e-06, "epoch": 5.37159372419488, "percentage": 76.73, "elapsed_time": "11:43:08", "remaining_time": "3:33:12"}
|
||||
{"current_steps": 3260, "total_steps": 4242, "loss": 0.2177, "lr": 6.196434329055214e-06, "epoch": 5.379851362510322, "percentage": 76.85, "elapsed_time": "11:44:12", "remaining_time": "3:32:07"}
|
||||
{"current_steps": 3265, "total_steps": 4242, "loss": 0.2148, "lr": 6.136992038331735e-06, "epoch": 5.3881090008257635, "percentage": 76.97, "elapsed_time": "11:45:19", "remaining_time": "3:31:03"}
|
||||
{"current_steps": 3270, "total_steps": 4242, "loss": 0.2143, "lr": 6.077784522848653e-06, "epoch": 5.396366639141205, "percentage": 77.09, "elapsed_time": "11:46:31", "remaining_time": "3:30:00"}
|
||||
{"current_steps": 3275, "total_steps": 4242, "loss": 0.2141, "lr": 6.018812785307447e-06, "epoch": 5.404624277456647, "percentage": 77.2, "elapsed_time": "11:47:37", "remaining_time": "3:28:56"}
|
||||
{"current_steps": 3280, "total_steps": 4242, "loss": 0.2043, "lr": 5.960077824416623e-06, "epoch": 5.412881915772089, "percentage": 77.32, "elapsed_time": "11:48:44", "remaining_time": "3:27:52"}
|
||||
{"current_steps": 3285, "total_steps": 4242, "loss": 0.2143, "lr": 5.901580634874775e-06, "epoch": 5.421139554087531, "percentage": 77.44, "elapsed_time": "11:49:48", "remaining_time": "3:26:46"}
|
||||
{"current_steps": 3290, "total_steps": 4242, "loss": 0.2203, "lr": 5.843322207353746e-06, "epoch": 5.4293971924029725, "percentage": 77.56, "elapsed_time": "11:50:51", "remaining_time": "3:25:41"}
|
||||
{"current_steps": 3295, "total_steps": 4242, "loss": 0.2155, "lr": 5.785303528481862e-06, "epoch": 5.437654830718414, "percentage": 77.68, "elapsed_time": "11:51:58", "remaining_time": "3:24:37"}
|
||||
{"current_steps": 3300, "total_steps": 4242, "loss": 0.2115, "lr": 5.72752558082722e-06, "epoch": 5.445912469033856, "percentage": 77.79, "elapsed_time": "11:52:58", "remaining_time": "3:23:31"}
|
||||
{"current_steps": 3305, "total_steps": 4242, "loss": 0.2132, "lr": 5.66998934288105e-06, "epoch": 5.454170107349298, "percentage": 77.91, "elapsed_time": "11:53:57", "remaining_time": "3:22:24"}
|
||||
{"current_steps": 3310, "total_steps": 4242, "loss": 0.2197, "lr": 5.6126957890411e-06, "epoch": 5.46242774566474, "percentage": 78.03, "elapsed_time": "11:55:04", "remaining_time": "3:21:20"}
|
||||
{"current_steps": 3315, "total_steps": 4242, "loss": 0.2177, "lr": 5.5556458895952115e-06, "epoch": 5.4706853839801814, "percentage": 78.15, "elapsed_time": "11:56:07", "remaining_time": "3:20:15"}
|
||||
{"current_steps": 3320, "total_steps": 4242, "loss": 0.205, "lr": 5.498840610704837e-06, "epoch": 5.478943022295623, "percentage": 78.26, "elapsed_time": "11:57:08", "remaining_time": "3:19:09"}
|
||||
{"current_steps": 3325, "total_steps": 4242, "loss": 0.212, "lr": 5.442280914388673e-06, "epoch": 5.487200660611065, "percentage": 78.38, "elapsed_time": "11:58:17", "remaining_time": "3:18:05"}
|
||||
{"current_steps": 3330, "total_steps": 4242, "loss": 0.2099, "lr": 5.385967758506407e-06, "epoch": 5.495458298926507, "percentage": 78.5, "elapsed_time": "11:59:19", "remaining_time": "3:17:00"}
|
||||
{"current_steps": 3335, "total_steps": 4242, "loss": 0.2157, "lr": 5.329902096742452e-06, "epoch": 5.503715937241949, "percentage": 78.62, "elapsed_time": "12:00:25", "remaining_time": "3:15:55"}
|
||||
{"current_steps": 3340, "total_steps": 4242, "loss": 0.2204, "lr": 5.274084878589818e-06, "epoch": 5.51197357555739, "percentage": 78.74, "elapsed_time": "12:01:30", "remaining_time": "3:14:51"}
|
||||
{"current_steps": 3345, "total_steps": 4242, "loss": 0.2127, "lr": 5.21851704933404e-06, "epoch": 5.520231213872832, "percentage": 78.85, "elapsed_time": "12:02:36", "remaining_time": "3:13:46"}
|
||||
{"current_steps": 3350, "total_steps": 4242, "loss": 0.2107, "lr": 5.16319955003715e-06, "epoch": 5.528488852188274, "percentage": 78.97, "elapsed_time": "12:03:40", "remaining_time": "3:12:41"}
|
||||
{"current_steps": 3355, "total_steps": 4242, "loss": 0.2145, "lr": 5.108133317521757e-06, "epoch": 5.536746490503716, "percentage": 79.09, "elapsed_time": "12:04:49", "remaining_time": "3:11:37"}
|
||||
{"current_steps": 3360, "total_steps": 4242, "loss": 0.2185, "lr": 5.053319284355162e-06, "epoch": 5.545004128819158, "percentage": 79.21, "elapsed_time": "12:05:50", "remaining_time": "3:10:32"}
|
||||
{"current_steps": 3365, "total_steps": 4242, "loss": 0.2126, "lr": 4.99875837883357e-06, "epoch": 5.553261767134599, "percentage": 79.33, "elapsed_time": "12:06:57", "remaining_time": "3:09:27"}
|
||||
{"current_steps": 3370, "total_steps": 4242, "loss": 0.2057, "lr": 4.944451524966401e-06, "epoch": 5.561519405450041, "percentage": 79.44, "elapsed_time": "12:08:01", "remaining_time": "3:08:22"}
|
||||
{"current_steps": 3375, "total_steps": 4242, "loss": 0.2142, "lr": 4.890399642460582e-06, "epoch": 5.569777043765483, "percentage": 79.56, "elapsed_time": "12:09:09", "remaining_time": "3:07:18"}
|
||||
{"current_steps": 3380, "total_steps": 4242, "loss": 0.224, "lr": 4.836603646705027e-06, "epoch": 5.578034682080925, "percentage": 79.68, "elapsed_time": "12:10:13", "remaining_time": "3:06:13"}
|
||||
{"current_steps": 3385, "total_steps": 4242, "loss": 0.215, "lr": 4.783064448755113e-06, "epoch": 5.586292320396367, "percentage": 79.8, "elapsed_time": "12:11:13", "remaining_time": "3:05:07"}
|
||||
{"current_steps": 3390, "total_steps": 4242, "loss": 0.2133, "lr": 4.729782955317233e-06, "epoch": 5.594549958711808, "percentage": 79.92, "elapsed_time": "12:12:15", "remaining_time": "3:04:02"}
|
||||
{"current_steps": 3395, "total_steps": 4242, "loss": 0.2088, "lr": 4.676760068733461e-06, "epoch": 5.60280759702725, "percentage": 80.03, "elapsed_time": "12:13:16", "remaining_time": "3:02:56"}
|
||||
{"current_steps": 3400, "total_steps": 4242, "loss": 0.2182, "lr": 4.623996686966279e-06, "epoch": 5.611065235342692, "percentage": 80.15, "elapsed_time": "12:14:15", "remaining_time": "3:01:50"}
|
||||
{"current_steps": 3405, "total_steps": 4242, "loss": 0.2162, "lr": 4.571493703583358e-06, "epoch": 5.619322873658134, "percentage": 80.27, "elapsed_time": "12:15:21", "remaining_time": "3:00:45"}
|
||||
{"current_steps": 3410, "total_steps": 4242, "loss": 0.2174, "lr": 4.519252007742405e-06, "epoch": 5.627580511973576, "percentage": 80.39, "elapsed_time": "12:16:31", "remaining_time": "2:59:42"}
|
||||
{"current_steps": 3415, "total_steps": 4242, "loss": 0.2083, "lr": 4.467272484176146e-06, "epoch": 5.635838150289017, "percentage": 80.5, "elapsed_time": "12:17:31", "remaining_time": "2:58:36"}
|
||||
{"current_steps": 3420, "total_steps": 4242, "loss": 0.2223, "lr": 4.415556013177311e-06, "epoch": 5.644095788604459, "percentage": 80.62, "elapsed_time": "12:18:37", "remaining_time": "2:57:31"}
|
||||
{"current_steps": 3425, "total_steps": 4242, "loss": 0.2234, "lr": 4.364103470583729e-06, "epoch": 5.652353426919901, "percentage": 80.74, "elapsed_time": "12:19:44", "remaining_time": "2:56:27"}
|
||||
{"current_steps": 3430, "total_steps": 4242, "loss": 0.2158, "lr": 4.312915727763516e-06, "epoch": 5.660611065235343, "percentage": 80.86, "elapsed_time": "12:20:46", "remaining_time": "2:55:21"}
|
||||
{"current_steps": 3435, "total_steps": 4242, "loss": 0.2137, "lr": 4.2619936516003e-06, "epoch": 5.6688687035507845, "percentage": 80.98, "elapsed_time": "12:21:54", "remaining_time": "2:54:17"}
|
||||
{"current_steps": 3440, "total_steps": 4242, "loss": 0.2177, "lr": 4.211338104478548e-06, "epoch": 5.677126341866226, "percentage": 81.09, "elapsed_time": "12:22:58", "remaining_time": "2:53:13"}
|
||||
{"current_steps": 3445, "total_steps": 4242, "loss": 0.2199, "lr": 4.16094994426895e-06, "epoch": 5.685383980181668, "percentage": 81.21, "elapsed_time": "12:23:59", "remaining_time": "2:52:07"}
|
||||
{"current_steps": 3450, "total_steps": 4242, "loss": 0.204, "lr": 4.1108300243138945e-06, "epoch": 5.69364161849711, "percentage": 81.33, "elapsed_time": "12:25:00", "remaining_time": "2:51:01"}
|
||||
{"current_steps": 3455, "total_steps": 4242, "loss": 0.2142, "lr": 4.060979193413041e-06, "epoch": 5.701899256812552, "percentage": 81.45, "elapsed_time": "12:26:01", "remaining_time": "2:49:56"}
|
||||
{"current_steps": 3460, "total_steps": 4242, "loss": 0.2163, "lr": 4.011398295808899e-06, "epoch": 5.7101568951279935, "percentage": 81.57, "elapsed_time": "12:27:02", "remaining_time": "2:48:50"}
|
||||
{"current_steps": 3465, "total_steps": 4242, "loss": 0.2164, "lr": 3.962088171172574e-06, "epoch": 5.718414533443435, "percentage": 81.68, "elapsed_time": "12:28:08", "remaining_time": "2:47:45"}
|
||||
{"current_steps": 3470, "total_steps": 4242, "loss": 0.2168, "lr": 3.913049654589531e-06, "epoch": 5.726672171758877, "percentage": 81.8, "elapsed_time": "12:29:10", "remaining_time": "2:46:40"}
|
||||
{"current_steps": 3475, "total_steps": 4242, "loss": 0.2077, "lr": 3.864283576545442e-06, "epoch": 5.734929810074319, "percentage": 81.92, "elapsed_time": "12:30:15", "remaining_time": "2:45:35"}
|
||||
{"current_steps": 3480, "total_steps": 4242, "loss": 0.2152, "lr": 3.815790762912124e-06, "epoch": 5.743187448389761, "percentage": 82.04, "elapsed_time": "12:31:19", "remaining_time": "2:44:30"}
|
||||
{"current_steps": 3485, "total_steps": 4242, "loss": 0.2122, "lr": 3.767572034933573e-06, "epoch": 5.7514450867052025, "percentage": 82.15, "elapsed_time": "12:32:28", "remaining_time": "2:43:26"}
|
||||
{"current_steps": 3490, "total_steps": 4242, "loss": 0.2197, "lr": 3.719628209212043e-06, "epoch": 5.759702725020644, "percentage": 82.27, "elapsed_time": "12:33:31", "remaining_time": "2:42:21"}
|
||||
{"current_steps": 3495, "total_steps": 4242, "loss": 0.2121, "lr": 3.671960097694196e-06, "epoch": 5.767960363336086, "percentage": 82.39, "elapsed_time": "12:34:37", "remaining_time": "2:41:17"}
|
||||
{"current_steps": 3500, "total_steps": 4242, "loss": 0.2095, "lr": 3.6245685076573956e-06, "epoch": 5.776218001651528, "percentage": 82.51, "elapsed_time": "12:35:42", "remaining_time": "2:40:12"}
|
||||
{"current_steps": 3505, "total_steps": 4242, "loss": 0.2222, "lr": 3.577454241695988e-06, "epoch": 5.78447563996697, "percentage": 82.63, "elapsed_time": "12:36:47", "remaining_time": "2:39:07"}
|
||||
{"current_steps": 3510, "total_steps": 4242, "loss": 0.2067, "lr": 3.530618097707743e-06, "epoch": 5.7927332782824115, "percentage": 82.74, "elapsed_time": "12:37:55", "remaining_time": "2:38:03"}
|
||||
{"current_steps": 3515, "total_steps": 4242, "loss": 0.2177, "lr": 3.484060868880328e-06, "epoch": 5.800990916597853, "percentage": 82.86, "elapsed_time": "12:38:59", "remaining_time": "2:36:58"}
|
||||
{"current_steps": 3520, "total_steps": 4242, "loss": 0.2182, "lr": 3.4377833436778874e-06, "epoch": 5.809248554913295, "percentage": 82.98, "elapsed_time": "12:40:03", "remaining_time": "2:35:53"}
|
||||
{"current_steps": 3525, "total_steps": 4242, "loss": 0.215, "lr": 3.39178630582766e-06, "epoch": 5.817506193228737, "percentage": 83.1, "elapsed_time": "12:41:07", "remaining_time": "2:34:48"}
|
||||
{"current_steps": 3530, "total_steps": 4242, "loss": 0.209, "lr": 3.3460705343067467e-06, "epoch": 5.825763831544179, "percentage": 83.22, "elapsed_time": "12:42:09", "remaining_time": "2:33:43"}
|
||||
{"current_steps": 3535, "total_steps": 4242, "loss": 0.2059, "lr": 3.3006368033288783e-06, "epoch": 5.8340214698596204, "percentage": 83.33, "elapsed_time": "12:43:16", "remaining_time": "2:32:39"}
|
||||
{"current_steps": 3540, "total_steps": 4242, "loss": 0.2156, "lr": 3.2554858823313417e-06, "epoch": 5.842279108175062, "percentage": 83.45, "elapsed_time": "12:44:22", "remaining_time": "2:31:34"}
|
||||
{"current_steps": 3545, "total_steps": 4242, "loss": 0.2054, "lr": 3.210618535961916e-06, "epoch": 5.850536746490504, "percentage": 83.57, "elapsed_time": "12:45:20", "remaining_time": "2:30:28"}
|
||||
{"current_steps": 3550, "total_steps": 4242, "loss": 0.2136, "lr": 3.1660355240659423e-06, "epoch": 5.858794384805946, "percentage": 83.69, "elapsed_time": "12:46:27", "remaining_time": "2:29:24"}
|
||||
{"current_steps": 3555, "total_steps": 4242, "loss": 0.2102, "lr": 3.1217376016734624e-06, "epoch": 5.867052023121388, "percentage": 83.8, "elapsed_time": "12:47:27", "remaining_time": "2:28:18"}
|
||||
{"current_steps": 3560, "total_steps": 4242, "loss": 0.2145, "lr": 3.077725518986401e-06, "epoch": 5.875309661436829, "percentage": 83.92, "elapsed_time": "12:48:31", "remaining_time": "2:27:13"}
|
||||
{"current_steps": 3565, "total_steps": 4242, "loss": 0.2097, "lr": 3.0340000213658882e-06, "epoch": 5.883567299752271, "percentage": 84.04, "elapsed_time": "12:49:42", "remaining_time": "2:26:10"}
|
||||
{"current_steps": 3570, "total_steps": 4242, "loss": 0.2199, "lr": 2.9905618493196353e-06, "epoch": 5.891824938067713, "percentage": 84.16, "elapsed_time": "12:50:47", "remaining_time": "2:25:05"}
|
||||
{"current_steps": 3575, "total_steps": 4242, "loss": 0.21, "lr": 2.947411738489374e-06, "epoch": 5.900082576383154, "percentage": 84.28, "elapsed_time": "12:51:55", "remaining_time": "2:24:01"}
|
||||
{"current_steps": 3580, "total_steps": 4242, "loss": 0.2171, "lr": 2.904550419638421e-06, "epoch": 5.908340214698596, "percentage": 84.39, "elapsed_time": "12:52:55", "remaining_time": "2:22:55"}
|
||||
{"current_steps": 3585, "total_steps": 4242, "loss": 0.2154, "lr": 2.8619786186392986e-06, "epoch": 5.9165978530140375, "percentage": 84.51, "elapsed_time": "12:53:56", "remaining_time": "2:21:50"}
|
||||
{"current_steps": 3590, "total_steps": 4242, "loss": 0.2171, "lr": 2.8196970564614167e-06, "epoch": 5.924855491329479, "percentage": 84.63, "elapsed_time": "12:54:56", "remaining_time": "2:20:44"}
|
||||
{"current_steps": 3595, "total_steps": 4242, "loss": 0.2174, "lr": 2.7777064491588946e-06, "epoch": 5.933113129644921, "percentage": 84.75, "elapsed_time": "12:56:01", "remaining_time": "2:19:39"}
|
||||
{"current_steps": 3600, "total_steps": 4242, "loss": 0.2098, "lr": 2.736007507858418e-06, "epoch": 5.941370767960363, "percentage": 84.87, "elapsed_time": "12:57:04", "remaining_time": "2:18:34"}
|
||||
{"current_steps": 3605, "total_steps": 4242, "loss": 0.2136, "lr": 2.6946009387472074e-06, "epoch": 5.949628406275805, "percentage": 84.98, "elapsed_time": "12:58:09", "remaining_time": "2:17:29"}
|
||||
{"current_steps": 3610, "total_steps": 4242, "loss": 0.21, "lr": 2.653487443061036e-06, "epoch": 5.9578860445912465, "percentage": 85.1, "elapsed_time": "12:59:14", "remaining_time": "2:16:25"}
|
||||
{"current_steps": 3615, "total_steps": 4242, "loss": 0.215, "lr": 2.6126677170723837e-06, "epoch": 5.966143682906688, "percentage": 85.22, "elapsed_time": "13:00:20", "remaining_time": "2:15:20"}
|
||||
{"current_steps": 3620, "total_steps": 4242, "loss": 0.2172, "lr": 2.5721424520786163e-06, "epoch": 5.97440132122213, "percentage": 85.34, "elapsed_time": "13:01:19", "remaining_time": "2:14:15"}
|
||||
{"current_steps": 3625, "total_steps": 4242, "loss": 0.2126, "lr": 2.531912334390307e-06, "epoch": 5.982658959537572, "percentage": 85.45, "elapsed_time": "13:02:28", "remaining_time": "2:13:10"}
|
||||
{"current_steps": 3630, "total_steps": 4242, "loss": 0.215, "lr": 2.4919780453195808e-06, "epoch": 5.990916597853014, "percentage": 85.57, "elapsed_time": "13:03:32", "remaining_time": "2:12:06"}
|
||||
{"current_steps": 3635, "total_steps": 4242, "loss": 0.2171, "lr": 2.452340261168611e-06, "epoch": 5.9991742361684555, "percentage": 85.69, "elapsed_time": "13:04:42", "remaining_time": "2:11:02"}
|
||||
{"current_steps": 3640, "total_steps": 4242, "loss": 0.2003, "lr": 2.4129996532181423e-06, "epoch": 6.006606110652354, "percentage": 85.81, "elapsed_time": "13:05:39", "remaining_time": "2:09:56"}
|
||||
{"current_steps": 3645, "total_steps": 4242, "loss": 0.2112, "lr": 2.3739568877161266e-06, "epoch": 6.014863748967795, "percentage": 85.93, "elapsed_time": "13:06:39", "remaining_time": "2:08:50"}
|
||||
{"current_steps": 3650, "total_steps": 4242, "loss": 0.2192, "lr": 2.335212625866443e-06, "epoch": 6.023121387283237, "percentage": 86.04, "elapsed_time": "13:07:45", "remaining_time": "2:07:46"}
|
||||
{"current_steps": 3655, "total_steps": 4242, "loss": 0.2067, "lr": 2.296767523817702e-06, "epoch": 6.031379025598679, "percentage": 86.16, "elapsed_time": "13:08:50", "remaining_time": "2:06:41"}
|
||||
{"current_steps": 3660, "total_steps": 4242, "loss": 0.2099, "lr": 2.2586222326521277e-06, "epoch": 6.039636663914121, "percentage": 86.28, "elapsed_time": "13:09:54", "remaining_time": "2:05:36"}
|
||||
{"current_steps": 3665, "total_steps": 4242, "loss": 0.2204, "lr": 2.220777398374534e-06, "epoch": 6.047894302229563, "percentage": 86.4, "elapsed_time": "13:11:05", "remaining_time": "2:04:32"}
|
||||
{"current_steps": 3670, "total_steps": 4242, "loss": 0.2038, "lr": 2.183233661901396e-06, "epoch": 6.056151940545004, "percentage": 86.52, "elapsed_time": "13:12:06", "remaining_time": "2:03:27"}
|
||||
{"current_steps": 3675, "total_steps": 4242, "loss": 0.2111, "lr": 2.1459916590499663e-06, "epoch": 6.064409578860446, "percentage": 86.63, "elapsed_time": "13:13:11", "remaining_time": "2:02:22"}
|
||||
{"current_steps": 3680, "total_steps": 4242, "loss": 0.209, "lr": 2.1090520205275333e-06, "epoch": 6.072667217175888, "percentage": 86.75, "elapsed_time": "13:14:14", "remaining_time": "2:01:17"}
|
||||
{"current_steps": 3685, "total_steps": 4242, "loss": 0.2053, "lr": 2.072415371920735e-06, "epoch": 6.08092485549133, "percentage": 86.87, "elapsed_time": "13:15:16", "remaining_time": "2:00:12"}
|
||||
{"current_steps": 3690, "total_steps": 4242, "loss": 0.2048, "lr": 2.0360823336849634e-06, "epoch": 6.089182493806772, "percentage": 86.99, "elapsed_time": "13:16:25", "remaining_time": "1:59:08"}
|
||||
{"current_steps": 3695, "total_steps": 4242, "loss": 0.2108, "lr": 2.0000535211338447e-06, "epoch": 6.097440132122213, "percentage": 87.11, "elapsed_time": "13:17:24", "remaining_time": "1:58:02"}
|
||||
{"current_steps": 3700, "total_steps": 4242, "loss": 0.2084, "lr": 1.964329544428842e-06, "epoch": 6.105697770437655, "percentage": 87.22, "elapsed_time": "13:18:29", "remaining_time": "1:56:58"}
|
||||
{"current_steps": 3705, "total_steps": 4242, "loss": 0.2103, "lr": 1.928911008568899e-06, "epoch": 6.113955408753097, "percentage": 87.34, "elapsed_time": "13:19:27", "remaining_time": "1:55:52"}
|
||||
{"current_steps": 3710, "total_steps": 4242, "loss": 0.2111, "lr": 1.8937985133802028e-06, "epoch": 6.122213047068539, "percentage": 87.46, "elapsed_time": "13:20:32", "remaining_time": "1:54:47"}
|
||||
{"current_steps": 3715, "total_steps": 4242, "loss": 0.213, "lr": 1.8589926535060353e-06, "epoch": 6.1304706853839805, "percentage": 87.58, "elapsed_time": "13:21:47", "remaining_time": "1:53:44"}
|
||||
{"current_steps": 3720, "total_steps": 4242, "loss": 0.2158, "lr": 1.824494018396692e-06, "epoch": 6.138728323699422, "percentage": 87.69, "elapsed_time": "13:22:56", "remaining_time": "1:52:40"}
|
||||
{"current_steps": 3725, "total_steps": 4242, "loss": 0.2128, "lr": 1.7903031922995007e-06, "epoch": 6.146985962014864, "percentage": 87.81, "elapsed_time": "13:23:57", "remaining_time": "1:51:34"}
|
||||
{"current_steps": 3730, "total_steps": 4242, "loss": 0.2094, "lr": 1.7564207542489287e-06, "epoch": 6.155243600330306, "percentage": 87.93, "elapsed_time": "13:25:06", "remaining_time": "1:50:30"}
|
||||
{"current_steps": 3735, "total_steps": 4242, "loss": 0.2114, "lr": 1.7228472780567673e-06, "epoch": 6.163501238645748, "percentage": 88.05, "elapsed_time": "13:26:07", "remaining_time": "1:49:25"}
|
||||
{"current_steps": 3740, "total_steps": 4242, "loss": 0.2103, "lr": 1.6895833323024403e-06, "epoch": 6.1717588769611895, "percentage": 88.17, "elapsed_time": "13:27:11", "remaining_time": "1:48:20"}
|
||||
{"current_steps": 3745, "total_steps": 4242, "loss": 0.2157, "lr": 1.6566294803233374e-06, "epoch": 6.18001651527663, "percentage": 88.28, "elapsed_time": "13:28:11", "remaining_time": "1:47:15"}
|
||||
{"current_steps": 3750, "total_steps": 4242, "loss": 0.2049, "lr": 1.623986280205312e-06, "epoch": 6.188274153592072, "percentage": 88.4, "elapsed_time": "13:29:11", "remaining_time": "1:46:10"}
|
||||
{"current_steps": 3755, "total_steps": 4242, "loss": 0.2091, "lr": 1.5916542847732076e-06, "epoch": 6.196531791907514, "percentage": 88.52, "elapsed_time": "13:30:12", "remaining_time": "1:45:04"}
|
||||
{"current_steps": 3760, "total_steps": 4242, "loss": 0.2111, "lr": 1.5596340415814837e-06, "epoch": 6.204789430222956, "percentage": 88.64, "elapsed_time": "13:31:15", "remaining_time": "1:43:59"}
|
||||
{"current_steps": 3765, "total_steps": 4242, "loss": 0.2081, "lr": 1.5279260929049766e-06, "epoch": 6.213047068538398, "percentage": 88.76, "elapsed_time": "13:32:22", "remaining_time": "1:42:55"}
|
||||
{"current_steps": 3770, "total_steps": 4242, "loss": 0.2092, "lr": 1.496530975729693e-06, "epoch": 6.221304706853839, "percentage": 88.87, "elapsed_time": "13:33:28", "remaining_time": "1:41:50"}
|
||||
{"current_steps": 3775, "total_steps": 4242, "loss": 0.2121, "lr": 1.4654492217437222e-06, "epoch": 6.229562345169281, "percentage": 88.99, "elapsed_time": "13:34:39", "remaining_time": "1:40:46"}
|
||||
{"current_steps": 3780, "total_steps": 4242, "loss": 0.2043, "lr": 1.434681357328227e-06, "epoch": 6.237819983484723, "percentage": 89.11, "elapsed_time": "13:35:41", "remaining_time": "1:39:41"}
|
||||
{"current_steps": 3785, "total_steps": 4242, "loss": 0.2136, "lr": 1.4042279035485251e-06, "epoch": 6.246077621800165, "percentage": 89.23, "elapsed_time": "13:36:49", "remaining_time": "1:38:37"}
|
||||
{"current_steps": 3790, "total_steps": 4242, "loss": 0.2089, "lr": 1.3740893761452934e-06, "epoch": 6.254335260115607, "percentage": 89.34, "elapsed_time": "13:37:55", "remaining_time": "1:37:32"}
|
||||
{"current_steps": 3795, "total_steps": 4242, "loss": 0.2238, "lr": 1.3442662855257883e-06, "epoch": 6.262592898431048, "percentage": 89.46, "elapsed_time": "13:38:59", "remaining_time": "1:36:28"}
|
||||
{"current_steps": 3800, "total_steps": 4242, "loss": 0.2064, "lr": 1.3147591367552416e-06, "epoch": 6.27085053674649, "percentage": 89.58, "elapsed_time": "13:40:07", "remaining_time": "1:35:23"}
|
||||
{"current_steps": 3805, "total_steps": 4242, "loss": 0.2046, "lr": 1.2855684295482918e-06, "epoch": 6.279108175061932, "percentage": 89.7, "elapsed_time": "13:41:10", "remaining_time": "1:34:18"}
|
||||
{"current_steps": 3810, "total_steps": 4242, "loss": 0.204, "lr": 1.2566946582605133e-06, "epoch": 6.287365813377374, "percentage": 89.82, "elapsed_time": "13:42:12", "remaining_time": "1:33:13"}
|
||||
{"current_steps": 3815, "total_steps": 4242, "loss": 0.2051, "lr": 1.2281383118800472e-06, "epoch": 6.295623451692816, "percentage": 89.93, "elapsed_time": "13:43:12", "remaining_time": "1:32:08"}
|
||||
{"current_steps": 3820, "total_steps": 4242, "loss": 0.2166, "lr": 1.1998998740193413e-06, "epoch": 6.303881090008257, "percentage": 90.05, "elapsed_time": "13:44:14", "remaining_time": "1:31:03"}
|
||||
{"current_steps": 3825, "total_steps": 4242, "loss": 0.2116, "lr": 1.1719798229069324e-06, "epoch": 6.312138728323699, "percentage": 90.17, "elapsed_time": "13:45:14", "remaining_time": "1:29:58"}
|
||||
{"current_steps": 3830, "total_steps": 4242, "loss": 0.2086, "lr": 1.1443786313793548e-06, "epoch": 6.320396366639141, "percentage": 90.29, "elapsed_time": "13:46:20", "remaining_time": "1:28:53"}
|
||||
{"current_steps": 3835, "total_steps": 4242, "loss": 0.2035, "lr": 1.117096766873149e-06, "epoch": 6.328654004954583, "percentage": 90.41, "elapsed_time": "13:47:14", "remaining_time": "1:27:47"}
|
||||
{"current_steps": 3840, "total_steps": 4242, "loss": 0.2123, "lr": 1.0901346914169197e-06, "epoch": 6.3369116432700245, "percentage": 90.52, "elapsed_time": "13:48:23", "remaining_time": "1:26:43"}
|
||||
{"current_steps": 3845, "total_steps": 4242, "loss": 0.2099, "lr": 1.0634928616235273e-06, "epoch": 6.345169281585466, "percentage": 90.64, "elapsed_time": "13:49:26", "remaining_time": "1:25:38"}
|
||||
{"current_steps": 3850, "total_steps": 4242, "loss": 0.21, "lr": 1.0371717286823601e-06, "epoch": 6.353426919900908, "percentage": 90.76, "elapsed_time": "13:50:29", "remaining_time": "1:24:33"}
|
||||
{"current_steps": 3855, "total_steps": 4242, "loss": 0.2102, "lr": 1.0111717383516728e-06, "epoch": 6.36168455821635, "percentage": 90.88, "elapsed_time": "13:51:36", "remaining_time": "1:23:29"}
|
||||
{"current_steps": 3860, "total_steps": 4242, "loss": 0.2031, "lr": 9.854933309510618e-07, "epoch": 6.369942196531792, "percentage": 90.99, "elapsed_time": "13:52:38", "remaining_time": "1:22:24"}
|
||||
{"current_steps": 3865, "total_steps": 4242, "loss": 0.2106, "lr": 9.60136941353984e-07, "epoch": 6.3781998348472335, "percentage": 91.11, "elapsed_time": "13:53:42", "remaining_time": "1:21:19"}
|
||||
{"current_steps": 3870, "total_steps": 4242, "loss": 0.2086, "lr": 9.351029989804106e-07, "epoch": 6.386457473162675, "percentage": 91.23, "elapsed_time": "13:54:51", "remaining_time": "1:20:14"}
|
||||
{"current_steps": 3875, "total_steps": 4242, "loss": 0.2085, "lr": 9.103919277895468e-07, "epoch": 6.394715111478117, "percentage": 91.35, "elapsed_time": "13:55:53", "remaining_time": "1:19:10"}
|
||||
{"current_steps": 3880, "total_steps": 4242, "loss": 0.2052, "lr": 8.860041462726543e-07, "epoch": 6.402972749793559, "percentage": 91.47, "elapsed_time": "13:56:53", "remaining_time": "1:18:04"}
|
||||
{"current_steps": 3885, "total_steps": 4242, "loss": 0.2078, "lr": 8.619400674459589e-07, "epoch": 6.411230388109001, "percentage": 91.58, "elapsed_time": "13:57:55", "remaining_time": "1:16:59"}
|
||||
{"current_steps": 3890, "total_steps": 4242, "loss": 0.2158, "lr": 8.38200098843669e-07, "epoch": 6.4194880264244425, "percentage": 91.7, "elapsed_time": "13:59:05", "remaining_time": "1:15:55"}
|
||||
{"current_steps": 3895, "total_steps": 4242, "loss": 0.2049, "lr": 8.14784642511055e-07, "epoch": 6.427745664739884, "percentage": 91.82, "elapsed_time": "14:00:07", "remaining_time": "1:14:50"}
|
||||
{"current_steps": 3900, "total_steps": 4242, "loss": 0.2143, "lr": 7.916940949976526e-07, "epoch": 6.436003303055326, "percentage": 91.94, "elapsed_time": "14:01:14", "remaining_time": "1:13:46"}
|
||||
{"current_steps": 3905, "total_steps": 4242, "loss": 0.2132, "lr": 7.689288473505474e-07, "epoch": 6.444260941370768, "percentage": 92.06, "elapsed_time": "14:02:18", "remaining_time": "1:12:41"}
|
||||
{"current_steps": 3910, "total_steps": 4242, "loss": 0.2112, "lr": 7.464892851077499e-07, "epoch": 6.45251857968621, "percentage": 92.17, "elapsed_time": "14:03:22", "remaining_time": "1:11:36"}
|
||||
{"current_steps": 3915, "total_steps": 4242, "loss": 0.2139, "lr": 7.243757882916624e-07, "epoch": 6.4607762180016515, "percentage": 92.29, "elapsed_time": "14:04:19", "remaining_time": "1:10:31"}
|
||||
{"current_steps": 3920, "total_steps": 4242, "loss": 0.2069, "lr": 7.025887314026513e-07, "epoch": 6.469033856317093, "percentage": 92.41, "elapsed_time": "14:05:21", "remaining_time": "1:09:26"}
|
||||
{"current_steps": 3925, "total_steps": 4242, "loss": 0.2066, "lr": 6.811284834126963e-07, "epoch": 6.477291494632535, "percentage": 92.53, "elapsed_time": "14:06:24", "remaining_time": "1:08:21"}
|
||||
{"current_steps": 3930, "total_steps": 4242, "loss": 0.2043, "lr": 6.599954077591464e-07, "epoch": 6.485549132947977, "percentage": 92.64, "elapsed_time": "14:07:23", "remaining_time": "1:07:16"}
|
||||
{"current_steps": 3935, "total_steps": 4242, "loss": 0.2187, "lr": 6.391898623385695e-07, "epoch": 6.493806771263419, "percentage": 92.76, "elapsed_time": "14:08:30", "remaining_time": "1:06:11"}
|
||||
{"current_steps": 3940, "total_steps": 4242, "loss": 0.2086, "lr": 6.187121995006817e-07, "epoch": 6.5020644095788604, "percentage": 92.88, "elapsed_time": "14:09:27", "remaining_time": "1:05:06"}
|
||||
{"current_steps": 3945, "total_steps": 4242, "loss": 0.2072, "lr": 5.98562766042381e-07, "epoch": 6.510322047894302, "percentage": 93.0, "elapsed_time": "14:10:32", "remaining_time": "1:04:01"}
|
||||
{"current_steps": 3950, "total_steps": 4242, "loss": 0.2061, "lr": 5.78741903201887e-07, "epoch": 6.518579686209744, "percentage": 93.12, "elapsed_time": "14:11:35", "remaining_time": "1:02:57"}
|
||||
{"current_steps": 3955, "total_steps": 4242, "loss": 0.2081, "lr": 5.592499466529445e-07, "epoch": 6.526837324525186, "percentage": 93.23, "elapsed_time": "14:12:36", "remaining_time": "1:01:52"}
|
||||
{"current_steps": 3960, "total_steps": 4242, "loss": 0.2098, "lr": 5.400872264991508e-07, "epoch": 6.535094962840628, "percentage": 93.35, "elapsed_time": "14:13:43", "remaining_time": "1:00:47"}
|
||||
{"current_steps": 3965, "total_steps": 4242, "loss": 0.2103, "lr": 5.212540672683575e-07, "epoch": 6.543352601156069, "percentage": 93.47, "elapsed_time": "14:14:49", "remaining_time": "0:59:43"}
|
||||
{"current_steps": 3970, "total_steps": 4242, "loss": 0.1998, "lr": 5.027507879071869e-07, "epoch": 6.551610239471511, "percentage": 93.59, "elapsed_time": "14:15:52", "remaining_time": "0:58:38"}
|
||||
{"current_steps": 3975, "total_steps": 4242, "loss": 0.216, "lr": 4.845777017756126e-07, "epoch": 6.559867877786953, "percentage": 93.71, "elapsed_time": "14:17:00", "remaining_time": "0:57:33"}
|
||||
{"current_steps": 3980, "total_steps": 4242, "loss": 0.2086, "lr": 4.667351166416678e-07, "epoch": 6.568125516102395, "percentage": 93.82, "elapsed_time": "14:18:05", "remaining_time": "0:56:29"}
|
||||
{"current_steps": 3985, "total_steps": 4242, "loss": 0.2067, "lr": 4.4922333467622316e-07, "epoch": 6.576383154417837, "percentage": 93.94, "elapsed_time": "14:19:10", "remaining_time": "0:55:24"}
|
||||
{"current_steps": 3990, "total_steps": 4242, "loss": 0.2102, "lr": 4.320426524478749e-07, "epoch": 6.584640792733278, "percentage": 94.06, "elapsed_time": "14:20:23", "remaining_time": "0:54:20"}
|
||||
{"current_steps": 3995, "total_steps": 4242, "loss": 0.2092, "lr": 4.1519336091792263e-07, "epoch": 6.59289843104872, "percentage": 94.18, "elapsed_time": "14:21:30", "remaining_time": "0:53:15"}
|
||||
{"current_steps": 4000, "total_steps": 4242, "loss": 0.21, "lr": 3.9867574543544174e-07, "epoch": 6.601156069364162, "percentage": 94.3, "elapsed_time": "14:22:34", "remaining_time": "0:52:11"}
|
||||
{"current_steps": 4005, "total_steps": 4242, "loss": 0.2092, "lr": 3.824900857324432e-07, "epoch": 6.609413707679604, "percentage": 94.41, "elapsed_time": "14:23:42", "remaining_time": "0:51:06"}
|
||||
{"current_steps": 4010, "total_steps": 4242, "loss": 0.2107, "lr": 3.6663665591915033e-07, "epoch": 6.617671345995046, "percentage": 94.53, "elapsed_time": "14:24:44", "remaining_time": "0:50:01"}
|
||||
{"current_steps": 4015, "total_steps": 4242, "loss": 0.2081, "lr": 3.511157244793384e-07, "epoch": 6.625928984310487, "percentage": 94.65, "elapsed_time": "14:25:52", "remaining_time": "0:48:57"}
|
||||
{"current_steps": 4020, "total_steps": 4242, "loss": 0.2153, "lr": 3.359275542658069e-07, "epoch": 6.634186622625929, "percentage": 94.77, "elapsed_time": "14:26:45", "remaining_time": "0:47:51"}
|
||||
{"current_steps": 4025, "total_steps": 4242, "loss": 0.2137, "lr": 3.2107240249591887e-07, "epoch": 6.642444260941371, "percentage": 94.88, "elapsed_time": "14:27:45", "remaining_time": "0:46:46"}
|
||||
{"current_steps": 4030, "total_steps": 4242, "loss": 0.2005, "lr": 3.0655052074723747e-07, "epoch": 6.650701899256813, "percentage": 95.0, "elapsed_time": "14:28:50", "remaining_time": "0:45:42"}
|
||||
{"current_steps": 4035, "total_steps": 4242, "loss": 0.2117, "lr": 2.9236215495328067e-07, "epoch": 6.658959537572255, "percentage": 95.12, "elapsed_time": "14:29:54", "remaining_time": "0:44:37"}
|
||||
{"current_steps": 4040, "total_steps": 4242, "loss": 0.2063, "lr": 2.785075453993469e-07, "epoch": 6.667217175887696, "percentage": 95.24, "elapsed_time": "14:30:53", "remaining_time": "0:43:32"}
|
||||
{"current_steps": 4045, "total_steps": 4242, "loss": 0.2101, "lr": 2.649869267184402e-07, "epoch": 6.675474814203138, "percentage": 95.36, "elapsed_time": "14:31:56", "remaining_time": "0:42:27"}
|
||||
{"current_steps": 4050, "total_steps": 4242, "loss": 0.2043, "lr": 2.518005278873159e-07, "epoch": 6.68373245251858, "percentage": 95.47, "elapsed_time": "14:32:55", "remaining_time": "0:41:23"}
|
||||
{"current_steps": 4055, "total_steps": 4242, "loss": 0.2095, "lr": 2.389485722225837e-07, "epoch": 6.691990090834022, "percentage": 95.59, "elapsed_time": "14:34:03", "remaining_time": "0:40:18"}
|
||||
{"current_steps": 4060, "total_steps": 4242, "loss": 0.2073, "lr": 2.2643127737693503e-07, "epoch": 6.7002477291494635, "percentage": 95.71, "elapsed_time": "14:35:07", "remaining_time": "0:39:13"}
|
||||
{"current_steps": 4065, "total_steps": 4242, "loss": 0.2094, "lr": 2.1424885533545269e-07, "epoch": 6.708505367464905, "percentage": 95.83, "elapsed_time": "14:36:04", "remaining_time": "0:38:08"}
|
||||
{"current_steps": 4070, "total_steps": 4242, "loss": 0.2113, "lr": 2.0240151241202265e-07, "epoch": 6.716763005780347, "percentage": 95.95, "elapsed_time": "14:37:08", "remaining_time": "0:37:04"}
|
||||
{"current_steps": 4075, "total_steps": 4242, "loss": 0.214, "lr": 1.9088944924584572e-07, "epoch": 6.725020644095789, "percentage": 96.06, "elapsed_time": "14:38:13", "remaining_time": "0:35:59"}
|
||||
{"current_steps": 4080, "total_steps": 4242, "loss": 0.2152, "lr": 1.7971286079802474e-07, "epoch": 6.733278282411231, "percentage": 96.18, "elapsed_time": "14:39:22", "remaining_time": "0:34:54"}
|
||||
{"current_steps": 4085, "total_steps": 4242, "loss": 0.2148, "lr": 1.6887193634828048e-07, "epoch": 6.7415359207266725, "percentage": 96.3, "elapsed_time": "14:40:24", "remaining_time": "0:33:50"}
|
||||
{"current_steps": 4090, "total_steps": 4242, "loss": 0.2035, "lr": 1.5836685949173648e-07, "epoch": 6.749793559042114, "percentage": 96.42, "elapsed_time": "14:41:30", "remaining_time": "0:32:45"}
|
||||
{"current_steps": 4095, "total_steps": 4242, "loss": 0.206, "lr": 1.481978081358104e-07, "epoch": 6.758051197357556, "percentage": 96.53, "elapsed_time": "14:42:39", "remaining_time": "0:31:41"}
|
||||
{"current_steps": 4100, "total_steps": 4242, "loss": 0.2157, "lr": 1.3836495449719878e-07, "epoch": 6.766308835672998, "percentage": 96.65, "elapsed_time": "14:43:38", "remaining_time": "0:30:36"}
|
||||
{"current_steps": 4105, "total_steps": 4242, "loss": 0.2059, "lr": 1.28868465098968e-07, "epoch": 6.77456647398844, "percentage": 96.77, "elapsed_time": "14:44:41", "remaining_time": "0:29:31"}
|
||||
{"current_steps": 4110, "total_steps": 4242, "loss": 0.2144, "lr": 1.1970850076773234e-07, "epoch": 6.7828241123038815, "percentage": 96.89, "elapsed_time": "14:45:47", "remaining_time": "0:28:26"}
|
||||
{"current_steps": 4115, "total_steps": 4242, "loss": 0.2091, "lr": 1.1088521663091823e-07, "epoch": 6.791081750619323, "percentage": 97.01, "elapsed_time": "14:46:50", "remaining_time": "0:27:22"}
|
||||
{"current_steps": 4120, "total_steps": 4242, "loss": 0.2113, "lr": 1.0239876211415533e-07, "epoch": 6.799339388934765, "percentage": 97.12, "elapsed_time": "14:47:57", "remaining_time": "0:26:17"}
|
||||
{"current_steps": 4125, "total_steps": 4242, "loss": 0.2163, "lr": 9.424928093873409e-08, "epoch": 6.807597027250207, "percentage": 97.24, "elapsed_time": "14:49:03", "remaining_time": "0:25:13"}
|
||||
{"current_steps": 4130, "total_steps": 4242, "loss": 0.2095, "lr": 8.643691111917652e-08, "epoch": 6.815854665565649, "percentage": 97.36, "elapsed_time": "14:50:02", "remaining_time": "0:24:08"}
|
||||
{"current_steps": 4135, "total_steps": 4242, "loss": 0.2149, "lr": 7.896178496089368e-08, "epoch": 6.8241123038810905, "percentage": 97.48, "elapsed_time": "14:51:01", "remaining_time": "0:23:03"}
|
||||
{"current_steps": 4140, "total_steps": 4242, "loss": 0.2088, "lr": 7.182402905795194e-08, "epoch": 6.832369942196532, "percentage": 97.6, "elapsed_time": "14:52:16", "remaining_time": "0:21:59"}
|
||||
{"current_steps": 4145, "total_steps": 4242, "loss": 0.2104, "lr": 6.502376429092794e-08, "epoch": 6.840627580511974, "percentage": 97.71, "elapsed_time": "14:53:23", "remaining_time": "0:20:54"}
|
||||
{"current_steps": 4150, "total_steps": 4242, "loss": 0.2082, "lr": 5.856110582485475e-08, "epoch": 6.848885218827416, "percentage": 97.83, "elapsed_time": "14:54:22", "remaining_time": "0:19:49"}
|
||||
{"current_steps": 4155, "total_steps": 4242, "loss": 0.2038, "lr": 5.2436163107276726e-08, "epoch": 6.857142857142857, "percentage": 97.95, "elapsed_time": "14:55:24", "remaining_time": "0:18:44"}
|
||||
{"current_steps": 4160, "total_steps": 4242, "loss": 0.2128, "lr": 4.6649039866399904e-08, "epoch": 6.865400495458299, "percentage": 98.07, "elapsed_time": "14:56:31", "remaining_time": "0:17:40"}
|
||||
{"current_steps": 4165, "total_steps": 4242, "loss": 0.215, "lr": 4.1199834109322266e-08, "epoch": 6.87365813377374, "percentage": 98.18, "elapsed_time": "14:57:32", "remaining_time": "0:16:35"}
|
||||
{"current_steps": 4170, "total_steps": 4242, "loss": 0.2121, "lr": 3.608863812039065e-08, "epoch": 6.881915772089182, "percentage": 98.3, "elapsed_time": "14:58:36", "remaining_time": "0:15:30"}
|
||||
{"current_steps": 4175, "total_steps": 4242, "loss": 0.2101, "lr": 3.131553845962643e-08, "epoch": 6.890173410404624, "percentage": 98.42, "elapsed_time": "14:59:39", "remaining_time": "0:14:26"}
|
||||
{"current_steps": 4180, "total_steps": 4242, "loss": 0.2078, "lr": 2.6880615961264456e-08, "epoch": 6.898431048720066, "percentage": 98.54, "elapsed_time": "15:00:48", "remaining_time": "0:13:21"}
|
||||
{"current_steps": 4185, "total_steps": 4242, "loss": 0.2067, "lr": 2.278394573237641e-08, "epoch": 6.9066886870355075, "percentage": 98.66, "elapsed_time": "15:01:50", "remaining_time": "0:12:16"}
|
||||
{"current_steps": 4190, "total_steps": 4242, "loss": 0.2143, "lr": 1.9025597151614006e-08, "epoch": 6.914946325350949, "percentage": 98.77, "elapsed_time": "15:02:58", "remaining_time": "0:11:12"}
|
||||
{"current_steps": 4195, "total_steps": 4242, "loss": 0.2122, "lr": 1.5605633868018833e-08, "epoch": 6.923203963666391, "percentage": 98.89, "elapsed_time": "15:04:00", "remaining_time": "0:10:07"}
|
||||
{"current_steps": 4200, "total_steps": 4242, "loss": 0.2033, "lr": 1.2524113799949888e-08, "epoch": 6.931461601981833, "percentage": 99.01, "elapsed_time": "15:04:59", "remaining_time": "0:09:02"}
|
||||
{"current_steps": 4205, "total_steps": 4242, "loss": 0.2142, "lr": 9.781089134108801e-09, "epoch": 6.939719240297275, "percentage": 99.13, "elapsed_time": "15:06:13", "remaining_time": "0:07:58"}
|
||||
{"current_steps": 4210, "total_steps": 4242, "loss": 0.2111, "lr": 7.376606324644986e-09, "epoch": 6.9479768786127165, "percentage": 99.25, "elapsed_time": "15:07:15", "remaining_time": "0:06:53"}
|
||||
{"current_steps": 4215, "total_steps": 4242, "loss": 0.2112, "lr": 5.310706092378493e-09, "epoch": 6.956234516928158, "percentage": 99.36, "elapsed_time": "15:08:19", "remaining_time": "0:05:49"}
|
||||
{"current_steps": 4220, "total_steps": 4242, "loss": 0.2098, "lr": 3.5834234241050037e-09, "epoch": 6.9644921552436, "percentage": 99.48, "elapsed_time": "15:09:16", "remaining_time": "0:04:44"}
|
||||
{"current_steps": 4225, "total_steps": 4242, "loss": 0.2116, "lr": 2.194787572000756e-09, "epoch": 6.972749793559042, "percentage": 99.6, "elapsed_time": "15:10:19", "remaining_time": "0:03:39"}
|
||||
{"current_steps": 4230, "total_steps": 4242, "loss": 0.2101, "lr": 1.1448220531407039e-09, "epoch": 6.981007431874484, "percentage": 99.72, "elapsed_time": "15:11:31", "remaining_time": "0:02:35"}
|
||||
{"current_steps": 4235, "total_steps": 4242, "loss": 0.2062, "lr": 4.3354464907885417e-10, "epoch": 6.9892650701899255, "percentage": 99.83, "elapsed_time": "15:12:37", "remaining_time": "0:01:30"}
|
||||
{"current_steps": 4240, "total_steps": 4242, "loss": 0.2019, "lr": 6.096740556849057e-11, "epoch": 6.997522708505367, "percentage": 99.95, "elapsed_time": "15:13:42", "remaining_time": "0:00:25"}
|
||||
{"current_steps": 4242, "total_steps": 4242, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "15:14:41", "remaining_time": "0:00:00"}
|
||||
9375
trainer_state.json
Normal file
9375
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:da38a14c25bbb13562575703a0d0e1c1db3ca0a004de20a157aad8b968d20d4e
|
||||
size 8657
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 39 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user