初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-curriculum_medium Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_curriculum_medium__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_curriculum_medium__Qwen3-8B
|
||||
|
||||
This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the `DCAgent/exp_rpt_curriculum-medium_10k_glm_4.7_traces_jupiter` dataset (thinking-preprocessed local snapshot: `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_curriculum-medium_10k_glm_4.7_traces_jupiter/snapshots/fdf78173c8d3508962f19b140386e4f3836ffc5b_thinking_preprocessed`).
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9, 0.98) and epsilon=1e-08; no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- PyTorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.00364986281178115,
|
||||
"achieved_tflops_per_gpu_theoretical": 626.7623441222861,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.14995810389518738,
|
||||
"mfu_percent": 0.0002579408347548516,
|
||||
"mfu_percent_theoretical": 44.29415859521456,
|
||||
"total_flos": 1254621303865344.0,
|
||||
"train_loss": 0.1876786600655953,
|
||||
"train_runtime": 21484.049,
|
||||
"train_samples_per_second": 2.956,
|
||||
"train_steps_per_second": 0.185,
|
||||
"valid_targets_mean": 5724.8,
|
||||
"valid_targets_min": 613
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7c3a0af2c30da916a36093a5747925b6c1b56a26723af6f76463bd6ad92eec09
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b752ec7872853ca6a2223b6cb014c9e8b0399b4e8d46217241730a04a3af6cbe
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7a2eeb1feaedd89405a12eba87b842b187051f92e5fa00abea949a909143919a
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a14f7db2c9fcf58d8d0563afdbc482d499f7db049e278baa090af1d51d0387d2
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "fdf78173c8d3508962f19b140386e4f3836ffc5b_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_curriculum-medium_10k_glm_4.7_traces_jupiter/snapshots/fdf78173c8d3508962f19b140386e4f3836ffc5b_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-curriculum_medium/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.00364986281178115,
|
||||
"achieved_tflops_per_gpu_theoretical": 626.7623441222861,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.14995810389518738,
|
||||
"mfu_percent": 0.0002579408347548516,
|
||||
"mfu_percent_theoretical": 44.29415859521456,
|
||||
"total_flos": 1254621303865344.0,
|
||||
"train_loss": 0.1876786600655953,
|
||||
"train_runtime": 21484.049,
|
||||
"train_samples_per_second": 2.956,
|
||||
"train_steps_per_second": 0.185,
|
||||
"valid_targets_mean": 5724.8,
|
||||
"valid_targets_min": 613
|
||||
}
|
||||
796
trainer_log.jsonl
Normal file
796
trainer_log.jsonl
Normal file
@@ -0,0 +1,796 @@
|
||||
{"current_steps": 5, "total_steps": 3976, "loss": 0.6346, "lr": 4.0201005025125634e-07, "epoch": 0.008802816901408451, "percentage": 0.13, "elapsed_time": "0:00:33", "remaining_time": "7:26:01"}
|
||||
{"current_steps": 10, "total_steps": 3976, "loss": 0.6336, "lr": 9.045226130653267e-07, "epoch": 0.017605633802816902, "percentage": 0.25, "elapsed_time": "0:01:00", "remaining_time": "6:38:31"}
|
||||
{"current_steps": 15, "total_steps": 3976, "loss": 0.6311, "lr": 1.407035175879397e-06, "epoch": 0.02640845070422535, "percentage": 0.38, "elapsed_time": "0:01:32", "remaining_time": "6:46:49"}
|
||||
{"current_steps": 20, "total_steps": 3976, "loss": 0.5807, "lr": 1.9095477386934674e-06, "epoch": 0.035211267605633804, "percentage": 0.5, "elapsed_time": "0:02:01", "remaining_time": "6:40:33"}
|
||||
{"current_steps": 25, "total_steps": 3976, "loss": 0.5455, "lr": 2.412060301507538e-06, "epoch": 0.04401408450704225, "percentage": 0.63, "elapsed_time": "0:02:27", "remaining_time": "6:28:17"}
|
||||
{"current_steps": 30, "total_steps": 3976, "loss": 0.5127, "lr": 2.914572864321608e-06, "epoch": 0.0528169014084507, "percentage": 0.75, "elapsed_time": "0:02:53", "remaining_time": "6:19:38"}
|
||||
{"current_steps": 35, "total_steps": 3976, "loss": 0.4755, "lr": 3.4170854271356786e-06, "epoch": 0.061619718309859156, "percentage": 0.88, "elapsed_time": "0:03:19", "remaining_time": "6:13:39"}
|
||||
{"current_steps": 40, "total_steps": 3976, "loss": 0.4209, "lr": 3.919597989949749e-06, "epoch": 0.07042253521126761, "percentage": 1.01, "elapsed_time": "0:03:47", "remaining_time": "6:13:32"}
|
||||
{"current_steps": 45, "total_steps": 3976, "loss": 0.4442, "lr": 4.42211055276382e-06, "epoch": 0.07922535211267606, "percentage": 1.13, "elapsed_time": "0:04:14", "remaining_time": "6:10:48"}
|
||||
{"current_steps": 50, "total_steps": 3976, "loss": 0.4508, "lr": 4.92462311557789e-06, "epoch": 0.0880281690140845, "percentage": 1.26, "elapsed_time": "0:04:42", "remaining_time": "6:09:04"}
|
||||
{"current_steps": 55, "total_steps": 3976, "loss": 0.3845, "lr": 5.42713567839196e-06, "epoch": 0.09683098591549295, "percentage": 1.38, "elapsed_time": "0:05:10", "remaining_time": "6:09:07"}
|
||||
{"current_steps": 60, "total_steps": 3976, "loss": 0.4014, "lr": 5.9296482412060305e-06, "epoch": 0.1056338028169014, "percentage": 1.51, "elapsed_time": "0:05:36", "remaining_time": "6:06:31"}
|
||||
{"current_steps": 65, "total_steps": 3976, "loss": 0.405, "lr": 6.4321608040201015e-06, "epoch": 0.11443661971830986, "percentage": 1.63, "elapsed_time": "0:06:06", "remaining_time": "6:08:01"}
|
||||
{"current_steps": 70, "total_steps": 3976, "loss": 0.3717, "lr": 6.934673366834172e-06, "epoch": 0.12323943661971831, "percentage": 1.76, "elapsed_time": "0:06:33", "remaining_time": "6:05:42"}
|
||||
{"current_steps": 75, "total_steps": 3976, "loss": 0.39, "lr": 7.437185929648242e-06, "epoch": 0.13204225352112675, "percentage": 1.89, "elapsed_time": "0:07:02", "remaining_time": "6:06:02"}
|
||||
{"current_steps": 80, "total_steps": 3976, "loss": 0.3668, "lr": 7.939698492462312e-06, "epoch": 0.14084507042253522, "percentage": 2.01, "elapsed_time": "0:07:33", "remaining_time": "6:07:42"}
|
||||
{"current_steps": 85, "total_steps": 3976, "loss": 0.3597, "lr": 8.442211055276383e-06, "epoch": 0.14964788732394366, "percentage": 2.14, "elapsed_time": "0:08:02", "remaining_time": "6:08:12"}
|
||||
{"current_steps": 90, "total_steps": 3976, "loss": 0.3577, "lr": 8.944723618090452e-06, "epoch": 0.15845070422535212, "percentage": 2.26, "elapsed_time": "0:08:26", "remaining_time": "6:04:41"}
|
||||
{"current_steps": 95, "total_steps": 3976, "loss": 0.3687, "lr": 9.447236180904523e-06, "epoch": 0.16725352112676056, "percentage": 2.39, "elapsed_time": "0:08:53", "remaining_time": "6:03:32"}
|
||||
{"current_steps": 100, "total_steps": 3976, "loss": 0.3444, "lr": 9.949748743718594e-06, "epoch": 0.176056338028169, "percentage": 2.52, "elapsed_time": "0:09:21", "remaining_time": "6:02:53"}
|
||||
{"current_steps": 105, "total_steps": 3976, "loss": 0.3243, "lr": 1.0452261306532665e-05, "epoch": 0.18485915492957747, "percentage": 2.64, "elapsed_time": "0:09:50", "remaining_time": "6:02:55"}
|
||||
{"current_steps": 110, "total_steps": 3976, "loss": 0.3261, "lr": 1.0954773869346736e-05, "epoch": 0.1936619718309859, "percentage": 2.77, "elapsed_time": "0:10:23", "remaining_time": "6:05:28"}
|
||||
{"current_steps": 115, "total_steps": 3976, "loss": 0.3036, "lr": 1.1457286432160805e-05, "epoch": 0.20246478873239437, "percentage": 2.89, "elapsed_time": "0:10:44", "remaining_time": "6:00:41"}
|
||||
{"current_steps": 120, "total_steps": 3976, "loss": 0.3068, "lr": 1.1959798994974876e-05, "epoch": 0.2112676056338028, "percentage": 3.02, "elapsed_time": "0:11:11", "remaining_time": "5:59:36"}
|
||||
{"current_steps": 125, "total_steps": 3976, "loss": 0.3086, "lr": 1.2462311557788947e-05, "epoch": 0.22007042253521128, "percentage": 3.14, "elapsed_time": "0:11:42", "remaining_time": "6:00:37"}
|
||||
{"current_steps": 130, "total_steps": 3976, "loss": 0.313, "lr": 1.2964824120603017e-05, "epoch": 0.22887323943661972, "percentage": 3.27, "elapsed_time": "0:12:13", "remaining_time": "6:01:29"}
|
||||
{"current_steps": 135, "total_steps": 3976, "loss": 0.3229, "lr": 1.3467336683417087e-05, "epoch": 0.23767605633802816, "percentage": 3.4, "elapsed_time": "0:12:46", "remaining_time": "6:03:35"}
|
||||
{"current_steps": 140, "total_steps": 3976, "loss": 0.3142, "lr": 1.3969849246231157e-05, "epoch": 0.24647887323943662, "percentage": 3.52, "elapsed_time": "0:13:12", "remaining_time": "6:02:05"}
|
||||
{"current_steps": 145, "total_steps": 3976, "loss": 0.3246, "lr": 1.4472361809045228e-05, "epoch": 0.25528169014084506, "percentage": 3.65, "elapsed_time": "0:13:44", "remaining_time": "6:03:14"}
|
||||
{"current_steps": 150, "total_steps": 3976, "loss": 0.2966, "lr": 1.4974874371859299e-05, "epoch": 0.2640845070422535, "percentage": 3.77, "elapsed_time": "0:14:12", "remaining_time": "6:02:28"}
|
||||
{"current_steps": 155, "total_steps": 3976, "loss": 0.3044, "lr": 1.547738693467337e-05, "epoch": 0.272887323943662, "percentage": 3.9, "elapsed_time": "0:14:38", "remaining_time": "6:01:03"}
|
||||
{"current_steps": 160, "total_steps": 3976, "loss": 0.3073, "lr": 1.5979899497487437e-05, "epoch": 0.28169014084507044, "percentage": 4.02, "elapsed_time": "0:15:07", "remaining_time": "6:00:33"}
|
||||
{"current_steps": 165, "total_steps": 3976, "loss": 0.2912, "lr": 1.6482412060301508e-05, "epoch": 0.2904929577464789, "percentage": 4.15, "elapsed_time": "0:15:28", "remaining_time": "5:57:29"}
|
||||
{"current_steps": 170, "total_steps": 3976, "loss": 0.2879, "lr": 1.698492462311558e-05, "epoch": 0.2992957746478873, "percentage": 4.28, "elapsed_time": "0:15:56", "remaining_time": "5:56:47"}
|
||||
{"current_steps": 175, "total_steps": 3976, "loss": 0.2886, "lr": 1.748743718592965e-05, "epoch": 0.30809859154929575, "percentage": 4.4, "elapsed_time": "0:16:23", "remaining_time": "5:55:52"}
|
||||
{"current_steps": 180, "total_steps": 3976, "loss": 0.2916, "lr": 1.798994974874372e-05, "epoch": 0.31690140845070425, "percentage": 4.53, "elapsed_time": "0:16:49", "remaining_time": "5:54:41"}
|
||||
{"current_steps": 185, "total_steps": 3976, "loss": 0.2818, "lr": 1.8492462311557792e-05, "epoch": 0.3257042253521127, "percentage": 4.65, "elapsed_time": "0:17:14", "remaining_time": "5:53:25"}
|
||||
{"current_steps": 190, "total_steps": 3976, "loss": 0.2872, "lr": 1.899497487437186e-05, "epoch": 0.3345070422535211, "percentage": 4.78, "elapsed_time": "0:17:41", "remaining_time": "5:52:35"}
|
||||
{"current_steps": 195, "total_steps": 3976, "loss": 0.2792, "lr": 1.949748743718593e-05, "epoch": 0.34330985915492956, "percentage": 4.9, "elapsed_time": "0:18:06", "remaining_time": "5:51:12"}
|
||||
{"current_steps": 200, "total_steps": 3976, "loss": 0.3079, "lr": 2e-05, "epoch": 0.352112676056338, "percentage": 5.03, "elapsed_time": "0:18:34", "remaining_time": "5:50:36"}
|
||||
{"current_steps": 205, "total_steps": 3976, "loss": 0.2871, "lr": 2.0502512562814073e-05, "epoch": 0.3609154929577465, "percentage": 5.16, "elapsed_time": "0:19:03", "remaining_time": "5:50:31"}
|
||||
{"current_steps": 210, "total_steps": 3976, "loss": 0.2856, "lr": 2.1005025125628144e-05, "epoch": 0.36971830985915494, "percentage": 5.28, "elapsed_time": "0:19:34", "remaining_time": "5:50:59"}
|
||||
{"current_steps": 215, "total_steps": 3976, "loss": 0.2602, "lr": 2.150753768844221e-05, "epoch": 0.3785211267605634, "percentage": 5.41, "elapsed_time": "0:20:01", "remaining_time": "5:50:10"}
|
||||
{"current_steps": 220, "total_steps": 3976, "loss": 0.2832, "lr": 2.2010050251256282e-05, "epoch": 0.3873239436619718, "percentage": 5.53, "elapsed_time": "0:20:27", "remaining_time": "5:49:13"}
|
||||
{"current_steps": 225, "total_steps": 3976, "loss": 0.2871, "lr": 2.2512562814070353e-05, "epoch": 0.3961267605633803, "percentage": 5.66, "elapsed_time": "0:21:00", "remaining_time": "5:50:15"}
|
||||
{"current_steps": 230, "total_steps": 3976, "loss": 0.2886, "lr": 2.3015075376884424e-05, "epoch": 0.40492957746478875, "percentage": 5.78, "elapsed_time": "0:21:25", "remaining_time": "5:48:59"}
|
||||
{"current_steps": 235, "total_steps": 3976, "loss": 0.2821, "lr": 2.3517587939698495e-05, "epoch": 0.4137323943661972, "percentage": 5.91, "elapsed_time": "0:21:48", "remaining_time": "5:47:04"}
|
||||
{"current_steps": 240, "total_steps": 3976, "loss": 0.2697, "lr": 2.4020100502512566e-05, "epoch": 0.4225352112676056, "percentage": 6.04, "elapsed_time": "0:22:22", "remaining_time": "5:48:15"}
|
||||
{"current_steps": 245, "total_steps": 3976, "loss": 0.2875, "lr": 2.4522613065326634e-05, "epoch": 0.43133802816901406, "percentage": 6.16, "elapsed_time": "0:22:53", "remaining_time": "5:48:31"}
|
||||
{"current_steps": 250, "total_steps": 3976, "loss": 0.2679, "lr": 2.5025125628140705e-05, "epoch": 0.44014084507042256, "percentage": 6.29, "elapsed_time": "0:23:21", "remaining_time": "5:48:02"}
|
||||
{"current_steps": 255, "total_steps": 3976, "loss": 0.2813, "lr": 2.5527638190954776e-05, "epoch": 0.448943661971831, "percentage": 6.41, "elapsed_time": "0:23:51", "remaining_time": "5:48:03"}
|
||||
{"current_steps": 260, "total_steps": 3976, "loss": 0.2843, "lr": 2.6030150753768847e-05, "epoch": 0.45774647887323944, "percentage": 6.54, "elapsed_time": "0:24:20", "remaining_time": "5:47:57"}
|
||||
{"current_steps": 265, "total_steps": 3976, "loss": 0.2545, "lr": 2.6532663316582917e-05, "epoch": 0.4665492957746479, "percentage": 6.66, "elapsed_time": "0:24:45", "remaining_time": "5:46:40"}
|
||||
{"current_steps": 270, "total_steps": 3976, "loss": 0.3004, "lr": 2.7035175879396985e-05, "epoch": 0.4753521126760563, "percentage": 6.79, "elapsed_time": "0:25:19", "remaining_time": "5:47:42"}
|
||||
{"current_steps": 275, "total_steps": 3976, "loss": 0.2801, "lr": 2.7537688442211056e-05, "epoch": 0.4841549295774648, "percentage": 6.92, "elapsed_time": "0:25:48", "remaining_time": "5:47:22"}
|
||||
{"current_steps": 280, "total_steps": 3976, "loss": 0.2815, "lr": 2.8040201005025127e-05, "epoch": 0.49295774647887325, "percentage": 7.04, "elapsed_time": "0:26:16", "remaining_time": "5:46:56"}
|
||||
{"current_steps": 285, "total_steps": 3976, "loss": 0.258, "lr": 2.8542713567839198e-05, "epoch": 0.5017605633802817, "percentage": 7.17, "elapsed_time": "0:26:41", "remaining_time": "5:45:39"}
|
||||
{"current_steps": 290, "total_steps": 3976, "loss": 0.2617, "lr": 2.904522613065327e-05, "epoch": 0.5105633802816901, "percentage": 7.29, "elapsed_time": "0:27:08", "remaining_time": "5:45:02"}
|
||||
{"current_steps": 295, "total_steps": 3976, "loss": 0.2799, "lr": 2.954773869346734e-05, "epoch": 0.5193661971830986, "percentage": 7.42, "elapsed_time": "0:27:36", "remaining_time": "5:44:30"}
|
||||
{"current_steps": 300, "total_steps": 3976, "loss": 0.2717, "lr": 3.0050251256281408e-05, "epoch": 0.528169014084507, "percentage": 7.55, "elapsed_time": "0:28:02", "remaining_time": "5:43:40"}
|
||||
{"current_steps": 305, "total_steps": 3976, "loss": 0.2723, "lr": 3.055276381909548e-05, "epoch": 0.5369718309859155, "percentage": 7.67, "elapsed_time": "0:28:33", "remaining_time": "5:43:42"}
|
||||
{"current_steps": 310, "total_steps": 3976, "loss": 0.2471, "lr": 3.1055276381909546e-05, "epoch": 0.545774647887324, "percentage": 7.8, "elapsed_time": "0:28:57", "remaining_time": "5:42:28"}
|
||||
{"current_steps": 315, "total_steps": 3976, "loss": 0.2761, "lr": 3.155778894472362e-05, "epoch": 0.5545774647887324, "percentage": 7.92, "elapsed_time": "0:29:24", "remaining_time": "5:41:42"}
|
||||
{"current_steps": 320, "total_steps": 3976, "loss": 0.2777, "lr": 3.206030150753769e-05, "epoch": 0.5633802816901409, "percentage": 8.05, "elapsed_time": "0:29:54", "remaining_time": "5:41:42"}
|
||||
{"current_steps": 325, "total_steps": 3976, "loss": 0.2567, "lr": 3.256281407035176e-05, "epoch": 0.5721830985915493, "percentage": 8.17, "elapsed_time": "0:30:21", "remaining_time": "5:41:06"}
|
||||
{"current_steps": 330, "total_steps": 3976, "loss": 0.2697, "lr": 3.306532663316583e-05, "epoch": 0.5809859154929577, "percentage": 8.3, "elapsed_time": "0:30:45", "remaining_time": "5:39:53"}
|
||||
{"current_steps": 335, "total_steps": 3976, "loss": 0.2586, "lr": 3.3567839195979904e-05, "epoch": 0.5897887323943662, "percentage": 8.43, "elapsed_time": "0:31:17", "remaining_time": "5:40:01"}
|
||||
{"current_steps": 340, "total_steps": 3976, "loss": 0.2608, "lr": 3.407035175879397e-05, "epoch": 0.5985915492957746, "percentage": 8.55, "elapsed_time": "0:31:49", "remaining_time": "5:40:17"}
|
||||
{"current_steps": 345, "total_steps": 3976, "loss": 0.24, "lr": 3.457286432160804e-05, "epoch": 0.6073943661971831, "percentage": 8.68, "elapsed_time": "0:32:17", "remaining_time": "5:39:51"}
|
||||
{"current_steps": 350, "total_steps": 3976, "loss": 0.2538, "lr": 3.5075376884422114e-05, "epoch": 0.6161971830985915, "percentage": 8.8, "elapsed_time": "0:32:41", "remaining_time": "5:38:37"}
|
||||
{"current_steps": 355, "total_steps": 3976, "loss": 0.2672, "lr": 3.557788944723618e-05, "epoch": 0.625, "percentage": 8.93, "elapsed_time": "0:33:14", "remaining_time": "5:39:08"}
|
||||
{"current_steps": 360, "total_steps": 3976, "loss": 0.2472, "lr": 3.6080402010050256e-05, "epoch": 0.6338028169014085, "percentage": 9.05, "elapsed_time": "0:33:39", "remaining_time": "5:38:02"}
|
||||
{"current_steps": 365, "total_steps": 3976, "loss": 0.2367, "lr": 3.658291457286432e-05, "epoch": 0.6426056338028169, "percentage": 9.18, "elapsed_time": "0:34:04", "remaining_time": "5:37:01"}
|
||||
{"current_steps": 370, "total_steps": 3976, "loss": 0.2578, "lr": 3.708542713567839e-05, "epoch": 0.6514084507042254, "percentage": 9.31, "elapsed_time": "0:34:36", "remaining_time": "5:37:15"}
|
||||
{"current_steps": 375, "total_steps": 3976, "loss": 0.2447, "lr": 3.7587939698492465e-05, "epoch": 0.6602112676056338, "percentage": 9.43, "elapsed_time": "0:35:02", "remaining_time": "5:36:25"}
|
||||
{"current_steps": 380, "total_steps": 3976, "loss": 0.2683, "lr": 3.809045226130653e-05, "epoch": 0.6690140845070423, "percentage": 9.56, "elapsed_time": "0:35:30", "remaining_time": "5:35:56"}
|
||||
{"current_steps": 385, "total_steps": 3976, "loss": 0.2524, "lr": 3.859296482412061e-05, "epoch": 0.6778169014084507, "percentage": 9.68, "elapsed_time": "0:36:00", "remaining_time": "5:35:49"}
|
||||
{"current_steps": 390, "total_steps": 3976, "loss": 0.2435, "lr": 3.9095477386934675e-05, "epoch": 0.6866197183098591, "percentage": 9.81, "elapsed_time": "0:36:26", "remaining_time": "5:35:05"}
|
||||
{"current_steps": 395, "total_steps": 3976, "loss": 0.2605, "lr": 3.959798994974874e-05, "epoch": 0.6954225352112676, "percentage": 9.93, "elapsed_time": "0:36:55", "remaining_time": "5:34:45"}
|
||||
{"current_steps": 400, "total_steps": 3976, "loss": 0.2417, "lr": 3.9999992290627244e-05, "epoch": 0.704225352112676, "percentage": 10.06, "elapsed_time": "0:37:19", "remaining_time": "5:33:44"}
|
||||
{"current_steps": 405, "total_steps": 3976, "loss": 0.2695, "lr": 3.999972246320468e-05, "epoch": 0.7130281690140845, "percentage": 10.19, "elapsed_time": "0:37:46", "remaining_time": "5:33:02"}
|
||||
{"current_steps": 410, "total_steps": 3976, "loss": 0.2344, "lr": 3.99990671730875e-05, "epoch": 0.721830985915493, "percentage": 10.31, "elapsed_time": "0:38:12", "remaining_time": "5:32:21"}
|
||||
{"current_steps": 415, "total_steps": 3976, "loss": 0.2649, "lr": 3.9998026432905376e-05, "epoch": 0.7306338028169014, "percentage": 10.44, "elapsed_time": "0:38:37", "remaining_time": "5:31:28"}
|
||||
{"current_steps": 420, "total_steps": 3976, "loss": 0.2508, "lr": 3.9996600262716914e-05, "epoch": 0.7394366197183099, "percentage": 10.56, "elapsed_time": "0:39:07", "remaining_time": "5:31:14"}
|
||||
{"current_steps": 425, "total_steps": 3976, "loss": 0.2554, "lr": 3.999478869000926e-05, "epoch": 0.7482394366197183, "percentage": 10.69, "elapsed_time": "0:39:35", "remaining_time": "5:30:45"}
|
||||
{"current_steps": 430, "total_steps": 3976, "loss": 0.2421, "lr": 3.999259174969759e-05, "epoch": 0.7570422535211268, "percentage": 10.81, "elapsed_time": "0:40:03", "remaining_time": "5:30:19"}
|
||||
{"current_steps": 435, "total_steps": 3976, "loss": 0.2392, "lr": 3.999000948412441e-05, "epoch": 0.7658450704225352, "percentage": 10.94, "elapsed_time": "0:40:29", "remaining_time": "5:29:36"}
|
||||
{"current_steps": 440, "total_steps": 3976, "loss": 0.2434, "lr": 3.9987041943058776e-05, "epoch": 0.7746478873239436, "percentage": 11.07, "elapsed_time": "0:40:54", "remaining_time": "5:28:43"}
|
||||
{"current_steps": 445, "total_steps": 3976, "loss": 0.2432, "lr": 3.998368918369529e-05, "epoch": 0.7834507042253521, "percentage": 11.19, "elapsed_time": "0:41:27", "remaining_time": "5:28:59"}
|
||||
{"current_steps": 450, "total_steps": 3976, "loss": 0.2538, "lr": 3.997995127065303e-05, "epoch": 0.7922535211267606, "percentage": 11.32, "elapsed_time": "0:41:51", "remaining_time": "5:27:55"}
|
||||
{"current_steps": 455, "total_steps": 3976, "loss": 0.2469, "lr": 3.9975828275974306e-05, "epoch": 0.801056338028169, "percentage": 11.44, "elapsed_time": "0:42:21", "remaining_time": "5:27:51"}
|
||||
{"current_steps": 460, "total_steps": 3976, "loss": 0.2306, "lr": 3.997132027912324e-05, "epoch": 0.8098591549295775, "percentage": 11.57, "elapsed_time": "0:42:51", "remaining_time": "5:27:36"}
|
||||
{"current_steps": 465, "total_steps": 3976, "loss": 0.2307, "lr": 3.9966427366984286e-05, "epoch": 0.8186619718309859, "percentage": 11.7, "elapsed_time": "0:43:17", "remaining_time": "5:26:53"}
|
||||
{"current_steps": 470, "total_steps": 3976, "loss": 0.2504, "lr": 3.996114963386049e-05, "epoch": 0.8274647887323944, "percentage": 11.82, "elapsed_time": "0:43:43", "remaining_time": "5:26:10"}
|
||||
{"current_steps": 475, "total_steps": 3976, "loss": 0.2389, "lr": 3.995548718147173e-05, "epoch": 0.8362676056338029, "percentage": 11.95, "elapsed_time": "0:44:08", "remaining_time": "5:25:18"}
|
||||
{"current_steps": 480, "total_steps": 3976, "loss": 0.2481, "lr": 3.994944011895275e-05, "epoch": 0.8450704225352113, "percentage": 12.07, "elapsed_time": "0:44:40", "remaining_time": "5:25:24"}
|
||||
{"current_steps": 485, "total_steps": 3976, "loss": 0.2454, "lr": 3.994300856285098e-05, "epoch": 0.8538732394366197, "percentage": 12.2, "elapsed_time": "0:45:05", "remaining_time": "5:24:35"}
|
||||
{"current_steps": 490, "total_steps": 3976, "loss": 0.2382, "lr": 3.993619263712442e-05, "epoch": 0.8626760563380281, "percentage": 12.32, "elapsed_time": "0:45:36", "remaining_time": "5:24:29"}
|
||||
{"current_steps": 495, "total_steps": 3976, "loss": 0.2434, "lr": 3.992899247313912e-05, "epoch": 0.8714788732394366, "percentage": 12.45, "elapsed_time": "0:46:03", "remaining_time": "5:23:56"}
|
||||
{"current_steps": 500, "total_steps": 3976, "loss": 0.2468, "lr": 3.9921408209666766e-05, "epoch": 0.8802816901408451, "percentage": 12.58, "elapsed_time": "0:46:31", "remaining_time": "5:23:29"}
|
||||
{"current_steps": 505, "total_steps": 3976, "loss": 0.283, "lr": 3.9913439992881893e-05, "epoch": 0.8890845070422535, "percentage": 12.7, "elapsed_time": "0:47:05", "remaining_time": "5:23:41"}
|
||||
{"current_steps": 510, "total_steps": 3976, "loss": 0.2281, "lr": 3.990508797635917e-05, "epoch": 0.897887323943662, "percentage": 12.83, "elapsed_time": "0:47:33", "remaining_time": "5:23:14"}
|
||||
{"current_steps": 515, "total_steps": 3976, "loss": 0.2496, "lr": 3.989635232107034e-05, "epoch": 0.9066901408450704, "percentage": 12.95, "elapsed_time": "0:48:04", "remaining_time": "5:23:03"}
|
||||
{"current_steps": 520, "total_steps": 3976, "loss": 0.2357, "lr": 3.988723319538123e-05, "epoch": 0.9154929577464789, "percentage": 13.08, "elapsed_time": "0:48:29", "remaining_time": "5:22:17"}
|
||||
{"current_steps": 525, "total_steps": 3976, "loss": 0.2182, "lr": 3.98777307750484e-05, "epoch": 0.9242957746478874, "percentage": 13.2, "elapsed_time": "0:48:58", "remaining_time": "5:21:54"}
|
||||
{"current_steps": 530, "total_steps": 3976, "loss": 0.2306, "lr": 3.9867845243215835e-05, "epoch": 0.9330985915492958, "percentage": 13.33, "elapsed_time": "0:49:26", "remaining_time": "5:21:29"}
|
||||
{"current_steps": 535, "total_steps": 3976, "loss": 0.2472, "lr": 3.9857576790411346e-05, "epoch": 0.9419014084507042, "percentage": 13.46, "elapsed_time": "0:49:57", "remaining_time": "5:21:16"}
|
||||
{"current_steps": 540, "total_steps": 3976, "loss": 0.2459, "lr": 3.9846925614542964e-05, "epoch": 0.9507042253521126, "percentage": 13.58, "elapsed_time": "0:50:25", "remaining_time": "5:20:52"}
|
||||
{"current_steps": 545, "total_steps": 3976, "loss": 0.2437, "lr": 3.983589192089509e-05, "epoch": 0.9595070422535211, "percentage": 13.71, "elapsed_time": "0:50:52", "remaining_time": "5:20:15"}
|
||||
{"current_steps": 550, "total_steps": 3976, "loss": 0.2248, "lr": 3.9824475922124535e-05, "epoch": 0.9683098591549296, "percentage": 13.83, "elapsed_time": "0:51:17", "remaining_time": "5:19:27"}
|
||||
{"current_steps": 555, "total_steps": 3976, "loss": 0.2553, "lr": 3.981267783825643e-05, "epoch": 0.977112676056338, "percentage": 13.96, "elapsed_time": "0:51:40", "remaining_time": "5:18:32"}
|
||||
{"current_steps": 560, "total_steps": 3976, "loss": 0.2433, "lr": 3.980049789667999e-05, "epoch": 0.9859154929577465, "percentage": 14.08, "elapsed_time": "0:52:11", "remaining_time": "5:18:20"}
|
||||
{"current_steps": 565, "total_steps": 3976, "loss": 0.2305, "lr": 3.9787936332144134e-05, "epoch": 0.9947183098591549, "percentage": 14.21, "elapsed_time": "0:52:35", "remaining_time": "5:17:29"}
|
||||
{"current_steps": 570, "total_steps": 3976, "loss": 0.2467, "lr": 3.9774993386752945e-05, "epoch": 1.0035211267605635, "percentage": 14.34, "elapsed_time": "0:53:02", "remaining_time": "5:16:58"}
|
||||
{"current_steps": 575, "total_steps": 3976, "loss": 0.2077, "lr": 3.976166930996102e-05, "epoch": 1.0123239436619718, "percentage": 14.46, "elapsed_time": "0:53:24", "remaining_time": "5:15:51"}
|
||||
{"current_steps": 580, "total_steps": 3976, "loss": 0.2395, "lr": 3.974796435856863e-05, "epoch": 1.0211267605633803, "percentage": 14.59, "elapsed_time": "0:53:54", "remaining_time": "5:15:38"}
|
||||
{"current_steps": 585, "total_steps": 3976, "loss": 0.2385, "lr": 3.973387879671684e-05, "epoch": 1.0299295774647887, "percentage": 14.71, "elapsed_time": "0:54:21", "remaining_time": "5:15:07"}
|
||||
{"current_steps": 590, "total_steps": 3976, "loss": 0.2037, "lr": 3.971941289588234e-05, "epoch": 1.0387323943661972, "percentage": 14.84, "elapsed_time": "0:54:49", "remaining_time": "5:14:37"}
|
||||
{"current_steps": 595, "total_steps": 3976, "loss": 0.214, "lr": 3.970456693487225e-05, "epoch": 1.0475352112676057, "percentage": 14.96, "elapsed_time": "0:55:10", "remaining_time": "5:13:29"}
|
||||
{"current_steps": 600, "total_steps": 3976, "loss": 0.23, "lr": 3.968934119981875e-05, "epoch": 1.056338028169014, "percentage": 15.09, "elapsed_time": "0:55:33", "remaining_time": "5:12:37"}
|
||||
{"current_steps": 605, "total_steps": 3976, "loss": 0.2561, "lr": 3.967373598417355e-05, "epoch": 1.0651408450704225, "percentage": 15.22, "elapsed_time": "0:55:57", "remaining_time": "5:11:46"}
|
||||
{"current_steps": 610, "total_steps": 3976, "loss": 0.2196, "lr": 3.965775158870226e-05, "epoch": 1.073943661971831, "percentage": 15.34, "elapsed_time": "0:56:22", "remaining_time": "5:11:06"}
|
||||
{"current_steps": 615, "total_steps": 3976, "loss": 0.2191, "lr": 3.964138832147856e-05, "epoch": 1.0827464788732395, "percentage": 15.47, "elapsed_time": "0:56:47", "remaining_time": "5:10:23"}
|
||||
{"current_steps": 620, "total_steps": 3976, "loss": 0.2254, "lr": 3.962464649787827e-05, "epoch": 1.091549295774648, "percentage": 15.59, "elapsed_time": "0:57:11", "remaining_time": "5:09:34"}
|
||||
{"current_steps": 625, "total_steps": 3976, "loss": 0.2235, "lr": 3.960752644057329e-05, "epoch": 1.1003521126760563, "percentage": 15.72, "elapsed_time": "0:57:35", "remaining_time": "5:08:46"}
|
||||
{"current_steps": 630, "total_steps": 3976, "loss": 0.2178, "lr": 3.9590028479525384e-05, "epoch": 1.1091549295774648, "percentage": 15.85, "elapsed_time": "0:58:04", "remaining_time": "5:08:25"}
|
||||
{"current_steps": 635, "total_steps": 3976, "loss": 0.2422, "lr": 3.957215295197978e-05, "epoch": 1.1179577464788732, "percentage": 15.97, "elapsed_time": "0:58:33", "remaining_time": "5:08:08"}
|
||||
{"current_steps": 640, "total_steps": 3976, "loss": 0.1944, "lr": 3.955390020245872e-05, "epoch": 1.1267605633802817, "percentage": 16.1, "elapsed_time": "0:58:56", "remaining_time": "5:07:15"}
|
||||
{"current_steps": 645, "total_steps": 3976, "loss": 0.2216, "lr": 3.953527058275476e-05, "epoch": 1.1355633802816902, "percentage": 16.22, "elapsed_time": "0:59:26", "remaining_time": "5:07:00"}
|
||||
{"current_steps": 650, "total_steps": 3976, "loss": 0.2227, "lr": 3.951626445192409e-05, "epoch": 1.1443661971830985, "percentage": 16.35, "elapsed_time": "0:59:52", "remaining_time": "5:06:20"}
|
||||
{"current_steps": 655, "total_steps": 3976, "loss": 0.2147, "lr": 3.949688217627949e-05, "epoch": 1.153169014084507, "percentage": 16.47, "elapsed_time": "1:00:23", "remaining_time": "5:06:11"}
|
||||
{"current_steps": 660, "total_steps": 3976, "loss": 0.2182, "lr": 3.947712412938336e-05, "epoch": 1.1619718309859155, "percentage": 16.6, "elapsed_time": "1:00:49", "remaining_time": "5:05:36"}
|
||||
{"current_steps": 665, "total_steps": 3976, "loss": 0.2174, "lr": 3.945699069204049e-05, "epoch": 1.170774647887324, "percentage": 16.73, "elapsed_time": "1:01:18", "remaining_time": "5:05:17"}
|
||||
{"current_steps": 670, "total_steps": 3976, "loss": 0.2305, "lr": 3.9436482252290706e-05, "epoch": 1.1795774647887325, "percentage": 16.85, "elapsed_time": "1:01:49", "remaining_time": "5:05:04"}
|
||||
{"current_steps": 675, "total_steps": 3976, "loss": 0.2107, "lr": 3.9415599205401424e-05, "epoch": 1.1883802816901408, "percentage": 16.98, "elapsed_time": "1:02:15", "remaining_time": "5:04:26"}
|
||||
{"current_steps": 680, "total_steps": 3976, "loss": 0.2154, "lr": 3.939434195385999e-05, "epoch": 1.1971830985915493, "percentage": 17.1, "elapsed_time": "1:02:41", "remaining_time": "5:03:53"}
|
||||
{"current_steps": 685, "total_steps": 3976, "loss": 0.2126, "lr": 3.937271090736599e-05, "epoch": 1.2059859154929577, "percentage": 17.23, "elapsed_time": "1:03:05", "remaining_time": "5:03:09"}
|
||||
{"current_steps": 690, "total_steps": 3976, "loss": 0.2218, "lr": 3.935070648282325e-05, "epoch": 1.2147887323943662, "percentage": 17.35, "elapsed_time": "1:03:40", "remaining_time": "5:03:13"}
|
||||
{"current_steps": 695, "total_steps": 3976, "loss": 0.2119, "lr": 3.9328329104331915e-05, "epoch": 1.2235915492957747, "percentage": 17.48, "elapsed_time": "1:04:03", "remaining_time": "5:02:26"}
|
||||
{"current_steps": 700, "total_steps": 3976, "loss": 0.2147, "lr": 3.93055792031802e-05, "epoch": 1.232394366197183, "percentage": 17.61, "elapsed_time": "1:04:33", "remaining_time": "5:02:06"}
|
||||
{"current_steps": 705, "total_steps": 3976, "loss": 0.2107, "lr": 3.928245721783609e-05, "epoch": 1.2411971830985915, "percentage": 17.73, "elapsed_time": "1:05:02", "remaining_time": "5:01:44"}
|
||||
{"current_steps": 710, "total_steps": 3976, "loss": 0.2391, "lr": 3.925896359393891e-05, "epoch": 1.25, "percentage": 17.86, "elapsed_time": "1:05:36", "remaining_time": "5:01:48"}
|
||||
{"current_steps": 715, "total_steps": 3976, "loss": 0.2197, "lr": 3.923509878429073e-05, "epoch": 1.2588028169014085, "percentage": 17.98, "elapsed_time": "1:06:05", "remaining_time": "5:01:24"}
|
||||
{"current_steps": 720, "total_steps": 3976, "loss": 0.2068, "lr": 3.921086324884762e-05, "epoch": 1.267605633802817, "percentage": 18.11, "elapsed_time": "1:06:38", "remaining_time": "5:01:23"}
|
||||
{"current_steps": 725, "total_steps": 3976, "loss": 0.2257, "lr": 3.9186257454710797e-05, "epoch": 1.2764084507042255, "percentage": 18.23, "elapsed_time": "1:07:04", "remaining_time": "5:00:47"}
|
||||
{"current_steps": 730, "total_steps": 3976, "loss": 0.2267, "lr": 3.9161281876117635e-05, "epoch": 1.2852112676056338, "percentage": 18.36, "elapsed_time": "1:07:35", "remaining_time": "5:00:32"}
|
||||
{"current_steps": 735, "total_steps": 3976, "loss": 0.2243, "lr": 3.91359369944325e-05, "epoch": 1.2940140845070423, "percentage": 18.49, "elapsed_time": "1:07:58", "remaining_time": "4:59:42"}
|
||||
{"current_steps": 740, "total_steps": 3976, "loss": 0.2294, "lr": 3.911022329813749e-05, "epoch": 1.3028169014084507, "percentage": 18.61, "elapsed_time": "1:08:28", "remaining_time": "4:59:25"}
|
||||
{"current_steps": 745, "total_steps": 3976, "loss": 0.2155, "lr": 3.908414128282302e-05, "epoch": 1.311619718309859, "percentage": 18.74, "elapsed_time": "1:08:57", "remaining_time": "4:59:03"}
|
||||
{"current_steps": 750, "total_steps": 3976, "loss": 0.2014, "lr": 3.905769145117825e-05, "epoch": 1.3204225352112675, "percentage": 18.86, "elapsed_time": "1:09:21", "remaining_time": "4:58:18"}
|
||||
{"current_steps": 755, "total_steps": 3976, "loss": 0.214, "lr": 3.903087431298145e-05, "epoch": 1.329225352112676, "percentage": 18.99, "elapsed_time": "1:09:45", "remaining_time": "4:57:34"}
|
||||
{"current_steps": 760, "total_steps": 3976, "loss": 0.1914, "lr": 3.900369038509007e-05, "epoch": 1.3380281690140845, "percentage": 19.11, "elapsed_time": "1:10:11", "remaining_time": "4:56:59"}
|
||||
{"current_steps": 765, "total_steps": 3976, "loss": 0.2252, "lr": 3.8976140191430914e-05, "epoch": 1.346830985915493, "percentage": 19.24, "elapsed_time": "1:10:34", "remaining_time": "4:56:12"}
|
||||
{"current_steps": 770, "total_steps": 3976, "loss": 0.2328, "lr": 3.894822426298994e-05, "epoch": 1.3556338028169015, "percentage": 19.37, "elapsed_time": "1:10:59", "remaining_time": "4:55:35"}
|
||||
{"current_steps": 775, "total_steps": 3976, "loss": 0.2161, "lr": 3.891994313780205e-05, "epoch": 1.36443661971831, "percentage": 19.49, "elapsed_time": "1:11:29", "remaining_time": "4:55:15"}
|
||||
{"current_steps": 780, "total_steps": 3976, "loss": 0.2056, "lr": 3.8891297360940766e-05, "epoch": 1.3732394366197183, "percentage": 19.62, "elapsed_time": "1:11:53", "remaining_time": "4:54:34"}
|
||||
{"current_steps": 785, "total_steps": 3976, "loss": 0.2111, "lr": 3.886228748450765e-05, "epoch": 1.3820422535211268, "percentage": 19.74, "elapsed_time": "1:12:21", "remaining_time": "4:54:06"}
|
||||
{"current_steps": 790, "total_steps": 3976, "loss": 0.2496, "lr": 3.883291406762173e-05, "epoch": 1.3908450704225352, "percentage": 19.87, "elapsed_time": "1:12:58", "remaining_time": "4:54:19"}
|
||||
{"current_steps": 795, "total_steps": 3976, "loss": 0.2261, "lr": 3.880317767640867e-05, "epoch": 1.3996478873239437, "percentage": 19.99, "elapsed_time": "1:13:30", "remaining_time": "4:54:06"}
|
||||
{"current_steps": 800, "total_steps": 3976, "loss": 0.2178, "lr": 3.8773078883989906e-05, "epoch": 1.408450704225352, "percentage": 20.12, "elapsed_time": "1:14:04", "remaining_time": "4:54:06"}
|
||||
{"current_steps": 805, "total_steps": 3976, "loss": 0.1957, "lr": 3.874261827047156e-05, "epoch": 1.4172535211267605, "percentage": 20.25, "elapsed_time": "1:14:29", "remaining_time": "4:53:27"}
|
||||
{"current_steps": 810, "total_steps": 3976, "loss": 0.2149, "lr": 3.8711796422933295e-05, "epoch": 1.426056338028169, "percentage": 20.37, "elapsed_time": "1:14:57", "remaining_time": "4:52:59"}
|
||||
{"current_steps": 815, "total_steps": 3976, "loss": 0.2192, "lr": 3.868061393541698e-05, "epoch": 1.4348591549295775, "percentage": 20.5, "elapsed_time": "1:15:19", "remaining_time": "4:52:08"}
|
||||
{"current_steps": 820, "total_steps": 3976, "loss": 0.2104, "lr": 3.8649071408915235e-05, "epoch": 1.443661971830986, "percentage": 20.62, "elapsed_time": "1:15:44", "remaining_time": "4:51:30"}
|
||||
{"current_steps": 825, "total_steps": 3976, "loss": 0.1994, "lr": 3.861716945135985e-05, "epoch": 1.4524647887323945, "percentage": 20.75, "elapsed_time": "1:16:12", "remaining_time": "4:51:03"}
|
||||
{"current_steps": 830, "total_steps": 3976, "loss": 0.2165, "lr": 3.858490867761009e-05, "epoch": 1.4612676056338028, "percentage": 20.88, "elapsed_time": "1:16:42", "remaining_time": "4:50:44"}
|
||||
{"current_steps": 835, "total_steps": 3976, "loss": 0.2069, "lr": 3.8552289709440824e-05, "epoch": 1.4700704225352113, "percentage": 21.0, "elapsed_time": "1:17:11", "remaining_time": "4:50:20"}
|
||||
{"current_steps": 840, "total_steps": 3976, "loss": 0.24, "lr": 3.851931317553054e-05, "epoch": 1.4788732394366197, "percentage": 21.13, "elapsed_time": "1:17:41", "remaining_time": "4:50:02"}
|
||||
{"current_steps": 845, "total_steps": 3976, "loss": 0.2106, "lr": 3.848597971144924e-05, "epoch": 1.4876760563380282, "percentage": 21.25, "elapsed_time": "1:18:11", "remaining_time": "4:49:43"}
|
||||
{"current_steps": 850, "total_steps": 3976, "loss": 0.1988, "lr": 3.845228995964619e-05, "epoch": 1.4964788732394365, "percentage": 21.38, "elapsed_time": "1:18:41", "remaining_time": "4:49:25"}
|
||||
{"current_steps": 855, "total_steps": 3976, "loss": 0.2058, "lr": 3.8418244569437514e-05, "epoch": 1.505281690140845, "percentage": 21.5, "elapsed_time": "1:19:10", "remaining_time": "4:49:02"}
|
||||
{"current_steps": 860, "total_steps": 3976, "loss": 0.1991, "lr": 3.838384419699372e-05, "epoch": 1.5140845070422535, "percentage": 21.63, "elapsed_time": "1:19:33", "remaining_time": "4:48:15"}
|
||||
{"current_steps": 865, "total_steps": 3976, "loss": 0.2174, "lr": 3.8349089505327014e-05, "epoch": 1.522887323943662, "percentage": 21.76, "elapsed_time": "1:20:02", "remaining_time": "4:47:50"}
|
||||
{"current_steps": 870, "total_steps": 3976, "loss": 0.2123, "lr": 3.831398116427855e-05, "epoch": 1.5316901408450705, "percentage": 21.88, "elapsed_time": "1:20:31", "remaining_time": "4:47:29"}
|
||||
{"current_steps": 875, "total_steps": 3976, "loss": 0.198, "lr": 3.827851985050551e-05, "epoch": 1.540492957746479, "percentage": 22.01, "elapsed_time": "1:20:54", "remaining_time": "4:46:43"}
|
||||
{"current_steps": 880, "total_steps": 3976, "loss": 0.1891, "lr": 3.824270624746805e-05, "epoch": 1.5492957746478875, "percentage": 22.13, "elapsed_time": "1:21:16", "remaining_time": "4:45:56"}
|
||||
{"current_steps": 885, "total_steps": 3976, "loss": 0.2133, "lr": 3.8206541045416144e-05, "epoch": 1.5580985915492958, "percentage": 22.26, "elapsed_time": "1:21:44", "remaining_time": "4:45:30"}
|
||||
{"current_steps": 890, "total_steps": 3976, "loss": 0.1947, "lr": 3.8170024941376284e-05, "epoch": 1.5669014084507042, "percentage": 22.38, "elapsed_time": "1:22:07", "remaining_time": "4:44:46"}
|
||||
{"current_steps": 895, "total_steps": 3976, "loss": 0.1948, "lr": 3.813315863913802e-05, "epoch": 1.5757042253521125, "percentage": 22.51, "elapsed_time": "1:22:31", "remaining_time": "4:44:06"}
|
||||
{"current_steps": 900, "total_steps": 3976, "loss": 0.2187, "lr": 3.809594284924043e-05, "epoch": 1.584507042253521, "percentage": 22.64, "elapsed_time": "1:23:02", "remaining_time": "4:43:50"}
|
||||
{"current_steps": 905, "total_steps": 3976, "loss": 0.2154, "lr": 3.8058378288958386e-05, "epoch": 1.5933098591549295, "percentage": 22.76, "elapsed_time": "1:23:26", "remaining_time": "4:43:08"}
|
||||
{"current_steps": 910, "total_steps": 3976, "loss": 0.2411, "lr": 3.802046568228879e-05, "epoch": 1.602112676056338, "percentage": 22.89, "elapsed_time": "1:23:55", "remaining_time": "4:42:46"}
|
||||
{"current_steps": 915, "total_steps": 3976, "loss": 0.2264, "lr": 3.798220575993654e-05, "epoch": 1.6109154929577465, "percentage": 23.01, "elapsed_time": "1:24:25", "remaining_time": "4:42:27"}
|
||||
{"current_steps": 920, "total_steps": 3976, "loss": 0.2139, "lr": 3.7943599259300506e-05, "epoch": 1.619718309859155, "percentage": 23.14, "elapsed_time": "1:24:50", "remaining_time": "4:41:49"}
|
||||
{"current_steps": 925, "total_steps": 3976, "loss": 0.2302, "lr": 3.7904646924459316e-05, "epoch": 1.6285211267605635, "percentage": 23.26, "elapsed_time": "1:25:18", "remaining_time": "4:41:21"}
|
||||
{"current_steps": 930, "total_steps": 3976, "loss": 0.2071, "lr": 3.786534950615697e-05, "epoch": 1.637323943661972, "percentage": 23.39, "elapsed_time": "1:25:42", "remaining_time": "4:40:43"}
|
||||
{"current_steps": 935, "total_steps": 3976, "loss": 0.198, "lr": 3.782570776178843e-05, "epoch": 1.6461267605633803, "percentage": 23.52, "elapsed_time": "1:26:08", "remaining_time": "4:40:10"}
|
||||
{"current_steps": 940, "total_steps": 3976, "loss": 0.244, "lr": 3.778572245538497e-05, "epoch": 1.6549295774647887, "percentage": 23.64, "elapsed_time": "1:26:41", "remaining_time": "4:40:01"}
|
||||
{"current_steps": 945, "total_steps": 3976, "loss": 0.2307, "lr": 3.774539435759948e-05, "epoch": 1.663732394366197, "percentage": 23.77, "elapsed_time": "1:27:04", "remaining_time": "4:39:16"}
|
||||
{"current_steps": 950, "total_steps": 3976, "loss": 0.2222, "lr": 3.7704724245691614e-05, "epoch": 1.6725352112676055, "percentage": 23.89, "elapsed_time": "1:27:31", "remaining_time": "4:38:47"}
|
||||
{"current_steps": 955, "total_steps": 3976, "loss": 0.2073, "lr": 3.766371290351279e-05, "epoch": 1.681338028169014, "percentage": 24.02, "elapsed_time": "1:27:52", "remaining_time": "4:37:58"}
|
||||
{"current_steps": 960, "total_steps": 3976, "loss": 0.1858, "lr": 3.76223611214911e-05, "epoch": 1.6901408450704225, "percentage": 24.14, "elapsed_time": "1:28:14", "remaining_time": "4:37:13"}
|
||||
{"current_steps": 965, "total_steps": 3976, "loss": 0.1997, "lr": 3.758066969661608e-05, "epoch": 1.698943661971831, "percentage": 24.27, "elapsed_time": "1:28:36", "remaining_time": "4:36:28"}
|
||||
{"current_steps": 970, "total_steps": 3976, "loss": 0.1986, "lr": 3.7538639432423317e-05, "epoch": 1.7077464788732395, "percentage": 24.4, "elapsed_time": "1:28:57", "remaining_time": "4:35:41"}
|
||||
{"current_steps": 975, "total_steps": 3976, "loss": 0.2229, "lr": 3.749627113897901e-05, "epoch": 1.716549295774648, "percentage": 24.52, "elapsed_time": "1:29:23", "remaining_time": "4:35:07"}
|
||||
{"current_steps": 980, "total_steps": 3976, "loss": 0.1938, "lr": 3.74535656328643e-05, "epoch": 1.7253521126760565, "percentage": 24.65, "elapsed_time": "1:29:48", "remaining_time": "4:34:33"}
|
||||
{"current_steps": 985, "total_steps": 3976, "loss": 0.2181, "lr": 3.7410523737159594e-05, "epoch": 1.7341549295774648, "percentage": 24.77, "elapsed_time": "1:30:10", "remaining_time": "4:33:48"}
|
||||
{"current_steps": 990, "total_steps": 3976, "loss": 0.241, "lr": 3.7367146281428664e-05, "epoch": 1.7429577464788732, "percentage": 24.9, "elapsed_time": "1:30:38", "remaining_time": "4:33:23"}
|
||||
{"current_steps": 995, "total_steps": 3976, "loss": 0.1959, "lr": 3.7323434101702645e-05, "epoch": 1.7517605633802817, "percentage": 25.03, "elapsed_time": "1:31:00", "remaining_time": "4:32:38"}
|
||||
{"current_steps": 1000, "total_steps": 3976, "loss": 0.2139, "lr": 3.7279388040463965e-05, "epoch": 1.76056338028169, "percentage": 25.15, "elapsed_time": "1:31:28", "remaining_time": "4:32:14"}
|
||||
{"current_steps": 1005, "total_steps": 3976, "loss": 0.229, "lr": 3.723500894663008e-05, "epoch": 1.7693661971830985, "percentage": 25.28, "elapsed_time": "1:31:54", "remaining_time": "4:31:43"}
|
||||
{"current_steps": 1010, "total_steps": 3976, "loss": 0.2182, "lr": 3.719029767553711e-05, "epoch": 1.778169014084507, "percentage": 25.4, "elapsed_time": "1:32:24", "remaining_time": "4:31:22"}
|
||||
{"current_steps": 1015, "total_steps": 3976, "loss": 0.1941, "lr": 3.7145255088923364e-05, "epoch": 1.7869718309859155, "percentage": 25.53, "elapsed_time": "1:32:53", "remaining_time": "4:30:58"}
|
||||
{"current_steps": 1020, "total_steps": 3976, "loss": 0.1972, "lr": 3.709988205491273e-05, "epoch": 1.795774647887324, "percentage": 25.65, "elapsed_time": "1:33:17", "remaining_time": "4:30:20"}
|
||||
{"current_steps": 1025, "total_steps": 3976, "loss": 0.2169, "lr": 3.7054179447997946e-05, "epoch": 1.8045774647887325, "percentage": 25.78, "elapsed_time": "1:33:46", "remaining_time": "4:29:58"}
|
||||
{"current_steps": 1030, "total_steps": 3976, "loss": 0.1986, "lr": 3.700814814902373e-05, "epoch": 1.813380281690141, "percentage": 25.91, "elapsed_time": "1:34:09", "remaining_time": "4:29:17"}
|
||||
{"current_steps": 1035, "total_steps": 3976, "loss": 0.2275, "lr": 3.696178904516982e-05, "epoch": 1.8221830985915493, "percentage": 26.03, "elapsed_time": "1:34:39", "remaining_time": "4:28:58"}
|
||||
{"current_steps": 1040, "total_steps": 3976, "loss": 0.2223, "lr": 3.691510302993388e-05, "epoch": 1.8309859154929577, "percentage": 26.16, "elapsed_time": "1:35:11", "remaining_time": "4:28:45"}
|
||||
{"current_steps": 1045, "total_steps": 3976, "loss": 0.1998, "lr": 3.6868091003114244e-05, "epoch": 1.8397887323943662, "percentage": 26.28, "elapsed_time": "1:35:35", "remaining_time": "4:28:08"}
|
||||
{"current_steps": 1050, "total_steps": 3976, "loss": 0.208, "lr": 3.682075387079262e-05, "epoch": 1.8485915492957745, "percentage": 26.41, "elapsed_time": "1:36:05", "remaining_time": "4:27:46"}
|
||||
{"current_steps": 1055, "total_steps": 3976, "loss": 0.2173, "lr": 3.677309254531659e-05, "epoch": 1.857394366197183, "percentage": 26.53, "elapsed_time": "1:36:38", "remaining_time": "4:27:35"}
|
||||
{"current_steps": 1060, "total_steps": 3976, "loss": 0.2035, "lr": 3.672510794528206e-05, "epoch": 1.8661971830985915, "percentage": 26.66, "elapsed_time": "1:37:04", "remaining_time": "4:27:03"}
|
||||
{"current_steps": 1065, "total_steps": 3976, "loss": 0.2294, "lr": 3.667680099551551e-05, "epoch": 1.875, "percentage": 26.79, "elapsed_time": "1:37:35", "remaining_time": "4:26:44"}
|
||||
{"current_steps": 1070, "total_steps": 3976, "loss": 0.2199, "lr": 3.6628172627056234e-05, "epoch": 1.8838028169014085, "percentage": 26.91, "elapsed_time": "1:38:03", "remaining_time": "4:26:18"}
|
||||
{"current_steps": 1075, "total_steps": 3976, "loss": 0.2225, "lr": 3.6579223777138316e-05, "epoch": 1.892605633802817, "percentage": 27.04, "elapsed_time": "1:38:35", "remaining_time": "4:26:04"}
|
||||
{"current_steps": 1080, "total_steps": 3976, "loss": 0.2079, "lr": 3.652995538917263e-05, "epoch": 1.9014084507042255, "percentage": 27.16, "elapsed_time": "1:38:59", "remaining_time": "4:25:26"}
|
||||
{"current_steps": 1085, "total_steps": 3976, "loss": 0.2208, "lr": 3.648036841272864e-05, "epoch": 1.9102112676056338, "percentage": 27.29, "elapsed_time": "1:39:32", "remaining_time": "4:25:12"}
|
||||
{"current_steps": 1090, "total_steps": 3976, "loss": 0.1889, "lr": 3.643046380351607e-05, "epoch": 1.9190140845070423, "percentage": 27.41, "elapsed_time": "1:39:57", "remaining_time": "4:24:39"}
|
||||
{"current_steps": 1095, "total_steps": 3976, "loss": 0.2072, "lr": 3.6380242523366536e-05, "epoch": 1.9278169014084507, "percentage": 27.54, "elapsed_time": "1:40:19", "remaining_time": "4:23:58"}
|
||||
{"current_steps": 1100, "total_steps": 3976, "loss": 0.2075, "lr": 3.6329705540214973e-05, "epoch": 1.936619718309859, "percentage": 27.67, "elapsed_time": "1:40:48", "remaining_time": "4:23:34"}
|
||||
{"current_steps": 1105, "total_steps": 3976, "loss": 0.1969, "lr": 3.627885382808098e-05, "epoch": 1.9454225352112675, "percentage": 27.79, "elapsed_time": "1:41:15", "remaining_time": "4:23:04"}
|
||||
{"current_steps": 1110, "total_steps": 3976, "loss": 0.1889, "lr": 3.622768836705005e-05, "epoch": 1.954225352112676, "percentage": 27.92, "elapsed_time": "1:41:43", "remaining_time": "4:22:38"}
|
||||
{"current_steps": 1115, "total_steps": 3976, "loss": 0.2106, "lr": 3.61762101432547e-05, "epoch": 1.9630281690140845, "percentage": 28.04, "elapsed_time": "1:42:13", "remaining_time": "4:22:17"}
|
||||
{"current_steps": 1120, "total_steps": 3976, "loss": 0.2117, "lr": 3.6124420148855426e-05, "epoch": 1.971830985915493, "percentage": 28.17, "elapsed_time": "1:42:43", "remaining_time": "4:21:56"}
|
||||
{"current_steps": 1125, "total_steps": 3976, "loss": 0.2217, "lr": 3.607231938202163e-05, "epoch": 1.9806338028169015, "percentage": 28.29, "elapsed_time": "1:43:09", "remaining_time": "4:21:25"}
|
||||
{"current_steps": 1130, "total_steps": 3976, "loss": 0.1905, "lr": 3.601990884691235e-05, "epoch": 1.98943661971831, "percentage": 28.42, "elapsed_time": "1:43:34", "remaining_time": "4:20:52"}
|
||||
{"current_steps": 1135, "total_steps": 3976, "loss": 0.213, "lr": 3.59671895536569e-05, "epoch": 1.9982394366197183, "percentage": 28.55, "elapsed_time": "1:44:03", "remaining_time": "4:20:29"}
|
||||
{"current_steps": 1140, "total_steps": 3976, "loss": 0.1872, "lr": 3.591416251833543e-05, "epoch": 2.007042253521127, "percentage": 28.67, "elapsed_time": "1:44:28", "remaining_time": "4:19:54"}
|
||||
{"current_steps": 1145, "total_steps": 3976, "loss": 0.1905, "lr": 3.586082876295931e-05, "epoch": 2.015845070422535, "percentage": 28.8, "elapsed_time": "1:44:52", "remaining_time": "4:19:17"}
|
||||
{"current_steps": 1150, "total_steps": 3976, "loss": 0.1883, "lr": 3.5807189315451456e-05, "epoch": 2.0246478873239435, "percentage": 28.92, "elapsed_time": "1:45:17", "remaining_time": "4:18:44"}
|
||||
{"current_steps": 1155, "total_steps": 3976, "loss": 0.182, "lr": 3.575324520962652e-05, "epoch": 2.033450704225352, "percentage": 29.05, "elapsed_time": "1:45:43", "remaining_time": "4:18:12"}
|
||||
{"current_steps": 1160, "total_steps": 3976, "loss": 0.1829, "lr": 3.569899748517094e-05, "epoch": 2.0422535211267605, "percentage": 29.18, "elapsed_time": "1:46:07", "remaining_time": "4:17:36"}
|
||||
{"current_steps": 1165, "total_steps": 3976, "loss": 0.1949, "lr": 3.5644447187622937e-05, "epoch": 2.051056338028169, "percentage": 29.3, "elapsed_time": "1:46:37", "remaining_time": "4:17:15"}
|
||||
{"current_steps": 1170, "total_steps": 3976, "loss": 0.194, "lr": 3.558959536835233e-05, "epoch": 2.0598591549295775, "percentage": 29.43, "elapsed_time": "1:47:03", "remaining_time": "4:16:46"}
|
||||
{"current_steps": 1175, "total_steps": 3976, "loss": 0.1791, "lr": 3.553444308454029e-05, "epoch": 2.068661971830986, "percentage": 29.55, "elapsed_time": "1:47:32", "remaining_time": "4:16:21"}
|
||||
{"current_steps": 1180, "total_steps": 3976, "loss": 0.1854, "lr": 3.5478991399158976e-05, "epoch": 2.0774647887323945, "percentage": 29.68, "elapsed_time": "1:47:51", "remaining_time": "4:15:33"}
|
||||
{"current_steps": 1185, "total_steps": 3976, "loss": 0.2002, "lr": 3.542324138095101e-05, "epoch": 2.086267605633803, "percentage": 29.8, "elapsed_time": "1:48:23", "remaining_time": "4:15:17"}
|
||||
{"current_steps": 1190, "total_steps": 3976, "loss": 0.1909, "lr": 3.536719410440891e-05, "epoch": 2.0950704225352115, "percentage": 29.93, "elapsed_time": "1:48:48", "remaining_time": "4:14:45"}
|
||||
{"current_steps": 1195, "total_steps": 3976, "loss": 0.1932, "lr": 3.53108506497544e-05, "epoch": 2.1038732394366195, "percentage": 30.06, "elapsed_time": "1:49:20", "remaining_time": "4:14:27"}
|
||||
{"current_steps": 1200, "total_steps": 3976, "loss": 0.2028, "lr": 3.525421210291752e-05, "epoch": 2.112676056338028, "percentage": 30.18, "elapsed_time": "1:49:52", "remaining_time": "4:14:10"}
|
||||
{"current_steps": 1205, "total_steps": 3976, "loss": 0.1771, "lr": 3.5197279555515776e-05, "epoch": 2.1214788732394365, "percentage": 30.31, "elapsed_time": "1:50:15", "remaining_time": "4:13:32"}
|
||||
{"current_steps": 1210, "total_steps": 3976, "loss": 0.2069, "lr": 3.514005410483304e-05, "epoch": 2.130281690140845, "percentage": 30.43, "elapsed_time": "1:50:42", "remaining_time": "4:13:05"}
|
||||
{"current_steps": 1215, "total_steps": 3976, "loss": 0.1848, "lr": 3.5082536853798443e-05, "epoch": 2.1390845070422535, "percentage": 30.56, "elapsed_time": "1:51:08", "remaining_time": "4:12:34"}
|
||||
{"current_steps": 1220, "total_steps": 3976, "loss": 0.1854, "lr": 3.5024728910965104e-05, "epoch": 2.147887323943662, "percentage": 30.68, "elapsed_time": "1:51:37", "remaining_time": "4:12:09"}
|
||||
{"current_steps": 1225, "total_steps": 3976, "loss": 0.1871, "lr": 3.496663139048876e-05, "epoch": 2.1566901408450705, "percentage": 30.81, "elapsed_time": "1:52:07", "remaining_time": "4:11:48"}
|
||||
{"current_steps": 1230, "total_steps": 3976, "loss": 0.1866, "lr": 3.490824541210626e-05, "epoch": 2.165492957746479, "percentage": 30.94, "elapsed_time": "1:52:38", "remaining_time": "4:11:28"}
|
||||
{"current_steps": 1235, "total_steps": 3976, "loss": 0.1949, "lr": 3.484957210111407e-05, "epoch": 2.1742957746478875, "percentage": 31.06, "elapsed_time": "1:53:07", "remaining_time": "4:11:04"}
|
||||
{"current_steps": 1240, "total_steps": 3976, "loss": 0.1699, "lr": 3.479061258834651e-05, "epoch": 2.183098591549296, "percentage": 31.19, "elapsed_time": "1:53:33", "remaining_time": "4:10:32"}
|
||||
{"current_steps": 1245, "total_steps": 3976, "loss": 0.1846, "lr": 3.473136801015397e-05, "epoch": 2.191901408450704, "percentage": 31.31, "elapsed_time": "1:53:57", "remaining_time": "4:09:59"}
|
||||
{"current_steps": 1250, "total_steps": 3976, "loss": 0.1979, "lr": 3.4671839508381046e-05, "epoch": 2.2007042253521125, "percentage": 31.44, "elapsed_time": "1:54:24", "remaining_time": "4:09:29"}
|
||||
{"current_steps": 1255, "total_steps": 3976, "loss": 0.1906, "lr": 3.461202823034449e-05, "epoch": 2.209507042253521, "percentage": 31.56, "elapsed_time": "1:54:49", "remaining_time": "4:08:56"}
|
||||
{"current_steps": 1260, "total_steps": 3976, "loss": 0.1959, "lr": 3.4551935328811115e-05, "epoch": 2.2183098591549295, "percentage": 31.69, "elapsed_time": "1:55:11", "remaining_time": "4:08:18"}
|
||||
{"current_steps": 1265, "total_steps": 3976, "loss": 0.2014, "lr": 3.449156196197558e-05, "epoch": 2.227112676056338, "percentage": 31.82, "elapsed_time": "1:55:41", "remaining_time": "4:07:57"}
|
||||
{"current_steps": 1270, "total_steps": 3976, "loss": 0.2077, "lr": 3.443090929343807e-05, "epoch": 2.2359154929577465, "percentage": 31.94, "elapsed_time": "1:56:14", "remaining_time": "4:07:39"}
|
||||
{"current_steps": 1275, "total_steps": 3976, "loss": 0.1883, "lr": 3.436997849218186e-05, "epoch": 2.244718309859155, "percentage": 32.07, "elapsed_time": "1:56:38", "remaining_time": "4:07:05"}
|
||||
{"current_steps": 1280, "total_steps": 3976, "loss": 0.2023, "lr": 3.430877073255078e-05, "epoch": 2.2535211267605635, "percentage": 32.19, "elapsed_time": "1:57:09", "remaining_time": "4:06:45"}
|
||||
{"current_steps": 1285, "total_steps": 3976, "loss": 0.1872, "lr": 3.424728719422662e-05, "epoch": 2.262323943661972, "percentage": 32.32, "elapsed_time": "1:57:38", "remaining_time": "4:06:22"}
|
||||
{"current_steps": 1290, "total_steps": 3976, "loss": 0.1981, "lr": 3.4185529062206316e-05, "epoch": 2.2711267605633805, "percentage": 32.44, "elapsed_time": "1:58:07", "remaining_time": "4:05:58"}
|
||||
{"current_steps": 1295, "total_steps": 3976, "loss": 0.1861, "lr": 3.4123497526779186e-05, "epoch": 2.279929577464789, "percentage": 32.57, "elapsed_time": "1:58:31", "remaining_time": "4:05:23"}
|
||||
{"current_steps": 1300, "total_steps": 3976, "loss": 0.1737, "lr": 3.406119378350398e-05, "epoch": 2.288732394366197, "percentage": 32.7, "elapsed_time": "1:58:54", "remaining_time": "4:04:46"}
|
||||
{"current_steps": 1305, "total_steps": 3976, "loss": 0.1766, "lr": 3.399861903318578e-05, "epoch": 2.2975352112676055, "percentage": 32.82, "elapsed_time": "1:59:24", "remaining_time": "4:04:23"}
|
||||
{"current_steps": 1310, "total_steps": 3976, "loss": 0.1827, "lr": 3.393577448185293e-05, "epoch": 2.306338028169014, "percentage": 32.95, "elapsed_time": "1:59:48", "remaining_time": "4:03:48"}
|
||||
{"current_steps": 1315, "total_steps": 3976, "loss": 0.1909, "lr": 3.387266134073373e-05, "epoch": 2.3151408450704225, "percentage": 33.07, "elapsed_time": "2:00:11", "remaining_time": "4:03:12"}
|
||||
{"current_steps": 1320, "total_steps": 3976, "loss": 0.1992, "lr": 3.380928082623315e-05, "epoch": 2.323943661971831, "percentage": 33.2, "elapsed_time": "2:00:43", "remaining_time": "4:02:55"}
|
||||
{"current_steps": 1325, "total_steps": 3976, "loss": 0.1917, "lr": 3.374563415990932e-05, "epoch": 2.3327464788732395, "percentage": 33.32, "elapsed_time": "2:01:07", "remaining_time": "4:02:19"}
|
||||
{"current_steps": 1330, "total_steps": 3976, "loss": 0.1818, "lr": 3.3681722568450045e-05, "epoch": 2.341549295774648, "percentage": 33.45, "elapsed_time": "2:01:33", "remaining_time": "4:01:50"}
|
||||
{"current_steps": 1335, "total_steps": 3976, "loss": 0.1839, "lr": 3.3617547283649125e-05, "epoch": 2.3503521126760565, "percentage": 33.58, "elapsed_time": "2:02:01", "remaining_time": "4:01:23"}
|
||||
{"current_steps": 1340, "total_steps": 3976, "loss": 0.1992, "lr": 3.355310954238265e-05, "epoch": 2.359154929577465, "percentage": 33.7, "elapsed_time": "2:02:28", "remaining_time": "4:00:56"}
|
||||
{"current_steps": 1345, "total_steps": 3976, "loss": 0.208, "lr": 3.34884105865851e-05, "epoch": 2.367957746478873, "percentage": 33.83, "elapsed_time": "2:02:58", "remaining_time": "4:00:33"}
|
||||
{"current_steps": 1350, "total_steps": 3976, "loss": 0.1925, "lr": 3.3423451663225485e-05, "epoch": 2.3767605633802815, "percentage": 33.95, "elapsed_time": "2:03:24", "remaining_time": "4:00:03"}
|
||||
{"current_steps": 1355, "total_steps": 3976, "loss": 0.1806, "lr": 3.335823402428326e-05, "epoch": 2.38556338028169, "percentage": 34.08, "elapsed_time": "2:03:49", "remaining_time": "3:59:30"}
|
||||
{"current_steps": 1360, "total_steps": 3976, "loss": 0.2071, "lr": 3.3292758926724205e-05, "epoch": 2.3943661971830985, "percentage": 34.21, "elapsed_time": "2:04:21", "remaining_time": "3:59:13"}
|
||||
{"current_steps": 1365, "total_steps": 3976, "loss": 0.1886, "lr": 3.322702763247622e-05, "epoch": 2.403169014084507, "percentage": 34.33, "elapsed_time": "2:04:49", "remaining_time": "3:58:45"}
|
||||
{"current_steps": 1370, "total_steps": 3976, "loss": 0.1904, "lr": 3.316104140840497e-05, "epoch": 2.4119718309859155, "percentage": 34.46, "elapsed_time": "2:05:13", "remaining_time": "3:58:12"}
|
||||
{"current_steps": 1375, "total_steps": 3976, "loss": 0.2043, "lr": 3.309480152628948e-05, "epoch": 2.420774647887324, "percentage": 34.58, "elapsed_time": "2:05:40", "remaining_time": "3:57:44"}
|
||||
{"current_steps": 1380, "total_steps": 3976, "loss": 0.1973, "lr": 3.3028309262797645e-05, "epoch": 2.4295774647887325, "percentage": 34.71, "elapsed_time": "2:06:04", "remaining_time": "3:57:09"}
|
||||
{"current_steps": 1385, "total_steps": 3976, "loss": 0.2005, "lr": 3.296156589946161e-05, "epoch": 2.438380281690141, "percentage": 34.83, "elapsed_time": "2:06:33", "remaining_time": "3:56:45"}
|
||||
{"current_steps": 1390, "total_steps": 3976, "loss": 0.1772, "lr": 3.289457272265304e-05, "epoch": 2.4471830985915495, "percentage": 34.96, "elapsed_time": "2:06:58", "remaining_time": "3:56:14"}
|
||||
{"current_steps": 1395, "total_steps": 3976, "loss": 0.1741, "lr": 3.282733102355839e-05, "epoch": 2.455985915492958, "percentage": 35.09, "elapsed_time": "2:07:24", "remaining_time": "3:55:43"}
|
||||
{"current_steps": 1400, "total_steps": 3976, "loss": 0.1687, "lr": 3.2759842098153974e-05, "epoch": 2.464788732394366, "percentage": 35.21, "elapsed_time": "2:07:51", "remaining_time": "3:55:16"}
|
||||
{"current_steps": 1405, "total_steps": 3976, "loss": 0.1751, "lr": 3.269210724718098e-05, "epoch": 2.4735915492957745, "percentage": 35.34, "elapsed_time": "2:08:13", "remaining_time": "3:54:37"}
|
||||
{"current_steps": 1410, "total_steps": 3976, "loss": 0.1976, "lr": 3.262412777612045e-05, "epoch": 2.482394366197183, "percentage": 35.46, "elapsed_time": "2:08:41", "remaining_time": "3:54:12"}
|
||||
{"current_steps": 1415, "total_steps": 3976, "loss": 0.1672, "lr": 3.2555904995168055e-05, "epoch": 2.4911971830985915, "percentage": 35.59, "elapsed_time": "2:09:07", "remaining_time": "3:53:41"}
|
||||
{"current_steps": 1420, "total_steps": 3976, "loss": 0.185, "lr": 3.2487440219208894e-05, "epoch": 2.5, "percentage": 35.71, "elapsed_time": "2:09:31", "remaining_time": "3:53:09"}
|
||||
{"current_steps": 1425, "total_steps": 3976, "loss": 0.191, "lr": 3.241873476779215e-05, "epoch": 2.5088028169014085, "percentage": 35.84, "elapsed_time": "2:09:57", "remaining_time": "3:52:38"}
|
||||
{"current_steps": 1430, "total_steps": 3976, "loss": 0.2029, "lr": 3.2349789965105576e-05, "epoch": 2.517605633802817, "percentage": 35.97, "elapsed_time": "2:10:32", "remaining_time": "3:52:24"}
|
||||
{"current_steps": 1435, "total_steps": 3976, "loss": 0.1942, "lr": 3.228060713995013e-05, "epoch": 2.5264084507042255, "percentage": 36.09, "elapsed_time": "2:10:58", "remaining_time": "3:51:56"}
|
||||
{"current_steps": 1440, "total_steps": 3976, "loss": 0.192, "lr": 3.2211187625714194e-05, "epoch": 2.535211267605634, "percentage": 36.22, "elapsed_time": "2:11:23", "remaining_time": "3:51:24"}
|
||||
{"current_steps": 1445, "total_steps": 3976, "loss": 0.1795, "lr": 3.214153276034799e-05, "epoch": 2.544014084507042, "percentage": 36.34, "elapsed_time": "2:11:54", "remaining_time": "3:51:02"}
|
||||
{"current_steps": 1450, "total_steps": 3976, "loss": 0.1766, "lr": 3.207164388633777e-05, "epoch": 2.552816901408451, "percentage": 36.47, "elapsed_time": "2:12:20", "remaining_time": "3:50:33"}
|
||||
{"current_steps": 1455, "total_steps": 3976, "loss": 0.1791, "lr": 3.200152235067989e-05, "epoch": 2.561619718309859, "percentage": 36.59, "elapsed_time": "2:12:42", "remaining_time": "3:49:56"}
|
||||
{"current_steps": 1460, "total_steps": 3976, "loss": 0.1885, "lr": 3.1931169504854925e-05, "epoch": 2.5704225352112675, "percentage": 36.72, "elapsed_time": "2:13:10", "remaining_time": "3:49:29"}
|
||||
{"current_steps": 1465, "total_steps": 3976, "loss": 0.1932, "lr": 3.186058670480155e-05, "epoch": 2.579225352112676, "percentage": 36.85, "elapsed_time": "2:13:35", "remaining_time": "3:48:58"}
|
||||
{"current_steps": 1470, "total_steps": 3976, "loss": 0.2068, "lr": 3.178977531089048e-05, "epoch": 2.5880281690140845, "percentage": 36.97, "elapsed_time": "2:14:03", "remaining_time": "3:48:31"}
|
||||
{"current_steps": 1475, "total_steps": 3976, "loss": 0.1763, "lr": 3.171873668789817e-05, "epoch": 2.596830985915493, "percentage": 37.1, "elapsed_time": "2:14:28", "remaining_time": "3:48:01"}
|
||||
{"current_steps": 1480, "total_steps": 3976, "loss": 0.1763, "lr": 3.164747220498058e-05, "epoch": 2.6056338028169015, "percentage": 37.22, "elapsed_time": "2:14:50", "remaining_time": "3:47:24"}
|
||||
{"current_steps": 1485, "total_steps": 3976, "loss": 0.1787, "lr": 3.157598323564674e-05, "epoch": 2.61443661971831, "percentage": 37.35, "elapsed_time": "2:15:19", "remaining_time": "3:46:59"}
|
||||
{"current_steps": 1490, "total_steps": 3976, "loss": 0.1921, "lr": 3.1504271157732324e-05, "epoch": 2.623239436619718, "percentage": 37.47, "elapsed_time": "2:15:44", "remaining_time": "3:46:28"}
|
||||
{"current_steps": 1495, "total_steps": 3976, "loss": 0.172, "lr": 3.143233735337305e-05, "epoch": 2.632042253521127, "percentage": 37.6, "elapsed_time": "2:16:08", "remaining_time": "3:45:55"}
|
||||
{"current_steps": 1500, "total_steps": 3976, "loss": 0.1924, "lr": 3.136018320897804e-05, "epoch": 2.640845070422535, "percentage": 37.73, "elapsed_time": "2:16:34", "remaining_time": "3:45:26"}
|
||||
{"current_steps": 1505, "total_steps": 3976, "loss": 0.1827, "lr": 3.1287810115203165e-05, "epoch": 2.6496478873239435, "percentage": 37.85, "elapsed_time": "2:17:07", "remaining_time": "3:45:08"}
|
||||
{"current_steps": 1510, "total_steps": 3976, "loss": 0.1835, "lr": 3.121521946692415e-05, "epoch": 2.658450704225352, "percentage": 37.98, "elapsed_time": "2:17:36", "remaining_time": "3:44:43"}
|
||||
{"current_steps": 1515, "total_steps": 3976, "loss": 0.1744, "lr": 3.114241266320977e-05, "epoch": 2.6672535211267605, "percentage": 38.1, "elapsed_time": "2:18:03", "remaining_time": "3:44:16"}
|
||||
{"current_steps": 1520, "total_steps": 3976, "loss": 0.1859, "lr": 3.106939110729481e-05, "epoch": 2.676056338028169, "percentage": 38.23, "elapsed_time": "2:18:30", "remaining_time": "3:43:48"}
|
||||
{"current_steps": 1525, "total_steps": 3976, "loss": 0.1822, "lr": 3.099615620655311e-05, "epoch": 2.6848591549295775, "percentage": 38.36, "elapsed_time": "2:19:01", "remaining_time": "3:43:26"}
|
||||
{"current_steps": 1530, "total_steps": 3976, "loss": 0.1911, "lr": 3.092270937247035e-05, "epoch": 2.693661971830986, "percentage": 38.48, "elapsed_time": "2:19:31", "remaining_time": "3:43:03"}
|
||||
{"current_steps": 1535, "total_steps": 3976, "loss": 0.1799, "lr": 3.0849052020616915e-05, "epoch": 2.7024647887323945, "percentage": 38.61, "elapsed_time": "2:19:52", "remaining_time": "3:42:25"}
|
||||
{"current_steps": 1540, "total_steps": 3976, "loss": 0.202, "lr": 3.077518557062056e-05, "epoch": 2.711267605633803, "percentage": 38.73, "elapsed_time": "2:20:17", "remaining_time": "3:41:55"}
|
||||
{"current_steps": 1545, "total_steps": 3976, "loss": 0.1959, "lr": 3.070111144613909e-05, "epoch": 2.720070422535211, "percentage": 38.86, "elapsed_time": "2:20:45", "remaining_time": "3:41:27"}
|
||||
{"current_steps": 1550, "total_steps": 3976, "loss": 0.1971, "lr": 3.0626831074832895e-05, "epoch": 2.72887323943662, "percentage": 38.98, "elapsed_time": "2:21:13", "remaining_time": "3:41:02"}
|
||||
{"current_steps": 1555, "total_steps": 3976, "loss": 0.17, "lr": 3.055234588833745e-05, "epoch": 2.737676056338028, "percentage": 39.11, "elapsed_time": "2:21:42", "remaining_time": "3:40:37"}
|
||||
{"current_steps": 1560, "total_steps": 3976, "loss": 0.1855, "lr": 3.047765732223571e-05, "epoch": 2.7464788732394365, "percentage": 39.24, "elapsed_time": "2:22:07", "remaining_time": "3:40:05"}
|
||||
{"current_steps": 1565, "total_steps": 3976, "loss": 0.1856, "lr": 3.040276681603043e-05, "epoch": 2.755281690140845, "percentage": 39.36, "elapsed_time": "2:22:33", "remaining_time": "3:39:37"}
|
||||
{"current_steps": 1570, "total_steps": 3976, "loss": 0.173, "lr": 3.0327675813116487e-05, "epoch": 2.7640845070422535, "percentage": 39.49, "elapsed_time": "2:22:57", "remaining_time": "3:39:05"}
|
||||
{"current_steps": 1575, "total_steps": 3976, "loss": 0.1727, "lr": 3.025238576075296e-05, "epoch": 2.772887323943662, "percentage": 39.61, "elapsed_time": "2:23:19", "remaining_time": "3:38:30"}
|
||||
{"current_steps": 1580, "total_steps": 3976, "loss": 0.1799, "lr": 3.017689811003532e-05, "epoch": 2.7816901408450705, "percentage": 39.74, "elapsed_time": "2:23:46", "remaining_time": "3:38:01"}
|
||||
{"current_steps": 1585, "total_steps": 3976, "loss": 0.208, "lr": 3.0101214315867406e-05, "epoch": 2.790492957746479, "percentage": 39.86, "elapsed_time": "2:24:16", "remaining_time": "3:37:38"}
|
||||
{"current_steps": 1590, "total_steps": 3976, "loss": 0.1967, "lr": 3.0025335836933432e-05, "epoch": 2.7992957746478875, "percentage": 39.99, "elapsed_time": "2:24:47", "remaining_time": "3:37:17"}
|
||||
{"current_steps": 1595, "total_steps": 3976, "loss": 0.2002, "lr": 2.9949264135669836e-05, "epoch": 2.808098591549296, "percentage": 40.12, "elapsed_time": "2:25:19", "remaining_time": "3:36:56"}
|
||||
{"current_steps": 1600, "total_steps": 3976, "loss": 0.1744, "lr": 2.9873000678237113e-05, "epoch": 2.816901408450704, "percentage": 40.24, "elapsed_time": "2:25:43", "remaining_time": "3:36:24"}
|
||||
{"current_steps": 1605, "total_steps": 3976, "loss": 0.172, "lr": 2.979654693449155e-05, "epoch": 2.8257042253521125, "percentage": 40.37, "elapsed_time": "2:26:11", "remaining_time": "3:35:57"}
|
||||
{"current_steps": 1610, "total_steps": 3976, "loss": 0.1821, "lr": 2.9719904377956896e-05, "epoch": 2.834507042253521, "percentage": 40.49, "elapsed_time": "2:26:39", "remaining_time": "3:35:31"}
|
||||
{"current_steps": 1615, "total_steps": 3976, "loss": 0.1791, "lr": 2.964307448579597e-05, "epoch": 2.8433098591549295, "percentage": 40.62, "elapsed_time": "2:27:01", "remaining_time": "3:34:56"}
|
||||
{"current_steps": 1620, "total_steps": 3976, "loss": 0.1696, "lr": 2.956605873878218e-05, "epoch": 2.852112676056338, "percentage": 40.74, "elapsed_time": "2:27:27", "remaining_time": "3:34:27"}
|
||||
{"current_steps": 1625, "total_steps": 3976, "loss": 0.2095, "lr": 2.9488858621271003e-05, "epoch": 2.8609154929577465, "percentage": 40.87, "elapsed_time": "2:27:54", "remaining_time": "3:33:59"}
|
||||
{"current_steps": 1630, "total_steps": 3976, "loss": 0.1771, "lr": 2.9411475621171334e-05, "epoch": 2.869718309859155, "percentage": 41.0, "elapsed_time": "2:28:19", "remaining_time": "3:33:28"}
|
||||
{"current_steps": 1635, "total_steps": 3976, "loss": 0.1875, "lr": 2.933391122991688e-05, "epoch": 2.8785211267605635, "percentage": 41.12, "elapsed_time": "2:28:43", "remaining_time": "3:32:56"}
|
||||
{"current_steps": 1640, "total_steps": 3976, "loss": 0.1732, "lr": 2.9256166942437327e-05, "epoch": 2.887323943661972, "percentage": 41.25, "elapsed_time": "2:29:09", "remaining_time": "3:32:28"}
|
||||
{"current_steps": 1645, "total_steps": 3976, "loss": 0.1734, "lr": 2.9178244257129612e-05, "epoch": 2.89612676056338, "percentage": 41.37, "elapsed_time": "2:29:36", "remaining_time": "3:31:59"}
|
||||
{"current_steps": 1650, "total_steps": 3976, "loss": 0.1953, "lr": 2.9100144675828974e-05, "epoch": 2.904929577464789, "percentage": 41.5, "elapsed_time": "2:30:02", "remaining_time": "3:31:30"}
|
||||
{"current_steps": 1655, "total_steps": 3976, "loss": 0.174, "lr": 2.9021869703780065e-05, "epoch": 2.913732394366197, "percentage": 41.62, "elapsed_time": "2:30:28", "remaining_time": "3:31:01"}
|
||||
{"current_steps": 1660, "total_steps": 3976, "loss": 0.1973, "lr": 2.8943420849607896e-05, "epoch": 2.9225352112676055, "percentage": 41.75, "elapsed_time": "2:31:00", "remaining_time": "3:30:40"}
|
||||
{"current_steps": 1665, "total_steps": 3976, "loss": 0.1951, "lr": 2.8864799625288787e-05, "epoch": 2.931338028169014, "percentage": 41.88, "elapsed_time": "2:31:29", "remaining_time": "3:30:15"}
|
||||
{"current_steps": 1670, "total_steps": 3976, "loss": 0.174, "lr": 2.878600754612121e-05, "epoch": 2.9401408450704225, "percentage": 42.0, "elapsed_time": "2:31:49", "remaining_time": "3:29:38"}
|
||||
{"current_steps": 1675, "total_steps": 3976, "loss": 0.1794, "lr": 2.87070461306966e-05, "epoch": 2.948943661971831, "percentage": 42.13, "elapsed_time": "2:32:10", "remaining_time": "3:29:02"}
|
||||
{"current_steps": 1680, "total_steps": 3976, "loss": 0.2092, "lr": 2.8627916900870078e-05, "epoch": 2.9577464788732395, "percentage": 42.25, "elapsed_time": "2:32:43", "remaining_time": "3:28:43"}
|
||||
{"current_steps": 1685, "total_steps": 3976, "loss": 0.207, "lr": 2.8548621381731102e-05, "epoch": 2.966549295774648, "percentage": 42.38, "elapsed_time": "2:33:15", "remaining_time": "3:28:22"}
|
||||
{"current_steps": 1690, "total_steps": 3976, "loss": 0.1999, "lr": 2.846916110157412e-05, "epoch": 2.9753521126760565, "percentage": 42.51, "elapsed_time": "2:33:44", "remaining_time": "3:27:57"}
|
||||
{"current_steps": 1695, "total_steps": 3976, "loss": 0.1939, "lr": 2.8389537591869057e-05, "epoch": 2.984154929577465, "percentage": 42.63, "elapsed_time": "2:34:15", "remaining_time": "3:27:34"}
|
||||
{"current_steps": 1700, "total_steps": 3976, "loss": 0.1867, "lr": 2.8309752387231842e-05, "epoch": 2.992957746478873, "percentage": 42.76, "elapsed_time": "2:34:40", "remaining_time": "3:27:05"}
|
||||
{"current_steps": 1705, "total_steps": 3976, "loss": 0.1721, "lr": 2.8229807025394815e-05, "epoch": 3.0017605633802815, "percentage": 42.88, "elapsed_time": "2:35:03", "remaining_time": "3:26:31"}
|
||||
{"current_steps": 1710, "total_steps": 3976, "loss": 0.156, "lr": 2.8149703047177083e-05, "epoch": 3.01056338028169, "percentage": 43.01, "elapsed_time": "2:35:29", "remaining_time": "3:26:03"}
|
||||
{"current_steps": 1715, "total_steps": 3976, "loss": 0.1554, "lr": 2.806944199645484e-05, "epoch": 3.0193661971830985, "percentage": 43.13, "elapsed_time": "2:35:51", "remaining_time": "3:25:28"}
|
||||
{"current_steps": 1720, "total_steps": 3976, "loss": 0.1626, "lr": 2.79890254201316e-05, "epoch": 3.028169014084507, "percentage": 43.26, "elapsed_time": "2:36:15", "remaining_time": "3:24:57"}
|
||||
{"current_steps": 1725, "total_steps": 3976, "loss": 0.1718, "lr": 2.7908454868108363e-05, "epoch": 3.0369718309859155, "percentage": 43.39, "elapsed_time": "2:36:42", "remaining_time": "3:24:29"}
|
||||
{"current_steps": 1730, "total_steps": 3976, "loss": 0.1614, "lr": 2.7827731893253796e-05, "epoch": 3.045774647887324, "percentage": 43.51, "elapsed_time": "2:37:08", "remaining_time": "3:24:00"}
|
||||
{"current_steps": 1735, "total_steps": 3976, "loss": 0.1716, "lr": 2.7746858051374265e-05, "epoch": 3.0545774647887325, "percentage": 43.64, "elapsed_time": "2:37:37", "remaining_time": "3:23:35"}
|
||||
{"current_steps": 1740, "total_steps": 3976, "loss": 0.1546, "lr": 2.7665834901183836e-05, "epoch": 3.063380281690141, "percentage": 43.76, "elapsed_time": "2:38:03", "remaining_time": "3:23:06"}
|
||||
{"current_steps": 1745, "total_steps": 3976, "loss": 0.1546, "lr": 2.7584664004274276e-05, "epoch": 3.0721830985915495, "percentage": 43.89, "elapsed_time": "2:38:30", "remaining_time": "3:22:38"}
|
||||
{"current_steps": 1750, "total_steps": 3976, "loss": 0.1625, "lr": 2.750334692508493e-05, "epoch": 3.080985915492958, "percentage": 44.01, "elapsed_time": "2:38:54", "remaining_time": "3:22:08"}
|
||||
{"current_steps": 1755, "total_steps": 3976, "loss": 0.1592, "lr": 2.7421885230872563e-05, "epoch": 3.089788732394366, "percentage": 44.14, "elapsed_time": "2:39:22", "remaining_time": "3:21:41"}
|
||||
{"current_steps": 1760, "total_steps": 3976, "loss": 0.1657, "lr": 2.7340280491681167e-05, "epoch": 3.0985915492957745, "percentage": 44.27, "elapsed_time": "2:39:48", "remaining_time": "3:21:12"}
|
||||
{"current_steps": 1765, "total_steps": 3976, "loss": 0.1478, "lr": 2.725853428031172e-05, "epoch": 3.107394366197183, "percentage": 44.39, "elapsed_time": "2:40:11", "remaining_time": "3:20:40"}
|
||||
{"current_steps": 1770, "total_steps": 3976, "loss": 0.1636, "lr": 2.7176648172291812e-05, "epoch": 3.1161971830985915, "percentage": 44.52, "elapsed_time": "2:40:37", "remaining_time": "3:20:10"}
|
||||
{"current_steps": 1775, "total_steps": 3976, "loss": 0.1965, "lr": 2.7094623745845337e-05, "epoch": 3.125, "percentage": 44.64, "elapsed_time": "2:41:03", "remaining_time": "3:19:43"}
|
||||
{"current_steps": 1780, "total_steps": 3976, "loss": 0.1899, "lr": 2.701246258186206e-05, "epoch": 3.1338028169014085, "percentage": 44.77, "elapsed_time": "2:41:30", "remaining_time": "3:19:15"}
|
||||
{"current_steps": 1785, "total_steps": 3976, "loss": 0.1682, "lr": 2.6930166263867147e-05, "epoch": 3.142605633802817, "percentage": 44.89, "elapsed_time": "2:42:00", "remaining_time": "3:18:51"}
|
||||
{"current_steps": 1790, "total_steps": 3976, "loss": 0.1708, "lr": 2.6847736377990617e-05, "epoch": 3.1514084507042255, "percentage": 45.02, "elapsed_time": "2:42:26", "remaining_time": "3:18:23"}
|
||||
{"current_steps": 1795, "total_steps": 3976, "loss": 0.1679, "lr": 2.676517451293682e-05, "epoch": 3.160211267605634, "percentage": 45.15, "elapsed_time": "2:42:51", "remaining_time": "3:17:52"}
|
||||
{"current_steps": 1800, "total_steps": 3976, "loss": 0.1681, "lr": 2.6682482259953793e-05, "epoch": 3.169014084507042, "percentage": 45.27, "elapsed_time": "2:43:16", "remaining_time": "3:17:22"}
|
||||
{"current_steps": 1805, "total_steps": 3976, "loss": 0.1872, "lr": 2.659966121280257e-05, "epoch": 3.1778169014084505, "percentage": 45.4, "elapsed_time": "2:43:47", "remaining_time": "3:17:00"}
|
||||
{"current_steps": 1810, "total_steps": 3976, "loss": 0.1781, "lr": 2.6516712967726515e-05, "epoch": 3.186619718309859, "percentage": 45.52, "elapsed_time": "2:44:11", "remaining_time": "3:16:29"}
|
||||
{"current_steps": 1815, "total_steps": 3976, "loss": 0.1487, "lr": 2.643363912342051e-05, "epoch": 3.1954225352112675, "percentage": 45.65, "elapsed_time": "2:44:36", "remaining_time": "3:15:58"}
|
||||
{"current_steps": 1820, "total_steps": 3976, "loss": 0.1746, "lr": 2.6350441281000168e-05, "epoch": 3.204225352112676, "percentage": 45.77, "elapsed_time": "2:45:02", "remaining_time": "3:15:30"}
|
||||
{"current_steps": 1825, "total_steps": 3976, "loss": 0.1652, "lr": 2.626712104397097e-05, "epoch": 3.2130281690140845, "percentage": 45.9, "elapsed_time": "2:45:27", "remaining_time": "3:15:00"}
|
||||
{"current_steps": 1830, "total_steps": 3976, "loss": 0.1663, "lr": 2.6183680018197348e-05, "epoch": 3.221830985915493, "percentage": 46.03, "elapsed_time": "2:45:54", "remaining_time": "3:14:33"}
|
||||
{"current_steps": 1835, "total_steps": 3976, "loss": 0.1551, "lr": 2.6100119811871752e-05, "epoch": 3.2306338028169015, "percentage": 46.15, "elapsed_time": "2:46:20", "remaining_time": "3:14:04"}
|
||||
{"current_steps": 1840, "total_steps": 3976, "loss": 0.1596, "lr": 2.6016442035483652e-05, "epoch": 3.23943661971831, "percentage": 46.28, "elapsed_time": "2:46:43", "remaining_time": "3:13:32"}
|
||||
{"current_steps": 1845, "total_steps": 3976, "loss": 0.1799, "lr": 2.5932648301788475e-05, "epoch": 3.2482394366197185, "percentage": 46.4, "elapsed_time": "2:47:10", "remaining_time": "3:13:05"}
|
||||
{"current_steps": 1850, "total_steps": 3976, "loss": 0.1695, "lr": 2.5848740225776566e-05, "epoch": 3.257042253521127, "percentage": 46.53, "elapsed_time": "2:47:36", "remaining_time": "3:12:36"}
|
||||
{"current_steps": 1855, "total_steps": 3976, "loss": 0.1631, "lr": 2.5764719424642014e-05, "epoch": 3.265845070422535, "percentage": 46.65, "elapsed_time": "2:48:03", "remaining_time": "3:12:09"}
|
||||
{"current_steps": 1860, "total_steps": 3976, "loss": 0.1636, "lr": 2.5680587517751502e-05, "epoch": 3.2746478873239435, "percentage": 46.78, "elapsed_time": "2:48:27", "remaining_time": "3:11:38"}
|
||||
{"current_steps": 1865, "total_steps": 3976, "loss": 0.171, "lr": 2.559634612661312e-05, "epoch": 3.283450704225352, "percentage": 46.91, "elapsed_time": "2:48:54", "remaining_time": "3:11:11"}
|
||||
{"current_steps": 1870, "total_steps": 3976, "loss": 0.1842, "lr": 2.5511996874845072e-05, "epoch": 3.2922535211267605, "percentage": 47.03, "elapsed_time": "2:49:18", "remaining_time": "3:10:40"}
|
||||
{"current_steps": 1875, "total_steps": 3976, "loss": 0.1694, "lr": 2.5427541388144414e-05, "epoch": 3.301056338028169, "percentage": 47.16, "elapsed_time": "2:49:41", "remaining_time": "3:10:09"}
|
||||
{"current_steps": 1880, "total_steps": 3976, "loss": 0.156, "lr": 2.534298129425571e-05, "epoch": 3.3098591549295775, "percentage": 47.28, "elapsed_time": "2:50:07", "remaining_time": "3:09:39"}
|
||||
{"current_steps": 1885, "total_steps": 3976, "loss": 0.1702, "lr": 2.5258318222939662e-05, "epoch": 3.318661971830986, "percentage": 47.41, "elapsed_time": "2:50:32", "remaining_time": "3:09:10"}
|
||||
{"current_steps": 1890, "total_steps": 3976, "loss": 0.1782, "lr": 2.5173553805941682e-05, "epoch": 3.3274647887323945, "percentage": 47.54, "elapsed_time": "2:51:04", "remaining_time": "3:08:49"}
|
||||
{"current_steps": 1895, "total_steps": 3976, "loss": 0.1814, "lr": 2.5088689676960477e-05, "epoch": 3.336267605633803, "percentage": 47.66, "elapsed_time": "2:51:36", "remaining_time": "3:08:26"}
|
||||
{"current_steps": 1900, "total_steps": 3976, "loss": 0.1759, "lr": 2.5003727471616533e-05, "epoch": 3.345070422535211, "percentage": 47.79, "elapsed_time": "2:52:05", "remaining_time": "3:08:02"}
|
||||
{"current_steps": 1905, "total_steps": 3976, "loss": 0.1806, "lr": 2.4918668827420612e-05, "epoch": 3.3538732394366195, "percentage": 47.91, "elapsed_time": "2:52:31", "remaining_time": "3:07:33"}
|
||||
{"current_steps": 1910, "total_steps": 3976, "loss": 0.1646, "lr": 2.4833515383742164e-05, "epoch": 3.362676056338028, "percentage": 48.04, "elapsed_time": "2:52:59", "remaining_time": "3:07:07"}
|
||||
{"current_steps": 1915, "total_steps": 3976, "loss": 0.1802, "lr": 2.4748268781777763e-05, "epoch": 3.3714788732394365, "percentage": 48.16, "elapsed_time": "2:53:27", "remaining_time": "3:06:40"}
|
||||
{"current_steps": 1920, "total_steps": 3976, "loss": 0.1574, "lr": 2.4662930664519447e-05, "epoch": 3.380281690140845, "percentage": 48.29, "elapsed_time": "2:53:53", "remaining_time": "3:06:12"}
|
||||
{"current_steps": 1925, "total_steps": 3976, "loss": 0.1704, "lr": 2.457750267672307e-05, "epoch": 3.3890845070422535, "percentage": 48.42, "elapsed_time": "2:54:21", "remaining_time": "3:05:46"}
|
||||
{"current_steps": 1930, "total_steps": 3976, "loss": 0.1694, "lr": 2.4491986464876615e-05, "epoch": 3.397887323943662, "percentage": 48.54, "elapsed_time": "2:54:46", "remaining_time": "3:05:17"}
|
||||
{"current_steps": 1935, "total_steps": 3976, "loss": 0.1684, "lr": 2.4406383677168405e-05, "epoch": 3.4066901408450705, "percentage": 48.67, "elapsed_time": "2:55:10", "remaining_time": "3:04:46"}
|
||||
{"current_steps": 1940, "total_steps": 3976, "loss": 0.1771, "lr": 2.432069596345541e-05, "epoch": 3.415492957746479, "percentage": 48.79, "elapsed_time": "2:55:36", "remaining_time": "3:04:17"}
|
||||
{"current_steps": 1945, "total_steps": 3976, "loss": 0.177, "lr": 2.423492497523139e-05, "epoch": 3.4242957746478875, "percentage": 48.92, "elapsed_time": "2:56:02", "remaining_time": "3:03:49"}
|
||||
{"current_steps": 1950, "total_steps": 3976, "loss": 0.1535, "lr": 2.4149072365595103e-05, "epoch": 3.433098591549296, "percentage": 49.04, "elapsed_time": "2:56:24", "remaining_time": "3:03:16"}
|
||||
{"current_steps": 1955, "total_steps": 3976, "loss": 0.1795, "lr": 2.406313978921842e-05, "epoch": 3.441901408450704, "percentage": 49.17, "elapsed_time": "2:56:53", "remaining_time": "3:02:51"}
|
||||
{"current_steps": 1960, "total_steps": 3976, "loss": 0.1666, "lr": 2.3977128902314445e-05, "epoch": 3.4507042253521125, "percentage": 49.3, "elapsed_time": "2:57:13", "remaining_time": "3:02:17"}
|
||||
{"current_steps": 1965, "total_steps": 3976, "loss": 0.176, "lr": 2.38910413626056e-05, "epoch": 3.459507042253521, "percentage": 49.42, "elapsed_time": "2:57:40", "remaining_time": "3:01:49"}
|
||||
{"current_steps": 1970, "total_steps": 3976, "loss": 0.1643, "lr": 2.3804878829291655e-05, "epoch": 3.4683098591549295, "percentage": 49.55, "elapsed_time": "2:58:09", "remaining_time": "3:01:25"}
|
||||
{"current_steps": 1975, "total_steps": 3976, "loss": 0.1718, "lr": 2.371864296301777e-05, "epoch": 3.477112676056338, "percentage": 49.67, "elapsed_time": "2:58:34", "remaining_time": "3:00:55"}
|
||||
{"current_steps": 1980, "total_steps": 3976, "loss": 0.1555, "lr": 2.3632335425842473e-05, "epoch": 3.4859154929577465, "percentage": 49.8, "elapsed_time": "2:58:59", "remaining_time": "3:00:26"}
|
||||
{"current_steps": 1985, "total_steps": 3976, "loss": 0.1691, "lr": 2.354595788120565e-05, "epoch": 3.494718309859155, "percentage": 49.92, "elapsed_time": "2:59:25", "remaining_time": "2:59:58"}
|
||||
{"current_steps": 1990, "total_steps": 3976, "loss": 0.1673, "lr": 2.3459511993896447e-05, "epoch": 3.5035211267605635, "percentage": 50.05, "elapsed_time": "2:59:51", "remaining_time": "2:59:29"}
|
||||
{"current_steps": 1995, "total_steps": 3976, "loss": 0.1732, "lr": 2.337299943002123e-05, "epoch": 3.512323943661972, "percentage": 50.18, "elapsed_time": "3:00:16", "remaining_time": "2:59:00"}
|
||||
{"current_steps": 2000, "total_steps": 3976, "loss": 0.177, "lr": 2.3286421856971427e-05, "epoch": 3.52112676056338, "percentage": 50.3, "elapsed_time": "3:00:43", "remaining_time": "2:58:33"}
|
||||
{"current_steps": 2005, "total_steps": 3976, "loss": 0.1721, "lr": 2.3199780943391422e-05, "epoch": 3.529929577464789, "percentage": 50.43, "elapsed_time": "3:01:08", "remaining_time": "2:58:04"}
|
||||
{"current_steps": 2010, "total_steps": 3976, "loss": 0.1671, "lr": 2.31130783591464e-05, "epoch": 3.538732394366197, "percentage": 50.55, "elapsed_time": "3:01:32", "remaining_time": "2:57:34"}
|
||||
{"current_steps": 2015, "total_steps": 3976, "loss": 0.1558, "lr": 2.3026315775290122e-05, "epoch": 3.5475352112676055, "percentage": 50.68, "elapsed_time": "3:01:57", "remaining_time": "2:57:04"}
|
||||
{"current_steps": 2020, "total_steps": 3976, "loss": 0.1747, "lr": 2.2939494864032773e-05, "epoch": 3.556338028169014, "percentage": 50.8, "elapsed_time": "3:02:23", "remaining_time": "2:56:37"}
|
||||
{"current_steps": 2025, "total_steps": 3976, "loss": 0.1671, "lr": 2.285261729870869e-05, "epoch": 3.5651408450704225, "percentage": 50.93, "elapsed_time": "3:02:55", "remaining_time": "2:56:14"}
|
||||
{"current_steps": 2030, "total_steps": 3976, "loss": 0.1649, "lr": 2.276568475374413e-05, "epoch": 3.573943661971831, "percentage": 51.06, "elapsed_time": "3:03:18", "remaining_time": "2:55:43"}
|
||||
{"current_steps": 2035, "total_steps": 3976, "loss": 0.1582, "lr": 2.2678698904624996e-05, "epoch": 3.5827464788732395, "percentage": 51.18, "elapsed_time": "3:03:45", "remaining_time": "2:55:15"}
|
||||
{"current_steps": 2040, "total_steps": 3976, "loss": 0.1789, "lr": 2.259166142786454e-05, "epoch": 3.591549295774648, "percentage": 51.31, "elapsed_time": "3:04:15", "remaining_time": "2:54:51"}
|
||||
{"current_steps": 2045, "total_steps": 3976, "loss": 0.1783, "lr": 2.250457400097106e-05, "epoch": 3.6003521126760565, "percentage": 51.43, "elapsed_time": "3:04:43", "remaining_time": "2:54:25"}
|
||||
{"current_steps": 2050, "total_steps": 3976, "loss": 0.1734, "lr": 2.2417438302415557e-05, "epoch": 3.609154929577465, "percentage": 51.56, "elapsed_time": "3:05:10", "remaining_time": "2:53:58"}
|
||||
{"current_steps": 2055, "total_steps": 3976, "loss": 0.1723, "lr": 2.2330256011599393e-05, "epoch": 3.617957746478873, "percentage": 51.69, "elapsed_time": "3:05:42", "remaining_time": "2:53:36"}
|
||||
{"current_steps": 2060, "total_steps": 3976, "loss": 0.1704, "lr": 2.224302880882193e-05, "epoch": 3.626760563380282, "percentage": 51.81, "elapsed_time": "3:06:06", "remaining_time": "2:53:05"}
|
||||
{"current_steps": 2065, "total_steps": 3976, "loss": 0.1709, "lr": 2.215575837524812e-05, "epoch": 3.63556338028169, "percentage": 51.94, "elapsed_time": "3:06:30", "remaining_time": "2:52:35"}
|
||||
{"current_steps": 2070, "total_steps": 3976, "loss": 0.1785, "lr": 2.206844639287613e-05, "epoch": 3.6443661971830985, "percentage": 52.06, "elapsed_time": "3:07:03", "remaining_time": "2:52:14"}
|
||||
{"current_steps": 2075, "total_steps": 3976, "loss": 0.1733, "lr": 2.1981094544504907e-05, "epoch": 3.653169014084507, "percentage": 52.19, "elapsed_time": "3:07:32", "remaining_time": "2:51:48"}
|
||||
{"current_steps": 2080, "total_steps": 3976, "loss": 0.1803, "lr": 2.1893704513701773e-05, "epoch": 3.6619718309859155, "percentage": 52.31, "elapsed_time": "3:07:58", "remaining_time": "2:51:20"}
|
||||
{"current_steps": 2085, "total_steps": 3976, "loss": 0.162, "lr": 2.1806277984769922e-05, "epoch": 3.670774647887324, "percentage": 52.44, "elapsed_time": "3:08:25", "remaining_time": "2:50:53"}
|
||||
{"current_steps": 2090, "total_steps": 3976, "loss": 0.1523, "lr": 2.171881664271601e-05, "epoch": 3.6795774647887325, "percentage": 52.57, "elapsed_time": "3:08:51", "remaining_time": "2:50:25"}
|
||||
{"current_steps": 2095, "total_steps": 3976, "loss": 0.17, "lr": 2.163132217321767e-05, "epoch": 3.688380281690141, "percentage": 52.69, "elapsed_time": "3:09:23", "remaining_time": "2:50:02"}
|
||||
{"current_steps": 2100, "total_steps": 3976, "loss": 0.1673, "lr": 2.1543796262590986e-05, "epoch": 3.697183098591549, "percentage": 52.82, "elapsed_time": "3:09:48", "remaining_time": "2:49:34"}
|
||||
{"current_steps": 2105, "total_steps": 3976, "loss": 0.1958, "lr": 2.145624059775804e-05, "epoch": 3.705985915492958, "percentage": 52.94, "elapsed_time": "3:10:20", "remaining_time": "2:49:11"}
|
||||
{"current_steps": 2110, "total_steps": 3976, "loss": 0.1684, "lr": 2.1368656866214385e-05, "epoch": 3.714788732394366, "percentage": 53.07, "elapsed_time": "3:10:52", "remaining_time": "2:48:47"}
|
||||
{"current_steps": 2115, "total_steps": 3976, "loss": 0.1627, "lr": 2.128104675599649e-05, "epoch": 3.7235915492957745, "percentage": 53.19, "elapsed_time": "3:11:15", "remaining_time": "2:48:17"}
|
||||
{"current_steps": 2120, "total_steps": 3976, "loss": 0.1641, "lr": 2.119341195564925e-05, "epoch": 3.732394366197183, "percentage": 53.32, "elapsed_time": "3:11:38", "remaining_time": "2:47:46"}
|
||||
{"current_steps": 2125, "total_steps": 3976, "loss": 0.1518, "lr": 2.110575415419341e-05, "epoch": 3.7411971830985915, "percentage": 53.45, "elapsed_time": "3:12:03", "remaining_time": "2:47:18"}
|
||||
{"current_steps": 2130, "total_steps": 3976, "loss": 0.1624, "lr": 2.1018075041093047e-05, "epoch": 3.75, "percentage": 53.57, "elapsed_time": "3:12:28", "remaining_time": "2:46:48"}
|
||||
{"current_steps": 2135, "total_steps": 3976, "loss": 0.1729, "lr": 2.0930376306222963e-05, "epoch": 3.7588028169014085, "percentage": 53.7, "elapsed_time": "3:12:56", "remaining_time": "2:46:22"}
|
||||
{"current_steps": 2140, "total_steps": 3976, "loss": 0.1626, "lr": 2.084265963983614e-05, "epoch": 3.767605633802817, "percentage": 53.82, "elapsed_time": "3:13:21", "remaining_time": "2:45:53"}
|
||||
{"current_steps": 2145, "total_steps": 3976, "loss": 0.1802, "lr": 2.075492673253118e-05, "epoch": 3.7764084507042255, "percentage": 53.95, "elapsed_time": "3:13:46", "remaining_time": "2:45:24"}
|
||||
{"current_steps": 2150, "total_steps": 3976, "loss": 0.1693, "lr": 2.066717927521968e-05, "epoch": 3.785211267605634, "percentage": 54.07, "elapsed_time": "3:14:14", "remaining_time": "2:44:58"}
|
||||
{"current_steps": 2155, "total_steps": 3976, "loss": 0.1613, "lr": 2.057941895909368e-05, "epoch": 3.794014084507042, "percentage": 54.2, "elapsed_time": "3:14:46", "remaining_time": "2:44:34"}
|
||||
{"current_steps": 2160, "total_steps": 3976, "loss": 0.1755, "lr": 2.049164747559305e-05, "epoch": 3.802816901408451, "percentage": 54.33, "elapsed_time": "3:15:10", "remaining_time": "2:44:05"}
|
||||
{"current_steps": 2165, "total_steps": 3976, "loss": 0.1645, "lr": 2.0403866516372884e-05, "epoch": 3.811619718309859, "percentage": 54.45, "elapsed_time": "3:15:42", "remaining_time": "2:43:42"}
|
||||
{"current_steps": 2170, "total_steps": 3976, "loss": 0.1798, "lr": 2.0316077773270923e-05, "epoch": 3.8204225352112675, "percentage": 54.58, "elapsed_time": "3:16:13", "remaining_time": "2:43:18"}
|
||||
{"current_steps": 2175, "total_steps": 3976, "loss": 0.1816, "lr": 2.0228282938274918e-05, "epoch": 3.829225352112676, "percentage": 54.7, "elapsed_time": "3:16:37", "remaining_time": "2:42:48"}
|
||||
{"current_steps": 2180, "total_steps": 3976, "loss": 0.1646, "lr": 2.0140483703490036e-05, "epoch": 3.8380281690140845, "percentage": 54.83, "elapsed_time": "3:17:01", "remaining_time": "2:42:19"}
|
||||
{"current_steps": 2185, "total_steps": 3976, "loss": 0.1724, "lr": 2.005268176110623e-05, "epoch": 3.846830985915493, "percentage": 54.95, "elapsed_time": "3:17:27", "remaining_time": "2:41:51"}
|
||||
{"current_steps": 2190, "total_steps": 3976, "loss": 0.1635, "lr": 1.9964878803365653e-05, "epoch": 3.8556338028169015, "percentage": 55.08, "elapsed_time": "3:17:52", "remaining_time": "2:41:21"}
|
||||
{"current_steps": 2195, "total_steps": 3976, "loss": 0.2126, "lr": 1.987707652253003e-05, "epoch": 3.86443661971831, "percentage": 55.21, "elapsed_time": "3:18:23", "remaining_time": "2:40:58"}
|
||||
{"current_steps": 2200, "total_steps": 3976, "loss": 0.1634, "lr": 1.9789276610848013e-05, "epoch": 3.873239436619718, "percentage": 55.33, "elapsed_time": "3:18:45", "remaining_time": "2:40:27"}
|
||||
{"current_steps": 2205, "total_steps": 3976, "loss": 0.1673, "lr": 1.9701480760522636e-05, "epoch": 3.882042253521127, "percentage": 55.46, "elapsed_time": "3:19:12", "remaining_time": "2:39:59"}
|
||||
{"current_steps": 2210, "total_steps": 3976, "loss": 0.1916, "lr": 1.9613690663678623e-05, "epoch": 3.890845070422535, "percentage": 55.58, "elapsed_time": "3:19:46", "remaining_time": "2:39:38"}
|
||||
{"current_steps": 2215, "total_steps": 3976, "loss": 0.1896, "lr": 1.9525908012329816e-05, "epoch": 3.8996478873239435, "percentage": 55.71, "elapsed_time": "3:20:23", "remaining_time": "2:39:19"}
|
||||
{"current_steps": 2220, "total_steps": 3976, "loss": 0.1566, "lr": 1.9438134498346555e-05, "epoch": 3.908450704225352, "percentage": 55.84, "elapsed_time": "3:20:49", "remaining_time": "2:38:50"}
|
||||
{"current_steps": 2225, "total_steps": 3976, "loss": 0.1564, "lr": 1.9350371813423077e-05, "epoch": 3.9172535211267605, "percentage": 55.96, "elapsed_time": "3:21:19", "remaining_time": "2:38:26"}
|
||||
{"current_steps": 2230, "total_steps": 3976, "loss": 0.1652, "lr": 1.926262164904492e-05, "epoch": 3.926056338028169, "percentage": 56.09, "elapsed_time": "3:21:43", "remaining_time": "2:37:56"}
|
||||
{"current_steps": 2235, "total_steps": 3976, "loss": 0.1885, "lr": 1.9174885696456277e-05, "epoch": 3.9348591549295775, "percentage": 56.21, "elapsed_time": "3:22:10", "remaining_time": "2:37:29"}
|
||||
{"current_steps": 2240, "total_steps": 3976, "loss": 0.161, "lr": 1.908716564662746e-05, "epoch": 3.943661971830986, "percentage": 56.34, "elapsed_time": "3:22:34", "remaining_time": "2:36:59"}
|
||||
{"current_steps": 2245, "total_steps": 3976, "loss": 0.1657, "lr": 1.899946319022225e-05, "epoch": 3.9524647887323945, "percentage": 56.46, "elapsed_time": "3:23:01", "remaining_time": "2:36:32"}
|
||||
{"current_steps": 2250, "total_steps": 3976, "loss": 0.1723, "lr": 1.8911780017565393e-05, "epoch": 3.961267605633803, "percentage": 56.59, "elapsed_time": "3:23:28", "remaining_time": "2:36:05"}
|
||||
{"current_steps": 2255, "total_steps": 3976, "loss": 0.1678, "lr": 1.882411781860991e-05, "epoch": 3.970070422535211, "percentage": 56.72, "elapsed_time": "3:23:54", "remaining_time": "2:35:37"}
|
||||
{"current_steps": 2260, "total_steps": 3976, "loss": 0.1672, "lr": 1.873647828290464e-05, "epoch": 3.97887323943662, "percentage": 56.84, "elapsed_time": "3:24:22", "remaining_time": "2:35:10"}
|
||||
{"current_steps": 2265, "total_steps": 3976, "loss": 0.178, "lr": 1.8648863099561583e-05, "epoch": 3.987676056338028, "percentage": 56.97, "elapsed_time": "3:24:49", "remaining_time": "2:34:43"}
|
||||
{"current_steps": 2270, "total_steps": 3976, "loss": 0.1793, "lr": 1.8561273957223424e-05, "epoch": 3.9964788732394365, "percentage": 57.09, "elapsed_time": "3:25:18", "remaining_time": "2:34:17"}
|
||||
{"current_steps": 2275, "total_steps": 3976, "loss": 0.1486, "lr": 1.8473712544030914e-05, "epoch": 4.005281690140845, "percentage": 57.22, "elapsed_time": "3:25:47", "remaining_time": "2:33:51"}
|
||||
{"current_steps": 2280, "total_steps": 3976, "loss": 0.1777, "lr": 1.8386180547590397e-05, "epoch": 4.014084507042254, "percentage": 57.34, "elapsed_time": "3:26:15", "remaining_time": "2:33:25"}
|
||||
{"current_steps": 2285, "total_steps": 3976, "loss": 0.1754, "lr": 1.8298679654941237e-05, "epoch": 4.022887323943662, "percentage": 57.47, "elapsed_time": "3:26:41", "remaining_time": "2:32:57"}
|
||||
{"current_steps": 2290, "total_steps": 3976, "loss": 0.1415, "lr": 1.8211211552523328e-05, "epoch": 4.03169014084507, "percentage": 57.6, "elapsed_time": "3:27:04", "remaining_time": "2:32:27"}
|
||||
{"current_steps": 2295, "total_steps": 3976, "loss": 0.1566, "lr": 1.8123777926144596e-05, "epoch": 4.040492957746479, "percentage": 57.72, "elapsed_time": "3:27:32", "remaining_time": "2:32:00"}
|
||||
{"current_steps": 2300, "total_steps": 3976, "loss": 0.1652, "lr": 1.8036380460948483e-05, "epoch": 4.049295774647887, "percentage": 57.85, "elapsed_time": "3:28:01", "remaining_time": "2:31:34"}
|
||||
{"current_steps": 2305, "total_steps": 3976, "loss": 0.1348, "lr": 1.794902084138148e-05, "epoch": 4.058098591549296, "percentage": 57.97, "elapsed_time": "3:28:28", "remaining_time": "2:31:08"}
|
||||
{"current_steps": 2310, "total_steps": 3976, "loss": 0.1616, "lr": 1.786170075116067e-05, "epoch": 4.066901408450704, "percentage": 58.1, "elapsed_time": "3:28:55", "remaining_time": "2:30:40"}
|
||||
{"current_steps": 2315, "total_steps": 3976, "loss": 0.1426, "lr": 1.777442187324128e-05, "epoch": 4.075704225352113, "percentage": 58.22, "elapsed_time": "3:29:24", "remaining_time": "2:30:15"}
|
||||
{"current_steps": 2320, "total_steps": 3976, "loss": 0.1693, "lr": 1.768718588978422e-05, "epoch": 4.084507042253521, "percentage": 58.35, "elapsed_time": "3:29:55", "remaining_time": "2:29:50"}
|
||||
{"current_steps": 2325, "total_steps": 3976, "loss": 0.1702, "lr": 1.7599994482123687e-05, "epoch": 4.09330985915493, "percentage": 58.48, "elapsed_time": "3:30:24", "remaining_time": "2:29:24"}
|
||||
{"current_steps": 2330, "total_steps": 3976, "loss": 0.1516, "lr": 1.7512849330734734e-05, "epoch": 4.102112676056338, "percentage": 58.6, "elapsed_time": "3:30:47", "remaining_time": "2:28:54"}
|
||||
{"current_steps": 2335, "total_steps": 3976, "loss": 0.1459, "lr": 1.7425752115200933e-05, "epoch": 4.110915492957746, "percentage": 58.73, "elapsed_time": "3:31:14", "remaining_time": "2:28:27"}
|
||||
{"current_steps": 2340, "total_steps": 3976, "loss": 0.1693, "lr": 1.7338704514181937e-05, "epoch": 4.119718309859155, "percentage": 58.85, "elapsed_time": "3:31:40", "remaining_time": "2:27:59"}
|
||||
{"current_steps": 2345, "total_steps": 3976, "loss": 0.152, "lr": 1.7251708205381175e-05, "epoch": 4.128521126760563, "percentage": 58.98, "elapsed_time": "3:32:10", "remaining_time": "2:27:34"}
|
||||
{"current_steps": 2350, "total_steps": 3976, "loss": 0.1489, "lr": 1.7164764865513485e-05, "epoch": 4.137323943661972, "percentage": 59.1, "elapsed_time": "3:32:40", "remaining_time": "2:27:09"}
|
||||
{"current_steps": 2355, "total_steps": 3976, "loss": 0.1524, "lr": 1.7077876170272825e-05, "epoch": 4.14612676056338, "percentage": 59.23, "elapsed_time": "3:33:11", "remaining_time": "2:26:44"}
|
||||
{"current_steps": 2360, "total_steps": 3976, "loss": 0.1532, "lr": 1.699104379429998e-05, "epoch": 4.154929577464789, "percentage": 59.36, "elapsed_time": "3:33:39", "remaining_time": "2:26:18"}
|
||||
{"current_steps": 2365, "total_steps": 3976, "loss": 0.1597, "lr": 1.6904269411150242e-05, "epoch": 4.163732394366197, "percentage": 59.48, "elapsed_time": "3:34:02", "remaining_time": "2:25:48"}
|
||||
{"current_steps": 2370, "total_steps": 3976, "loss": 0.1564, "lr": 1.6817554693261194e-05, "epoch": 4.172535211267606, "percentage": 59.61, "elapsed_time": "3:34:36", "remaining_time": "2:25:25"}
|
||||
{"current_steps": 2375, "total_steps": 3976, "loss": 0.1528, "lr": 1.673090131192047e-05, "epoch": 4.181338028169014, "percentage": 59.73, "elapsed_time": "3:35:02", "remaining_time": "2:24:57"}
|
||||
{"current_steps": 2380, "total_steps": 3976, "loss": 0.1505, "lr": 1.6644310937233553e-05, "epoch": 4.190140845070423, "percentage": 59.86, "elapsed_time": "3:35:31", "remaining_time": "2:24:31"}
|
||||
{"current_steps": 2385, "total_steps": 3976, "loss": 0.1654, "lr": 1.655778523809154e-05, "epoch": 4.198943661971831, "percentage": 59.98, "elapsed_time": "3:35:54", "remaining_time": "2:24:01"}
|
||||
{"current_steps": 2390, "total_steps": 3976, "loss": 0.147, "lr": 1.6471325882139045e-05, "epoch": 4.207746478873239, "percentage": 60.11, "elapsed_time": "3:36:17", "remaining_time": "2:23:31"}
|
||||
{"current_steps": 2395, "total_steps": 3976, "loss": 0.1446, "lr": 1.6384934535742006e-05, "epoch": 4.216549295774648, "percentage": 60.24, "elapsed_time": "3:36:44", "remaining_time": "2:23:04"}
|
||||
{"current_steps": 2400, "total_steps": 3976, "loss": 0.1435, "lr": 1.629861286395557e-05, "epoch": 4.225352112676056, "percentage": 60.36, "elapsed_time": "3:37:08", "remaining_time": "2:22:35"}
|
||||
{"current_steps": 2405, "total_steps": 3976, "loss": 0.1609, "lr": 1.6212362530492053e-05, "epoch": 4.234154929577465, "percentage": 60.49, "elapsed_time": "3:37:33", "remaining_time": "2:22:07"}
|
||||
{"current_steps": 2410, "total_steps": 3976, "loss": 0.14, "lr": 1.612618519768882e-05, "epoch": 4.242957746478873, "percentage": 60.61, "elapsed_time": "3:38:02", "remaining_time": "2:21:40"}
|
||||
{"current_steps": 2415, "total_steps": 3976, "loss": 0.1653, "lr": 1.604008252647626e-05, "epoch": 4.251760563380282, "percentage": 60.74, "elapsed_time": "3:38:30", "remaining_time": "2:21:14"}
|
||||
{"current_steps": 2420, "total_steps": 3976, "loss": 0.1495, "lr": 1.5954056176345778e-05, "epoch": 4.26056338028169, "percentage": 60.87, "elapsed_time": "3:38:54", "remaining_time": "2:20:44"}
|
||||
{"current_steps": 2425, "total_steps": 3976, "loss": 0.1643, "lr": 1.5868107805317836e-05, "epoch": 4.269366197183099, "percentage": 60.99, "elapsed_time": "3:39:23", "remaining_time": "2:20:19"}
|
||||
{"current_steps": 2430, "total_steps": 3976, "loss": 0.1723, "lr": 1.578223906990994e-05, "epoch": 4.278169014084507, "percentage": 61.12, "elapsed_time": "3:39:54", "remaining_time": "2:19:54"}
|
||||
{"current_steps": 2435, "total_steps": 3976, "loss": 0.1536, "lr": 1.569645162510477e-05, "epoch": 4.286971830985916, "percentage": 61.24, "elapsed_time": "3:40:16", "remaining_time": "2:19:24"}
|
||||
{"current_steps": 2440, "total_steps": 3976, "loss": 0.1529, "lr": 1.5610747124318244e-05, "epoch": 4.295774647887324, "percentage": 61.37, "elapsed_time": "3:40:36", "remaining_time": "2:18:52"}
|
||||
{"current_steps": 2445, "total_steps": 3976, "loss": 0.1506, "lr": 1.552512721936769e-05, "epoch": 4.304577464788732, "percentage": 61.49, "elapsed_time": "3:41:02", "remaining_time": "2:18:24"}
|
||||
{"current_steps": 2450, "total_steps": 3976, "loss": 0.1839, "lr": 1.5439593560439957e-05, "epoch": 4.313380281690141, "percentage": 61.62, "elapsed_time": "3:41:27", "remaining_time": "2:17:56"}
|
||||
{"current_steps": 2455, "total_steps": 3976, "loss": 0.1556, "lr": 1.5354147796059664e-05, "epoch": 4.322183098591549, "percentage": 61.75, "elapsed_time": "3:41:56", "remaining_time": "2:17:30"}
|
||||
{"current_steps": 2460, "total_steps": 3976, "loss": 0.1609, "lr": 1.526879157305739e-05, "epoch": 4.330985915492958, "percentage": 61.87, "elapsed_time": "3:42:20", "remaining_time": "2:17:01"}
|
||||
{"current_steps": 2465, "total_steps": 3976, "loss": 0.1588, "lr": 1.5183526536537935e-05, "epoch": 4.339788732394366, "percentage": 62.0, "elapsed_time": "3:42:50", "remaining_time": "2:16:36"}
|
||||
{"current_steps": 2470, "total_steps": 3976, "loss": 0.1529, "lr": 1.5098354329848658e-05, "epoch": 4.348591549295775, "percentage": 62.12, "elapsed_time": "3:43:17", "remaining_time": "2:16:08"}
|
||||
{"current_steps": 2475, "total_steps": 3976, "loss": 0.146, "lr": 1.5013276594547754e-05, "epoch": 4.357394366197183, "percentage": 62.25, "elapsed_time": "3:43:39", "remaining_time": "2:15:38"}
|
||||
{"current_steps": 2480, "total_steps": 3976, "loss": 0.1505, "lr": 1.4928294970372623e-05, "epoch": 4.366197183098592, "percentage": 62.37, "elapsed_time": "3:44:00", "remaining_time": "2:15:07"}
|
||||
{"current_steps": 2485, "total_steps": 3976, "loss": 0.1441, "lr": 1.4843411095208288e-05, "epoch": 4.375, "percentage": 62.5, "elapsed_time": "3:44:24", "remaining_time": "2:14:38"}
|
||||
{"current_steps": 2490, "total_steps": 3976, "loss": 0.1486, "lr": 1.4758626605055816e-05, "epoch": 4.383802816901408, "percentage": 62.63, "elapsed_time": "3:44:47", "remaining_time": "2:14:09"}
|
||||
{"current_steps": 2495, "total_steps": 3976, "loss": 0.1737, "lr": 1.4673943134000791e-05, "epoch": 4.392605633802817, "percentage": 62.75, "elapsed_time": "3:45:16", "remaining_time": "2:13:43"}
|
||||
{"current_steps": 2500, "total_steps": 3976, "loss": 0.1452, "lr": 1.4589362314181799e-05, "epoch": 4.401408450704225, "percentage": 62.88, "elapsed_time": "3:45:41", "remaining_time": "2:13:14"}
|
||||
{"current_steps": 2505, "total_steps": 3976, "loss": 0.1549, "lr": 1.4504885775758992e-05, "epoch": 4.410211267605634, "percentage": 63.0, "elapsed_time": "3:46:06", "remaining_time": "2:12:46"}
|
||||
{"current_steps": 2510, "total_steps": 3976, "loss": 0.1477, "lr": 1.4420515146882692e-05, "epoch": 4.419014084507042, "percentage": 63.13, "elapsed_time": "3:46:31", "remaining_time": "2:12:18"}
|
||||
{"current_steps": 2515, "total_steps": 3976, "loss": 0.1743, "lr": 1.433625205366195e-05, "epoch": 4.427816901408451, "percentage": 63.25, "elapsed_time": "3:47:03", "remaining_time": "2:11:53"}
|
||||
{"current_steps": 2520, "total_steps": 3976, "loss": 0.175, "lr": 1.4252098120133243e-05, "epoch": 4.436619718309859, "percentage": 63.38, "elapsed_time": "3:47:34", "remaining_time": "2:11:29"}
|
||||
{"current_steps": 2525, "total_steps": 3976, "loss": 0.1435, "lr": 1.416805496822919e-05, "epoch": 4.445422535211268, "percentage": 63.51, "elapsed_time": "3:47:59", "remaining_time": "2:11:00"}
|
||||
{"current_steps": 2530, "total_steps": 3976, "loss": 0.1472, "lr": 1.4084124217747244e-05, "epoch": 4.454225352112676, "percentage": 63.63, "elapsed_time": "3:48:22", "remaining_time": "2:10:31"}
|
||||
{"current_steps": 2535, "total_steps": 3976, "loss": 0.171, "lr": 1.4000307486318527e-05, "epoch": 4.463028169014084, "percentage": 63.76, "elapsed_time": "3:48:53", "remaining_time": "2:10:06"}
|
||||
{"current_steps": 2540, "total_steps": 3976, "loss": 0.1653, "lr": 1.3916606389376614e-05, "epoch": 4.471830985915493, "percentage": 63.88, "elapsed_time": "3:49:23", "remaining_time": "2:09:41"}
|
||||
{"current_steps": 2545, "total_steps": 3976, "loss": 0.1559, "lr": 1.3833022540126408e-05, "epoch": 4.480633802816901, "percentage": 64.01, "elapsed_time": "3:49:49", "remaining_time": "2:09:13"}
|
||||
{"current_steps": 2550, "total_steps": 3976, "loss": 0.1605, "lr": 1.3749557549513042e-05, "epoch": 4.48943661971831, "percentage": 64.13, "elapsed_time": "3:50:09", "remaining_time": "2:08:42"}
|
||||
{"current_steps": 2555, "total_steps": 3976, "loss": 0.1562, "lr": 1.3666213026190857e-05, "epoch": 4.498239436619718, "percentage": 64.26, "elapsed_time": "3:50:35", "remaining_time": "2:08:14"}
|
||||
{"current_steps": 2560, "total_steps": 3976, "loss": 0.1569, "lr": 1.3582990576492377e-05, "epoch": 4.507042253521127, "percentage": 64.39, "elapsed_time": "3:50:57", "remaining_time": "2:07:44"}
|
||||
{"current_steps": 2565, "total_steps": 3976, "loss": 0.1587, "lr": 1.3499891804397333e-05, "epoch": 4.515845070422535, "percentage": 64.51, "elapsed_time": "3:51:30", "remaining_time": "2:07:21"}
|
||||
{"current_steps": 2570, "total_steps": 3976, "loss": 0.1452, "lr": 1.3416918311501783e-05, "epoch": 4.524647887323944, "percentage": 64.64, "elapsed_time": "3:51:55", "remaining_time": "2:06:52"}
|
||||
{"current_steps": 2575, "total_steps": 3976, "loss": 0.152, "lr": 1.3334071696987238e-05, "epoch": 4.533450704225352, "percentage": 64.76, "elapsed_time": "3:52:30", "remaining_time": "2:06:30"}
|
||||
{"current_steps": 2580, "total_steps": 3976, "loss": 0.1465, "lr": 1.325135355758981e-05, "epoch": 4.542253521126761, "percentage": 64.89, "elapsed_time": "3:53:00", "remaining_time": "2:06:04"}
|
||||
{"current_steps": 2585, "total_steps": 3976, "loss": 0.1336, "lr": 1.3168765487569469e-05, "epoch": 4.551056338028169, "percentage": 65.02, "elapsed_time": "3:53:30", "remaining_time": "2:05:39"}
|
||||
{"current_steps": 2590, "total_steps": 3976, "loss": 0.1633, "lr": 1.3086309078679303e-05, "epoch": 4.559859154929578, "percentage": 65.14, "elapsed_time": "3:53:55", "remaining_time": "2:05:11"}
|
||||
{"current_steps": 2595, "total_steps": 3976, "loss": 0.1555, "lr": 1.3003985920134837e-05, "epoch": 4.568661971830986, "percentage": 65.27, "elapsed_time": "3:54:31", "remaining_time": "2:04:48"}
|
||||
{"current_steps": 2600, "total_steps": 3976, "loss": 0.1497, "lr": 1.2921797598583422e-05, "epoch": 4.577464788732394, "percentage": 65.39, "elapsed_time": "3:54:56", "remaining_time": "2:04:20"}
|
||||
{"current_steps": 2605, "total_steps": 3976, "loss": 0.1409, "lr": 1.2839745698073642e-05, "epoch": 4.586267605633803, "percentage": 65.52, "elapsed_time": "3:55:20", "remaining_time": "2:03:51"}
|
||||
{"current_steps": 2610, "total_steps": 3976, "loss": 0.1411, "lr": 1.2757831800024767e-05, "epoch": 4.595070422535211, "percentage": 65.64, "elapsed_time": "3:55:48", "remaining_time": "2:03:24"}
|
||||
{"current_steps": 2615, "total_steps": 3976, "loss": 0.1578, "lr": 1.2676057483196289e-05, "epoch": 4.60387323943662, "percentage": 65.77, "elapsed_time": "3:56:15", "remaining_time": "2:02:57"}
|
||||
{"current_steps": 2620, "total_steps": 3976, "loss": 0.1595, "lr": 1.2594424323657521e-05, "epoch": 4.612676056338028, "percentage": 65.9, "elapsed_time": "3:56:42", "remaining_time": "2:02:30"}
|
||||
{"current_steps": 2625, "total_steps": 3976, "loss": 0.1572, "lr": 1.2512933894757172e-05, "epoch": 4.621478873239437, "percentage": 66.02, "elapsed_time": "3:57:09", "remaining_time": "2:02:03"}
|
||||
{"current_steps": 2630, "total_steps": 3976, "loss": 0.1444, "lr": 1.2431587767093052e-05, "epoch": 4.630281690140845, "percentage": 66.15, "elapsed_time": "3:57:36", "remaining_time": "2:01:36"}
|
||||
{"current_steps": 2635, "total_steps": 3976, "loss": 0.1625, "lr": 1.2350387508481799e-05, "epoch": 4.639084507042254, "percentage": 66.27, "elapsed_time": "3:58:09", "remaining_time": "2:01:12"}
|
||||
{"current_steps": 2640, "total_steps": 3976, "loss": 0.1668, "lr": 1.2269334683928641e-05, "epoch": 4.647887323943662, "percentage": 66.4, "elapsed_time": "3:58:36", "remaining_time": "2:00:44"}
|
||||
{"current_steps": 2645, "total_steps": 3976, "loss": 0.1576, "lr": 1.2188430855597286e-05, "epoch": 4.65669014084507, "percentage": 66.52, "elapsed_time": "3:58:59", "remaining_time": "2:00:15"}
|
||||
{"current_steps": 2650, "total_steps": 3976, "loss": 0.1516, "lr": 1.210767758277974e-05, "epoch": 4.665492957746479, "percentage": 66.65, "elapsed_time": "3:59:24", "remaining_time": "1:59:47"}
|
||||
{"current_steps": 2655, "total_steps": 3976, "loss": 0.1727, "lr": 1.2027076421866313e-05, "epoch": 4.674295774647887, "percentage": 66.78, "elapsed_time": "3:59:57", "remaining_time": "1:59:23"}
|
||||
{"current_steps": 2660, "total_steps": 3976, "loss": 0.1607, "lr": 1.1946628926315587e-05, "epoch": 4.683098591549296, "percentage": 66.9, "elapsed_time": "4:00:25", "remaining_time": "1:58:56"}
|
||||
{"current_steps": 2665, "total_steps": 3976, "loss": 0.1669, "lr": 1.1866336646624512e-05, "epoch": 4.691901408450704, "percentage": 67.03, "elapsed_time": "4:00:52", "remaining_time": "1:58:29"}
|
||||
{"current_steps": 2670, "total_steps": 3976, "loss": 0.1549, "lr": 1.1786201130298486e-05, "epoch": 4.700704225352113, "percentage": 67.15, "elapsed_time": "4:01:19", "remaining_time": "1:58:02"}
|
||||
{"current_steps": 2675, "total_steps": 3976, "loss": 0.1536, "lr": 1.1706223921821536e-05, "epoch": 4.709507042253521, "percentage": 67.28, "elapsed_time": "4:01:43", "remaining_time": "1:57:33"}
|
||||
{"current_steps": 2680, "total_steps": 3976, "loss": 0.1655, "lr": 1.162640656262656e-05, "epoch": 4.71830985915493, "percentage": 67.4, "elapsed_time": "4:02:05", "remaining_time": "1:57:04"}
|
||||
{"current_steps": 2685, "total_steps": 3976, "loss": 0.1622, "lr": 1.1546750591065643e-05, "epoch": 4.727112676056338, "percentage": 67.53, "elapsed_time": "4:02:31", "remaining_time": "1:56:36"}
|
||||
{"current_steps": 2690, "total_steps": 3976, "loss": 0.1622, "lr": 1.1467257542380355e-05, "epoch": 4.735915492957746, "percentage": 67.66, "elapsed_time": "4:02:53", "remaining_time": "1:56:07"}
|
||||
{"current_steps": 2695, "total_steps": 3976, "loss": 0.16, "lr": 1.1387928948672186e-05, "epoch": 4.744718309859155, "percentage": 67.78, "elapsed_time": "4:03:18", "remaining_time": "1:55:38"}
|
||||
{"current_steps": 2700, "total_steps": 3976, "loss": 0.1522, "lr": 1.1308766338873038e-05, "epoch": 4.753521126760563, "percentage": 67.91, "elapsed_time": "4:03:44", "remaining_time": "1:55:11"}
|
||||
{"current_steps": 2705, "total_steps": 3976, "loss": 0.1572, "lr": 1.122977123871571e-05, "epoch": 4.762323943661972, "percentage": 68.03, "elapsed_time": "4:04:10", "remaining_time": "1:54:43"}
|
||||
{"current_steps": 2710, "total_steps": 3976, "loss": 0.1603, "lr": 1.1150945170704547e-05, "epoch": 4.77112676056338, "percentage": 68.16, "elapsed_time": "4:04:38", "remaining_time": "1:54:17"}
|
||||
{"current_steps": 2715, "total_steps": 3976, "loss": 0.1458, "lr": 1.1072289654086074e-05, "epoch": 4.779929577464789, "percentage": 68.28, "elapsed_time": "4:05:03", "remaining_time": "1:53:49"}
|
||||
{"current_steps": 2720, "total_steps": 3976, "loss": 0.1573, "lr": 1.0993806204819686e-05, "epoch": 4.788732394366197, "percentage": 68.41, "elapsed_time": "4:05:33", "remaining_time": "1:53:23"}
|
||||
{"current_steps": 2725, "total_steps": 3976, "loss": 0.1613, "lr": 1.0915496335548456e-05, "epoch": 4.797535211267606, "percentage": 68.54, "elapsed_time": "4:06:04", "remaining_time": "1:52:58"}
|
||||
{"current_steps": 2730, "total_steps": 3976, "loss": 0.1482, "lr": 1.0837361555570007e-05, "epoch": 4.806338028169014, "percentage": 68.66, "elapsed_time": "4:06:30", "remaining_time": "1:52:30"}
|
||||
{"current_steps": 2735, "total_steps": 3976, "loss": 0.1515, "lr": 1.0759403370807369e-05, "epoch": 4.815140845070422, "percentage": 68.79, "elapsed_time": "4:06:55", "remaining_time": "1:52:02"}
|
||||
{"current_steps": 2740, "total_steps": 3976, "loss": 0.1625, "lr": 1.0681623283779982e-05, "epoch": 4.823943661971831, "percentage": 68.91, "elapsed_time": "4:07:25", "remaining_time": "1:51:36"}
|
||||
{"current_steps": 2745, "total_steps": 3976, "loss": 0.1519, "lr": 1.0604022793574757e-05, "epoch": 4.832746478873239, "percentage": 69.04, "elapsed_time": "4:07:48", "remaining_time": "1:51:07"}
|
||||
{"current_steps": 2750, "total_steps": 3976, "loss": 0.1421, "lr": 1.0526603395817158e-05, "epoch": 4.841549295774648, "percentage": 69.16, "elapsed_time": "4:08:12", "remaining_time": "1:50:39"}
|
||||
{"current_steps": 2755, "total_steps": 3976, "loss": 0.1491, "lr": 1.0449366582642364e-05, "epoch": 4.850352112676056, "percentage": 69.29, "elapsed_time": "4:08:38", "remaining_time": "1:50:11"}
|
||||
{"current_steps": 2760, "total_steps": 3976, "loss": 0.1695, "lr": 1.0372313842666544e-05, "epoch": 4.859154929577465, "percentage": 69.42, "elapsed_time": "4:09:05", "remaining_time": "1:49:44"}
|
||||
{"current_steps": 2765, "total_steps": 3976, "loss": 0.1598, "lr": 1.0295446660958137e-05, "epoch": 4.867957746478873, "percentage": 69.54, "elapsed_time": "4:09:35", "remaining_time": "1:49:19"}
|
||||
{"current_steps": 2770, "total_steps": 3976, "loss": 0.1514, "lr": 1.0218766519009252e-05, "epoch": 4.876760563380282, "percentage": 69.67, "elapsed_time": "4:10:01", "remaining_time": "1:48:51"}
|
||||
{"current_steps": 2775, "total_steps": 3976, "loss": 0.168, "lr": 1.0142274894707102e-05, "epoch": 4.88556338028169, "percentage": 69.79, "elapsed_time": "4:10:30", "remaining_time": "1:48:25"}
|
||||
{"current_steps": 2780, "total_steps": 3976, "loss": 0.1524, "lr": 1.0065973262305544e-05, "epoch": 4.894366197183099, "percentage": 69.92, "elapsed_time": "4:10:55", "remaining_time": "1:47:57"}
|
||||
{"current_steps": 2785, "total_steps": 3976, "loss": 0.142, "lr": 9.989863092396615e-06, "epoch": 4.903169014084507, "percentage": 70.05, "elapsed_time": "4:11:19", "remaining_time": "1:47:28"}
|
||||
{"current_steps": 2790, "total_steps": 3976, "loss": 0.143, "lr": 9.913945851882221e-06, "epoch": 4.911971830985916, "percentage": 70.17, "elapsed_time": "4:11:40", "remaining_time": "1:46:58"}
|
||||
{"current_steps": 2795, "total_steps": 3976, "loss": 0.155, "lr": 9.838223003945886e-06, "epoch": 4.920774647887324, "percentage": 70.3, "elapsed_time": "4:12:09", "remaining_time": "1:46:32"}
|
||||
{"current_steps": 2800, "total_steps": 3976, "loss": 0.1686, "lr": 9.762696008024505e-06, "epoch": 4.929577464788732, "percentage": 70.42, "elapsed_time": "4:12:39", "remaining_time": "1:46:06"}
|
||||
{"current_steps": 2805, "total_steps": 3976, "loss": 0.1432, "lr": 9.687366319780242e-06, "epoch": 4.938380281690141, "percentage": 70.55, "elapsed_time": "4:13:00", "remaining_time": "1:45:37"}
|
||||
{"current_steps": 2810, "total_steps": 3976, "loss": 0.1533, "lr": 9.612235391072483e-06, "epoch": 4.947183098591549, "percentage": 70.67, "elapsed_time": "4:13:20", "remaining_time": "1:45:07"}
|
||||
{"current_steps": 2815, "total_steps": 3976, "loss": 0.1506, "lr": 9.537304669929837e-06, "epoch": 4.955985915492958, "percentage": 70.8, "elapsed_time": "4:13:47", "remaining_time": "1:44:40"}
|
||||
{"current_steps": 2820, "total_steps": 3976, "loss": 0.1562, "lr": 9.46257560052222e-06, "epoch": 4.964788732394366, "percentage": 70.93, "elapsed_time": "4:14:14", "remaining_time": "1:44:13"}
|
||||
{"current_steps": 2825, "total_steps": 3976, "loss": 0.1494, "lr": 9.388049623133047e-06, "epoch": 4.973591549295775, "percentage": 71.05, "elapsed_time": "4:14:41", "remaining_time": "1:43:46"}
|
||||
{"current_steps": 2830, "total_steps": 3976, "loss": 0.1421, "lr": 9.313728174131451e-06, "epoch": 4.982394366197183, "percentage": 71.18, "elapsed_time": "4:15:12", "remaining_time": "1:43:20"}
|
||||
{"current_steps": 2835, "total_steps": 3976, "loss": 0.1467, "lr": 9.239612685944599e-06, "epoch": 4.991197183098592, "percentage": 71.3, "elapsed_time": "4:15:41", "remaining_time": "1:42:54"}
|
||||
{"current_steps": 2840, "total_steps": 3976, "loss": 0.1697, "lr": 9.165704587030115e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "4:16:08", "remaining_time": "1:42:27"}
|
||||
{"current_steps": 2845, "total_steps": 3976, "loss": 0.1392, "lr": 9.092005301848521e-06, "epoch": 5.008802816901408, "percentage": 71.55, "elapsed_time": "4:16:34", "remaining_time": "1:42:00"}
|
||||
{"current_steps": 2850, "total_steps": 3976, "loss": 0.1466, "lr": 9.018516250835772e-06, "epoch": 5.017605633802817, "percentage": 71.68, "elapsed_time": "4:16:58", "remaining_time": "1:41:31"}
|
||||
{"current_steps": 2855, "total_steps": 3976, "loss": 0.1362, "lr": 8.945238850375894e-06, "epoch": 5.026408450704225, "percentage": 71.81, "elapsed_time": "4:17:28", "remaining_time": "1:41:05"}
|
||||
{"current_steps": 2860, "total_steps": 3976, "loss": 0.1498, "lr": 8.872174512773717e-06, "epoch": 5.035211267605634, "percentage": 71.93, "elapsed_time": "4:17:50", "remaining_time": "1:40:36"}
|
||||
{"current_steps": 2865, "total_steps": 3976, "loss": 0.1571, "lr": 8.799324646227596e-06, "epoch": 5.044014084507042, "percentage": 72.06, "elapsed_time": "4:18:13", "remaining_time": "1:40:08"}
|
||||
{"current_steps": 2870, "total_steps": 3976, "loss": 0.1418, "lr": 8.726690654802301e-06, "epoch": 5.052816901408451, "percentage": 72.18, "elapsed_time": "4:18:43", "remaining_time": "1:39:42"}
|
||||
{"current_steps": 2875, "total_steps": 3976, "loss": 0.1451, "lr": 8.654273938401973e-06, "epoch": 5.061619718309859, "percentage": 72.31, "elapsed_time": "4:19:07", "remaining_time": "1:39:14"}
|
||||
{"current_steps": 2880, "total_steps": 3976, "loss": 0.1323, "lr": 8.582075892743103e-06, "epoch": 5.070422535211268, "percentage": 72.43, "elapsed_time": "4:19:34", "remaining_time": "1:38:46"}
|
||||
{"current_steps": 2885, "total_steps": 3976, "loss": 0.1499, "lr": 8.51009790932767e-06, "epoch": 5.079225352112676, "percentage": 72.56, "elapsed_time": "4:20:02", "remaining_time": "1:38:20"}
|
||||
{"current_steps": 2890, "total_steps": 3976, "loss": 0.1422, "lr": 8.438341375416294e-06, "epoch": 5.088028169014084, "percentage": 72.69, "elapsed_time": "4:20:28", "remaining_time": "1:37:53"}
|
||||
{"current_steps": 2895, "total_steps": 3976, "loss": 0.1429, "lr": 8.36680767400151e-06, "epoch": 5.096830985915493, "percentage": 72.81, "elapsed_time": "4:20:52", "remaining_time": "1:37:24"}
|
||||
{"current_steps": 2900, "total_steps": 3976, "loss": 0.1526, "lr": 8.29549818378111e-06, "epoch": 5.105633802816901, "percentage": 72.94, "elapsed_time": "4:21:23", "remaining_time": "1:36:59"}
|
||||
{"current_steps": 2905, "total_steps": 3976, "loss": 0.1441, "lr": 8.224414279131583e-06, "epoch": 5.11443661971831, "percentage": 73.06, "elapsed_time": "4:21:50", "remaining_time": "1:36:32"}
|
||||
{"current_steps": 2910, "total_steps": 3976, "loss": 0.1387, "lr": 8.153557330081623e-06, "epoch": 5.123239436619718, "percentage": 73.19, "elapsed_time": "4:22:16", "remaining_time": "1:36:04"}
|
||||
{"current_steps": 2915, "total_steps": 3976, "loss": 0.1298, "lr": 8.082928702285694e-06, "epoch": 5.132042253521127, "percentage": 73.31, "elapsed_time": "4:22:44", "remaining_time": "1:35:37"}
|
||||
{"current_steps": 2920, "total_steps": 3976, "loss": 0.1598, "lr": 8.012529756997747e-06, "epoch": 5.140845070422535, "percentage": 73.44, "elapsed_time": "4:23:10", "remaining_time": "1:35:10"}
|
||||
{"current_steps": 2925, "total_steps": 3976, "loss": 0.1435, "lr": 7.942361851044973e-06, "epoch": 5.149647887323944, "percentage": 73.57, "elapsed_time": "4:23:39", "remaining_time": "1:34:44"}
|
||||
{"current_steps": 2930, "total_steps": 3976, "loss": 0.1413, "lr": 7.872426336801642e-06, "epoch": 5.158450704225352, "percentage": 73.69, "elapsed_time": "4:24:06", "remaining_time": "1:34:17"}
|
||||
{"current_steps": 2935, "total_steps": 3976, "loss": 0.1579, "lr": 7.802724562163038e-06, "epoch": 5.167253521126761, "percentage": 73.82, "elapsed_time": "4:24:30", "remaining_time": "1:33:49"}
|
||||
{"current_steps": 2940, "total_steps": 3976, "loss": 0.1394, "lr": 7.73325787051951e-06, "epoch": 5.176056338028169, "percentage": 73.94, "elapsed_time": "4:24:54", "remaining_time": "1:33:20"}
|
||||
{"current_steps": 2945, "total_steps": 3976, "loss": 0.1551, "lr": 7.664027600730532e-06, "epoch": 5.184859154929577, "percentage": 74.07, "elapsed_time": "4:25:23", "remaining_time": "1:32:54"}
|
||||
{"current_steps": 2950, "total_steps": 3976, "loss": 0.144, "lr": 7.595035087098952e-06, "epoch": 5.193661971830986, "percentage": 74.2, "elapsed_time": "4:25:48", "remaining_time": "1:32:26"}
|
||||
{"current_steps": 2955, "total_steps": 3976, "loss": 0.1384, "lr": 7.526281659345225e-06, "epoch": 5.202464788732394, "percentage": 74.32, "elapsed_time": "4:26:10", "remaining_time": "1:31:57"}
|
||||
{"current_steps": 2960, "total_steps": 3976, "loss": 0.1583, "lr": 7.457768642581813e-06, "epoch": 5.211267605633803, "percentage": 74.45, "elapsed_time": "4:26:47", "remaining_time": "1:31:34"}
|
||||
{"current_steps": 2965, "total_steps": 3976, "loss": 0.1514, "lr": 7.389497357287639e-06, "epoch": 5.220070422535211, "percentage": 74.57, "elapsed_time": "4:27:16", "remaining_time": "1:31:07"}
|
||||
{"current_steps": 2970, "total_steps": 3976, "loss": 0.1423, "lr": 7.321469119282649e-06, "epoch": 5.22887323943662, "percentage": 74.7, "elapsed_time": "4:27:40", "remaining_time": "1:30:40"}
|
||||
{"current_steps": 2975, "total_steps": 3976, "loss": 0.1649, "lr": 7.253685239702439e-06, "epoch": 5.237676056338028, "percentage": 74.82, "elapsed_time": "4:28:14", "remaining_time": "1:30:15"}
|
||||
{"current_steps": 2980, "total_steps": 3976, "loss": 0.1607, "lr": 7.186147024972978e-06, "epoch": 5.246478873239437, "percentage": 74.95, "elapsed_time": "4:28:39", "remaining_time": "1:29:47"}
|
||||
{"current_steps": 2985, "total_steps": 3976, "loss": 0.1303, "lr": 7.118855776785432e-06, "epoch": 5.255281690140845, "percentage": 75.08, "elapsed_time": "4:29:09", "remaining_time": "1:29:21"}
|
||||
{"current_steps": 2990, "total_steps": 3976, "loss": 0.158, "lr": 7.051812792071104e-06, "epoch": 5.264084507042254, "percentage": 75.2, "elapsed_time": "4:29:39", "remaining_time": "1:28:55"}
|
||||
{"current_steps": 2995, "total_steps": 3976, "loss": 0.1693, "lr": 6.9850193629763975e-06, "epoch": 5.272887323943662, "percentage": 75.33, "elapsed_time": "4:30:06", "remaining_time": "1:28:28"}
|
||||
{"current_steps": 3000, "total_steps": 3976, "loss": 0.1575, "lr": 6.918476776837926e-06, "epoch": 5.28169014084507, "percentage": 75.45, "elapsed_time": "4:30:38", "remaining_time": "1:28:02"}
|
||||
{"current_steps": 3005, "total_steps": 3976, "loss": 0.1473, "lr": 6.852186316157727e-06, "epoch": 5.290492957746479, "percentage": 75.58, "elapsed_time": "4:31:13", "remaining_time": "1:27:38"}
|
||||
{"current_steps": 3010, "total_steps": 3976, "loss": 0.1375, "lr": 6.7861492585785005e-06, "epoch": 5.299295774647887, "percentage": 75.7, "elapsed_time": "4:31:35", "remaining_time": "1:27:09"}
|
||||
{"current_steps": 3015, "total_steps": 3976, "loss": 0.138, "lr": 6.720366876859028e-06, "epoch": 5.308098591549296, "percentage": 75.83, "elapsed_time": "4:31:59", "remaining_time": "1:26:41"}
|
||||
{"current_steps": 3020, "total_steps": 3976, "loss": 0.149, "lr": 6.654840438849601e-06, "epoch": 5.316901408450704, "percentage": 75.96, "elapsed_time": "4:32:27", "remaining_time": "1:26:14"}
|
||||
{"current_steps": 3025, "total_steps": 3976, "loss": 0.1469, "lr": 6.589571207467615e-06, "epoch": 5.325704225352113, "percentage": 76.08, "elapsed_time": "4:32:54", "remaining_time": "1:25:47"}
|
||||
{"current_steps": 3030, "total_steps": 3976, "loss": 0.1436, "lr": 6.5245604406732114e-06, "epoch": 5.334507042253521, "percentage": 76.21, "elapsed_time": "4:33:21", "remaining_time": "1:25:20"}
|
||||
{"current_steps": 3035, "total_steps": 3976, "loss": 0.1427, "lr": 6.459809391445047e-06, "epoch": 5.34330985915493, "percentage": 76.33, "elapsed_time": "4:33:49", "remaining_time": "1:24:53"}
|
||||
{"current_steps": 3040, "total_steps": 3976, "loss": 0.1477, "lr": 6.395319307756142e-06, "epoch": 5.352112676056338, "percentage": 76.46, "elapsed_time": "4:34:18", "remaining_time": "1:24:27"}
|
||||
{"current_steps": 3045, "total_steps": 3976, "loss": 0.1397, "lr": 6.331091432549816e-06, "epoch": 5.360915492957746, "percentage": 76.58, "elapsed_time": "4:34:43", "remaining_time": "1:23:59"}
|
||||
{"current_steps": 3050, "total_steps": 3976, "loss": 0.1307, "lr": 6.267127003715727e-06, "epoch": 5.369718309859155, "percentage": 76.71, "elapsed_time": "4:35:11", "remaining_time": "1:23:33"}
|
||||
{"current_steps": 3055, "total_steps": 3976, "loss": 0.1342, "lr": 6.203427254066052e-06, "epoch": 5.378521126760563, "percentage": 76.84, "elapsed_time": "4:35:38", "remaining_time": "1:23:06"}
|
||||
{"current_steps": 3060, "total_steps": 3976, "loss": 0.1398, "lr": 6.13999341131168e-06, "epoch": 5.387323943661972, "percentage": 76.96, "elapsed_time": "4:36:05", "remaining_time": "1:22:38"}
|
||||
{"current_steps": 3065, "total_steps": 3976, "loss": 0.1413, "lr": 6.076826698038567e-06, "epoch": 5.39612676056338, "percentage": 77.09, "elapsed_time": "4:36:30", "remaining_time": "1:22:11"}
|
||||
{"current_steps": 3070, "total_steps": 3976, "loss": 0.1454, "lr": 6.013928331684193e-06, "epoch": 5.404929577464789, "percentage": 77.21, "elapsed_time": "4:36:59", "remaining_time": "1:21:44"}
|
||||
{"current_steps": 3075, "total_steps": 3976, "loss": 0.1495, "lr": 5.951299524514062e-06, "epoch": 5.413732394366197, "percentage": 77.34, "elapsed_time": "4:37:27", "remaining_time": "1:21:17"}
|
||||
{"current_steps": 3080, "total_steps": 3976, "loss": 0.135, "lr": 5.8889414835983715e-06, "epoch": 5.422535211267606, "percentage": 77.46, "elapsed_time": "4:37:55", "remaining_time": "1:20:51"}
|
||||
{"current_steps": 3085, "total_steps": 3976, "loss": 0.1429, "lr": 5.826855410788719e-06, "epoch": 5.431338028169014, "percentage": 77.59, "elapsed_time": "4:38:25", "remaining_time": "1:20:24"}
|
||||
{"current_steps": 3090, "total_steps": 3976, "loss": 0.1403, "lr": 5.765042502694955e-06, "epoch": 5.440140845070423, "percentage": 77.72, "elapsed_time": "4:38:51", "remaining_time": "1:19:57"}
|
||||
{"current_steps": 3095, "total_steps": 3976, "loss": 0.1351, "lr": 5.703503950662113e-06, "epoch": 5.448943661971831, "percentage": 77.84, "elapsed_time": "4:39:12", "remaining_time": "1:19:28"}
|
||||
{"current_steps": 3100, "total_steps": 3976, "loss": 0.1238, "lr": 5.642240940747466e-06, "epoch": 5.457746478873239, "percentage": 77.97, "elapsed_time": "4:39:37", "remaining_time": "1:19:00"}
|
||||
{"current_steps": 3105, "total_steps": 3976, "loss": 0.1525, "lr": 5.58125465369763e-06, "epoch": 5.466549295774648, "percentage": 78.09, "elapsed_time": "4:40:06", "remaining_time": "1:18:34"}
|
||||
{"current_steps": 3110, "total_steps": 3976, "loss": 0.135, "lr": 5.520546264925859e-06, "epoch": 5.475352112676056, "percentage": 78.22, "elapsed_time": "4:40:32", "remaining_time": "1:18:07"}
|
||||
{"current_steps": 3115, "total_steps": 3976, "loss": 0.155, "lr": 5.460116944489335e-06, "epoch": 5.484154929577465, "percentage": 78.35, "elapsed_time": "4:40:56", "remaining_time": "1:17:39"}
|
||||
{"current_steps": 3120, "total_steps": 3976, "loss": 0.145, "lr": 5.3999678570666544e-06, "epoch": 5.492957746478873, "percentage": 78.47, "elapsed_time": "4:41:20", "remaining_time": "1:17:11"}
|
||||
{"current_steps": 3125, "total_steps": 3976, "loss": 0.1345, "lr": 5.340100161935378e-06, "epoch": 5.501760563380282, "percentage": 78.6, "elapsed_time": "4:41:46", "remaining_time": "1:16:44"}
|
||||
{"current_steps": 3130, "total_steps": 3976, "loss": 0.1306, "lr": 5.280515012949667e-06, "epoch": 5.51056338028169, "percentage": 78.72, "elapsed_time": "4:42:12", "remaining_time": "1:16:16"}
|
||||
{"current_steps": 3135, "total_steps": 3976, "loss": 0.1591, "lr": 5.221213558518057e-06, "epoch": 5.519366197183099, "percentage": 78.85, "elapsed_time": "4:42:37", "remaining_time": "1:15:49"}
|
||||
{"current_steps": 3140, "total_steps": 3976, "loss": 0.1627, "lr": 5.162196941581334e-06, "epoch": 5.528169014084507, "percentage": 78.97, "elapsed_time": "4:43:02", "remaining_time": "1:15:21"}
|
||||
{"current_steps": 3145, "total_steps": 3976, "loss": 0.1634, "lr": 5.103466299590498e-06, "epoch": 5.536971830985916, "percentage": 79.1, "elapsed_time": "4:43:28", "remaining_time": "1:14:54"}
|
||||
{"current_steps": 3150, "total_steps": 3976, "loss": 0.1511, "lr": 5.045022764484826e-06, "epoch": 5.545774647887324, "percentage": 79.23, "elapsed_time": "4:43:53", "remaining_time": "1:14:26"}
|
||||
{"current_steps": 3155, "total_steps": 3976, "loss": 0.1505, "lr": 4.986867462670077e-06, "epoch": 5.554577464788732, "percentage": 79.35, "elapsed_time": "4:44:21", "remaining_time": "1:13:59"}
|
||||
{"current_steps": 3160, "total_steps": 3976, "loss": 0.1606, "lr": 4.9290015149967654e-06, "epoch": 5.563380281690141, "percentage": 79.48, "elapsed_time": "4:44:48", "remaining_time": "1:13:32"}
|
||||
{"current_steps": 3165, "total_steps": 3976, "loss": 0.1402, "lr": 4.871426036738584e-06, "epoch": 5.572183098591549, "percentage": 79.6, "elapsed_time": "4:45:11", "remaining_time": "1:13:04"}
|
||||
{"current_steps": 3170, "total_steps": 3976, "loss": 0.1363, "lr": 4.814142137570872e-06, "epoch": 5.580985915492958, "percentage": 79.73, "elapsed_time": "4:45:34", "remaining_time": "1:12:36"}
|
||||
{"current_steps": 3175, "total_steps": 3976, "loss": 0.1493, "lr": 4.757150921549265e-06, "epoch": 5.589788732394366, "percentage": 79.85, "elapsed_time": "4:46:02", "remaining_time": "1:12:09"}
|
||||
{"current_steps": 3180, "total_steps": 3976, "loss": 0.146, "lr": 4.7004534870883875e-06, "epoch": 5.598591549295775, "percentage": 79.98, "elapsed_time": "4:46:31", "remaining_time": "1:11:43"}
|
||||
{"current_steps": 3185, "total_steps": 3976, "loss": 0.1406, "lr": 4.6440509269406904e-06, "epoch": 5.607394366197183, "percentage": 80.11, "elapsed_time": "4:46:53", "remaining_time": "1:11:14"}
|
||||
{"current_steps": 3190, "total_steps": 3976, "loss": 0.1339, "lr": 4.587944328175411e-06, "epoch": 5.616197183098592, "percentage": 80.23, "elapsed_time": "4:47:17", "remaining_time": "1:10:47"}
|
||||
{"current_steps": 3195, "total_steps": 3976, "loss": 0.1417, "lr": 4.5321347721575885e-06, "epoch": 5.625, "percentage": 80.36, "elapsed_time": "4:47:47", "remaining_time": "1:10:21"}
|
||||
{"current_steps": 3200, "total_steps": 3976, "loss": 0.1455, "lr": 4.47662333452723e-06, "epoch": 5.633802816901408, "percentage": 80.48, "elapsed_time": "4:48:17", "remaining_time": "1:09:54"}
|
||||
{"current_steps": 3205, "total_steps": 3976, "loss": 0.1524, "lr": 4.4214110851786105e-06, "epoch": 5.642605633802817, "percentage": 80.61, "elapsed_time": "4:48:45", "remaining_time": "1:09:27"}
|
||||
{"current_steps": 3210, "total_steps": 3976, "loss": 0.1565, "lr": 4.366499088239622e-06, "epoch": 5.651408450704225, "percentage": 80.73, "elapsed_time": "4:49:16", "remaining_time": "1:09:01"}
|
||||
{"current_steps": 3215, "total_steps": 3976, "loss": 0.1468, "lr": 4.3118884020512584e-06, "epoch": 5.660211267605634, "percentage": 80.86, "elapsed_time": "4:49:44", "remaining_time": "1:08:35"}
|
||||
{"current_steps": 3220, "total_steps": 3976, "loss": 0.1422, "lr": 4.257580079147241e-06, "epoch": 5.669014084507042, "percentage": 80.99, "elapsed_time": "4:50:09", "remaining_time": "1:08:07"}
|
||||
{"current_steps": 3225, "total_steps": 3976, "loss": 0.1471, "lr": 4.2035751662337106e-06, "epoch": 5.677816901408451, "percentage": 81.11, "elapsed_time": "4:50:32", "remaining_time": "1:07:39"}
|
||||
{"current_steps": 3230, "total_steps": 3976, "loss": 0.1425, "lr": 4.149874704169086e-06, "epoch": 5.686619718309859, "percentage": 81.24, "elapsed_time": "4:50:56", "remaining_time": "1:07:11"}
|
||||
{"current_steps": 3235, "total_steps": 3976, "loss": 0.1623, "lr": 4.096479727943958e-06, "epoch": 5.695422535211268, "percentage": 81.36, "elapsed_time": "4:51:25", "remaining_time": "1:06:45"}
|
||||
{"current_steps": 3240, "total_steps": 3976, "loss": 0.1353, "lr": 4.043391266661192e-06, "epoch": 5.704225352112676, "percentage": 81.49, "elapsed_time": "4:51:55", "remaining_time": "1:06:18"}
|
||||
{"current_steps": 3245, "total_steps": 3976, "loss": 0.1408, "lr": 3.990610343516046e-06, "epoch": 5.713028169014084, "percentage": 81.61, "elapsed_time": "4:52:25", "remaining_time": "1:05:52"}
|
||||
{"current_steps": 3250, "total_steps": 3976, "loss": 0.1533, "lr": 3.938137975776475e-06, "epoch": 5.721830985915493, "percentage": 81.74, "elapsed_time": "4:52:53", "remaining_time": "1:05:25"}
|
||||
{"current_steps": 3255, "total_steps": 3976, "loss": 0.166, "lr": 3.885975174763536e-06, "epoch": 5.730633802816901, "percentage": 81.87, "elapsed_time": "4:53:21", "remaining_time": "1:04:58"}
|
||||
{"current_steps": 3260, "total_steps": 3976, "loss": 0.1583, "lr": 3.834122945831866e-06, "epoch": 5.73943661971831, "percentage": 81.99, "elapsed_time": "4:53:48", "remaining_time": "1:04:31"}
|
||||
{"current_steps": 3265, "total_steps": 3976, "loss": 0.1552, "lr": 3.782582288350325e-06, "epoch": 5.748239436619718, "percentage": 82.12, "elapsed_time": "4:54:16", "remaining_time": "1:04:04"}
|
||||
{"current_steps": 3270, "total_steps": 3976, "loss": 0.1384, "lr": 3.7313541956827347e-06, "epoch": 5.757042253521127, "percentage": 82.24, "elapsed_time": "4:54:43", "remaining_time": "1:03:38"}
|
||||
{"current_steps": 3275, "total_steps": 3976, "loss": 0.1403, "lr": 3.6804396551687373e-06, "epoch": 5.765845070422535, "percentage": 82.37, "elapsed_time": "4:55:08", "remaining_time": "1:03:10"}
|
||||
{"current_steps": 3280, "total_steps": 3976, "loss": 0.1462, "lr": 3.6298396481047405e-06, "epoch": 5.774647887323944, "percentage": 82.49, "elapsed_time": "4:55:39", "remaining_time": "1:02:44"}
|
||||
{"current_steps": 3285, "total_steps": 3976, "loss": 0.1401, "lr": 3.5795551497250338e-06, "epoch": 5.783450704225352, "percentage": 82.62, "elapsed_time": "4:56:04", "remaining_time": "1:02:16"}
|
||||
{"current_steps": 3290, "total_steps": 3976, "loss": 0.1446, "lr": 3.5295871291829695e-06, "epoch": 5.792253521126761, "percentage": 82.75, "elapsed_time": "4:56:26", "remaining_time": "1:01:48"}
|
||||
{"current_steps": 3295, "total_steps": 3976, "loss": 0.1431, "lr": 3.479936549532319e-06, "epoch": 5.801056338028169, "percentage": 82.87, "elapsed_time": "4:56:57", "remaining_time": "1:01:22"}
|
||||
{"current_steps": 3300, "total_steps": 3976, "loss": 0.145, "lr": 3.4306043677086588e-06, "epoch": 5.809859154929578, "percentage": 83.0, "elapsed_time": "4:57:24", "remaining_time": "1:00:55"}
|
||||
{"current_steps": 3305, "total_steps": 3976, "loss": 0.1568, "lr": 3.381591534510982e-06, "epoch": 5.818661971830986, "percentage": 83.12, "elapsed_time": "4:57:52", "remaining_time": "1:00:28"}
|
||||
{"current_steps": 3310, "total_steps": 3976, "loss": 0.1319, "lr": 3.332898994583329e-06, "epoch": 5.827464788732394, "percentage": 83.25, "elapsed_time": "4:58:18", "remaining_time": "1:00:01"}
|
||||
{"current_steps": 3315, "total_steps": 3976, "loss": 0.1404, "lr": 3.284527686396599e-06, "epoch": 5.836267605633803, "percentage": 83.38, "elapsed_time": "4:58:41", "remaining_time": "0:59:33"}
|
||||
{"current_steps": 3320, "total_steps": 3976, "loss": 0.1369, "lr": 3.236478542230481e-06, "epoch": 5.845070422535211, "percentage": 83.5, "elapsed_time": "4:59:08", "remaining_time": "0:59:06"}
|
||||
{"current_steps": 3325, "total_steps": 3976, "loss": 0.1356, "lr": 3.1887524881554486e-06, "epoch": 5.85387323943662, "percentage": 83.63, "elapsed_time": "4:59:32", "remaining_time": "0:58:38"}
|
||||
{"current_steps": 3330, "total_steps": 3976, "loss": 0.1468, "lr": 3.1413504440149323e-06, "epoch": 5.862676056338028, "percentage": 83.75, "elapsed_time": "5:00:06", "remaining_time": "0:58:13"}
|
||||
{"current_steps": 3335, "total_steps": 3976, "loss": 0.1247, "lr": 3.0942733234075995e-06, "epoch": 5.871478873239437, "percentage": 83.88, "elapsed_time": "5:00:30", "remaining_time": "0:57:45"}
|
||||
{"current_steps": 3340, "total_steps": 3976, "loss": 0.1351, "lr": 3.047522033669732e-06, "epoch": 5.880281690140845, "percentage": 84.0, "elapsed_time": "5:00:53", "remaining_time": "0:57:17"}
|
||||
{"current_steps": 3345, "total_steps": 3976, "loss": 0.1487, "lr": 3.001097475857735e-06, "epoch": 5.889084507042254, "percentage": 84.13, "elapsed_time": "5:01:21", "remaining_time": "0:56:50"}
|
||||
{"current_steps": 3350, "total_steps": 3976, "loss": 0.1339, "lr": 2.955000544730784e-06, "epoch": 5.897887323943662, "percentage": 84.26, "elapsed_time": "5:01:50", "remaining_time": "0:56:24"}
|
||||
{"current_steps": 3355, "total_steps": 3976, "loss": 0.1625, "lr": 2.90923212873357e-06, "epoch": 5.90669014084507, "percentage": 84.38, "elapsed_time": "5:02:27", "remaining_time": "0:55:58"}
|
||||
{"current_steps": 3360, "total_steps": 3976, "loss": 0.1306, "lr": 2.8637931099791806e-06, "epoch": 5.915492957746479, "percentage": 84.51, "elapsed_time": "5:02:53", "remaining_time": "0:55:31"}
|
||||
{"current_steps": 3365, "total_steps": 3976, "loss": 0.1543, "lr": 2.8186843642321004e-06, "epoch": 5.924295774647887, "percentage": 84.63, "elapsed_time": "5:03:21", "remaining_time": "0:55:05"}
|
||||
{"current_steps": 3370, "total_steps": 3976, "loss": 0.1353, "lr": 2.773906760891334e-06, "epoch": 5.933098591549296, "percentage": 84.76, "elapsed_time": "5:03:46", "remaining_time": "0:54:37"}
|
||||
{"current_steps": 3375, "total_steps": 3976, "loss": 0.1289, "lr": 2.7294611629736345e-06, "epoch": 5.941901408450704, "percentage": 84.88, "elapsed_time": "5:04:09", "remaining_time": "0:54:09"}
|
||||
{"current_steps": 3380, "total_steps": 3976, "loss": 0.1425, "lr": 2.685348427096881e-06, "epoch": 5.950704225352113, "percentage": 85.01, "elapsed_time": "5:04:37", "remaining_time": "0:53:42"}
|
||||
{"current_steps": 3385, "total_steps": 3976, "loss": 0.1407, "lr": 2.641569403463584e-06, "epoch": 5.959507042253521, "percentage": 85.14, "elapsed_time": "5:05:02", "remaining_time": "0:53:15"}
|
||||
{"current_steps": 3390, "total_steps": 3976, "loss": 0.1497, "lr": 2.5981249358444682e-06, "epoch": 5.96830985915493, "percentage": 85.26, "elapsed_time": "5:05:33", "remaining_time": "0:52:49"}
|
||||
{"current_steps": 3395, "total_steps": 3976, "loss": 0.1532, "lr": 2.5550158615622265e-06, "epoch": 5.977112676056338, "percentage": 85.39, "elapsed_time": "5:05:58", "remaining_time": "0:52:21"}
|
||||
{"current_steps": 3400, "total_steps": 3976, "loss": 0.1562, "lr": 2.5122430114753906e-06, "epoch": 5.985915492957746, "percentage": 85.51, "elapsed_time": "5:06:26", "remaining_time": "0:51:54"}
|
||||
{"current_steps": 3405, "total_steps": 3976, "loss": 0.1546, "lr": 2.4698072099623025e-06, "epoch": 5.994718309859155, "percentage": 85.64, "elapsed_time": "5:06:56", "remaining_time": "0:51:28"}
|
||||
{"current_steps": 3410, "total_steps": 3976, "loss": 0.1316, "lr": 2.4277092749052343e-06, "epoch": 6.003521126760563, "percentage": 85.76, "elapsed_time": "5:07:24", "remaining_time": "0:51:01"}
|
||||
{"current_steps": 3415, "total_steps": 3976, "loss": 0.1333, "lr": 2.3859500176746143e-06, "epoch": 6.012323943661972, "percentage": 85.89, "elapsed_time": "5:07:50", "remaining_time": "0:50:34"}
|
||||
{"current_steps": 3420, "total_steps": 3976, "loss": 0.1327, "lr": 2.344530243113403e-06, "epoch": 6.02112676056338, "percentage": 86.02, "elapsed_time": "5:08:15", "remaining_time": "0:50:06"}
|
||||
{"current_steps": 3425, "total_steps": 3976, "loss": 0.1497, "lr": 2.303450749521572e-06, "epoch": 6.029929577464789, "percentage": 86.14, "elapsed_time": "5:08:39", "remaining_time": "0:49:39"}
|
||||
{"current_steps": 3430, "total_steps": 3976, "loss": 0.1417, "lr": 2.262712328640726e-06, "epoch": 6.038732394366197, "percentage": 86.27, "elapsed_time": "5:09:08", "remaining_time": "0:49:12"}
|
||||
{"current_steps": 3435, "total_steps": 3976, "loss": 0.1478, "lr": 2.2223157656388384e-06, "epoch": 6.047535211267606, "percentage": 86.39, "elapsed_time": "5:09:32", "remaining_time": "0:48:45"}
|
||||
{"current_steps": 3440, "total_steps": 3976, "loss": 0.1457, "lr": 2.18226183909511e-06, "epoch": 6.056338028169014, "percentage": 86.52, "elapsed_time": "5:09:55", "remaining_time": "0:48:17"}
|
||||
{"current_steps": 3445, "total_steps": 3976, "loss": 0.1342, "lr": 2.1425513209849736e-06, "epoch": 6.065140845070423, "percentage": 86.64, "elapsed_time": "5:10:24", "remaining_time": "0:47:50"}
|
||||
{"current_steps": 3450, "total_steps": 3976, "loss": 0.1394, "lr": 2.103184976665222e-06, "epoch": 6.073943661971831, "percentage": 86.77, "elapsed_time": "5:10:48", "remaining_time": "0:47:23"}
|
||||
{"current_steps": 3455, "total_steps": 3976, "loss": 0.1293, "lr": 2.0641635648592404e-06, "epoch": 6.082746478873239, "percentage": 86.9, "elapsed_time": "5:11:08", "remaining_time": "0:46:55"}
|
||||
{"current_steps": 3460, "total_steps": 3976, "loss": 0.1456, "lr": 2.0254878376423883e-06, "epoch": 6.091549295774648, "percentage": 87.02, "elapsed_time": "5:11:35", "remaining_time": "0:46:28"}
|
||||
{"current_steps": 3465, "total_steps": 3976, "loss": 0.134, "lr": 1.9871585404275117e-06, "epoch": 6.100352112676056, "percentage": 87.15, "elapsed_time": "5:12:02", "remaining_time": "0:46:01"}
|
||||
{"current_steps": 3470, "total_steps": 3976, "loss": 0.1336, "lr": 1.949176411950577e-06, "epoch": 6.109154929577465, "percentage": 87.27, "elapsed_time": "5:12:29", "remaining_time": "0:45:34"}
|
||||
{"current_steps": 3475, "total_steps": 3976, "loss": 0.1378, "lr": 1.911542184256421e-06, "epoch": 6.117957746478873, "percentage": 87.4, "elapsed_time": "5:13:01", "remaining_time": "0:45:07"}
|
||||
{"current_steps": 3480, "total_steps": 3976, "loss": 0.1445, "lr": 1.874256582684646e-06, "epoch": 6.126760563380282, "percentage": 87.53, "elapsed_time": "5:13:25", "remaining_time": "0:44:40"}
|
||||
{"current_steps": 3485, "total_steps": 3976, "loss": 0.1353, "lr": 1.8373203258556472e-06, "epoch": 6.13556338028169, "percentage": 87.65, "elapsed_time": "5:13:55", "remaining_time": "0:44:13"}
|
||||
{"current_steps": 3490, "total_steps": 3976, "loss": 0.1371, "lr": 1.8007341256567578e-06, "epoch": 6.144366197183099, "percentage": 87.78, "elapsed_time": "5:14:21", "remaining_time": "0:43:46"}
|
||||
{"current_steps": 3495, "total_steps": 3976, "loss": 0.1481, "lr": 1.7644986872285286e-06, "epoch": 6.153169014084507, "percentage": 87.9, "elapsed_time": "5:14:44", "remaining_time": "0:43:19"}
|
||||
{"current_steps": 3500, "total_steps": 3976, "loss": 0.1397, "lr": 1.7286147089511418e-06, "epoch": 6.161971830985916, "percentage": 88.03, "elapsed_time": "5:15:13", "remaining_time": "0:42:52"}
|
||||
{"current_steps": 3505, "total_steps": 3976, "loss": 0.1613, "lr": 1.6930828824309387e-06, "epoch": 6.170774647887324, "percentage": 88.15, "elapsed_time": "5:15:42", "remaining_time": "0:42:25"}
|
||||
{"current_steps": 3510, "total_steps": 3976, "loss": 0.15, "lr": 1.6579038924871005e-06, "epoch": 6.179577464788732, "percentage": 88.28, "elapsed_time": "5:16:09", "remaining_time": "0:41:58"}
|
||||
{"current_steps": 3515, "total_steps": 3976, "loss": 0.1275, "lr": 1.623078417138455e-06, "epoch": 6.188380281690141, "percentage": 88.41, "elapsed_time": "5:16:35", "remaining_time": "0:41:31"}
|
||||
{"current_steps": 3520, "total_steps": 3976, "loss": 0.1424, "lr": 1.5886071275903913e-06, "epoch": 6.197183098591549, "percentage": 88.53, "elapsed_time": "5:17:04", "remaining_time": "0:41:04"}
|
||||
{"current_steps": 3525, "total_steps": 3976, "loss": 0.1425, "lr": 1.5544906882219347e-06, "epoch": 6.205985915492958, "percentage": 88.66, "elapsed_time": "5:17:28", "remaining_time": "0:40:37"}
|
||||
{"current_steps": 3530, "total_steps": 3976, "loss": 0.1371, "lr": 1.5207297565729429e-06, "epoch": 6.214788732394366, "percentage": 88.78, "elapsed_time": "5:17:56", "remaining_time": "0:40:10"}
|
||||
{"current_steps": 3535, "total_steps": 3976, "loss": 0.1478, "lr": 1.4873249833314351e-06, "epoch": 6.223591549295775, "percentage": 88.91, "elapsed_time": "5:18:28", "remaining_time": "0:39:43"}
|
||||
{"current_steps": 3540, "total_steps": 3976, "loss": 0.1443, "lr": 1.454277012321037e-06, "epoch": 6.232394366197183, "percentage": 89.03, "elapsed_time": "5:18:54", "remaining_time": "0:39:16"}
|
||||
{"current_steps": 3545, "total_steps": 3976, "loss": 0.1664, "lr": 1.4215864804885838e-06, "epoch": 6.241197183098592, "percentage": 89.16, "elapsed_time": "5:19:30", "remaining_time": "0:38:50"}
|
||||
{"current_steps": 3550, "total_steps": 3976, "loss": 0.1426, "lr": 1.3892540178918456e-06, "epoch": 6.25, "percentage": 89.29, "elapsed_time": "5:19:57", "remaining_time": "0:38:23"}
|
||||
{"current_steps": 3555, "total_steps": 3976, "loss": 0.131, "lr": 1.3572802476873737e-06, "epoch": 6.258802816901408, "percentage": 89.41, "elapsed_time": "5:20:21", "remaining_time": "0:37:56"}
|
||||
{"current_steps": 3560, "total_steps": 3976, "loss": 0.1557, "lr": 1.3256657861185063e-06, "epoch": 6.267605633802817, "percentage": 89.54, "elapsed_time": "5:20:52", "remaining_time": "0:37:29"}
|
||||
{"current_steps": 3565, "total_steps": 3976, "loss": 0.1339, "lr": 1.2944112425034704e-06, "epoch": 6.276408450704225, "percentage": 89.66, "elapsed_time": "5:21:19", "remaining_time": "0:37:02"}
|
||||
{"current_steps": 3570, "total_steps": 3976, "loss": 0.1301, "lr": 1.26351721922366e-06, "epoch": 6.285211267605634, "percentage": 89.79, "elapsed_time": "5:21:46", "remaining_time": "0:36:35"}
|
||||
{"current_steps": 3575, "total_steps": 3976, "loss": 0.1494, "lr": 1.2329843117120066e-06, "epoch": 6.294014084507042, "percentage": 89.91, "elapsed_time": "5:22:15", "remaining_time": "0:36:08"}
|
||||
{"current_steps": 3580, "total_steps": 3976, "loss": 0.1558, "lr": 1.2028131084415206e-06, "epoch": 6.302816901408451, "percentage": 90.04, "elapsed_time": "5:22:45", "remaining_time": "0:35:42"}
|
||||
{"current_steps": 3585, "total_steps": 3976, "loss": 0.1479, "lr": 1.1730041909139377e-06, "epoch": 6.311619718309859, "percentage": 90.17, "elapsed_time": "5:23:11", "remaining_time": "0:35:14"}
|
||||
{"current_steps": 3590, "total_steps": 3976, "loss": 0.1435, "lr": 1.1435581336485102e-06, "epoch": 6.320422535211268, "percentage": 90.29, "elapsed_time": "5:23:38", "remaining_time": "0:34:47"}
|
||||
{"current_steps": 3595, "total_steps": 3976, "loss": 0.1393, "lr": 1.1144755041709399e-06, "epoch": 6.329225352112676, "percentage": 90.42, "elapsed_time": "5:24:07", "remaining_time": "0:34:21"}
|
||||
{"current_steps": 3600, "total_steps": 3976, "loss": 0.1359, "lr": 1.0857568630024472e-06, "epoch": 6.338028169014084, "percentage": 90.54, "elapsed_time": "5:24:33", "remaining_time": "0:33:53"}
|
||||
{"current_steps": 3605, "total_steps": 3976, "loss": 0.1275, "lr": 1.0574027636489537e-06, "epoch": 6.346830985915493, "percentage": 90.67, "elapsed_time": "5:25:00", "remaining_time": "0:33:26"}
|
||||
{"current_steps": 3610, "total_steps": 3976, "loss": 0.1337, "lr": 1.029413752590418e-06, "epoch": 6.355633802816901, "percentage": 90.79, "elapsed_time": "5:25:24", "remaining_time": "0:32:59"}
|
||||
{"current_steps": 3615, "total_steps": 3976, "loss": 0.1411, "lr": 1.001790369270308e-06, "epoch": 6.36443661971831, "percentage": 90.92, "elapsed_time": "5:25:52", "remaining_time": "0:32:32"}
|
||||
{"current_steps": 3620, "total_steps": 3976, "loss": 0.1368, "lr": 9.745331460851947e-07, "epoch": 6.373239436619718, "percentage": 91.05, "elapsed_time": "5:26:16", "remaining_time": "0:32:05"}
|
||||
{"current_steps": 3625, "total_steps": 3976, "loss": 0.1238, "lr": 9.476426083745104e-07, "epoch": 6.382042253521127, "percentage": 91.17, "elapsed_time": "5:26:42", "remaining_time": "0:31:38"}
|
||||
{"current_steps": 3630, "total_steps": 3976, "loss": 0.1353, "lr": 9.211192744103958e-07, "epoch": 6.390845070422535, "percentage": 91.3, "elapsed_time": "5:27:10", "remaining_time": "0:31:11"}
|
||||
{"current_steps": 3635, "total_steps": 3976, "loss": 0.1331, "lr": 8.949636553877439e-07, "epoch": 6.399647887323944, "percentage": 91.42, "elapsed_time": "5:27:34", "remaining_time": "0:30:43"}
|
||||
{"current_steps": 3640, "total_steps": 3976, "loss": 0.1404, "lr": 8.69176255414308e-07, "epoch": 6.408450704225352, "percentage": 91.55, "elapsed_time": "5:28:04", "remaining_time": "0:30:17"}
|
||||
{"current_steps": 3645, "total_steps": 3976, "loss": 0.137, "lr": 8.437575715010293e-07, "epoch": 6.417253521126761, "percentage": 91.68, "elapsed_time": "5:28:31", "remaining_time": "0:29:49"}
|
||||
{"current_steps": 3650, "total_steps": 3976, "loss": 0.1604, "lr": 8.187080935524205e-07, "epoch": 6.426056338028169, "percentage": 91.8, "elapsed_time": "5:28:54", "remaining_time": "0:29:22"}
|
||||
{"current_steps": 3655, "total_steps": 3976, "loss": 0.1379, "lr": 7.940283043571462e-07, "epoch": 6.434859154929577, "percentage": 91.93, "elapsed_time": "5:29:16", "remaining_time": "0:28:55"}
|
||||
{"current_steps": 3660, "total_steps": 3976, "loss": 0.1345, "lr": 7.697186795787059e-07, "epoch": 6.443661971830986, "percentage": 92.05, "elapsed_time": "5:29:49", "remaining_time": "0:28:28"}
|
||||
{"current_steps": 3665, "total_steps": 3976, "loss": 0.1384, "lr": 7.457796877462776e-07, "epoch": 6.452464788732394, "percentage": 92.18, "elapsed_time": "5:30:17", "remaining_time": "0:28:01"}
|
||||
{"current_steps": 3670, "total_steps": 3976, "loss": 0.1565, "lr": 7.222117902456815e-07, "epoch": 6.461267605633803, "percentage": 92.3, "elapsed_time": "5:30:48", "remaining_time": "0:27:34"}
|
||||
{"current_steps": 3675, "total_steps": 3976, "loss": 0.1419, "lr": 6.990154413104799e-07, "epoch": 6.470070422535211, "percentage": 92.43, "elapsed_time": "5:31:15", "remaining_time": "0:27:07"}
|
||||
{"current_steps": 3680, "total_steps": 3976, "loss": 0.1239, "lr": 6.761910880132406e-07, "epoch": 6.47887323943662, "percentage": 92.56, "elapsed_time": "5:31:43", "remaining_time": "0:26:40"}
|
||||
{"current_steps": 3685, "total_steps": 3976, "loss": 0.132, "lr": 6.537391702568973e-07, "epoch": 6.487676056338028, "percentage": 92.68, "elapsed_time": "5:32:06", "remaining_time": "0:26:13"}
|
||||
{"current_steps": 3690, "total_steps": 3976, "loss": 0.1256, "lr": 6.316601207662953e-07, "epoch": 6.496478873239437, "percentage": 92.81, "elapsed_time": "5:32:36", "remaining_time": "0:25:46"}
|
||||
{"current_steps": 3695, "total_steps": 3976, "loss": 0.1317, "lr": 6.099543650798345e-07, "epoch": 6.505281690140845, "percentage": 92.93, "elapsed_time": "5:32:59", "remaining_time": "0:25:19"}
|
||||
{"current_steps": 3700, "total_steps": 3976, "loss": 0.1331, "lr": 5.886223215412745e-07, "epoch": 6.514084507042254, "percentage": 93.06, "elapsed_time": "5:33:24", "remaining_time": "0:24:52"}
|
||||
{"current_steps": 3705, "total_steps": 3976, "loss": 0.1392, "lr": 5.676644012916654e-07, "epoch": 6.522887323943662, "percentage": 93.18, "elapsed_time": "5:33:48", "remaining_time": "0:24:24"}
|
||||
{"current_steps": 3710, "total_steps": 3976, "loss": 0.1298, "lr": 5.47081008261443e-07, "epoch": 6.53169014084507, "percentage": 93.31, "elapsed_time": "5:34:10", "remaining_time": "0:23:57"}
|
||||
{"current_steps": 3715, "total_steps": 3976, "loss": 0.1459, "lr": 5.268725391626106e-07, "epoch": 6.540492957746479, "percentage": 93.44, "elapsed_time": "5:34:38", "remaining_time": "0:23:30"}
|
||||
{"current_steps": 3720, "total_steps": 3976, "loss": 0.1256, "lr": 5.070393834811227e-07, "epoch": 6.549295774647887, "percentage": 93.56, "elapsed_time": "5:35:02", "remaining_time": "0:23:03"}
|
||||
{"current_steps": 3725, "total_steps": 3976, "loss": 0.1518, "lr": 4.875819234693669e-07, "epoch": 6.558098591549296, "percentage": 93.69, "elapsed_time": "5:35:31", "remaining_time": "0:22:36"}
|
||||
{"current_steps": 3730, "total_steps": 3976, "loss": 0.1452, "lr": 4.6850053413879384e-07, "epoch": 6.566901408450704, "percentage": 93.81, "elapsed_time": "5:36:01", "remaining_time": "0:22:09"}
|
||||
{"current_steps": 3735, "total_steps": 3976, "loss": 0.1379, "lr": 4.497955832526946e-07, "epoch": 6.575704225352113, "percentage": 93.94, "elapsed_time": "5:36:32", "remaining_time": "0:21:42"}
|
||||
{"current_steps": 3740, "total_steps": 3976, "loss": 0.1384, "lr": 4.314674313191147e-07, "epoch": 6.584507042253521, "percentage": 94.06, "elapsed_time": "5:37:02", "remaining_time": "0:21:16"}
|
||||
{"current_steps": 3745, "total_steps": 3976, "loss": 0.1281, "lr": 4.1351643158389135e-07, "epoch": 6.59330985915493, "percentage": 94.19, "elapsed_time": "5:37:25", "remaining_time": "0:20:48"}
|
||||
{"current_steps": 3750, "total_steps": 3976, "loss": 0.1572, "lr": 3.9594293002386486e-07, "epoch": 6.602112676056338, "percentage": 94.32, "elapsed_time": "5:37:52", "remaining_time": "0:20:21"}
|
||||
{"current_steps": 3755, "total_steps": 3976, "loss": 0.1353, "lr": 3.7874726534019803e-07, "epoch": 6.610915492957746, "percentage": 94.44, "elapsed_time": "5:38:13", "remaining_time": "0:19:54"}
|
||||
{"current_steps": 3760, "total_steps": 3976, "loss": 0.1434, "lr": 3.6192976895185197e-07, "epoch": 6.619718309859155, "percentage": 94.57, "elapsed_time": "5:38:41", "remaining_time": "0:19:27"}
|
||||
{"current_steps": 3765, "total_steps": 3976, "loss": 0.13, "lr": 3.454907649892003e-07, "epoch": 6.628521126760563, "percentage": 94.69, "elapsed_time": "5:39:11", "remaining_time": "0:19:00"}
|
||||
{"current_steps": 3770, "total_steps": 3976, "loss": 0.1548, "lr": 3.294305702877765e-07, "epoch": 6.637323943661972, "percentage": 94.82, "elapsed_time": "5:39:34", "remaining_time": "0:18:33"}
|
||||
{"current_steps": 3775, "total_steps": 3976, "loss": 0.1313, "lr": 3.137494943821717e-07, "epoch": 6.64612676056338, "percentage": 94.94, "elapsed_time": "5:39:58", "remaining_time": "0:18:06"}
|
||||
{"current_steps": 3780, "total_steps": 3976, "loss": 0.1403, "lr": 2.984478395000712e-07, "epoch": 6.654929577464789, "percentage": 95.07, "elapsed_time": "5:40:24", "remaining_time": "0:17:39"}
|
||||
{"current_steps": 3785, "total_steps": 3976, "loss": 0.1504, "lr": 2.835259005564184e-07, "epoch": 6.663732394366197, "percentage": 95.2, "elapsed_time": "5:40:49", "remaining_time": "0:17:11"}
|
||||
{"current_steps": 3790, "total_steps": 3976, "loss": 0.1391, "lr": 2.689839651477466e-07, "epoch": 6.672535211267606, "percentage": 95.32, "elapsed_time": "5:41:13", "remaining_time": "0:16:44"}
|
||||
{"current_steps": 3795, "total_steps": 3976, "loss": 0.1295, "lr": 2.5482231354662766e-07, "epoch": 6.681338028169014, "percentage": 95.45, "elapsed_time": "5:41:38", "remaining_time": "0:16:17"}
|
||||
{"current_steps": 3800, "total_steps": 3976, "loss": 0.1569, "lr": 2.410412186962674e-07, "epoch": 6.690140845070422, "percentage": 95.57, "elapsed_time": "5:42:00", "remaining_time": "0:15:50"}
|
||||
{"current_steps": 3805, "total_steps": 3976, "loss": 0.1441, "lr": 2.2764094620524758e-07, "epoch": 6.698943661971831, "percentage": 95.7, "elapsed_time": "5:42:22", "remaining_time": "0:15:23"}
|
||||
{"current_steps": 3810, "total_steps": 3976, "loss": 0.1537, "lr": 2.1462175434241006e-07, "epoch": 6.707746478873239, "percentage": 95.82, "elapsed_time": "5:42:52", "remaining_time": "0:14:56"}
|
||||
{"current_steps": 3815, "total_steps": 3976, "loss": 0.1312, "lr": 2.0198389403187634e-07, "epoch": 6.716549295774648, "percentage": 95.95, "elapsed_time": "5:43:21", "remaining_time": "0:14:29"}
|
||||
{"current_steps": 3820, "total_steps": 3976, "loss": 0.1348, "lr": 1.897276088482114e-07, "epoch": 6.725352112676056, "percentage": 96.08, "elapsed_time": "5:43:49", "remaining_time": "0:14:02"}
|
||||
{"current_steps": 3825, "total_steps": 3976, "loss": 0.1279, "lr": 1.778531350117274e-07, "epoch": 6.734154929577465, "percentage": 96.2, "elapsed_time": "5:44:12", "remaining_time": "0:13:35"}
|
||||
{"current_steps": 3830, "total_steps": 3976, "loss": 0.132, "lr": 1.6636070138393634e-07, "epoch": 6.742957746478873, "percentage": 96.33, "elapsed_time": "5:44:42", "remaining_time": "0:13:08"}
|
||||
{"current_steps": 3835, "total_steps": 3976, "loss": 0.1388, "lr": 1.5525052946313123e-07, "epoch": 6.751760563380282, "percentage": 96.45, "elapsed_time": "5:45:12", "remaining_time": "0:12:41"}
|
||||
{"current_steps": 3840, "total_steps": 3976, "loss": 0.1494, "lr": 1.4452283338012518e-07, "epoch": 6.76056338028169, "percentage": 96.58, "elapsed_time": "5:45:40", "remaining_time": "0:12:14"}
|
||||
{"current_steps": 3845, "total_steps": 3976, "loss": 0.1398, "lr": 1.3417781989411904e-07, "epoch": 6.769366197183099, "percentage": 96.71, "elapsed_time": "5:46:05", "remaining_time": "0:11:47"}
|
||||
{"current_steps": 3850, "total_steps": 3976, "loss": 0.142, "lr": 1.242156883887202e-07, "epoch": 6.778169014084507, "percentage": 96.83, "elapsed_time": "5:46:31", "remaining_time": "0:11:20"}
|
||||
{"current_steps": 3855, "total_steps": 3976, "loss": 0.1372, "lr": 1.1463663086809018e-07, "epoch": 6.786971830985916, "percentage": 96.96, "elapsed_time": "5:46:55", "remaining_time": "0:10:53"}
|
||||
{"current_steps": 3860, "total_steps": 3976, "loss": 0.1566, "lr": 1.0544083195326293e-07, "epoch": 6.795774647887324, "percentage": 97.08, "elapsed_time": "5:47:28", "remaining_time": "0:10:26"}
|
||||
{"current_steps": 3865, "total_steps": 3976, "loss": 0.1376, "lr": 9.662846887856792e-08, "epoch": 6.804577464788732, "percentage": 97.21, "elapsed_time": "5:47:55", "remaining_time": "0:09:59"}
|
||||
{"current_steps": 3870, "total_steps": 3976, "loss": 0.1497, "lr": 8.819971148822159e-08, "epoch": 6.813380281690141, "percentage": 97.33, "elapsed_time": "5:48:23", "remaining_time": "0:09:32"}
|
||||
{"current_steps": 3875, "total_steps": 3976, "loss": 0.1348, "lr": 8.015472223305676e-08, "epoch": 6.822183098591549, "percentage": 97.46, "elapsed_time": "5:48:49", "remaining_time": "0:09:05"}
|
||||
{"current_steps": 3880, "total_steps": 3976, "loss": 0.1358, "lr": 7.249365616738502e-08, "epoch": 6.830985915492958, "percentage": 97.59, "elapsed_time": "5:49:17", "remaining_time": "0:08:38"}
|
||||
{"current_steps": 3885, "total_steps": 3976, "loss": 0.1646, "lr": 6.521666094601475e-08, "epoch": 6.839788732394366, "percentage": 97.71, "elapsed_time": "5:49:41", "remaining_time": "0:08:11"}
|
||||
{"current_steps": 3890, "total_steps": 3976, "loss": 0.1352, "lr": 5.832387682140228e-08, "epoch": 6.848591549295775, "percentage": 97.84, "elapsed_time": "5:50:04", "remaining_time": "0:07:44"}
|
||||
{"current_steps": 3895, "total_steps": 3976, "loss": 0.1506, "lr": 5.181543664094735e-08, "epoch": 6.857394366197183, "percentage": 97.96, "elapsed_time": "5:50:32", "remaining_time": "0:07:17"}
|
||||
{"current_steps": 3900, "total_steps": 3976, "loss": 0.1341, "lr": 4.569146584443518e-08, "epoch": 6.866197183098592, "percentage": 98.09, "elapsed_time": "5:50:58", "remaining_time": "0:06:50"}
|
||||
{"current_steps": 3905, "total_steps": 3976, "loss": 0.148, "lr": 3.995208246161619e-08, "epoch": 6.875, "percentage": 98.21, "elapsed_time": "5:51:29", "remaining_time": "0:06:23"}
|
||||
{"current_steps": 3910, "total_steps": 3976, "loss": 0.1416, "lr": 3.4597397109936704e-08, "epoch": 6.883802816901408, "percentage": 98.34, "elapsed_time": "5:51:55", "remaining_time": "0:05:56"}
|
||||
{"current_steps": 3915, "total_steps": 3976, "loss": 0.1379, "lr": 2.962751299240285e-08, "epoch": 6.892605633802817, "percentage": 98.47, "elapsed_time": "5:52:21", "remaining_time": "0:05:29"}
|
||||
{"current_steps": 3920, "total_steps": 3976, "loss": 0.1238, "lr": 2.5042525895586645e-08, "epoch": 6.901408450704225, "percentage": 98.59, "elapsed_time": "5:52:44", "remaining_time": "0:05:02"}
|
||||
{"current_steps": 3925, "total_steps": 3976, "loss": 0.1281, "lr": 2.0842524187789647e-08, "epoch": 6.910211267605634, "percentage": 98.72, "elapsed_time": "5:53:10", "remaining_time": "0:04:35"}
|
||||
{"current_steps": 3930, "total_steps": 3976, "loss": 0.1484, "lr": 1.7027588817335462e-08, "epoch": 6.919014084507042, "percentage": 98.84, "elapsed_time": "5:53:35", "remaining_time": "0:04:08"}
|
||||
{"current_steps": 3935, "total_steps": 3976, "loss": 0.142, "lr": 1.3597793311004304e-08, "epoch": 6.927816901408451, "percentage": 98.97, "elapsed_time": "5:54:06", "remaining_time": "0:03:41"}
|
||||
{"current_steps": 3940, "total_steps": 3976, "loss": 0.1453, "lr": 1.0553203772627474e-08, "epoch": 6.936619718309859, "percentage": 99.09, "elapsed_time": "5:54:35", "remaining_time": "0:03:14"}
|
||||
{"current_steps": 3945, "total_steps": 3976, "loss": 0.1375, "lr": 7.89387888180171e-09, "epoch": 6.945422535211268, "percentage": 99.22, "elapsed_time": "5:55:03", "remaining_time": "0:02:47"}
|
||||
{"current_steps": 3950, "total_steps": 3976, "loss": 0.1509, "lr": 5.61986989276786e-09, "epoch": 6.954225352112676, "percentage": 99.35, "elapsed_time": "5:55:33", "remaining_time": "0:02:20"}
|
||||
{"current_steps": 3955, "total_steps": 3976, "loss": 0.1297, "lr": 3.7312206334116915e-09, "epoch": 6.963028169014084, "percentage": 99.47, "elapsed_time": "5:56:01", "remaining_time": "0:01:53"}
|
||||
{"current_steps": 3960, "total_steps": 3976, "loss": 0.1501, "lr": 2.227967504433437e-09, "epoch": 6.971830985915493, "percentage": 99.6, "elapsed_time": "5:56:26", "remaining_time": "0:01:26"}
|
||||
{"current_steps": 3965, "total_steps": 3976, "loss": 0.133, "lr": 1.1101394786350306e-09, "epoch": 6.980633802816901, "percentage": 99.72, "elapsed_time": "5:56:51", "remaining_time": "0:00:59"}
|
||||
{"current_steps": 3970, "total_steps": 3976, "loss": 0.1392, "lr": 3.777581003627795e-10, "epoch": 6.98943661971831, "percentage": 99.85, "elapsed_time": "5:57:12", "remaining_time": "0:00:32"}
|
||||
{"current_steps": 3975, "total_steps": 3976, "loss": 0.1479, "lr": 3.0837485098800245e-11, "epoch": 6.998239436619718, "percentage": 99.97, "elapsed_time": "5:57:44", "remaining_time": "0:00:05"}
|
||||
{"current_steps": 3976, "total_steps": 3976, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "5:57:59", "remaining_time": "0:00:00"}
|
||||
8792
trainer_state.json
Normal file
8792
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1816acd6019e2cd62219c59bfa52a23220e419712f9aa6a4ee888c3b8348c0a1
|
||||
size 8593
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user