初始化项目,由ModelHub XC社区提供模型
Model: laion/dev_set_part1_10k_glm_4_7_traces_locetash Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
61
README.md
Normal file
61
README.md
Normal file
@@ -0,0 +1,61 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: apache-2.0
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: dev_set_part1_10k_glm_4_7_traces_locetash
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# dev_set_part1_10k_glm_4_7_traces_locetash
|
||||
|
||||
This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the DCAgent/dev_set_part1_10k_glm_4.7_traces_locetash dataset.
|
||||
|
||||
## Model description
|
||||
|
||||
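A full-parameter fine-tune of Qwen/Qwen3-8B (Qwen3ForCausalLM architecture, bfloat16 weights) trained with LLaMA-Factory. More information needed.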
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 8
|
||||
- gradient_accumulation_steps: 2
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 64
|
||||
- optimizer: AdamW, fused PyTorch implementation (OptimizerNames.ADAMW_TORCH_FUSED), with betas=(0.9,0.98) and epsilon=1e-08; no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.0+cu128
|
||||
- Datasets 4.4.1
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 3.7613453189499078,
|
||||
"achieved_tflops_per_gpu_theoretical": 420.1001657900728,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.06611238420009613,
|
||||
"mfu_percent": 0.3803180302274932,
|
||||
"mfu_percent_theoretical": 42.47726651062415,
|
||||
"total_flos": 8.731129493365719e+17,
|
||||
"train_loss": 0.26536673449334647,
|
||||
"train_runtime": 29015.9795,
|
||||
"train_samples_per_second": 1.978,
|
||||
"train_steps_per_second": 0.124,
|
||||
"valid_targets_mean": 1762.9,
|
||||
"valid_targets_min": 374
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b812cc9c2ca11be19e0299401cfec9f708ef216d985a995ff56e0352f49f7d56
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e3de271f96100be3cd9b5d89642d72e05fe4b49b0f034fab44f4e1477bd3bcd1
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5e868c9fb20082ed55b4bf8de403870939cc42d489c02be627a360c72022cf65
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d5fc487df53121050059bf9bfd5c8ee67d575555246bf78cec5c0f5023ee52eb
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "dev_set_part1_10k_glm_4.7_traces_locetash",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "DCAgent",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "DCAgent/dev_set_part1_10k_glm_4.7_traces_locetash",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/laion/dev_set_part1_10k_glm_4_7_traces_locetash/blob/main/config.json",
|
||||
"wandb_link": "https://wandb.ai/dogml/OpenThoughts-Agent/runs/sft_hf-hub-private_False_dev_set_part1_10k_glm_4-7_traces_locetash_Qwen3-8B",
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 3.7613453189499078,
|
||||
"achieved_tflops_per_gpu_theoretical": 420.1001657900728,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.06611238420009613,
|
||||
"mfu_percent": 0.3803180302274932,
|
||||
"mfu_percent_theoretical": 42.47726651062415,
|
||||
"total_flos": 8.731129493365719e+17,
|
||||
"train_loss": 0.26536673449334647,
|
||||
"train_runtime": 29015.9795,
|
||||
"train_samples_per_second": 1.978,
|
||||
"train_steps_per_second": 0.124,
|
||||
"valid_targets_mean": 1762.9,
|
||||
"valid_targets_min": 374
|
||||
}
|
||||
719
trainer_log.jsonl
Normal file
719
trainer_log.jsonl
Normal file
@@ -0,0 +1,719 @@
|
||||
{"current_steps": 5, "total_steps": 3591, "loss": 0.7789, "lr": 4.444444444444445e-07, "epoch": 0.009746588693957114, "percentage": 0.14, "elapsed_time": "0:00:55", "remaining_time": "11:08:07"}
|
||||
{"current_steps": 10, "total_steps": 3591, "loss": 0.746, "lr": 1.0000000000000002e-06, "epoch": 0.01949317738791423, "percentage": 0.28, "elapsed_time": "0:01:33", "remaining_time": "9:16:04"}
|
||||
{"current_steps": 15, "total_steps": 3591, "loss": 0.7012, "lr": 1.5555555555555558e-06, "epoch": 0.029239766081871343, "percentage": 0.42, "elapsed_time": "0:02:12", "remaining_time": "8:47:29"}
|
||||
{"current_steps": 20, "total_steps": 3591, "loss": 0.6875, "lr": 2.1111111111111114e-06, "epoch": 0.03898635477582846, "percentage": 0.56, "elapsed_time": "0:02:59", "remaining_time": "8:53:20"}
|
||||
{"current_steps": 25, "total_steps": 3591, "loss": 0.6991, "lr": 2.666666666666667e-06, "epoch": 0.04873294346978557, "percentage": 0.7, "elapsed_time": "0:03:33", "remaining_time": "8:27:41"}
|
||||
{"current_steps": 30, "total_steps": 3591, "loss": 0.7086, "lr": 3.2222222222222227e-06, "epoch": 0.05847953216374269, "percentage": 0.84, "elapsed_time": "0:04:19", "remaining_time": "8:32:36"}
|
||||
{"current_steps": 35, "total_steps": 3591, "loss": 0.6083, "lr": 3.777777777777778e-06, "epoch": 0.0682261208576998, "percentage": 0.97, "elapsed_time": "0:05:04", "remaining_time": "8:35:12"}
|
||||
{"current_steps": 40, "total_steps": 3591, "loss": 0.5703, "lr": 4.333333333333334e-06, "epoch": 0.07797270955165692, "percentage": 1.11, "elapsed_time": "0:05:42", "remaining_time": "8:26:37"}
|
||||
{"current_steps": 45, "total_steps": 3591, "loss": 0.571, "lr": 4.888888888888889e-06, "epoch": 0.08771929824561403, "percentage": 1.25, "elapsed_time": "0:06:28", "remaining_time": "8:29:47"}
|
||||
{"current_steps": 50, "total_steps": 3591, "loss": 0.5773, "lr": 5.444444444444445e-06, "epoch": 0.09746588693957114, "percentage": 1.39, "elapsed_time": "0:07:07", "remaining_time": "8:25:04"}
|
||||
{"current_steps": 55, "total_steps": 3591, "loss": 0.5491, "lr": 6e-06, "epoch": 0.10721247563352826, "percentage": 1.53, "elapsed_time": "0:07:49", "remaining_time": "8:23:14"}
|
||||
{"current_steps": 60, "total_steps": 3591, "loss": 0.5408, "lr": 6.555555555555556e-06, "epoch": 0.11695906432748537, "percentage": 1.67, "elapsed_time": "0:08:33", "remaining_time": "8:23:19"}
|
||||
{"current_steps": 65, "total_steps": 3591, "loss": 0.5643, "lr": 7.111111111111112e-06, "epoch": 0.1267056530214425, "percentage": 1.81, "elapsed_time": "0:09:14", "remaining_time": "8:21:39"}
|
||||
{"current_steps": 70, "total_steps": 3591, "loss": 0.5033, "lr": 7.666666666666667e-06, "epoch": 0.1364522417153996, "percentage": 1.95, "elapsed_time": "0:10:03", "remaining_time": "8:26:13"}
|
||||
{"current_steps": 75, "total_steps": 3591, "loss": 0.5519, "lr": 8.222222222222222e-06, "epoch": 0.14619883040935672, "percentage": 2.09, "elapsed_time": "0:10:53", "remaining_time": "8:30:32"}
|
||||
{"current_steps": 80, "total_steps": 3591, "loss": 0.5363, "lr": 8.777777777777778e-06, "epoch": 0.15594541910331383, "percentage": 2.23, "elapsed_time": "0:11:37", "remaining_time": "8:30:29"}
|
||||
{"current_steps": 85, "total_steps": 3591, "loss": 0.5107, "lr": 9.333333333333334e-06, "epoch": 0.16569200779727095, "percentage": 2.37, "elapsed_time": "0:12:22", "remaining_time": "8:30:23"}
|
||||
{"current_steps": 90, "total_steps": 3591, "loss": 0.434, "lr": 9.88888888888889e-06, "epoch": 0.17543859649122806, "percentage": 2.51, "elapsed_time": "0:13:04", "remaining_time": "8:28:20"}
|
||||
{"current_steps": 95, "total_steps": 3591, "loss": 0.4961, "lr": 1.0444444444444445e-05, "epoch": 0.18518518518518517, "percentage": 2.65, "elapsed_time": "0:13:51", "remaining_time": "8:30:06"}
|
||||
{"current_steps": 100, "total_steps": 3591, "loss": 0.4412, "lr": 1.1000000000000001e-05, "epoch": 0.1949317738791423, "percentage": 2.78, "elapsed_time": "0:14:38", "remaining_time": "8:31:01"}
|
||||
{"current_steps": 105, "total_steps": 3591, "loss": 0.5423, "lr": 1.1555555555555556e-05, "epoch": 0.2046783625730994, "percentage": 2.92, "elapsed_time": "0:15:25", "remaining_time": "8:32:18"}
|
||||
{"current_steps": 110, "total_steps": 3591, "loss": 0.4743, "lr": 1.211111111111111e-05, "epoch": 0.21442495126705652, "percentage": 3.06, "elapsed_time": "0:16:05", "remaining_time": "8:29:09"}
|
||||
{"current_steps": 115, "total_steps": 3591, "loss": 0.437, "lr": 1.2666666666666667e-05, "epoch": 0.22417153996101363, "percentage": 3.2, "elapsed_time": "0:16:51", "remaining_time": "8:29:27"}
|
||||
{"current_steps": 120, "total_steps": 3591, "loss": 0.4986, "lr": 1.3222222222222223e-05, "epoch": 0.23391812865497075, "percentage": 3.34, "elapsed_time": "0:17:31", "remaining_time": "8:26:46"}
|
||||
{"current_steps": 125, "total_steps": 3591, "loss": 0.4542, "lr": 1.377777777777778e-05, "epoch": 0.24366471734892786, "percentage": 3.48, "elapsed_time": "0:18:08", "remaining_time": "8:22:50"}
|
||||
{"current_steps": 130, "total_steps": 3591, "loss": 0.4538, "lr": 1.4333333333333334e-05, "epoch": 0.253411306042885, "percentage": 3.62, "elapsed_time": "0:18:49", "remaining_time": "8:21:18"}
|
||||
{"current_steps": 135, "total_steps": 3591, "loss": 0.4356, "lr": 1.488888888888889e-05, "epoch": 0.2631578947368421, "percentage": 3.76, "elapsed_time": "0:19:24", "remaining_time": "8:16:41"}
|
||||
{"current_steps": 140, "total_steps": 3591, "loss": 0.4952, "lr": 1.5444444444444446e-05, "epoch": 0.2729044834307992, "percentage": 3.9, "elapsed_time": "0:19:58", "remaining_time": "8:12:16"}
|
||||
{"current_steps": 145, "total_steps": 3591, "loss": 0.4124, "lr": 1.6000000000000003e-05, "epoch": 0.2826510721247563, "percentage": 4.04, "elapsed_time": "0:20:48", "remaining_time": "8:14:20"}
|
||||
{"current_steps": 150, "total_steps": 3591, "loss": 0.4608, "lr": 1.6555555555555556e-05, "epoch": 0.29239766081871343, "percentage": 4.18, "elapsed_time": "0:21:25", "remaining_time": "8:11:34"}
|
||||
{"current_steps": 155, "total_steps": 3591, "loss": 0.3841, "lr": 1.7111111111111112e-05, "epoch": 0.30214424951267055, "percentage": 4.32, "elapsed_time": "0:22:05", "remaining_time": "8:09:36"}
|
||||
{"current_steps": 160, "total_steps": 3591, "loss": 0.4672, "lr": 1.7666666666666668e-05, "epoch": 0.31189083820662766, "percentage": 4.46, "elapsed_time": "0:22:41", "remaining_time": "8:06:31"}
|
||||
{"current_steps": 165, "total_steps": 3591, "loss": 0.4204, "lr": 1.8222222222222224e-05, "epoch": 0.3216374269005848, "percentage": 4.59, "elapsed_time": "0:23:24", "remaining_time": "8:05:58"}
|
||||
{"current_steps": 170, "total_steps": 3591, "loss": 0.4349, "lr": 1.877777777777778e-05, "epoch": 0.3313840155945419, "percentage": 4.73, "elapsed_time": "0:23:59", "remaining_time": "8:02:47"}
|
||||
{"current_steps": 175, "total_steps": 3591, "loss": 0.3493, "lr": 1.9333333333333333e-05, "epoch": 0.341130604288499, "percentage": 4.87, "elapsed_time": "0:24:51", "remaining_time": "8:05:16"}
|
||||
{"current_steps": 180, "total_steps": 3591, "loss": 0.3674, "lr": 1.988888888888889e-05, "epoch": 0.3508771929824561, "percentage": 5.01, "elapsed_time": "0:25:36", "remaining_time": "8:05:16"}
|
||||
{"current_steps": 185, "total_steps": 3591, "loss": 0.4434, "lr": 2.0444444444444446e-05, "epoch": 0.36062378167641324, "percentage": 5.15, "elapsed_time": "0:26:22", "remaining_time": "8:05:38"}
|
||||
{"current_steps": 190, "total_steps": 3591, "loss": 0.3945, "lr": 2.1000000000000002e-05, "epoch": 0.37037037037037035, "percentage": 5.29, "elapsed_time": "0:27:03", "remaining_time": "8:04:23"}
|
||||
{"current_steps": 195, "total_steps": 3591, "loss": 0.4804, "lr": 2.1555555555555555e-05, "epoch": 0.38011695906432746, "percentage": 5.43, "elapsed_time": "0:27:43", "remaining_time": "8:02:53"}
|
||||
{"current_steps": 200, "total_steps": 3591, "loss": 0.3385, "lr": 2.2111111111111115e-05, "epoch": 0.3898635477582846, "percentage": 5.57, "elapsed_time": "0:28:26", "remaining_time": "8:02:15"}
|
||||
{"current_steps": 205, "total_steps": 3591, "loss": 0.4083, "lr": 2.2666666666666668e-05, "epoch": 0.3996101364522417, "percentage": 5.71, "elapsed_time": "0:29:04", "remaining_time": "8:00:14"}
|
||||
{"current_steps": 210, "total_steps": 3591, "loss": 0.4086, "lr": 2.3222222222222227e-05, "epoch": 0.4093567251461988, "percentage": 5.85, "elapsed_time": "0:29:56", "remaining_time": "8:02:09"}
|
||||
{"current_steps": 215, "total_steps": 3591, "loss": 0.4115, "lr": 2.377777777777778e-05, "epoch": 0.4191033138401559, "percentage": 5.99, "elapsed_time": "0:30:37", "remaining_time": "8:00:59"}
|
||||
{"current_steps": 220, "total_steps": 3591, "loss": 0.4103, "lr": 2.4333333333333333e-05, "epoch": 0.42884990253411304, "percentage": 6.13, "elapsed_time": "0:31:17", "remaining_time": "7:59:29"}
|
||||
{"current_steps": 225, "total_steps": 3591, "loss": 0.3682, "lr": 2.4888888888888893e-05, "epoch": 0.43859649122807015, "percentage": 6.27, "elapsed_time": "0:31:52", "remaining_time": "7:56:53"}
|
||||
{"current_steps": 230, "total_steps": 3591, "loss": 0.3898, "lr": 2.5444444444444446e-05, "epoch": 0.44834307992202727, "percentage": 6.4, "elapsed_time": "0:32:32", "remaining_time": "7:55:26"}
|
||||
{"current_steps": 235, "total_steps": 3591, "loss": 0.4194, "lr": 2.6000000000000002e-05, "epoch": 0.4580896686159844, "percentage": 6.54, "elapsed_time": "0:33:12", "remaining_time": "7:54:20"}
|
||||
{"current_steps": 240, "total_steps": 3591, "loss": 0.3707, "lr": 2.6555555555555558e-05, "epoch": 0.4678362573099415, "percentage": 6.68, "elapsed_time": "0:33:49", "remaining_time": "7:52:20"}
|
||||
{"current_steps": 245, "total_steps": 3591, "loss": 0.3706, "lr": 2.7111111111111114e-05, "epoch": 0.4775828460038986, "percentage": 6.82, "elapsed_time": "0:34:29", "remaining_time": "7:51:07"}
|
||||
{"current_steps": 250, "total_steps": 3591, "loss": 0.3923, "lr": 2.7666666666666667e-05, "epoch": 0.4873294346978557, "percentage": 6.96, "elapsed_time": "0:35:14", "remaining_time": "7:51:04"}
|
||||
{"current_steps": 255, "total_steps": 3591, "loss": 0.3818, "lr": 2.8222222222222227e-05, "epoch": 0.49707602339181284, "percentage": 7.1, "elapsed_time": "0:35:49", "remaining_time": "7:48:44"}
|
||||
{"current_steps": 260, "total_steps": 3591, "loss": 0.3736, "lr": 2.877777777777778e-05, "epoch": 0.50682261208577, "percentage": 7.24, "elapsed_time": "0:36:27", "remaining_time": "7:47:07"}
|
||||
{"current_steps": 265, "total_steps": 3591, "loss": 0.3376, "lr": 2.9333333333333333e-05, "epoch": 0.5165692007797271, "percentage": 7.38, "elapsed_time": "0:37:05", "remaining_time": "7:45:36"}
|
||||
{"current_steps": 270, "total_steps": 3591, "loss": 0.3613, "lr": 2.9888888888888892e-05, "epoch": 0.5263157894736842, "percentage": 7.52, "elapsed_time": "0:37:37", "remaining_time": "7:42:42"}
|
||||
{"current_steps": 275, "total_steps": 3591, "loss": 0.3527, "lr": 3.0444444444444445e-05, "epoch": 0.5360623781676414, "percentage": 7.66, "elapsed_time": "0:38:20", "remaining_time": "7:42:18"}
|
||||
{"current_steps": 280, "total_steps": 3591, "loss": 0.3783, "lr": 3.1e-05, "epoch": 0.5458089668615984, "percentage": 7.8, "elapsed_time": "0:39:05", "remaining_time": "7:42:14"}
|
||||
{"current_steps": 285, "total_steps": 3591, "loss": 0.3255, "lr": 3.155555555555556e-05, "epoch": 0.5555555555555556, "percentage": 7.94, "elapsed_time": "0:39:54", "remaining_time": "7:42:57"}
|
||||
{"current_steps": 290, "total_steps": 3591, "loss": 0.3229, "lr": 3.2111111111111114e-05, "epoch": 0.5653021442495126, "percentage": 8.08, "elapsed_time": "0:40:39", "remaining_time": "7:42:45"}
|
||||
{"current_steps": 295, "total_steps": 3591, "loss": 0.3401, "lr": 3.266666666666667e-05, "epoch": 0.5750487329434698, "percentage": 8.21, "elapsed_time": "0:41:19", "remaining_time": "7:41:40"}
|
||||
{"current_steps": 300, "total_steps": 3591, "loss": 0.2983, "lr": 3.3222222222222226e-05, "epoch": 0.5847953216374269, "percentage": 8.35, "elapsed_time": "0:41:55", "remaining_time": "7:39:59"}
|
||||
{"current_steps": 305, "total_steps": 3591, "loss": 0.3777, "lr": 3.377777777777778e-05, "epoch": 0.594541910331384, "percentage": 8.49, "elapsed_time": "0:42:37", "remaining_time": "7:39:10"}
|
||||
{"current_steps": 310, "total_steps": 3591, "loss": 0.3801, "lr": 3.433333333333333e-05, "epoch": 0.6042884990253411, "percentage": 8.63, "elapsed_time": "0:43:18", "remaining_time": "7:38:17"}
|
||||
{"current_steps": 315, "total_steps": 3591, "loss": 0.3851, "lr": 3.4888888888888895e-05, "epoch": 0.6140350877192983, "percentage": 8.77, "elapsed_time": "0:43:55", "remaining_time": "7:36:53"}
|
||||
{"current_steps": 320, "total_steps": 3591, "loss": 0.3639, "lr": 3.5444444444444445e-05, "epoch": 0.6237816764132553, "percentage": 8.91, "elapsed_time": "0:44:34", "remaining_time": "7:35:33"}
|
||||
{"current_steps": 325, "total_steps": 3591, "loss": 0.3915, "lr": 3.6e-05, "epoch": 0.6335282651072125, "percentage": 9.05, "elapsed_time": "0:45:16", "remaining_time": "7:34:59"}
|
||||
{"current_steps": 330, "total_steps": 3591, "loss": 0.3845, "lr": 3.655555555555556e-05, "epoch": 0.6432748538011696, "percentage": 9.19, "elapsed_time": "0:46:03", "remaining_time": "7:35:12"}
|
||||
{"current_steps": 335, "total_steps": 3591, "loss": 0.3501, "lr": 3.7111111111111113e-05, "epoch": 0.6530214424951267, "percentage": 9.33, "elapsed_time": "0:46:44", "remaining_time": "7:34:18"}
|
||||
{"current_steps": 340, "total_steps": 3591, "loss": 0.3063, "lr": 3.766666666666667e-05, "epoch": 0.6627680311890838, "percentage": 9.47, "elapsed_time": "0:47:24", "remaining_time": "7:33:13"}
|
||||
{"current_steps": 345, "total_steps": 3591, "loss": 0.4012, "lr": 3.8222222222222226e-05, "epoch": 0.672514619883041, "percentage": 9.61, "elapsed_time": "0:48:07", "remaining_time": "7:32:49"}
|
||||
{"current_steps": 350, "total_steps": 3591, "loss": 0.2991, "lr": 3.877777777777778e-05, "epoch": 0.682261208576998, "percentage": 9.75, "elapsed_time": "0:48:56", "remaining_time": "7:33:08"}
|
||||
{"current_steps": 355, "total_steps": 3591, "loss": 0.3585, "lr": 3.933333333333333e-05, "epoch": 0.6920077972709552, "percentage": 9.89, "elapsed_time": "0:49:32", "remaining_time": "7:31:36"}
|
||||
{"current_steps": 360, "total_steps": 3591, "loss": 0.3567, "lr": 3.9888888888888895e-05, "epoch": 0.7017543859649122, "percentage": 10.03, "elapsed_time": "0:50:18", "remaining_time": "7:31:31"}
|
||||
{"current_steps": 365, "total_steps": 3591, "loss": 0.3417, "lr": 3.999984873262707e-05, "epoch": 0.7115009746588694, "percentage": 10.16, "elapsed_time": "0:51:05", "remaining_time": "7:31:30"}
|
||||
{"current_steps": 370, "total_steps": 3591, "loss": 0.3401, "lr": 3.999923421284616e-05, "epoch": 0.7212475633528265, "percentage": 10.3, "elapsed_time": "0:51:44", "remaining_time": "7:30:24"}
|
||||
{"current_steps": 375, "total_steps": 3591, "loss": 0.3453, "lr": 3.99981470009598e-05, "epoch": 0.7309941520467836, "percentage": 10.44, "elapsed_time": "0:52:28", "remaining_time": "7:30:03"}
|
||||
{"current_steps": 380, "total_steps": 3591, "loss": 0.3415, "lr": 3.999658712266477e-05, "epoch": 0.7407407407407407, "percentage": 10.58, "elapsed_time": "0:53:11", "remaining_time": "7:29:26"}
|
||||
{"current_steps": 385, "total_steps": 3591, "loss": 0.3542, "lr": 3.999455461482961e-05, "epoch": 0.7504873294346979, "percentage": 10.72, "elapsed_time": "0:54:01", "remaining_time": "7:29:50"}
|
||||
{"current_steps": 390, "total_steps": 3591, "loss": 0.3406, "lr": 3.999204952549368e-05, "epoch": 0.7602339181286549, "percentage": 10.86, "elapsed_time": "0:54:39", "remaining_time": "7:28:35"}
|
||||
{"current_steps": 395, "total_steps": 3591, "loss": 0.3332, "lr": 3.998907191386603e-05, "epoch": 0.7699805068226121, "percentage": 11.0, "elapsed_time": "0:55:15", "remaining_time": "7:27:04"}
|
||||
{"current_steps": 400, "total_steps": 3591, "loss": 0.344, "lr": 3.998562185032404e-05, "epoch": 0.7797270955165692, "percentage": 11.14, "elapsed_time": "0:55:54", "remaining_time": "7:26:02"}
|
||||
{"current_steps": 405, "total_steps": 3591, "loss": 0.3186, "lr": 3.998169941641171e-05, "epoch": 0.7894736842105263, "percentage": 11.28, "elapsed_time": "0:56:39", "remaining_time": "7:25:39"}
|
||||
{"current_steps": 410, "total_steps": 3591, "loss": 0.3039, "lr": 3.997730470483779e-05, "epoch": 0.7992202729044834, "percentage": 11.42, "elapsed_time": "0:57:20", "remaining_time": "7:24:49"}
|
||||
{"current_steps": 415, "total_steps": 3591, "loss": 0.4332, "lr": 3.9972437819473516e-05, "epoch": 0.8089668615984406, "percentage": 11.56, "elapsed_time": "0:57:57", "remaining_time": "7:23:33"}
|
||||
{"current_steps": 420, "total_steps": 3591, "loss": 0.3026, "lr": 3.996709887535021e-05, "epoch": 0.8187134502923976, "percentage": 11.7, "elapsed_time": "0:58:34", "remaining_time": "7:22:17"}
|
||||
{"current_steps": 425, "total_steps": 3591, "loss": 0.3121, "lr": 3.996128799865654e-05, "epoch": 0.8284600389863548, "percentage": 11.84, "elapsed_time": "0:59:16", "remaining_time": "7:21:32"}
|
||||
{"current_steps": 430, "total_steps": 3591, "loss": 0.3458, "lr": 3.995500532673553e-05, "epoch": 0.8382066276803118, "percentage": 11.97, "elapsed_time": "1:00:01", "remaining_time": "7:21:13"}
|
||||
{"current_steps": 435, "total_steps": 3591, "loss": 0.3743, "lr": 3.994825100808136e-05, "epoch": 0.847953216374269, "percentage": 12.11, "elapsed_time": "1:00:43", "remaining_time": "7:20:34"}
|
||||
{"current_steps": 440, "total_steps": 3591, "loss": 0.3362, "lr": 3.994102520233578e-05, "epoch": 0.8576998050682261, "percentage": 12.25, "elapsed_time": "1:01:23", "remaining_time": "7:19:39"}
|
||||
{"current_steps": 445, "total_steps": 3591, "loss": 0.3027, "lr": 3.993332808028441e-05, "epoch": 0.8674463937621832, "percentage": 12.39, "elapsed_time": "1:02:05", "remaining_time": "7:18:54"}
|
||||
{"current_steps": 450, "total_steps": 3591, "loss": 0.3155, "lr": 3.992515982385264e-05, "epoch": 0.8771929824561403, "percentage": 12.53, "elapsed_time": "1:02:44", "remaining_time": "7:17:54"}
|
||||
{"current_steps": 455, "total_steps": 3591, "loss": 0.3385, "lr": 3.991652062610139e-05, "epoch": 0.8869395711500975, "percentage": 12.67, "elapsed_time": "1:03:27", "remaining_time": "7:17:23"}
|
||||
{"current_steps": 460, "total_steps": 3591, "loss": 0.3964, "lr": 3.99074106912225e-05, "epoch": 0.8966861598440545, "percentage": 12.81, "elapsed_time": "1:04:07", "remaining_time": "7:16:27"}
|
||||
{"current_steps": 465, "total_steps": 3591, "loss": 0.3059, "lr": 3.989783023453394e-05, "epoch": 0.9064327485380117, "percentage": 12.95, "elapsed_time": "1:04:53", "remaining_time": "7:16:14"}
|
||||
{"current_steps": 470, "total_steps": 3591, "loss": 0.2936, "lr": 3.9887779482474694e-05, "epoch": 0.9161793372319688, "percentage": 13.09, "elapsed_time": "1:05:45", "remaining_time": "7:16:37"}
|
||||
{"current_steps": 475, "total_steps": 3591, "loss": 0.2974, "lr": 3.98772586725994e-05, "epoch": 0.9259259259259259, "percentage": 13.23, "elapsed_time": "1:06:28", "remaining_time": "7:16:06"}
|
||||
{"current_steps": 480, "total_steps": 3591, "loss": 0.3414, "lr": 3.986626805357277e-05, "epoch": 0.935672514619883, "percentage": 13.37, "elapsed_time": "1:07:14", "remaining_time": "7:15:50"}
|
||||
{"current_steps": 485, "total_steps": 3591, "loss": 0.3312, "lr": 3.98548078851637e-05, "epoch": 0.9454191033138402, "percentage": 13.51, "elapsed_time": "1:08:01", "remaining_time": "7:15:36"}
|
||||
{"current_steps": 490, "total_steps": 3591, "loss": 0.278, "lr": 3.9842878438239123e-05, "epoch": 0.9551656920077972, "percentage": 13.65, "elapsed_time": "1:08:36", "remaining_time": "7:14:08"}
|
||||
{"current_steps": 495, "total_steps": 3591, "loss": 0.3103, "lr": 3.9830479994757594e-05, "epoch": 0.9649122807017544, "percentage": 13.78, "elapsed_time": "1:09:09", "remaining_time": "7:12:35"}
|
||||
{"current_steps": 500, "total_steps": 3591, "loss": 0.3179, "lr": 3.981761284776266e-05, "epoch": 0.9746588693957114, "percentage": 13.92, "elapsed_time": "1:09:50", "remaining_time": "7:11:45"}
|
||||
{"current_steps": 505, "total_steps": 3591, "loss": 0.3469, "lr": 3.9804277301375886e-05, "epoch": 0.9844054580896686, "percentage": 14.06, "elapsed_time": "1:10:32", "remaining_time": "7:11:02"}
|
||||
{"current_steps": 510, "total_steps": 3591, "loss": 0.3091, "lr": 3.979047367078974e-05, "epoch": 0.9941520467836257, "percentage": 14.2, "elapsed_time": "1:11:18", "remaining_time": "7:10:46"}
|
||||
{"current_steps": 515, "total_steps": 3591, "loss": 0.3056, "lr": 3.977620228226006e-05, "epoch": 1.003898635477583, "percentage": 14.34, "elapsed_time": "1:11:52", "remaining_time": "7:09:15"}
|
||||
{"current_steps": 520, "total_steps": 3591, "loss": 0.2912, "lr": 3.976146347309841e-05, "epoch": 1.01364522417154, "percentage": 14.48, "elapsed_time": "1:12:32", "remaining_time": "7:08:25"}
|
||||
{"current_steps": 525, "total_steps": 3591, "loss": 0.3379, "lr": 3.974625759166405e-05, "epoch": 1.023391812865497, "percentage": 14.62, "elapsed_time": "1:13:20", "remaining_time": "7:08:20"}
|
||||
{"current_steps": 530, "total_steps": 3591, "loss": 0.3057, "lr": 3.973058499735575e-05, "epoch": 1.0331384015594542, "percentage": 14.76, "elapsed_time": "1:13:52", "remaining_time": "7:06:40"}
|
||||
{"current_steps": 535, "total_steps": 3591, "loss": 0.3312, "lr": 3.971444606060328e-05, "epoch": 1.0428849902534112, "percentage": 14.9, "elapsed_time": "1:14:34", "remaining_time": "7:05:58"}
|
||||
{"current_steps": 540, "total_steps": 3591, "loss": 0.2962, "lr": 3.9697841162858635e-05, "epoch": 1.0526315789473684, "percentage": 15.04, "elapsed_time": "1:15:11", "remaining_time": "7:04:49"}
|
||||
{"current_steps": 545, "total_steps": 3591, "loss": 0.301, "lr": 3.968077069658706e-05, "epoch": 1.0623781676413255, "percentage": 15.18, "elapsed_time": "1:15:53", "remaining_time": "7:04:07"}
|
||||
{"current_steps": 550, "total_steps": 3591, "loss": 0.3077, "lr": 3.966323506525772e-05, "epoch": 1.0721247563352827, "percentage": 15.32, "elapsed_time": "1:16:34", "remaining_time": "7:03:24"}
|
||||
{"current_steps": 555, "total_steps": 3591, "loss": 0.2818, "lr": 3.9645234683334226e-05, "epoch": 1.0818713450292399, "percentage": 15.46, "elapsed_time": "1:17:12", "remaining_time": "7:02:18"}
|
||||
{"current_steps": 560, "total_steps": 3591, "loss": 0.2737, "lr": 3.962676997626478e-05, "epoch": 1.0916179337231968, "percentage": 15.59, "elapsed_time": "1:17:55", "remaining_time": "7:01:48"}
|
||||
{"current_steps": 565, "total_steps": 3591, "loss": 0.3187, "lr": 3.9607841380472146e-05, "epoch": 1.101364522417154, "percentage": 15.73, "elapsed_time": "1:18:31", "remaining_time": "7:00:31"}
|
||||
{"current_steps": 570, "total_steps": 3591, "loss": 0.2908, "lr": 3.9588449343343365e-05, "epoch": 1.1111111111111112, "percentage": 15.87, "elapsed_time": "1:19:03", "remaining_time": "6:58:59"}
|
||||
{"current_steps": 575, "total_steps": 3591, "loss": 0.3217, "lr": 3.956859432321911e-05, "epoch": 1.120857699805068, "percentage": 16.01, "elapsed_time": "1:19:47", "remaining_time": "6:58:30"}
|
||||
{"current_steps": 580, "total_steps": 3591, "loss": 0.2959, "lr": 3.9548276789382926e-05, "epoch": 1.1306042884990253, "percentage": 16.15, "elapsed_time": "1:20:23", "remaining_time": "6:57:20"}
|
||||
{"current_steps": 585, "total_steps": 3591, "loss": 0.3213, "lr": 3.952749722205009e-05, "epoch": 1.1403508771929824, "percentage": 16.29, "elapsed_time": "1:21:06", "remaining_time": "6:56:46"}
|
||||
{"current_steps": 590, "total_steps": 3591, "loss": 0.3114, "lr": 3.9506256112356275e-05, "epoch": 1.1500974658869396, "percentage": 16.43, "elapsed_time": "1:21:54", "remaining_time": "6:56:38"}
|
||||
{"current_steps": 595, "total_steps": 3591, "loss": 0.2775, "lr": 3.9484553962345956e-05, "epoch": 1.1598440545808968, "percentage": 16.57, "elapsed_time": "1:22:23", "remaining_time": "6:54:52"}
|
||||
{"current_steps": 600, "total_steps": 3591, "loss": 0.299, "lr": 3.946239128496051e-05, "epoch": 1.1695906432748537, "percentage": 16.71, "elapsed_time": "1:23:07", "remaining_time": "6:54:20"}
|
||||
{"current_steps": 605, "total_steps": 3591, "loss": 0.2671, "lr": 3.943976860402613e-05, "epoch": 1.179337231968811, "percentage": 16.85, "elapsed_time": "1:23:51", "remaining_time": "6:53:55"}
|
||||
{"current_steps": 610, "total_steps": 3591, "loss": 0.3, "lr": 3.941668645424142e-05, "epoch": 1.189083820662768, "percentage": 16.99, "elapsed_time": "1:24:21", "remaining_time": "6:52:15"}
|
||||
{"current_steps": 615, "total_steps": 3591, "loss": 0.3419, "lr": 3.939314538116478e-05, "epoch": 1.198830409356725, "percentage": 17.13, "elapsed_time": "1:24:57", "remaining_time": "6:51:05"}
|
||||
{"current_steps": 620, "total_steps": 3591, "loss": 0.2968, "lr": 3.936914594120148e-05, "epoch": 1.2085769980506822, "percentage": 17.27, "elapsed_time": "1:25:35", "remaining_time": "6:50:08"}
|
||||
{"current_steps": 625, "total_steps": 3591, "loss": 0.2974, "lr": 3.9344688701590516e-05, "epoch": 1.2183235867446394, "percentage": 17.4, "elapsed_time": "1:26:10", "remaining_time": "6:48:57"}
|
||||
{"current_steps": 630, "total_steps": 3591, "loss": 0.2946, "lr": 3.931977424039124e-05, "epoch": 1.2280701754385965, "percentage": 17.54, "elapsed_time": "1:26:56", "remaining_time": "6:48:35"}
|
||||
{"current_steps": 635, "total_steps": 3591, "loss": 0.3066, "lr": 3.929440314646966e-05, "epoch": 1.2378167641325537, "percentage": 17.68, "elapsed_time": "1:27:34", "remaining_time": "6:47:40"}
|
||||
{"current_steps": 640, "total_steps": 3591, "loss": 0.2986, "lr": 3.926857601948451e-05, "epoch": 1.2475633528265107, "percentage": 17.82, "elapsed_time": "1:28:19", "remaining_time": "6:47:17"}
|
||||
{"current_steps": 645, "total_steps": 3591, "loss": 0.3146, "lr": 3.9242293469873126e-05, "epoch": 1.2573099415204678, "percentage": 17.96, "elapsed_time": "1:29:00", "remaining_time": "6:46:33"}
|
||||
{"current_steps": 650, "total_steps": 3591, "loss": 0.2884, "lr": 3.921555611883697e-05, "epoch": 1.267056530214425, "percentage": 18.1, "elapsed_time": "1:29:33", "remaining_time": "6:45:10"}
|
||||
{"current_steps": 655, "total_steps": 3591, "loss": 0.2862, "lr": 3.9188364598327e-05, "epoch": 1.276803118908382, "percentage": 18.24, "elapsed_time": "1:30:08", "remaining_time": "6:44:02"}
|
||||
{"current_steps": 660, "total_steps": 3591, "loss": 0.3172, "lr": 3.916071955102865e-05, "epoch": 1.286549707602339, "percentage": 18.38, "elapsed_time": "1:30:53", "remaining_time": "6:43:39"}
|
||||
{"current_steps": 665, "total_steps": 3591, "loss": 0.2896, "lr": 3.913262163034673e-05, "epoch": 1.2962962962962963, "percentage": 18.52, "elapsed_time": "1:31:35", "remaining_time": "6:43:01"}
|
||||
{"current_steps": 670, "total_steps": 3591, "loss": 0.3223, "lr": 3.9104071500389936e-05, "epoch": 1.3060428849902534, "percentage": 18.66, "elapsed_time": "1:32:18", "remaining_time": "6:42:26"}
|
||||
{"current_steps": 675, "total_steps": 3591, "loss": 0.3187, "lr": 3.9075069835955155e-05, "epoch": 1.3157894736842106, "percentage": 18.8, "elapsed_time": "1:33:02", "remaining_time": "6:41:54"}
|
||||
{"current_steps": 680, "total_steps": 3591, "loss": 0.3007, "lr": 3.904561732251152e-05, "epoch": 1.3255360623781676, "percentage": 18.94, "elapsed_time": "1:33:43", "remaining_time": "6:41:14"}
|
||||
{"current_steps": 685, "total_steps": 3591, "loss": 0.3298, "lr": 3.901571465618422e-05, "epoch": 1.3352826510721247, "percentage": 19.08, "elapsed_time": "1:34:25", "remaining_time": "6:40:34"}
|
||||
{"current_steps": 690, "total_steps": 3591, "loss": 0.2657, "lr": 3.8985362543738025e-05, "epoch": 1.345029239766082, "percentage": 19.21, "elapsed_time": "1:35:07", "remaining_time": "6:39:56"}
|
||||
{"current_steps": 695, "total_steps": 3591, "loss": 0.3372, "lr": 3.895456170256062e-05, "epoch": 1.3547758284600389, "percentage": 19.35, "elapsed_time": "1:35:47", "remaining_time": "6:39:07"}
|
||||
{"current_steps": 700, "total_steps": 3591, "loss": 0.3447, "lr": 3.892331286064558e-05, "epoch": 1.364522417153996, "percentage": 19.49, "elapsed_time": "1:36:18", "remaining_time": "6:37:45"}
|
||||
{"current_steps": 705, "total_steps": 3591, "loss": 0.2768, "lr": 3.8891616756575225e-05, "epoch": 1.3742690058479532, "percentage": 19.63, "elapsed_time": "1:37:03", "remaining_time": "6:37:20"}
|
||||
{"current_steps": 710, "total_steps": 3591, "loss": 0.2887, "lr": 3.8859474139503174e-05, "epoch": 1.3840155945419104, "percentage": 19.77, "elapsed_time": "1:37:47", "remaining_time": "6:36:48"}
|
||||
{"current_steps": 715, "total_steps": 3591, "loss": 0.3192, "lr": 3.882688576913657e-05, "epoch": 1.3937621832358675, "percentage": 19.91, "elapsed_time": "1:38:28", "remaining_time": "6:36:08"}
|
||||
{"current_steps": 720, "total_steps": 3591, "loss": 0.3145, "lr": 3.879385241571817e-05, "epoch": 1.4035087719298245, "percentage": 20.05, "elapsed_time": "1:39:04", "remaining_time": "6:35:02"}
|
||||
{"current_steps": 725, "total_steps": 3591, "loss": 0.2724, "lr": 3.876037486000814e-05, "epoch": 1.4132553606237817, "percentage": 20.19, "elapsed_time": "1:39:45", "remaining_time": "6:34:20"}
|
||||
{"current_steps": 730, "total_steps": 3591, "loss": 0.2423, "lr": 3.87264538932656e-05, "epoch": 1.4230019493177388, "percentage": 20.33, "elapsed_time": "1:40:22", "remaining_time": "6:33:21"}
|
||||
{"current_steps": 735, "total_steps": 3591, "loss": 0.2898, "lr": 3.869209031722989e-05, "epoch": 1.4327485380116958, "percentage": 20.47, "elapsed_time": "1:41:00", "remaining_time": "6:32:28"}
|
||||
{"current_steps": 740, "total_steps": 3591, "loss": 0.2993, "lr": 3.8657284944101686e-05, "epoch": 1.442495126705653, "percentage": 20.61, "elapsed_time": "1:41:37", "remaining_time": "6:31:31"}
|
||||
{"current_steps": 745, "total_steps": 3591, "loss": 0.3295, "lr": 3.862203859652373e-05, "epoch": 1.4522417153996101, "percentage": 20.75, "elapsed_time": "1:42:24", "remaining_time": "6:31:12"}
|
||||
{"current_steps": 750, "total_steps": 3591, "loss": 0.2801, "lr": 3.858635210756142e-05, "epoch": 1.4619883040935673, "percentage": 20.89, "elapsed_time": "1:42:59", "remaining_time": "6:30:09"}
|
||||
{"current_steps": 755, "total_steps": 3591, "loss": 0.3479, "lr": 3.855022632068315e-05, "epoch": 1.4717348927875245, "percentage": 21.02, "elapsed_time": "1:43:35", "remaining_time": "6:29:06"}
|
||||
{"current_steps": 760, "total_steps": 3591, "loss": 0.337, "lr": 3.851366208974033e-05, "epoch": 1.4814814814814814, "percentage": 21.16, "elapsed_time": "1:44:07", "remaining_time": "6:27:53"}
|
||||
{"current_steps": 765, "total_steps": 3591, "loss": 0.2466, "lr": 3.8476660278947195e-05, "epoch": 1.4912280701754386, "percentage": 21.3, "elapsed_time": "1:44:54", "remaining_time": "6:27:33"}
|
||||
{"current_steps": 770, "total_steps": 3591, "loss": 0.2811, "lr": 3.843922176286044e-05, "epoch": 1.5009746588693957, "percentage": 21.44, "elapsed_time": "1:45:33", "remaining_time": "6:26:43"}
|
||||
{"current_steps": 775, "total_steps": 3591, "loss": 0.2754, "lr": 3.840134742635849e-05, "epoch": 1.5107212475633527, "percentage": 21.58, "elapsed_time": "1:46:15", "remaining_time": "6:26:06"}
|
||||
{"current_steps": 780, "total_steps": 3591, "loss": 0.3191, "lr": 3.836303816462062e-05, "epoch": 1.52046783625731, "percentage": 21.72, "elapsed_time": "1:46:52", "remaining_time": "6:25:08"}
|
||||
{"current_steps": 785, "total_steps": 3591, "loss": 0.3441, "lr": 3.832429488310577e-05, "epoch": 1.530214424951267, "percentage": 21.86, "elapsed_time": "1:47:26", "remaining_time": "6:24:02"}
|
||||
{"current_steps": 790, "total_steps": 3591, "loss": 0.3268, "lr": 3.8285118497531174e-05, "epoch": 1.5399610136452242, "percentage": 22.0, "elapsed_time": "1:48:04", "remaining_time": "6:23:10"}
|
||||
{"current_steps": 795, "total_steps": 3591, "loss": 0.2907, "lr": 3.824550993385069e-05, "epoch": 1.5497076023391814, "percentage": 22.14, "elapsed_time": "1:48:45", "remaining_time": "6:22:29"}
|
||||
{"current_steps": 800, "total_steps": 3591, "loss": 0.3051, "lr": 3.820547012823294e-05, "epoch": 1.5594541910331383, "percentage": 22.28, "elapsed_time": "1:49:14", "remaining_time": "6:21:06"}
|
||||
{"current_steps": 805, "total_steps": 3591, "loss": 0.2547, "lr": 3.816500002703915e-05, "epoch": 1.5692007797270955, "percentage": 22.42, "elapsed_time": "1:49:50", "remaining_time": "6:20:09"}
|
||||
{"current_steps": 810, "total_steps": 3591, "loss": 0.3057, "lr": 3.812410058680082e-05, "epoch": 1.5789473684210527, "percentage": 22.56, "elapsed_time": "1:50:47", "remaining_time": "6:20:22"}
|
||||
{"current_steps": 815, "total_steps": 3591, "loss": 0.3232, "lr": 3.808277277419709e-05, "epoch": 1.5886939571150096, "percentage": 22.7, "elapsed_time": "1:51:28", "remaining_time": "6:19:42"}
|
||||
{"current_steps": 820, "total_steps": 3591, "loss": 0.3289, "lr": 3.8041017566031885e-05, "epoch": 1.598440545808967, "percentage": 22.83, "elapsed_time": "1:52:10", "remaining_time": "6:19:03"}
|
||||
{"current_steps": 825, "total_steps": 3591, "loss": 0.3289, "lr": 3.799883594921088e-05, "epoch": 1.608187134502924, "percentage": 22.97, "elapsed_time": "1:52:50", "remaining_time": "6:18:19"}
|
||||
{"current_steps": 830, "total_steps": 3591, "loss": 0.2686, "lr": 3.795622892071809e-05, "epoch": 1.6179337231968811, "percentage": 23.11, "elapsed_time": "1:53:32", "remaining_time": "6:17:40"}
|
||||
{"current_steps": 835, "total_steps": 3591, "loss": 0.3091, "lr": 3.791319748759239e-05, "epoch": 1.6276803118908383, "percentage": 23.25, "elapsed_time": "1:54:20", "remaining_time": "6:17:24"}
|
||||
{"current_steps": 840, "total_steps": 3591, "loss": 0.3094, "lr": 3.786974266690365e-05, "epoch": 1.6374269005847952, "percentage": 23.39, "elapsed_time": "1:54:59", "remaining_time": "6:16:34"}
|
||||
{"current_steps": 845, "total_steps": 3591, "loss": 0.279, "lr": 3.782586548572873e-05, "epoch": 1.6471734892787524, "percentage": 23.53, "elapsed_time": "1:55:42", "remaining_time": "6:16:02"}
|
||||
{"current_steps": 850, "total_steps": 3591, "loss": 0.3029, "lr": 3.77815669811272e-05, "epoch": 1.6569200779727096, "percentage": 23.67, "elapsed_time": "1:56:20", "remaining_time": "6:15:09"}
|
||||
{"current_steps": 855, "total_steps": 3591, "loss": 0.2818, "lr": 3.773684820011681e-05, "epoch": 1.6666666666666665, "percentage": 23.81, "elapsed_time": "1:57:04", "remaining_time": "6:14:39"}
|
||||
{"current_steps": 860, "total_steps": 3591, "loss": 0.2992, "lr": 3.769171019964879e-05, "epoch": 1.676413255360624, "percentage": 23.95, "elapsed_time": "1:57:42", "remaining_time": "6:13:47"}
|
||||
{"current_steps": 865, "total_steps": 3591, "loss": 0.2696, "lr": 3.76461540465828e-05, "epoch": 1.6861598440545809, "percentage": 24.09, "elapsed_time": "1:58:10", "remaining_time": "6:12:24"}
|
||||
{"current_steps": 870, "total_steps": 3591, "loss": 0.3413, "lr": 3.760018081766175e-05, "epoch": 1.695906432748538, "percentage": 24.23, "elapsed_time": "1:58:58", "remaining_time": "6:12:05"}
|
||||
{"current_steps": 875, "total_steps": 3591, "loss": 0.299, "lr": 3.7553791599486384e-05, "epoch": 1.7056530214424952, "percentage": 24.37, "elapsed_time": "1:59:34", "remaining_time": "6:11:10"}
|
||||
{"current_steps": 880, "total_steps": 3591, "loss": 0.3211, "lr": 3.7506987488489535e-05, "epoch": 1.7153996101364521, "percentage": 24.51, "elapsed_time": "2:00:10", "remaining_time": "6:10:12"}
|
||||
{"current_steps": 885, "total_steps": 3591, "loss": 0.3015, "lr": 3.7459769590910254e-05, "epoch": 1.7251461988304093, "percentage": 24.64, "elapsed_time": "2:00:42", "remaining_time": "6:09:04"}
|
||||
{"current_steps": 890, "total_steps": 3591, "loss": 0.2977, "lr": 3.7412139022767636e-05, "epoch": 1.7348927875243665, "percentage": 24.78, "elapsed_time": "2:01:22", "remaining_time": "6:08:20"}
|
||||
{"current_steps": 895, "total_steps": 3591, "loss": 0.3127, "lr": 3.736409690983447e-05, "epoch": 1.7446393762183234, "percentage": 24.92, "elapsed_time": "2:01:54", "remaining_time": "6:07:13"}
|
||||
{"current_steps": 900, "total_steps": 3591, "loss": 0.2945, "lr": 3.731564438761061e-05, "epoch": 1.7543859649122808, "percentage": 25.06, "elapsed_time": "2:02:31", "remaining_time": "6:06:21"}
|
||||
{"current_steps": 905, "total_steps": 3591, "loss": 0.2757, "lr": 3.726678260129614e-05, "epoch": 1.7641325536062378, "percentage": 25.2, "elapsed_time": "2:03:09", "remaining_time": "6:05:31"}
|
||||
{"current_steps": 910, "total_steps": 3591, "loss": 0.3034, "lr": 3.7217512705764305e-05, "epoch": 1.773879142300195, "percentage": 25.34, "elapsed_time": "2:03:50", "remaining_time": "6:04:50"}
|
||||
{"current_steps": 915, "total_steps": 3591, "loss": 0.2861, "lr": 3.716783586553422e-05, "epoch": 1.7836257309941521, "percentage": 25.48, "elapsed_time": "2:04:29", "remaining_time": "6:04:06"}
|
||||
{"current_steps": 920, "total_steps": 3591, "loss": 0.3266, "lr": 3.711775325474339e-05, "epoch": 1.793372319688109, "percentage": 25.62, "elapsed_time": "2:05:07", "remaining_time": "6:03:16"}
|
||||
{"current_steps": 925, "total_steps": 3591, "loss": 0.2623, "lr": 3.706726605711983e-05, "epoch": 1.8031189083820662, "percentage": 25.76, "elapsed_time": "2:05:52", "remaining_time": "6:02:47"}
|
||||
{"current_steps": 930, "total_steps": 3591, "loss": 0.2884, "lr": 3.701637546595425e-05, "epoch": 1.8128654970760234, "percentage": 25.9, "elapsed_time": "2:06:40", "remaining_time": "6:02:25"}
|
||||
{"current_steps": 935, "total_steps": 3591, "loss": 0.2723, "lr": 3.696508268407172e-05, "epoch": 1.8226120857699804, "percentage": 26.04, "elapsed_time": "2:07:23", "remaining_time": "6:01:53"}
|
||||
{"current_steps": 940, "total_steps": 3591, "loss": 0.2789, "lr": 3.691338892380332e-05, "epoch": 1.8323586744639377, "percentage": 26.18, "elapsed_time": "2:08:03", "remaining_time": "6:01:09"}
|
||||
{"current_steps": 945, "total_steps": 3591, "loss": 0.3409, "lr": 3.686129540695746e-05, "epoch": 1.8421052631578947, "percentage": 26.32, "elapsed_time": "2:08:39", "remaining_time": "6:00:15"}
|
||||
{"current_steps": 950, "total_steps": 3591, "loss": 0.2918, "lr": 3.680880336479098e-05, "epoch": 1.8518518518518519, "percentage": 26.46, "elapsed_time": "2:09:19", "remaining_time": "5:59:30"}
|
||||
{"current_steps": 955, "total_steps": 3591, "loss": 0.2529, "lr": 3.6755914037980086e-05, "epoch": 1.861598440545809, "percentage": 26.59, "elapsed_time": "2:10:12", "remaining_time": "5:59:25"}
|
||||
{"current_steps": 960, "total_steps": 3591, "loss": 0.2889, "lr": 3.6702628676591e-05, "epoch": 1.871345029239766, "percentage": 26.73, "elapsed_time": "2:10:54", "remaining_time": "5:58:46"}
|
||||
{"current_steps": 965, "total_steps": 3591, "loss": 0.2894, "lr": 3.664894854005043e-05, "epoch": 1.8810916179337231, "percentage": 26.87, "elapsed_time": "2:11:32", "remaining_time": "5:57:56"}
|
||||
{"current_steps": 970, "total_steps": 3591, "loss": 0.2765, "lr": 3.6594874897115796e-05, "epoch": 1.8908382066276803, "percentage": 27.01, "elapsed_time": "2:12:14", "remaining_time": "5:57:20"}
|
||||
{"current_steps": 975, "total_steps": 3591, "loss": 0.2683, "lr": 3.654040902584524e-05, "epoch": 1.9005847953216373, "percentage": 27.15, "elapsed_time": "2:12:51", "remaining_time": "5:56:29"}
|
||||
{"current_steps": 980, "total_steps": 3591, "loss": 0.3233, "lr": 3.648555221356743e-05, "epoch": 1.9103313840155947, "percentage": 27.29, "elapsed_time": "2:13:29", "remaining_time": "5:55:40"}
|
||||
{"current_steps": 985, "total_steps": 3591, "loss": 0.2868, "lr": 3.6430305756851094e-05, "epoch": 1.9200779727095516, "percentage": 27.43, "elapsed_time": "2:14:15", "remaining_time": "5:55:13"}
|
||||
{"current_steps": 990, "total_steps": 3591, "loss": 0.26, "lr": 3.6374670961474444e-05, "epoch": 1.9298245614035088, "percentage": 27.57, "elapsed_time": "2:14:52", "remaining_time": "5:54:20"}
|
||||
{"current_steps": 995, "total_steps": 3591, "loss": 0.2668, "lr": 3.631864914239425e-05, "epoch": 1.939571150097466, "percentage": 27.71, "elapsed_time": "2:15:31", "remaining_time": "5:53:35"}
|
||||
{"current_steps": 1000, "total_steps": 3591, "loss": 0.2963, "lr": 3.6262241623714796e-05, "epoch": 1.949317738791423, "percentage": 27.85, "elapsed_time": "2:16:20", "remaining_time": "5:53:16"}
|
||||
{"current_steps": 1005, "total_steps": 3591, "loss": 0.3151, "lr": 3.620544973865657e-05, "epoch": 1.95906432748538, "percentage": 27.99, "elapsed_time": "2:17:09", "remaining_time": "5:52:55"}
|
||||
{"current_steps": 1010, "total_steps": 3591, "loss": 0.3006, "lr": 3.6148274829524745e-05, "epoch": 1.9688109161793372, "percentage": 28.13, "elapsed_time": "2:17:48", "remaining_time": "5:52:10"}
|
||||
{"current_steps": 1015, "total_steps": 3591, "loss": 0.2494, "lr": 3.609071824767749e-05, "epoch": 1.9785575048732942, "percentage": 28.27, "elapsed_time": "2:18:18", "remaining_time": "5:51:00"}
|
||||
{"current_steps": 1020, "total_steps": 3591, "loss": 0.3284, "lr": 3.603278135349397e-05, "epoch": 1.9883040935672516, "percentage": 28.4, "elapsed_time": "2:19:05", "remaining_time": "5:50:34"}
|
||||
{"current_steps": 1025, "total_steps": 3591, "loss": 0.2635, "lr": 3.5974465516342235e-05, "epoch": 1.9980506822612085, "percentage": 28.54, "elapsed_time": "2:19:42", "remaining_time": "5:49:45"}
|
||||
{"current_steps": 1030, "total_steps": 3591, "loss": 0.2673, "lr": 3.591577211454687e-05, "epoch": 2.007797270955166, "percentage": 28.68, "elapsed_time": "2:20:28", "remaining_time": "5:49:15"}
|
||||
{"current_steps": 1035, "total_steps": 3591, "loss": 0.248, "lr": 3.585670253535637e-05, "epoch": 2.017543859649123, "percentage": 28.82, "elapsed_time": "2:21:17", "remaining_time": "5:48:55"}
|
||||
{"current_steps": 1040, "total_steps": 3591, "loss": 0.2541, "lr": 3.5797258174910374e-05, "epoch": 2.02729044834308, "percentage": 28.96, "elapsed_time": "2:21:59", "remaining_time": "5:48:18"}
|
||||
{"current_steps": 1045, "total_steps": 3591, "loss": 0.2469, "lr": 3.5737440438206685e-05, "epoch": 2.037037037037037, "percentage": 29.1, "elapsed_time": "2:22:40", "remaining_time": "5:47:35"}
|
||||
{"current_steps": 1050, "total_steps": 3591, "loss": 0.2843, "lr": 3.567725073906802e-05, "epoch": 2.046783625730994, "percentage": 29.24, "elapsed_time": "2:23:20", "remaining_time": "5:46:52"}
|
||||
{"current_steps": 1055, "total_steps": 3591, "loss": 0.2512, "lr": 3.561669050010864e-05, "epoch": 2.056530214424951, "percentage": 29.38, "elapsed_time": "2:24:00", "remaining_time": "5:46:10"}
|
||||
{"current_steps": 1060, "total_steps": 3591, "loss": 0.3245, "lr": 3.55557611527007e-05, "epoch": 2.0662768031189085, "percentage": 29.52, "elapsed_time": "2:24:32", "remaining_time": "5:45:06"}
|
||||
{"current_steps": 1065, "total_steps": 3591, "loss": 0.2629, "lr": 3.549446413694039e-05, "epoch": 2.0760233918128654, "percentage": 29.66, "elapsed_time": "2:25:15", "remaining_time": "5:44:32"}
|
||||
{"current_steps": 1070, "total_steps": 3591, "loss": 0.2847, "lr": 3.5432800901613986e-05, "epoch": 2.0857699805068224, "percentage": 29.8, "elapsed_time": "2:25:53", "remaining_time": "5:43:44"}
|
||||
{"current_steps": 1075, "total_steps": 3591, "loss": 0.2338, "lr": 3.53707729041635e-05, "epoch": 2.0955165692007798, "percentage": 29.94, "elapsed_time": "2:26:24", "remaining_time": "5:42:39"}
|
||||
{"current_steps": 1080, "total_steps": 3591, "loss": 0.2352, "lr": 3.530838161065231e-05, "epoch": 2.1052631578947367, "percentage": 30.08, "elapsed_time": "2:27:07", "remaining_time": "5:42:02"}
|
||||
{"current_steps": 1085, "total_steps": 3591, "loss": 0.2769, "lr": 3.524562849573046e-05, "epoch": 2.115009746588694, "percentage": 30.21, "elapsed_time": "2:27:51", "remaining_time": "5:41:29"}
|
||||
{"current_steps": 1090, "total_steps": 3591, "loss": 0.27, "lr": 3.518251504259986e-05, "epoch": 2.124756335282651, "percentage": 30.35, "elapsed_time": "2:28:34", "remaining_time": "5:40:54"}
|
||||
{"current_steps": 1095, "total_steps": 3591, "loss": 0.2218, "lr": 3.511904274297918e-05, "epoch": 2.134502923976608, "percentage": 30.49, "elapsed_time": "2:29:21", "remaining_time": "5:40:28"}
|
||||
{"current_steps": 1100, "total_steps": 3591, "loss": 0.2981, "lr": 3.505521309706859e-05, "epoch": 2.1442495126705654, "percentage": 30.63, "elapsed_time": "2:29:55", "remaining_time": "5:39:29"}
|
||||
{"current_steps": 1105, "total_steps": 3591, "loss": 0.2789, "lr": 3.4991027613514364e-05, "epoch": 2.1539961013645224, "percentage": 30.77, "elapsed_time": "2:30:30", "remaining_time": "5:38:37"}
|
||||
{"current_steps": 1110, "total_steps": 3591, "loss": 0.268, "lr": 3.492648780937314e-05, "epoch": 2.1637426900584797, "percentage": 30.91, "elapsed_time": "2:31:14", "remaining_time": "5:38:02"}
|
||||
{"current_steps": 1115, "total_steps": 3591, "loss": 0.2438, "lr": 3.486159521007612e-05, "epoch": 2.1734892787524367, "percentage": 31.05, "elapsed_time": "2:32:01", "remaining_time": "5:37:35"}
|
||||
{"current_steps": 1120, "total_steps": 3591, "loss": 0.2448, "lr": 3.479635134939302e-05, "epoch": 2.1832358674463936, "percentage": 31.19, "elapsed_time": "2:32:31", "remaining_time": "5:36:29"}
|
||||
{"current_steps": 1125, "total_steps": 3591, "loss": 0.2755, "lr": 3.4730757769395756e-05, "epoch": 2.192982456140351, "percentage": 31.33, "elapsed_time": "2:33:03", "remaining_time": "5:35:31"}
|
||||
{"current_steps": 1130, "total_steps": 3591, "loss": 0.257, "lr": 3.466481602042208e-05, "epoch": 2.202729044834308, "percentage": 31.47, "elapsed_time": "2:33:54", "remaining_time": "5:35:11"}
|
||||
{"current_steps": 1135, "total_steps": 3591, "loss": 0.2525, "lr": 3.459852766103886e-05, "epoch": 2.212475633528265, "percentage": 31.61, "elapsed_time": "2:34:37", "remaining_time": "5:34:36"}
|
||||
{"current_steps": 1140, "total_steps": 3591, "loss": 0.2676, "lr": 3.4531894258005305e-05, "epoch": 2.2222222222222223, "percentage": 31.75, "elapsed_time": "2:35:16", "remaining_time": "5:33:51"}
|
||||
{"current_steps": 1145, "total_steps": 3591, "loss": 0.286, "lr": 3.446491738623589e-05, "epoch": 2.2319688109161793, "percentage": 31.89, "elapsed_time": "2:36:04", "remaining_time": "5:33:24"}
|
||||
{"current_steps": 1150, "total_steps": 3591, "loss": 0.3058, "lr": 3.439759862876316e-05, "epoch": 2.241715399610136, "percentage": 32.02, "elapsed_time": "2:36:44", "remaining_time": "5:32:42"}
|
||||
{"current_steps": 1155, "total_steps": 3591, "loss": 0.2572, "lr": 3.43299395767003e-05, "epoch": 2.2514619883040936, "percentage": 32.16, "elapsed_time": "2:37:28", "remaining_time": "5:32:08"}
|
||||
{"current_steps": 1160, "total_steps": 3591, "loss": 0.2912, "lr": 3.426194182920352e-05, "epoch": 2.2612085769980506, "percentage": 32.3, "elapsed_time": "2:38:15", "remaining_time": "5:31:39"}
|
||||
{"current_steps": 1165, "total_steps": 3591, "loss": 0.2934, "lr": 3.419360699343429e-05, "epoch": 2.270955165692008, "percentage": 32.44, "elapsed_time": "2:38:56", "remaining_time": "5:30:59"}
|
||||
{"current_steps": 1170, "total_steps": 3591, "loss": 0.2919, "lr": 3.412493668452131e-05, "epoch": 2.280701754385965, "percentage": 32.58, "elapsed_time": "2:39:49", "remaining_time": "5:30:42"}
|
||||
{"current_steps": 1175, "total_steps": 3591, "loss": 0.287, "lr": 3.4055932525522376e-05, "epoch": 2.290448343079922, "percentage": 32.72, "elapsed_time": "2:40:38", "remaining_time": "5:30:19"}
|
||||
{"current_steps": 1180, "total_steps": 3591, "loss": 0.273, "lr": 3.398659614738601e-05, "epoch": 2.3001949317738792, "percentage": 32.86, "elapsed_time": "2:41:23", "remaining_time": "5:29:45"}
|
||||
{"current_steps": 1185, "total_steps": 3591, "loss": 0.2588, "lr": 3.3916929188912864e-05, "epoch": 2.309941520467836, "percentage": 33.0, "elapsed_time": "2:42:03", "remaining_time": "5:29:02"}
|
||||
{"current_steps": 1190, "total_steps": 3591, "loss": 0.3003, "lr": 3.3846933296717064e-05, "epoch": 2.3196881091617936, "percentage": 33.14, "elapsed_time": "2:42:47", "remaining_time": "5:28:26"}
|
||||
{"current_steps": 1195, "total_steps": 3591, "loss": 0.259, "lr": 3.377661012518724e-05, "epoch": 2.3294346978557505, "percentage": 33.28, "elapsed_time": "2:43:27", "remaining_time": "5:27:43"}
|
||||
{"current_steps": 1200, "total_steps": 3591, "loss": 0.2783, "lr": 3.370596133644743e-05, "epoch": 2.3391812865497075, "percentage": 33.42, "elapsed_time": "2:44:14", "remaining_time": "5:27:14"}
|
||||
{"current_steps": 1205, "total_steps": 3591, "loss": 0.293, "lr": 3.363498860031781e-05, "epoch": 2.348927875243665, "percentage": 33.56, "elapsed_time": "2:44:51", "remaining_time": "5:26:25"}
|
||||
{"current_steps": 1210, "total_steps": 3591, "loss": 0.2367, "lr": 3.35636935942752e-05, "epoch": 2.358674463937622, "percentage": 33.7, "elapsed_time": "2:45:33", "remaining_time": "5:25:47"}
|
||||
{"current_steps": 1215, "total_steps": 3591, "loss": 0.2771, "lr": 3.349207800341346e-05, "epoch": 2.3684210526315788, "percentage": 33.83, "elapsed_time": "2:46:13", "remaining_time": "5:25:04"}
|
||||
{"current_steps": 1220, "total_steps": 3591, "loss": 0.3172, "lr": 3.3420143520403625e-05, "epoch": 2.378167641325536, "percentage": 33.97, "elapsed_time": "2:46:45", "remaining_time": "5:24:05"}
|
||||
{"current_steps": 1225, "total_steps": 3591, "loss": 0.2621, "lr": 3.334789184545389e-05, "epoch": 2.387914230019493, "percentage": 34.11, "elapsed_time": "2:47:22", "remaining_time": "5:23:17"}
|
||||
{"current_steps": 1230, "total_steps": 3591, "loss": 0.2695, "lr": 3.327532468626948e-05, "epoch": 2.39766081871345, "percentage": 34.25, "elapsed_time": "2:47:55", "remaining_time": "5:22:19"}
|
||||
{"current_steps": 1235, "total_steps": 3591, "loss": 0.2523, "lr": 3.3202443758012214e-05, "epoch": 2.4074074074074074, "percentage": 34.39, "elapsed_time": "2:48:35", "remaining_time": "5:21:36"}
|
||||
{"current_steps": 1240, "total_steps": 3591, "loss": 0.27, "lr": 3.3129250783260024e-05, "epoch": 2.4171539961013644, "percentage": 34.53, "elapsed_time": "2:49:13", "remaining_time": "5:20:50"}
|
||||
{"current_steps": 1245, "total_steps": 3591, "loss": 0.2689, "lr": 3.305574749196622e-05, "epoch": 2.426900584795322, "percentage": 34.67, "elapsed_time": "2:49:50", "remaining_time": "5:20:02"}
|
||||
{"current_steps": 1250, "total_steps": 3591, "loss": 0.2279, "lr": 3.29819356214186e-05, "epoch": 2.4366471734892787, "percentage": 34.81, "elapsed_time": "2:50:35", "remaining_time": "5:19:29"}
|
||||
{"current_steps": 1255, "total_steps": 3591, "loss": 0.2849, "lr": 3.290781691619838e-05, "epoch": 2.4463937621832357, "percentage": 34.95, "elapsed_time": "2:51:09", "remaining_time": "5:18:34"}
|
||||
{"current_steps": 1260, "total_steps": 3591, "loss": 0.2684, "lr": 3.283339312813898e-05, "epoch": 2.456140350877193, "percentage": 35.09, "elapsed_time": "2:51:42", "remaining_time": "5:17:39"}
|
||||
{"current_steps": 1265, "total_steps": 3591, "loss": 0.2476, "lr": 3.275866601628461e-05, "epoch": 2.46588693957115, "percentage": 35.23, "elapsed_time": "2:52:19", "remaining_time": "5:16:50"}
|
||||
{"current_steps": 1270, "total_steps": 3591, "loss": 0.2536, "lr": 3.2683637346848683e-05, "epoch": 2.4756335282651074, "percentage": 35.37, "elapsed_time": "2:52:59", "remaining_time": "5:16:10"}
|
||||
{"current_steps": 1275, "total_steps": 3591, "loss": 0.2854, "lr": 3.260830889317209e-05, "epoch": 2.4853801169590644, "percentage": 35.51, "elapsed_time": "2:53:34", "remaining_time": "5:15:16"}
|
||||
{"current_steps": 1280, "total_steps": 3591, "loss": 0.2799, "lr": 3.253268243568128e-05, "epoch": 2.4951267056530213, "percentage": 35.64, "elapsed_time": "2:54:08", "remaining_time": "5:14:23"}
|
||||
{"current_steps": 1285, "total_steps": 3591, "loss": 0.2597, "lr": 3.2456759761846144e-05, "epoch": 2.5048732943469787, "percentage": 35.78, "elapsed_time": "2:54:45", "remaining_time": "5:13:36"}
|
||||
{"current_steps": 1290, "total_steps": 3591, "loss": 0.2702, "lr": 3.238054266613784e-05, "epoch": 2.5146198830409356, "percentage": 35.92, "elapsed_time": "2:55:19", "remaining_time": "5:12:44"}
|
||||
{"current_steps": 1295, "total_steps": 3591, "loss": 0.24, "lr": 3.2304032949986294e-05, "epoch": 2.5243664717348926, "percentage": 36.06, "elapsed_time": "2:55:59", "remaining_time": "5:12:02"}
|
||||
{"current_steps": 1300, "total_steps": 3591, "loss": 0.2694, "lr": 3.22272324217377e-05, "epoch": 2.53411306042885, "percentage": 36.2, "elapsed_time": "2:56:35", "remaining_time": "5:11:12"}
|
||||
{"current_steps": 1305, "total_steps": 3591, "loss": 0.2714, "lr": 3.2150142896611734e-05, "epoch": 2.543859649122807, "percentage": 36.34, "elapsed_time": "2:57:17", "remaining_time": "5:10:33"}
|
||||
{"current_steps": 1310, "total_steps": 3591, "loss": 0.2876, "lr": 3.207276619665865e-05, "epoch": 2.553606237816764, "percentage": 36.48, "elapsed_time": "2:57:59", "remaining_time": "5:09:54"}
|
||||
{"current_steps": 1315, "total_steps": 3591, "loss": 0.2653, "lr": 3.1995104150716244e-05, "epoch": 2.5633528265107213, "percentage": 36.62, "elapsed_time": "2:58:37", "remaining_time": "5:09:09"}
|
||||
{"current_steps": 1320, "total_steps": 3591, "loss": 0.2172, "lr": 3.191715859436658e-05, "epoch": 2.573099415204678, "percentage": 36.76, "elapsed_time": "2:59:13", "remaining_time": "5:08:21"}
|
||||
{"current_steps": 1325, "total_steps": 3591, "loss": 0.2634, "lr": 3.1838931369892684e-05, "epoch": 2.5828460038986356, "percentage": 36.9, "elapsed_time": "3:00:02", "remaining_time": "5:07:54"}
|
||||
{"current_steps": 1330, "total_steps": 3591, "loss": 0.2379, "lr": 3.17604243262349e-05, "epoch": 2.5925925925925926, "percentage": 37.04, "elapsed_time": "3:00:41", "remaining_time": "5:07:10"}
|
||||
{"current_steps": 1335, "total_steps": 3591, "loss": 0.2398, "lr": 3.168163931894728e-05, "epoch": 2.60233918128655, "percentage": 37.18, "elapsed_time": "3:01:22", "remaining_time": "5:06:30"}
|
||||
{"current_steps": 1340, "total_steps": 3591, "loss": 0.2515, "lr": 3.160257821015365e-05, "epoch": 2.612085769980507, "percentage": 37.32, "elapsed_time": "3:02:01", "remaining_time": "5:05:45"}
|
||||
{"current_steps": 1345, "total_steps": 3591, "loss": 0.2901, "lr": 3.1523242868503686e-05, "epoch": 2.621832358674464, "percentage": 37.45, "elapsed_time": "3:02:34", "remaining_time": "5:04:52"}
|
||||
{"current_steps": 1350, "total_steps": 3591, "loss": 0.2469, "lr": 3.144363516912865e-05, "epoch": 2.6315789473684212, "percentage": 37.59, "elapsed_time": "3:03:12", "remaining_time": "5:04:07"}
|
||||
{"current_steps": 1355, "total_steps": 3591, "loss": 0.2874, "lr": 3.136375699359714e-05, "epoch": 2.641325536062378, "percentage": 37.73, "elapsed_time": "3:03:56", "remaining_time": "5:03:32"}
|
||||
{"current_steps": 1360, "total_steps": 3591, "loss": 0.283, "lr": 3.1283610229870594e-05, "epoch": 2.651072124756335, "percentage": 37.87, "elapsed_time": "3:04:41", "remaining_time": "5:02:58"}
|
||||
{"current_steps": 1365, "total_steps": 3591, "loss": 0.2313, "lr": 3.1203196772258676e-05, "epoch": 2.6608187134502925, "percentage": 38.01, "elapsed_time": "3:05:33", "remaining_time": "5:02:36"}
|
||||
{"current_steps": 1370, "total_steps": 3591, "loss": 0.2512, "lr": 3.112251852137448e-05, "epoch": 2.6705653021442495, "percentage": 38.15, "elapsed_time": "3:06:19", "remaining_time": "5:02:03"}
|
||||
{"current_steps": 1375, "total_steps": 3591, "loss": 0.2625, "lr": 3.1041577384089646e-05, "epoch": 2.6803118908382064, "percentage": 38.29, "elapsed_time": "3:06:51", "remaining_time": "5:01:08"}
|
||||
{"current_steps": 1380, "total_steps": 3591, "loss": 0.2808, "lr": 3.096037527348924e-05, "epoch": 2.690058479532164, "percentage": 38.43, "elapsed_time": "3:07:34", "remaining_time": "5:00:32"}
|
||||
{"current_steps": 1385, "total_steps": 3591, "loss": 0.2765, "lr": 3.087891410882658e-05, "epoch": 2.6998050682261208, "percentage": 38.57, "elapsed_time": "3:08:07", "remaining_time": "4:59:37"}
|
||||
{"current_steps": 1390, "total_steps": 3591, "loss": 0.3003, "lr": 3.079719581547786e-05, "epoch": 2.7095516569200777, "percentage": 38.71, "elapsed_time": "3:08:48", "remaining_time": "4:58:58"}
|
||||
{"current_steps": 1395, "total_steps": 3591, "loss": 0.2866, "lr": 3.071522232489666e-05, "epoch": 2.719298245614035, "percentage": 38.85, "elapsed_time": "3:09:30", "remaining_time": "4:58:18"}
|
||||
{"current_steps": 1400, "total_steps": 3591, "loss": 0.2545, "lr": 3.063299557456824e-05, "epoch": 2.729044834307992, "percentage": 38.99, "elapsed_time": "3:10:12", "remaining_time": "4:57:40"}
|
||||
{"current_steps": 1405, "total_steps": 3591, "loss": 0.2472, "lr": 3.05505175079638e-05, "epoch": 2.7387914230019494, "percentage": 39.13, "elapsed_time": "3:10:53", "remaining_time": "4:56:59"}
|
||||
{"current_steps": 1410, "total_steps": 3591, "loss": 0.272, "lr": 3.0467790074494538e-05, "epoch": 2.7485380116959064, "percentage": 39.26, "elapsed_time": "3:11:24", "remaining_time": "4:56:04"}
|
||||
{"current_steps": 1415, "total_steps": 3591, "loss": 0.2277, "lr": 3.0384815229465557e-05, "epoch": 2.758284600389864, "percentage": 39.4, "elapsed_time": "3:12:02", "remaining_time": "4:55:19"}
|
||||
{"current_steps": 1420, "total_steps": 3591, "loss": 0.2828, "lr": 3.0301594934029643e-05, "epoch": 2.7680311890838207, "percentage": 39.54, "elapsed_time": "3:12:41", "remaining_time": "4:54:36"}
|
||||
{"current_steps": 1425, "total_steps": 3591, "loss": 0.308, "lr": 3.021813115514096e-05, "epoch": 2.7777777777777777, "percentage": 39.68, "elapsed_time": "3:13:19", "remaining_time": "4:53:51"}
|
||||
{"current_steps": 1430, "total_steps": 3591, "loss": 0.2931, "lr": 3.0134425865508507e-05, "epoch": 2.787524366471735, "percentage": 39.82, "elapsed_time": "3:13:58", "remaining_time": "4:53:07"}
|
||||
{"current_steps": 1435, "total_steps": 3591, "loss": 0.2675, "lr": 3.0050481043549512e-05, "epoch": 2.797270955165692, "percentage": 39.96, "elapsed_time": "3:14:41", "remaining_time": "4:52:30"}
|
||||
{"current_steps": 1440, "total_steps": 3591, "loss": 0.2518, "lr": 2.9966298673342678e-05, "epoch": 2.807017543859649, "percentage": 40.1, "elapsed_time": "3:15:27", "remaining_time": "4:51:58"}
|
||||
{"current_steps": 1445, "total_steps": 3591, "loss": 0.2769, "lr": 2.988188074458128e-05, "epoch": 2.8167641325536064, "percentage": 40.24, "elapsed_time": "3:16:01", "remaining_time": "4:51:07"}
|
||||
{"current_steps": 1450, "total_steps": 3591, "loss": 0.2553, "lr": 2.9797229252526132e-05, "epoch": 2.8265107212475633, "percentage": 40.38, "elapsed_time": "3:16:48", "remaining_time": "4:50:35"}
|
||||
{"current_steps": 1455, "total_steps": 3591, "loss": 0.2593, "lr": 2.971234619795846e-05, "epoch": 2.8362573099415203, "percentage": 40.52, "elapsed_time": "3:17:23", "remaining_time": "4:49:46"}
|
||||
{"current_steps": 1460, "total_steps": 3591, "loss": 0.2409, "lr": 2.962723358713255e-05, "epoch": 2.8460038986354776, "percentage": 40.66, "elapsed_time": "3:18:01", "remaining_time": "4:49:02"}
|
||||
{"current_steps": 1465, "total_steps": 3591, "loss": 0.2512, "lr": 2.9541893431728407e-05, "epoch": 2.8557504873294346, "percentage": 40.8, "elapsed_time": "3:18:46", "remaining_time": "4:48:27"}
|
||||
{"current_steps": 1470, "total_steps": 3591, "loss": 0.3006, "lr": 2.945632774880414e-05, "epoch": 2.8654970760233915, "percentage": 40.94, "elapsed_time": "3:19:22", "remaining_time": "4:47:40"}
|
||||
{"current_steps": 1475, "total_steps": 3591, "loss": 0.2274, "lr": 2.937053856074833e-05, "epoch": 2.875243664717349, "percentage": 41.07, "elapsed_time": "3:20:04", "remaining_time": "4:47:01"}
|
||||
{"current_steps": 1480, "total_steps": 3591, "loss": 0.2687, "lr": 2.928452789523221e-05, "epoch": 2.884990253411306, "percentage": 41.21, "elapsed_time": "3:20:45", "remaining_time": "4:46:21"}
|
||||
{"current_steps": 1485, "total_steps": 3591, "loss": 0.2657, "lr": 2.9198297785161744e-05, "epoch": 2.8947368421052633, "percentage": 41.35, "elapsed_time": "3:21:26", "remaining_time": "4:45:41"}
|
||||
{"current_steps": 1490, "total_steps": 3591, "loss": 0.2544, "lr": 2.911185026862959e-05, "epoch": 2.9044834307992202, "percentage": 41.49, "elapsed_time": "3:22:04", "remaining_time": "4:44:55"}
|
||||
{"current_steps": 1495, "total_steps": 3591, "loss": 0.3315, "lr": 2.902518738886691e-05, "epoch": 2.9142300194931776, "percentage": 41.63, "elapsed_time": "3:22:44", "remaining_time": "4:44:14"}
|
||||
{"current_steps": 1500, "total_steps": 3591, "loss": 0.2648, "lr": 2.8938311194195084e-05, "epoch": 2.9239766081871346, "percentage": 41.77, "elapsed_time": "3:23:24", "remaining_time": "4:43:32"}
|
||||
{"current_steps": 1505, "total_steps": 3591, "loss": 0.2345, "lr": 2.885122373797731e-05, "epoch": 2.9337231968810915, "percentage": 41.91, "elapsed_time": "3:24:40", "remaining_time": "4:43:41"}
|
||||
{"current_steps": 1510, "total_steps": 3591, "loss": 0.2818, "lr": 2.876392707857004e-05, "epoch": 2.943469785575049, "percentage": 42.05, "elapsed_time": "3:25:20", "remaining_time": "4:42:59"}
|
||||
{"current_steps": 1515, "total_steps": 3591, "loss": 0.2723, "lr": 2.8676423279274374e-05, "epoch": 2.953216374269006, "percentage": 42.19, "elapsed_time": "3:26:03", "remaining_time": "4:42:21"}
|
||||
{"current_steps": 1520, "total_steps": 3591, "loss": 0.2562, "lr": 2.858871440828726e-05, "epoch": 2.962962962962963, "percentage": 42.33, "elapsed_time": "3:26:49", "remaining_time": "4:41:47"}
|
||||
{"current_steps": 1525, "total_steps": 3591, "loss": 0.2173, "lr": 2.850080253865261e-05, "epoch": 2.97270955165692, "percentage": 42.47, "elapsed_time": "3:27:29", "remaining_time": "4:41:06"}
|
||||
{"current_steps": 1530, "total_steps": 3591, "loss": 0.2841, "lr": 2.8412689748212327e-05, "epoch": 2.982456140350877, "percentage": 42.61, "elapsed_time": "3:27:58", "remaining_time": "4:40:09"}
|
||||
{"current_steps": 1535, "total_steps": 3591, "loss": 0.2461, "lr": 2.8324378119557175e-05, "epoch": 2.992202729044834, "percentage": 42.75, "elapsed_time": "3:28:39", "remaining_time": "4:39:28"}
|
||||
{"current_steps": 1540, "total_steps": 3591, "loss": 0.2554, "lr": 2.823586973997755e-05, "epoch": 3.0019493177387915, "percentage": 42.88, "elapsed_time": "3:29:17", "remaining_time": "4:38:44"}
|
||||
{"current_steps": 1545, "total_steps": 3591, "loss": 0.2142, "lr": 2.8147166701414178e-05, "epoch": 3.0116959064327484, "percentage": 43.02, "elapsed_time": "3:29:57", "remaining_time": "4:38:02"}
|
||||
{"current_steps": 1550, "total_steps": 3591, "loss": 0.2274, "lr": 2.8058271100408627e-05, "epoch": 3.021442495126706, "percentage": 43.16, "elapsed_time": "3:30:46", "remaining_time": "4:37:33"}
|
||||
{"current_steps": 1555, "total_steps": 3591, "loss": 0.2887, "lr": 2.7969185038053798e-05, "epoch": 3.0311890838206628, "percentage": 43.3, "elapsed_time": "3:31:27", "remaining_time": "4:36:52"}
|
||||
{"current_steps": 1560, "total_steps": 3591, "loss": 0.2377, "lr": 2.787991061994421e-05, "epoch": 3.0409356725146197, "percentage": 43.44, "elapsed_time": "3:32:01", "remaining_time": "4:36:02"}
|
||||
{"current_steps": 1565, "total_steps": 3591, "loss": 0.2602, "lr": 2.7790449956126317e-05, "epoch": 3.050682261208577, "percentage": 43.58, "elapsed_time": "3:32:44", "remaining_time": "4:35:24"}
|
||||
{"current_steps": 1570, "total_steps": 3591, "loss": 0.2473, "lr": 2.7700805161048534e-05, "epoch": 3.060428849902534, "percentage": 43.72, "elapsed_time": "3:33:29", "remaining_time": "4:34:49"}
|
||||
{"current_steps": 1575, "total_steps": 3591, "loss": 0.2875, "lr": 2.7610978353511352e-05, "epoch": 3.0701754385964914, "percentage": 43.86, "elapsed_time": "3:34:13", "remaining_time": "4:34:12"}
|
||||
{"current_steps": 1580, "total_steps": 3591, "loss": 0.2763, "lr": 2.7520971656617186e-05, "epoch": 3.0799220272904484, "percentage": 44.0, "elapsed_time": "3:35:00", "remaining_time": "4:33:39"}
|
||||
{"current_steps": 1585, "total_steps": 3591, "loss": 0.2388, "lr": 2.7430787197720258e-05, "epoch": 3.0896686159844053, "percentage": 44.14, "elapsed_time": "3:35:38", "remaining_time": "4:32:55"}
|
||||
{"current_steps": 1590, "total_steps": 3591, "loss": 0.2129, "lr": 2.734042710837626e-05, "epoch": 3.0994152046783627, "percentage": 44.28, "elapsed_time": "3:36:11", "remaining_time": "4:32:04"}
|
||||
{"current_steps": 1595, "total_steps": 3591, "loss": 0.2419, "lr": 2.7249893524292024e-05, "epoch": 3.1091617933723197, "percentage": 44.42, "elapsed_time": "3:36:44", "remaining_time": "4:31:13"}
|
||||
{"current_steps": 1600, "total_steps": 3591, "loss": 0.2107, "lr": 2.7159188585274992e-05, "epoch": 3.1189083820662766, "percentage": 44.56, "elapsed_time": "3:37:22", "remaining_time": "4:30:29"}
|
||||
{"current_steps": 1605, "total_steps": 3591, "loss": 0.2228, "lr": 2.7068314435182694e-05, "epoch": 3.128654970760234, "percentage": 44.7, "elapsed_time": "3:38:08", "remaining_time": "4:29:55"}
|
||||
{"current_steps": 1610, "total_steps": 3591, "loss": 0.2442, "lr": 2.6977273221872023e-05, "epoch": 3.138401559454191, "percentage": 44.83, "elapsed_time": "3:38:50", "remaining_time": "4:29:15"}
|
||||
{"current_steps": 1615, "total_steps": 3591, "loss": 0.2463, "lr": 2.6886067097148526e-05, "epoch": 3.148148148148148, "percentage": 44.97, "elapsed_time": "3:39:14", "remaining_time": "4:28:14"}
|
||||
{"current_steps": 1620, "total_steps": 3591, "loss": 0.2487, "lr": 2.6794698216715484e-05, "epoch": 3.1578947368421053, "percentage": 45.11, "elapsed_time": "3:39:57", "remaining_time": "4:27:37"}
|
||||
{"current_steps": 1625, "total_steps": 3591, "loss": 0.2253, "lr": 2.670316874012302e-05, "epoch": 3.1676413255360623, "percentage": 45.25, "elapsed_time": "3:40:36", "remaining_time": "4:26:53"}
|
||||
{"current_steps": 1630, "total_steps": 3591, "loss": 0.2639, "lr": 2.6611480830717012e-05, "epoch": 3.1773879142300196, "percentage": 45.39, "elapsed_time": "3:41:11", "remaining_time": "4:26:06"}
|
||||
{"current_steps": 1635, "total_steps": 3591, "loss": 0.2716, "lr": 2.6519636655587988e-05, "epoch": 3.1871345029239766, "percentage": 45.53, "elapsed_time": "3:41:49", "remaining_time": "4:25:22"}
|
||||
{"current_steps": 1640, "total_steps": 3591, "loss": 0.3046, "lr": 2.642763838551988e-05, "epoch": 3.1968810916179335, "percentage": 45.67, "elapsed_time": "3:42:37", "remaining_time": "4:24:50"}
|
||||
{"current_steps": 1645, "total_steps": 3591, "loss": 0.2127, "lr": 2.6335488194938757e-05, "epoch": 3.206627680311891, "percentage": 45.81, "elapsed_time": "3:43:16", "remaining_time": "4:24:07"}
|
||||
{"current_steps": 1650, "total_steps": 3591, "loss": 0.2928, "lr": 2.624318826186139e-05, "epoch": 3.216374269005848, "percentage": 45.95, "elapsed_time": "3:43:58", "remaining_time": "4:23:28"}
|
||||
{"current_steps": 1655, "total_steps": 3591, "loss": 0.2494, "lr": 2.61507407678438e-05, "epoch": 3.2261208576998053, "percentage": 46.09, "elapsed_time": "3:44:38", "remaining_time": "4:22:46"}
|
||||
{"current_steps": 1660, "total_steps": 3591, "loss": 0.2099, "lr": 2.6058147897929665e-05, "epoch": 3.2358674463937622, "percentage": 46.23, "elapsed_time": "3:45:24", "remaining_time": "4:22:12"}
|
||||
{"current_steps": 1665, "total_steps": 3591, "loss": 0.2502, "lr": 2.5965411840598726e-05, "epoch": 3.245614035087719, "percentage": 46.37, "elapsed_time": "3:45:58", "remaining_time": "4:21:23"}
|
||||
{"current_steps": 1670, "total_steps": 3591, "loss": 0.2349, "lr": 2.5872534787715013e-05, "epoch": 3.2553606237816766, "percentage": 46.51, "elapsed_time": "3:46:40", "remaining_time": "4:20:44"}
|
||||
{"current_steps": 1675, "total_steps": 3591, "loss": 0.2563, "lr": 2.5779518934475064e-05, "epoch": 3.2651072124756335, "percentage": 46.64, "elapsed_time": "3:47:32", "remaining_time": "4:20:16"}
|
||||
{"current_steps": 1680, "total_steps": 3591, "loss": 0.2303, "lr": 2.5686366479356032e-05, "epoch": 3.2748538011695905, "percentage": 46.78, "elapsed_time": "3:48:20", "remaining_time": "4:19:44"}
|
||||
{"current_steps": 1685, "total_steps": 3591, "loss": 0.2363, "lr": 2.559307962406372e-05, "epoch": 3.284600389863548, "percentage": 46.92, "elapsed_time": "3:49:02", "remaining_time": "4:19:04"}
|
||||
{"current_steps": 1690, "total_steps": 3591, "loss": 0.2256, "lr": 2.5499660573480567e-05, "epoch": 3.294346978557505, "percentage": 47.06, "elapsed_time": "3:49:50", "remaining_time": "4:18:31"}
|
||||
{"current_steps": 1695, "total_steps": 3591, "loss": 0.22, "lr": 2.540611153561349e-05, "epoch": 3.3040935672514617, "percentage": 47.2, "elapsed_time": "3:50:28", "remaining_time": "4:17:48"}
|
||||
{"current_steps": 1700, "total_steps": 3591, "loss": 0.2376, "lr": 2.5312434721541738e-05, "epoch": 3.313840155945419, "percentage": 47.34, "elapsed_time": "3:51:13", "remaining_time": "4:17:11"}
|
||||
{"current_steps": 1705, "total_steps": 3591, "loss": 0.251, "lr": 2.5218632345364613e-05, "epoch": 3.323586744639376, "percentage": 47.48, "elapsed_time": "3:51:40", "remaining_time": "4:16:16"}
|
||||
{"current_steps": 1710, "total_steps": 3591, "loss": 0.214, "lr": 2.512470662414913e-05, "epoch": 3.3333333333333335, "percentage": 47.62, "elapsed_time": "3:52:21", "remaining_time": "4:15:35"}
|
||||
{"current_steps": 1715, "total_steps": 3591, "loss": 0.2679, "lr": 2.503065977787765e-05, "epoch": 3.3430799220272904, "percentage": 47.76, "elapsed_time": "3:52:52", "remaining_time": "4:14:44"}
|
||||
{"current_steps": 1720, "total_steps": 3591, "loss": 0.2523, "lr": 2.493649402939536e-05, "epoch": 3.3528265107212474, "percentage": 47.9, "elapsed_time": "3:53:21", "remaining_time": "4:13:50"}
|
||||
{"current_steps": 1725, "total_steps": 3591, "loss": 0.2444, "lr": 2.484221160435779e-05, "epoch": 3.3625730994152048, "percentage": 48.04, "elapsed_time": "3:54:11", "remaining_time": "4:13:20"}
|
||||
{"current_steps": 1730, "total_steps": 3591, "loss": 0.2378, "lr": 2.4747814731178144e-05, "epoch": 3.3723196881091617, "percentage": 48.18, "elapsed_time": "3:54:50", "remaining_time": "4:12:37"}
|
||||
{"current_steps": 1735, "total_steps": 3591, "loss": 0.1995, "lr": 2.4653305640974714e-05, "epoch": 3.382066276803119, "percentage": 48.32, "elapsed_time": "3:55:35", "remaining_time": "4:12:01"}
|
||||
{"current_steps": 1740, "total_steps": 3591, "loss": 0.228, "lr": 2.4558686567518045e-05, "epoch": 3.391812865497076, "percentage": 48.45, "elapsed_time": "3:56:10", "remaining_time": "4:11:14"}
|
||||
{"current_steps": 1745, "total_steps": 3591, "loss": 0.267, "lr": 2.4463959747178235e-05, "epoch": 3.401559454191033, "percentage": 48.59, "elapsed_time": "3:56:50", "remaining_time": "4:10:33"}
|
||||
{"current_steps": 1750, "total_steps": 3591, "loss": 0.2259, "lr": 2.4369127418872006e-05, "epoch": 3.4113060428849904, "percentage": 48.73, "elapsed_time": "3:57:30", "remaining_time": "4:09:51"}
|
||||
{"current_steps": 1755, "total_steps": 3591, "loss": 0.229, "lr": 2.4274191824009844e-05, "epoch": 3.4210526315789473, "percentage": 48.87, "elapsed_time": "3:58:13", "remaining_time": "4:09:13"}
|
||||
{"current_steps": 1760, "total_steps": 3591, "loss": 0.266, "lr": 2.4179155206442957e-05, "epoch": 3.4307992202729043, "percentage": 49.01, "elapsed_time": "3:58:58", "remaining_time": "4:08:36"}
|
||||
{"current_steps": 1765, "total_steps": 3591, "loss": 0.2133, "lr": 2.4084019812410303e-05, "epoch": 3.4405458089668617, "percentage": 49.15, "elapsed_time": "3:59:33", "remaining_time": "4:07:50"}
|
||||
{"current_steps": 1770, "total_steps": 3591, "loss": 0.2424, "lr": 2.3988787890485464e-05, "epoch": 3.4502923976608186, "percentage": 49.29, "elapsed_time": "4:00:14", "remaining_time": "4:07:09"}
|
||||
{"current_steps": 1775, "total_steps": 3591, "loss": 0.2489, "lr": 2.3893461691523516e-05, "epoch": 3.4600389863547756, "percentage": 49.43, "elapsed_time": "4:00:55", "remaining_time": "4:06:29"}
|
||||
{"current_steps": 1780, "total_steps": 3591, "loss": 0.2596, "lr": 2.3798043468607794e-05, "epoch": 3.469785575048733, "percentage": 49.57, "elapsed_time": "4:01:40", "remaining_time": "4:05:53"}
|
||||
{"current_steps": 1785, "total_steps": 3591, "loss": 0.2508, "lr": 2.3702535476996702e-05, "epoch": 3.47953216374269, "percentage": 49.71, "elapsed_time": "4:02:14", "remaining_time": "4:05:05"}
|
||||
{"current_steps": 1790, "total_steps": 3591, "loss": 0.2731, "lr": 2.3606939974070348e-05, "epoch": 3.4892787524366473, "percentage": 49.85, "elapsed_time": "4:02:52", "remaining_time": "4:04:22"}
|
||||
{"current_steps": 1795, "total_steps": 3591, "loss": 0.2657, "lr": 2.3511259219277224e-05, "epoch": 3.4990253411306043, "percentage": 49.99, "elapsed_time": "4:03:33", "remaining_time": "4:03:42"}
|
||||
{"current_steps": 1800, "total_steps": 3591, "loss": 0.2328, "lr": 2.3415495474080786e-05, "epoch": 3.5087719298245617, "percentage": 50.13, "elapsed_time": "4:04:09", "remaining_time": "4:02:55"}
|
||||
{"current_steps": 1805, "total_steps": 3591, "loss": 0.2471, "lr": 2.331965100190603e-05, "epoch": 3.5185185185185186, "percentage": 50.26, "elapsed_time": "4:04:47", "remaining_time": "4:02:12"}
|
||||
{"current_steps": 1810, "total_steps": 3591, "loss": 0.2134, "lr": 2.3223728068085944e-05, "epoch": 3.5282651072124755, "percentage": 50.4, "elapsed_time": "4:05:30", "remaining_time": "4:01:34"}
|
||||
{"current_steps": 1815, "total_steps": 3591, "loss": 0.206, "lr": 2.312772893980803e-05, "epoch": 3.538011695906433, "percentage": 50.54, "elapsed_time": "4:06:13", "remaining_time": "4:00:55"}
|
||||
{"current_steps": 1820, "total_steps": 3591, "loss": 0.2142, "lr": 2.3031655886060663e-05, "epoch": 3.54775828460039, "percentage": 50.68, "elapsed_time": "4:06:54", "remaining_time": "4:00:15"}
|
||||
{"current_steps": 1825, "total_steps": 3591, "loss": 0.2552, "lr": 2.2935511177579493e-05, "epoch": 3.557504873294347, "percentage": 50.82, "elapsed_time": "4:07:33", "remaining_time": "3:59:33"}
|
||||
{"current_steps": 1830, "total_steps": 3591, "loss": 0.2373, "lr": 2.2839297086793754e-05, "epoch": 3.5672514619883042, "percentage": 50.96, "elapsed_time": "4:08:25", "remaining_time": "3:59:03"}
|
||||
{"current_steps": 1835, "total_steps": 3591, "loss": 0.2322, "lr": 2.2743015887772596e-05, "epoch": 3.576998050682261, "percentage": 51.1, "elapsed_time": "4:09:11", "remaining_time": "3:58:28"}
|
||||
{"current_steps": 1840, "total_steps": 3591, "loss": 0.255, "lr": 2.2646669856171264e-05, "epoch": 3.586744639376218, "percentage": 51.24, "elapsed_time": "4:10:00", "remaining_time": "3:57:54"}
|
||||
{"current_steps": 1845, "total_steps": 3591, "loss": 0.2449, "lr": 2.255026126917739e-05, "epoch": 3.5964912280701755, "percentage": 51.38, "elapsed_time": "4:10:41", "remaining_time": "3:57:14"}
|
||||
{"current_steps": 1850, "total_steps": 3591, "loss": 0.2087, "lr": 2.2453792405457118e-05, "epoch": 3.6062378167641325, "percentage": 51.52, "elapsed_time": "4:11:31", "remaining_time": "3:56:42"}
|
||||
{"current_steps": 1855, "total_steps": 3591, "loss": 0.2451, "lr": 2.235726554510127e-05, "epoch": 3.6159844054580894, "percentage": 51.66, "elapsed_time": "4:12:06", "remaining_time": "3:55:56"}
|
||||
{"current_steps": 1860, "total_steps": 3591, "loss": 0.2721, "lr": 2.2260682969571438e-05, "epoch": 3.625730994152047, "percentage": 51.8, "elapsed_time": "4:12:40", "remaining_time": "3:55:08"}
|
||||
{"current_steps": 1865, "total_steps": 3591, "loss": 0.2432, "lr": 2.2164046961646086e-05, "epoch": 3.6354775828460038, "percentage": 51.94, "elapsed_time": "4:13:13", "remaining_time": "3:54:21"}
|
||||
{"current_steps": 1870, "total_steps": 3591, "loss": 0.2529, "lr": 2.2067359805366562e-05, "epoch": 3.645224171539961, "percentage": 52.07, "elapsed_time": "4:13:57", "remaining_time": "3:53:43"}
|
||||
{"current_steps": 1875, "total_steps": 3591, "loss": 0.2576, "lr": 2.197062378598314e-05, "epoch": 3.654970760233918, "percentage": 52.21, "elapsed_time": "4:14:37", "remaining_time": "3:53:02"}
|
||||
{"current_steps": 1880, "total_steps": 3591, "loss": 0.2375, "lr": 2.187384118990101e-05, "epoch": 3.6647173489278755, "percentage": 52.35, "elapsed_time": "4:15:19", "remaining_time": "3:52:22"}
|
||||
{"current_steps": 1885, "total_steps": 3591, "loss": 0.2894, "lr": 2.1777014304626197e-05, "epoch": 3.6744639376218324, "percentage": 52.49, "elapsed_time": "4:15:59", "remaining_time": "3:51:40"}
|
||||
{"current_steps": 1890, "total_steps": 3591, "loss": 0.244, "lr": 2.168014541871156e-05, "epoch": 3.6842105263157894, "percentage": 52.63, "elapsed_time": "4:16:46", "remaining_time": "3:51:06"}
|
||||
{"current_steps": 1895, "total_steps": 3591, "loss": 0.2126, "lr": 2.1583236821702632e-05, "epoch": 3.6939571150097468, "percentage": 52.77, "elapsed_time": "4:17:20", "remaining_time": "3:50:18"}
|
||||
{"current_steps": 1900, "total_steps": 3591, "loss": 0.2223, "lr": 2.1486290804083563e-05, "epoch": 3.7037037037037037, "percentage": 52.91, "elapsed_time": "4:18:10", "remaining_time": "3:49:46"}
|
||||
{"current_steps": 1905, "total_steps": 3591, "loss": 0.2528, "lr": 2.1389309657222937e-05, "epoch": 3.7134502923976607, "percentage": 53.05, "elapsed_time": "4:18:52", "remaining_time": "3:49:06"}
|
||||
{"current_steps": 1910, "total_steps": 3591, "loss": 0.2546, "lr": 2.1292295673319655e-05, "epoch": 3.723196881091618, "percentage": 53.19, "elapsed_time": "4:19:30", "remaining_time": "3:48:23"}
|
||||
{"current_steps": 1915, "total_steps": 3591, "loss": 0.2297, "lr": 2.119525114534873e-05, "epoch": 3.732943469785575, "percentage": 53.33, "elapsed_time": "4:20:10", "remaining_time": "3:47:42"}
|
||||
{"current_steps": 1920, "total_steps": 3591, "loss": 0.2261, "lr": 2.1098178367007084e-05, "epoch": 3.742690058479532, "percentage": 53.47, "elapsed_time": "4:20:42", "remaining_time": "3:46:53"}
|
||||
{"current_steps": 1925, "total_steps": 3591, "loss": 0.2421, "lr": 2.100107963265938e-05, "epoch": 3.7524366471734893, "percentage": 53.61, "elapsed_time": "4:21:18", "remaining_time": "3:46:08"}
|
||||
{"current_steps": 1930, "total_steps": 3591, "loss": 0.2363, "lr": 2.0903957237283748e-05, "epoch": 3.7621832358674463, "percentage": 53.75, "elapsed_time": "4:21:57", "remaining_time": "3:45:26"}
|
||||
{"current_steps": 1935, "total_steps": 3591, "loss": 0.207, "lr": 2.0806813476417558e-05, "epoch": 3.7719298245614032, "percentage": 53.88, "elapsed_time": "4:22:33", "remaining_time": "3:44:41"}
|
||||
{"current_steps": 1940, "total_steps": 3591, "loss": 0.2634, "lr": 2.0709650646103162e-05, "epoch": 3.7816764132553606, "percentage": 54.02, "elapsed_time": "4:23:05", "remaining_time": "3:43:53"}
|
||||
{"current_steps": 1945, "total_steps": 3591, "loss": 0.238, "lr": 2.0612471042833643e-05, "epoch": 3.7914230019493176, "percentage": 54.16, "elapsed_time": "4:23:46", "remaining_time": "3:43:13"}
|
||||
{"current_steps": 1950, "total_steps": 3591, "loss": 0.2338, "lr": 2.05152769634985e-05, "epoch": 3.801169590643275, "percentage": 54.3, "elapsed_time": "4:24:33", "remaining_time": "3:42:38"}
|
||||
{"current_steps": 1955, "total_steps": 3591, "loss": 0.2434, "lr": 2.0418070705329402e-05, "epoch": 3.810916179337232, "percentage": 54.44, "elapsed_time": "4:25:13", "remaining_time": "3:41:56"}
|
||||
{"current_steps": 1960, "total_steps": 3591, "loss": 0.2589, "lr": 2.0320854565845857e-05, "epoch": 3.8206627680311893, "percentage": 54.58, "elapsed_time": "4:26:06", "remaining_time": "3:41:26"}
|
||||
{"current_steps": 1965, "total_steps": 3591, "loss": 0.2456, "lr": 2.0223630842800933e-05, "epoch": 3.8304093567251463, "percentage": 54.72, "elapsed_time": "4:26:38", "remaining_time": "3:40:38"}
|
||||
{"current_steps": 1970, "total_steps": 3591, "loss": 0.2002, "lr": 2.012640183412692e-05, "epoch": 3.840155945419103, "percentage": 54.86, "elapsed_time": "4:27:12", "remaining_time": "3:39:52"}
|
||||
{"current_steps": 1975, "total_steps": 3591, "loss": 0.2556, "lr": 2.0029169837881068e-05, "epoch": 3.8499025341130606, "percentage": 55.0, "elapsed_time": "4:28:02", "remaining_time": "3:39:19"}
|
||||
{"current_steps": 1980, "total_steps": 3591, "loss": 0.227, "lr": 1.9931937152191206e-05, "epoch": 3.8596491228070176, "percentage": 55.14, "elapsed_time": "4:28:44", "remaining_time": "3:38:39"}
|
||||
{"current_steps": 1985, "total_steps": 3591, "loss": 0.2751, "lr": 1.9834706075201487e-05, "epoch": 3.8693957115009745, "percentage": 55.28, "elapsed_time": "4:29:25", "remaining_time": "3:37:59"}
|
||||
{"current_steps": 1990, "total_steps": 3591, "loss": 0.2592, "lr": 1.9737478905018023e-05, "epoch": 3.879142300194932, "percentage": 55.42, "elapsed_time": "4:30:02", "remaining_time": "3:37:15"}
|
||||
{"current_steps": 1995, "total_steps": 3591, "loss": 0.261, "lr": 1.9640257939654592e-05, "epoch": 3.888888888888889, "percentage": 55.56, "elapsed_time": "4:30:47", "remaining_time": "3:36:37"}
|
||||
{"current_steps": 2000, "total_steps": 3591, "loss": 0.214, "lr": 1.9543045476978322e-05, "epoch": 3.898635477582846, "percentage": 55.69, "elapsed_time": "4:31:25", "remaining_time": "3:35:55"}
|
||||
{"current_steps": 2005, "total_steps": 3591, "loss": 0.236, "lr": 1.9445843814655367e-05, "epoch": 3.908382066276803, "percentage": 55.83, "elapsed_time": "4:31:59", "remaining_time": "3:35:09"}
|
||||
{"current_steps": 2010, "total_steps": 3591, "loss": 0.24, "lr": 1.9348655250096622e-05, "epoch": 3.91812865497076, "percentage": 55.97, "elapsed_time": "4:32:33", "remaining_time": "3:34:23"}
|
||||
{"current_steps": 2015, "total_steps": 3591, "loss": 0.2758, "lr": 1.925148208040339e-05, "epoch": 3.927875243664717, "percentage": 56.11, "elapsed_time": "4:33:10", "remaining_time": "3:33:39"}
|
||||
{"current_steps": 2020, "total_steps": 3591, "loss": 0.2129, "lr": 1.915432660231314e-05, "epoch": 3.9376218323586745, "percentage": 56.25, "elapsed_time": "4:33:47", "remaining_time": "3:32:56"}
|
||||
{"current_steps": 2025, "total_steps": 3591, "loss": 0.2234, "lr": 1.9057191112145156e-05, "epoch": 3.9473684210526314, "percentage": 56.39, "elapsed_time": "4:34:24", "remaining_time": "3:32:12"}
|
||||
{"current_steps": 2030, "total_steps": 3591, "loss": 0.2235, "lr": 1.896007790574631e-05, "epoch": 3.957115009746589, "percentage": 56.53, "elapsed_time": "4:35:00", "remaining_time": "3:31:28"}
|
||||
{"current_steps": 2035, "total_steps": 3591, "loss": 0.2396, "lr": 1.8862989278436794e-05, "epoch": 3.9668615984405458, "percentage": 56.67, "elapsed_time": "4:35:44", "remaining_time": "3:30:50"}
|
||||
{"current_steps": 2040, "total_steps": 3591, "loss": 0.2563, "lr": 1.8765927524955855e-05, "epoch": 3.976608187134503, "percentage": 56.81, "elapsed_time": "4:36:20", "remaining_time": "3:30:05"}
|
||||
{"current_steps": 2045, "total_steps": 3591, "loss": 0.2665, "lr": 1.8668894939407566e-05, "epoch": 3.98635477582846, "percentage": 56.95, "elapsed_time": "4:36:54", "remaining_time": "3:29:20"}
|
||||
{"current_steps": 2050, "total_steps": 3591, "loss": 0.2444, "lr": 1.8571893815206594e-05, "epoch": 3.996101364522417, "percentage": 57.09, "elapsed_time": "4:37:36", "remaining_time": "3:28:40"}
|
||||
{"current_steps": 2055, "total_steps": 3591, "loss": 0.2277, "lr": 1.8474926445024012e-05, "epoch": 4.005847953216374, "percentage": 57.23, "elapsed_time": "4:38:17", "remaining_time": "3:28:00"}
|
||||
{"current_steps": 2060, "total_steps": 3591, "loss": 0.2421, "lr": 1.837799512073308e-05, "epoch": 4.015594541910332, "percentage": 57.37, "elapsed_time": "4:38:59", "remaining_time": "3:27:20"}
|
||||
{"current_steps": 2065, "total_steps": 3591, "loss": 0.2314, "lr": 1.8281102133355125e-05, "epoch": 4.025341130604288, "percentage": 57.5, "elapsed_time": "4:39:33", "remaining_time": "3:26:35"}
|
||||
{"current_steps": 2070, "total_steps": 3591, "loss": 0.2033, "lr": 1.818424977300533e-05, "epoch": 4.035087719298246, "percentage": 57.64, "elapsed_time": "4:40:06", "remaining_time": "3:25:48"}
|
||||
{"current_steps": 2075, "total_steps": 3591, "loss": 0.2212, "lr": 1.8087440328838663e-05, "epoch": 4.044834307992203, "percentage": 57.78, "elapsed_time": "4:40:44", "remaining_time": "3:25:06"}
|
||||
{"current_steps": 2080, "total_steps": 3591, "loss": 0.1796, "lr": 1.7990676088995724e-05, "epoch": 4.05458089668616, "percentage": 57.92, "elapsed_time": "4:41:18", "remaining_time": "3:24:21"}
|
||||
{"current_steps": 2085, "total_steps": 3591, "loss": 0.2476, "lr": 1.7893959340548705e-05, "epoch": 4.064327485380117, "percentage": 58.06, "elapsed_time": "4:41:57", "remaining_time": "3:23:39"}
|
||||
{"current_steps": 2090, "total_steps": 3591, "loss": 0.2103, "lr": 1.7797292369447302e-05, "epoch": 4.074074074074074, "percentage": 58.2, "elapsed_time": "4:42:40", "remaining_time": "3:23:00"}
|
||||
{"current_steps": 2095, "total_steps": 3591, "loss": 0.2279, "lr": 1.770067746046471e-05, "epoch": 4.083820662768031, "percentage": 58.34, "elapsed_time": "4:43:20", "remaining_time": "3:22:19"}
|
||||
{"current_steps": 2100, "total_steps": 3591, "loss": 0.1944, "lr": 1.760411689714359e-05, "epoch": 4.093567251461988, "percentage": 58.48, "elapsed_time": "4:43:56", "remaining_time": "3:21:36"}
|
||||
{"current_steps": 2105, "total_steps": 3591, "loss": 0.2178, "lr": 1.750761296174214e-05, "epoch": 4.103313840155946, "percentage": 58.62, "elapsed_time": "4:44:43", "remaining_time": "3:20:59"}
|
||||
{"current_steps": 2110, "total_steps": 3591, "loss": 0.1996, "lr": 1.74111679351801e-05, "epoch": 4.113060428849902, "percentage": 58.76, "elapsed_time": "4:45:12", "remaining_time": "3:20:10"}
|
||||
{"current_steps": 2115, "total_steps": 3591, "loss": 0.2438, "lr": 1.73147840969849e-05, "epoch": 4.12280701754386, "percentage": 58.9, "elapsed_time": "4:45:52", "remaining_time": "3:19:30"}
|
||||
{"current_steps": 2120, "total_steps": 3591, "loss": 0.1869, "lr": 1.721846372523773e-05, "epoch": 4.132553606237817, "percentage": 59.04, "elapsed_time": "4:46:27", "remaining_time": "3:18:45"}
|
||||
{"current_steps": 2125, "total_steps": 3591, "loss": 0.2236, "lr": 1.7122209096519728e-05, "epoch": 4.1423001949317735, "percentage": 59.18, "elapsed_time": "4:47:02", "remaining_time": "3:18:01"}
|
||||
{"current_steps": 2130, "total_steps": 3591, "loss": 0.2255, "lr": 1.702602248585815e-05, "epoch": 4.152046783625731, "percentage": 59.31, "elapsed_time": "4:47:50", "remaining_time": "3:17:25"}
|
||||
{"current_steps": 2135, "total_steps": 3591, "loss": 0.2326, "lr": 1.692990616667263e-05, "epoch": 4.161793372319688, "percentage": 59.45, "elapsed_time": "4:48:29", "remaining_time": "3:16:44"}
|
||||
{"current_steps": 2140, "total_steps": 3591, "loss": 0.2591, "lr": 1.683386241072141e-05, "epoch": 4.171539961013645, "percentage": 59.59, "elapsed_time": "4:49:16", "remaining_time": "3:16:08"}
|
||||
{"current_steps": 2145, "total_steps": 3591, "loss": 0.2176, "lr": 1.673789348804767e-05, "epoch": 4.181286549707602, "percentage": 59.73, "elapsed_time": "4:49:55", "remaining_time": "3:15:27"}
|
||||
{"current_steps": 2150, "total_steps": 3591, "loss": 0.1945, "lr": 1.6642001666925872e-05, "epoch": 4.1910331384015596, "percentage": 59.87, "elapsed_time": "4:50:26", "remaining_time": "3:14:39"}
|
||||
{"current_steps": 2155, "total_steps": 3591, "loss": 0.2224, "lr": 1.6546189213808132e-05, "epoch": 4.200779727095517, "percentage": 60.01, "elapsed_time": "4:51:06", "remaining_time": "3:13:58"}
|
||||
{"current_steps": 2160, "total_steps": 3591, "loss": 0.2053, "lr": 1.6450458393270668e-05, "epoch": 4.2105263157894735, "percentage": 60.15, "elapsed_time": "4:51:36", "remaining_time": "3:13:11"}
|
||||
{"current_steps": 2165, "total_steps": 3591, "loss": 0.1996, "lr": 1.635481146796028e-05, "epoch": 4.220272904483431, "percentage": 60.29, "elapsed_time": "4:52:15", "remaining_time": "3:12:29"}
|
||||
{"current_steps": 2170, "total_steps": 3591, "loss": 0.206, "lr": 1.625925069854084e-05, "epoch": 4.230019493177388, "percentage": 60.43, "elapsed_time": "4:52:45", "remaining_time": "3:11:42"}
|
||||
{"current_steps": 2175, "total_steps": 3591, "loss": 0.222, "lr": 1.6163778343639907e-05, "epoch": 4.239766081871345, "percentage": 60.57, "elapsed_time": "4:53:20", "remaining_time": "3:10:58"}
|
||||
{"current_steps": 2180, "total_steps": 3591, "loss": 0.238, "lr": 1.6068396659795297e-05, "epoch": 4.249512670565302, "percentage": 60.71, "elapsed_time": "4:54:02", "remaining_time": "3:10:19"}
|
||||
{"current_steps": 2185, "total_steps": 3591, "loss": 0.2248, "lr": 1.597310790140178e-05, "epoch": 4.2592592592592595, "percentage": 60.85, "elapsed_time": "4:54:41", "remaining_time": "3:09:37"}
|
||||
{"current_steps": 2190, "total_steps": 3591, "loss": 0.2135, "lr": 1.587791432065778e-05, "epoch": 4.269005847953216, "percentage": 60.99, "elapsed_time": "4:55:18", "remaining_time": "3:08:54"}
|
||||
{"current_steps": 2195, "total_steps": 3591, "loss": 0.2272, "lr": 1.5782818167512156e-05, "epoch": 4.278752436647173, "percentage": 61.13, "elapsed_time": "4:55:50", "remaining_time": "3:08:09"}
|
||||
{"current_steps": 2200, "total_steps": 3591, "loss": 0.236, "lr": 1.5687821689611007e-05, "epoch": 4.288499025341131, "percentage": 61.26, "elapsed_time": "4:56:35", "remaining_time": "3:07:31"}
|
||||
{"current_steps": 2205, "total_steps": 3591, "loss": 0.1978, "lr": 1.5592927132244566e-05, "epoch": 4.298245614035087, "percentage": 61.4, "elapsed_time": "4:57:12", "remaining_time": "3:06:48"}
|
||||
{"current_steps": 2210, "total_steps": 3591, "loss": 0.2372, "lr": 1.5498136738294112e-05, "epoch": 4.307992202729045, "percentage": 61.54, "elapsed_time": "4:57:44", "remaining_time": "3:06:03"}
|
||||
{"current_steps": 2215, "total_steps": 3591, "loss": 0.2189, "lr": 1.540345274817898e-05, "epoch": 4.317738791423002, "percentage": 61.68, "elapsed_time": "4:58:33", "remaining_time": "3:05:28"}
|
||||
{"current_steps": 2220, "total_steps": 3591, "loss": 0.1825, "lr": 1.5308877399803582e-05, "epoch": 4.3274853801169595, "percentage": 61.82, "elapsed_time": "4:59:14", "remaining_time": "3:04:48"}
|
||||
{"current_steps": 2225, "total_steps": 3591, "loss": 0.2305, "lr": 1.5214412928504544e-05, "epoch": 4.337231968810916, "percentage": 61.96, "elapsed_time": "4:59:55", "remaining_time": "3:04:08"}
|
||||
{"current_steps": 2230, "total_steps": 3591, "loss": 0.2143, "lr": 1.5120061566997839e-05, "epoch": 4.346978557504873, "percentage": 62.1, "elapsed_time": "5:00:35", "remaining_time": "3:03:27"}
|
||||
{"current_steps": 2235, "total_steps": 3591, "loss": 0.2449, "lr": 1.502582554532605e-05, "epoch": 4.356725146198831, "percentage": 62.24, "elapsed_time": "5:01:17", "remaining_time": "3:02:47"}
|
||||
{"current_steps": 2240, "total_steps": 3591, "loss": 0.201, "lr": 1.493170709080562e-05, "epoch": 4.366471734892787, "percentage": 62.38, "elapsed_time": "5:01:47", "remaining_time": "3:02:01"}
|
||||
{"current_steps": 2245, "total_steps": 3591, "loss": 0.2359, "lr": 1.483770842797426e-05, "epoch": 4.376218323586745, "percentage": 62.52, "elapsed_time": "5:02:29", "remaining_time": "3:01:21"}
|
||||
{"current_steps": 2250, "total_steps": 3591, "loss": 0.2273, "lr": 1.4743831778538322e-05, "epoch": 4.385964912280702, "percentage": 62.66, "elapsed_time": "5:03:04", "remaining_time": "3:00:38"}
|
||||
{"current_steps": 2255, "total_steps": 3591, "loss": 0.2141, "lr": 1.465007936132032e-05, "epoch": 4.395711500974659, "percentage": 62.8, "elapsed_time": "5:03:42", "remaining_time": "2:59:55"}
|
||||
{"current_steps": 2260, "total_steps": 3591, "loss": 0.2084, "lr": 1.4556453392206478e-05, "epoch": 4.405458089668616, "percentage": 62.94, "elapsed_time": "5:04:24", "remaining_time": "2:59:16"}
|
||||
{"current_steps": 2265, "total_steps": 3591, "loss": 0.2469, "lr": 1.4462956084094336e-05, "epoch": 4.415204678362573, "percentage": 63.07, "elapsed_time": "5:05:08", "remaining_time": "2:58:38"}
|
||||
{"current_steps": 2270, "total_steps": 3591, "loss": 0.2671, "lr": 1.4369589646840493e-05, "epoch": 4.42495126705653, "percentage": 63.21, "elapsed_time": "5:05:57", "remaining_time": "2:58:03"}
|
||||
{"current_steps": 2275, "total_steps": 3591, "loss": 0.2274, "lr": 1.4276356287208324e-05, "epoch": 4.434697855750487, "percentage": 63.35, "elapsed_time": "5:06:37", "remaining_time": "2:57:22"}
|
||||
{"current_steps": 2280, "total_steps": 3591, "loss": 0.2094, "lr": 1.4183258208815872e-05, "epoch": 4.444444444444445, "percentage": 63.49, "elapsed_time": "5:07:14", "remaining_time": "2:56:39"}
|
||||
{"current_steps": 2285, "total_steps": 3591, "loss": 0.2147, "lr": 1.4090297612083705e-05, "epoch": 4.454191033138401, "percentage": 63.63, "elapsed_time": "5:07:52", "remaining_time": "2:55:58"}
|
||||
{"current_steps": 2290, "total_steps": 3591, "loss": 0.1915, "lr": 1.399747669418298e-05, "epoch": 4.4639376218323585, "percentage": 63.77, "elapsed_time": "5:08:36", "remaining_time": "2:55:19"}
|
||||
{"current_steps": 2295, "total_steps": 3591, "loss": 0.2094, "lr": 1.3904797648983438e-05, "epoch": 4.473684210526316, "percentage": 63.91, "elapsed_time": "5:09:23", "remaining_time": "2:54:42"}
|
||||
{"current_steps": 2300, "total_steps": 3591, "loss": 0.2246, "lr": 1.3812262667001615e-05, "epoch": 4.483430799220272, "percentage": 64.05, "elapsed_time": "5:10:08", "remaining_time": "2:54:04"}
|
||||
{"current_steps": 2305, "total_steps": 3591, "loss": 0.2368, "lr": 1.3719873935349019e-05, "epoch": 4.49317738791423, "percentage": 64.19, "elapsed_time": "5:10:50", "remaining_time": "2:53:25"}
|
||||
{"current_steps": 2310, "total_steps": 3591, "loss": 0.2729, "lr": 1.3627633637680475e-05, "epoch": 4.502923976608187, "percentage": 64.33, "elapsed_time": "5:11:34", "remaining_time": "2:52:47"}
|
||||
{"current_steps": 2315, "total_steps": 3591, "loss": 0.2442, "lr": 1.3535543954142475e-05, "epoch": 4.512670565302145, "percentage": 64.47, "elapsed_time": "5:12:01", "remaining_time": "2:51:59"}
|
||||
{"current_steps": 2320, "total_steps": 3591, "loss": 0.2263, "lr": 1.3443607061321684e-05, "epoch": 4.522417153996101, "percentage": 64.61, "elapsed_time": "5:12:44", "remaining_time": "2:51:20"}
|
||||
{"current_steps": 2325, "total_steps": 3591, "loss": 0.2595, "lr": 1.3351825132193472e-05, "epoch": 4.5321637426900585, "percentage": 64.75, "elapsed_time": "5:13:24", "remaining_time": "2:50:39"}
|
||||
{"current_steps": 2330, "total_steps": 3591, "loss": 0.2088, "lr": 1.3260200336070575e-05, "epoch": 4.541910331384016, "percentage": 64.88, "elapsed_time": "5:14:07", "remaining_time": "2:50:00"}
|
||||
{"current_steps": 2335, "total_steps": 3591, "loss": 0.2052, "lr": 1.3168734838551789e-05, "epoch": 4.551656920077972, "percentage": 65.02, "elapsed_time": "5:14:48", "remaining_time": "2:49:19"}
|
||||
{"current_steps": 2340, "total_steps": 3591, "loss": 0.2338, "lr": 1.3077430801470833e-05, "epoch": 4.56140350877193, "percentage": 65.16, "elapsed_time": "5:15:33", "remaining_time": "2:48:42"}
|
||||
{"current_steps": 2345, "total_steps": 3591, "loss": 0.2348, "lr": 1.2986290382845203e-05, "epoch": 4.571150097465887, "percentage": 65.3, "elapsed_time": "5:16:15", "remaining_time": "2:48:02"}
|
||||
{"current_steps": 2350, "total_steps": 3591, "loss": 0.2468, "lr": 1.2895315736825205e-05, "epoch": 4.580896686159844, "percentage": 65.44, "elapsed_time": "5:16:49", "remaining_time": "2:47:18"}
|
||||
{"current_steps": 2355, "total_steps": 3591, "loss": 0.1994, "lr": 1.2804509013643011e-05, "epoch": 4.590643274853801, "percentage": 65.58, "elapsed_time": "5:17:23", "remaining_time": "2:46:34"}
|
||||
{"current_steps": 2360, "total_steps": 3591, "loss": 0.2645, "lr": 1.2713872359561868e-05, "epoch": 4.6003898635477585, "percentage": 65.72, "elapsed_time": "5:18:06", "remaining_time": "2:45:55"}
|
||||
{"current_steps": 2365, "total_steps": 3591, "loss": 0.2293, "lr": 1.2623407916825334e-05, "epoch": 4.610136452241715, "percentage": 65.86, "elapsed_time": "5:18:39", "remaining_time": "2:45:11"}
|
||||
{"current_steps": 2370, "total_steps": 3591, "loss": 0.2216, "lr": 1.253311782360668e-05, "epoch": 4.619883040935672, "percentage": 66.0, "elapsed_time": "5:19:17", "remaining_time": "2:44:29"}
|
||||
{"current_steps": 2375, "total_steps": 3591, "loss": 0.2379, "lr": 1.2443004213958327e-05, "epoch": 4.62962962962963, "percentage": 66.14, "elapsed_time": "5:20:00", "remaining_time": "2:43:50"}
|
||||
{"current_steps": 2380, "total_steps": 3591, "loss": 0.2647, "lr": 1.2353069217761426e-05, "epoch": 4.639376218323587, "percentage": 66.28, "elapsed_time": "5:20:47", "remaining_time": "2:43:13"}
|
||||
{"current_steps": 2385, "total_steps": 3591, "loss": 0.2341, "lr": 1.2263314960675494e-05, "epoch": 4.649122807017544, "percentage": 66.42, "elapsed_time": "5:21:14", "remaining_time": "2:42:26"}
|
||||
{"current_steps": 2390, "total_steps": 3591, "loss": 0.217, "lr": 1.2173743564088208e-05, "epoch": 4.658869395711501, "percentage": 66.56, "elapsed_time": "5:21:48", "remaining_time": "2:41:42"}
|
||||
{"current_steps": 2395, "total_steps": 3591, "loss": 0.2062, "lr": 1.2084357145065217e-05, "epoch": 4.668615984405458, "percentage": 66.69, "elapsed_time": "5:22:33", "remaining_time": "2:41:04"}
|
||||
{"current_steps": 2400, "total_steps": 3591, "loss": 0.2255, "lr": 1.1995157816300157e-05, "epoch": 4.678362573099415, "percentage": 66.83, "elapsed_time": "5:23:09", "remaining_time": "2:40:21"}
|
||||
{"current_steps": 2405, "total_steps": 3591, "loss": 0.211, "lr": 1.1906147686064666e-05, "epoch": 4.688109161793372, "percentage": 66.97, "elapsed_time": "5:23:57", "remaining_time": "2:39:45"}
|
||||
{"current_steps": 2410, "total_steps": 3591, "loss": 0.2405, "lr": 1.1817328858158606e-05, "epoch": 4.69785575048733, "percentage": 67.11, "elapsed_time": "5:24:45", "remaining_time": "2:39:08"}
|
||||
{"current_steps": 2415, "total_steps": 3591, "loss": 0.2451, "lr": 1.1728703431860278e-05, "epoch": 4.707602339181286, "percentage": 67.25, "elapsed_time": "5:25:19", "remaining_time": "2:38:25"}
|
||||
{"current_steps": 2420, "total_steps": 3591, "loss": 0.2256, "lr": 1.1640273501876871e-05, "epoch": 4.717348927875244, "percentage": 67.39, "elapsed_time": "5:25:56", "remaining_time": "2:37:43"}
|
||||
{"current_steps": 2425, "total_steps": 3591, "loss": 0.234, "lr": 1.1552041158294895e-05, "epoch": 4.727095516569201, "percentage": 67.53, "elapsed_time": "5:26:45", "remaining_time": "2:37:06"}
|
||||
{"current_steps": 2430, "total_steps": 3591, "loss": 0.2662, "lr": 1.1464008486530803e-05, "epoch": 4.7368421052631575, "percentage": 67.67, "elapsed_time": "5:27:29", "remaining_time": "2:36:28"}
|
||||
{"current_steps": 2435, "total_steps": 3591, "loss": 0.2264, "lr": 1.1376177567281733e-05, "epoch": 4.746588693957115, "percentage": 67.81, "elapsed_time": "5:28:13", "remaining_time": "2:35:49"}
|
||||
{"current_steps": 2440, "total_steps": 3591, "loss": 0.2005, "lr": 1.128855047647626e-05, "epoch": 4.756335282651072, "percentage": 67.95, "elapsed_time": "5:28:56", "remaining_time": "2:35:09"}
|
||||
{"current_steps": 2445, "total_steps": 3591, "loss": 0.2564, "lr": 1.12011292852254e-05, "epoch": 4.76608187134503, "percentage": 68.09, "elapsed_time": "5:29:45", "remaining_time": "2:34:33"}
|
||||
{"current_steps": 2450, "total_steps": 3591, "loss": 0.2003, "lr": 1.1113916059773607e-05, "epoch": 4.775828460038986, "percentage": 68.23, "elapsed_time": "5:30:28", "remaining_time": "2:33:54"}
|
||||
{"current_steps": 2455, "total_steps": 3591, "loss": 0.2335, "lr": 1.1026912861449972e-05, "epoch": 4.785575048732944, "percentage": 68.37, "elapsed_time": "5:31:05", "remaining_time": "2:33:12"}
|
||||
{"current_steps": 2460, "total_steps": 3591, "loss": 0.2408, "lr": 1.0940121746619478e-05, "epoch": 4.7953216374269, "percentage": 68.5, "elapsed_time": "5:31:49", "remaining_time": "2:32:33"}
|
||||
{"current_steps": 2465, "total_steps": 3591, "loss": 0.1861, "lr": 1.0853544766634403e-05, "epoch": 4.8050682261208575, "percentage": 68.64, "elapsed_time": "5:32:31", "remaining_time": "2:31:53"}
|
||||
{"current_steps": 2470, "total_steps": 3591, "loss": 0.227, "lr": 1.0767183967785864e-05, "epoch": 4.814814814814815, "percentage": 68.78, "elapsed_time": "5:33:15", "remaining_time": "2:31:14"}
|
||||
{"current_steps": 2475, "total_steps": 3591, "loss": 0.2281, "lr": 1.0681041391255396e-05, "epoch": 4.824561403508772, "percentage": 68.92, "elapsed_time": "5:33:49", "remaining_time": "2:30:31"}
|
||||
{"current_steps": 2480, "total_steps": 3591, "loss": 0.2347, "lr": 1.0595119073066756e-05, "epoch": 4.834307992202729, "percentage": 69.06, "elapsed_time": "5:34:31", "remaining_time": "2:29:51"}
|
||||
{"current_steps": 2485, "total_steps": 3591, "loss": 0.238, "lr": 1.0509419044037763e-05, "epoch": 4.844054580896686, "percentage": 69.2, "elapsed_time": "5:35:10", "remaining_time": "2:29:10"}
|
||||
{"current_steps": 2490, "total_steps": 3591, "loss": 0.2138, "lr": 1.0423943329732355e-05, "epoch": 4.853801169590644, "percentage": 69.34, "elapsed_time": "5:35:45", "remaining_time": "2:28:27"}
|
||||
{"current_steps": 2495, "total_steps": 3591, "loss": 0.2272, "lr": 1.0338693950412643e-05, "epoch": 4.8635477582846, "percentage": 69.48, "elapsed_time": "5:36:19", "remaining_time": "2:27:44"}
|
||||
{"current_steps": 2500, "total_steps": 3591, "loss": 0.257, "lr": 1.0253672920991207e-05, "epoch": 4.8732943469785575, "percentage": 69.62, "elapsed_time": "5:36:55", "remaining_time": "2:27:01"}
|
||||
{"current_steps": 2505, "total_steps": 3591, "loss": 0.2392, "lr": 1.0168882250983454e-05, "epoch": 4.883040935672515, "percentage": 69.76, "elapsed_time": "5:37:39", "remaining_time": "2:26:23"}
|
||||
{"current_steps": 2510, "total_steps": 3591, "loss": 0.2335, "lr": 1.0084323944460148e-05, "epoch": 4.892787524366471, "percentage": 69.9, "elapsed_time": "5:38:24", "remaining_time": "2:25:44"}
|
||||
{"current_steps": 2515, "total_steps": 3591, "loss": 0.2078, "lr": 1.0000000000000006e-05, "epoch": 4.902534113060429, "percentage": 70.04, "elapsed_time": "5:39:08", "remaining_time": "2:25:05"}
|
||||
{"current_steps": 2520, "total_steps": 3591, "loss": 0.2004, "lr": 9.915912410642478e-06, "epoch": 4.912280701754386, "percentage": 70.18, "elapsed_time": "5:39:45", "remaining_time": "2:24:23"}
|
||||
{"current_steps": 2525, "total_steps": 3591, "loss": 0.2063, "lr": 9.832063163840642e-06, "epoch": 4.922027290448343, "percentage": 70.31, "elapsed_time": "5:40:16", "remaining_time": "2:23:39"}
|
||||
{"current_steps": 2530, "total_steps": 3591, "loss": 0.199, "lr": 9.748454241414245e-06, "epoch": 4.9317738791423, "percentage": 70.45, "elapsed_time": "5:40:55", "remaining_time": "2:22:58"}
|
||||
{"current_steps": 2535, "total_steps": 3591, "loss": 0.1882, "lr": 9.665087619502824e-06, "epoch": 4.941520467836257, "percentage": 70.59, "elapsed_time": "5:41:39", "remaining_time": "2:22:19"}
|
||||
{"current_steps": 2540, "total_steps": 3591, "loss": 0.2268, "lr": 9.581965268519024e-06, "epoch": 4.951267056530215, "percentage": 70.73, "elapsed_time": "5:42:21", "remaining_time": "2:21:39"}
|
||||
{"current_steps": 2545, "total_steps": 3591, "loss": 0.2161, "lr": 9.499089153102022e-06, "epoch": 4.961013645224171, "percentage": 70.87, "elapsed_time": "5:42:56", "remaining_time": "2:20:56"}
|
||||
{"current_steps": 2550, "total_steps": 3591, "loss": 0.2226, "lr": 9.416461232071104e-06, "epoch": 4.970760233918129, "percentage": 71.01, "elapsed_time": "5:43:38", "remaining_time": "2:20:17"}
|
||||
{"current_steps": 2555, "total_steps": 3591, "loss": 0.2199, "lr": 9.33408345837934e-06, "epoch": 4.980506822612086, "percentage": 71.15, "elapsed_time": "5:44:14", "remaining_time": "2:19:35"}
|
||||
{"current_steps": 2560, "total_steps": 3591, "loss": 0.1924, "lr": 9.251957779067447e-06, "epoch": 4.990253411306043, "percentage": 71.29, "elapsed_time": "5:44:59", "remaining_time": "2:18:56"}
|
||||
{"current_steps": 2565, "total_steps": 3591, "loss": 0.2578, "lr": 9.17008613521775e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "5:45:58", "remaining_time": "2:18:23"}
|
||||
{"current_steps": 2570, "total_steps": 3591, "loss": 0.2267, "lr": 9.088470461908348e-06, "epoch": 5.009746588693957, "percentage": 71.57, "elapsed_time": "5:46:37", "remaining_time": "2:17:42"}
|
||||
{"current_steps": 2575, "total_steps": 3591, "loss": 0.1993, "lr": 9.007112688167312e-06, "epoch": 5.019493177387914, "percentage": 71.71, "elapsed_time": "5:47:14", "remaining_time": "2:17:00"}
|
||||
{"current_steps": 2580, "total_steps": 3591, "loss": 0.205, "lr": 8.926014736927142e-06, "epoch": 5.029239766081871, "percentage": 71.85, "elapsed_time": "5:47:59", "remaining_time": "2:16:21"}
|
||||
{"current_steps": 2585, "total_steps": 3591, "loss": 0.1822, "lr": 8.845178524979287e-06, "epoch": 5.038986354775829, "percentage": 71.99, "elapsed_time": "5:48:45", "remaining_time": "2:15:43"}
|
||||
{"current_steps": 2590, "total_steps": 3591, "loss": 0.1846, "lr": 8.764605962928878e-06, "epoch": 5.048732943469785, "percentage": 72.12, "elapsed_time": "5:49:30", "remaining_time": "2:15:04"}
|
||||
{"current_steps": 2595, "total_steps": 3591, "loss": 0.2055, "lr": 8.684298955149529e-06, "epoch": 5.058479532163743, "percentage": 72.26, "elapsed_time": "5:50:16", "remaining_time": "2:14:26"}
|
||||
{"current_steps": 2600, "total_steps": 3591, "loss": 0.2279, "lr": 8.604259399738342e-06, "epoch": 5.0682261208577, "percentage": 72.4, "elapsed_time": "5:51:02", "remaining_time": "2:13:48"}
|
||||
{"current_steps": 2605, "total_steps": 3591, "loss": 0.1953, "lr": 8.524489188471046e-06, "epoch": 5.077972709551657, "percentage": 72.54, "elapsed_time": "5:51:39", "remaining_time": "2:13:06"}
|
||||
{"current_steps": 2610, "total_steps": 3591, "loss": 0.1965, "lr": 8.444990206757299e-06, "epoch": 5.087719298245614, "percentage": 72.68, "elapsed_time": "5:52:25", "remaining_time": "2:12:27"}
|
||||
{"current_steps": 2615, "total_steps": 3591, "loss": 0.2122, "lr": 8.365764333596098e-06, "epoch": 5.097465886939571, "percentage": 72.82, "elapsed_time": "5:53:05", "remaining_time": "2:11:47"}
|
||||
{"current_steps": 2620, "total_steps": 3591, "loss": 0.2145, "lr": 8.286813441531374e-06, "epoch": 5.107212475633529, "percentage": 72.96, "elapsed_time": "5:53:36", "remaining_time": "2:11:02"}
|
||||
{"current_steps": 2625, "total_steps": 3591, "loss": 0.1903, "lr": 8.208139396607743e-06, "epoch": 5.116959064327485, "percentage": 73.1, "elapsed_time": "5:54:18", "remaining_time": "2:10:23"}
|
||||
{"current_steps": 2630, "total_steps": 3591, "loss": 0.2298, "lr": 8.129744058326416e-06, "epoch": 5.1267056530214425, "percentage": 73.24, "elapsed_time": "5:55:03", "remaining_time": "2:09:44"}
|
||||
{"current_steps": 2635, "total_steps": 3591, "loss": 0.2075, "lr": 8.051629279601207e-06, "epoch": 5.1364522417154, "percentage": 73.38, "elapsed_time": "5:55:43", "remaining_time": "2:09:03"}
|
||||
{"current_steps": 2640, "total_steps": 3591, "loss": 0.2166, "lr": 7.97379690671477e-06, "epoch": 5.146198830409356, "percentage": 73.52, "elapsed_time": "5:56:25", "remaining_time": "2:08:23"}
|
||||
{"current_steps": 2645, "total_steps": 3591, "loss": 0.1936, "lr": 7.896248779274954e-06, "epoch": 5.155945419103314, "percentage": 73.66, "elapsed_time": "5:57:06", "remaining_time": "2:07:43"}
|
||||
{"current_steps": 2650, "total_steps": 3591, "loss": 0.2048, "lr": 7.818986730171312e-06, "epoch": 5.165692007797271, "percentage": 73.8, "elapsed_time": "5:57:44", "remaining_time": "2:07:01"}
|
||||
{"current_steps": 2655, "total_steps": 3591, "loss": 0.2074, "lr": 7.742012585531816e-06, "epoch": 5.175438596491228, "percentage": 73.93, "elapsed_time": "5:58:25", "remaining_time": "2:06:21"}
|
||||
{"current_steps": 2660, "total_steps": 3591, "loss": 0.2089, "lr": 7.66532816467964e-06, "epoch": 5.185185185185185, "percentage": 74.07, "elapsed_time": "5:59:03", "remaining_time": "2:05:40"}
|
||||
{"current_steps": 2665, "total_steps": 3591, "loss": 0.2093, "lr": 7.5889352800902035e-06, "epoch": 5.1949317738791425, "percentage": 74.21, "elapsed_time": "5:59:39", "remaining_time": "2:04:58"}
|
||||
{"current_steps": 2670, "total_steps": 3591, "loss": 0.2323, "lr": 7.512835737348305e-06, "epoch": 5.204678362573099, "percentage": 74.35, "elapsed_time": "6:00:16", "remaining_time": "2:04:16"}
|
||||
{"current_steps": 2675, "total_steps": 3591, "loss": 0.2573, "lr": 7.437031335105484e-06, "epoch": 5.214424951267056, "percentage": 74.49, "elapsed_time": "6:00:55", "remaining_time": "2:03:35"}
|
||||
{"current_steps": 2680, "total_steps": 3591, "loss": 0.2067, "lr": 7.3615238650374586e-06, "epoch": 5.224171539961014, "percentage": 74.63, "elapsed_time": "6:01:39", "remaining_time": "2:02:56"}
|
||||
{"current_steps": 2685, "total_steps": 3591, "loss": 0.2082, "lr": 7.2863151118018136e-06, "epoch": 5.23391812865497, "percentage": 74.77, "elapsed_time": "6:02:25", "remaining_time": "2:02:17"}
|
||||
{"current_steps": 2690, "total_steps": 3591, "loss": 0.2113, "lr": 7.2114068529958016e-06, "epoch": 5.243664717348928, "percentage": 74.91, "elapsed_time": "6:03:06", "remaining_time": "2:01:37"}
|
||||
{"current_steps": 2695, "total_steps": 3591, "loss": 0.2111, "lr": 7.136800859114353e-06, "epoch": 5.253411306042885, "percentage": 75.05, "elapsed_time": "6:03:47", "remaining_time": "2:00:56"}
|
||||
{"current_steps": 2700, "total_steps": 3591, "loss": 0.2326, "lr": 7.0624988935081984e-06, "epoch": 5.2631578947368425, "percentage": 75.19, "elapsed_time": "6:04:30", "remaining_time": "2:00:17"}
|
||||
{"current_steps": 2705, "total_steps": 3591, "loss": 0.1945, "lr": 6.988502712342207e-06, "epoch": 5.272904483430799, "percentage": 75.33, "elapsed_time": "6:05:10", "remaining_time": "1:59:36"}
|
||||
{"current_steps": 2710, "total_steps": 3591, "loss": 0.206, "lr": 6.914814064553869e-06, "epoch": 5.282651072124756, "percentage": 75.47, "elapsed_time": "6:05:59", "remaining_time": "1:58:58"}
|
||||
{"current_steps": 2715, "total_steps": 3591, "loss": 0.2316, "lr": 6.841434691811992e-06, "epoch": 5.292397660818714, "percentage": 75.61, "elapsed_time": "6:06:44", "remaining_time": "1:58:19"}
|
||||
{"current_steps": 2720, "total_steps": 3591, "loss": 0.2424, "lr": 6.768366328475486e-06, "epoch": 5.30214424951267, "percentage": 75.74, "elapsed_time": "6:07:24", "remaining_time": "1:57:39"}
|
||||
{"current_steps": 2725, "total_steps": 3591, "loss": 0.1981, "lr": 6.695610701552404e-06, "epoch": 5.311890838206628, "percentage": 75.88, "elapsed_time": "6:08:10", "remaining_time": "1:57:00"}
|
||||
{"current_steps": 2730, "total_steps": 3591, "loss": 0.2126, "lr": 6.6231695306591015e-06, "epoch": 5.321637426900585, "percentage": 76.02, "elapsed_time": "6:08:51", "remaining_time": "1:56:20"}
|
||||
{"current_steps": 2735, "total_steps": 3591, "loss": 0.2002, "lr": 6.551044527979635e-06, "epoch": 5.331384015594542, "percentage": 76.16, "elapsed_time": "6:09:30", "remaining_time": "1:55:39"}
|
||||
{"current_steps": 2740, "total_steps": 3591, "loss": 0.2262, "lr": 6.4792373982252355e-06, "epoch": 5.341130604288499, "percentage": 76.3, "elapsed_time": "6:10:15", "remaining_time": "1:54:59"}
|
||||
{"current_steps": 2745, "total_steps": 3591, "loss": 0.1817, "lr": 6.407749838594055e-06, "epoch": 5.350877192982456, "percentage": 76.44, "elapsed_time": "6:10:51", "remaining_time": "1:54:17"}
|
||||
{"current_steps": 2750, "total_steps": 3591, "loss": 0.1935, "lr": 6.336583538731038e-06, "epoch": 5.360623781676413, "percentage": 76.58, "elapsed_time": "6:11:27", "remaining_time": "1:53:35"}
|
||||
{"current_steps": 2755, "total_steps": 3591, "loss": 0.2216, "lr": 6.265740180688009e-06, "epoch": 5.37037037037037, "percentage": 76.72, "elapsed_time": "6:12:12", "remaining_time": "1:52:56"}
|
||||
{"current_steps": 2760, "total_steps": 3591, "loss": 0.2179, "lr": 6.195221438883873e-06, "epoch": 5.380116959064328, "percentage": 76.86, "elapsed_time": "6:12:56", "remaining_time": "1:52:17"}
|
||||
{"current_steps": 2765, "total_steps": 3591, "loss": 0.235, "lr": 6.125028980065082e-06, "epoch": 5.389863547758285, "percentage": 77.0, "elapsed_time": "6:13:30", "remaining_time": "1:51:34"}
|
||||
{"current_steps": 2770, "total_steps": 3591, "loss": 0.1848, "lr": 6.055164463266203e-06, "epoch": 5.3996101364522415, "percentage": 77.14, "elapsed_time": "6:14:04", "remaining_time": "1:50:52"}
|
||||
{"current_steps": 2775, "total_steps": 3591, "loss": 0.222, "lr": 5.985629539770761e-06, "epoch": 5.409356725146199, "percentage": 77.28, "elapsed_time": "6:14:47", "remaining_time": "1:50:12"}
|
||||
{"current_steps": 2780, "total_steps": 3591, "loss": 0.1899, "lr": 5.916425853072139e-06, "epoch": 5.419103313840156, "percentage": 77.42, "elapsed_time": "6:15:31", "remaining_time": "1:49:32"}
|
||||
{"current_steps": 2785, "total_steps": 3591, "loss": 0.1976, "lr": 5.847555038834789e-06, "epoch": 5.428849902534113, "percentage": 77.55, "elapsed_time": "6:16:16", "remaining_time": "1:48:53"}
|
||||
{"current_steps": 2790, "total_steps": 3591, "loss": 0.1983, "lr": 5.77901872485554e-06, "epoch": 5.43859649122807, "percentage": 77.69, "elapsed_time": "6:17:01", "remaining_time": "1:48:14"}
|
||||
{"current_steps": 2795, "total_steps": 3591, "loss": 0.2379, "lr": 5.7108185310251594e-06, "epoch": 5.448343079922028, "percentage": 77.83, "elapsed_time": "6:17:39", "remaining_time": "1:47:33"}
|
||||
{"current_steps": 2800, "total_steps": 3591, "loss": 0.2249, "lr": 5.642956069290018e-06, "epoch": 5.458089668615984, "percentage": 77.97, "elapsed_time": "6:18:24", "remaining_time": "1:46:53"}
|
||||
{"current_steps": 2805, "total_steps": 3591, "loss": 0.1912, "lr": 5.57543294361403e-06, "epoch": 5.4678362573099415, "percentage": 78.11, "elapsed_time": "6:19:00", "remaining_time": "1:46:12"}
|
||||
{"current_steps": 2810, "total_steps": 3591, "loss": 0.242, "lr": 5.508250749940716e-06, "epoch": 5.477582846003899, "percentage": 78.25, "elapsed_time": "6:19:43", "remaining_time": "1:45:32"}
|
||||
{"current_steps": 2815, "total_steps": 3591, "loss": 0.2121, "lr": 5.441411076155516e-06, "epoch": 5.487329434697855, "percentage": 78.39, "elapsed_time": "6:20:24", "remaining_time": "1:44:51"}
|
||||
{"current_steps": 2820, "total_steps": 3591, "loss": 0.2224, "lr": 5.374915502048224e-06, "epoch": 5.497076023391813, "percentage": 78.53, "elapsed_time": "6:21:08", "remaining_time": "1:44:12"}
|
||||
{"current_steps": 2825, "total_steps": 3591, "loss": 0.2084, "lr": 5.30876559927566e-06, "epoch": 5.50682261208577, "percentage": 78.67, "elapsed_time": "6:21:50", "remaining_time": "1:43:32"}
|
||||
{"current_steps": 2830, "total_steps": 3591, "loss": 0.191, "lr": 5.242962931324527e-06, "epoch": 5.516569200779728, "percentage": 78.81, "elapsed_time": "6:22:25", "remaining_time": "1:42:50"}
|
||||
{"current_steps": 2835, "total_steps": 3591, "loss": 0.2241, "lr": 5.177509053474468e-06, "epoch": 5.526315789473684, "percentage": 78.95, "elapsed_time": "6:23:04", "remaining_time": "1:42:09"}
|
||||
{"current_steps": 2840, "total_steps": 3591, "loss": 0.2103, "lr": 5.1124055127612785e-06, "epoch": 5.5360623781676415, "percentage": 79.09, "elapsed_time": "6:23:41", "remaining_time": "1:41:27"}
|
||||
{"current_steps": 2845, "total_steps": 3591, "loss": 0.234, "lr": 5.047653847940363e-06, "epoch": 5.545808966861598, "percentage": 79.23, "elapsed_time": "6:24:19", "remaining_time": "1:40:46"}
|
||||
{"current_steps": 2850, "total_steps": 3591, "loss": 0.1777, "lr": 4.9832555894503555e-06, "epoch": 5.555555555555555, "percentage": 79.37, "elapsed_time": "6:25:00", "remaining_time": "1:40:06"}
|
||||
{"current_steps": 2855, "total_steps": 3591, "loss": 0.2324, "lr": 4.9192122593769686e-06, "epoch": 5.565302144249513, "percentage": 79.5, "elapsed_time": "6:25:31", "remaining_time": "1:39:23"}
|
||||
{"current_steps": 2860, "total_steps": 3591, "loss": 0.228, "lr": 4.855525371416978e-06, "epoch": 5.57504873294347, "percentage": 79.64, "elapsed_time": "6:26:09", "remaining_time": "1:38:42"}
|
||||
{"current_steps": 2865, "total_steps": 3591, "loss": 0.2081, "lr": 4.79219643084248e-06, "epoch": 5.584795321637427, "percentage": 79.78, "elapsed_time": "6:26:44", "remaining_time": "1:38:00"}
|
||||
{"current_steps": 2870, "total_steps": 3591, "loss": 0.2317, "lr": 4.7292269344652964e-06, "epoch": 5.594541910331384, "percentage": 79.92, "elapsed_time": "6:27:12", "remaining_time": "1:37:16"}
|
||||
{"current_steps": 2875, "total_steps": 3591, "loss": 0.2152, "lr": 4.6666183706016076e-06, "epoch": 5.604288499025341, "percentage": 80.06, "elapsed_time": "6:27:55", "remaining_time": "1:36:36"}
|
||||
{"current_steps": 2880, "total_steps": 3591, "loss": 0.218, "lr": 4.604372219036768e-06, "epoch": 5.614035087719298, "percentage": 80.2, "elapsed_time": "6:28:33", "remaining_time": "1:35:55"}
|
||||
{"current_steps": 2885, "total_steps": 3591, "loss": 0.1891, "lr": 4.542489950990325e-06, "epoch": 5.623781676413255, "percentage": 80.34, "elapsed_time": "6:29:07", "remaining_time": "1:35:13"}
|
||||
{"current_steps": 2890, "total_steps": 3591, "loss": 0.1978, "lr": 4.4809730290812705e-06, "epoch": 5.633528265107213, "percentage": 80.48, "elapsed_time": "6:29:44", "remaining_time": "1:34:32"}
|
||||
{"current_steps": 2895, "total_steps": 3591, "loss": 0.2298, "lr": 4.419822907293438e-06, "epoch": 5.643274853801169, "percentage": 80.62, "elapsed_time": "6:30:21", "remaining_time": "1:33:50"}
|
||||
{"current_steps": 2900, "total_steps": 3591, "loss": 0.2082, "lr": 4.35904103094116e-06, "epoch": 5.653021442495127, "percentage": 80.76, "elapsed_time": "6:31:02", "remaining_time": "1:33:10"}
|
||||
{"current_steps": 2905, "total_steps": 3591, "loss": 0.2276, "lr": 4.298628836635092e-06, "epoch": 5.662768031189084, "percentage": 80.9, "elapsed_time": "6:31:45", "remaining_time": "1:32:30"}
|
||||
{"current_steps": 2910, "total_steps": 3591, "loss": 0.2063, "lr": 4.238587752248284e-06, "epoch": 5.6725146198830405, "percentage": 81.04, "elapsed_time": "6:32:28", "remaining_time": "1:31:50"}
|
||||
{"current_steps": 2915, "total_steps": 3591, "loss": 0.1982, "lr": 4.178919196882398e-06, "epoch": 5.682261208576998, "percentage": 81.18, "elapsed_time": "6:33:06", "remaining_time": "1:31:09"}
|
||||
{"current_steps": 2920, "total_steps": 3591, "loss": 0.2318, "lr": 4.119624580834183e-06, "epoch": 5.692007797270955, "percentage": 81.31, "elapsed_time": "6:33:40", "remaining_time": "1:30:27"}
|
||||
{"current_steps": 2925, "total_steps": 3591, "loss": 0.2122, "lr": 4.060705305562143e-06, "epoch": 5.701754385964913, "percentage": 81.45, "elapsed_time": "6:34:16", "remaining_time": "1:29:46"}
|
||||
{"current_steps": 2930, "total_steps": 3591, "loss": 0.1845, "lr": 4.002162763653421e-06, "epoch": 5.711500974658869, "percentage": 81.59, "elapsed_time": "6:35:09", "remaining_time": "1:29:08"}
|
||||
{"current_steps": 2935, "total_steps": 3591, "loss": 0.2128, "lr": 3.943998338790864e-06, "epoch": 5.721247563352827, "percentage": 81.73, "elapsed_time": "6:35:58", "remaining_time": "1:28:30"}
|
||||
{"current_steps": 2940, "total_steps": 3591, "loss": 0.1976, "lr": 3.886213405720334e-06, "epoch": 5.730994152046784, "percentage": 81.87, "elapsed_time": "6:36:48", "remaining_time": "1:27:51"}
|
||||
{"current_steps": 2945, "total_steps": 3591, "loss": 0.1862, "lr": 3.828809330218199e-06, "epoch": 5.7407407407407405, "percentage": 82.01, "elapsed_time": "6:37:31", "remaining_time": "1:27:12"}
|
||||
{"current_steps": 2950, "total_steps": 3591, "loss": 0.2038, "lr": 3.7717874690590918e-06, "epoch": 5.750487329434698, "percentage": 82.15, "elapsed_time": "6:38:06", "remaining_time": "1:26:30"}
|
||||
{"current_steps": 2955, "total_steps": 3591, "loss": 0.22, "lr": 3.715149169983787e-06, "epoch": 5.760233918128655, "percentage": 82.29, "elapsed_time": "6:38:46", "remaining_time": "1:25:49"}
|
||||
{"current_steps": 2960, "total_steps": 3591, "loss": 0.1847, "lr": 3.6588957716673856e-06, "epoch": 5.769980506822612, "percentage": 82.43, "elapsed_time": "6:39:27", "remaining_time": "1:25:09"}
|
||||
{"current_steps": 2965, "total_steps": 3591, "loss": 0.2036, "lr": 3.603028603687655e-06, "epoch": 5.779727095516569, "percentage": 82.57, "elapsed_time": "6:40:07", "remaining_time": "1:24:28"}
|
||||
{"current_steps": 2970, "total_steps": 3591, "loss": 0.2092, "lr": 3.5475489864936275e-06, "epoch": 5.7894736842105265, "percentage": 82.71, "elapsed_time": "6:40:39", "remaining_time": "1:23:46"}
|
||||
{"current_steps": 2975, "total_steps": 3591, "loss": 0.1899, "lr": 3.4924582313743604e-06, "epoch": 5.799220272904483, "percentage": 82.85, "elapsed_time": "6:41:14", "remaining_time": "1:23:04"}
|
||||
{"current_steps": 2980, "total_steps": 3591, "loss": 0.181, "lr": 3.4377576404279587e-06, "epoch": 5.8089668615984404, "percentage": 82.99, "elapsed_time": "6:41:47", "remaining_time": "1:22:22"}
|
||||
{"current_steps": 2985, "total_steps": 3591, "loss": 0.2364, "lr": 3.3834485065307996e-06, "epoch": 5.818713450292398, "percentage": 83.12, "elapsed_time": "6:42:23", "remaining_time": "1:21:41"}
|
||||
{"current_steps": 2990, "total_steps": 3591, "loss": 0.2255, "lr": 3.3295321133069838e-06, "epoch": 5.828460038986355, "percentage": 83.26, "elapsed_time": "6:42:57", "remaining_time": "1:20:59"}
|
||||
{"current_steps": 2995, "total_steps": 3591, "loss": 0.1907, "lr": 3.2760097350979715e-06, "epoch": 5.838206627680312, "percentage": 83.4, "elapsed_time": "6:43:32", "remaining_time": "1:20:18"}
|
||||
{"current_steps": 3000, "total_steps": 3591, "loss": 0.2136, "lr": 3.2228826369324806e-06, "epoch": 5.847953216374269, "percentage": 83.54, "elapsed_time": "6:44:14", "remaining_time": "1:19:38"}
|
||||
{"current_steps": 3005, "total_steps": 3591, "loss": 0.2123, "lr": 3.170152074496582e-06, "epoch": 5.857699805068226, "percentage": 83.68, "elapsed_time": "6:45:33", "remaining_time": "1:19:05"}
|
||||
{"current_steps": 3010, "total_steps": 3591, "loss": 0.205, "lr": 3.117819294104032e-06, "epoch": 5.867446393762183, "percentage": 83.82, "elapsed_time": "6:46:10", "remaining_time": "1:18:24"}
|
||||
{"current_steps": 3015, "total_steps": 3591, "loss": 0.2092, "lr": 3.065885532666799e-06, "epoch": 5.87719298245614, "percentage": 83.96, "elapsed_time": "6:46:46", "remaining_time": "1:17:42"}
|
||||
{"current_steps": 3020, "total_steps": 3591, "loss": 0.2308, "lr": 3.014352017665829e-06, "epoch": 5.886939571150098, "percentage": 84.1, "elapsed_time": "6:47:25", "remaining_time": "1:17:01"}
|
||||
{"current_steps": 3025, "total_steps": 3591, "loss": 0.2262, "lr": 2.9632199671220417e-06, "epoch": 5.896686159844054, "percentage": 84.24, "elapsed_time": "6:48:05", "remaining_time": "1:16:21"}
|
||||
{"current_steps": 3030, "total_steps": 3591, "loss": 0.2145, "lr": 2.9124905895675537e-06, "epoch": 5.906432748538012, "percentage": 84.38, "elapsed_time": "6:48:44", "remaining_time": "1:15:40"}
|
||||
{"current_steps": 3035, "total_steps": 3591, "loss": 0.2228, "lr": 2.8621650840170832e-06, "epoch": 5.916179337231969, "percentage": 84.52, "elapsed_time": "6:49:24", "remaining_time": "1:15:00"}
|
||||
{"current_steps": 3040, "total_steps": 3591, "loss": 0.1726, "lr": 2.8122446399396295e-06, "epoch": 5.925925925925926, "percentage": 84.66, "elapsed_time": "6:50:02", "remaining_time": "1:14:19"}
|
||||
{"current_steps": 3045, "total_steps": 3591, "loss": 0.2113, "lr": 2.7627304372303676e-06, "epoch": 5.935672514619883, "percentage": 84.8, "elapsed_time": "6:50:46", "remaining_time": "1:13:39"}
|
||||
{"current_steps": 3050, "total_steps": 3591, "loss": 0.1938, "lr": 2.7136236461827327e-06, "epoch": 5.94541910331384, "percentage": 84.93, "elapsed_time": "6:51:29", "remaining_time": "1:12:59"}
|
||||
{"current_steps": 3055, "total_steps": 3591, "loss": 0.213, "lr": 2.6649254274608026e-06, "epoch": 5.955165692007797, "percentage": 85.07, "elapsed_time": "6:52:05", "remaining_time": "1:12:18"}
|
||||
{"current_steps": 3060, "total_steps": 3591, "loss": 0.1852, "lr": 2.6166369320718167e-06, "epoch": 5.964912280701754, "percentage": 85.21, "elapsed_time": "6:52:42", "remaining_time": "1:11:36"}
|
||||
{"current_steps": 3065, "total_steps": 3591, "loss": 0.2274, "lr": 2.568759301339008e-06, "epoch": 5.974658869395712, "percentage": 85.35, "elapsed_time": "6:53:13", "remaining_time": "1:10:54"}
|
||||
{"current_steps": 3070, "total_steps": 3591, "loss": 0.2289, "lr": 2.5212936668746e-06, "epoch": 5.984405458089668, "percentage": 85.49, "elapsed_time": "6:53:45", "remaining_time": "1:10:13"}
|
||||
{"current_steps": 3075, "total_steps": 3591, "loss": 0.1762, "lr": 2.474241150553094e-06, "epoch": 5.994152046783626, "percentage": 85.63, "elapsed_time": "6:54:24", "remaining_time": "1:09:32"}
|
||||
{"current_steps": 3080, "total_steps": 3591, "loss": 0.1901, "lr": 2.4276028644847172e-06, "epoch": 6.003898635477583, "percentage": 85.77, "elapsed_time": "6:54:57", "remaining_time": "1:08:50"}
|
||||
{"current_steps": 3085, "total_steps": 3591, "loss": 0.1976, "lr": 2.381379910989161e-06, "epoch": 6.01364522417154, "percentage": 85.91, "elapsed_time": "6:55:34", "remaining_time": "1:08:09"}
|
||||
{"current_steps": 3090, "total_steps": 3591, "loss": 0.1971, "lr": 2.3355733825695114e-06, "epoch": 6.023391812865497, "percentage": 86.05, "elapsed_time": "6:56:15", "remaining_time": "1:07:29"}
|
||||
{"current_steps": 3095, "total_steps": 3591, "loss": 0.1835, "lr": 2.2901843618864492e-06, "epoch": 6.033138401559454, "percentage": 86.19, "elapsed_time": "6:56:59", "remaining_time": "1:06:49"}
|
||||
{"current_steps": 3100, "total_steps": 3591, "loss": 0.1956, "lr": 2.245213921732632e-06, "epoch": 6.042884990253412, "percentage": 86.33, "elapsed_time": "6:57:43", "remaining_time": "1:06:09"}
|
||||
{"current_steps": 3105, "total_steps": 3591, "loss": 0.1788, "lr": 2.2006631250073627e-06, "epoch": 6.052631578947368, "percentage": 86.47, "elapsed_time": "6:58:20", "remaining_time": "1:05:28"}
|
||||
{"current_steps": 3110, "total_steps": 3591, "loss": 0.1907, "lr": 2.1565330246914516e-06, "epoch": 6.0623781676413255, "percentage": 86.61, "elapsed_time": "6:59:02", "remaining_time": "1:04:48"}
|
||||
{"current_steps": 3115, "total_steps": 3591, "loss": 0.2154, "lr": 2.1128246638223416e-06, "epoch": 6.072124756335283, "percentage": 86.74, "elapsed_time": "6:59:42", "remaining_time": "1:04:08"}
|
||||
{"current_steps": 3120, "total_steps": 3591, "loss": 0.2217, "lr": 2.0695390754694424e-06, "epoch": 6.081871345029239, "percentage": 86.88, "elapsed_time": "7:00:22", "remaining_time": "1:03:27"}
|
||||
{"current_steps": 3125, "total_steps": 3591, "loss": 0.2118, "lr": 2.0266772827097213e-06, "epoch": 6.091617933723197, "percentage": 87.02, "elapsed_time": "7:01:03", "remaining_time": "1:02:47"}
|
||||
{"current_steps": 3130, "total_steps": 3591, "loss": 0.2077, "lr": 1.9842402986035193e-06, "epoch": 6.101364522417154, "percentage": 87.16, "elapsed_time": "7:01:49", "remaining_time": "1:02:07"}
|
||||
{"current_steps": 3135, "total_steps": 3591, "loss": 0.2079, "lr": 1.942229126170614e-06, "epoch": 6.111111111111111, "percentage": 87.3, "elapsed_time": "7:02:35", "remaining_time": "1:01:28"}
|
||||
{"current_steps": 3140, "total_steps": 3591, "loss": 0.1849, "lr": 1.900644758366501e-06, "epoch": 6.120857699805068, "percentage": 87.44, "elapsed_time": "7:03:16", "remaining_time": "1:00:47"}
|
||||
{"current_steps": 3145, "total_steps": 3591, "loss": 0.2067, "lr": 1.8594881780589302e-06, "epoch": 6.1306042884990255, "percentage": 87.58, "elapsed_time": "7:03:53", "remaining_time": "1:00:06"}
|
||||
{"current_steps": 3150, "total_steps": 3591, "loss": 0.1705, "lr": 1.8187603580046765e-06, "epoch": 6.140350877192983, "percentage": 87.72, "elapsed_time": "7:04:31", "remaining_time": "0:59:25"}
|
||||
{"current_steps": 3155, "total_steps": 3591, "loss": 0.1979, "lr": 1.7784622608265567e-06, "epoch": 6.150097465886939, "percentage": 87.86, "elapsed_time": "7:05:09", "remaining_time": "0:58:45"}
|
||||
{"current_steps": 3160, "total_steps": 3591, "loss": 0.2075, "lr": 1.7385948389906526e-06, "epoch": 6.159844054580897, "percentage": 88.0, "elapsed_time": "7:05:48", "remaining_time": "0:58:04"}
|
||||
{"current_steps": 3165, "total_steps": 3591, "loss": 0.2506, "lr": 1.6991590347838238e-06, "epoch": 6.169590643274854, "percentage": 88.14, "elapsed_time": "7:06:33", "remaining_time": "0:57:24"}
|
||||
{"current_steps": 3170, "total_steps": 3591, "loss": 0.2183, "lr": 1.6601557802914237e-06, "epoch": 6.179337231968811, "percentage": 88.28, "elapsed_time": "7:07:11", "remaining_time": "0:56:44"}
|
||||
{"current_steps": 3175, "total_steps": 3591, "loss": 0.2132, "lr": 1.6215859973752811e-06, "epoch": 6.189083820662768, "percentage": 88.42, "elapsed_time": "7:07:48", "remaining_time": "0:56:03"}
|
||||
{"current_steps": 3180, "total_steps": 3591, "loss": 0.1855, "lr": 1.58345059765189e-06, "epoch": 6.1988304093567255, "percentage": 88.55, "elapsed_time": "7:08:35", "remaining_time": "0:55:23"}
|
||||
{"current_steps": 3185, "total_steps": 3591, "loss": 0.1793, "lr": 1.5457504824708824e-06, "epoch": 6.208576998050682, "percentage": 88.69, "elapsed_time": "7:09:13", "remaining_time": "0:54:42"}
|
||||
{"current_steps": 3190, "total_steps": 3591, "loss": 0.1963, "lr": 1.5084865428937146e-06, "epoch": 6.218323586744639, "percentage": 88.83, "elapsed_time": "7:09:46", "remaining_time": "0:54:01"}
|
||||
{"current_steps": 3195, "total_steps": 3591, "loss": 0.1673, "lr": 1.4716596596726197e-06, "epoch": 6.228070175438597, "percentage": 88.97, "elapsed_time": "7:10:31", "remaining_time": "0:53:21"}
|
||||
{"current_steps": 3200, "total_steps": 3591, "loss": 0.2333, "lr": 1.4352707032297697e-06, "epoch": 6.237816764132553, "percentage": 89.11, "elapsed_time": "7:11:15", "remaining_time": "0:52:41"}
|
||||
{"current_steps": 3205, "total_steps": 3591, "loss": 0.1953, "lr": 1.399320533636721e-06, "epoch": 6.247563352826511, "percentage": 89.25, "elapsed_time": "7:11:55", "remaining_time": "0:52:01"}
|
||||
{"current_steps": 3210, "total_steps": 3591, "loss": 0.1867, "lr": 1.3638100005940702e-06, "epoch": 6.257309941520468, "percentage": 89.39, "elapsed_time": "7:12:33", "remaining_time": "0:51:20"}
|
||||
{"current_steps": 3215, "total_steps": 3591, "loss": 0.202, "lr": 1.3287399434113923e-06, "epoch": 6.2670565302144245, "percentage": 89.53, "elapsed_time": "7:13:12", "remaining_time": "0:50:39"}
|
||||
{"current_steps": 3220, "total_steps": 3591, "loss": 0.2131, "lr": 1.2941111909873793e-06, "epoch": 6.276803118908382, "percentage": 89.67, "elapsed_time": "7:13:43", "remaining_time": "0:49:58"}
|
||||
{"current_steps": 3225, "total_steps": 3591, "loss": 0.1996, "lr": 1.2599245617902668e-06, "epoch": 6.286549707602339, "percentage": 89.81, "elapsed_time": "7:14:13", "remaining_time": "0:49:16"}
|
||||
{"current_steps": 3230, "total_steps": 3591, "loss": 0.212, "lr": 1.2261808638384753e-06, "epoch": 6.296296296296296, "percentage": 89.95, "elapsed_time": "7:14:52", "remaining_time": "0:48:36"}
|
||||
{"current_steps": 3235, "total_steps": 3591, "loss": 0.1902, "lr": 1.1928808946815318e-06, "epoch": 6.306042884990253, "percentage": 90.09, "elapsed_time": "7:15:26", "remaining_time": "0:47:55"}
|
||||
{"current_steps": 3240, "total_steps": 3591, "loss": 0.2076, "lr": 1.160025441381194e-06, "epoch": 6.315789473684211, "percentage": 90.23, "elapsed_time": "7:16:06", "remaining_time": "0:47:14"}
|
||||
{"current_steps": 3245, "total_steps": 3591, "loss": 0.205, "lr": 1.1276152804928664e-06, "epoch": 6.325536062378168, "percentage": 90.36, "elapsed_time": "7:16:51", "remaining_time": "0:46:34"}
|
||||
{"current_steps": 3250, "total_steps": 3591, "loss": 0.218, "lr": 1.0956511780472368e-06, "epoch": 6.3352826510721245, "percentage": 90.5, "elapsed_time": "7:17:32", "remaining_time": "0:45:54"}
|
||||
{"current_steps": 3255, "total_steps": 3591, "loss": 0.2265, "lr": 1.0641338895321818e-06, "epoch": 6.345029239766082, "percentage": 90.64, "elapsed_time": "7:18:15", "remaining_time": "0:45:14"}
|
||||
{"current_steps": 3260, "total_steps": 3591, "loss": 0.2192, "lr": 1.0330641598748925e-06, "epoch": 6.354775828460039, "percentage": 90.78, "elapsed_time": "7:18:49", "remaining_time": "0:44:33"}
|
||||
{"current_steps": 3265, "total_steps": 3591, "loss": 0.1869, "lr": 1.0024427234242883e-06, "epoch": 6.364522417153996, "percentage": 90.92, "elapsed_time": "7:19:35", "remaining_time": "0:43:53"}
|
||||
{"current_steps": 3270, "total_steps": 3591, "loss": 0.1891, "lr": 9.722703039336378e-07, "epoch": 6.374269005847953, "percentage": 91.06, "elapsed_time": "7:20:15", "remaining_time": "0:43:13"}
|
||||
{"current_steps": 3275, "total_steps": 3591, "loss": 0.2018, "lr": 9.425476145434765e-07, "epoch": 6.384015594541911, "percentage": 91.2, "elapsed_time": "7:21:01", "remaining_time": "0:42:33"}
|
||||
{"current_steps": 3280, "total_steps": 3591, "loss": 0.2059, "lr": 9.132753577647357e-07, "epoch": 6.393762183235867, "percentage": 91.34, "elapsed_time": "7:21:56", "remaining_time": "0:41:54"}
|
||||
{"current_steps": 3285, "total_steps": 3591, "loss": 0.1771, "lr": 8.844542254621369e-07, "epoch": 6.4035087719298245, "percentage": 91.48, "elapsed_time": "7:22:36", "remaining_time": "0:41:13"}
|
||||
{"current_steps": 3290, "total_steps": 3591, "loss": 0.2081, "lr": 8.560848988378478e-07, "epoch": 6.413255360623782, "percentage": 91.62, "elapsed_time": "7:23:17", "remaining_time": "0:40:33"}
|
||||
{"current_steps": 3295, "total_steps": 3591, "loss": 0.1952, "lr": 8.281680484153854e-07, "epoch": 6.423001949317738, "percentage": 91.76, "elapsed_time": "7:24:07", "remaining_time": "0:39:53"}
|
||||
{"current_steps": 3300, "total_steps": 3591, "loss": 0.1941, "lr": 8.007043340237475e-07, "epoch": 6.432748538011696, "percentage": 91.9, "elapsed_time": "7:24:53", "remaining_time": "0:39:13"}
|
||||
{"current_steps": 3305, "total_steps": 3591, "loss": 0.2046, "lr": 7.736944047818395e-07, "epoch": 6.442495126705653, "percentage": 92.04, "elapsed_time": "7:25:39", "remaining_time": "0:38:33"}
|
||||
{"current_steps": 3310, "total_steps": 3591, "loss": 0.1773, "lr": 7.471388990831219e-07, "epoch": 6.4522417153996106, "percentage": 92.17, "elapsed_time": "7:26:18", "remaining_time": "0:37:53"}
|
||||
{"current_steps": 3315, "total_steps": 3591, "loss": 0.19, "lr": 7.210384445805241e-07, "epoch": 6.461988304093567, "percentage": 92.31, "elapsed_time": "7:26:52", "remaining_time": "0:37:12"}
|
||||
{"current_steps": 3320, "total_steps": 3591, "loss": 0.2433, "lr": 6.953936581716014e-07, "epoch": 6.4717348927875245, "percentage": 92.45, "elapsed_time": "7:27:30", "remaining_time": "0:36:31"}
|
||||
{"current_steps": 3325, "total_steps": 3591, "loss": 0.1867, "lr": 6.702051459839687e-07, "epoch": 6.481481481481482, "percentage": 92.59, "elapsed_time": "7:28:09", "remaining_time": "0:35:51"}
|
||||
{"current_steps": 3330, "total_steps": 3591, "loss": 0.2231, "lr": 6.454735033609649e-07, "epoch": 6.491228070175438, "percentage": 92.73, "elapsed_time": "7:28:42", "remaining_time": "0:35:10"}
|
||||
{"current_steps": 3335, "total_steps": 3591, "loss": 0.219, "lr": 6.211993148475826e-07, "epoch": 6.500974658869396, "percentage": 92.87, "elapsed_time": "7:29:11", "remaining_time": "0:34:28"}
|
||||
{"current_steps": 3340, "total_steps": 3591, "loss": 0.2169, "lr": 5.973831541766561e-07, "epoch": 6.510721247563353, "percentage": 93.01, "elapsed_time": "7:29:50", "remaining_time": "0:33:48"}
|
||||
{"current_steps": 3345, "total_steps": 3591, "loss": 0.1809, "lr": 5.740255842552866e-07, "epoch": 6.52046783625731, "percentage": 93.15, "elapsed_time": "7:30:33", "remaining_time": "0:33:08"}
|
||||
{"current_steps": 3350, "total_steps": 3591, "loss": 0.2196, "lr": 5.511271571515697e-07, "epoch": 6.530214424951267, "percentage": 93.29, "elapsed_time": "7:31:20", "remaining_time": "0:32:28"}
|
||||
{"current_steps": 3355, "total_steps": 3591, "loss": 0.2257, "lr": 5.286884140815063e-07, "epoch": 6.539961013645224, "percentage": 93.43, "elapsed_time": "7:31:56", "remaining_time": "0:31:47"}
|
||||
{"current_steps": 3360, "total_steps": 3591, "loss": 0.2053, "lr": 5.067098853962393e-07, "epoch": 6.549707602339181, "percentage": 93.57, "elapsed_time": "7:32:26", "remaining_time": "0:31:06"}
|
||||
{"current_steps": 3365, "total_steps": 3591, "loss": 0.1916, "lr": 4.851920905695107e-07, "epoch": 6.559454191033138, "percentage": 93.71, "elapsed_time": "7:33:01", "remaining_time": "0:30:25"}
|
||||
{"current_steps": 3370, "total_steps": 3591, "loss": 0.1907, "lr": 4.641355381853796e-07, "epoch": 6.569200779727096, "percentage": 93.85, "elapsed_time": "7:33:48", "remaining_time": "0:29:45"}
|
||||
{"current_steps": 3375, "total_steps": 3591, "loss": 0.1877, "lr": 4.4354072592620813e-07, "epoch": 6.578947368421053, "percentage": 93.98, "elapsed_time": "7:34:32", "remaining_time": "0:29:05"}
|
||||
{"current_steps": 3380, "total_steps": 3591, "loss": 0.2409, "lr": 4.234081405608903e-07, "epoch": 6.58869395711501, "percentage": 94.12, "elapsed_time": "7:35:15", "remaining_time": "0:28:25"}
|
||||
{"current_steps": 3385, "total_steps": 3591, "loss": 0.199, "lr": 4.037382579333549e-07, "epoch": 6.598440545808967, "percentage": 94.26, "elapsed_time": "7:36:01", "remaining_time": "0:27:45"}
|
||||
{"current_steps": 3390, "total_steps": 3591, "loss": 0.2588, "lr": 3.8453154295131414e-07, "epoch": 6.6081871345029235, "percentage": 94.4, "elapsed_time": "7:36:42", "remaining_time": "0:27:04"}
|
||||
{"current_steps": 3395, "total_steps": 3591, "loss": 0.1731, "lr": 3.657884495752795e-07, "epoch": 6.617933723196881, "percentage": 94.54, "elapsed_time": "7:37:23", "remaining_time": "0:26:24"}
|
||||
{"current_steps": 3400, "total_steps": 3591, "loss": 0.2051, "lr": 3.4750942080782557e-07, "epoch": 6.627680311890838, "percentage": 94.68, "elapsed_time": "7:38:10", "remaining_time": "0:25:44"}
|
||||
{"current_steps": 3405, "total_steps": 3591, "loss": 0.1734, "lr": 3.2969488868312083e-07, "epoch": 6.637426900584796, "percentage": 94.82, "elapsed_time": "7:38:58", "remaining_time": "0:25:04"}
|
||||
{"current_steps": 3410, "total_steps": 3591, "loss": 0.1896, "lr": 3.12345274256729e-07, "epoch": 6.647173489278752, "percentage": 94.96, "elapsed_time": "7:39:39", "remaining_time": "0:24:23"}
|
||||
{"current_steps": 3415, "total_steps": 3591, "loss": 0.2371, "lr": 2.954609875956327e-07, "epoch": 6.65692007797271, "percentage": 95.1, "elapsed_time": "7:40:16", "remaining_time": "0:23:43"}
|
||||
{"current_steps": 3420, "total_steps": 3591, "loss": 0.1901, "lr": 2.7904242776856774e-07, "epoch": 6.666666666666667, "percentage": 95.24, "elapsed_time": "7:40:49", "remaining_time": "0:23:02"}
|
||||
{"current_steps": 3425, "total_steps": 3591, "loss": 0.2338, "lr": 2.6308998283657293e-07, "epoch": 6.6764132553606235, "percentage": 95.38, "elapsed_time": "7:41:21", "remaining_time": "0:22:21"}
|
||||
{"current_steps": 3430, "total_steps": 3591, "loss": 0.212, "lr": 2.476040298438198e-07, "epoch": 6.686159844054581, "percentage": 95.52, "elapsed_time": "7:41:54", "remaining_time": "0:21:40"}
|
||||
{"current_steps": 3435, "total_steps": 3591, "loss": 0.2124, "lr": 2.325849348087128e-07, "epoch": 6.695906432748538, "percentage": 95.66, "elapsed_time": "7:42:30", "remaining_time": "0:21:00"}
|
||||
{"current_steps": 3440, "total_steps": 3591, "loss": 0.1947, "lr": 2.1803305271522745e-07, "epoch": 6.705653021442495, "percentage": 95.8, "elapsed_time": "7:43:08", "remaining_time": "0:20:19"}
|
||||
{"current_steps": 3445, "total_steps": 3591, "loss": 0.2489, "lr": 2.0394872750451933e-07, "epoch": 6.715399610136452, "percentage": 95.93, "elapsed_time": "7:43:39", "remaining_time": "0:19:38"}
|
||||
{"current_steps": 3450, "total_steps": 3591, "loss": 0.1782, "lr": 1.903322920668038e-07, "epoch": 6.7251461988304095, "percentage": 96.07, "elapsed_time": "7:44:12", "remaining_time": "0:18:58"}
|
||||
{"current_steps": 3455, "total_steps": 3591, "loss": 0.1816, "lr": 1.7718406823348023e-07, "epoch": 6.734892787524366, "percentage": 96.21, "elapsed_time": "7:44:55", "remaining_time": "0:18:18"}
|
||||
{"current_steps": 3460, "total_steps": 3591, "loss": 0.2113, "lr": 1.6450436676952896e-07, "epoch": 6.744639376218323, "percentage": 96.35, "elapsed_time": "7:45:38", "remaining_time": "0:17:37"}
|
||||
{"current_steps": 3465, "total_steps": 3591, "loss": 0.1663, "lr": 1.5229348736615968e-07, "epoch": 6.754385964912281, "percentage": 96.49, "elapsed_time": "7:46:09", "remaining_time": "0:16:57"}
|
||||
{"current_steps": 3470, "total_steps": 3591, "loss": 0.2027, "lr": 1.4055171863374127e-07, "epoch": 6.764132553606238, "percentage": 96.63, "elapsed_time": "7:46:44", "remaining_time": "0:16:16"}
|
||||
{"current_steps": 3475, "total_steps": 3591, "loss": 0.2291, "lr": 1.2927933809496974e-07, "epoch": 6.773879142300195, "percentage": 96.77, "elapsed_time": "7:47:27", "remaining_time": "0:15:36"}
|
||||
{"current_steps": 3480, "total_steps": 3591, "loss": 0.2047, "lr": 1.1847661217830875e-07, "epoch": 6.783625730994152, "percentage": 96.91, "elapsed_time": "7:48:03", "remaining_time": "0:14:55"}
|
||||
{"current_steps": 3485, "total_steps": 3591, "loss": 0.1821, "lr": 1.0814379621169935e-07, "epoch": 6.7933723196881095, "percentage": 97.05, "elapsed_time": "7:48:49", "remaining_time": "0:14:15"}
|
||||
{"current_steps": 3490, "total_steps": 3591, "loss": 0.203, "lr": 9.828113441651798e-08, "epoch": 6.803118908382066, "percentage": 97.19, "elapsed_time": "7:49:33", "remaining_time": "0:13:35"}
|
||||
{"current_steps": 3495, "total_steps": 3591, "loss": 0.1954, "lr": 8.888885990181007e-08, "epoch": 6.812865497076023, "percentage": 97.33, "elapsed_time": "7:50:07", "remaining_time": "0:12:54"}
|
||||
{"current_steps": 3500, "total_steps": 3591, "loss": 0.1841, "lr": 7.996719465877656e-08, "epoch": 6.822612085769981, "percentage": 97.47, "elapsed_time": "7:50:45", "remaining_time": "0:12:14"}
|
||||
{"current_steps": 3505, "total_steps": 3591, "loss": 0.1813, "lr": 7.151634955552489e-08, "epoch": 6.832358674463937, "percentage": 97.61, "elapsed_time": "7:51:25", "remaining_time": "0:11:34"}
|
||||
{"current_steps": 3510, "total_steps": 3591, "loss": 0.1839, "lr": 6.35365243320929e-08, "epoch": 6.842105263157895, "percentage": 97.74, "elapsed_time": "7:52:08", "remaining_time": "0:10:53"}
|
||||
{"current_steps": 3515, "total_steps": 3591, "loss": 0.2092, "lr": 5.602790759572374e-08, "epoch": 6.851851851851852, "percentage": 97.88, "elapsed_time": "7:52:40", "remaining_time": "0:10:13"}
|
||||
{"current_steps": 3520, "total_steps": 3591, "loss": 0.183, "lr": 4.8990676816402794e-08, "epoch": 6.861598440545809, "percentage": 98.02, "elapsed_time": "7:53:18", "remaining_time": "0:09:32"}
|
||||
{"current_steps": 3525, "total_steps": 3591, "loss": 0.1964, "lr": 4.2424998322669886e-08, "epoch": 6.871345029239766, "percentage": 98.16, "elapsed_time": "7:53:54", "remaining_time": "0:08:52"}
|
||||
{"current_steps": 3530, "total_steps": 3591, "loss": 0.1947, "lr": 3.633102729768689e-08, "epoch": 6.881091617933723, "percentage": 98.3, "elapsed_time": "7:54:38", "remaining_time": "0:08:12"}
|
||||
{"current_steps": 3535, "total_steps": 3591, "loss": 0.1942, "lr": 3.070890777557178e-08, "epoch": 6.890838206627681, "percentage": 98.44, "elapsed_time": "7:55:12", "remaining_time": "0:07:31"}
|
||||
{"current_steps": 3540, "total_steps": 3591, "loss": 0.1782, "lr": 2.5558772637983566e-08, "epoch": 6.900584795321637, "percentage": 98.58, "elapsed_time": "7:55:52", "remaining_time": "0:06:51"}
|
||||
{"current_steps": 3545, "total_steps": 3591, "loss": 0.2136, "lr": 2.0880743610998122e-08, "epoch": 6.910331384015595, "percentage": 98.72, "elapsed_time": "7:56:36", "remaining_time": "0:06:11"}
|
||||
{"current_steps": 3550, "total_steps": 3591, "loss": 0.2187, "lr": 1.6674931262214977e-08, "epoch": 6.920077972709551, "percentage": 98.86, "elapsed_time": "7:57:16", "remaining_time": "0:05:30"}
|
||||
{"current_steps": 3555, "total_steps": 3591, "loss": 0.2054, "lr": 1.2941434998154922e-08, "epoch": 6.9298245614035086, "percentage": 99.0, "elapsed_time": "7:58:04", "remaining_time": "0:04:50"}
|
||||
{"current_steps": 3560, "total_steps": 3591, "loss": 0.2011, "lr": 9.680343061913011e-09, "epoch": 6.939571150097466, "percentage": 99.14, "elapsed_time": "7:58:45", "remaining_time": "0:04:10"}
|
||||
{"current_steps": 3565, "total_steps": 3591, "loss": 0.1977, "lr": 6.891732531053574e-09, "epoch": 6.949317738791423, "percentage": 99.28, "elapsed_time": "7:59:25", "remaining_time": "0:03:29"}
|
||||
{"current_steps": 3570, "total_steps": 3591, "loss": 0.2085, "lr": 4.57566931581388e-09, "epoch": 6.95906432748538, "percentage": 99.42, "elapsed_time": "8:00:03", "remaining_time": "0:02:49"}
|
||||
{"current_steps": 3575, "total_steps": 3591, "loss": 0.2038, "lr": 2.7322081575298365e-09, "epoch": 6.968810916179337, "percentage": 99.55, "elapsed_time": "8:00:50", "remaining_time": "0:02:09"}
|
||||
{"current_steps": 3580, "total_steps": 3591, "loss": 0.2141, "lr": 1.3613926273459144e-09, "epoch": 6.978557504873295, "percentage": 99.69, "elapsed_time": "8:01:27", "remaining_time": "0:01:28"}
|
||||
{"current_steps": 3585, "total_steps": 3591, "loss": 0.2207, "lr": 4.632551251848583e-10, "epoch": 6.988304093567251, "percentage": 99.83, "elapsed_time": "8:02:14", "remaining_time": "0:00:48"}
|
||||
{"current_steps": 3590, "total_steps": 3591, "loss": 0.2263, "lr": 3.7816878986074446e-11, "epoch": 6.9980506822612085, "percentage": 99.97, "elapsed_time": "8:02:46", "remaining_time": "0:00:08"}
|
||||
{"current_steps": 3591, "total_steps": 3591, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "8:03:33", "remaining_time": "0:00:00"}
|
||||
7945
trainer_state.json
Normal file
7945
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e016c8af4ace3bf5e6fb9f37bbb7b696aecf777a6c23bdb3953b9881a97a7ca7
|
||||
size 8721
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user